// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2018 Eugene Zhulenev <ezhulenev@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_USE_THREADS
#include"main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor; using Eigen::RowMajor; using Eigen::ColMajor; using Eigen::internal::TiledEvaluation;
// A set of tests to verify that different TensorExecutor strategies yield the
// same results for all the ops, supporting tiled evaluation.
// Default assignment that does no use block evaluation or vectorization. // We assume that default coefficient evaluation is well tested and correct. template <typename Dst, typename Expr> staticvoid DefaultAssign(Dst& dst, Expr expr) { using Assign = Eigen::TensorAssignOp<Dst, const Expr>; using Executor =
Eigen::internal::TensorExecutor<const Assign, DefaultDevice, /*Vectorizable=*/false, /*Tiling=*/TiledEvaluation::Off>;
// Produce a random dimensions array; every extent is drawn uniformly from
// the inclusive range [min_dim, max_dim].
template <int NumDims>
static array<Index, NumDims> RandomDims(int min_dim = 1, int max_dim = 20) {
  array<Index, NumDims> dims;
  for (int d = 0; d < NumDims; ++d) {
    dims[d] = internal::random<int>(min_dim, max_dim);
  }
  return dims;
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout> staticvoid test_execute_unary_expr(Device d)
{ static constexpr int Options = 0 | Layout;
// Pick a large enough tensor size to bypass small tensor block evaluation // optimization. auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
T square = src.coeff(i) * src.coeff(i);
VERIFY_IS_EQUAL(square, dst.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout> staticvoid test_execute_binary_expr(Device d)
{ static constexpr int Options = 0 | Layout;
// Pick a large enough tensor size to bypass small tensor block evaluation // optimization. auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
// We assume that broadcasting on a default device is tested and correct, so // we can rely on it to verify correctness of tensor executor and tiling.
Tensor<T, NumDims, Options, Index> golden;
golden = expr;
// Now do the broadcasting using configured tensor executor.
Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout> staticvoid test_execute_chipping_rvalue(Device d)
{ auto dims = RandomDims<NumDims>(1, 10);
Tensor<T, NumDims, Layout, Index> src(dims);
src.setRandom();
DSizes<Index, NumDims> shuffle; for (int i = 0; i < NumDims; ++i) shuffle[i] = i;
// Test all possible shuffle permutations. do {
DSizes<Index, NumDims> shuffled_dims; for (int i = 0; i < NumDims; ++i) {
shuffled_dims[i] = dims[shuffle[i]];
}
constauto expr = src.shuffle(shuffle);
// We assume that shuffling on a default device is tested and correct, so // we can rely on it to verify correctness of tensor executor and tiling.
Tensor<T, NumDims, Options, Index> golden(shuffled_dims);
DefaultAssign(golden, expr);
// Now do the shuffling using configured tensor executor.
Tensor<T, NumDims, Options, Index> dst(shuffled_dims);
DeviceAssign<Vectorizable, Tiling>(d, dst, expr);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
} while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout> staticvoid test_execute_shuffle_lvalue(Device d)
{ static constexpr int Options = 0 | Layout;
DSizes<Index, NumDims> shuffle; for (int i = 0; i < NumDims; ++i) shuffle[i] = i;
// Test all possible shuffle permutations. do {
DSizes<Index, NumDims> shuffled_dims; for (int i = 0; i < NumDims; ++i) shuffled_dims[shuffle[i]] = dims[i];
// We assume that shuffling on a default device is tested and correct, so // we can rely on it to verify correctness of tensor executor and tiling.
Tensor<T, NumDims, Options, Index> golden(shuffled_dims); auto golden_shuffle = golden.shuffle(shuffle);
DefaultAssign(golden_shuffle, src);
// Now do the shuffling using configured tensor executor.
Tensor<T, NumDims, Options, Index> dst(shuffled_dims); auto dst_shuffle = dst.shuffle(shuffle);
DeviceAssign<Vectorizable, Tiling>(d, dst_shuffle, src);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
} while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout> staticvoid test_execute_reshape(Device d)
{
static_assert(NumDims >= 2, "NumDims must be greater or equal than 2");
static constexpr int ReshapedDims = NumDims - 1; static constexpr int Options = 0 | Layout;
// Multiple 0th dimension and then shuffle.
std::vector<Index> shuffle; for (int i = 0; i < ReshapedDims; ++i) shuffle.push_back(i);
std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());
DSizes<Index, ReshapedDims> reshaped_dims;
reshaped_dims[shuffle[0]] = dims[0] * dims[1]; for (int i = 1; i < ReshapedDims; ++i) reshaped_dims[shuffle[i]] = dims[i + 1];
Tensor<T, ReshapedDims, Options, Index> golden = src.reshape(reshaped_dims);
// Now reshape using configured tensor executor.
Tensor<T, ReshapedDims, Options, Index> dst(golden.dimensions());
auto expr = src.reshape(reshaped_dims);
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout> staticvoid test_execute_slice_rvalue(Device d)
{
static_assert(NumDims >= 2, "NumDims must be greater or equal than 2"); static constexpr int Options = 0 | Layout;
// Pick a random slice of src tensor. auto slice_start = DSizes<Index, NumDims>(RandomDims<NumDims>()); auto slice_size = DSizes<Index, NumDims>(RandomDims<NumDims>());
// Make sure that slice start + size do not overflow tensor dims. for (int i = 0; i < NumDims; ++i) {
slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
}
Tensor<T, NumDims, Options, Index> golden =
src.slice(slice_start, slice_size);
// Now reshape using configured tensor executor.
Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
auto expr = src.slice(slice_start, slice_size);
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout> staticvoid test_execute_slice_lvalue(Device d)
{
static_assert(NumDims >= 2, "NumDims must be greater or equal than 2"); static constexpr int Options = 0 | Layout;
// Pick a random slice of src tensor. auto slice_start = DSizes<Index, NumDims>(RandomDims<NumDims>(1, 10)); auto slice_size = DSizes<Index, NumDims>(RandomDims<NumDims>(1, 10));
// Make sure that slice start + size do not overflow tensor dims. for (int i = 0; i < NumDims; ++i) {
slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
}
// We assume that broadcasting on a default device is tested and correct, so // we can rely on it to verify correctness of tensor executor and tiling.
Tensor<T, NumDims, Options, Index> golden;
golden = expr;
// Now do the broadcasting using configured tensor executor.
Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
}
// Deterministic functor for TensorGeneratorOp tests: maps a coordinate
// array to a fixed linear combination of its entries, so results are
// reproducible across executors.
template <typename T, int NumDims>
struct DummyGenerator {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  T operator()(const array<Index, NumDims>& dims) const {
    T acc = static_cast<T>(0);
    for (int axis = 0; axis < NumDims; ++axis) {
      acc += static_cast<T>((axis + 1) * dims[axis]);
    }
    return acc;
  }
};
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout> staticvoid test_execute_generator_op(Device d)
{ static constexpr int Options = 0 | Layout;
// We assume that generator on a default device is tested and correct, so // we can rely on it to verify correctness of tensor executor and tiling.
Tensor<T, NumDims, Options, Index> golden;
golden = expr;
// Now do the broadcasting using configured tensor executor.
Tensor<T, NumDims, Options, Index> dst(golden.dimensions());
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout> staticvoid test_execute_reverse_rvalue(Device d)
{ static constexpr int Options = 0 | Layout;
// Reverse half of the dimensions.
Eigen::array<bool, NumDims> reverse; for (int i = 0; i < NumDims; ++i) reverse[i] = internal::random<bool>();
constauto expr = src.reverse(reverse);
// We assume that reversing on a default device is tested and correct, so // we can rely on it to verify correctness of tensor executor and tiling.
Tensor <T, NumDims, Options, Index> golden;
golden = expr;
// Now do the reversing using configured tensor executor.
Tensor <T, NumDims, Options, Index> dst(golden.dimensions());
using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>; using Executor =
internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;
Executor::run(Assign(dst, expr), d);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout> staticvoid test_async_execute_unary_expr(Device d)
{ static constexpr int Options = 0 | Layout;
// Pick a large enough tensor size to bypass small tensor block evaluation // optimization. auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
T square = src.coeff(i) * src.coeff(i);
VERIFY_IS_EQUAL(square, dst.coeff(i));
}
}
template <typename T, int NumDims, typename Device, bool Vectorizable,
TiledEvaluation Tiling, int Layout> staticvoid test_async_execute_binary_expr(Device d)
{ static constexpr int Options = 0 | Layout;
// Pick a large enough tensor size to bypass small tensor block evaluation // optimization. auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);
// NOTE(review): the remainder of this file (the rest of the async binary
// test and the EIGEN_DECLARE_TEST registration block) is missing — the
// original trailing content was replaced by unrelated web-page text.
// Restore the tail from upstream history.