Quelle cxx11_tensor_executor.cpp

Sprache: C

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2018 Eugene Zhulenev <ezhulenev@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_USE_THREADS

#include "main.h"

#include <Eigen/CXX11/Tensor>

using Eigen::Tensor;
using Eigen::RowMajor;
using Eigen::ColMajor;
using Eigen::internal::TiledEvaluation;

// A set of tests to verify that different TensorExecutor strategies yields the
// same results for all the ops, supporting tiled evaluation.

// Default assignment that does no use block evaluation or vectorization.
// We assume that default coefficient evaluation is well tested and correct.
template <typename Dst, typename Expr>
static void DefaultAssign(Dst& dst, Expr expr) {
  using Assign = Eigen::TensorAssignOp<Dst, const Expr>;
  using Executor =
      Eigen::internal::TensorExecutor<const Assign, DefaultDevice,
                                      /*Vectorizable=*/false,
                                      /*Tiling=*/TiledEvaluation::Off>;

  Executor::run(Assign(dst, expr), DefaultDevice());
}

// Assignment with specified device and tiling strategy.
template <bool Vectorizable, TiledEvaluation Tiling, typename Device,
          typename Dst, typename Expr>
static void DeviceAssign(Device& d, Dst& dst, Expr expr) {
  using Assign = Eigen::TensorAssignOp<Dst, const Expr>;
  using Executor = Eigen::internal::TensorExecutor<const Assign, Device,
                                                   Vectorizable, Tiling>;

  Executor::run(Assign(dst, expr), d);
}

template <int NumDims>
static array<Index, NumDims> RandomDims(int min_dim = 1, int max_dim = 20) {
  array<Index, NumDims> dims;
  for (int i = 0; i < NumDims; ++i) {
    dims[i] = internal::random<int>(min_dim, max_dim);
  }
  return dims;
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
          TiledEvaluation Tiling, int Layout>
static void test_execute_unary_expr(Device d)
{
  static constexpr int Options = 0 | Layout;

  // Pick a large enough tensor size to bypass small tensor block evaluation
  // optimization.
  auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);

  Tensor<T, NumDims, Options, Index> src(dims);
  Tensor<T, NumDims, Options, Index> dst(dims);

  src.setRandom();
  const auto expr = src.square();

  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
  using Executor =
      internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;

  Executor::run(Assign(dst, expr), d);

  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
    T square = src.coeff(i) * src.coeff(i);
    VERIFY_IS_EQUAL(square, dst.coeff(i));
  }
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
          TiledEvaluation Tiling, int Layout>
static void test_execute_binary_expr(Device d)
{
  static constexpr int Options = 0 | Layout;

  // Pick a large enough tensor size to bypass small tensor block evaluation
  // optimization.
  auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);

  Tensor<T, NumDims, Options, Index> lhs(dims);
  Tensor<T, NumDims, Options, Index> rhs(dims);
  Tensor<T, NumDims, Options, Index> dst(dims);

  lhs.setRandom();
  rhs.setRandom();

  const auto expr = lhs + rhs;

  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
  using Executor =
      internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;

  Executor::run(Assign(dst, expr), d);

  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
    T sum = lhs.coeff(i) + rhs.coeff(i);
    VERIFY_IS_EQUAL(sum, dst.coeff(i));
  }
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
          TiledEvaluation Tiling, int Layout>
static void test_execute_broadcasting(Device d)
{
  static constexpr int Options = 0 | Layout;

  auto dims = RandomDims<NumDims>(1, 10);
  Tensor<T, NumDims, Options, Index> src(dims);
  src.setRandom();

  const auto broadcasts = RandomDims<NumDims>(1, 7);
  const auto expr = src.broadcast(broadcasts);

  // We assume that broadcasting on a default device is tested and correct, so
  // we can rely on it to verify correctness of tensor executor and tiling.
  Tensor<T, NumDims, Options, Index> golden;
  golden = expr;

  // Now do the broadcasting using configured tensor executor.
  Tensor<T, NumDims, Options, Index> dst(golden.dimensions());

  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
  using Executor =
      internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;

  Executor::run(Assign(dst, expr), d);

  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
  }
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
          TiledEvaluation Tiling, int Layout>
static void test_execute_chipping_rvalue(Device d)
{
  auto dims = RandomDims<NumDims>(1, 10);
  Tensor<T, NumDims, Layout, Index> src(dims);
  src.setRandom();

#define TEST_CHIPPING(CHIP_DIM)                                           \
  if (NumDims > (CHIP_DIM)) {                                             \
    const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1); \
    const auto expr = src.template chip<(CHIP_DIM)>(offset);              \
                                                                          \
    Tensor<T, NumDims - 1, Layout, Index> golden;                         \
    golden = expr;                                                        \
                                                                          \
    Tensor<T, NumDims - 1, Layout, Index> dst(golden.dimensions());       \
                                                                          \
    using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;   \
    using Executor = internal::TensorExecutor<const Assign, Device,       \
                                              Vectorizable, Tiling>;      \
                                                                          \
    Executor::run(Assign(dst, expr), d);                                  \
                                                                          \
    for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {            \
      VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));                     \
    }                                                                     \
  }

  TEST_CHIPPING(0)
  TEST_CHIPPING(1)
  TEST_CHIPPING(2)
  TEST_CHIPPING(3)
  TEST_CHIPPING(4)
  TEST_CHIPPING(5)

#undef TEST_CHIPPING
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
    TiledEvaluation Tiling, int Layout>
static void test_execute_chipping_lvalue(Device d)
{
  auto dims = RandomDims<NumDims>(1, 10);

#define TEST_CHIPPING(CHIP_DIM)                                             \
  if (NumDims > (CHIP_DIM)) {                                               \
    /* Generate random data that we'll assign to the chipped tensor dim. */ \
    array<Index, NumDims - 1> src_dims;                                     \
    for (int i = 0; i < NumDims - 1; ++i) {                                 \
      int dim = i < (CHIP_DIM) ? i : i + 1;                                 \
      src_dims[i] = dims[dim];                                              \
    }                                                                       \
                                                                            \
    Tensor<T, NumDims - 1, Layout, Index> src(src_dims);                    \
    src.setRandom();                                                        \
                                                                            \
    const auto offset = internal::random<Index>(0, dims[(CHIP_DIM)] - 1);   \
                                                                            \
    Tensor<T, NumDims, Layout, Index> random(dims);                         \
    random.setZero();                                                       \
                                                                            \
    Tensor<T, NumDims, Layout, Index> golden(dims);                         \
    golden = random;                                                        \
    golden.template chip<(CHIP_DIM)>(offset) = src;                         \
                                                                            \
    Tensor<T, NumDims, Layout, Index> dst(dims);                            \
    dst = random;                                                           \
    auto expr = dst.template chip<(CHIP_DIM)>(offset);                      \
                                                                            \
    using Assign = TensorAssignOp<decltype(expr), const decltype(src)>;     \
    using Executor = internal::TensorExecutor<const Assign, Device,         \
                                              Vectorizable, Tiling>;        \
                                                                            \
    Executor::run(Assign(expr, src), d);                                    \
                                                                            \
    for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {              \
      VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));                       \
    }                                                                       \
  }

  TEST_CHIPPING(0)
  TEST_CHIPPING(1)
  TEST_CHIPPING(2)
  TEST_CHIPPING(3)
  TEST_CHIPPING(4)
  TEST_CHIPPING(5)

#undef TEST_CHIPPING
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
          TiledEvaluation Tiling, int Layout>
static void test_execute_shuffle_rvalue(Device d)
{
  static constexpr int Options = 0 | Layout;

  auto dims = RandomDims<NumDims>(1, 10);
  Tensor<T, NumDims, Options, Index> src(dims);
  src.setRandom();

  DSizes<Index, NumDims> shuffle;
  for (int i = 0; i < NumDims; ++i) shuffle[i] = i;

  // Test all possible shuffle permutations.
  do {
    DSizes<Index, NumDims> shuffled_dims;
    for (int i = 0; i < NumDims; ++i) {
      shuffled_dims[i] = dims[shuffle[i]];
    }

    const auto expr = src.shuffle(shuffle);

    // We assume that shuffling on a default device is tested and correct, so
    // we can rely on it to verify correctness of tensor executor and tiling.
    Tensor<T, NumDims, Options, Index> golden(shuffled_dims);
    DefaultAssign(golden, expr);

    // Now do the shuffling using configured tensor executor.
    Tensor<T, NumDims, Options, Index> dst(shuffled_dims);
    DeviceAssign<Vectorizable, Tiling>(d, dst, expr);

    for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
      VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
    }

  } while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
          TiledEvaluation Tiling, int Layout>
static void test_execute_shuffle_lvalue(Device d)
{
  static constexpr int Options = 0 | Layout;

  auto dims = RandomDims<NumDims>(5, 10);
  Tensor<T, NumDims, Options, Index> src(dims);
  src.setRandom();

  DSizes<Index, NumDims> shuffle;
  for (int i = 0; i < NumDims; ++i) shuffle[i] = i;

  // Test all possible shuffle permutations.
  do {
    DSizes<Index, NumDims> shuffled_dims;
    for (int i = 0; i < NumDims; ++i) shuffled_dims[shuffle[i]] = dims[i];

    // We assume that shuffling on a default device is tested and correct, so
    // we can rely on it to verify correctness of tensor executor and tiling.
    Tensor<T, NumDims, Options, Index> golden(shuffled_dims);
    auto golden_shuffle = golden.shuffle(shuffle);
    DefaultAssign(golden_shuffle, src);

    // Now do the shuffling using configured tensor executor.
    Tensor<T, NumDims, Options, Index> dst(shuffled_dims);
    auto dst_shuffle = dst.shuffle(shuffle);
    DeviceAssign<Vectorizable, Tiling>(d, dst_shuffle, src);

    for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
      VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
    }

  } while (std::next_permutation(&shuffle[0], &shuffle[0] + NumDims));
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
    TiledEvaluation Tiling, int Layout>
static void test_execute_reshape(Device d)
{
  static_assert(NumDims >= 2, "NumDims must be greater or equal than 2");

  static constexpr int ReshapedDims = NumDims - 1;
  static constexpr int Options = 0 | Layout;

  auto dims = RandomDims<NumDims>(5, 10);
  Tensor<T, NumDims, Options, Index> src(dims);
  src.setRandom();

  // Multiple 0th dimension and then shuffle.
  std::vector<Index> shuffle;
  for (int i = 0; i < ReshapedDims; ++i) shuffle.push_back(i);
  std::shuffle(shuffle.begin(), shuffle.end(), std::mt19937());

  DSizes<Index, ReshapedDims> reshaped_dims;
  reshaped_dims[shuffle[0]] = dims[0] * dims[1];
  for (int i = 1; i < ReshapedDims; ++i) reshaped_dims[shuffle[i]] = dims[i + 1];

  Tensor<T, ReshapedDims, Options, Index> golden = src.reshape(reshaped_dims);

  // Now reshape using configured tensor executor.
  Tensor<T, ReshapedDims, Options, Index> dst(golden.dimensions());

  auto expr = src.reshape(reshaped_dims);

  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
  using Executor =
      internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;

  Executor::run(Assign(dst, expr), d);

  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
  }
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
          TiledEvaluation Tiling, int Layout>
static void test_execute_slice_rvalue(Device d)
{
  static_assert(NumDims >= 2, "NumDims must be greater or equal than 2");
  static constexpr int Options = 0 | Layout;

  auto dims = RandomDims<NumDims>(5, 10);
  Tensor<T, NumDims, Options, Index> src(dims);
  src.setRandom();

  // Pick a random slice of src tensor.
  auto slice_start = DSizes<Index, NumDims>(RandomDims<NumDims>());
  auto slice_size = DSizes<Index, NumDims>(RandomDims<NumDims>());

  // Make sure that slice start + size do not overflow tensor dims.
  for (int i = 0; i < NumDims; ++i) {
    slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
    slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
  }

  Tensor<T, NumDims, Options, Index> golden =
      src.slice(slice_start, slice_size);

  // Now reshape using configured tensor executor.
  Tensor<T, NumDims, Options, Index> dst(golden.dimensions());

  auto expr = src.slice(slice_start, slice_size);

  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
  using Executor =
      internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;

  Executor::run(Assign(dst, expr), d);

  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
  }
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
    TiledEvaluation Tiling, int Layout>
static void test_execute_slice_lvalue(Device d)
{
  static_assert(NumDims >= 2, "NumDims must be greater or equal than 2");
  static constexpr int Options = 0 | Layout;

  auto dims = RandomDims<NumDims>(5, 10);
  Tensor<T, NumDims, Options, Index> src(dims);
  src.setRandom();

  // Pick a random slice of src tensor.
  auto slice_start = DSizes<Index, NumDims>(RandomDims<NumDims>(1, 10));
  auto slice_size = DSizes<Index, NumDims>(RandomDims<NumDims>(1, 10));

  // Make sure that slice start + size do not overflow tensor dims.
  for (int i = 0; i < NumDims; ++i) {
    slice_start[i] = numext::mini(dims[i] - 1, slice_start[i]);
    slice_size[i] = numext::mini(slice_size[i], dims[i] - slice_start[i]);
  }

  Tensor<T, NumDims, Options, Index> slice(slice_size);
  slice.setRandom();

  // Assign a slice using default executor.
  Tensor<T, NumDims, Options, Index> golden = src;
  golden.slice(slice_start, slice_size) = slice;

  // And using configured execution strategy.
  Tensor<T, NumDims, Options, Index> dst = src;
  auto expr = dst.slice(slice_start, slice_size);

  using Assign = TensorAssignOp<decltype(expr), const decltype(slice)>;
  using Executor =
      internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;

  Executor::run(Assign(expr, slice), d);

  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
  }
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
    TiledEvaluation Tiling, int Layout>
static void test_execute_broadcasting_of_forced_eval(Device d)
{
  static constexpr int Options = 0 | Layout;

  auto dims = RandomDims<NumDims>(1, 10);
  Tensor<T, NumDims, Options, Index> src(dims);
  src.setRandom();

  const auto broadcasts = RandomDims<NumDims>(1, 7);
  const auto expr = src.square().eval().broadcast(broadcasts);

  // We assume that broadcasting on a default device is tested and correct, so
  // we can rely on it to verify correctness of tensor executor and tiling.
  Tensor<T, NumDims, Options, Index> golden;
  golden = expr;

  // Now do the broadcasting using configured tensor executor.
  Tensor<T, NumDims, Options, Index> dst(golden.dimensions());

  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
  using Executor =
      internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;

  Executor::run(Assign(dst, expr), d);

  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
  }
}

template<typename T, int NumDims>
struct DummyGenerator {
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
  T operator()(const array <Index, NumDims>& dims) const {
    T result = static_cast<T>(0);
    for (int i = 0; i < NumDims; ++i) {
      result += static_cast<T>((i + 1) * dims[i]);
    }
    return result;
  }
};

template <typename T, int NumDims, typename Device, bool Vectorizable,
    TiledEvaluation Tiling, int Layout>
static void test_execute_generator_op(Device d)
{
  static constexpr int Options = 0 | Layout;

  auto dims = RandomDims<NumDims>(20, 30);
  Tensor<T, NumDims, Options, Index> src(dims);
  src.setRandom();

  const auto expr = src.generate(DummyGenerator<T, NumDims>());

  // We assume that generator on a default device is tested and correct, so
  // we can rely on it to verify correctness of tensor executor and tiling.
  Tensor<T, NumDims, Options, Index> golden;
  golden = expr;

  // Now do the broadcasting using configured tensor executor.
  Tensor<T, NumDims, Options, Index> dst(golden.dimensions());

  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
  using Executor =
    internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;

  Executor::run(Assign(dst, expr), d);

  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
  }
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
    TiledEvaluation Tiling, int Layout>
static void test_execute_reverse_rvalue(Device d)
{
  static constexpr int Options = 0 | Layout;

  auto dims = RandomDims<NumDims>(1, numext::pow(1000000.0, 1.0 / NumDims));
  Tensor <T, NumDims, Options, Index> src(dims);
  src.setRandom();

  // Reverse half of the dimensions.
  Eigen::array<bool, NumDims> reverse;
  for (int i = 0; i < NumDims; ++i) reverse[i] = internal::random<bool>();

  const auto expr = src.reverse(reverse);

  // We assume that reversing on a default device is tested and correct, so
  // we can rely on it to verify correctness of tensor executor and tiling.
  Tensor <T, NumDims, Options, Index> golden;
  golden = expr;

  // Now do the reversing using configured tensor executor.
  Tensor <T, NumDims, Options, Index> dst(golden.dimensions());

  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
  using Executor =
    internal::TensorExecutor<const Assign, Device, Vectorizable, Tiling>;

  Executor::run(Assign(dst, expr), d);

  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
    VERIFY_IS_EQUAL(dst.coeff(i), golden.coeff(i));
  }
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
          TiledEvaluation Tiling, int Layout>
static void test_async_execute_unary_expr(Device d)
{
  static constexpr int Options = 0 | Layout;

  // Pick a large enough tensor size to bypass small tensor block evaluation
  // optimization.
  auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);

  Tensor<T, NumDims, Options, Index> src(dims);
  Tensor<T, NumDims, Options, Index> dst(dims);

  src.setRandom();
  const auto expr = src.square();

  Eigen::Barrier done(1);
  auto on_done = [&done]() { done.Notify(); };

  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
  using DoneCallback = decltype(on_done);
  using Executor = internal::TensorAsyncExecutor<const Assign, Device, DoneCallback,
                                                 Vectorizable, Tiling>;

  Executor::runAsync(Assign(dst, expr), d, on_done);
  done.Wait();

  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
    T square = src.coeff(i) * src.coeff(i);
    VERIFY_IS_EQUAL(square, dst.coeff(i));
  }
}

template <typename T, int NumDims, typename Device, bool Vectorizable,
          TiledEvaluation Tiling, int Layout>
static void test_async_execute_binary_expr(Device d)
{
  static constexpr int Options = 0 | Layout;

  // Pick a large enough tensor size to bypass small tensor block evaluation
  // optimization.
  auto dims = RandomDims<NumDims>(50 / NumDims, 100 / NumDims);

  Tensor<T, NumDims, Options, Index> lhs(dims);
  Tensor<T, NumDims, Options, Index> rhs(dims);
  Tensor<T, NumDims, Options, Index> dst(dims);

  lhs.setRandom();
  rhs.setRandom();

  const auto expr = lhs + rhs;

  Eigen::Barrier done(1);
  auto on_done = [&done]() { done.Notify(); };

  using Assign = TensorAssignOp<decltype(dst), const decltype(expr)>;
  using DoneCallback = decltype(on_done);
  using Executor = internal::TensorAsyncExecutor<const Assign, Device, DoneCallback,
                                                 Vectorizable, Tiling>;

  Executor::runAsync(Assign(dst, expr), d, on_done);
  done.Wait();

  for (Index i = 0; i < dst.dimensions().TotalSize(); ++i) {
    T sum = lhs.coeff(i) + rhs.coeff(i);
    VERIFY_IS_EQUAL(sum, dst.coeff(i));
  }
}

#ifdef EIGEN_DONT_VECTORIZE
#define VECTORIZABLE(VAL) !EIGEN_DONT_VECTORIZE && VAL
#else
#define VECTORIZABLE(VAL) VAL
#endif

#define CALL_SUBTEST_PART(PART) \
  CALL_SUBTEST_##PART

#define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS)                                                                                 \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice,    false,               TiledEvaluation::Off,     ColMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice,    false,               TiledEvaluation::On,  ColMajor>(default_device)));     \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice,    VECTORIZABLE(true),  TiledEvaluation::Off,     ColMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice,    VECTORIZABLE(true),  TiledEvaluation::On,  ColMajor>(default_device)));     \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice,    false,               TiledEvaluation::Off,     RowMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice,    false,               TiledEvaluation::On,  RowMajor>(default_device)));     \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice,    VECTORIZABLE(true),  TiledEvaluation::Off,     RowMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice,    VECTORIZABLE(true),  TiledEvaluation::On,  RowMajor>(default_device)));     \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false,               TiledEvaluation::Off,     ColMajor>(tp_device)));      \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false,               TiledEvaluation::On,  ColMajor>(tp_device)));          \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true),  TiledEvaluation::Off,     ColMajor>(tp_device)));      \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true),  TiledEvaluation::On,  ColMajor>(tp_device)));          \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false,               TiledEvaluation::Off,     RowMajor>(tp_device)));      \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false,               TiledEvaluation::On,  RowMajor>(tp_device)));          \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true),  TiledEvaluation::Off,     RowMajor>(tp_device)));      \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true),  TiledEvaluation::On,  RowMajor>(tp_device)))

// NOTE: Currently only ThreadPoolDevice supports async expression evaluation.
#define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS)                                                                      \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false,               TiledEvaluation::Off,     ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false,               TiledEvaluation::On,  ColMajor>(tp_device)));     \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true),  TiledEvaluation::Off,     ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true),  TiledEvaluation::On,  ColMajor>(tp_device)));     \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false,               TiledEvaluation::Off,     RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false,               TiledEvaluation::On,  RowMajor>(tp_device)));     \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true),  TiledEvaluation::Off,     RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true),  TiledEvaluation::On,  RowMajor>(tp_device)))

EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
  Eigen::DefaultDevice default_device;
  // Default device is unused in ASYNC tests.
  EIGEN_UNUSED_VARIABLE(default_device);

  const auto num_threads = internal::random<int>(20, 24);
  Eigen::ThreadPool tp(num_threads);
  Eigen::ThreadPoolDevice tp_device(&tp, num_threads);

  CALL_SUBTEST_COMBINATIONS(1, test_execute_unary_expr, float, 3);
  CALL_SUBTEST_COMBINATIONS(1, test_execute_unary_expr, float, 4);
  CALL_SUBTEST_COMBINATIONS(1, test_execute_unary_expr, float, 5);

  CALL_SUBTEST_COMBINATIONS(2, test_execute_binary_expr, float, 3);
  CALL_SUBTEST_COMBINATIONS(2, test_execute_binary_expr, float, 4);
  CALL_SUBTEST_COMBINATIONS(2, test_execute_binary_expr, float, 5);

  CALL_SUBTEST_COMBINATIONS(3, test_execute_broadcasting, float, 3);
  CALL_SUBTEST_COMBINATIONS(3, test_execute_broadcasting, float, 4);
  CALL_SUBTEST_COMBINATIONS(3, test_execute_broadcasting, float, 5);

  CALL_SUBTEST_COMBINATIONS(4, test_execute_chipping_rvalue, float, 3);
  CALL_SUBTEST_COMBINATIONS(4, test_execute_chipping_rvalue, float, 4);
  CALL_SUBTEST_COMBINATIONS(4, test_execute_chipping_rvalue, float, 5);

  CALL_SUBTEST_COMBINATIONS(5, test_execute_chipping_lvalue, float, 3);
  CALL_SUBTEST_COMBINATIONS(5, test_execute_chipping_lvalue, float, 4);
  CALL_SUBTEST_COMBINATIONS(5, test_execute_chipping_lvalue, float, 5);

  CALL_SUBTEST_COMBINATIONS(6, test_execute_shuffle_rvalue, float, 3);
  CALL_SUBTEST_COMBINATIONS(6, test_execute_shuffle_rvalue, float, 4);
  CALL_SUBTEST_COMBINATIONS(6, test_execute_shuffle_rvalue, float, 5);

  CALL_SUBTEST_COMBINATIONS(7, test_execute_shuffle_lvalue, float, 3);
  CALL_SUBTEST_COMBINATIONS(7, test_execute_shuffle_lvalue, float, 4);
  CALL_SUBTEST_COMBINATIONS(7, test_execute_shuffle_lvalue, float, 5);

  CALL_SUBTEST_COMBINATIONS(9, test_execute_reshape, float, 2);
  CALL_SUBTEST_COMBINATIONS(9, test_execute_reshape, float, 3);
  CALL_SUBTEST_COMBINATIONS(9, test_execute_reshape, float, 4);
  CALL_SUBTEST_COMBINATIONS(9, test_execute_reshape, float, 5);

  CALL_SUBTEST_COMBINATIONS(10, test_execute_slice_rvalue, float, 2);
  CALL_SUBTEST_COMBINATIONS(10, test_execute_slice_rvalue, float, 3);
  CALL_SUBTEST_COMBINATIONS(10, test_execute_slice_rvalue, float, 4);
  CALL_SUBTEST_COMBINATIONS(10, test_execute_slice_rvalue, float, 5);

  CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 2);
  CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 3);
  CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 4);
  CALL_SUBTEST_COMBINATIONS(11, test_execute_slice_lvalue, float, 5);

  CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 2);
  CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 3);
  CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 4);
  CALL_SUBTEST_COMBINATIONS(12, test_execute_broadcasting_of_forced_eval, float, 5);

  CALL_SUBTEST_COMBINATIONS(13, test_execute_generator_op, float, 2);
  CALL_SUBTEST_COMBINATIONS(13, test_execute_generator_op, float, 3);
  CALL_SUBTEST_COMBINATIONS(13, test_execute_generator_op, float, 4);
  CALL_SUBTEST_COMBINATIONS(13, test_execute_generator_op, float, 5);

  CALL_SUBTEST_COMBINATIONS(14, test_execute_reverse_rvalue, float, 1);
  CALL_SUBTEST_COMBINATIONS(14, test_execute_reverse_rvalue, float, 2);
  CALL_SUBTEST_COMBINATIONS(14, test_execute_reverse_rvalue, float, 3);
  CALL_SUBTEST_COMBINATIONS(14, test_execute_reverse_rvalue, float, 4);
  CALL_SUBTEST_COMBINATIONS(14, test_execute_reverse_rvalue, float, 5);

  CALL_ASYNC_SUBTEST_COMBINATIONS(15, test_async_execute_unary_expr, float, 3);
  CALL_ASYNC_SUBTEST_COMBINATIONS(15, test_async_execute_unary_expr, float, 4);
  CALL_ASYNC_SUBTEST_COMBINATIONS(15, test_async_execute_unary_expr, float, 5);

  CALL_ASYNC_SUBTEST_COMBINATIONS(16, test_async_execute_binary_expr, float, 3);
  CALL_ASYNC_SUBTEST_COMBINATIONS(16, test_async_execute_binary_expr, float, 4);
  CALL_ASYNC_SUBTEST_COMBINATIONS(16, test_async_execute_binary_expr, float, 5);

  // Force CMake to split this test.
  // EIGEN_SUFFIXES;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16
}

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.15 Sekunden (vorverarbeitet am 2026-04-25) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.