// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2016 // Mehdi Goli Codeplay Software Ltd. // Ralph Potter Codeplay Software Ltd. // Luke Iwanski Codeplay Software Ltd. // Contact: <eigen@codeplay.com> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_BENCH_CONTRACT_SYCL #define EIGEN_BENCH_CONTRACT_SYCL #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t #include <SYCL/sycl.hpp> #include <fstream> #include <iostream> #include <chrono> #include <ctime>
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::array; using Eigen::SyclDevice; using Eigen::Tensor; using Eigen::TensorMap;
// File-scope output stream opened on "Result.txt" (default ofstream mode, so
// an existing file is truncated). Nothing in the visible part of this file
// writes to it.
// NOTE(review): presumably finalizeBenchmark() reports through this stream —
// confirm against its definition.
std::ofstream out("Result.txt");
// Tail of a contraction benchmark that reports under the label "contraction".
// NOTE(review): the function header and the set-up of A, B, C, a_, b_, c_ are
// not visible in this extract, and line breaks have been lost: preprocessor
// directives and `//` comments are fused onto code lines below. Restore the
// original line structure before building.
//
// Visible behaviour: contract A with B into C on device_, 10 untimed warm-up
// runs when EIGEN_USE_SYCL is defined, then num_iters runs bracketed by
// get_time() calls; the two time stamps go to finalizeBenchmark() together
// with m_, k_, n_ and num_iters, after which a_, b_ and c_ are deallocated.
// NOTE(review): no device_.synchronize() is visible between the timed loop and
// the second get_time(); confirm the measurement covers kernel completion.
typedeftypename Tensor<T, 2>::DimensionPair DimPair;
Eigen::array<DimPair, 1> dims;
// DimPair(1, 0): contract index 1 of A with index 0 of B (the plain matrix
// product when both operands are rank 2).
dims[0] = DimPair(1, 0); #ifdef EIGEN_USE_SYCL // warmup for sycl for (int iter = 0; iter < 10; ++iter) {
C.device(device_) = A.contract(B, dims);
} #endif auto start = get_time(); for (int iter = 0; iter < num_iters; ++iter) {
C.device(device_) = A.contract(B, dims);
} auto end = get_time(); // Record the number of FLOPs executed per second (size_ multiplications and // additions for each value in the resulting tensor)
finalizeBenchmark(start, end, m_, k_, n_, num_iters, "contraction");
// Release the three buffers, then synchronize with the device.
device_.deallocate(a_);
device_.deallocate(b_);
device_.deallocate(c_);
device_.synchronize();
}
// Tail of a contraction benchmark that reports under the label
// "contractionRowMajor".
// NOTE(review): the function header and the set-up of A, B, C, a_, b_, c_ are
// not visible in this extract, so the row-major layout implied by the label
// cannot be confirmed from here. Line breaks have also been lost: preprocessor
// directives and `//` comments are fused onto code lines below.
//
// Visible behaviour: contract A with B into C on device_, 10 untimed warm-up
// runs when EIGEN_USE_SYCL is defined, then num_iters runs bracketed by
// get_time() calls; the two time stamps go to finalizeBenchmark() together
// with m_, k_, n_ and num_iters, after which a_, b_ and c_ are deallocated.
// NOTE(review): no device_.synchronize() is visible between the timed loop and
// the second get_time(); confirm the measurement covers kernel completion.
typedeftypename Tensor<T, 2>::DimensionPair DimPair;
Eigen::array<DimPair, 1> dims;
// DimPair(1, 0): contract index 1 of A with index 0 of B.
dims[0] = DimPair(1, 0); #ifdef EIGEN_USE_SYCL // warmup for sycl for (int iter = 0; iter < 10; ++iter) {
C.device(device_) = A.contract(B, dims);
} #endif auto start = get_time(); for (int iter = 0; iter < num_iters; ++iter) {
C.device(device_) = A.contract(B, dims);
} auto end = get_time(); // Record the number of FLOPs executed per second (size_ multiplications and // additions for each value in the resulting tensor)
finalizeBenchmark(start, end, m_, k_, n_, num_iters, "contractionRowMajor");
// Release the three buffers, then synchronize with the device.
device_.deallocate(a_);
device_.deallocate(b_);
device_.deallocate(c_);
device_.synchronize();
}
// Tail of a contraction benchmark that reports under the label
// "contractionAT".
// NOTE(review): the function header, the set-up of A, B, C, a_, b_, c_ and the
// closing brace of this function are not visible in this extract. Line breaks
// have also been lost: preprocessor directives and `//` comments are fused
// onto code lines below.
//
// Visible behaviour: contract A with B into C on device_, 10 untimed warm-up
// runs when EIGEN_USE_SYCL is defined, then num_iters runs bracketed by
// get_time() calls; the two time stamps go to finalizeBenchmark() together
// with m_, k_, n_ and num_iters, after which a_, b_ and c_ are deallocated.
// NOTE(review): no device_.synchronize() is visible between the timed loop and
// the second get_time(); confirm the measurement covers kernel completion.
typedeftypename Tensor<T, 2>::DimensionPair DimPair;
Eigen::array<DimPair, 1> dims;
// DimPair(0, 0): contract index 0 of A with index 0 of B, i.e. A is used
// transposed relative to the plain matrix product when both are rank 2.
dims[0] = DimPair(0, 0); #ifdef EIGEN_USE_SYCL // warmup for sycl for (int iter = 0; iter < 10; ++iter) {
C.device(device_) = A.contract(B, dims);
} #endif auto start = get_time(); for (int iter = 0; iter < num_iters; ++iter) {
C.device(device_) = A.contract(B, dims);
} auto end = get_time(); // Record the number of FLOPs executed per second (size_ multiplications and // additions for each value in the resulting tensor)
finalizeBenchmark(start, end, m_, k_, n_, num_iters, "contractionAT");
// Release the three buffers, then synchronize with the device.
device_.deallocate(a_);
device_.deallocate(b_);
device_.deallocate(c_);
device_.synchronize();
// Tail of a contraction benchmark that reports under the label
// "contractionBT".
// NOTE(review): the function header, the set-up of A, B, C, a_, b_, c_ and the
// closing brace of this function are not visible in this extract. Line breaks
// have also been lost: preprocessor directives and `//` comments are fused
// onto code lines below.
//
// Visible behaviour: contract A with B into C on device_, 10 untimed warm-up
// runs when EIGEN_USE_SYCL is defined, then num_iters runs bracketed by
// get_time() calls; the two time stamps go to finalizeBenchmark() together
// with m_, k_, n_ and num_iters, after which a_, b_ and c_ are deallocated.
// NOTE(review): no device_.synchronize() is visible between the timed loop and
// the second get_time(); confirm the measurement covers kernel completion.
typedeftypename Tensor<T, 2>::DimensionPair DimPair;
Eigen::array<DimPair, 1> dims;
// DimPair(1, 1): contract index 1 of A with index 1 of B, i.e. B is used
// transposed relative to the plain matrix product when both are rank 2.
dims[0] = DimPair(1, 1); #ifdef EIGEN_USE_SYCL // warmup for sycl for (int iter = 0; iter < 10; ++iter) {
C.device(device_) = A.contract(B, dims);
} #endif auto start = get_time(); for (int iter = 0; iter < num_iters; ++iter) {
C.device(device_) = A.contract(B, dims);
} auto end = get_time(); // Record the number of FLOPs executed per second (size_ multiplications and // additions for each value in the resulting tensor)
finalizeBenchmark(start, end, m_, k_, n_, num_iters, "contractionBT");
// Release the three buffers, then synchronize with the device.
device_.deallocate(a_);
device_.deallocate(b_);
device_.deallocate(c_);
device_.synchronize();
// Tail of a contraction benchmark that reports under the label
// "contractionABT".
// NOTE(review): the function header and the set-up of A, B, C, a_, b_, c_ are
// not visible in this extract, and line breaks have been lost: preprocessor
// directives and `//` comments are fused onto code lines below. Restore the
// original line structure before building.
//
// Visible behaviour: contract A with B into C on device_, 10 untimed warm-up
// runs when EIGEN_USE_SYCL is defined, then num_iters runs bracketed by
// get_time() calls; the two time stamps go to finalizeBenchmark() together
// with m_, k_, n_ and num_iters, after which a_, b_ and c_ are deallocated.
// NOTE(review): no device_.synchronize() is visible between the timed loop and
// the second get_time(); confirm the measurement covers kernel completion.
typedeftypename Tensor<T, 2>::DimensionPair DimPair;
Eigen::array<DimPair, 1> dims;
// DimPair(0, 1): contract index 0 of A with index 1 of B, i.e. both operands
// are used transposed relative to the plain matrix product when rank 2.
dims[0] = DimPair(0, 1); #ifdef EIGEN_USE_SYCL // warmup for sycl for (int iter = 0; iter < 10; ++iter) {
C.device(device_) = A.contract(B, dims);
} #endif auto start = get_time(); for (int iter = 0; iter < num_iters; ++iter) {
C.device(device_) = A.contract(B, dims);
} auto end = get_time(); // Record the number of FLOPs executed per second (size_ multiplications and // additions for each value in the resulting tensor)
finalizeBenchmark(start, end, m_, k_, n_, num_iters, "contractionABT");
// Release the three buffers, then synchronize with the device.
device_.deallocate(a_);
device_.deallocate(b_);
device_.deallocate(c_);
device_.synchronize();
}
// Benchmark driver. Creates an Eigen SYCL device on a queue built from the
// SYCL GPU selector, then runs the five float contraction benchmarks for every
// (m, k, n) combination in which each extent doubles from 32 up to and
// including 4096 (8 values per extent, 512 combinations in total).
// Always returns 0.
int main() {
  cl::sycl::gpu_selector gpu_picker;
  Eigen::QueueInterface sycl_queue(gpu_picker);
  Eigen::SyclDevice device(&sycl_queue);

  // Number of timed repetitions handed to every benchmark.
  int64_t num_iters = 20;

  // Bounds and growth factor of the extent sweep shared by m, k and n.
  const int64_t smallest_extent = 32;
  const int64_t largest_extent = 4096;
  const int64_t growth = 2;

  for (int64_t m = smallest_extent; m <= largest_extent; m *= growth) {
    for (int64_t k = smallest_extent; k <= largest_extent; k *= growth) {
      for (int64_t n = smallest_extent; n <= largest_extent; n *= growth) {
        // Same call order as the result labels: plain, row-major, A^T, B^T,
        // A^T with B^T.
        contraction<float>(device, num_iters, m, k, n);
        contractionRowMajor<float>(device, num_iters, m, k, n);
        contractionAT<float>(device, num_iters, m, k, n);
        contractionBT<float>(device, num_iters, m, k, n);
        contractionABT<float>(device, num_iters, m, k, n);
      }
    }
  }

  return 0;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.