From 88534ba623421c956d8ffcda2d27f41d704d15ef Mon Sep 17 00:00:00 2001 From: Stanislaw Halik Date: Tue, 3 Jul 2018 07:37:12 +0200 Subject: update eigen --- eigen/unsupported/test/CMakeLists.txt | 21 - eigen/unsupported/test/EulerAngles.cpp | 296 ++++------ eigen/unsupported/test/autodiff_scalar.cpp | 15 + .../test/cxx11_non_blocking_thread_pool.cpp | 24 +- .../test/cxx11_tensor_broadcast_sycl.cpp | 114 +--- .../test/cxx11_tensor_builtins_sycl.cpp | 267 --------- eigen/unsupported/test/cxx11_tensor_chipping.cpp | 8 +- .../test/cxx11_tensor_chipping_sycl.cpp | 622 --------------------- .../test/cxx11_tensor_concatenation_sycl.cpp | 180 ------ .../test/cxx11_tensor_contract_sycl.cpp | 290 ---------- .../test/cxx11_tensor_convolution_sycl.cpp | 469 ---------------- .../unsupported/test/cxx11_tensor_device_sycl.cpp | 60 +- eigen/unsupported/test/cxx11_tensor_expr.cpp | 46 -- eigen/unsupported/test/cxx11_tensor_fixed_size.cpp | 2 +- .../test/cxx11_tensor_forced_eval_sycl.cpp | 54 +- .../test/cxx11_tensor_morphing_sycl.cpp | 248 -------- .../unsupported/test/cxx11_tensor_notification.cpp | 17 +- .../test/cxx11_tensor_of_float16_cuda.cu | 6 - .../unsupported/test/cxx11_tensor_padding_sycl.cpp | 157 ------ .../test/cxx11_tensor_reduction_sycl.cpp | 167 ++---- .../unsupported/test/cxx11_tensor_reverse_sycl.cpp | 221 -------- .../test/cxx11_tensor_shuffling_sycl.cpp | 119 ---- .../test/cxx11_tensor_striding_sycl.cpp | 203 ------- eigen/unsupported/test/cxx11_tensor_sycl.cpp | 219 ++------ eigen/unsupported/test/polynomialsolver.cpp | 34 +- eigen/unsupported/test/sparse_extra.cpp | 23 - 26 files changed, 326 insertions(+), 3556 deletions(-) delete mode 100644 eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp delete mode 100644 eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp delete mode 100644 eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp delete mode 100644 eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp delete mode 100644 eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp delete mode 100644 eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp delete mode 100644 eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp delete mode 100644 eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp delete mode 100644 eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp delete mode 100644 eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp (limited to 'eigen/unsupported/test') diff --git a/eigen/unsupported/test/CMakeLists.txt b/eigen/unsupported/test/CMakeLists.txt index 003c9de..b5fa1c8 100644 --- a/eigen/unsupported/test/CMakeLists.txt +++ b/eigen/unsupported/test/CMakeLists.txt @@ -21,17 +21,6 @@ include_directories(../../test ../../unsupported ../../Eigen find_package (Threads) -find_package(Xsmm) -if(XSMM_FOUND) - add_definitions("-DEIGEN_USE_LIBXSMM") - include_directories(${XSMM_INCLUDES}) - link_directories(${XSMM_LIBRARIES}) - set(EXTERNAL_LIBS ${EXTERNAL_LIBS} xsmm) - ei_add_property(EIGEN_TESTED_BACKENDS "Xsmm, ") -else(XSMM_FOUND) - ei_add_property(EIGEN_MISSING_BACKENDS "Xsmm, ") -endif(XSMM_FOUND) - find_package(GoogleHash) if(GOOGLEHASH_FOUND) add_definitions("-DEIGEN_GOOGLEHASH_SUPPORT") @@ -157,16 +146,6 @@ if(EIGEN_TEST_CXX11) ei_add_test_sycl(cxx11_tensor_broadcast_sycl "-std=c++11") ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11") ei_add_test_sycl(cxx11_tensor_reduction_sycl "-std=c++11") - ei_add_test_sycl(cxx11_tensor_morphing_sycl "-std=c++11") - ei_add_test_sycl(cxx11_tensor_shuffling_sycl "-std=c++11") - ei_add_test_sycl(cxx11_tensor_padding_sycl "-std=c++11") - ei_add_test_sycl(cxx11_tensor_builtins_sycl "-std=c++11") - ei_add_test_sycl(cxx11_tensor_contract_sycl "-std=c++11") - ei_add_test_sycl(cxx11_tensor_concatenation_sycl "-std=c++11") - ei_add_test_sycl(cxx11_tensor_reverse_sycl "-std=c++11") - ei_add_test_sycl(cxx11_tensor_convolution_sycl "-std=c++11") - ei_add_test_sycl(cxx11_tensor_striding_sycl "-std=c++11") - ei_add_test_sycl(cxx11_tensor_chipping_sycl "-std=c++11") endif(EIGEN_TEST_SYCL) # It should be safe to always run these tests as there is some fallback code for # older compiler that don't support cxx11. diff --git a/eigen/unsupported/test/EulerAngles.cpp b/eigen/unsupported/test/EulerAngles.cpp index 79ee728..a8cb528 100644 --- a/eigen/unsupported/test/EulerAngles.cpp +++ b/eigen/unsupported/test/EulerAngles.cpp @@ -13,219 +13,146 @@ using namespace Eigen; -// Unfortunately, we need to specialize it in order to work. (We could add it in main.h test framework) -template -bool verifyIsApprox(const Eigen::EulerAngles& a, const Eigen::EulerAngles& b) -{ - return verifyIsApprox(a.angles(), b.angles()); -} - -// Verify that x is in the approxed range [a, b] -#define VERIFY_APPROXED_RANGE(a, x, b) \ - do { \ - VERIFY_IS_APPROX_OR_LESS_THAN(a, x); \ - VERIFY_IS_APPROX_OR_LESS_THAN(x, b); \ - } while(0) - -const char X = EULER_X; -const char Y = EULER_Y; -const char Z = EULER_Z; - -template -void verify_euler(const EulerAngles& e) +template +void verify_euler_ranged(const Matrix& ea, + bool positiveRangeAlpha, bool positiveRangeBeta, bool positiveRangeGamma) { typedef EulerAngles EulerAnglesType; typedef Matrix Matrix3; typedef Matrix Vector3; typedef Quaternion QuaternionType; typedef AngleAxis AngleAxisType; + using std::abs; - const Scalar ONE = Scalar(1); - const Scalar HALF_PI = Scalar(EIGEN_PI / 2); - const Scalar PI = Scalar(EIGEN_PI); + Scalar alphaRangeStart, alphaRangeEnd; + Scalar betaRangeStart, betaRangeEnd; + Scalar gammaRangeStart, gammaRangeEnd; - // It's very important calc the acceptable precision depending on the distance from the pole. - const Scalar longitudeRadius = std::abs( - EulerSystem::IsTaitBryan ? - std::cos(e.beta()) : - std::sin(e.beta()) - ); - Scalar precision = test_precision() / longitudeRadius; + if (positiveRangeAlpha) + { + alphaRangeStart = Scalar(0); + alphaRangeEnd = Scalar(2 * EIGEN_PI); + } + else + { + alphaRangeStart = -Scalar(EIGEN_PI); + alphaRangeEnd = Scalar(EIGEN_PI); + } - Scalar betaRangeStart, betaRangeEnd; - if (EulerSystem::IsTaitBryan) + if (positiveRangeBeta) + { + betaRangeStart = Scalar(0); + betaRangeEnd = Scalar(2 * EIGEN_PI); + } + else + { + betaRangeStart = -Scalar(EIGEN_PI); + betaRangeEnd = Scalar(EIGEN_PI); + } + + if (positiveRangeGamma) { - betaRangeStart = -HALF_PI; - betaRangeEnd = HALF_PI; + gammaRangeStart = Scalar(0); + gammaRangeEnd = Scalar(2 * EIGEN_PI); } else { - if (!EulerSystem::IsBetaOpposite) - { - betaRangeStart = 0; - betaRangeEnd = PI; - } - else - { - betaRangeStart = -PI; - betaRangeEnd = 0; - } + gammaRangeStart = -Scalar(EIGEN_PI); + gammaRangeEnd = Scalar(EIGEN_PI); } + const int i = EulerSystem::AlphaAxisAbs - 1; + const int j = EulerSystem::BetaAxisAbs - 1; + const int k = EulerSystem::GammaAxisAbs - 1; + + const int iFactor = EulerSystem::IsAlphaOpposite ? -1 : 1; + const int jFactor = EulerSystem::IsBetaOpposite ? -1 : 1; + const int kFactor = EulerSystem::IsGammaOpposite ? -1 : 1; + const Vector3 I = EulerAnglesType::AlphaAxisVector(); const Vector3 J = EulerAnglesType::BetaAxisVector(); const Vector3 K = EulerAnglesType::GammaAxisVector(); - // Is approx checks - VERIFY(e.isApprox(e)); - VERIFY_IS_APPROX(e, e); - VERIFY_IS_NOT_APPROX(e, EulerAnglesType(e.alpha() + ONE, e.beta() + ONE, e.gamma() + ONE)); - - const Matrix3 m(e); - VERIFY_IS_APPROX(Scalar(m.determinant()), ONE); - - EulerAnglesType ebis(m); + EulerAnglesType e(ea[0], ea[1], ea[2]); - // When no roll(acting like polar representation), we have the best precision. - // One of those cases is when the Euler angles are on the pole, and because it's singular case, - // the computation returns no roll. - if (ebis.beta() == 0) - precision = test_precision(); + Matrix3 m(e); + Vector3 eabis = EulerAnglesType(m, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles(); // Check that eabis in range - VERIFY_APPROXED_RANGE(-PI, ebis.alpha(), PI); - VERIFY_APPROXED_RANGE(betaRangeStart, ebis.beta(), betaRangeEnd); - VERIFY_APPROXED_RANGE(-PI, ebis.gamma(), PI); - - const Matrix3 mbis(AngleAxisType(ebis.alpha(), I) * AngleAxisType(ebis.beta(), J) * AngleAxisType(ebis.gamma(), K)); - VERIFY_IS_APPROX(Scalar(mbis.determinant()), ONE); - VERIFY_IS_APPROX(mbis, ebis.toRotationMatrix()); - /*std::cout << "===================\n" << - "e: " << e << std::endl << - "eabis: " << eabis.transpose() << std::endl << - "m: " << m << std::endl << - "mbis: " << mbis << std::endl << - "X: " << (m * Vector3::UnitX()).transpose() << std::endl << - "X: " << (mbis * Vector3::UnitX()).transpose() << std::endl;*/ - VERIFY(m.isApprox(mbis, precision)); - - // Test if ea and eabis are the same - // Need to check both singular and non-singular cases - // There are two singular cases. - // 1. When I==K and sin(ea(1)) == 0 - // 2. When I!=K and cos(ea(1)) == 0 - - // TODO: Make this test work well, and use range saturation function. - /*// If I==K, and ea[1]==0, then there no unique solution. - // The remark apply in the case where I!=K, and |ea[1]| is close to +-pi/2. - if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision())) ) - VERIFY_IS_APPROX(ea, eabis);*/ + VERIFY(alphaRangeStart <= eabis[0] && eabis[0] <= alphaRangeEnd); + VERIFY(betaRangeStart <= eabis[1] && eabis[1] <= betaRangeEnd); + VERIFY(gammaRangeStart <= eabis[2] && eabis[2] <= gammaRangeEnd); - // Quaternions - const QuaternionType q(e); - ebis = q; - const QuaternionType qbis(ebis); - VERIFY(internal::isApprox(std::abs(q.dot(qbis)), ONE, precision)); - //VERIFY_IS_APPROX(eabis, eabis2);// Verify that the euler angles are still the same + Vector3 eabis2 = m.eulerAngles(i, j, k); - // A suggestion for simple product test when will be supported. - /*EulerAnglesType e2(PI/2, PI/2, PI/2); - Matrix3 m2(e2); - VERIFY_IS_APPROX(e*e2, m*m2);*/ -} - -template -void verify_euler_vec(const Matrix& ea) -{ - verify_euler(EulerAngles >(ea[0], ea[1], ea[2])); -} - -template -void verify_euler_all_neg(const Matrix& ea) -{ - verify_euler_vec<+A,+B,+C>(ea); - verify_euler_vec<+A,+B,-C>(ea); - verify_euler_vec<+A,-B,+C>(ea); - verify_euler_vec<+A,-B,-C>(ea); + // Invert the relevant axes + eabis2[0] *= iFactor; + eabis2[1] *= jFactor; + eabis2[2] *= kFactor; - verify_euler_vec<-A,+B,+C>(ea); - verify_euler_vec<-A,+B,-C>(ea); - verify_euler_vec<-A,-B,+C>(ea); - verify_euler_vec<-A,-B,-C>(ea); -} - -template void check_all_var(const Matrix& ea) -{ - verify_euler_all_neg(ea); - verify_euler_all_neg(ea); - verify_euler_all_neg(ea); - verify_euler_all_neg(ea); + // Saturate the angles to the correct range + if (positiveRangeAlpha && (eabis2[0] < 0)) + eabis2[0] += Scalar(2 * EIGEN_PI); + if (positiveRangeBeta && (eabis2[1] < 0)) + eabis2[1] += Scalar(2 * EIGEN_PI); + if (positiveRangeGamma && (eabis2[2] < 0)) + eabis2[2] += Scalar(2 * EIGEN_PI); - verify_euler_all_neg(ea); - verify_euler_all_neg(ea); - verify_euler_all_neg(ea); - verify_euler_all_neg(ea); + VERIFY_IS_APPROX(eabis, eabis2);// Verify that our estimation is the same as m.eulerAngles() is - verify_euler_all_neg(ea); - verify_euler_all_neg(ea); - verify_euler_all_neg(ea); - verify_euler_all_neg(ea); -} - -template void check_singular_cases(const Scalar& singularBeta) -{ - typedef Matrix Vector3; - const Scalar PI = Scalar(EIGEN_PI); + Matrix3 mbis(AngleAxisType(eabis[0], I) * AngleAxisType(eabis[1], J) * AngleAxisType(eabis[2], K)); + VERIFY_IS_APPROX(m, mbis); - for (Scalar epsilon = NumTraits::epsilon(); epsilon < 1; epsilon *= Scalar(1.2)) + // Tests that are only relevant for no possitive range + if (!(positiveRangeAlpha || positiveRangeBeta || positiveRangeGamma)) { - check_all_var(Vector3(PI/4, singularBeta, PI/3)); - check_all_var(Vector3(PI/4, singularBeta - epsilon, PI/3)); - check_all_var(Vector3(PI/4, singularBeta - Scalar(1.5)*epsilon, PI/3)); - check_all_var(Vector3(PI/4, singularBeta - 2*epsilon, PI/3)); - check_all_var(Vector3(PI*Scalar(0.8), singularBeta - epsilon, Scalar(0.9)*PI)); - check_all_var(Vector3(PI*Scalar(-0.9), singularBeta + epsilon, PI*Scalar(0.3))); - check_all_var(Vector3(PI*Scalar(-0.6), singularBeta + Scalar(1.5)*epsilon, PI*Scalar(0.3))); - check_all_var(Vector3(PI*Scalar(-0.5), singularBeta + 2*epsilon, PI*Scalar(0.4))); - check_all_var(Vector3(PI*Scalar(0.9), singularBeta + epsilon, Scalar(0.8)*PI)); + /* If I==K, and ea[1]==0, then there no unique solution. */ + /* The remark apply in the case where I!=K, and |ea[1]| is close to pi/2. */ + if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision())) ) + VERIFY((ea-eabis).norm() <= test_precision()); + + // approx_or_less_than does not work for 0 + VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1))); } - // This one for sanity, it had a problem with near pole cases in float scalar. - check_all_var(Vector3(PI*Scalar(0.8), singularBeta - Scalar(1E-6), Scalar(0.9)*PI)); + // Quaternions + QuaternionType q(e); + eabis = EulerAnglesType(q, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles(); + VERIFY_IS_APPROX(eabis, eabis2);// Verify that the euler angles are still the same } -template void eulerangles_manual() +template +void verify_euler(const Matrix& ea) { - typedef Matrix Vector3; - const Vector3 Zero = Vector3::Zero(); - const Scalar PI = Scalar(EIGEN_PI); - - check_all_var(Zero); - - // singular cases - check_singular_cases(PI/2); - check_singular_cases(-PI/2); - - check_singular_cases(Scalar(0)); - check_singular_cases(Scalar(-0)); - - check_singular_cases(PI); - check_singular_cases(-PI); - - // non-singular cases - VectorXd alpha = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI); - VectorXd beta = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.49) * PI, Scalar(0.49) * PI); - VectorXd gamma = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI); - for (int i = 0; i < alpha.size(); ++i) { - for (int j = 0; j < beta.size(); ++j) { - for (int k = 0; k < gamma.size(); ++k) { - check_all_var(Vector3d(alpha(i), beta(j), gamma(k))); - } - } - } + verify_euler_ranged(ea, false, false, false); + verify_euler_ranged(ea, false, false, true); + verify_euler_ranged(ea, false, true, false); + verify_euler_ranged(ea, false, true, true); + verify_euler_ranged(ea, true, false, false); + verify_euler_ranged(ea, true, false, true); + verify_euler_ranged(ea, true, true, false); + verify_euler_ranged(ea, true, true, true); } -template void eulerangles_rand() +template void check_all_var(const Matrix& ea) +{ + verify_euler(ea); + verify_euler(ea); + verify_euler(ea); + verify_euler(ea); + + verify_euler(ea); + verify_euler(ea); + verify_euler(ea); + verify_euler(ea); + + verify_euler(ea); + verify_euler(ea); + verify_euler(ea); + verify_euler(ea); +} + +template void eulerangles() { typedef Matrix Matrix3; typedef Matrix Vector3; @@ -274,19 +201,8 @@ template void eulerangles_rand() void test_EulerAngles() { - // Simple cast test - EulerAnglesXYZd onesEd(1, 1, 1); - EulerAnglesXYZf onesEf = onesEd.cast(); - VERIFY_IS_APPROX(onesEd, onesEf.cast()); - - CALL_SUBTEST_1( eulerangles_manual() ); - CALL_SUBTEST_2( eulerangles_manual() ); - for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_3( eulerangles_rand() ); - CALL_SUBTEST_4( eulerangles_rand() ); + CALL_SUBTEST_1( eulerangles() ); + CALL_SUBTEST_2( eulerangles() ); } - - // TODO: Add tests for auto diff - // TODO: Add tests for complex numbers } diff --git a/eigen/unsupported/test/autodiff_scalar.cpp b/eigen/unsupported/test/autodiff_scalar.cpp index 4df2f5c..9cf1128 100644 --- a/eigen/unsupported/test/autodiff_scalar.cpp +++ b/eigen/unsupported/test/autodiff_scalar.cpp @@ -72,6 +72,20 @@ template void check_hyperbolic_functions() VERIFY_IS_APPROX(res3.derivatives().x(), Scalar(0.339540557256150)); } +template +void check_limits_specialization() +{ + typedef Eigen::Matrix Deriv; + typedef Eigen::AutoDiffScalar AD; + + typedef std::numeric_limits A; + typedef std::numeric_limits B; + +#if EIGEN_HAS_CXX11 + VERIFY(bool(std::is_base_of::value)); +#endif +} + void test_autodiff_scalar() { for(int i = 0; i < g_repeat; i++) { @@ -79,5 +93,6 @@ void test_autodiff_scalar() CALL_SUBTEST_2( check_atan2() ); CALL_SUBTEST_3( check_hyperbolic_functions() ); CALL_SUBTEST_4( check_hyperbolic_functions() ); + CALL_SUBTEST_5( check_limits_specialization()); } } diff --git a/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp b/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp index 48cd2d4..5f9bb93 100644 --- a/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp +++ b/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp @@ -11,7 +11,6 @@ #define EIGEN_USE_THREADS #include "main.h" #include "Eigen/CXX11/ThreadPool" -#include "Eigen/CXX11/Tensor" static void test_create_destroy_empty_pool() { @@ -23,11 +22,11 @@ static void test_create_destroy_empty_pool() } -static void test_parallelism(bool allow_spinning) +static void test_parallelism() { // Test we never-ever fail to match available tasks with idle threads. const int kThreads = 16; // code below expects that this is a multiple of 4 - NonBlockingThreadPool tp(kThreads, allow_spinning); + NonBlockingThreadPool tp(kThreads); VERIFY_IS_EQUAL(tp.NumThreads(), kThreads); VERIFY_IS_EQUAL(tp.CurrentThreadId(), -1); for (int iter = 0; iter < 100; ++iter) { @@ -101,25 +100,8 @@ static void test_parallelism(bool allow_spinning) } } - -static void test_cancel() -{ - NonBlockingThreadPool tp(2); - - // Schedule a large number of closure that each sleeps for one second. This - // will keep the thread pool busy for much longer than the default test timeout. - for (int i = 0; i < 1000; ++i) { - tp.Schedule([]() { EIGEN_SLEEP(2000); }); - } - - // Cancel the processing of all the closures that are still pending. - tp.Cancel(); -} - void test_cxx11_non_blocking_thread_pool() { CALL_SUBTEST(test_create_destroy_empty_pool()); - CALL_SUBTEST(test_parallelism(true)); - CALL_SUBTEST(test_parallelism(false)); - CALL_SUBTEST(test_cancel()); + CALL_SUBTEST(test_parallelism()); } diff --git a/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp index 21fdfca..7201bfe 100644 --- a/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp +++ b/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp @@ -14,7 +14,7 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_broadcast_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_SYCL #include "main.h" @@ -25,99 +25,39 @@ using Eigen::SyclDevice; using Eigen::Tensor; using Eigen::TensorMap; -template -static void test_broadcast_sycl_fixed(const Eigen::SyclDevice &sycl_device){ - - // BROADCAST test: - IndexType inDim1=2; - IndexType inDim2=3; - IndexType inDim3=5; - IndexType inDim4=7; - IndexType bDim1=2; - IndexType bDim2=3; - IndexType bDim3=1; - IndexType bDim4=4; - array in_range = {{inDim1, inDim2, inDim3, inDim4}}; - array broadcasts = {{bDim1, bDim2, bDim3, bDim4}}; - array out_range; // = in_range * broadcasts - for (size_t i = 0; i < out_range.size(); ++i) - out_range[i] = in_range[i] * broadcasts[i]; - - Tensor input(in_range); - Tensor out(out_range); - - for (size_t i = 0; i < in_range.size(); ++i) - VERIFY_IS_EQUAL(out.dimension(i), out_range[i]); - - - for (IndexType i = 0; i < input.size(); ++i) - input(i) = static_cast(i); - - DataType * gpu_in_data = static_cast(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(DataType))); - DataType * gpu_out_data = static_cast(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType))); - - TensorMap, DataLayout, IndexType>> gpu_in(gpu_in_data, in_range); - TensorMap> gpu_out(gpu_out_data, out_range); - sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(DataType)); - gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts); - sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType)); - - for (IndexType i = 0; i < inDim1*bDim1; ++i) { - for (IndexType j = 0; j < inDim2*bDim2; ++j) { - for (IndexType k = 0; k < inDim3*bDim3; ++k) { - for (IndexType l = 0; l < inDim4*bDim4; ++l) { - VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), out(i,j,k,l)); - } - } - } - } - printf("Broadcast Test with fixed size Passed\n"); - sycl_device.deallocate(gpu_in_data); - sycl_device.deallocate(gpu_out_data); -} - -template static void test_broadcast_sycl(const Eigen::SyclDevice &sycl_device){ // BROADCAST test: - IndexType inDim1=2; - IndexType inDim2=3; - IndexType inDim3=5; - IndexType inDim4=7; - IndexType bDim1=2; - IndexType bDim2=3; - IndexType bDim3=1; - IndexType bDim4=4; - array in_range = {{inDim1, inDim2, inDim3, inDim4}}; - array broadcasts = {{bDim1, bDim2, bDim3, bDim4}}; - array out_range; // = in_range * broadcasts + array in_range = {{2, 3, 5, 7}}; + array broadcasts = {{2, 3, 1, 4}}; + array out_range; // = in_range * broadcasts for (size_t i = 0; i < out_range.size(); ++i) out_range[i] = in_range[i] * broadcasts[i]; - Tensor input(in_range); - Tensor out(out_range); + Tensor input(in_range); + Tensor out(out_range); for (size_t i = 0; i < in_range.size(); ++i) VERIFY_IS_EQUAL(out.dimension(i), out_range[i]); - for (IndexType i = 0; i < input.size(); ++i) - input(i) = static_cast(i); + for (int i = 0; i < input.size(); ++i) + input(i) = static_cast(i); - DataType * gpu_in_data = static_cast(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(DataType))); - DataType * gpu_out_data = static_cast(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType))); + float * gpu_in_data = static_cast(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(float))); + float * gpu_out_data = static_cast(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float))); - TensorMap> gpu_in(gpu_in_data, in_range); - TensorMap> gpu_out(gpu_out_data, out_range); - sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(DataType)); + TensorMap> gpu_in(gpu_in_data, in_range); + TensorMap> gpu_out(gpu_out_data, out_range); + sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(float)); gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts); - sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType)); + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float)); - for (IndexType i = 0; i < inDim1*bDim1; ++i) { - for (IndexType j = 0; j < inDim2*bDim2; ++j) { - for (IndexType k = 0; k < inDim3*bDim3; ++k) { - for (IndexType l = 0; l < inDim4*bDim4; ++l) { - VERIFY_IS_APPROX(input(i%inDim1,j%inDim2,k%inDim3,l%inDim4), out(i,j,k,l)); + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 28; ++l) { + VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), out(i,j,k,l)); } } } @@ -127,18 +67,8 @@ static void test_broadcast_sycl(const Eigen::SyclDevice &sycl_device){ sycl_device.deallocate(gpu_out_data); } -template void sycl_broadcast_test_per_device(const cl::sycl::device& d){ - std::cout << "Running on " << d.template get_info() << std::endl; - QueueInterface queueInterface(d); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_broadcast_sycl(sycl_device); - test_broadcast_sycl(sycl_device); - test_broadcast_sycl_fixed(sycl_device); - test_broadcast_sycl_fixed(sycl_device); -} - void test_cxx11_tensor_broadcast_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_broadcast_test_per_device(device)); - } + cl::sycl::gpu_selector s; + Eigen::SyclDevice sycl_device(s); + CALL_SUBTEST(test_broadcast_sycl(sycl_device)); } diff --git a/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp deleted file mode 100644 index 400a31d..0000000 --- a/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp +++ /dev/null @@ -1,267 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_builtins_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - -#include "main.h" -#include - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; - -namespace std { -template T rsqrt(T x) { return 1 / std::sqrt(x); } -template T square(T x) { return x * x; } -template T cube(T x) { return x * x * x; } -template T inverse(T x) { return 1 / x; } -} - -#define TEST_UNARY_BUILTINS_FOR_SCALAR(FUNC, SCALAR, OPERATOR, Layout) \ - { \ - /* out OPERATOR in.FUNC() */ \ - Tensor in(tensorRange); \ - Tensor out(tensorRange); \ - in = in.random() + static_cast(0.01); \ - out = out.random() + static_cast(0.01); \ - Tensor reference(out); \ - SCALAR *gpu_data = static_cast( \ - sycl_device.allocate(in.size() * sizeof(SCALAR))); \ - SCALAR *gpu_data_out = static_cast( \ - sycl_device.allocate(out.size() * sizeof(SCALAR))); \ - TensorMap> gpu(gpu_data, tensorRange); \ - TensorMap> gpu_out(gpu_data_out, tensorRange); \ - sycl_device.memcpyHostToDevice(gpu_data, in.data(), \ - (in.size()) * sizeof(SCALAR)); \ - sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), \ - (out.size()) * sizeof(SCALAR)); \ - gpu_out.device(sycl_device) OPERATOR gpu.FUNC(); \ - sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \ - (out.size()) * sizeof(SCALAR)); \ - for (int64_t i = 0; i < out.size(); ++i) { \ - SCALAR ver = reference(i); \ - ver OPERATOR std::FUNC(in(i)); \ - VERIFY_IS_APPROX(out(i), ver); \ - } \ - sycl_device.deallocate(gpu_data); \ - sycl_device.deallocate(gpu_data_out); \ - } \ - { \ - /* out OPERATOR out.FUNC() */ \ - Tensor out(tensorRange); \ - out = out.random() + static_cast(0.01); \ - Tensor reference(out); \ - SCALAR *gpu_data_out = static_cast( \ - sycl_device.allocate(out.size() * sizeof(SCALAR))); \ - TensorMap> gpu_out(gpu_data_out, tensorRange); \ - sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), \ - (out.size()) * sizeof(SCALAR)); \ - gpu_out.device(sycl_device) OPERATOR gpu_out.FUNC(); \ - sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \ - (out.size()) * sizeof(SCALAR)); \ - for (int64_t i = 0; i < out.size(); ++i) { \ - SCALAR ver = reference(i); \ - ver OPERATOR std::FUNC(reference(i)); \ - VERIFY_IS_APPROX(out(i), ver); \ - } \ - sycl_device.deallocate(gpu_data_out); \ - } - -#define TEST_UNARY_BUILTINS_OPERATOR(SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(sqrt, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(rsqrt, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(square, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(cube, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(inverse, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(tanh, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(exp, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(expm1, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(log, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(ceil, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(floor, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(round, SCALAR, OPERATOR , Layout) \ - TEST_UNARY_BUILTINS_FOR_SCALAR(log1p, SCALAR, OPERATOR , Layout) - -#define TEST_IS_THAT_RETURNS_BOOL(SCALAR, FUNC, Layout) \ - { \ - /* out = in.FUNC() */ \ - Tensor in(tensorRange); \ - Tensor out(tensorRange); \ - in = in.random() + static_cast(0.01); \ - SCALAR *gpu_data = static_cast( \ - sycl_device.allocate(in.size() * sizeof(SCALAR))); \ - bool *gpu_data_out = \ - static_cast(sycl_device.allocate(out.size() * sizeof(bool))); \ - TensorMap> gpu(gpu_data, tensorRange); \ - TensorMap> gpu_out(gpu_data_out, tensorRange); \ - sycl_device.memcpyHostToDevice(gpu_data, in.data(), \ - (in.size()) * sizeof(SCALAR)); \ - gpu_out.device(sycl_device) = gpu.FUNC(); \ - sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \ - (out.size()) * sizeof(bool)); \ - for (int64_t i = 0; i < out.size(); ++i) { \ - VERIFY_IS_EQUAL(out(i), std::FUNC(in(i))); \ - } \ - sycl_device.deallocate(gpu_data); \ - sycl_device.deallocate(gpu_data_out); \ - } - -#define TEST_UNARY_BUILTINS(SCALAR, Layout) \ - TEST_UNARY_BUILTINS_OPERATOR(SCALAR, +=, Layout) \ - TEST_UNARY_BUILTINS_OPERATOR(SCALAR, =, Layout) \ - TEST_IS_THAT_RETURNS_BOOL(SCALAR, isnan, Layout) \ - TEST_IS_THAT_RETURNS_BOOL(SCALAR, isfinite, Layout) \ - TEST_IS_THAT_RETURNS_BOOL(SCALAR, isinf, Layout) - -static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device) { - int64_t sizeDim1 = 10; - int64_t sizeDim2 = 10; - int64_t sizeDim3 = 10; - array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; - - TEST_UNARY_BUILTINS(float, RowMajor) - TEST_UNARY_BUILTINS(float, ColMajor) -} - -namespace std { -template T cwiseMax(T x, T y) { return std::max(x, y); } -template T cwiseMin(T x, T y) { return std::min(x, y); } -} - -#define TEST_BINARY_BUILTINS_FUNC(SCALAR, FUNC, Layout) \ - { \ - /* out = in_1.FUNC(in_2) */ \ - Tensor in_1(tensorRange); \ - Tensor in_2(tensorRange); \ - Tensor out(tensorRange); \ - in_1 = in_1.random() + static_cast(0.01); \ - in_2 = in_2.random() + static_cast(0.01); \ - Tensor reference(out); \ - SCALAR *gpu_data_1 = static_cast( \ - sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \ - SCALAR *gpu_data_2 = static_cast( \ - sycl_device.allocate(in_2.size() * sizeof(SCALAR))); \ - SCALAR *gpu_data_out = static_cast( \ - sycl_device.allocate(out.size() * sizeof(SCALAR))); \ - TensorMap> gpu_1(gpu_data_1, tensorRange); \ - TensorMap> gpu_2(gpu_data_2, tensorRange); \ - TensorMap> gpu_out(gpu_data_out, tensorRange); \ - sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \ - (in_1.size()) * sizeof(SCALAR)); \ - sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(), \ - (in_2.size()) * sizeof(SCALAR)); \ - gpu_out.device(sycl_device) = gpu_1.FUNC(gpu_2); \ - sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \ - (out.size()) * sizeof(SCALAR)); \ - for (int64_t i = 0; i < out.size(); ++i) { \ - SCALAR ver = reference(i); \ - ver = std::FUNC(in_1(i), in_2(i)); \ - VERIFY_IS_APPROX(out(i), ver); \ - } \ - sycl_device.deallocate(gpu_data_1); \ - sycl_device.deallocate(gpu_data_2); \ - sycl_device.deallocate(gpu_data_out); \ - } - -#define TEST_BINARY_BUILTINS_OPERATORS(SCALAR, OPERATOR, Layout) \ - { \ - /* out = in_1 OPERATOR in_2 */ \ - Tensor in_1(tensorRange); \ - Tensor in_2(tensorRange); \ - Tensor out(tensorRange); \ - in_1 = in_1.random() + static_cast(0.01); \ - in_2 = in_2.random() + static_cast(0.01); \ - Tensor reference(out); \ - SCALAR *gpu_data_1 = static_cast( \ - sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \ - SCALAR *gpu_data_2 = static_cast( \ - sycl_device.allocate(in_2.size() * sizeof(SCALAR))); \ - SCALAR *gpu_data_out = static_cast( \ - sycl_device.allocate(out.size() * sizeof(SCALAR))); \ - TensorMap> gpu_1(gpu_data_1, tensorRange); \ - TensorMap> gpu_2(gpu_data_2, tensorRange); \ - TensorMap> gpu_out(gpu_data_out, tensorRange); \ - sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \ - (in_1.size()) * sizeof(SCALAR)); \ - sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(), \ - (in_2.size()) * sizeof(SCALAR)); \ - gpu_out.device(sycl_device) = gpu_1 OPERATOR gpu_2; \ - sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \ - (out.size()) * sizeof(SCALAR)); \ - for (int64_t i = 0; i < out.size(); ++i) { \ - VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR in_2(i)); \ - } \ - sycl_device.deallocate(gpu_data_1); \ - sycl_device.deallocate(gpu_data_2); \ - sycl_device.deallocate(gpu_data_out); \ - } - -#define TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(SCALAR, OPERATOR, Layout) \ - { \ - /* out = in_1 OPERATOR 2 */ \ - Tensor in_1(tensorRange); \ - Tensor out(tensorRange); \ - in_1 = in_1.random() + static_cast(0.01); \ - Tensor reference(out); \ - SCALAR *gpu_data_1 = static_cast( \ - sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \ - SCALAR *gpu_data_out = static_cast( \ - sycl_device.allocate(out.size() * sizeof(SCALAR))); \ - TensorMap> gpu_1(gpu_data_1, tensorRange); \ - TensorMap> gpu_out(gpu_data_out, tensorRange); \ - sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \ - (in_1.size()) * sizeof(SCALAR)); \ - gpu_out.device(sycl_device) = gpu_1 OPERATOR 2; \ - sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \ - (out.size()) * sizeof(SCALAR)); \ - for (int64_t i = 0; i < out.size(); ++i) { \ - VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR 2); \ - } \ - sycl_device.deallocate(gpu_data_1); \ - sycl_device.deallocate(gpu_data_out); \ - } - -#define TEST_BINARY_BUILTINS(SCALAR, Layout) \ - TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMax , Layout) \ - TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMin , Layout) \ - TEST_BINARY_BUILTINS_OPERATORS(SCALAR, + , Layout) \ - TEST_BINARY_BUILTINS_OPERATORS(SCALAR, - , Layout) \ - TEST_BINARY_BUILTINS_OPERATORS(SCALAR, * , Layout) \ - TEST_BINARY_BUILTINS_OPERATORS(SCALAR, / , Layout) - -static void test_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) { - int64_t sizeDim1 = 10; - int64_t sizeDim2 = 10; - int64_t sizeDim3 = 10; - array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; - TEST_BINARY_BUILTINS(float, RowMajor) - TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, RowMajor) - TEST_BINARY_BUILTINS(float, ColMajor) - TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, ColMajor) -} - -void test_cxx11_tensor_builtins_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - QueueInterface queueInterface(device); - Eigen::SyclDevice sycl_device(&queueInterface); - CALL_SUBTEST(test_builtin_unary_sycl(sycl_device)); - CALL_SUBTEST(test_builtin_binary_sycl(sycl_device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_chipping.cpp b/eigen/unsupported/test/cxx11_tensor_chipping.cpp index 89cf5c7..1832dec 100644 --- a/eigen/unsupported/test/cxx11_tensor_chipping.cpp +++ b/eigen/unsupported/test/cxx11_tensor_chipping.cpp @@ -43,7 +43,7 @@ static void test_simple_chip() VERIFY_IS_EQUAL(chip2.dimension(2), 7); VERIFY_IS_EQUAL(chip2.dimension(3), 11); for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 5; ++j) { + for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { for (int l = 0; l < 11; ++l) { VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l)); @@ -75,7 +75,7 @@ static void test_simple_chip() for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 5; ++k) { - for (int l = 0; l < 11; ++l) { + for (int l = 0; l < 7; ++l) { VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l)); } } @@ -126,7 +126,7 @@ static void test_dynamic_chip() VERIFY_IS_EQUAL(chip2.dimension(2), 7); VERIFY_IS_EQUAL(chip2.dimension(3), 11); for (int i = 0; i < 2; ++i) { - for (int j = 0; j < 5; ++j) { + for (int j = 0; j < 3; ++j) { for (int k = 0; k < 7; ++k) { for (int l = 0; l < 11; ++l) { VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l)); @@ -158,7 +158,7 @@ static void test_dynamic_chip() for (int i = 0; i < 2; ++i) { for (int j = 0; j < 3; ++j) { for (int k = 0; k < 5; ++k) { - for (int l = 0; l < 11; ++l) { + for (int l = 0; l < 7; ++l) { VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l)); } } diff --git a/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp deleted file mode 100644 index 39e4f0a..0000000 --- a/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp +++ /dev/null @@ -1,622 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: -// Benoit Steiner -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_chipping_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - -#include "main.h" - -#include - -using Eigen::Tensor; - -template -static void test_static_chip_sycl(const Eigen::SyclDevice& sycl_device) -{ - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - IndexType sizeDim5 = 11; - - array tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - array chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - - Tensor tensor(tensorRange); - Tensor chip1(chip1TensorRange); - - tensor.setRandom(); - - const size_t tensorBuffSize =tensor.size()*sizeof(DataType); - const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType); - DataType* gpu_data_tensor = static_cast(sycl_device.allocate(tensorBuffSize)); - DataType* gpu_data_chip1 = static_cast(sycl_device.allocate(chip1TensorBuffSize)); - - TensorMap> gpu_tensor(gpu_data_tensor, tensorRange); - TensorMap> gpu_chip1(gpu_data_chip1, chip1TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize); - gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(1l); - sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize); - - VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2); - VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3); - VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4); - VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim2; ++i) { - for (IndexType j = 0; j < sizeDim3; ++j) { - for (IndexType k = 0; k < sizeDim4; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l)); - } - } - } - } - - array chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}}; - Tensor chip2(chip2TensorRange); - const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType); - DataType* gpu_data_chip2 = static_cast(sycl_device.allocate(chip2TensorBuffSize)); - TensorMap> gpu_chip2(gpu_data_chip2, chip2TensorRange); - - gpu_chip2.device(sycl_device)=gpu_tensor.template chip<1l>(1l); - sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize); - - VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3); - VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4); - VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim3; ++j) { - for (IndexType k = 0; k < sizeDim4; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l)); - } - } - } - } - - array chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}}; - Tensor chip3(chip3TensorRange); - const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType); - DataType* gpu_data_chip3 = static_cast(sycl_device.allocate(chip3TensorBuffSize)); - TensorMap> gpu_chip3(gpu_data_chip3, chip3TensorRange); - - gpu_chip3.device(sycl_device)=gpu_tensor.template chip<2l>(2l); - sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize); - - VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4); - VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim4; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l)); - } - } - } - } - - array chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}}; - Tensor chip4(chip4TensorRange); - const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType); - DataType* gpu_data_chip4 = static_cast(sycl_device.allocate(chip4TensorBuffSize)); - TensorMap> gpu_chip4(gpu_data_chip4, chip4TensorRange); - - gpu_chip4.device(sycl_device)=gpu_tensor.template chip<3l>(5l); - sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize); - - VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3); - VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l)); - } - } - } - } - - - array chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; - Tensor chip5(chip5TensorRange); - const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType); - DataType* gpu_data_chip5 = static_cast(sycl_device.allocate(chip5TensorBuffSize)); - TensorMap> gpu_chip5(gpu_data_chip5, chip5TensorRange); - - gpu_chip5.device(sycl_device)=gpu_tensor.template chip<4l>(7l); - sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize); - - VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3); - VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { - for (IndexType l = 0; l < sizeDim4; ++l) { - VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l)); - } - } - } - } - - sycl_device.deallocate(gpu_data_tensor); - sycl_device.deallocate(gpu_data_chip1); - sycl_device.deallocate(gpu_data_chip2); - sycl_device.deallocate(gpu_data_chip3); - sycl_device.deallocate(gpu_data_chip4); - sycl_device.deallocate(gpu_data_chip5); -} - -template -static void test_dynamic_chip_sycl(const Eigen::SyclDevice& sycl_device) -{ - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - IndexType sizeDim5 = 11; - - array tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - array chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - - Tensor tensor(tensorRange); - Tensor chip1(chip1TensorRange); - - tensor.setRandom(); - - const size_t tensorBuffSize =tensor.size()*sizeof(DataType); - const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType); - DataType* gpu_data_tensor = static_cast(sycl_device.allocate(tensorBuffSize)); - DataType* gpu_data_chip1 = static_cast(sycl_device.allocate(chip1TensorBuffSize)); - - TensorMap> gpu_tensor(gpu_data_tensor, tensorRange); - TensorMap> gpu_chip1(gpu_data_chip1, chip1TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize); - gpu_chip1.device(sycl_device)=gpu_tensor.chip(1l,0l); - sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize); - - VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2); - VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3); - VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4); - VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim2; ++i) { - for (IndexType j = 0; j < sizeDim3; ++j) { - for (IndexType k = 0; k < sizeDim4; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l)); - } - } - } - } - - array chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}}; - Tensor chip2(chip2TensorRange); - const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType); - DataType* gpu_data_chip2 = static_cast(sycl_device.allocate(chip2TensorBuffSize)); - TensorMap> gpu_chip2(gpu_data_chip2, chip2TensorRange); - - gpu_chip2.device(sycl_device)=gpu_tensor.chip(1l,1l); - sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize); - - VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3); - VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4); - VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim3; ++j) { - for (IndexType k = 0; k < sizeDim4; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l)); - } - } - } - } - - array chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}}; - Tensor chip3(chip3TensorRange); - const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType); - DataType* gpu_data_chip3 = static_cast(sycl_device.allocate(chip3TensorBuffSize)); - TensorMap> gpu_chip3(gpu_data_chip3, chip3TensorRange); - - gpu_chip3.device(sycl_device)=gpu_tensor.chip(2l,2l); - sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize); - - VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4); - VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim4; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l)); - } - } - } - } - - array chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}}; - Tensor chip4(chip4TensorRange); - const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType); - DataType* gpu_data_chip4 = static_cast(sycl_device.allocate(chip4TensorBuffSize)); - TensorMap> gpu_chip4(gpu_data_chip4, chip4TensorRange); - - gpu_chip4.device(sycl_device)=gpu_tensor.chip(5l,3l); - sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize); - - VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3); - VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l)); - } - } - } - } - - - array chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; - Tensor chip5(chip5TensorRange); - const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType); - DataType* gpu_data_chip5 = static_cast(sycl_device.allocate(chip5TensorBuffSize)); - TensorMap> gpu_chip5(gpu_data_chip5, chip5TensorRange); - - gpu_chip5.device(sycl_device)=gpu_tensor.chip(7l,4l); - sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize); - - VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3); - VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { - for (IndexType l = 0; l < sizeDim4; ++l) { - VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l)); - } - } - } - } - sycl_device.deallocate(gpu_data_tensor); - sycl_device.deallocate(gpu_data_chip1); - sycl_device.deallocate(gpu_data_chip2); - sycl_device.deallocate(gpu_data_chip3); - sycl_device.deallocate(gpu_data_chip4); - sycl_device.deallocate(gpu_data_chip5); -} - -template -static void test_chip_in_expr(const Eigen::SyclDevice& sycl_device) { - - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - IndexType sizeDim5 = 11; - - array tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - array chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - - Tensor tensor(tensorRange); - - Tensor chip1(chip1TensorRange); - Tensor tensor1(chip1TensorRange); - tensor.setRandom(); - tensor1.setRandom(); - - const size_t tensorBuffSize =tensor.size()*sizeof(DataType); - const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType); - DataType* gpu_data_tensor = static_cast(sycl_device.allocate(tensorBuffSize)); - DataType* gpu_data_chip1 = static_cast(sycl_device.allocate(chip1TensorBuffSize)); - DataType* gpu_data_tensor1 = static_cast(sycl_device.allocate(chip1TensorBuffSize)); - - TensorMap> gpu_tensor(gpu_data_tensor, tensorRange); - TensorMap> gpu_chip1(gpu_data_chip1, chip1TensorRange); - TensorMap> gpu_tensor1(gpu_data_tensor1, chip1TensorRange); - - - sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize); - sycl_device.memcpyHostToDevice(gpu_data_tensor1, tensor1.data(), chip1TensorBuffSize); - gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(0l) + gpu_tensor1; - sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize); - - for (int i = 0; i < sizeDim2; ++i) { - for (int j = 0; j < sizeDim3; ++j) { - for (int k = 0; k < sizeDim4; ++k) { - for (int l = 0; l < sizeDim5; ++l) { - float expected = tensor(0l,i,j,k,l) + tensor1(i,j,k,l); - VERIFY_IS_EQUAL(chip1(i,j,k,l), expected); - } - } - } - } - - array chip2TensorRange = {{sizeDim2, sizeDim4, sizeDim5}}; - Tensor tensor2(chip2TensorRange); - Tensor chip2(chip2TensorRange); - tensor2.setRandom(); - const size_t chip2TensorBuffSize =tensor2.size()*sizeof(DataType); - DataType* gpu_data_tensor2 = static_cast(sycl_device.allocate(chip2TensorBuffSize)); - DataType* gpu_data_chip2 = static_cast(sycl_device.allocate(chip2TensorBuffSize)); - TensorMap> gpu_tensor2(gpu_data_tensor2, chip2TensorRange); - TensorMap> gpu_chip2(gpu_data_chip2, chip2TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_tensor2, tensor2.data(), chip2TensorBuffSize); - gpu_chip2.device(sycl_device)=gpu_tensor.template chip<0l>(0l).template chip<1l>(2l) + gpu_tensor2; - sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize); - - for (int i = 0; i < sizeDim2; ++i) { - for (int j = 0; j < sizeDim4; ++j) { - for (int k = 0; k < sizeDim5; ++k) { - float expected = tensor(0l,i,2l,j,k) + tensor2(i,j,k); - VERIFY_IS_EQUAL(chip2(i,j,k), expected); - } - } - } - sycl_device.deallocate(gpu_data_tensor); - sycl_device.deallocate(gpu_data_tensor1); - sycl_device.deallocate(gpu_data_chip1); - sycl_device.deallocate(gpu_data_tensor2); - sycl_device.deallocate(gpu_data_chip2); -} - -template -static void test_chip_as_lvalue_sycl(const Eigen::SyclDevice& sycl_device) -{ - - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - IndexType sizeDim5 = 11; - - array tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - array input2TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - - Tensor tensor(tensorRange); - Tensor input1(tensorRange); - Tensor input2(input2TensorRange); - input1.setRandom(); - input2.setRandom(); - - - const size_t tensorBuffSize =tensor.size()*sizeof(DataType); - const size_t input2TensorBuffSize =input2.size()*sizeof(DataType); - DataType* gpu_data_tensor = static_cast(sycl_device.allocate(tensorBuffSize)); - DataType* gpu_data_input1 = static_cast(sycl_device.allocate(tensorBuffSize)); - DataType* gpu_data_input2 = static_cast(sycl_device.allocate(input2TensorBuffSize)); - - TensorMap> gpu_tensor(gpu_data_tensor, tensorRange); - TensorMap> gpu_input1(gpu_data_input1, tensorRange); - TensorMap> gpu_input2(gpu_data_input2, input2TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_input1, input1.data(), tensorBuffSize); - gpu_tensor.device(sycl_device)=gpu_input1; - sycl_device.memcpyHostToDevice(gpu_data_input2, input2.data(), input2TensorBuffSize); - gpu_tensor.template chip<0l>(1l).device(sycl_device)=gpu_input2; - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); - - for (int i = 0; i < sizeDim1; ++i) { - for (int j = 0; j < sizeDim2; ++j) { - for (int k = 0; k < sizeDim3; ++k) { - for (int l = 0; l < sizeDim4; ++l) { - for (int m = 0; m < sizeDim5; ++m) { - if (i != 1) { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); - } else { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input2(j,k,l,m)); - } - } - } - } - } - } - - gpu_tensor.device(sycl_device)=gpu_input1; - array input3TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}}; - Tensor input3(input3TensorRange); - input3.setRandom(); - - const size_t input3TensorBuffSize =input3.size()*sizeof(DataType); - DataType* gpu_data_input3 = static_cast(sycl_device.allocate(input3TensorBuffSize)); - TensorMap> gpu_input3(gpu_data_input3, input3TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_input3, input3.data(), input3TensorBuffSize); - gpu_tensor.template chip<1l>(1l).device(sycl_device)=gpu_input3; - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); - - for (int i = 0; i < sizeDim1; ++i) { - for (int j = 0; j < sizeDim2; ++j) { - for (int k = 0; k input4TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}}; - Tensor input4(input4TensorRange); - input4.setRandom(); - - const size_t input4TensorBuffSize =input4.size()*sizeof(DataType); - DataType* gpu_data_input4 = static_cast(sycl_device.allocate(input4TensorBuffSize)); - TensorMap> gpu_input4(gpu_data_input4, input4TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_input4, input4.data(), input4TensorBuffSize); - gpu_tensor.template chip<2l>(3l).device(sycl_device)=gpu_input4; - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); - - for (int i = 0; i < sizeDim1; ++i) { - for (int j = 0; j < sizeDim2; ++j) { - for (int k = 0; k input5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}}; - Tensor input5(input5TensorRange); - input5.setRandom(); - - const size_t input5TensorBuffSize =input5.size()*sizeof(DataType); - DataType* gpu_data_input5 = static_cast(sycl_device.allocate(input5TensorBuffSize)); - TensorMap> gpu_input5(gpu_data_input5, input5TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_input5, input5.data(), input5TensorBuffSize); - gpu_tensor.template chip<3l>(4l).device(sycl_device)=gpu_input5; - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); - - for (int i = 0; i < sizeDim1; ++i) { - for (int j = 0; j < sizeDim2; ++j) { - for (int k = 0; k input6TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; - Tensor input6(input6TensorRange); - input6.setRandom(); - - const size_t input6TensorBuffSize =input6.size()*sizeof(DataType); - DataType* gpu_data_input6 = static_cast(sycl_device.allocate(input6TensorBuffSize)); - TensorMap> gpu_input6(gpu_data_input6, input6TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_input6, input6.data(), input6TensorBuffSize); - gpu_tensor.template chip<4l>(5l).device(sycl_device)=gpu_input6; - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); - - for (int i = 0; i < sizeDim1; ++i) { - for (int j = 0; j < sizeDim2; ++j) { - for (int k = 0; k input7(tensorRange); - input7.setRandom(); - - DataType* gpu_data_input7 = static_cast(sycl_device.allocate(tensorBuffSize)); - TensorMap> gpu_input7(gpu_data_input7, tensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_input7, input7.data(), tensorBuffSize); - gpu_tensor.chip(0l,0l).device(sycl_device)=gpu_input7.chip(0l,0l); - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); - - for (int i = 0; i < sizeDim1; ++i) { - for (int j = 0; j < sizeDim2; ++j) { - for (int k = 0; k void sycl_chipping_test_per_device(dev_Selector s){ - QueueInterface queueInterface(s); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_static_chip_sycl(sycl_device); - test_static_chip_sycl(sycl_device); - test_dynamic_chip_sycl(sycl_device); - test_dynamic_chip_sycl(sycl_device); - test_chip_in_expr(sycl_device); - test_chip_in_expr(sycl_device); - test_chip_as_lvalue_sycl(sycl_device); - test_chip_as_lvalue_sycl(sycl_device); -} -void test_cxx11_tensor_chipping_sycl() -{ - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_chipping_test_per_device(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp deleted file mode 100644 index e3023a3..0000000 --- a/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp +++ /dev/null @@ -1,180 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_concatenation_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - -#include "main.h" -#include - -using Eigen::Tensor; - -template -static void test_simple_concatenation(const Eigen::SyclDevice& sycl_device) -{ - IndexType leftDim1 = 2; - IndexType leftDim2 = 3; - IndexType leftDim3 = 1; - Eigen::array leftRange = {{leftDim1, leftDim2, leftDim3}}; - IndexType rightDim1 = 2; - IndexType rightDim2 = 3; - IndexType rightDim3 = 1; - Eigen::array rightRange = {{rightDim1, rightDim2, rightDim3}}; - - //IndexType concatDim1 = 3; -// IndexType concatDim2 = 3; -// IndexType concatDim3 = 1; - //Eigen::array concatRange = {{concatDim1, concatDim2, concatDim3}}; - - Tensor left(leftRange); - Tensor right(rightRange); - left.setRandom(); - right.setRandom(); - - DataType * gpu_in1_data = static_cast(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType))); - DataType * gpu_in2_data = static_cast(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType))); - - Eigen::TensorMap> gpu_in1(gpu_in1_data, leftRange); - Eigen::TensorMap> gpu_in2(gpu_in2_data, rightRange); - sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType)); - sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType)); - /// - Tensor concatenation1(leftDim1+rightDim1, leftDim2, leftDim3); - DataType * gpu_out_data1 = static_cast(sycl_device.allocate(concatenation1.dimensions().TotalSize()*sizeof(DataType))); - Eigen::TensorMap> gpu_out1(gpu_out_data1, concatenation1.dimensions()); - - //concatenation = left.concatenate(right, 0); - gpu_out1.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 0); - sycl_device.memcpyDeviceToHost(concatenation1.data(), gpu_out_data1,(concatenation1.dimensions().TotalSize())*sizeof(DataType)); - - VERIFY_IS_EQUAL(concatenation1.dimension(0), 4); - VERIFY_IS_EQUAL(concatenation1.dimension(1), 3); - VERIFY_IS_EQUAL(concatenation1.dimension(2), 1); - for (IndexType j = 0; j < 3; ++j) { - for (IndexType i = 0; i < 2; ++i) { - VERIFY_IS_EQUAL(concatenation1(i, j, 0), left(i, j, 0)); - } - for (IndexType i = 2; i < 4; ++i) { - VERIFY_IS_EQUAL(concatenation1(i, j, 0), right(i - 2, j, 0)); - } - } - - sycl_device.deallocate(gpu_out_data1); - Tensor concatenation2(leftDim1, leftDim2 +rightDim2, leftDim3); - DataType * gpu_out_data2 = static_cast(sycl_device.allocate(concatenation2.dimensions().TotalSize()*sizeof(DataType))); - Eigen::TensorMap> gpu_out2(gpu_out_data2, concatenation2.dimensions()); - gpu_out2.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 1); - sycl_device.memcpyDeviceToHost(concatenation2.data(), gpu_out_data2,(concatenation2.dimensions().TotalSize())*sizeof(DataType)); - - //concatenation = left.concatenate(right, 1); - VERIFY_IS_EQUAL(concatenation2.dimension(0), 2); - VERIFY_IS_EQUAL(concatenation2.dimension(1), 6); - VERIFY_IS_EQUAL(concatenation2.dimension(2), 1); - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - VERIFY_IS_EQUAL(concatenation2(i, j, 0), left(i, j, 0)); - } - for (IndexType j = 3; j < 6; ++j) { - VERIFY_IS_EQUAL(concatenation2(i, j, 0), right(i, j - 3, 0)); - } - } - sycl_device.deallocate(gpu_out_data2); - Tensor concatenation3(leftDim1, leftDim2, leftDim3+rightDim3); - DataType * gpu_out_data3 = static_cast(sycl_device.allocate(concatenation3.dimensions().TotalSize()*sizeof(DataType))); - Eigen::TensorMap> gpu_out3(gpu_out_data3, concatenation3.dimensions()); - gpu_out3.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 2); - sycl_device.memcpyDeviceToHost(concatenation3.data(), gpu_out_data3,(concatenation3.dimensions().TotalSize())*sizeof(DataType)); - - //concatenation = left.concatenate(right, 2); - VERIFY_IS_EQUAL(concatenation3.dimension(0), 2); - VERIFY_IS_EQUAL(concatenation3.dimension(1), 3); - VERIFY_IS_EQUAL(concatenation3.dimension(2), 2); - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - VERIFY_IS_EQUAL(concatenation3(i, j, 0), left(i, j, 0)); - VERIFY_IS_EQUAL(concatenation3(i, j, 1), right(i, j, 0)); - } - } - sycl_device.deallocate(gpu_out_data3); - sycl_device.deallocate(gpu_in1_data); - sycl_device.deallocate(gpu_in2_data); -} -template -static void test_concatenation_as_lvalue(const Eigen::SyclDevice& sycl_device) -{ - - IndexType leftDim1 = 2; - IndexType leftDim2 = 3; - Eigen::array leftRange = {{leftDim1, leftDim2}}; - - IndexType rightDim1 = 2; - IndexType rightDim2 = 3; - Eigen::array rightRange = {{rightDim1, rightDim2}}; - - IndexType concatDim1 = 4; - IndexType concatDim2 = 3; - Eigen::array resRange = {{concatDim1, concatDim2}}; - - Tensor left(leftRange); - Tensor right(rightRange); - Tensor result(resRange); - - left.setRandom(); - right.setRandom(); - result.setRandom(); - - DataType * gpu_in1_data = static_cast(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType))); - DataType * gpu_in2_data = static_cast(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType))); - DataType * gpu_out_data = static_cast(sycl_device.allocate(result.dimensions().TotalSize()*sizeof(DataType))); - - - Eigen::TensorMap> gpu_in1(gpu_in1_data, leftRange); - Eigen::TensorMap> gpu_in2(gpu_in2_data, rightRange); - Eigen::TensorMap> gpu_out(gpu_out_data, resRange); - - sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType)); - sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType)); - sycl_device.memcpyHostToDevice(gpu_out_data, result.data(),(result.dimensions().TotalSize())*sizeof(DataType)); - -// t1.concatenate(t2, 0) = result; - gpu_in1.concatenate(gpu_in2, 0).device(sycl_device) =gpu_out; - sycl_device.memcpyDeviceToHost(left.data(), gpu_in1_data,(left.dimensions().TotalSize())*sizeof(DataType)); - sycl_device.memcpyDeviceToHost(right.data(), gpu_in2_data,(right.dimensions().TotalSize())*sizeof(DataType)); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - VERIFY_IS_EQUAL(left(i, j), result(i, j)); - VERIFY_IS_EQUAL(right(i, j), result(i+2, j)); - } - } - sycl_device.deallocate(gpu_in1_data); - sycl_device.deallocate(gpu_in2_data); - sycl_device.deallocate(gpu_out_data); -} - - -template void tensorConcat_perDevice(Dev_selector s){ - QueueInterface queueInterface(s); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_simple_concatenation(sycl_device); - test_simple_concatenation(sycl_device); - test_concatenation_as_lvalue(sycl_device); -} -void test_cxx11_tensor_concatenation_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(tensorConcat_perDevice(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp deleted file mode 100644 index 5bace66..0000000 --- a/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp +++ /dev/null @@ -1,290 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_contract_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - -#include -#include -#include - -#include "main.h" -#include - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; -template -void static test_sycl_contraction(const Device& sycl_device, IndexType m_size, IndexType k_size, IndexType n_size) -{ - typedef typename Tensor::DimensionPair DimPair; - static const DataType error_threshold =1e-4f; -// std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; - // with these dimensions, the output has 300 * 140 elements, which is - // more than 30 * 1024, which is the number of threads in blocks on - // a 15 SM GK110 GPU - Tensor t_left(m_size, k_size); - Tensor t_right(k_size, n_size); - Tensor t_result(m_size, n_size); - Tensor t_result_gpu(m_size, n_size); -// Eigen::array dims(DimPair(1, 0)); - Eigen::array dims = {{DimPair(1, 0)}}; - Eigen::array left_dims = {{m_size, k_size}}; - Eigen::array right_dims = {{k_size, n_size}}; - Eigen::array result_dims = {{m_size, n_size}}; - - t_left.setRandom(); - t_right.setRandom(); - - std::size_t t_left_bytes = t_left.size() * sizeof(DataType); - std::size_t t_right_bytes = t_right.size() * sizeof(DataType); - std::size_t t_result_bytes = t_result.size() * sizeof(DataType); - - DataType * d_t_left = static_cast(sycl_device.allocate(t_left_bytes)); - DataType * d_t_right = static_cast(sycl_device.allocate(t_right_bytes)); - DataType * d_t_result = static_cast(sycl_device.allocate(t_result_bytes)); - - Eigen::TensorMap > gpu_t_left(d_t_left, left_dims); - Eigen::TensorMap > gpu_t_right(d_t_right, right_dims); - Eigen::TensorMap > gpu_t_result(d_t_result, result_dims); - - sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes); - sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes); - - gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims); - sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes); - - t_result = t_left.contract(t_right, dims); - - for (IndexType i = 0; i < t_result.size(); i++) { - if (static_cast(fabs(t_result(i) - t_result_gpu(i))) < error_threshold) { - continue; - } - if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) { - continue; - } - std::cout << "mismatch detected at IndexType " << i << ": " << t_result(i) - << " vs " << t_result_gpu(i) << std::endl; - assert(false); - } - sycl_device.deallocate(d_t_left); - sycl_device.deallocate(d_t_right); - sycl_device.deallocate(d_t_result); -} - -template -void test_TF(const Device& sycl_device) -{ - typedef typename Tensor::DimensionPair DimPair; - static const DataType error_threshold =1e-4f; - Eigen::array left_dims = {{2, 3}}; - Eigen::array right_dims = {{3, 1}}; - Eigen::array res_dims = {{2, 1}}; - Eigen::array dims = {{DimPair(1, 0)}}; - - - Tensor t_left(left_dims); - Tensor t_right(right_dims); - Tensor t_result_gpu(res_dims); - Tensor t_result(res_dims); - - t_left.data()[0] = 1.0f; - t_left.data()[1] = 2.0f; - t_left.data()[2] = 3.0f; - t_left.data()[3] = 4.0f; - t_left.data()[4] = 5.0f; - t_left.data()[5] = 6.0f; - - t_right.data()[0] = -1.0f; - t_right.data()[1] = 0.5f; - t_right.data()[2] = 2.0f; - - std::size_t t_left_bytes = t_left.size() * sizeof(DataType); - std::size_t t_right_bytes = t_right.size() * sizeof(DataType); - std::size_t t_result_bytes = t_result.size()*sizeof(DataType); - - - DataType * d_t_left = static_cast(sycl_device.allocate(t_left_bytes)); - DataType * d_t_right = static_cast(sycl_device.allocate(t_right_bytes)); - DataType * d_t_result = static_cast(sycl_device.allocate(t_result_bytes)); - - Eigen::TensorMap > gpu_t_left(d_t_left, left_dims); - Eigen::TensorMap > gpu_t_right(d_t_right, right_dims); - Eigen::TensorMap > gpu_t_result(d_t_result, res_dims); - - sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes); - sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes); - - gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims); - sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes); - - t_result = t_left.contract(t_right, dims); - - for (IndexType i = 0; i < t_result.size(); i++) { - if (static_cast(fabs(t_result(i) - t_result_gpu(i))) < error_threshold) { - continue; - } - if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) { - continue; - } - std::cout << "mismatch detected at IndexType " << i << ": " << t_result(i) - << " vs " << t_result_gpu(i) << std::endl; - assert(false); - } - sycl_device.deallocate(d_t_left); - sycl_device.deallocate(d_t_right); - sycl_device.deallocate(d_t_result); - - -} - -template -void test_scalar(const Device& sycl_device, IndexType m_size, IndexType k_size, IndexType n_size) -{ - //std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; - // with these dimensions, the output has 300 * 140 elements, which is - // more than 30 * 1024, which is the number of threads in blocks on - // a 15 SM GK110 GPU - typedef typename Tensor::DimensionPair DimPair; - static const DataType error_threshold =1e-4f; - Tensor t_left(m_size, k_size); - Tensor t_right(k_size, n_size); - Tensor t_result; - Tensor t_result_gpu; - Eigen::array dims = {{DimPair(0, 0), DimPair(1, 1)}}; - Eigen::array left_dims = {{m_size, k_size}}; - Eigen::array right_dims = {{k_size, n_size}}; - t_left.setRandom(); - t_right.setRandom(); - - std::size_t t_left_bytes = t_left.size() * sizeof(DataType); - std::size_t t_right_bytes = t_right.size() * sizeof(DataType); - std::size_t t_result_bytes = sizeof(DataType); - - - DataType * d_t_left = static_cast(sycl_device.allocate(t_left_bytes)); - DataType * d_t_right = static_cast(sycl_device.allocate(t_right_bytes)); - DataType * d_t_result = static_cast(sycl_device.allocate(t_result_bytes)); - - Eigen::TensorMap > gpu_t_left(d_t_left, left_dims); - Eigen::TensorMap > gpu_t_right(d_t_right, right_dims); - Eigen::TensorMap > gpu_t_result(d_t_result); - - sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes); - sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes); - - gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims); - sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes); - - t_result = t_left.contract(t_right, dims); - - if (static_cast(fabs(t_result() - t_result_gpu())) > error_threshold && - !Eigen::internal::isApprox(t_result(), t_result_gpu(), error_threshold)) { - std::cout << "mismatch detected: " << t_result() - << " vs " << t_result_gpu() << std::endl; - assert(false); - } - - sycl_device.deallocate(d_t_left); - sycl_device.deallocate(d_t_right); - sycl_device.deallocate(d_t_result); -} - - -template -void test_sycl_contraction_m(const Device& sycl_device) { - for (IndexType k = 32; k < 256; k++) { - test_sycl_contraction(sycl_device, k, 128, 128); - } -} - -template -void test_sycl_contraction_k(const Device& sycl_device) { - for (IndexType k = 32; k < 256; k++) { - test_sycl_contraction(sycl_device, 128, k, 128); - } -} - -template -void test_sycl_contraction_n(const Device& sycl_device) { - for (IndexType k = 32; k < 256; k++) { - test_sycl_contraction(sycl_device, 128, 128, k); - } -} - - -template -void test_sycl_contraction_sizes(const Device& sycl_device) { - IndexType m_sizes[] = { 31, 39, 63, 64, 65, - 127, 129, 255, 257 , 511, - 512, 513, 1023, 1024, 1025}; - - IndexType n_sizes[] = { 31, 39, 63, 64, 65, - 127, 129, 255, 257, 511, - 512, 513, 1023, 1024, 1025}; - - IndexType k_sizes[] = { 31, 39, 63, 64, 65, - 95, 96, 127, 129, 255, - 257, 511, 512, 513, 1023, - 1024, 1025}; - - for (IndexType i = 0; i < 15; i++) { - for (IndexType j = 0; j < 15; j++) { - for (IndexType k = 0; k < 17; k++) { - test_sycl_contraction(sycl_device, m_sizes[i], n_sizes[j], k_sizes[k]); - } - } - } -} - -template void tensorContractionPerDevice(Dev_selector& s){ - QueueInterface queueInterface(s); - auto sycl_device=Eigen::SyclDevice(&queueInterface); - test_sycl_contraction(sycl_device, 32, 32, 32); - test_sycl_contraction(sycl_device, 32, 32, 32); - test_scalar(sycl_device, 32, 32, 32); - test_scalar(sycl_device, 32, 32, 32); - std::chrono::time_point start, end; - start = std::chrono::system_clock::now(); - test_sycl_contraction(sycl_device, 128, 128, 128); - test_sycl_contraction(sycl_device, 128, 128, 128); - test_scalar(sycl_device, 128, 128, 128); - test_scalar(sycl_device, 128, 128, 128); - test_sycl_contraction_m(sycl_device); - test_sycl_contraction_m(sycl_device); - test_sycl_contraction_n(sycl_device); - test_sycl_contraction_n(sycl_device); - test_sycl_contraction_k(sycl_device); - test_sycl_contraction_k(sycl_device); - test_sycl_contraction_sizes(sycl_device); - test_sycl_contraction_sizes(sycl_device); - test_TF(sycl_device); - test_TF(sycl_device); - - end = std::chrono::system_clock::now(); - std::chrono::duration elapsed_seconds = end-start; - std::time_t end_time = std::chrono::system_clock::to_time_t(end); - std::cout << "finished computation at " << std::ctime(&end_time) - << "elapsed time: " << elapsed_seconds.count() << "s\n"; - -} - -void test_cxx11_tensor_contract_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(tensorContractionPerDevice(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp deleted file mode 100644 index a4226a6..0000000 --- a/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp +++ /dev/null @@ -1,469 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_convolution_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - -#include -#include -#include - -#include "main.h" -#include -#include - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; -static const float error_threshold =1e-4f; - - -template -static void test_larg_expr1D(const Eigen::SyclDevice& sycl_device) -{ - IndexType indim0 =53; - IndexType indim1= 55; - IndexType indim2= 51; - IndexType outdim0=50; - IndexType outdim1=55; - IndexType outdim2=51; - Eigen::array input_dims = {{indim0, indim1, indim2}}; - Eigen::array kernel_dims = {{4}}; - Eigen::array result_dims = {{outdim0, outdim1, outdim2}}; - - Tensor input(input_dims); - Tensor kernel(kernel_dims); - Tensor result(result_dims); - Tensor result_host(result_dims); - - Eigen::array dims3{{0}}; - - input.setRandom(); - kernel.setRandom(); - result.setZero(); - result_host.setZero(); - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t result_bytes = result.size() * sizeof(DataType); - - DataType * d_input = static_cast(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast(sycl_device.allocate(kernel_bytes)); - DataType * d_result = static_cast(sycl_device.allocate(result_bytes)); - - Eigen::TensorMap > gpu_input(d_input, input_dims); - Eigen::TensorMap > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap > gpu_result(d_result, result_dims); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); - sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); - - result_host=input.convolve(kernel, dims3); - -for(IndexType i=0; i< outdim0; i++ ){ - for(IndexType j=0; j< outdim1; j++ ){ - for(IndexType k=0; k< outdim2; k++ ){ - if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { - std::cout < -static void test_larg_expr2D(const Eigen::SyclDevice& sycl_device) -{ - IndexType indim0 =53; - IndexType indim1= 55; - IndexType indim2= 51; - IndexType outdim0=50; - IndexType outdim1=51; - IndexType outdim2=51; - Eigen::array input_dims = {{indim0, indim1, indim2}}; - Eigen::array kernel_dims = {{4,5}}; - Eigen::array result_dims = {{outdim0, outdim1, outdim2}}; - - Tensor input(input_dims); - Tensor kernel(kernel_dims); - Tensor result(result_dims); - Tensor result_host(result_dims); - - Eigen::array dims3{{0,1}}; - - input.setRandom(); - kernel.setRandom(); - result.setZero(); - result_host.setZero(); - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t result_bytes = result.size() * sizeof(DataType); - - DataType * d_input = static_cast(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast(sycl_device.allocate(kernel_bytes)); - DataType * d_result = static_cast(sycl_device.allocate(result_bytes)); - - Eigen::TensorMap > gpu_input(d_input, input_dims); - Eigen::TensorMap > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap > gpu_result(d_result, result_dims); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); - sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); - - result_host=input.convolve(kernel, dims3); - -for(IndexType i=0; i< outdim0; i++ ){ - for(IndexType j=0; j< outdim1; j++ ){ - for(IndexType k=0; k< outdim2; k++ ){ - if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { - std::cout < -static void test_larg_expr3D(const Eigen::SyclDevice& sycl_device) -{ - IndexType indim0 =53; - IndexType indim1= 55; - IndexType indim2= 51; - IndexType outdim0=50; - IndexType outdim1=51; - IndexType outdim2=49; - Eigen::array input_dims = {{indim0, indim1, indim2}}; - Eigen::array kernel_dims = {{4,5,3}}; - Eigen::array result_dims = {{outdim0, outdim1, outdim2}}; - - Tensor input(input_dims); - Tensor kernel(kernel_dims); - Tensor result(result_dims); - Tensor result_host(result_dims); - - Eigen::array dims3{{0,1,2}}; - - input.setRandom(); - kernel.setRandom(); - result.setZero(); - result_host.setZero(); - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t result_bytes = result.size() * sizeof(DataType); - - DataType * d_input = static_cast(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast(sycl_device.allocate(kernel_bytes)); - DataType * d_result = static_cast(sycl_device.allocate(result_bytes)); - - Eigen::TensorMap > gpu_input(d_input, input_dims); - Eigen::TensorMap > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap > gpu_result(d_result, result_dims); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); - sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); - - result_host=input.convolve(kernel, dims3); - -for(IndexType i=0; i< outdim0; i++ ){ - for(IndexType j=0; j< outdim1; j++ ){ - for(IndexType k=0; k< outdim2; k++ ){ - if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { - std::cout < -static void test_evals(const Eigen::SyclDevice& sycl_device) -{ - Eigen::array input_dims = {{3, 3}}; - Eigen::array kernel_dims = {{2}}; - Eigen::array result_dims = {{2, 3}}; - - Tensor input(input_dims); - Tensor kernel(kernel_dims); - Tensor result(result_dims); - - Eigen::array dims3{{0}}; - - input.setRandom(); - kernel.setRandom(); - result.setZero(); - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t result_bytes = result.size() * sizeof(DataType); - - DataType * d_input = static_cast(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast(sycl_device.allocate(kernel_bytes)); - DataType * d_result = static_cast(sycl_device.allocate(result_bytes)); - - Eigen::TensorMap > gpu_input(d_input, input_dims); - Eigen::TensorMap > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap > gpu_result(d_result, result_dims); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); - sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); - - VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0) + input(1,0)*kernel(1)); // index 0 - VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0) + input(1,1)*kernel(1)); // index 2 - VERIFY_IS_APPROX(result(0,2), input(0,2)*kernel(0) + input(1,2)*kernel(1)); // index 4 - VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0) + input(2,0)*kernel(1)); // index 1 - VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0) + input(2,1)*kernel(1)); // index 3 - VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5 - - sycl_device.deallocate(d_input); - sycl_device.deallocate(d_kernel); - sycl_device.deallocate(d_result); -} - -template -static void test_expr(const Eigen::SyclDevice& sycl_device) -{ - Eigen::array input_dims = {{3, 3}}; - Eigen::array kernel_dims = {{2, 2}}; - Eigen::array result_dims = {{2, 2}}; - - Tensor input(input_dims); - Tensor kernel(kernel_dims); - Tensor result(result_dims); - - input.setRandom(); - kernel.setRandom(); - Eigen::array dims; - dims[0] = 0; - dims[1] = 1; - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t result_bytes = result.size() * sizeof(DataType); - - DataType * d_input = static_cast(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast(sycl_device.allocate(kernel_bytes)); - DataType * d_result = static_cast(sycl_device.allocate(result_bytes)); - - Eigen::TensorMap > gpu_input(d_input, input_dims); - Eigen::TensorMap > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap > gpu_result(d_result, result_dims); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims); - sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); - - VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0,0) + input(0,1)*kernel(0,1) + - input(1,0)*kernel(1,0) + input(1,1)*kernel(1,1)); - VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0,0) + input(0,2)*kernel(0,1) + - input(1,1)*kernel(1,0) + input(1,2)*kernel(1,1)); - VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0,0) + input(1,1)*kernel(0,1) + - input(2,0)*kernel(1,0) + input(2,1)*kernel(1,1)); - VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0,0) + input(1,2)*kernel(0,1) + - input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1)); - - sycl_device.deallocate(d_input); - sycl_device.deallocate(d_kernel); - sycl_device.deallocate(d_result); -} - - -template -static void test_modes(const Eigen::SyclDevice& sycl_device){ - -Eigen::array input_dims = {{3}}; -Eigen::array kernel_dims = {{3}}; - -Tensor input(input_dims); -Tensor kernel(kernel_dims); - -input.setRandom(); -kernel.setRandom(); -Eigen::array dims; -dims[0] = 0; - - input(0) = 1.0f; - input(1) = 2.0f; - input(2) = 3.0f; - kernel(0) = 0.5f; - kernel(1) = 1.0f; - kernel(2) = 0.0f; - - Eigen::array, 1> padding; - - // Emulate VALID mode (as defined in - // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). - padding[0] = std::make_pair(0, 0); - Tensor valid(1); - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t valid_bytes = valid.size() * sizeof(DataType); - - DataType * d_input = static_cast(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast(sycl_device.allocate(kernel_bytes)); - DataType * d_valid = static_cast(sycl_device.allocate(valid_bytes)); - - Eigen::TensorMap > gpu_input(d_input, input_dims); - Eigen::TensorMap > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap > gpu_valid(d_valid, valid.dimensions()); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_valid.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); - sycl_device.memcpyDeviceToHost(valid.data(), d_valid, valid_bytes); - - VERIFY_IS_EQUAL(valid.dimension(0), 1); - VERIFY_IS_APPROX(valid(0), 2.5f); - - // Emulate SAME mode (as defined in - // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). - padding[0] = std::make_pair(1, 1); - Tensor same(3); - std::size_t same_bytes = same.size() * sizeof(DataType); - DataType * d_same = static_cast(sycl_device.allocate(same_bytes)); - Eigen::TensorMap > gpu_same(d_same, same.dimensions()); - gpu_same.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); - sycl_device.memcpyDeviceToHost(same.data(), d_same, same_bytes); - - VERIFY_IS_EQUAL(same.dimension(0), 3); - VERIFY_IS_APPROX(same(0), 1.0f); - VERIFY_IS_APPROX(same(1), 2.5f); - VERIFY_IS_APPROX(same(2), 4.0f); - - // Emulate FULL mode (as defined in - // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). - padding[0] = std::make_pair(2, 2); - - Tensor full(5); - std::size_t full_bytes = full.size() * sizeof(DataType); - DataType * d_full = static_cast(sycl_device.allocate(full_bytes)); - Eigen::TensorMap > gpu_full(d_full, full.dimensions()); - gpu_full.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); - sycl_device.memcpyDeviceToHost(full.data(), d_full, full_bytes); - - VERIFY_IS_EQUAL(full.dimension(0), 5); - VERIFY_IS_APPROX(full(0), 0.0f); - VERIFY_IS_APPROX(full(1), 1.0f); - VERIFY_IS_APPROX(full(2), 2.5f); - VERIFY_IS_APPROX(full(3), 4.0f); - VERIFY_IS_APPROX(full(4), 1.5f); - - sycl_device.deallocate(d_input); - sycl_device.deallocate(d_kernel); - sycl_device.deallocate(d_valid); - sycl_device.deallocate(d_same); - sycl_device.deallocate(d_full); - -} - -template -static void test_strides(const Eigen::SyclDevice& sycl_device){ - - Eigen::array input_dims = {{13}}; - Eigen::array kernel_dims = {{3}}; - - Tensor input(input_dims); - Tensor kernel(kernel_dims); - Tensor result(2); - - input.setRandom(); - kernel.setRandom(); - Eigen::array dims; - dims[0] = 0; - - Eigen::array stride_of_3; - stride_of_3[0] = 3; - Eigen::array stride_of_2; - stride_of_2[0] = 2; - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t result_bytes = result.size() * sizeof(DataType); - - DataType * d_input = static_cast(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast(sycl_device.allocate(kernel_bytes)); - DataType * d_result = static_cast(sycl_device.allocate(result_bytes)); - - Eigen::TensorMap > gpu_input(d_input, input_dims); - Eigen::TensorMap > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap > gpu_result(d_result, result.dimensions()); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_result.device(sycl_device)=gpu_input.stride(stride_of_3).convolve(gpu_kernel, dims).stride(stride_of_2); - sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); - - VERIFY_IS_EQUAL(result.dimension(0), 2); - VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) + - input(6)*kernel(2))); - VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) + - input(12)*kernel(2))); -} - -template void tensorConvolutionPerDevice(Dev_selector& s){ - QueueInterface queueInterface(s); - auto sycl_device=Eigen::SyclDevice(&queueInterface); - test_larg_expr1D(sycl_device); - test_larg_expr1D(sycl_device); - test_larg_expr2D(sycl_device); - test_larg_expr2D(sycl_device); - test_larg_expr3D(sycl_device); - test_larg_expr3D(sycl_device); - test_evals(sycl_device); - test_evals(sycl_device); - test_expr(sycl_device); - test_expr(sycl_device); - test_modes(sycl_device); - test_modes(sycl_device); - test_strides(sycl_device); - test_strides(sycl_device); -} - -void test_cxx11_tensor_convolution_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(tensorConvolutionPerDevice(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp index 3ecc68d..7f79753 100644 --- a/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp +++ b/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp @@ -14,64 +14,18 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_device_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_SYCL #include "main.h" #include -#include -#include -template -void test_device_memory(const Eigen::SyclDevice &sycl_device) { - std::cout << "Running on : " - << sycl_device.sycl_queue().get_device(). template get_info() - < tensorRange = {{sizeDim1}}; - Tensor in(tensorRange); - Tensor in1(tensorRange); - memset(in1.data(), 1, in1.size() * sizeof(DataType)); - DataType* gpu_in_data = static_cast(sycl_device.allocate(in.size()*sizeof(DataType))); - sycl_device.memset(gpu_in_data, 1, in.size()*sizeof(DataType)); - sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType)); - for (IndexType i=0; i() < -void test_device_exceptions(const Eigen::SyclDevice &sycl_device) { - VERIFY(sycl_device.ok()); - IndexType sizeDim1 = 100; - array tensorDims = {{sizeDim1}}; - DataType* gpu_data = static_cast(sycl_device.allocate(sizeDim1*sizeof(DataType))); - sycl_device.memset(gpu_data, 1, sizeDim1*sizeof(DataType)); - - TensorMap> in(gpu_data, tensorDims); - TensorMap> out(gpu_data, tensorDims); - out.device(sycl_device) = in / in.constant(0); - - sycl_device.synchronize(); - VERIFY(!sycl_device.ok()); - sycl_device.deallocate(gpu_data); -} - -template void sycl_device_test_per_device(const cl::sycl::device& d){ - std::cout << "Running on " << d.template get_info() << std::endl; - QueueInterface queueInterface(d); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_device_memory(sycl_device); - test_device_memory(sycl_device); - /// this test throw an exception. enable it if you want to see the exception - //test_device_exceptions(sycl_device); - /// this test throw an exception. enable it if you want to see the exception - //test_device_exceptions(sycl_device); -} - void test_cxx11_tensor_device_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_device_test_per_device(device)); - } + cl::sycl::gpu_selector s; + Eigen::SyclDevice sycl_device(s); + CALL_SUBTEST(test_device_sycl(sycl_device)); } diff --git a/eigen/unsupported/test/cxx11_tensor_expr.cpp b/eigen/unsupported/test/cxx11_tensor_expr.cpp index 129b4e6..77e24cb 100644 --- a/eigen/unsupported/test/cxx11_tensor_expr.cpp +++ b/eigen/unsupported/test/cxx11_tensor_expr.cpp @@ -300,51 +300,6 @@ static void test_select() } } -template -void test_minmax_nan_propagation_templ() { - for (int size = 1; size < 17; ++size) { - const Scalar kNan = std::numeric_limits::quiet_NaN(); - Tensor vec_nan(size); - Tensor vec_zero(size); - Tensor vec_res(size); - vec_nan.setConstant(kNan); - vec_zero.setZero(); - vec_res.setZero(); - - // Test that we propagate NaNs in the tensor when applying the - // cwiseMax(scalar) operator, which is used for the Relu operator. - vec_res = vec_nan.cwiseMax(Scalar(0)); - for (int i = 0; i < size; ++i) { - VERIFY((numext::isnan)(vec_res(i))); - } - - // Test that NaNs do not propagate if we reverse the arguments. - vec_res = vec_zero.cwiseMax(kNan); - for (int i = 0; i < size; ++i) { - VERIFY_IS_EQUAL(vec_res(i), Scalar(0)); - } - - // Test that we propagate NaNs in the tensor when applying the - // cwiseMin(scalar) operator. - vec_res.setZero(); - vec_res = vec_nan.cwiseMin(Scalar(0)); - for (int i = 0; i < size; ++i) { - VERIFY((numext::isnan)(vec_res(i))); - } - - // Test that NaNs do not propagate if we reverse the arguments. - vec_res = vec_zero.cwiseMin(kNan); - for (int i = 0; i < size; ++i) { - VERIFY_IS_EQUAL(vec_res(i), Scalar(0)); - } - } -} - -static void test_minmax_nan_propagation() -{ - test_minmax_nan_propagation_templ(); - test_minmax_nan_propagation_templ(); -} void test_cxx11_tensor_expr() { @@ -356,5 +311,4 @@ void test_cxx11_tensor_expr() CALL_SUBTEST(test_functors()); CALL_SUBTEST(test_type_casting()); CALL_SUBTEST(test_select()); - CALL_SUBTEST(test_minmax_nan_propagation()); } diff --git a/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp b/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp index e6274f8..4c660de 100644 --- a/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp +++ b/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp @@ -21,7 +21,7 @@ static void test_0d() TensorFixedSize, RowMajor> scalar2; VERIFY_IS_EQUAL(scalar1.rank(), 0); VERIFY_IS_EQUAL(scalar1.size(), 1); - VERIFY_IS_EQUAL(internal::array_prod(scalar1.dimensions()), 1); + VERIFY_IS_EQUAL(array_prod(scalar1.dimensions()), 1); scalar1() = 7.0; scalar2() = 13.0; diff --git a/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp index aca036c..5690da7 100644 --- a/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp +++ b/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp @@ -14,43 +14,43 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_forced_eval_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_SYCL #include "main.h" #include using Eigen::Tensor; -template + void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) { - IndexType sizeDim1 = 100; - IndexType sizeDim2 = 20; - IndexType sizeDim3 = 20; - Eigen::array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; - Eigen::Tensor in1(tensorRange); - Eigen::Tensor in2(tensorRange); - Eigen::Tensor out(tensorRange); + int sizeDim1 = 100; + int sizeDim2 = 200; + int sizeDim3 = 200; + Eigen::array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + Eigen::Tensor in1(tensorRange); + Eigen::Tensor in2(tensorRange); + Eigen::Tensor out(tensorRange); - DataType * gpu_in1_data = static_cast(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType))); - DataType * gpu_in2_data = static_cast(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(DataType))); - DataType * gpu_out_data = static_cast(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType))); + float * gpu_in1_data = static_cast(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(float))); + float * gpu_in2_data = static_cast(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(float))); + float * gpu_out_data = static_cast(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float))); in1 = in1.random() + in1.constant(10.0f); in2 = in2.random() + in2.constant(10.0f); // creating TensorMap from tensor - Eigen::TensorMap> gpu_in1(gpu_in1_data, tensorRange); - Eigen::TensorMap> gpu_in2(gpu_in2_data, tensorRange); - Eigen::TensorMap> gpu_out(gpu_out_data, tensorRange); - sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(DataType)); - sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in1.dimensions().TotalSize())*sizeof(DataType)); + Eigen::TensorMap> gpu_in1(gpu_in1_data, tensorRange); + Eigen::TensorMap> gpu_in2(gpu_in2_data, tensorRange); + Eigen::TensorMap> gpu_out(gpu_out_data, tensorRange); + sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(float)); + sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in1.dimensions().TotalSize())*sizeof(float)); /// c=(a+b)*b gpu_out.device(sycl_device) =(gpu_in1 + gpu_in2).eval() * gpu_in2; - sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType)); - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float)); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { VERIFY_IS_APPROX(out(i, j, k), (in1(i, j, k) + in2(i, j, k)) * in2(i, j, k)); } @@ -63,14 +63,8 @@ void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) { } -template void tensorForced_evalperDevice(Dev_selector s){ - QueueInterface queueInterface(s); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_forced_eval_sycl(sycl_device); - test_forced_eval_sycl(sycl_device); -} void test_cxx11_tensor_forced_eval_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(tensorForced_evalperDevice(device)); - } + cl::sycl::gpu_selector s; + Eigen::SyclDevice sycl_device(s); + CALL_SUBTEST(test_forced_eval_sycl(sycl_device)); } diff --git a/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp deleted file mode 100644 index 9b521bc..0000000 --- a/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp +++ /dev/null @@ -1,248 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: -// Benoit Steiner -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_morphing_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - - -#include "main.h" -#include - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; - -template -static void test_simple_reshape(const Eigen::SyclDevice& sycl_device) -{ - typename Tensor::Dimensions dim1(2,3,1,7,1); - typename Tensor::Dimensions dim2(2,3,7); - typename Tensor::Dimensions dim3(6,7); - typename Tensor::Dimensions dim4(2,21); - - Tensor tensor1(dim1); - Tensor tensor2(dim2); - Tensor tensor3(dim3); - Tensor tensor4(dim4); - - tensor1.setRandom(); - - DataType* gpu_data1 = static_cast(sycl_device.allocate(tensor1.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast(sycl_device.allocate(tensor2.size()*sizeof(DataType))); - DataType* gpu_data3 = static_cast(sycl_device.allocate(tensor3.size()*sizeof(DataType))); - DataType* gpu_data4 = static_cast(sycl_device.allocate(tensor4.size()*sizeof(DataType))); - - TensorMap> gpu1(gpu_data1, dim1); - TensorMap> gpu2(gpu_data2, dim2); - TensorMap> gpu3(gpu_data3, dim3); - TensorMap> gpu4(gpu_data4, dim4); - - sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(),(tensor1.size())*sizeof(DataType)); - - gpu2.device(sycl_device)=gpu1.reshape(dim2); - sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor1.size())*sizeof(DataType)); - - gpu3.device(sycl_device)=gpu1.reshape(dim3); - sycl_device.memcpyDeviceToHost(tensor3.data(), gpu_data3,(tensor3.size())*sizeof(DataType)); - - gpu4.device(sycl_device)=gpu1.reshape(dim2).reshape(dim4); - sycl_device.memcpyDeviceToHost(tensor4.data(), gpu_data4,(tensor4.size())*sizeof(DataType)); - for (IndexType i = 0; i < 2; ++i){ - for (IndexType j = 0; j < 3; ++j){ - for (IndexType k = 0; k < 7; ++k){ - VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k)); ///ColMajor - if (static_cast(DataLayout) == static_cast(ColMajor)) { - VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i+2*j,k)); ///ColMajor - VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j+3*k)); ///ColMajor - } - else{ - //VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k)); /// RowMajor - VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j*7 +k)); /// RowMajor - VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i*3 +j,k)); /// RowMajor - } - } - } - } - sycl_device.deallocate(gpu_data1); - sycl_device.deallocate(gpu_data2); - sycl_device.deallocate(gpu_data3); - sycl_device.deallocate(gpu_data4); -} - - -template -static void test_reshape_as_lvalue(const Eigen::SyclDevice& sycl_device) -{ - typename Tensor::Dimensions dim1(2,3,7); - typename Tensor::Dimensions dim2(6,7); - typename Tensor::Dimensions dim3(2,3,1,7,1); - Tensor tensor(dim1); - Tensor tensor2d(dim2); - Tensor tensor5d(dim3); - - tensor.setRandom(); - - DataType* gpu_data1 = static_cast(sycl_device.allocate(tensor.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast(sycl_device.allocate(tensor2d.size()*sizeof(DataType))); - DataType* gpu_data3 = static_cast(sycl_device.allocate(tensor5d.size()*sizeof(DataType))); - - TensorMap< Tensor > gpu1(gpu_data1, dim1); - TensorMap< Tensor > gpu2(gpu_data2, dim2); - TensorMap< Tensor > gpu3(gpu_data3, dim3); - - sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); - - gpu2.reshape(dim1).device(sycl_device)=gpu1; - sycl_device.memcpyDeviceToHost(tensor2d.data(), gpu_data2,(tensor2d.size())*sizeof(DataType)); - - gpu3.reshape(dim1).device(sycl_device)=gpu1; - sycl_device.memcpyDeviceToHost(tensor5d.data(), gpu_data3,(tensor5d.size())*sizeof(DataType)); - - - for (IndexType i = 0; i < 2; ++i){ - for (IndexType j = 0; j < 3; ++j){ - for (IndexType k = 0; k < 7; ++k){ - VERIFY_IS_EQUAL(tensor5d(i,j,0,k,0), tensor(i,j,k)); - if (static_cast(DataLayout) == static_cast(ColMajor)) { - VERIFY_IS_EQUAL(tensor2d(i+2*j,k), tensor(i,j,k)); ///ColMajor - } - else{ - VERIFY_IS_EQUAL(tensor2d(i*3 +j,k),tensor(i,j,k)); /// RowMajor - } - } - } - } - sycl_device.deallocate(gpu_data1); - sycl_device.deallocate(gpu_data2); - sycl_device.deallocate(gpu_data3); -} - - -template -static void test_simple_slice(const Eigen::SyclDevice &sycl_device) -{ - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - IndexType sizeDim5 = 11; - array tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - Tensor tensor(tensorRange); - tensor.setRandom(); - array slice1_range ={{1, 1, 1, 1, 1}}; - Tensor slice1(slice1_range); - - DataType* gpu_data1 = static_cast(sycl_device.allocate(tensor.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast(sycl_device.allocate(slice1.size()*sizeof(DataType))); - TensorMap> gpu1(gpu_data1, tensorRange); - TensorMap> gpu2(gpu_data2, slice1_range); - Eigen::DSizes indices(1,2,3,4,5); - Eigen::DSizes sizes(1,1,1,1,1); - sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); - gpu2.device(sycl_device)=gpu1.slice(indices, sizes); - sycl_device.memcpyDeviceToHost(slice1.data(), gpu_data2,(slice1.size())*sizeof(DataType)); - VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5)); - - - array slice2_range ={{1,1,2,2,3}}; - Tensor slice2(slice2_range); - DataType* gpu_data3 = static_cast(sycl_device.allocate(slice2.size()*sizeof(DataType))); - TensorMap> gpu3(gpu_data3, slice2_range); - Eigen::DSizes indices2(1,1,3,4,5); - Eigen::DSizes sizes2(1,1,2,2,3); - gpu3.device(sycl_device)=gpu1.slice(indices2, sizes2); - sycl_device.memcpyDeviceToHost(slice2.data(), gpu_data3,(slice2.size())*sizeof(DataType)); - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 2; ++j) { - for (IndexType k = 0; k < 3; ++k) { - VERIFY_IS_EQUAL(slice2(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k)); - } - } - } - sycl_device.deallocate(gpu_data1); - sycl_device.deallocate(gpu_data2); - sycl_device.deallocate(gpu_data3); -} - -template -static void test_strided_slice_write_sycl(const Eigen::SyclDevice& sycl_device) -{ - typedef Tensor Tensor2f; - typedef Eigen::DSizes Index2; - IndexType sizeDim1 = 7L; - IndexType sizeDim2 = 11L; - array tensorRange = {{sizeDim1, sizeDim2}}; - Tensor tensor(tensorRange),tensor2(tensorRange); - IndexType sliceDim1 = 2; - IndexType sliceDim2 = 3; - array sliceRange = {{sliceDim1, sliceDim2}}; - Tensor2f slice(sliceRange); - Index2 strides(1L,1L); - Index2 indicesStart(3L,4L); - Index2 indicesStop(5L,7L); - Index2 lengths(2L,3L); - - DataType* gpu_data1 = static_cast(sycl_device.allocate(tensor.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast(sycl_device.allocate(tensor2.size()*sizeof(DataType))); - DataType* gpu_data3 = static_cast(sycl_device.allocate(slice.size()*sizeof(DataType))); - TensorMap> gpu1(gpu_data1, tensorRange); - TensorMap> gpu2(gpu_data2, tensorRange); - TensorMap> gpu3(gpu_data3, sliceRange); - - - tensor.setRandom(); - sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); - gpu2.device(sycl_device)=gpu1; - - slice.setRandom(); - sycl_device.memcpyHostToDevice(gpu_data3, slice.data(),(slice.size())*sizeof(DataType)); - - - gpu1.slice(indicesStart,lengths).device(sycl_device)=gpu3; - gpu2.stridedSlice(indicesStart,indicesStop,strides).device(sycl_device)=gpu3; - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data1,(tensor.size())*sizeof(DataType)); - sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor2.size())*sizeof(DataType)); - - for(IndexType i=0;i void sycl_morphing_test_per_device(dev_Selector s){ - QueueInterface queueInterface(s); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_simple_slice(sycl_device); - test_simple_slice(sycl_device); - test_simple_reshape(sycl_device); - test_simple_reshape(sycl_device); - test_reshape_as_lvalue(sycl_device); - test_reshape_as_lvalue(sycl_device); - test_strided_slice_write_sycl(sycl_device); - test_strided_slice_write_sycl(sycl_device); -} -void test_cxx11_tensor_morphing_sycl() -{ - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_morphing_test_per_device(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_notification.cpp b/eigen/unsupported/test/cxx11_tensor_notification.cpp index 183ef02..c946007 100644 --- a/eigen/unsupported/test/cxx11_tensor_notification.cpp +++ b/eigen/unsupported/test/cxx11_tensor_notification.cpp @@ -13,6 +13,15 @@ #include "main.h" #include +#if EIGEN_OS_WIN || EIGEN_OS_WIN64 +#include +void sleep(int seconds) { + Sleep(seconds*1000); +} +#else +#include +#endif + namespace { @@ -31,7 +40,7 @@ static void test_notification_single() Eigen::Notification n; std::function func = std::bind(&WaitAndAdd, &n, &counter); thread_pool.Schedule(func); - EIGEN_SLEEP(1000); + sleep(1); // The thread should be waiting for the notification. VERIFY_IS_EQUAL(counter, 0); @@ -39,7 +48,7 @@ static void test_notification_single() // Unblock the thread n.Notify(); - EIGEN_SLEEP(1000); + sleep(1); // Verify the counter has been incremented VERIFY_IS_EQUAL(counter, 1); @@ -58,10 +67,10 @@ static void test_notification_multiple() thread_pool.Schedule(func); thread_pool.Schedule(func); thread_pool.Schedule(func); - EIGEN_SLEEP(1000); + sleep(1); VERIFY_IS_EQUAL(counter, 0); n.Notify(); - EIGEN_SLEEP(1000); + sleep(1); VERIFY_IS_EQUAL(counter, 4); } diff --git a/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu index 908a5e5..2f86980 100644 --- a/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -200,8 +200,6 @@ void test_cuda_trancendental() { Eigen::TensorMap, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem); Eigen::TensorMap, Eigen::Aligned> gpu_res3_half(d_res3_half, num_elem); Eigen::TensorMap, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem); - Eigen::TensorMap, Eigen::Aligned> gpu_res4_half(d_res3_half, num_elem); - Eigen::TensorMap, Eigen::Aligned> gpu_res4_float(d_res3_float, num_elem); gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f); gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f); @@ -209,7 +207,6 @@ void test_cuda_trancendental() { gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast(); gpu_res2_float.device(gpu_device) = gpu_float2.log().cast(); gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast(); - gpu_res4_float.device(gpu_device) = gpu_float3.expm1().cast(); gpu_res1_half.device(gpu_device) = gpu_float1.cast(); gpu_res1_half.device(gpu_device) = gpu_res1_half.exp(); @@ -220,9 +217,6 @@ void test_cuda_trancendental() { gpu_res3_half.device(gpu_device) = gpu_float3.cast(); gpu_res3_half.device(gpu_device) = gpu_res3_half.log1p(); - gpu_res3_half.device(gpu_device) = gpu_float3.cast(); - gpu_res3_half.device(gpu_device) = gpu_res3_half.expm1(); - Tensor input1(num_elem); Tensor half_prec1(num_elem); Tensor full_prec1(num_elem); diff --git a/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp deleted file mode 100644 index dc748b7..0000000 --- a/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp +++ /dev/null @@ -1,157 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: -// Benoit Steiner -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_padding_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - - -#include "main.h" -#include - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; - - -template -static void test_simple_padding(const Eigen::SyclDevice& sycl_device) -{ - - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - array tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; - - Tensor tensor(tensorRange); - tensor.setRandom(); - - array, 4> paddings; - paddings[0] = std::make_pair(0, 0); - paddings[1] = std::make_pair(2, 1); - paddings[2] = std::make_pair(3, 4); - paddings[3] = std::make_pair(0, 0); - - IndexType padedSizeDim1 = 2; - IndexType padedSizeDim2 = 6; - IndexType padedSizeDim3 = 12; - IndexType padedSizeDim4 = 7; - array padedtensorRange = {{padedSizeDim1, padedSizeDim2, padedSizeDim3, padedSizeDim4}}; - - Tensor padded(padedtensorRange); - - - DataType* gpu_data1 = static_cast(sycl_device.allocate(tensor.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast(sycl_device.allocate(padded.size()*sizeof(DataType))); - TensorMap> gpu1(gpu_data1, tensorRange); - TensorMap> gpu2(gpu_data2, padedtensorRange); - - VERIFY_IS_EQUAL(padded.dimension(0), 2+0); - VERIFY_IS_EQUAL(padded.dimension(1), 3+3); - VERIFY_IS_EQUAL(padded.dimension(2), 5+7); - VERIFY_IS_EQUAL(padded.dimension(3), 7+0); - sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); - gpu2.device(sycl_device)=gpu1.pad(paddings); - sycl_device.memcpyDeviceToHost(padded.data(), gpu_data2,(padded.size())*sizeof(DataType)); - for (IndexType i = 0; i < padedSizeDim1; ++i) { - for (IndexType j = 0; j < padedSizeDim2; ++j) { - for (IndexType k = 0; k < padedSizeDim3; ++k) { - for (IndexType l = 0; l < padedSizeDim4; ++l) { - if (j >= 2 && j < 5 && k >= 3 && k < 8) { - VERIFY_IS_EQUAL(padded(i,j,k,l), tensor(i,j-2,k-3,l)); - } else { - VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f); - } - } - } - } - } - sycl_device.deallocate(gpu_data1); - sycl_device.deallocate(gpu_data2); -} - -template -static void test_padded_expr(const Eigen::SyclDevice& sycl_device) -{ - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - array tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; - - Tensor tensor(tensorRange); - tensor.setRandom(); - - array, 4> paddings; - paddings[0] = std::make_pair(0, 0); - paddings[1] = std::make_pair(2, 1); - paddings[2] = std::make_pair(3, 4); - paddings[3] = std::make_pair(0, 0); - - Eigen::DSizes reshape_dims; - reshape_dims[0] = 12; - reshape_dims[1] = 84; - - - Tensor result(reshape_dims); - - DataType* gpu_data1 = static_cast(sycl_device.allocate(tensor.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast(sycl_device.allocate(result.size()*sizeof(DataType))); - TensorMap> gpu1(gpu_data1, tensorRange); - TensorMap> gpu2(gpu_data2, reshape_dims); - - - sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); - gpu2.device(sycl_device)=gpu1.pad(paddings).reshape(reshape_dims); - sycl_device.memcpyDeviceToHost(result.data(), gpu_data2,(result.size())*sizeof(DataType)); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 6; ++j) { - for (IndexType k = 0; k < 12; ++k) { - for (IndexType l = 0; l < 7; ++l) { - const float result_value = DataLayout == ColMajor ? - result(i+2*j,k+12*l) : result(j+6*i,l+7*k); - if (j >= 2 && j < 5 && k >= 3 && k < 8) { - VERIFY_IS_EQUAL(result_value, tensor(i,j-2,k-3,l)); - } else { - VERIFY_IS_EQUAL(result_value, 0.0f); - } - } - } - } - } - sycl_device.deallocate(gpu_data1); - sycl_device.deallocate(gpu_data2); -} - -template void sycl_padding_test_per_device(dev_Selector s){ - QueueInterface queueInterface(s); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_simple_padding(sycl_device); - test_simple_padding(sycl_device); - test_padded_expr(sycl_device); - test_padded_expr(sycl_device); - -} -void test_cxx11_tensor_padding_sycl() -{ - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_padding_test_per_device(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp index 440d48b..a9ef829 100644 --- a/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp +++ b/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp @@ -14,168 +14,125 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_reduction_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_SYCL #include "main.h" #include -template -static void test_full_reductions_mean_sycl(const Eigen::SyclDevice& sycl_device) { - const IndexType num_rows = 452; - const IndexType num_cols = 765; - array tensorRange = {{num_rows, num_cols}}; +static void test_full_reductions_sycl(const Eigen::SyclDevice& sycl_device) { - Tensor in(tensorRange); - Tensor full_redux; - Tensor full_redux_gpu; - - in.setRandom(); - - full_redux = in.mean(); - - DataType* gpu_in_data = static_cast(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data =(DataType*)sycl_device.allocate(sizeof(DataType)); - - TensorMap > in_gpu(gpu_in_data, tensorRange); - TensorMap > out_gpu(gpu_out_data); - - sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); - out_gpu.device(sycl_device) = in_gpu.mean(); - sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(DataType)); - // Check that the CPU and GPU reductions return the same result. - VERIFY_IS_APPROX(full_redux_gpu(), full_redux()); - sycl_device.deallocate(gpu_in_data); - sycl_device.deallocate(gpu_out_data); -} + const int num_rows = 452; + const int num_cols = 765; + array tensorRange = {{num_rows, num_cols}}; - -template -static void test_full_reductions_min_sycl(const Eigen::SyclDevice& sycl_device) { - - const IndexType num_rows = 876; - const IndexType num_cols = 953; - array tensorRange = {{num_rows, num_cols}}; - - Tensor in(tensorRange); - Tensor full_redux; - Tensor full_redux_gpu; + Tensor in(tensorRange); + Tensor full_redux; + Tensor full_redux_gpu; in.setRandom(); - full_redux = in.minimum(); + full_redux = in.sum(); - DataType* gpu_in_data = static_cast(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data =(DataType*)sycl_device.allocate(sizeof(DataType)); + float* gpu_in_data = static_cast(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float))); + float* gpu_out_data =(float*)sycl_device.allocate(sizeof(float)); - TensorMap > in_gpu(gpu_in_data, tensorRange); - TensorMap > out_gpu(gpu_out_data); + TensorMap > in_gpu(gpu_in_data, tensorRange); + TensorMap > out_gpu(gpu_out_data); - sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); - out_gpu.device(sycl_device) = in_gpu.minimum(); - sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float)); + out_gpu.device(sycl_device) = in_gpu.sum(); + sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(float)); // Check that the CPU and GPU reductions return the same result. VERIFY_IS_APPROX(full_redux_gpu(), full_redux()); + sycl_device.deallocate(gpu_in_data); sycl_device.deallocate(gpu_out_data); } +static void test_first_dim_reductions_sycl(const Eigen::SyclDevice& sycl_device) { -template -static void test_first_dim_reductions_max_sycl(const Eigen::SyclDevice& sycl_device) { - - IndexType dim_x = 145; - IndexType dim_y = 1; - IndexType dim_z = 67; + int dim_x = 145; + int dim_y = 1; + int dim_z = 67; - array tensorRange = {{dim_x, dim_y, dim_z}}; - Eigen::array red_axis; + array tensorRange = {{dim_x, dim_y, dim_z}}; + Eigen::array red_axis; red_axis[0] = 0; - array reduced_tensorRange = {{dim_y, dim_z}}; + array reduced_tensorRange = {{dim_y, dim_z}}; - Tensor in(tensorRange); - Tensor redux(reduced_tensorRange); - Tensor redux_gpu(reduced_tensorRange); + Tensor in(tensorRange); + Tensor redux(reduced_tensorRange); + Tensor redux_gpu(reduced_tensorRange); in.setRandom(); - redux= in.maximum(red_axis); + redux= in.sum(red_axis); - DataType* gpu_in_data = static_cast(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data = static_cast(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType))); + float* gpu_in_data = static_cast(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float))); + float* gpu_out_data = static_cast(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float))); - TensorMap > in_gpu(gpu_in_data, tensorRange); - TensorMap > out_gpu(gpu_out_data, reduced_tensorRange); + TensorMap > in_gpu(gpu_in_data, tensorRange); + TensorMap > out_gpu(gpu_out_data, reduced_tensorRange); - sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); - out_gpu.device(sycl_device) = in_gpu.maximum(red_axis); - sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float)); + out_gpu.device(sycl_device) = in_gpu.sum(red_axis); + sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float)); // Check that the CPU and GPU reductions return the same result. - for(IndexType j=0; j -static void test_last_dim_reductions_sum_sycl(const Eigen::SyclDevice &sycl_device) { +static void test_last_dim_reductions_sycl(const Eigen::SyclDevice &sycl_device) { - IndexType dim_x = 567; - IndexType dim_y = 1; - IndexType dim_z = 47; + int dim_x = 567; + int dim_y = 1; + int dim_z = 47; - array tensorRange = {{dim_x, dim_y, dim_z}}; - Eigen::array red_axis; + array tensorRange = {{dim_x, dim_y, dim_z}}; + Eigen::array red_axis; red_axis[0] = 2; - array reduced_tensorRange = {{dim_x, dim_y}}; + array reduced_tensorRange = {{dim_x, dim_y}}; - Tensor in(tensorRange); - Tensor redux(reduced_tensorRange); - Tensor redux_gpu(reduced_tensorRange); + Tensor in(tensorRange); + Tensor redux(reduced_tensorRange); + Tensor redux_gpu(reduced_tensorRange); in.setRandom(); redux= in.sum(red_axis); - DataType* gpu_in_data = static_cast(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data = static_cast(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType))); + float* gpu_in_data = static_cast(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float))); + float* gpu_out_data = static_cast(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float))); - TensorMap > in_gpu(gpu_in_data, tensorRange); - TensorMap > out_gpu(gpu_out_data, reduced_tensorRange); + TensorMap > in_gpu(gpu_in_data, tensorRange); + TensorMap > out_gpu(gpu_out_data, reduced_tensorRange); - sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float)); out_gpu.device(sycl_device) = in_gpu.sum(red_axis); - sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(DataType)); + sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float)); // Check that the CPU and GPU reductions return the same result. - for(IndexType j=0; j void sycl_reduction_test_per_device(const cl::sycl::device& d){ - std::cout << "Running on " << d.template get_info() << std::endl; - QueueInterface queueInterface(d); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - - test_full_reductions_mean_sycl(sycl_device); - test_full_reductions_min_sycl(sycl_device); - test_first_dim_reductions_max_sycl(sycl_device); - test_last_dim_reductions_sum_sycl(sycl_device); - test_full_reductions_mean_sycl(sycl_device); - test_full_reductions_min_sycl(sycl_device); - test_first_dim_reductions_max_sycl(sycl_device); - test_last_dim_reductions_sum_sycl(sycl_device); -} + void test_cxx11_tensor_reduction_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_reduction_test_per_device(device)); - } + cl::sycl::gpu_selector s; + Eigen::SyclDevice sycl_device(s); + CALL_SUBTEST((test_full_reductions_sycl(sycl_device))); + CALL_SUBTEST((test_first_dim_reductions_sycl(sycl_device))); + CALL_SUBTEST((test_last_dim_reductions_sycl(sycl_device))); + } diff --git a/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp deleted file mode 100644 index 2f54844..0000000 --- a/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp +++ /dev/null @@ -1,221 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_reverse_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - -#include "main.h" -#include - - -template -static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) { - - IndexType dim1 = 2; - IndexType dim2 = 3; - IndexType dim3 = 5; - IndexType dim4 = 7; - - array tensorRange = {{dim1, dim2, dim3, dim4}}; - Tensor tensor(tensorRange); - Tensor reversed_tensor(tensorRange); - tensor.setRandom(); - - array dim_rev; - dim_rev[0] = false; - dim_rev[1] = true; - dim_rev[2] = true; - dim_rev[3] = false; - - DataType* gpu_in_data = static_cast(sycl_device.allocate(tensor.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data =static_cast(sycl_device.allocate(reversed_tensor.dimensions().TotalSize()*sizeof(DataType))); - - TensorMap > in_gpu(gpu_in_data, tensorRange); - TensorMap > out_gpu(gpu_out_data, tensorRange); - - sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(),(tensor.dimensions().TotalSize())*sizeof(DataType)); - out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); - sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType)); - // Check that the CPU and GPU reductions return the same result. - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - for (IndexType k = 0; k < 5; ++k) { - for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(i,2-j,4-k,l)); - } - } - } - } - dim_rev[0] = true; - dim_rev[1] = false; - dim_rev[2] = false; - dim_rev[3] = false; - - out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); - sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType)); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - for (IndexType k = 0; k < 5; ++k) { - for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,l)); - } - } - } - } - - dim_rev[0] = true; - dim_rev[1] = false; - dim_rev[2] = false; - dim_rev[3] = true; - out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); - sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType)); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - for (IndexType k = 0; k < 5; ++k) { - for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,6-l)); - } - } - } - } - - sycl_device.deallocate(gpu_in_data); - sycl_device.deallocate(gpu_out_data); -} - - - -template -static void test_expr_reverse(const Eigen::SyclDevice& sycl_device, bool LValue) -{ - IndexType dim1 = 2; - IndexType dim2 = 3; - IndexType dim3 = 5; - IndexType dim4 = 7; - - array tensorRange = {{dim1, dim2, dim3, dim4}}; - Tensor tensor(tensorRange); - Tensor expected(tensorRange); - Tensor result(tensorRange); - tensor.setRandom(); - - array dim_rev; - dim_rev[0] = false; - dim_rev[1] = true; - dim_rev[2] = false; - dim_rev[3] = true; - - DataType* gpu_in_data = static_cast(sycl_device.allocate(tensor.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data_expected =static_cast(sycl_device.allocate(expected.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data_result =static_cast(sycl_device.allocate(result.dimensions().TotalSize()*sizeof(DataType))); - - TensorMap > in_gpu(gpu_in_data, tensorRange); - TensorMap > out_gpu_expected(gpu_out_data_expected, tensorRange); - TensorMap > out_gpu_result(gpu_out_data_result, tensorRange); - - - sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(),(tensor.dimensions().TotalSize())*sizeof(DataType)); - - if (LValue) { - out_gpu_expected.reverse(dim_rev).device(sycl_device) = in_gpu; - } else { - out_gpu_expected.device(sycl_device) = in_gpu.reverse(dim_rev); - } - sycl_device.memcpyDeviceToHost(expected.data(), gpu_out_data_expected, expected.dimensions().TotalSize()*sizeof(DataType)); - - - array src_slice_dim; - src_slice_dim[0] = 2; - src_slice_dim[1] = 3; - src_slice_dim[2] = 1; - src_slice_dim[3] = 7; - array src_slice_start; - src_slice_start[0] = 0; - src_slice_start[1] = 0; - src_slice_start[2] = 0; - src_slice_start[3] = 0; - array dst_slice_dim = src_slice_dim; - array dst_slice_start = src_slice_start; - - for (IndexType i = 0; i < 5; ++i) { - if (LValue) { - out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) = - in_gpu.slice(src_slice_start, src_slice_dim); - } else { - out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) = - in_gpu.slice(src_slice_start, src_slice_dim).reverse(dim_rev); - } - src_slice_start[2] += 1; - dst_slice_start[2] += 1; - } - sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result, result.dimensions().TotalSize()*sizeof(DataType)); - - for (IndexType i = 0; i < expected.dimension(0); ++i) { - for (IndexType j = 0; j < expected.dimension(1); ++j) { - for (IndexType k = 0; k < expected.dimension(2); ++k) { - for (IndexType l = 0; l < expected.dimension(3); ++l) { - VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l)); - } - } - } - } - - dst_slice_start[2] = 0; - result.setRandom(); - sycl_device.memcpyHostToDevice(gpu_out_data_result, result.data(),(result.dimensions().TotalSize())*sizeof(DataType)); - for (IndexType i = 0; i < 5; ++i) { - if (LValue) { - out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) = - in_gpu.slice(dst_slice_start, dst_slice_dim); - } else { - out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) = - in_gpu.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim); - } - dst_slice_start[2] += 1; - } - sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result, result.dimensions().TotalSize()*sizeof(DataType)); - - for (IndexType i = 0; i < expected.dimension(0); ++i) { - for (IndexType j = 0; j < expected.dimension(1); ++j) { - for (IndexType k = 0; k < expected.dimension(2); ++k) { - for (IndexType l = 0; l < expected.dimension(3); ++l) { - VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l)); - } - } - } - } -} - - - -template void sycl_reverse_test_per_device(const cl::sycl::device& d){ - std::cout << "Running on " << d.template get_info() << std::endl; - QueueInterface queueInterface(d); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_simple_reverse(sycl_device); - test_simple_reverse(sycl_device); - test_expr_reverse(sycl_device, false); - test_expr_reverse(sycl_device, false); - test_expr_reverse(sycl_device, true); - test_expr_reverse(sycl_device, true); -} -void test_cxx11_tensor_reverse_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_reverse_test_per_device(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp deleted file mode 100644 index c88db7c..0000000 --- a/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: -// Benoit Steiner -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_shuffling_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - - -#include "main.h" -#include - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; - -template -static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device) -{ - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - array tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; - Tensor tensor(tensorRange); - Tensor no_shuffle(tensorRange); - tensor.setRandom(); - - const size_t buffSize =tensor.size()*sizeof(DataType); - array shuffles; - shuffles[0] = 0; - shuffles[1] = 1; - shuffles[2] = 2; - shuffles[3] = 3; - DataType* gpu_data1 = static_cast(sycl_device.allocate(buffSize)); - DataType* gpu_data2 = static_cast(sycl_device.allocate(buffSize)); - - - TensorMap> gpu1(gpu_data1, tensorRange); - TensorMap> gpu2(gpu_data2, tensorRange); - - sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(), buffSize); - - gpu2.device(sycl_device)=gpu1.shuffle(shuffles); - sycl_device.memcpyDeviceToHost(no_shuffle.data(), gpu_data2, buffSize); - sycl_device.synchronize(); - - VERIFY_IS_EQUAL(no_shuffle.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(no_shuffle.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(no_shuffle.dimension(2), sizeDim3); - VERIFY_IS_EQUAL(no_shuffle.dimension(3), sizeDim4); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { - for (IndexType l = 0; l < sizeDim4; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), no_shuffle(i,j,k,l)); - } - } - } - } - - shuffles[0] = 2; - shuffles[1] = 3; - shuffles[2] = 1; - shuffles[3] = 0; - array tensorrangeShuffle = {{sizeDim3, sizeDim4, sizeDim2, sizeDim1}}; - Tensor shuffle(tensorrangeShuffle); - DataType* gpu_data3 = static_cast(sycl_device.allocate(buffSize)); - TensorMap> gpu3(gpu_data3, tensorrangeShuffle); - - gpu3.device(sycl_device)=gpu1.shuffle(shuffles); - sycl_device.memcpyDeviceToHost(shuffle.data(), gpu_data3, buffSize); - sycl_device.synchronize(); - - VERIFY_IS_EQUAL(shuffle.dimension(0), sizeDim3); - VERIFY_IS_EQUAL(shuffle.dimension(1), sizeDim4); - VERIFY_IS_EQUAL(shuffle.dimension(2), sizeDim2); - VERIFY_IS_EQUAL(shuffle.dimension(3), sizeDim1); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { - for (IndexType l = 0; l < sizeDim4; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i)); - } - } - } - } -} - - -template void sycl_shuffling_test_per_device(dev_Selector s){ - QueueInterface queueInterface(s); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_simple_shuffling_sycl(sycl_device); - test_simple_shuffling_sycl(sycl_device); - -} -void test_cxx11_tensor_shuffling_sycl() -{ - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_shuffling_test_per_device(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp deleted file mode 100644 index 603c374..0000000 --- a/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp +++ /dev/null @@ -1,203 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_striding_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - -#include -#include -#include - -#include "main.h" -#include - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; - - -template -static void test_simple_striding(const Eigen::SyclDevice& sycl_device) -{ - - Eigen::array tensor_dims = {{2,3,5,7}}; - Eigen::array stride_dims = {{1,1,3,3}}; - - - Tensor tensor(tensor_dims); - Tensor no_stride(tensor_dims); - Tensor stride(stride_dims); - - - std::size_t tensor_bytes = tensor.size() * sizeof(DataType); - std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType); - std::size_t stride_bytes = stride.size() * sizeof(DataType); - DataType * d_tensor = static_cast(sycl_device.allocate(tensor_bytes)); - DataType * d_no_stride = static_cast(sycl_device.allocate(no_stride_bytes)); - DataType * d_stride = static_cast(sycl_device.allocate(stride_bytes)); - - Eigen::TensorMap > gpu_tensor(d_tensor, tensor_dims); - Eigen::TensorMap > gpu_no_stride(d_no_stride, tensor_dims); - Eigen::TensorMap > gpu_stride(d_stride, stride_dims); - - - tensor.setRandom(); - array strides; - strides[0] = 1; - strides[1] = 1; - strides[2] = 1; - strides[3] = 1; - sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes); - gpu_no_stride.device(sycl_device)=gpu_tensor.stride(strides); - sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes); - - //no_stride = tensor.stride(strides); - - VERIFY_IS_EQUAL(no_stride.dimension(0), 2); - VERIFY_IS_EQUAL(no_stride.dimension(1), 3); - VERIFY_IS_EQUAL(no_stride.dimension(2), 5); - VERIFY_IS_EQUAL(no_stride.dimension(3), 7); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - for (IndexType k = 0; k < 5; ++k) { - for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l)); - } - } - } - } - - strides[0] = 2; - strides[1] = 4; - strides[2] = 2; - strides[3] = 3; -//Tensor stride; -// stride = tensor.stride(strides); - - gpu_stride.device(sycl_device)=gpu_tensor.stride(strides); - sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes); - - VERIFY_IS_EQUAL(stride.dimension(0), 1); - VERIFY_IS_EQUAL(stride.dimension(1), 1); - VERIFY_IS_EQUAL(stride.dimension(2), 3); - VERIFY_IS_EQUAL(stride.dimension(3), 3); - - for (IndexType i = 0; i < 1; ++i) { - for (IndexType j = 0; j < 1; ++j) { - for (IndexType k = 0; k < 3; ++k) { - for (IndexType l = 0; l < 3; ++l) { - VERIFY_IS_EQUAL(tensor(2*i,4*j,2*k,3*l), stride(i,j,k,l)); - } - } - } - } - - sycl_device.deallocate(d_tensor); - sycl_device.deallocate(d_no_stride); - sycl_device.deallocate(d_stride); -} - -template -static void test_striding_as_lvalue(const Eigen::SyclDevice& sycl_device) -{ - - Eigen::array tensor_dims = {{2,3,5,7}}; - Eigen::array stride_dims = {{3,12,10,21}}; - - - Tensor tensor(tensor_dims); - Tensor no_stride(stride_dims); - Tensor stride(stride_dims); - - - std::size_t tensor_bytes = tensor.size() * sizeof(DataType); - std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType); - std::size_t stride_bytes = stride.size() * sizeof(DataType); - - DataType * d_tensor = static_cast(sycl_device.allocate(tensor_bytes)); - DataType * d_no_stride = static_cast(sycl_device.allocate(no_stride_bytes)); - DataType * d_stride = static_cast(sycl_device.allocate(stride_bytes)); - - Eigen::TensorMap > gpu_tensor(d_tensor, tensor_dims); - Eigen::TensorMap > gpu_no_stride(d_no_stride, stride_dims); - Eigen::TensorMap > gpu_stride(d_stride, stride_dims); - - //Tensor tensor(2,3,5,7); - tensor.setRandom(); - array strides; - strides[0] = 2; - strides[1] = 4; - strides[2] = 2; - strides[3] = 3; - -// Tensor result(3, 12, 10, 21); -// result.stride(strides) = tensor; - sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes); - gpu_stride.stride(strides).device(sycl_device)=gpu_tensor; - sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - for (IndexType k = 0; k < 5; ++k) { - for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), stride(2*i,4*j,2*k,3*l)); - } - } - } - } - - array no_strides; - no_strides[0] = 1; - no_strides[1] = 1; - no_strides[2] = 1; - no_strides[3] = 1; -// Tensor result2(3, 12, 10, 21); -// result2.stride(strides) = tensor.stride(no_strides); - - gpu_no_stride.stride(strides).device(sycl_device)=gpu_tensor.stride(no_strides); - sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - for (IndexType k = 0; k < 5; ++k) { - for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(2*i,4*j,2*k,3*l)); - } - } - } - } - sycl_device.deallocate(d_tensor); - sycl_device.deallocate(d_no_stride); - sycl_device.deallocate(d_stride); -} - - -template void tensorStridingPerDevice(Dev_selector& s){ - QueueInterface queueInterface(s); - auto sycl_device=Eigen::SyclDevice(&queueInterface); - test_simple_striding(sycl_device); - test_simple_striding(sycl_device); - test_striding_as_lvalue(sycl_device); - test_striding_as_lvalue(sycl_device); -} - -void test_cxx11_tensor_striding_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(tensorStridingPerDevice(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_sycl.cpp index 5cd0f4c..6a9c334 100644 --- a/eigen/unsupported/test/cxx11_tensor_sycl.cpp +++ b/eigen/unsupported/test/cxx11_tensor_sycl.cpp @@ -16,7 +16,7 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_SYCL #include "main.h" @@ -27,105 +27,36 @@ using Eigen::SyclDevice; using Eigen::Tensor; using Eigen::TensorMap; -template -void test_sycl_mem_transfers(const Eigen::SyclDevice &sycl_device) { - IndexType sizeDim1 = 100; - IndexType sizeDim2 = 10; - IndexType sizeDim3 = 20; - array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; - Tensor in1(tensorRange); - Tensor out1(tensorRange); - Tensor out2(tensorRange); - Tensor out3(tensorRange); +void test_sycl_cpu(const Eigen::SyclDevice &sycl_device) { - in1 = in1.random(); - - DataType* gpu_data1 = static_cast(sycl_device.allocate(in1.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast(sycl_device.allocate(out1.size()*sizeof(DataType))); - - TensorMap> gpu1(gpu_data1, tensorRange); - TensorMap> gpu2(gpu_data2, tensorRange); - - sycl_device.memcpyHostToDevice(gpu_data1, in1.data(),(in1.size())*sizeof(DataType)); - sycl_device.memcpyHostToDevice(gpu_data2, in1.data(),(in1.size())*sizeof(DataType)); - gpu1.device(sycl_device) = gpu1 * 3.14f; - gpu2.device(sycl_device) = gpu2 * 2.7f; - sycl_device.memcpyDeviceToHost(out1.data(), gpu_data1,(out1.size())*sizeof(DataType)); - sycl_device.memcpyDeviceToHost(out2.data(), gpu_data1,(out2.size())*sizeof(DataType)); - sycl_device.memcpyDeviceToHost(out3.data(), gpu_data2,(out3.size())*sizeof(DataType)); - sycl_device.synchronize(); - - for (IndexType i = 0; i < in1.size(); ++i) { - VERIFY_IS_APPROX(out1(i), in1(i) * 3.14f); - VERIFY_IS_APPROX(out2(i), in1(i) * 3.14f); - VERIFY_IS_APPROX(out3(i), in1(i) * 2.7f); - } - - sycl_device.deallocate(gpu_data1); - sycl_device.deallocate(gpu_data2); -} - -template -void test_sycl_mem_sync(const Eigen::SyclDevice &sycl_device) { - IndexType size = 20; - array tensorRange = {{size}}; - Tensor in1(tensorRange); - Tensor in2(tensorRange); - Tensor out(tensorRange); - - in1 = in1.random(); - in2 = in1; - - DataType* gpu_data = static_cast(sycl_device.allocate(in1.size()*sizeof(DataType))); - - TensorMap> gpu1(gpu_data, tensorRange); - sycl_device.memcpyHostToDevice(gpu_data, in1.data(),(in1.size())*sizeof(DataType)); - sycl_device.synchronize(); - in1.setZero(); - - sycl_device.memcpyDeviceToHost(out.data(), gpu_data, out.size()*sizeof(DataType)); - sycl_device.synchronize(); - - for (IndexType i = 0; i < in1.size(); ++i) { - VERIFY_IS_APPROX(out(i), in2(i)); - } - - sycl_device.deallocate(gpu_data); -} - -template -void test_sycl_computations(const Eigen::SyclDevice &sycl_device) { - - IndexType sizeDim1 = 100; - IndexType sizeDim2 = 10; - IndexType sizeDim3 = 20; - array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; - Tensor in1(tensorRange); - Tensor in2(tensorRange); - Tensor in3(tensorRange); - Tensor out(tensorRange); + int sizeDim1 = 100; + int sizeDim2 = 100; + int sizeDim3 = 100; + array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + Tensor in1(tensorRange); + Tensor in2(tensorRange); + Tensor in3(tensorRange); + Tensor out(tensorRange); in2 = in2.random(); in3 = in3.random(); - DataType * gpu_in1_data = static_cast(sycl_device.allocate(in1.size()*sizeof(DataType))); - DataType * gpu_in2_data = static_cast(sycl_device.allocate(in2.size()*sizeof(DataType))); - DataType * gpu_in3_data = static_cast(sycl_device.allocate(in3.size()*sizeof(DataType))); - DataType * gpu_out_data = static_cast(sycl_device.allocate(out.size()*sizeof(DataType))); + float * gpu_in1_data = static_cast(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(float))); + float * gpu_in2_data = static_cast(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(float))); + float * gpu_in3_data = static_cast(sycl_device.allocate(in3.dimensions().TotalSize()*sizeof(float))); + float * gpu_out_data = static_cast(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float))); - TensorMap> gpu_in1(gpu_in1_data, tensorRange); - TensorMap> gpu_in2(gpu_in2_data, tensorRange); - TensorMap> gpu_in3(gpu_in3_data, tensorRange); - TensorMap> gpu_out(gpu_out_data, tensorRange); + TensorMap> gpu_in1(gpu_in1_data, tensorRange); + TensorMap> gpu_in2(gpu_in2_data, tensorRange); + TensorMap> gpu_in3(gpu_in3_data, tensorRange); + TensorMap> gpu_out(gpu_out_data, tensorRange); /// a=1.2f gpu_in1.device(sycl_device) = gpu_in1.constant(1.2f); - sycl_device.memcpyDeviceToHost(in1.data(), gpu_in1_data ,(in1.size())*sizeof(DataType)); - sycl_device.synchronize(); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { + sycl_device.memcpyDeviceToHost(in1.data(), gpu_in1_data ,(in1.dimensions().TotalSize())*sizeof(float)); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { VERIFY_IS_APPROX(in1(i,j,k), 1.2f); } } @@ -134,12 +65,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) { /// a=b*1.2f gpu_out.device(sycl_device) = gpu_in1 * 1.2f; - sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data ,(out.size())*sizeof(DataType)); - sycl_device.synchronize(); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data ,(out.dimensions().TotalSize())*sizeof(float)); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) * 1.2f); } @@ -148,14 +77,12 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) { printf("a=b*1.2f Test Passed\n"); /// c=a*b - sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.size())*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.dimensions().TotalSize())*sizeof(float)); gpu_out.device(sycl_device) = gpu_in1 * gpu_in2; - sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType)); - sycl_device.synchronize(); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float)); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) * in2(i,j,k)); @@ -166,11 +93,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) { /// c=a+b gpu_out.device(sycl_device) = gpu_in1 + gpu_in2; - sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType)); - sycl_device.synchronize(); - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float)); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k)); @@ -181,11 +107,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) { /// c=a*a gpu_out.device(sycl_device) = gpu_in1 * gpu_in1; - sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType)); - sycl_device.synchronize(); - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float)); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) * in1(i,j,k)); @@ -196,11 +121,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) { //a*3.14f + b*2.7f gpu_out.device(sycl_device) = gpu_in1 * gpu_in1.constant(3.14f) + gpu_in2 * gpu_in2.constant(2.7f); - sycl_device.memcpyDeviceToHost(out.data(),gpu_out_data,(out.size())*sizeof(DataType)); - sycl_device.synchronize(); - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { + sycl_device.memcpyDeviceToHost(out.data(),gpu_out_data,(out.dimensions().TotalSize())*sizeof(float)); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) * 3.14f + in2(i,j,k) * 2.7f); @@ -210,13 +134,12 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) { printf("a*3.14f + b*2.7f Test Passed\n"); ///d= (a>0.5? b:c) - sycl_device.memcpyHostToDevice(gpu_in3_data, in3.data(),(in3.size())*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in3_data, in3.data(),(in3.dimensions().TotalSize())*sizeof(float)); gpu_out.device(sycl_device) =(gpu_in1 > gpu_in1.constant(0.5f)).select(gpu_in2, gpu_in3); - sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType)); - sycl_device.synchronize(); - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float)); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { VERIFY_IS_APPROX(out(i, j, k), (in1(i, j, k) > 0.5f) ? in2(i, j, k) : in3(i, j, k)); @@ -229,48 +152,8 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) { sycl_device.deallocate(gpu_in3_data); sycl_device.deallocate(gpu_out_data); } -template -static void test_sycl_cast(const Eigen::SyclDevice& sycl_device){ - IndexType size = 20; - array tensorRange = {{size}}; - Tensor in(tensorRange); - Tensor out(tensorRange); - Tensor out_host(tensorRange); - - in = in.random(); - - Scalar1* gpu_in_data = static_cast(sycl_device.allocate(in.size()*sizeof(Scalar1))); - Scalar2 * gpu_out_data = static_cast(sycl_device.allocate(out.size()*sizeof(Scalar2))); - - TensorMap> gpu_in(gpu_in_data, tensorRange); - TensorMap> gpu_out(gpu_out_data, tensorRange); - sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.size())*sizeof(Scalar1)); - gpu_out.device(sycl_device) = gpu_in. template cast(); - sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, out.size()*sizeof(Scalar2)); - out_host = in. template cast(); - for(IndexType i=0; i< size; i++) - { - VERIFY_IS_APPROX(out(i), out_host(i)); - } - printf("cast Test Passed\n"); - sycl_device.deallocate(gpu_in_data); - sycl_device.deallocate(gpu_out_data); -} -template void sycl_computing_test_per_device(dev_Selector s){ - QueueInterface queueInterface(s); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_sycl_mem_transfers(sycl_device); - test_sycl_computations(sycl_device); - test_sycl_mem_sync(sycl_device); - test_sycl_mem_transfers(sycl_device); - test_sycl_computations(sycl_device); - test_sycl_mem_sync(sycl_device); - test_sycl_cast(sycl_device); - test_sycl_cast(sycl_device); -} - void test_cxx11_tensor_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_computing_test_per_device(device)); - } + cl::sycl::gpu_selector s; + Eigen::SyclDevice sycl_device(s); + CALL_SUBTEST(test_sycl_cpu(sycl_device)); } diff --git a/eigen/unsupported/test/polynomialsolver.cpp b/eigen/unsupported/test/polynomialsolver.cpp index 7ad4aa6..0c87478 100644 --- a/eigen/unsupported/test/polynomialsolver.cpp +++ b/eigen/unsupported/test/polynomialsolver.cpp @@ -32,10 +32,9 @@ bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve ) { typedef typename POLYNOMIAL::Index Index; typedef typename POLYNOMIAL::Scalar Scalar; - typedef typename POLYNOMIAL::RealScalar RealScalar; typedef typename SOLVER::RootsType RootsType; - typedef Matrix EvalRootsType; + typedef Matrix EvalRootsType; const Index deg = pols.size()-1; @@ -58,7 +57,7 @@ bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve ) cerr << endl; } - std::vector rootModuli( roots.size() ); + std::vector rootModuli( roots.size() ); Map< EvalRootsType > aux( &rootModuli[0], roots.size() ); aux = roots.array().abs(); std::sort( rootModuli.begin(), rootModuli.end() ); @@ -84,7 +83,7 @@ void evalSolver( const POLYNOMIAL& pols ) { typedef typename POLYNOMIAL::Scalar Scalar; - typedef PolynomialSolver PolynomialSolverType; + typedef PolynomialSolver PolynomialSolverType; PolynomialSolverType psolve; aux_evalSolver( pols, psolve ); @@ -98,7 +97,6 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const { using std::sqrt; typedef typename POLYNOMIAL::Scalar Scalar; - typedef typename POLYNOMIAL::RealScalar RealScalar; typedef PolynomialSolver PolynomialSolverType; @@ -109,12 +107,15 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const // 1) the roots found are correct // 2) the roots have distinct moduli + typedef typename POLYNOMIAL::Scalar Scalar; + typedef typename REAL_ROOTS::Scalar Real; + //Test realRoots - std::vector< RealScalar > calc_realRoots; - psolve.realRoots( calc_realRoots, test_precision()); - VERIFY_IS_EQUAL( calc_realRoots.size() , (size_t)real_roots.size() ); + std::vector< Real > calc_realRoots; + psolve.realRoots( calc_realRoots ); + VERIFY( calc_realRoots.size() == (size_t)real_roots.size() ); - const RealScalar psPrec = sqrt( test_precision() ); + const Scalar psPrec = sqrt( test_precision() ); for( size_t i=0; i 0 ) ); if( hasRealRoot ){ VERIFY( internal::isApprox( real_roots.array().abs().maxCoeff(), abs(r), psPrec ) ); } @@ -166,11 +167,9 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const template void polynomialsolver(int deg) { - typedef typename NumTraits<_Scalar>::Real RealScalar; - typedef internal::increment_if_fixed_size<_Deg> Dim; + typedef internal::increment_if_fixed_size<_Deg> Dim; typedef Matrix<_Scalar,Dim::ret,1> PolynomialType; typedef Matrix<_Scalar,_Deg,1> EvalRootsType; - typedef Matrix RealRootsType; cout << "Standard cases" << endl; PolynomialType pols = PolynomialType::Random(deg+1); @@ -183,11 +182,15 @@ void polynomialsolver(int deg) evalSolver<_Deg,PolynomialType>( pols ); cout << "Test sugar" << endl; - RealRootsType realRoots = RealRootsType::Random(deg); + EvalRootsType realRoots = EvalRootsType::Random(deg); roots_to_monicPolynomial( realRoots, pols ); evalSolverSugarFunction<_Deg>( pols, - realRoots.template cast >().eval(), + realRoots.template cast < + std::complex< + typename NumTraits<_Scalar>::Real + > + >(), realRoots ); } @@ -211,6 +214,5 @@ void test_polynomialsolver() internal::random(9,13) )) ); CALL_SUBTEST_11((polynomialsolver(1)) ); - CALL_SUBTEST_12((polynomialsolver,Dynamic>(internal::random(2,13))) ); } } diff --git a/eigen/unsupported/test/sparse_extra.cpp b/eigen/unsupported/test/sparse_extra.cpp index 4f6723d..a010ceb 100644 --- a/eigen/unsupported/test/sparse_extra.cpp +++ b/eigen/unsupported/test/sparse_extra.cpp @@ -129,19 +129,6 @@ template void sparse_extra(const SparseMatrixType& re } -template -void check_marketio() -{ - typedef Matrix DenseMatrix; - Index rows = internal::random(1,100); - Index cols = internal::random(1,100); - SparseMatrixType m1, m2; - m1 = DenseMatrix::Random(rows, cols).sparseView(); - saveMarket(m1, "sparse_extra.mtx"); - loadMarket(m2, "sparse_extra.mtx"); - VERIFY_IS_EQUAL(DenseMatrix(m1),DenseMatrix(m2)); -} - void test_sparse_extra() { for(int i = 0; i < g_repeat; i++) { @@ -156,15 +143,5 @@ void test_sparse_extra() CALL_SUBTEST_3( (sparse_product >()) ); CALL_SUBTEST_3( (sparse_product >()) ); - - CALL_SUBTEST_4( (check_marketio >()) ); - CALL_SUBTEST_4( (check_marketio >()) ); - CALL_SUBTEST_4( (check_marketio,ColMajor,int> >()) ); - CALL_SUBTEST_4( (check_marketio,ColMajor,int> >()) ); - CALL_SUBTEST_4( (check_marketio >()) ); - CALL_SUBTEST_4( (check_marketio >()) ); - CALL_SUBTEST_4( (check_marketio,ColMajor,long int> >()) ); - CALL_SUBTEST_4( (check_marketio,ColMajor,long int> >()) ); - TEST_SET_BUT_UNUSED_VARIABLE(s); } } -- cgit v1.2.3