author | Stanislaw Halik <sthalik@misaki.pl> | 2018-07-03 07:37:12 +0200
---|---|---
committer | Stanislaw Halik <sthalik@misaki.pl> | 2018-07-03 08:13:09 +0200
commit | 88534ba623421c956d8ffcda2d27f41d704d15ef (patch) |
tree | fccc55245aec3f7381cd525a1355568e10ea37f4 /eigen/unsupported/test |
parent | 3ee09beb3f0458fbeb0b0e816f854b9d5b406e6b (diff) |
update eigen
Diffstat (limited to 'eigen/unsupported/test')
26 files changed, 326 insertions(+), 3556 deletions(-)
diff --git a/eigen/unsupported/test/CMakeLists.txt b/eigen/unsupported/test/CMakeLists.txt
index 003c9de..b5fa1c8 100644
--- a/eigen/unsupported/test/CMakeLists.txt
+++ b/eigen/unsupported/test/CMakeLists.txt
@@ -21,17 +21,6 @@ include_directories(../../test ../../unsupported ../../Eigen
 find_package (Threads)
 
-find_package(Xsmm)
-if(XSMM_FOUND)
-  add_definitions("-DEIGEN_USE_LIBXSMM")
-  include_directories(${XSMM_INCLUDES})
-  link_directories(${XSMM_LIBRARIES})
-  set(EXTERNAL_LIBS ${EXTERNAL_LIBS} xsmm)
-  ei_add_property(EIGEN_TESTED_BACKENDS "Xsmm, ")
-else(XSMM_FOUND)
-  ei_add_property(EIGEN_MISSING_BACKENDS "Xsmm, ")
-endif(XSMM_FOUND)
-
 find_package(GoogleHash)
 if(GOOGLEHASH_FOUND)
   add_definitions("-DEIGEN_GOOGLEHASH_SUPPORT")
@@ -157,16 +146,6 @@ if(EIGEN_TEST_CXX11)
   ei_add_test_sycl(cxx11_tensor_broadcast_sycl "-std=c++11")
   ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11")
   ei_add_test_sycl(cxx11_tensor_reduction_sycl "-std=c++11")
-  ei_add_test_sycl(cxx11_tensor_morphing_sycl "-std=c++11")
-  ei_add_test_sycl(cxx11_tensor_shuffling_sycl "-std=c++11")
-  ei_add_test_sycl(cxx11_tensor_padding_sycl "-std=c++11")
-  ei_add_test_sycl(cxx11_tensor_builtins_sycl "-std=c++11")
-  ei_add_test_sycl(cxx11_tensor_contract_sycl "-std=c++11")
-  ei_add_test_sycl(cxx11_tensor_concatenation_sycl "-std=c++11")
-  ei_add_test_sycl(cxx11_tensor_reverse_sycl "-std=c++11")
-  ei_add_test_sycl(cxx11_tensor_convolution_sycl "-std=c++11")
-  ei_add_test_sycl(cxx11_tensor_striding_sycl "-std=c++11")
-  ei_add_test_sycl(cxx11_tensor_chipping_sycl "-std=c++11")
 endif(EIGEN_TEST_SYCL)
 # It should be safe to always run these tests as there is some fallback code for
 # older compiler that don't support cxx11.
diff --git a/eigen/unsupported/test/EulerAngles.cpp b/eigen/unsupported/test/EulerAngles.cpp
index 79ee728..a8cb528 100644
--- a/eigen/unsupported/test/EulerAngles.cpp
+++ b/eigen/unsupported/test/EulerAngles.cpp
@@ -13,219 +13,146 @@
 using namespace Eigen;
 
-// Unfortunately, we need to specialize it in order to work. (We could add it in main.h test framework)
-template <typename Scalar, class System>
-bool verifyIsApprox(const Eigen::EulerAngles<Scalar, System>& a, const Eigen::EulerAngles<Scalar, System>& b)
-{
-  return verifyIsApprox(a.angles(), b.angles());
-}
-
-// Verify that x is in the approxed range [a, b]
-#define VERIFY_APPROXED_RANGE(a, x, b) \
-  do { \
-    VERIFY_IS_APPROX_OR_LESS_THAN(a, x); \
-    VERIFY_IS_APPROX_OR_LESS_THAN(x, b); \
-  } while(0)
-
-const char X = EULER_X;
-const char Y = EULER_Y;
-const char Z = EULER_Z;
-
-template<typename Scalar, class EulerSystem>
-void verify_euler(const EulerAngles<Scalar, EulerSystem>& e)
+template<typename EulerSystem, typename Scalar>
+void verify_euler_ranged(const Matrix<Scalar,3,1>& ea,
+  bool positiveRangeAlpha, bool positiveRangeBeta, bool positiveRangeGamma)
 {
   typedef EulerAngles<Scalar, EulerSystem> EulerAnglesType;
   typedef Matrix<Scalar,3,3> Matrix3;
   typedef Matrix<Scalar,3,1> Vector3;
   typedef Quaternion<Scalar> QuaternionType;
   typedef AngleAxis<Scalar> AngleAxisType;
+  using std::abs;
 
-  const Scalar ONE = Scalar(1);
-  const Scalar HALF_PI = Scalar(EIGEN_PI / 2);
-  const Scalar PI = Scalar(EIGEN_PI);
+  Scalar alphaRangeStart, alphaRangeEnd;
+  Scalar betaRangeStart, betaRangeEnd;
+  Scalar gammaRangeStart, gammaRangeEnd;
 
-  // It's very important calc the acceptable precision depending on the distance from the pole.
-  const Scalar longitudeRadius = std::abs(
-    EulerSystem::IsTaitBryan ?
-    std::cos(e.beta()) :
-    std::sin(e.beta())
-    );
-  Scalar precision = test_precision<Scalar>() / longitudeRadius;
+  if (positiveRangeAlpha)
+  {
+    alphaRangeStart = Scalar(0);
+    alphaRangeEnd = Scalar(2 * EIGEN_PI);
+  }
+  else
+  {
+    alphaRangeStart = -Scalar(EIGEN_PI);
+    alphaRangeEnd = Scalar(EIGEN_PI);
+  }
 
-  Scalar betaRangeStart, betaRangeEnd;
-  if (EulerSystem::IsTaitBryan)
+  if (positiveRangeBeta)
+  {
+    betaRangeStart = Scalar(0);
+    betaRangeEnd = Scalar(2 * EIGEN_PI);
+  }
+  else
+  {
+    betaRangeStart = -Scalar(EIGEN_PI);
+    betaRangeEnd = Scalar(EIGEN_PI);
+  }
+
+  if (positiveRangeGamma)
   {
-    betaRangeStart = -HALF_PI;
-    betaRangeEnd = HALF_PI;
+    gammaRangeStart = Scalar(0);
+    gammaRangeEnd = Scalar(2 * EIGEN_PI);
   }
   else
   {
-    if (!EulerSystem::IsBetaOpposite)
-    {
-      betaRangeStart = 0;
-      betaRangeEnd = PI;
-    }
-    else
-    {
-      betaRangeStart = -PI;
-      betaRangeEnd = 0;
-    }
+    gammaRangeStart = -Scalar(EIGEN_PI);
+    gammaRangeEnd = Scalar(EIGEN_PI);
   }
 
+  const int i = EulerSystem::AlphaAxisAbs - 1;
+  const int j = EulerSystem::BetaAxisAbs - 1;
+  const int k = EulerSystem::GammaAxisAbs - 1;
+
+  const int iFactor = EulerSystem::IsAlphaOpposite ? -1 : 1;
+  const int jFactor = EulerSystem::IsBetaOpposite ? -1 : 1;
+  const int kFactor = EulerSystem::IsGammaOpposite ? -1 : 1;
+
   const Vector3 I = EulerAnglesType::AlphaAxisVector();
   const Vector3 J = EulerAnglesType::BetaAxisVector();
   const Vector3 K = EulerAnglesType::GammaAxisVector();
 
-  // Is approx checks
-  VERIFY(e.isApprox(e));
-  VERIFY_IS_APPROX(e, e);
-  VERIFY_IS_NOT_APPROX(e, EulerAnglesType(e.alpha() + ONE, e.beta() + ONE, e.gamma() + ONE));
-
-  const Matrix3 m(e);
-  VERIFY_IS_APPROX(Scalar(m.determinant()), ONE);
-
-  EulerAnglesType ebis(m);
+  EulerAnglesType e(ea[0], ea[1], ea[2]);
 
-  // When no roll(acting like polar representation), we have the best precision.
-  // One of those cases is when the Euler angles are on the pole, and because it's singular case,
-  // the computation returns no roll.
-  if (ebis.beta() == 0)
-    precision = test_precision<Scalar>();
+  Matrix3 m(e);
+  Vector3 eabis = EulerAnglesType(m, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles();
 
   // Check that eabis in range
-  VERIFY_APPROXED_RANGE(-PI, ebis.alpha(), PI);
-  VERIFY_APPROXED_RANGE(betaRangeStart, ebis.beta(), betaRangeEnd);
-  VERIFY_APPROXED_RANGE(-PI, ebis.gamma(), PI);
-
-  const Matrix3 mbis(AngleAxisType(ebis.alpha(), I) * AngleAxisType(ebis.beta(), J) * AngleAxisType(ebis.gamma(), K));
-  VERIFY_IS_APPROX(Scalar(mbis.determinant()), ONE);
-  VERIFY_IS_APPROX(mbis, ebis.toRotationMatrix());
-  /*std::cout << "===================\n" <<
-    "e: " << e << std::endl <<
-    "eabis: " << eabis.transpose() << std::endl <<
-    "m: " << m << std::endl <<
-    "mbis: " << mbis << std::endl <<
-    "X: " << (m * Vector3::UnitX()).transpose() << std::endl <<
-    "X: " << (mbis * Vector3::UnitX()).transpose() << std::endl;*/
-  VERIFY(m.isApprox(mbis, precision));
-
-  // Test if ea and eabis are the same
-  // Need to check both singular and non-singular cases
-  // There are two singular cases.
-  // 1. When I==K and sin(ea(1)) == 0
-  // 2. When I!=K and cos(ea(1)) == 0
-
-  // TODO: Make this test work well, and use range saturation function.
-  /*// If I==K, and ea[1]==0, then there no unique solution.
-  // The remark apply in the case where I!=K, and |ea[1]| is close to +-pi/2.
-  if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision<Scalar>())) )
-    VERIFY_IS_APPROX(ea, eabis);*/
+  VERIFY(alphaRangeStart <= eabis[0] && eabis[0] <= alphaRangeEnd);
+  VERIFY(betaRangeStart <= eabis[1] && eabis[1] <= betaRangeEnd);
+  VERIFY(gammaRangeStart <= eabis[2] && eabis[2] <= gammaRangeEnd);
 
-  // Quaternions
-  const QuaternionType q(e);
-  ebis = q;
-  const QuaternionType qbis(ebis);
-  VERIFY(internal::isApprox<Scalar>(std::abs(q.dot(qbis)), ONE, precision));
-  //VERIFY_IS_APPROX(eabis, eabis2);// Verify that the euler angles are still the same
+  Vector3 eabis2 = m.eulerAngles(i, j, k);
 
-  // A suggestion for simple product test when will be supported.
-  /*EulerAnglesType e2(PI/2, PI/2, PI/2);
-  Matrix3 m2(e2);
-  VERIFY_IS_APPROX(e*e2, m*m2);*/
-}
-
-template<signed char A, signed char B, signed char C, typename Scalar>
-void verify_euler_vec(const Matrix<Scalar,3,1>& ea)
-{
-  verify_euler(EulerAngles<Scalar, EulerSystem<A, B, C> >(ea[0], ea[1], ea[2]));
-}
-
-template<signed char A, signed char B, signed char C, typename Scalar>
-void verify_euler_all_neg(const Matrix<Scalar,3,1>& ea)
-{
-  verify_euler_vec<+A,+B,+C>(ea);
-  verify_euler_vec<+A,+B,-C>(ea);
-  verify_euler_vec<+A,-B,+C>(ea);
-  verify_euler_vec<+A,-B,-C>(ea);
+  // Invert the relevant axes
+  eabis2[0] *= iFactor;
+  eabis2[1] *= jFactor;
+  eabis2[2] *= kFactor;
 
-  verify_euler_vec<-A,+B,+C>(ea);
-  verify_euler_vec<-A,+B,-C>(ea);
-  verify_euler_vec<-A,-B,+C>(ea);
-  verify_euler_vec<-A,-B,-C>(ea);
-}
-
-template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)
-{
-  verify_euler_all_neg<X,Y,Z>(ea);
-  verify_euler_all_neg<X,Y,X>(ea);
-  verify_euler_all_neg<X,Z,Y>(ea);
-  verify_euler_all_neg<X,Z,X>(ea);
+  // Saturate the angles to the correct range
+  if (positiveRangeAlpha && (eabis2[0] < 0))
+    eabis2[0] += Scalar(2 * EIGEN_PI);
+  if (positiveRangeBeta && (eabis2[1] < 0))
+    eabis2[1] += Scalar(2 * EIGEN_PI);
+  if (positiveRangeGamma && (eabis2[2] < 0))
+    eabis2[2] += Scalar(2 * EIGEN_PI);
 
-  verify_euler_all_neg<Y,Z,X>(ea);
-  verify_euler_all_neg<Y,Z,Y>(ea);
-  verify_euler_all_neg<Y,X,Z>(ea);
-  verify_euler_all_neg<Y,X,Y>(ea);
+  VERIFY_IS_APPROX(eabis, eabis2);// Verify that our estimation is the same as m.eulerAngles() is
 
-  verify_euler_all_neg<Z,X,Y>(ea);
-  verify_euler_all_neg<Z,X,Z>(ea);
-  verify_euler_all_neg<Z,Y,X>(ea);
-  verify_euler_all_neg<Z,Y,Z>(ea);
-}
-
-template<typename Scalar> void check_singular_cases(const Scalar& singularBeta)
-{
-  typedef Matrix<Scalar,3,1> Vector3;
-  const Scalar PI = Scalar(EIGEN_PI);
+  Matrix3 mbis(AngleAxisType(eabis[0], I) * AngleAxisType(eabis[1], J) * AngleAxisType(eabis[2], K));
+  VERIFY_IS_APPROX(m, mbis);
 
-  for (Scalar epsilon = NumTraits<Scalar>::epsilon(); epsilon < 1; epsilon *= Scalar(1.2))
+  // Tests that are only relevant for no possitive range
+  if (!(positiveRangeAlpha || positiveRangeBeta || positiveRangeGamma))
   {
-    check_all_var(Vector3(PI/4, singularBeta, PI/3));
-    check_all_var(Vector3(PI/4, singularBeta - epsilon, PI/3));
-    check_all_var(Vector3(PI/4, singularBeta - Scalar(1.5)*epsilon, PI/3));
-    check_all_var(Vector3(PI/4, singularBeta - 2*epsilon, PI/3));
-    check_all_var(Vector3(PI*Scalar(0.8), singularBeta - epsilon, Scalar(0.9)*PI));
-    check_all_var(Vector3(PI*Scalar(-0.9), singularBeta + epsilon, PI*Scalar(0.3)));
-    check_all_var(Vector3(PI*Scalar(-0.6), singularBeta + Scalar(1.5)*epsilon, PI*Scalar(0.3)));
-    check_all_var(Vector3(PI*Scalar(-0.5), singularBeta + 2*epsilon, PI*Scalar(0.4)));
-    check_all_var(Vector3(PI*Scalar(0.9), singularBeta + epsilon, Scalar(0.8)*PI));
+    /* If I==K, and ea[1]==0, then there no unique solution. */
+    /* The remark apply in the case where I!=K, and |ea[1]| is close to pi/2. */
+    if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision<Scalar>())) )
+      VERIFY((ea-eabis).norm() <= test_precision<Scalar>());
+
+    // approx_or_less_than does not work for 0
+    VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1)));
   }
 
-  // This one for sanity, it had a problem with near pole cases in float scalar.
-  check_all_var(Vector3(PI*Scalar(0.8), singularBeta - Scalar(1E-6), Scalar(0.9)*PI));
+  // Quaternions
+  QuaternionType q(e);
+  eabis = EulerAnglesType(q, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles();
+  VERIFY_IS_APPROX(eabis, eabis2);// Verify that the euler angles are still the same
 }
 
-template<typename Scalar> void eulerangles_manual()
+template<typename EulerSystem, typename Scalar>
+void verify_euler(const Matrix<Scalar,3,1>& ea)
 {
-  typedef Matrix<Scalar,3,1> Vector3;
-  const Vector3 Zero = Vector3::Zero();
-  const Scalar PI = Scalar(EIGEN_PI);
-
-  check_all_var(Zero);
-
-  // singular cases
-  check_singular_cases(PI/2);
-  check_singular_cases(-PI/2);
-
-  check_singular_cases(Scalar(0));
-  check_singular_cases(Scalar(-0));
-
-  check_singular_cases(PI);
-  check_singular_cases(-PI);
-
-  // non-singular cases
-  VectorXd alpha = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI);
-  VectorXd beta = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.49) * PI, Scalar(0.49) * PI);
-  VectorXd gamma = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI);
-  for (int i = 0; i < alpha.size(); ++i) {
-    for (int j = 0; j < beta.size(); ++j) {
-      for (int k = 0; k < gamma.size(); ++k) {
-        check_all_var(Vector3d(alpha(i), beta(j), gamma(k)));
-      }
-    }
-  }
+  verify_euler_ranged<EulerSystem>(ea, false, false, false);
+  verify_euler_ranged<EulerSystem>(ea, false, false, true);
+  verify_euler_ranged<EulerSystem>(ea, false, true, false);
+  verify_euler_ranged<EulerSystem>(ea, false, true, true);
+  verify_euler_ranged<EulerSystem>(ea, true, false, false);
+  verify_euler_ranged<EulerSystem>(ea, true, false, true);
+  verify_euler_ranged<EulerSystem>(ea, true, true, false);
+  verify_euler_ranged<EulerSystem>(ea, true, true, true);
 }
 
-template<typename Scalar> void eulerangles_rand()
+template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)
+{
+  verify_euler<EulerSystemXYZ>(ea);
+  verify_euler<EulerSystemXYX>(ea);
+  verify_euler<EulerSystemXZY>(ea);
+  verify_euler<EulerSystemXZX>(ea);
+
+  verify_euler<EulerSystemYZX>(ea);
+  verify_euler<EulerSystemYZY>(ea);
+  verify_euler<EulerSystemYXZ>(ea);
+  verify_euler<EulerSystemYXY>(ea);
+
+  verify_euler<EulerSystemZXY>(ea);
+  verify_euler<EulerSystemZXZ>(ea);
+  verify_euler<EulerSystemZYX>(ea);
+  verify_euler<EulerSystemZYZ>(ea);
+}
+
+template<typename Scalar> void eulerangles()
 {
   typedef Matrix<Scalar,3,3> Matrix3;
   typedef Matrix<Scalar,3,1> Vector3;
@@ -274,19 +201,8 @@ template<typename Scalar> void eulerangles_rand()
 void test_EulerAngles()
 {
-  // Simple cast test
-  EulerAnglesXYZd onesEd(1, 1, 1);
-  EulerAnglesXYZf onesEf = onesEd.cast<float>();
-  VERIFY_IS_APPROX(onesEd, onesEf.cast<double>());
-
-  CALL_SUBTEST_1( eulerangles_manual<float>() );
-  CALL_SUBTEST_2( eulerangles_manual<double>() );
-
   for(int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST_3( eulerangles_rand<float>() );
-    CALL_SUBTEST_4( eulerangles_rand<double>() );
+    CALL_SUBTEST_1( eulerangles<float>() );
+    CALL_SUBTEST_2( eulerangles<double>() );
   }
-
-  // TODO: Add tests for auto diff
-  // TODO: Add tests for complex numbers
 }
diff --git a/eigen/unsupported/test/autodiff_scalar.cpp b/eigen/unsupported/test/autodiff_scalar.cpp
index 4df2f5c..9cf1128 100644
--- a/eigen/unsupported/test/autodiff_scalar.cpp
+++ b/eigen/unsupported/test/autodiff_scalar.cpp
@@ -72,6 +72,20 @@ template<typename Scalar> void check_hyperbolic_functions()
   VERIFY_IS_APPROX(res3.derivatives().x(), Scalar(0.339540557256150));
 }
 
+template <typename Scalar>
+void check_limits_specialization()
+{
+  typedef Eigen::Matrix<Scalar, 1, 1> Deriv;
+  typedef Eigen::AutoDiffScalar<Deriv> AD;
+
+  typedef std::numeric_limits<AD> A;
+  typedef std::numeric_limits<Scalar> B;
+
+#if EIGEN_HAS_CXX11
+  VERIFY(bool(std::is_base_of<B, A>::value));
+#endif
+}
+
 void test_autodiff_scalar()
 {
   for(int i = 0; i < g_repeat; i++) {
@@ -79,5 +93,6 @@ void test_autodiff_scalar()
     CALL_SUBTEST_2( check_atan2<double>() );
     CALL_SUBTEST_3( check_hyperbolic_functions<float>() );
     CALL_SUBTEST_4( check_hyperbolic_functions<double>() );
+    CALL_SUBTEST_5( check_limits_specialization<double>());
   }
 }
diff --git a/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp b/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp
index 48cd2d4..5f9bb93 100644
--- a/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp
+++ b/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp
@@ -11,7 +11,6 @@
 #define EIGEN_USE_THREADS
 #include "main.h"
 #include "Eigen/CXX11/ThreadPool"
-#include "Eigen/CXX11/Tensor"
 
 static void test_create_destroy_empty_pool()
 {
@@ -23,11 +22,11 @@
 }
 
-static void test_parallelism(bool allow_spinning)
+static void test_parallelism()
 {
   // Test we never-ever fail to match available tasks with idle threads.
   const int kThreads = 16;  // code below expects that this is a multiple of 4
-  NonBlockingThreadPool tp(kThreads, allow_spinning);
+  NonBlockingThreadPool tp(kThreads);
   VERIFY_IS_EQUAL(tp.NumThreads(), kThreads);
   VERIFY_IS_EQUAL(tp.CurrentThreadId(), -1);
   for (int iter = 0; iter < 100; ++iter) {
@@ -101,25 +100,8 @@
   }
 }
 
-
-static void test_cancel()
-{
-  NonBlockingThreadPool tp(2);
-
-  // Schedule a large number of closure that each sleeps for one second. This
-  // will keep the thread pool busy for much longer than the default test timeout.
-  for (int i = 0; i < 1000; ++i) {
-    tp.Schedule([]() { EIGEN_SLEEP(2000); });
-  }
-
-  // Cancel the processing of all the closures that are still pending.
-  tp.Cancel();
-}
-
 void test_cxx11_non_blocking_thread_pool()
 {
   CALL_SUBTEST(test_create_destroy_empty_pool());
-  CALL_SUBTEST(test_parallelism(true));
-  CALL_SUBTEST(test_parallelism(false));
-  CALL_SUBTEST(test_cancel());
+  CALL_SUBTEST(test_parallelism());
 }
diff --git a/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
index 21fdfca..7201bfe 100644
--- a/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
@@ -14,7 +14,7 @@
 #define EIGEN_TEST_NO_LONGDOUBLE
 #define EIGEN_TEST_NO_COMPLEX
 #define EIGEN_TEST_FUNC cxx11_tensor_broadcast_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_SYCL
 
 #include "main.h"
@@ -25,99 +25,39 @@
 using Eigen::SyclDevice;
 using Eigen::Tensor;
 using Eigen::TensorMap;
 
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_broadcast_sycl_fixed(const Eigen::SyclDevice &sycl_device){
-
-  // BROADCAST test:
-  IndexType inDim1=2;
-  IndexType inDim2=3;
-  IndexType inDim3=5;
-  IndexType inDim4=7;
-  IndexType bDim1=2;
-  IndexType bDim2=3;
-  IndexType bDim3=1;
-  IndexType bDim4=4;
-  array<IndexType, 4> in_range = {{inDim1, inDim2, inDim3, inDim4}};
-  array<IndexType, 4> broadcasts = {{bDim1, bDim2, bDim3, bDim4}};
-  array<IndexType, 4> out_range; // = in_range * broadcasts
-  for (size_t i = 0; i < out_range.size(); ++i)
-    out_range[i] = in_range[i] * broadcasts[i];
-
-  Tensor<DataType, 4, DataLayout, IndexType> input(in_range);
-  Tensor<DataType, 4, DataLayout, IndexType> out(out_range);
-
-  for (size_t i = 0; i < in_range.size(); ++i)
-    VERIFY_IS_EQUAL(out.dimension(i), out_range[i]);
-
-
-  for (IndexType i = 0; i < input.size(); ++i)
-    input(i) = static_cast<DataType>(i);
-
-  DataType * gpu_in_data = static_cast<DataType*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(DataType)));
-  DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
-
-  TensorMap<TensorFixedSize<DataType, Sizes<2, 3, 5, 7>, DataLayout, IndexType>> gpu_in(gpu_in_data, in_range);
-  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_out(gpu_out_data, out_range);
-  sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(DataType));
-  gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
-
-  for (IndexType i = 0; i < inDim1*bDim1; ++i) {
-    for (IndexType j = 0; j < inDim2*bDim2; ++j) {
-      for (IndexType k = 0; k < inDim3*bDim3; ++k) {
-        for (IndexType l = 0; l < inDim4*bDim4; ++l) {
-          VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), out(i,j,k,l));
-        }
-      }
-    }
-  }
-  printf("Broadcast Test with fixed size Passed\n");
-  sycl_device.deallocate(gpu_in_data);
-  sycl_device.deallocate(gpu_out_data);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
 static void test_broadcast_sycl(const Eigen::SyclDevice &sycl_device){
 
   // BROADCAST test:
-  IndexType inDim1=2;
-  IndexType inDim2=3;
-  IndexType inDim3=5;
-  IndexType inDim4=7;
-  IndexType bDim1=2;
-  IndexType bDim2=3;
-  IndexType bDim3=1;
-  IndexType bDim4=4;
-  array<IndexType, 4> in_range = {{inDim1, inDim2, inDim3, inDim4}};
-  array<IndexType, 4> broadcasts = {{bDim1, bDim2, bDim3, bDim4}};
-  array<IndexType, 4> out_range; // = in_range * broadcasts
+  array<int, 4> in_range = {{2, 3, 5, 7}};
+  array<int, 4> broadcasts = {{2, 3, 1, 4}};
+  array<int, 4> out_range; // = in_range * broadcasts
   for (size_t i = 0; i < out_range.size(); ++i)
     out_range[i] = in_range[i] * broadcasts[i];
 
-  Tensor<DataType, 4, DataLayout, IndexType> input(in_range);
-  Tensor<DataType, 4, DataLayout, IndexType> out(out_range);
+  Tensor<float, 4> input(in_range);
+  Tensor<float, 4> out(out_range);
 
   for (size_t i = 0; i < in_range.size(); ++i)
     VERIFY_IS_EQUAL(out.dimension(i), out_range[i]);
 
-  for (IndexType i = 0; i < input.size(); ++i)
-    input(i) = static_cast<DataType>(i);
+  for (int i = 0; i < input.size(); ++i)
+    input(i) = static_cast<float>(i);
 
-  DataType * gpu_in_data = static_cast<DataType*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(DataType)));
-  DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
+  float * gpu_in_data = static_cast<float*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(float)));
+  float * gpu_out_data = static_cast<float*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float)));
 
-  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_in(gpu_in_data, in_range);
-  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_out(gpu_out_data, out_range);
-  sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(DataType));
+  TensorMap<Tensor<float, 4>> gpu_in(gpu_in_data, in_range);
+  TensorMap<Tensor<float, 4>> gpu_out(gpu_out_data, out_range);
+  sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(float));
   gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
 
-  for (IndexType i = 0; i < inDim1*bDim1; ++i) {
-    for (IndexType j = 0; j < inDim2*bDim2; ++j) {
-      for (IndexType k = 0; k < inDim3*bDim3; ++k) {
-        for (IndexType l = 0; l < inDim4*bDim4; ++l) {
-          VERIFY_IS_APPROX(input(i%inDim1,j%inDim2,k%inDim3,l%inDim4), out(i,j,k,l));
+  for (int i = 0; i < 4; ++i) {
+    for (int j = 0; j < 9; ++j) {
+      for (int k = 0; k < 5; ++k) {
+        for (int l = 0; l < 28; ++l) {
+          VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), out(i,j,k,l));
         }
       }
     }
@@ -127,18 +67,8 @@
   sycl_device.deallocate(gpu_out_data);
 }
 
-template<typename DataType> void sycl_broadcast_test_per_device(const cl::sycl::device& d){
-  std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
-  QueueInterface queueInterface(d);
-  auto sycl_device = Eigen::SyclDevice(&queueInterface);
-  test_broadcast_sycl<DataType, RowMajor, int64_t>(sycl_device);
-  test_broadcast_sycl<DataType, ColMajor, int64_t>(sycl_device);
-  test_broadcast_sycl_fixed<DataType, RowMajor, int64_t>(sycl_device);
-  test_broadcast_sycl_fixed<DataType, ColMajor, int64_t>(sycl_device);
-}
-
 void test_cxx11_tensor_broadcast_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(sycl_broadcast_test_per_device<float>(device));
-  }
+  cl::sycl::gpu_selector s;
+  Eigen::SyclDevice sycl_device(s);
+  CALL_SUBTEST(test_broadcast_sycl(sycl_device));
 }
diff --git a/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp
deleted file mode 100644
index 400a31d..0000000
--- a/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp
+++ /dev/null
@@ -1,267 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_builtins_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-
-namespace std {
-template <typename T> T rsqrt(T x) { return 1 / std::sqrt(x); }
-template <typename T> T square(T x) { return x * x; }
-template <typename T> T cube(T x) { return x * x * x; }
-template <typename T> T inverse(T x) { return 1 / x; }
-}
-
-#define TEST_UNARY_BUILTINS_FOR_SCALAR(FUNC, SCALAR, OPERATOR, Layout) \
-  { \
-    /* out OPERATOR in.FUNC() */ \
-    Tensor<SCALAR, 3, Layout, int64_t> in(tensorRange); \
-    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
-    in = in.random() + static_cast<SCALAR>(0.01); \
-    out = out.random() + static_cast<SCALAR>(0.01); \
-    Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
-    SCALAR *gpu_data = static_cast<SCALAR *>( \
-        sycl_device.allocate(in.size() * sizeof(SCALAR))); \
-    SCALAR *gpu_data_out = static_cast<SCALAR *>( \
-        sycl_device.allocate(out.size() * sizeof(SCALAR))); \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu(gpu_data, tensorRange); \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
-    sycl_device.memcpyHostToDevice(gpu_data, in.data(), \
-                                   (in.size()) * sizeof(SCALAR)); \
-    sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), \
-                                   (out.size()) * sizeof(SCALAR)); \
-    gpu_out.device(sycl_device) OPERATOR gpu.FUNC(); \
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
-                                   (out.size()) * sizeof(SCALAR)); \
-    for (int64_t i = 0; i < out.size(); ++i) { \
-      SCALAR ver = reference(i); \
-      ver OPERATOR std::FUNC(in(i)); \
-      VERIFY_IS_APPROX(out(i), ver); \
-    } \
-    sycl_device.deallocate(gpu_data); \
-    sycl_device.deallocate(gpu_data_out); \
-  } \
-  { \
-    /* out OPERATOR out.FUNC() */ \
-    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
-    out = out.random() + static_cast<SCALAR>(0.01); \
-    Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
-    SCALAR *gpu_data_out = static_cast<SCALAR *>( \
-        sycl_device.allocate(out.size() * sizeof(SCALAR))); \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
-    sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), \
-                                   (out.size()) * sizeof(SCALAR)); \
-    gpu_out.device(sycl_device) OPERATOR gpu_out.FUNC(); \
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
-                                   (out.size()) * sizeof(SCALAR)); \
-    for (int64_t i = 0; i < out.size(); ++i) { \
-      SCALAR ver = reference(i); \
-      ver OPERATOR std::FUNC(reference(i)); \
-      VERIFY_IS_APPROX(out(i), ver); \
-    } \
-    sycl_device.deallocate(gpu_data_out); \
-  }
-
-#define TEST_UNARY_BUILTINS_OPERATOR(SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(sqrt, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(rsqrt, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(square, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(cube, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(inverse, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(tanh, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(exp, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(expm1, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(log, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(ceil, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(floor, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(round, SCALAR, OPERATOR , Layout) \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(log1p, SCALAR, OPERATOR , Layout)
-
-#define TEST_IS_THAT_RETURNS_BOOL(SCALAR, FUNC, Layout) \
-  { \
-    /* out = in.FUNC() */ \
-    Tensor<SCALAR, 3, Layout, int64_t> in(tensorRange); \
-    Tensor<bool, 3, Layout, int64_t> out(tensorRange); \
-    in = in.random() + static_cast<SCALAR>(0.01); \
-    SCALAR *gpu_data = static_cast<SCALAR *>( \
-        sycl_device.allocate(in.size() * sizeof(SCALAR))); \
-    bool *gpu_data_out = \
-        static_cast<bool *>(sycl_device.allocate(out.size() * sizeof(bool))); \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu(gpu_data, tensorRange); \
-    TensorMap<Tensor<bool, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
-    sycl_device.memcpyHostToDevice(gpu_data, in.data(), \
-                                   (in.size()) * sizeof(SCALAR)); \
-    gpu_out.device(sycl_device) = gpu.FUNC(); \
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
-                                   (out.size()) * sizeof(bool)); \
-    for (int64_t i = 0; i < out.size(); ++i) { \
-      VERIFY_IS_EQUAL(out(i), std::FUNC(in(i))); \
-    } \
-    sycl_device.deallocate(gpu_data); \
-    sycl_device.deallocate(gpu_data_out); \
-  }
-
-#define TEST_UNARY_BUILTINS(SCALAR, Layout) \
-  TEST_UNARY_BUILTINS_OPERATOR(SCALAR, +=, Layout) \
-  TEST_UNARY_BUILTINS_OPERATOR(SCALAR, =, Layout) \
-  TEST_IS_THAT_RETURNS_BOOL(SCALAR, isnan, Layout) \
-  TEST_IS_THAT_RETURNS_BOOL(SCALAR, isfinite, Layout) \
-  TEST_IS_THAT_RETURNS_BOOL(SCALAR, isinf, Layout)
-
-static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device) {
-  int64_t sizeDim1 = 10;
-  int64_t sizeDim2 = 10;
-  int64_t sizeDim3 = 10;
-  array<int64_t, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
-
-  TEST_UNARY_BUILTINS(float, RowMajor)
-  TEST_UNARY_BUILTINS(float, ColMajor)
-}
-
-namespace std {
-template <typename T> T cwiseMax(T x, T y) { return std::max(x, y); }
-template <typename T> T cwiseMin(T x, T y) { return std::min(x, y); }
-}
-
-#define TEST_BINARY_BUILTINS_FUNC(SCALAR, FUNC, Layout) \
-  { \
-    /* out = in_1.FUNC(in_2) */ \
-    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange); \
-    Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange); \
-    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
-    in_1 = in_1.random() + static_cast<SCALAR>(0.01); \
-    in_2 = in_2.random() + static_cast<SCALAR>(0.01); \
-    Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
-    SCALAR *gpu_data_1 = static_cast<SCALAR *>( \
-        sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \
-    SCALAR *gpu_data_2 = static_cast<SCALAR *>( \
-        sycl_device.allocate(in_2.size() * sizeof(SCALAR))); \
-    SCALAR *gpu_data_out = static_cast<SCALAR *>( \
-        sycl_device.allocate(out.size() * sizeof(SCALAR))); \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange); \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2, tensorRange); \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
-    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \
-                                   (in_1.size()) * sizeof(SCALAR)); \
-    sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(), \
-                                   (in_2.size()) * sizeof(SCALAR)); \
-    gpu_out.device(sycl_device) = gpu_1.FUNC(gpu_2); \
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
-                                   (out.size()) * sizeof(SCALAR)); \
-    for (int64_t i = 0; i < out.size(); ++i) { \
-      SCALAR ver = reference(i); \
-      ver = std::FUNC(in_1(i), in_2(i)); \
-      VERIFY_IS_APPROX(out(i), ver); \
-    } \
-    sycl_device.deallocate(gpu_data_1); \
-    sycl_device.deallocate(gpu_data_2); \
-    sycl_device.deallocate(gpu_data_out); \
-  }
-
-#define TEST_BINARY_BUILTINS_OPERATORS(SCALAR, OPERATOR, Layout) \
-  { \
-    /* out = in_1 OPERATOR in_2 */ \
-    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange); \
-    Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange); \
-    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
-    in_1 = in_1.random() + static_cast<SCALAR>(0.01); \
-    in_2 = in_2.random() + static_cast<SCALAR>(0.01); \
-    Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
-    SCALAR *gpu_data_1 = static_cast<SCALAR *>( \
-        sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \
-    SCALAR *gpu_data_2 = static_cast<SCALAR *>( \
-        sycl_device.allocate(in_2.size() * sizeof(SCALAR))); \
-    SCALAR *gpu_data_out = static_cast<SCALAR *>( \
-        sycl_device.allocate(out.size() * sizeof(SCALAR))); \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange); \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2, tensorRange); \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
-    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \
-                                   (in_1.size()) * sizeof(SCALAR)); \
-    sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(), \
-                                   (in_2.size()) * sizeof(SCALAR)); \
-    gpu_out.device(sycl_device) = gpu_1 OPERATOR gpu_2; \
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
-                                   (out.size()) * sizeof(SCALAR)); \
-    for (int64_t i = 0; i < out.size(); ++i) { \
-      VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR in_2(i)); \
-    } \
-    sycl_device.deallocate(gpu_data_1); \
-    sycl_device.deallocate(gpu_data_2); \
-    sycl_device.deallocate(gpu_data_out); \
-  }
-
-#define TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(SCALAR, OPERATOR, Layout) \
-  { \
-    /* out = in_1 OPERATOR 2 */ \
-    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange); \
-    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
-    in_1 = in_1.random() + static_cast<SCALAR>(0.01); \
-    Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
-    SCALAR *gpu_data_1 = static_cast<SCALAR *>( \
-        sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \
-    SCALAR *gpu_data_out = static_cast<SCALAR *>( \
-        sycl_device.allocate(out.size() * sizeof(SCALAR))); \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange); \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
-    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \
-                                   (in_1.size()) * sizeof(SCALAR)); \
-    gpu_out.device(sycl_device) = gpu_1 OPERATOR 2; \
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
-                                   (out.size()) * sizeof(SCALAR)); \
-    for (int64_t i = 0; i < out.size(); ++i) { \
-      VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR 2); \
-    } \
-    sycl_device.deallocate(gpu_data_1); \
-    sycl_device.deallocate(gpu_data_out); \
-  }
-
-#define TEST_BINARY_BUILTINS(SCALAR, Layout) \
-  TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMax , Layout) \
-  TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMin , Layout) \
-  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, + , Layout) \
-  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, - , Layout) \
-  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, * , Layout) \
-  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, / , Layout)
-
-static void test_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
-  int64_t sizeDim1 = 10;
-  int64_t sizeDim2 = 10;
-  int64_t sizeDim3 = 10;
-  array<int64_t, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
-  TEST_BINARY_BUILTINS(float, RowMajor)
-  TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, RowMajor)
-  TEST_BINARY_BUILTINS(float, ColMajor)
-  TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, ColMajor)
-}
-
-void test_cxx11_tensor_builtins_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    QueueInterface queueInterface(device);
-    Eigen::SyclDevice sycl_device(&queueInterface);
-    CALL_SUBTEST(test_builtin_unary_sycl(sycl_device));
-    CALL_SUBTEST(test_builtin_binary_sycl(sycl_device));
-  }
-}
diff --git a/eigen/unsupported/test/cxx11_tensor_chipping.cpp b/eigen/unsupported/test/cxx11_tensor_chipping.cpp
index 89cf5c7..1832dec 100644
--- a/eigen/unsupported/test/cxx11_tensor_chipping.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_chipping.cpp
@@ -43,7 +43,7 @@ static void test_simple_chip()
   VERIFY_IS_EQUAL(chip2.dimension(2), 7);
   VERIFY_IS_EQUAL(chip2.dimension(3), 11);
   for (int i = 0; i < 2; ++i) {
-    for (int j = 0; j < 5; ++j) {
+    for (int j = 0; j < 3; ++j) {
       for (int k = 0; k < 7; ++k) {
         for (int l = 0; l < 11; ++l) {
           VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l));
@@ -75,7 +75,7 @@ static void test_simple_chip()
   for (int i = 0; i < 2; ++i) {
     for (int j = 0; j < 3; ++j) {
       for (int k = 0; k < 5; ++k) {
-        for (int l = 0; l < 11; ++l) {
+        for (int l = 0; l < 7; ++l) {
          VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l));
         }
       }
@@ -126,7 +126,7 @@ static void test_dynamic_chip()
   VERIFY_IS_EQUAL(chip2.dimension(2), 7);
   VERIFY_IS_EQUAL(chip2.dimension(3), 11);
   for (int i = 0; i < 2; ++i) {
-    for (int j = 0; j < 5; ++j) {
+    for (int j = 0; j < 3; ++j) {
       for (int k = 0; k < 7; ++k) {
         for (int l = 0; l < 11; ++l) {
           VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l));
@@ -158,7 +158,7 @@ static void test_dynamic_chip()
   for (int i = 0; i < 2; ++i) {
     for (int j = 0; j < 3; ++j) {
       for (int k = 0; k < 5; ++k) {
-        for (int l = 0; l < 11; ++l) {
+        for (int l = 0; l < 7; ++l) {
          VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l));
         }
       }
diff --git a/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp
deleted file mode 100644
index 39e4f0a..0000000
--- a/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp
+++ /dev/null
@@ -1,622 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-// Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
- - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_chipping_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - -#include "main.h" - -#include <Eigen/CXX11/Tensor> - -using Eigen::Tensor; - -template <typename DataType, int DataLayout, typename IndexType> -static void test_static_chip_sycl(const Eigen::SyclDevice& sycl_device) -{ - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - IndexType sizeDim5 = 11; - - array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - - Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange); - Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange); - - tensor.setRandom(); - - const size_t tensorBuffSize =tensor.size()*sizeof(DataType); - const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType); - DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize)); - DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize)); - - TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize); - gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(1l); - sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize); - - VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2); - VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3); - VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4); - VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim2; ++i) { - for (IndexType j = 0; j < sizeDim3; ++j) { - for (IndexType k = 0; k < sizeDim4; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l)); - } - } - } - } - - array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}}; - Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange); - const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType); - DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize)); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange); - - gpu_chip2.device(sycl_device)=gpu_tensor.template chip<1l>(1l); - sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize); - - VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3); - VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4); - VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim3; ++j) { - for (IndexType k = 0; k < sizeDim4; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l)); - } - } - } - } - - array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}}; - Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange); - const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType); - DataType* gpu_data_chip3 = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize)); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange); - - 
gpu_chip3.device(sycl_device)=gpu_tensor.template chip<2l>(2l); - sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize); - - VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4); - VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim4; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l)); - } - } - } - } - - array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}}; - Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange); - const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType); - DataType* gpu_data_chip4 = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize)); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange); - - gpu_chip4.device(sycl_device)=gpu_tensor.template chip<3l>(5l); - sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize); - - VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3); - VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l)); - } - } - } - } - - - array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; - Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange); - const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType); - DataType* gpu_data_chip5 = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize)); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange); - - gpu_chip5.device(sycl_device)=gpu_tensor.template chip<4l>(7l); - sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize); - - VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3); - VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { - for (IndexType l = 0; l < sizeDim4; ++l) { - VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l)); - } - } - } - } - - sycl_device.deallocate(gpu_data_tensor); - sycl_device.deallocate(gpu_data_chip1); - sycl_device.deallocate(gpu_data_chip2); - sycl_device.deallocate(gpu_data_chip3); - sycl_device.deallocate(gpu_data_chip4); - sycl_device.deallocate(gpu_data_chip5); -} - -template <typename DataType, int DataLayout, typename IndexType> -static void test_dynamic_chip_sycl(const Eigen::SyclDevice& sycl_device) -{ - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - IndexType sizeDim5 = 11; - - array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - - Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange); - Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange); - - tensor.setRandom(); - - const size_t tensorBuffSize 
=tensor.size()*sizeof(DataType); - const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType); - DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize)); - DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize)); - - TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize); - gpu_chip1.device(sycl_device)=gpu_tensor.chip(1l,0l); - sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize); - - VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2); - VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3); - VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4); - VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim2; ++i) { - for (IndexType j = 0; j < sizeDim3; ++j) { - for (IndexType k = 0; k < sizeDim4; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l)); - } - } - } - } - - array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}}; - Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange); - const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType); - DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize)); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange); - - gpu_chip2.device(sycl_device)=gpu_tensor.chip(1l,1l); - sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize); - - VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3); - VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4); - VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim3; ++j) { - for (IndexType k = 0; k < sizeDim4; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l)); - } - } - } - } - - array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}}; - Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange); - const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType); - DataType* gpu_data_chip3 = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize)); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange); - - gpu_chip3.device(sycl_device)=gpu_tensor.chip(2l,2l); - sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize); - - VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4); - VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim4; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l)); - } - } - } - } - - array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}}; - Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange); - const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType); - DataType* gpu_data_chip4 = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize)); - TensorMap<Tensor<DataType, 4, 
DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange); - - gpu_chip4.device(sycl_device)=gpu_tensor.chip(5l,3l); - sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize); - - VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3); - VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { - for (IndexType l = 0; l < sizeDim5; ++l) { - VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l)); - } - } - } - } - - - array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; - Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange); - const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType); - DataType* gpu_data_chip5 = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize)); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange); - - gpu_chip5.device(sycl_device)=gpu_tensor.chip(7l,4l); - sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize); - - VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3); - VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { - for (IndexType l = 0; l < sizeDim4; ++l) { - VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l)); - } - } - } - } - sycl_device.deallocate(gpu_data_tensor); - sycl_device.deallocate(gpu_data_chip1); - sycl_device.deallocate(gpu_data_chip2); - sycl_device.deallocate(gpu_data_chip3); - sycl_device.deallocate(gpu_data_chip4); - sycl_device.deallocate(gpu_data_chip5); -} - -template <typename DataType, int DataLayout, typename IndexType> -static void test_chip_in_expr(const Eigen::SyclDevice& sycl_device) { - - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - IndexType sizeDim5 = 11; - - array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - - Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange); - - Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange); - Tensor<DataType, 4, DataLayout,IndexType> tensor1(chip1TensorRange); - tensor.setRandom(); - tensor1.setRandom(); - - const size_t tensorBuffSize =tensor.size()*sizeof(DataType); - const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType); - DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize)); - DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize)); - DataType* gpu_data_tensor1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize)); - - TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_tensor1(gpu_data_tensor1, chip1TensorRange); - - - sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize); - sycl_device.memcpyHostToDevice(gpu_data_tensor1, tensor1.data(), chip1TensorBuffSize); - 
gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(0l) + gpu_tensor1; - sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize); - - for (int i = 0; i < sizeDim2; ++i) { - for (int j = 0; j < sizeDim3; ++j) { - for (int k = 0; k < sizeDim4; ++k) { - for (int l = 0; l < sizeDim5; ++l) { - float expected = tensor(0l,i,j,k,l) + tensor1(i,j,k,l); - VERIFY_IS_EQUAL(chip1(i,j,k,l), expected); - } - } - } - } - - array<IndexType, 3> chip2TensorRange = {{sizeDim2, sizeDim4, sizeDim5}}; - Tensor<DataType, 3, DataLayout,IndexType> tensor2(chip2TensorRange); - Tensor<DataType, 3, DataLayout,IndexType> chip2(chip2TensorRange); - tensor2.setRandom(); - const size_t chip2TensorBuffSize =tensor2.size()*sizeof(DataType); - DataType* gpu_data_tensor2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize)); - DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize)); - TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_tensor2(gpu_data_tensor2, chip2TensorRange); - TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_tensor2, tensor2.data(), chip2TensorBuffSize); - gpu_chip2.device(sycl_device)=gpu_tensor.template chip<0l>(0l).template chip<1l>(2l) + gpu_tensor2; - sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize); - - for (int i = 0; i < sizeDim2; ++i) { - for (int j = 0; j < sizeDim4; ++j) { - for (int k = 0; k < sizeDim5; ++k) { - float expected = tensor(0l,i,2l,j,k) + tensor2(i,j,k); - VERIFY_IS_EQUAL(chip2(i,j,k), expected); - } - } - } - sycl_device.deallocate(gpu_data_tensor); - sycl_device.deallocate(gpu_data_tensor1); - sycl_device.deallocate(gpu_data_chip1); - sycl_device.deallocate(gpu_data_tensor2); - sycl_device.deallocate(gpu_data_chip2); -} - -template <typename DataType, int DataLayout, typename IndexType> -static void test_chip_as_lvalue_sycl(const Eigen::SyclDevice& sycl_device) -{ - - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - IndexType sizeDim5 = 11; - - array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - array<IndexType, 4> input2TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - - Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange); - Tensor<DataType, 5, DataLayout,IndexType> input1(tensorRange); - Tensor<DataType, 4, DataLayout,IndexType> input2(input2TensorRange); - input1.setRandom(); - input2.setRandom(); - - - const size_t tensorBuffSize =tensor.size()*sizeof(DataType); - const size_t input2TensorBuffSize =input2.size()*sizeof(DataType); - DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize)); - DataType* gpu_data_input1 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize)); - DataType* gpu_data_input2 = static_cast<DataType*>(sycl_device.allocate(input2TensorBuffSize)); - - TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange); - TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input1(gpu_data_input1, tensorRange); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input2(gpu_data_input2, input2TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_input1, input1.data(), tensorBuffSize); - gpu_tensor.device(sycl_device)=gpu_input1; - sycl_device.memcpyHostToDevice(gpu_data_input2, input2.data(), input2TensorBuffSize); - gpu_tensor.template 
chip<0l>(1l).device(sycl_device)=gpu_input2; - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); - - for (int i = 0; i < sizeDim1; ++i) { - for (int j = 0; j < sizeDim2; ++j) { - for (int k = 0; k < sizeDim3; ++k) { - for (int l = 0; l < sizeDim4; ++l) { - for (int m = 0; m < sizeDim5; ++m) { - if (i != 1) { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); - } else { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input2(j,k,l,m)); - } - } - } - } - } - } - - gpu_tensor.device(sycl_device)=gpu_input1; - array<IndexType, 4> input3TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}}; - Tensor<DataType, 4, DataLayout,IndexType> input3(input3TensorRange); - input3.setRandom(); - - const size_t input3TensorBuffSize =input3.size()*sizeof(DataType); - DataType* gpu_data_input3 = static_cast<DataType*>(sycl_device.allocate(input3TensorBuffSize)); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input3(gpu_data_input3, input3TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_input3, input3.data(), input3TensorBuffSize); - gpu_tensor.template chip<1l>(1l).device(sycl_device)=gpu_input3; - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); - - for (int i = 0; i < sizeDim1; ++i) { - for (int j = 0; j < sizeDim2; ++j) { - for (int k = 0; k <sizeDim3; ++k) { - for (int l = 0; l < sizeDim4; ++l) { - for (int m = 0; m < sizeDim5; ++m) { - if (j != 1) { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); - } else { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input3(i,k,l,m)); - } - } - } - } - } - } - - gpu_tensor.device(sycl_device)=gpu_input1; - array<IndexType, 4> input4TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}}; - Tensor<DataType, 4, DataLayout,IndexType> input4(input4TensorRange); - input4.setRandom(); - - const size_t input4TensorBuffSize =input4.size()*sizeof(DataType); - DataType* gpu_data_input4 = static_cast<DataType*>(sycl_device.allocate(input4TensorBuffSize)); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input4(gpu_data_input4, input4TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_input4, input4.data(), input4TensorBuffSize); - gpu_tensor.template chip<2l>(3l).device(sycl_device)=gpu_input4; - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); - - for (int i = 0; i < sizeDim1; ++i) { - for (int j = 0; j < sizeDim2; ++j) { - for (int k = 0; k <sizeDim3; ++k) { - for (int l = 0; l < sizeDim4; ++l) { - for (int m = 0; m < sizeDim5; ++m) { - if (k != 3) { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); - } else { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input4(i,j,l,m)); - } - } - } - } - } - } - - gpu_tensor.device(sycl_device)=gpu_input1; - array<IndexType, 4> input5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}}; - Tensor<DataType, 4, DataLayout,IndexType> input5(input5TensorRange); - input5.setRandom(); - - const size_t input5TensorBuffSize =input5.size()*sizeof(DataType); - DataType* gpu_data_input5 = static_cast<DataType*>(sycl_device.allocate(input5TensorBuffSize)); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input5(gpu_data_input5, input5TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_input5, input5.data(), input5TensorBuffSize); - gpu_tensor.template chip<3l>(4l).device(sycl_device)=gpu_input5; - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); - - for (int i = 0; i < sizeDim1; ++i) { - for (int j = 0; j < sizeDim2; ++j) { - for (int k = 0; k <sizeDim3; ++k) { - for (int l = 0; l 
< sizeDim4; ++l) { - for (int m = 0; m < sizeDim5; ++m) { - if (l != 4) { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); - } else { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input5(i,j,k,m)); - } - } - } - } - } - } - gpu_tensor.device(sycl_device)=gpu_input1; - array<IndexType, 4> input6TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; - Tensor<DataType, 4, DataLayout,IndexType> input6(input6TensorRange); - input6.setRandom(); - - const size_t input6TensorBuffSize =input6.size()*sizeof(DataType); - DataType* gpu_data_input6 = static_cast<DataType*>(sycl_device.allocate(input6TensorBuffSize)); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input6(gpu_data_input6, input6TensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_input6, input6.data(), input6TensorBuffSize); - gpu_tensor.template chip<4l>(5l).device(sycl_device)=gpu_input6; - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); - - for (int i = 0; i < sizeDim1; ++i) { - for (int j = 0; j < sizeDim2; ++j) { - for (int k = 0; k <sizeDim3; ++k) { - for (int l = 0; l < sizeDim4; ++l) { - for (int m = 0; m < sizeDim5; ++m) { - if (m != 5) { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); - } else { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input6(i,j,k,l)); - } - } - } - } - } - } - - - gpu_tensor.device(sycl_device)=gpu_input1; - Tensor<DataType, 5, DataLayout,IndexType> input7(tensorRange); - input7.setRandom(); - - DataType* gpu_data_input7 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize)); - TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input7(gpu_data_input7, tensorRange); - - sycl_device.memcpyHostToDevice(gpu_data_input7, input7.data(), tensorBuffSize); - gpu_tensor.chip(0l,0l).device(sycl_device)=gpu_input7.chip(0l,0l); - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); - - for (int i = 0; i < sizeDim1; ++i) { - for (int j = 0; j < sizeDim2; ++j) { - for (int k = 0; k <sizeDim3; ++k) { - for (int l = 0; l < sizeDim4; ++l) { - for (int m = 0; m < sizeDim5; ++m) { - if (i != 0) { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); - } else { - VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input7(i,j,k,l,m)); - } - } - } - } - } - } - sycl_device.deallocate(gpu_data_tensor); - sycl_device.deallocate(gpu_data_input1); - sycl_device.deallocate(gpu_data_input2); - sycl_device.deallocate(gpu_data_input3); - sycl_device.deallocate(gpu_data_input4); - sycl_device.deallocate(gpu_data_input5); - sycl_device.deallocate(gpu_data_input6); - sycl_device.deallocate(gpu_data_input7); - -} - -template<typename DataType, typename dev_Selector> void sycl_chipping_test_per_device(dev_Selector s){ - QueueInterface queueInterface(s); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_static_chip_sycl<DataType, RowMajor, int64_t>(sycl_device); - test_static_chip_sycl<DataType, ColMajor, int64_t>(sycl_device); - test_dynamic_chip_sycl<DataType, RowMajor, int64_t>(sycl_device); - test_dynamic_chip_sycl<DataType, ColMajor, int64_t>(sycl_device); - test_chip_in_expr<DataType, RowMajor, int64_t>(sycl_device); - test_chip_in_expr<DataType, ColMajor, int64_t>(sycl_device); - test_chip_as_lvalue_sycl<DataType, RowMajor, int64_t>(sycl_device); - test_chip_as_lvalue_sycl<DataType, ColMajor, int64_t>(sycl_device); -} -void test_cxx11_tensor_chipping_sycl() -{ - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_chipping_test_per_device<float>(device)); - } -} diff --git 
a/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp deleted file mode 100644 index e3023a3..0000000 --- a/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp +++ /dev/null @@ -1,180 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_concatenation_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - -#include "main.h" -#include <unsupported/Eigen/CXX11/Tensor> - -using Eigen::Tensor; - -template<typename DataType, int DataLayout, typename IndexType> -static void test_simple_concatenation(const Eigen::SyclDevice& sycl_device) -{ - IndexType leftDim1 = 2; - IndexType leftDim2 = 3; - IndexType leftDim3 = 1; - Eigen::array<IndexType, 3> leftRange = {{leftDim1, leftDim2, leftDim3}}; - IndexType rightDim1 = 2; - IndexType rightDim2 = 3; - IndexType rightDim3 = 1; - Eigen::array<IndexType, 3> rightRange = {{rightDim1, rightDim2, rightDim3}}; - - //IndexType concatDim1 = 3; -// IndexType concatDim2 = 3; -// IndexType concatDim3 = 1; - //Eigen::array<IndexType, 3> concatRange = {{concatDim1, concatDim2, concatDim3}}; - - Tensor<DataType, 3, DataLayout, IndexType> left(leftRange); - Tensor<DataType, 3, DataLayout, IndexType> right(rightRange); - left.setRandom(); - right.setRandom(); - - DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType))); - DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType))); - - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, leftRange); - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, rightRange); - sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType)); - sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType)); - /// - Tensor<DataType, 3, DataLayout, IndexType> concatenation1(leftDim1+rightDim1, leftDim2, leftDim3); - DataType * gpu_out_data1 = static_cast<DataType*>(sycl_device.allocate(concatenation1.dimensions().TotalSize()*sizeof(DataType))); - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out1(gpu_out_data1, concatenation1.dimensions()); - - //concatenation = left.concatenate(right, 0); - gpu_out1.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 0); - sycl_device.memcpyDeviceToHost(concatenation1.data(), gpu_out_data1,(concatenation1.dimensions().TotalSize())*sizeof(DataType)); - - VERIFY_IS_EQUAL(concatenation1.dimension(0), 4); - VERIFY_IS_EQUAL(concatenation1.dimension(1), 3); - VERIFY_IS_EQUAL(concatenation1.dimension(2), 1); - for (IndexType j = 0; j < 3; ++j) { - for (IndexType i = 0; i < 2; ++i) { - VERIFY_IS_EQUAL(concatenation1(i, j, 0), left(i, j, 0)); - } - for (IndexType i = 2; i < 4; ++i) { - VERIFY_IS_EQUAL(concatenation1(i, j, 0), right(i - 2, j, 0)); - } - } - - 
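// Shape bookkeeping for the axis-0 case above: concatenation requires the
// non-concatenated extents to match, and the extent along the chosen axis is
// the sum of the operands' extents, here leftDim1 + rightDim1 = 2 + 2 = 4.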
sycl_device.deallocate(gpu_out_data1); - Tensor<DataType, 3, DataLayout, IndexType> concatenation2(leftDim1, leftDim2 +rightDim2, leftDim3); - DataType * gpu_out_data2 = static_cast<DataType*>(sycl_device.allocate(concatenation2.dimensions().TotalSize()*sizeof(DataType))); - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out2(gpu_out_data2, concatenation2.dimensions()); - gpu_out2.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 1); - sycl_device.memcpyDeviceToHost(concatenation2.data(), gpu_out_data2,(concatenation2.dimensions().TotalSize())*sizeof(DataType)); - - //concatenation = left.concatenate(right, 1); - VERIFY_IS_EQUAL(concatenation2.dimension(0), 2); - VERIFY_IS_EQUAL(concatenation2.dimension(1), 6); - VERIFY_IS_EQUAL(concatenation2.dimension(2), 1); - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - VERIFY_IS_EQUAL(concatenation2(i, j, 0), left(i, j, 0)); - } - for (IndexType j = 3; j < 6; ++j) { - VERIFY_IS_EQUAL(concatenation2(i, j, 0), right(i, j - 3, 0)); - } - } - sycl_device.deallocate(gpu_out_data2); - Tensor<DataType, 3, DataLayout, IndexType> concatenation3(leftDim1, leftDim2, leftDim3+rightDim3); - DataType * gpu_out_data3 = static_cast<DataType*>(sycl_device.allocate(concatenation3.dimensions().TotalSize()*sizeof(DataType))); - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out3(gpu_out_data3, concatenation3.dimensions()); - gpu_out3.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 2); - sycl_device.memcpyDeviceToHost(concatenation3.data(), gpu_out_data3,(concatenation3.dimensions().TotalSize())*sizeof(DataType)); - - //concatenation = left.concatenate(right, 2); - VERIFY_IS_EQUAL(concatenation3.dimension(0), 2); - VERIFY_IS_EQUAL(concatenation3.dimension(1), 3); - VERIFY_IS_EQUAL(concatenation3.dimension(2), 2); - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - VERIFY_IS_EQUAL(concatenation3(i, j, 0), left(i, j, 0)); - VERIFY_IS_EQUAL(concatenation3(i, j, 1), right(i, j, 0)); - } - } - sycl_device.deallocate(gpu_out_data3); - sycl_device.deallocate(gpu_in1_data); - sycl_device.deallocate(gpu_in2_data); -} -template<typename DataType, int DataLayout, typename IndexType> -static void test_concatenation_as_lvalue(const Eigen::SyclDevice& sycl_device) -{ - - IndexType leftDim1 = 2; - IndexType leftDim2 = 3; - Eigen::array<IndexType, 2> leftRange = {{leftDim1, leftDim2}}; - - IndexType rightDim1 = 2; - IndexType rightDim2 = 3; - Eigen::array<IndexType, 2> rightRange = {{rightDim1, rightDim2}}; - - IndexType concatDim1 = 4; - IndexType concatDim2 = 3; - Eigen::array<IndexType, 2> resRange = {{concatDim1, concatDim2}}; - - Tensor<DataType, 2, DataLayout, IndexType> left(leftRange); - Tensor<DataType, 2, DataLayout, IndexType> right(rightRange); - Tensor<DataType, 2, DataLayout, IndexType> result(resRange); - - left.setRandom(); - right.setRandom(); - result.setRandom(); - - DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType))); - DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType))); - DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(result.dimensions().TotalSize()*sizeof(DataType))); - - - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_in1(gpu_in1_data, leftRange); - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_in2(gpu_in2_data, rightRange); - 
Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_out(gpu_out_data, resRange); - - sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType)); - sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType)); - sycl_device.memcpyHostToDevice(gpu_out_data, result.data(),(result.dimensions().TotalSize())*sizeof(DataType)); - -// t1.concatenate(t2, 0) = result; - gpu_in1.concatenate(gpu_in2, 0).device(sycl_device) =gpu_out; - sycl_device.memcpyDeviceToHost(left.data(), gpu_in1_data,(left.dimensions().TotalSize())*sizeof(DataType)); - sycl_device.memcpyDeviceToHost(right.data(), gpu_in2_data,(right.dimensions().TotalSize())*sizeof(DataType)); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - VERIFY_IS_EQUAL(left(i, j), result(i, j)); - VERIFY_IS_EQUAL(right(i, j), result(i+2, j)); - } - } - sycl_device.deallocate(gpu_in1_data); - sycl_device.deallocate(gpu_in2_data); - sycl_device.deallocate(gpu_out_data); -} - - -template <typename DataType, typename Dev_selector> void tensorConcat_perDevice(Dev_selector s){ - QueueInterface queueInterface(s); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_simple_concatenation<DataType, RowMajor, int64_t>(sycl_device); - test_simple_concatenation<DataType, ColMajor, int64_t>(sycl_device); - test_concatenation_as_lvalue<DataType, ColMajor, int64_t>(sycl_device); -} -void test_cxx11_tensor_concatenation_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(tensorConcat_perDevice<float>(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp deleted file mode 100644 index 5bace66..0000000 --- a/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp +++ /dev/null @@ -1,290 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
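Before the contraction file below: for readers skimming the deleted concatenation test above, its device logic reduces to the following host-only sketch (function name and sizes are illustrative, not part of the original test):

    #include <unsupported/Eigen/CXX11/Tensor>
    using Eigen::Tensor;

    // Concatenate two 2x3x1 tensors along axis 0; the result is 4x3x1,
    // with rows 0-1 taken from `left` and rows 2-3 from `right`.
    void concat_demo() {
      Tensor<float, 3> left(2, 3, 1), right(2, 3, 1);
      left.setRandom();
      right.setRandom();
      Tensor<float, 3> cat = left.concatenate(right, 0);
      // cat.dimension(0) == 4, cat.dimension(1) == 3, cat.dimension(2) == 1
    }

The SYCL variant only wraps this same concatenate() expression in the allocate / memcpyHostToDevice / TensorMap plumbing shown above.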
- -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_contract_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - -#include <iostream> -#include <chrono> -#include <ctime> - -#include "main.h" -#include <unsupported/Eigen/CXX11/Tensor> - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; -template<int DataLayout, typename DataType, typename IndexType, typename Device> -void static test_sycl_contraction(const Device& sycl_device, IndexType m_size, IndexType k_size, IndexType n_size) -{ - typedef typename Tensor<DataType, 1, DataLayout, IndexType>::DimensionPair DimPair; - static const DataType error_threshold =1e-4f; -// std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; - // with these dimensions, the output has 300 * 140 elements, which is - // more than 30 * 1024, which is the number of threads in blocks on - // a 15 SM GK110 GPU - Tensor<DataType, 2, DataLayout, IndexType> t_left(m_size, k_size); - Tensor<DataType, 2, DataLayout, IndexType> t_right(k_size, n_size); - Tensor<DataType, 2, DataLayout, IndexType> t_result(m_size, n_size); - Tensor<DataType, 2, DataLayout, IndexType> t_result_gpu(m_size, n_size); -// Eigen::array<DimPair, 1> dims(DimPair(1, 0)); - Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}}; - Eigen::array<IndexType, 2> left_dims = {{m_size, k_size}}; - Eigen::array<IndexType, 2> right_dims = {{k_size, n_size}}; - Eigen::array<IndexType, 2> result_dims = {{m_size, n_size}}; - - t_left.setRandom(); - t_right.setRandom(); - - std::size_t t_left_bytes = t_left.size() * sizeof(DataType); - std::size_t t_right_bytes = t_right.size() * sizeof(DataType); - std::size_t t_result_bytes = t_result.size() * sizeof(DataType); - - DataType * d_t_left = static_cast<DataType*>(sycl_device.allocate(t_left_bytes)); - DataType * d_t_right = static_cast<DataType*>(sycl_device.allocate(t_right_bytes)); - DataType * d_t_result = static_cast<DataType*>(sycl_device.allocate(t_result_bytes)); - - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_left(d_t_left, left_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_right(d_t_right, right_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_result(d_t_result, result_dims); - - sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes); - sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes); - - gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims); - sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes); - - t_result = t_left.contract(t_right, dims); - - for (IndexType i = 0; i < t_result.size(); i++) { - if (static_cast<DataType>(fabs(t_result(i) - t_result_gpu(i))) < error_threshold) { - continue; - } - if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) { - continue; - } - std::cout << "mismatch detected at IndexType " << i << ": " << t_result(i) - << " vs " << t_result_gpu(i) << std::endl; - assert(false); - } - sycl_device.deallocate(d_t_left); - sycl_device.deallocate(d_t_right); - sycl_device.deallocate(d_t_result); -} - -template<int DataLayout, typename DataType, typename IndexType, typename Device> -void test_TF(const Device& sycl_device) -{ - typedef typename Tensor<DataType, 1, DataLayout, IndexType>::DimensionPair DimPair; - static const DataType error_threshold =1e-4f; - 
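// Worked expectation for this test, assuming ColMajor: the 2x3 operand filled
// below column-major is [[1, 3, 5], [2, 4, 6]], the 3x1 operand is
// [-1, 0.5, 2]^T, and DimPair(1, 0) is an ordinary matrix product, so the
// result should be [1*(-1) + 3*0.5 + 5*2, 2*(-1) + 4*0.5 + 6*2]^T = [10.5, 12]^T.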
Eigen::array<IndexType, 2> left_dims = {{2, 3}}; - Eigen::array<IndexType, 2> right_dims = {{3, 1}}; - Eigen::array<IndexType, 2> res_dims = {{2, 1}}; - Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}}; - - - Tensor<DataType, 2, DataLayout, IndexType> t_left(left_dims); - Tensor<DataType, 2, DataLayout, IndexType> t_right(right_dims); - Tensor<DataType, 2, DataLayout, IndexType> t_result_gpu(res_dims); - Tensor<DataType, 2, DataLayout, IndexType> t_result(res_dims); - - t_left.data()[0] = 1.0f; - t_left.data()[1] = 2.0f; - t_left.data()[2] = 3.0f; - t_left.data()[3] = 4.0f; - t_left.data()[4] = 5.0f; - t_left.data()[5] = 6.0f; - - t_right.data()[0] = -1.0f; - t_right.data()[1] = 0.5f; - t_right.data()[2] = 2.0f; - - std::size_t t_left_bytes = t_left.size() * sizeof(DataType); - std::size_t t_right_bytes = t_right.size() * sizeof(DataType); - std::size_t t_result_bytes = t_result.size()*sizeof(DataType); - - - DataType * d_t_left = static_cast<DataType*>(sycl_device.allocate(t_left_bytes)); - DataType * d_t_right = static_cast<DataType*>(sycl_device.allocate(t_right_bytes)); - DataType * d_t_result = static_cast<DataType*>(sycl_device.allocate(t_result_bytes)); - - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_left(d_t_left, left_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_right(d_t_right, right_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_result(d_t_result, res_dims); - - sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes); - sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes); - - gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims); - sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes); - - t_result = t_left.contract(t_right, dims); - - for (IndexType i = 0; i < t_result.size(); i++) { - if (static_cast<DataType>(fabs(t_result(i) - t_result_gpu(i))) < error_threshold) { - continue; - } - if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) { - continue; - } - std::cout << "mismatch detected at IndexType " << i << ": " << t_result(i) - << " vs " << t_result_gpu(i) << std::endl; - assert(false); - } - sycl_device.deallocate(d_t_left); - sycl_device.deallocate(d_t_right); - sycl_device.deallocate(d_t_result); - - -} - -template<int DataLayout, typename DataType, typename IndexType, typename Device> -void test_scalar(const Device& sycl_device, IndexType m_size, IndexType k_size, IndexType n_size) -{ - //std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; - // with these dimensions, the output has 300 * 140 elements, which is - // more than 30 * 1024, which is the number of threads in blocks on - // a 15 SM GK110 GPU - typedef typename Tensor<DataType, 1, DataLayout, IndexType>::DimensionPair DimPair; - static const DataType error_threshold =1e-4f; - Tensor<DataType, 2, DataLayout, IndexType> t_left(m_size, k_size); - Tensor<DataType, 2, DataLayout, IndexType> t_right(k_size, n_size); - Tensor<DataType, 0, DataLayout, IndexType> t_result; - Tensor<DataType, 0, DataLayout, IndexType> t_result_gpu; - Eigen::array<DimPair, 2> dims = {{DimPair(0, 0), DimPair(1, 1)}}; - Eigen::array<IndexType, 2> left_dims = {{m_size, k_size}}; - Eigen::array<IndexType, 2> right_dims = {{k_size, n_size}}; - t_left.setRandom(); - t_right.setRandom(); - - std::size_t t_left_bytes = t_left.size() * sizeof(DataType); - std::size_t t_right_bytes = t_right.size() 
* sizeof(DataType); - std::size_t t_result_bytes = sizeof(DataType); - - - DataType * d_t_left = static_cast<DataType*>(sycl_device.allocate(t_left_bytes)); - DataType * d_t_right = static_cast<DataType*>(sycl_device.allocate(t_right_bytes)); - DataType * d_t_result = static_cast<DataType*>(sycl_device.allocate(t_result_bytes)); - - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_left(d_t_left, left_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_right(d_t_right, right_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 0, DataLayout, IndexType> > gpu_t_result(d_t_result); - - sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes); - sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes); - - gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims); - sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes); - - t_result = t_left.contract(t_right, dims); - - if (static_cast<DataType>(fabs(t_result() - t_result_gpu())) > error_threshold && - !Eigen::internal::isApprox(t_result(), t_result_gpu(), error_threshold)) { - std::cout << "mismatch detected: " << t_result() - << " vs " << t_result_gpu() << std::endl; - assert(false); - } - - sycl_device.deallocate(d_t_left); - sycl_device.deallocate(d_t_right); - sycl_device.deallocate(d_t_result); -} - - -template<int DataLayout, typename DataType, typename IndexType, typename Device> -void test_sycl_contraction_m(const Device& sycl_device) { - for (IndexType k = 32; k < 256; k++) { - test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, k, 128, 128); - } -} - -template<int DataLayout, typename DataType, typename IndexType, typename Device> -void test_sycl_contraction_k(const Device& sycl_device) { - for (IndexType k = 32; k < 256; k++) { - test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, 128, k, 128); - } -} - -template<int DataLayout, typename DataType, typename IndexType, typename Device> -void test_sycl_contraction_n(const Device& sycl_device) { - for (IndexType k = 32; k < 256; k++) { - test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, 128, 128, k); - } -} - - -template<int DataLayout, typename DataType, typename IndexType, typename Device> -void test_sycl_contraction_sizes(const Device& sycl_device) { - IndexType m_sizes[] = { 31, 39, 63, 64, 65, - 127, 129, 255, 257 , 511, - 512, 513, 1023, 1024, 1025}; - - IndexType n_sizes[] = { 31, 39, 63, 64, 65, - 127, 129, 255, 257, 511, - 512, 513, 1023, 1024, 1025}; - - IndexType k_sizes[] = { 31, 39, 63, 64, 65, - 95, 96, 127, 129, 255, - 257, 511, 512, 513, 1023, - 1024, 1025}; - - for (IndexType i = 0; i < 15; i++) { - for (IndexType j = 0; j < 15; j++) { - for (IndexType k = 0; k < 17; k++) { - test_sycl_contraction<DataLayout, DataType,IndexType>(sycl_device, m_sizes[i], n_sizes[j], k_sizes[k]); - } - } - } -} - -template <typename Dev_selector> void tensorContractionPerDevice(Dev_selector& s){ - QueueInterface queueInterface(s); - auto sycl_device=Eigen::SyclDevice(&queueInterface); - test_sycl_contraction<ColMajor, float,int64_t>(sycl_device, 32, 32, 32); - test_sycl_contraction<RowMajor,float,int64_t>(sycl_device, 32, 32, 32); - test_scalar<ColMajor,float,int64_t>(sycl_device, 32, 32, 32); - test_scalar<RowMajor,float,int64_t>(sycl_device, 32, 32, 32); - std::chrono::time_point<std::chrono::system_clock> start, end; - start = std::chrono::system_clock::now(); - 
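// Everything between this `start` stamp and the matching `end` stamp below is
// timed; the elapsed wall-clock seconds are printed once per device via
// std::chrono::duration and std::ctime.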
test_sycl_contraction<ColMajor,float,int64_t>(sycl_device, 128, 128, 128); - test_sycl_contraction<RowMajor,float,int64_t>(sycl_device, 128, 128, 128); - test_scalar<ColMajor,float,int64_t>(sycl_device, 128, 128, 128); - test_scalar<RowMajor,float,int64_t>(sycl_device, 128, 128, 128); - test_sycl_contraction_m<ColMajor, float, int64_t>(sycl_device); - test_sycl_contraction_m<RowMajor, float, int64_t>(sycl_device); - test_sycl_contraction_n<ColMajor, float, int64_t>(sycl_device); - test_sycl_contraction_n<RowMajor, float, int64_t>(sycl_device); - test_sycl_contraction_k<ColMajor, float, int64_t>(sycl_device); - test_sycl_contraction_k<RowMajor, float, int64_t>(sycl_device); - test_sycl_contraction_sizes<ColMajor, float, int64_t>(sycl_device); - test_sycl_contraction_sizes<RowMajor, float, int64_t>(sycl_device); - test_TF<RowMajor, float, int64_t>(sycl_device); - test_TF<ColMajor, float, int64_t>(sycl_device); - - end = std::chrono::system_clock::now(); - std::chrono::duration<double> elapsed_seconds = end-start; - std::time_t end_time = std::chrono::system_clock::to_time_t(end); - std::cout << "finished computation at " << std::ctime(&end_time) - << "elapsed time: " << elapsed_seconds.count() << "s\n"; - -} - -void test_cxx11_tensor_contract_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(tensorContractionPerDevice(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp deleted file mode 100644 index a4226a6..0000000 --- a/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp +++ /dev/null @@ -1,469 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
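Before the convolution file below: the core of the deleted contraction test is this host-side pattern (a sketch with illustrative sizes; DimPair(1, 0) contracts the lhs columns against the rhs rows, i.e. an ordinary matrix product):

    #include <unsupported/Eigen/CXX11/Tensor>
    using Eigen::Tensor;

    void contract_demo() {
      typedef Tensor<float, 2>::DimensionPair DimPair;
      Tensor<float, 2> lhs(32, 64), rhs(64, 16);
      lhs.setRandom();
      rhs.setRandom();
      // Contract lhs dimension 1 against rhs dimension 0: lhs * rhs.
      Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
      Tensor<float, 2> res = lhs.contract(rhs, dims);  // 32 x 16
    }

The deleted test evaluates the same expression on the device and compares every element against this host-side result to within error_threshold.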
- -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_convolution_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - -#include <iostream> -#include <chrono> -#include <ctime> - -#include "main.h" -#include <unsupported/Eigen/CXX11/Tensor> -#include <iomanip> - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; -static const float error_threshold =1e-4f; - - -template <typename DataType, int DataLayout, typename IndexType> -static void test_larg_expr1D(const Eigen::SyclDevice& sycl_device) -{ - IndexType indim0 =53; - IndexType indim1= 55; - IndexType indim2= 51; - IndexType outdim0=50; - IndexType outdim1=55; - IndexType outdim2=51; - Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}}; - Eigen::array<IndexType, 1> kernel_dims = {{4}}; - Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}}; - - Tensor<DataType, 3, DataLayout, IndexType> input(input_dims); - Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims); - Tensor<DataType, 3, DataLayout,IndexType> result(result_dims); - Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims); - - Eigen::array<IndexType, 1> dims3{{0}}; - - input.setRandom(); - kernel.setRandom(); - result.setZero(); - result_host.setZero(); - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t result_bytes = result.size() * sizeof(DataType); - - DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); - DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); - - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); - sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); - - result_host=input.convolve(kernel, dims3); - -for(IndexType i=0; i< outdim0; i++ ){ - for(IndexType j=0; j< outdim1; j++ ){ - for(IndexType k=0; k< outdim2; k++ ){ - if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { - std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl; - assert(false); - } - } - } -} - sycl_device.deallocate(d_input); - sycl_device.deallocate(d_kernel); - sycl_device.deallocate(d_result); - -} - - -template <typename DataType, int DataLayout, typename IndexType> -static void test_larg_expr2D(const Eigen::SyclDevice& sycl_device) -{ - IndexType indim0 =53; - IndexType indim1= 55; - IndexType indim2= 51; - IndexType outdim0=50; - IndexType outdim1=51; - IndexType outdim2=51; - Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}}; - Eigen::array<IndexType, 2> kernel_dims = {{4,5}}; - Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}}; - - Tensor<DataType, 3, DataLayout, IndexType> input(input_dims); - Tensor<DataType, 
2, DataLayout,IndexType> kernel(kernel_dims); - Tensor<DataType, 3, DataLayout,IndexType> result(result_dims); - Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims); - - Eigen::array<IndexType, 2> dims3{{0,1}}; - - input.setRandom(); - kernel.setRandom(); - result.setZero(); - result_host.setZero(); - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t result_bytes = result.size() * sizeof(DataType); - - DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); - DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); - - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); - sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); - - result_host=input.convolve(kernel, dims3); - -for(IndexType i=0; i< outdim0; i++ ){ - for(IndexType j=0; j< outdim1; j++ ){ - for(IndexType k=0; k< outdim2; k++ ){ - if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { - std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl; - assert(false); - } - } - } -} - sycl_device.deallocate(d_input); - sycl_device.deallocate(d_kernel); - sycl_device.deallocate(d_result); - -} - - -template <typename DataType, int DataLayout, typename IndexType> -static void test_larg_expr3D(const Eigen::SyclDevice& sycl_device) -{ - IndexType indim0 =53; - IndexType indim1= 55; - IndexType indim2= 51; - IndexType outdim0=50; - IndexType outdim1=51; - IndexType outdim2=49; - Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}}; - Eigen::array<IndexType, 3> kernel_dims = {{4,5,3}}; - Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}}; - - Tensor<DataType, 3, DataLayout, IndexType> input(input_dims); - Tensor<DataType, 3, DataLayout,IndexType> kernel(kernel_dims); - Tensor<DataType, 3, DataLayout,IndexType> result(result_dims); - Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims); - - Eigen::array<IndexType, 3> dims3{{0,1,2}}; - - input.setRandom(); - kernel.setRandom(); - result.setZero(); - result_host.setZero(); - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t result_bytes = result.size() * sizeof(DataType); - - DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); - DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); - - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> 
> gpu_result(d_result, result_dims); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); - sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); - - result_host=input.convolve(kernel, dims3); - -for(IndexType i=0; i< outdim0; i++ ){ - for(IndexType j=0; j< outdim1; j++ ){ - for(IndexType k=0; k< outdim2; k++ ){ - if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { - std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl; - assert(false); - } - } - } -} - sycl_device.deallocate(d_input); - sycl_device.deallocate(d_kernel); - sycl_device.deallocate(d_result); - -} - - -template <typename DataType, int DataLayout, typename IndexType> -static void test_evals(const Eigen::SyclDevice& sycl_device) -{ - Eigen::array<IndexType, 2> input_dims = {{3, 3}}; - Eigen::array<IndexType, 1> kernel_dims = {{2}}; - Eigen::array<IndexType, 2> result_dims = {{2, 3}}; - - Tensor<DataType, 2, DataLayout, IndexType> input(input_dims); - Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims); - Tensor<DataType, 2, DataLayout,IndexType> result(result_dims); - - Eigen::array<IndexType, 1> dims3{{0}}; - - input.setRandom(); - kernel.setRandom(); - result.setZero(); - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t result_bytes = result.size() * sizeof(DataType); - - DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); - DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); - - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_input(d_input, input_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_result(d_result, result_dims); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); - sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); - - VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0) + input(1,0)*kernel(1)); // index 0 - VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0) + input(1,1)*kernel(1)); // index 2 - VERIFY_IS_APPROX(result(0,2), input(0,2)*kernel(0) + input(1,2)*kernel(1)); // index 4 - VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0) + input(2,0)*kernel(1)); // index 1 - VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0) + input(2,1)*kernel(1)); // index 3 - VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5 - - sycl_device.deallocate(d_input); - sycl_device.deallocate(d_kernel); - sycl_device.deallocate(d_result); -} - -template <typename DataType, int DataLayout, typename IndexType> -static void test_expr(const Eigen::SyclDevice& sycl_device) -{ - Eigen::array<IndexType, 2> input_dims = {{3, 3}}; - Eigen::array<IndexType, 2> kernel_dims = {{2, 2}}; - Eigen::array<IndexType, 2> result_dims = {{2, 2}}; - - Tensor<DataType, 2, DataLayout, IndexType> 
input(input_dims); - Tensor<DataType, 2, DataLayout, IndexType> kernel(kernel_dims); - Tensor<DataType, 2, DataLayout, IndexType> result(result_dims); - - input.setRandom(); - kernel.setRandom(); - Eigen::array<IndexType, 2> dims; - dims[0] = 0; - dims[1] = 1; - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t result_bytes = result.size() * sizeof(DataType); - - DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); - DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); - - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_input(d_input, input_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_result(d_result, result_dims); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims); - sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); - - VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0,0) + input(0,1)*kernel(0,1) + - input(1,0)*kernel(1,0) + input(1,1)*kernel(1,1)); - VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0,0) + input(0,2)*kernel(0,1) + - input(1,1)*kernel(1,0) + input(1,2)*kernel(1,1)); - VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0,0) + input(1,1)*kernel(0,1) + - input(2,0)*kernel(1,0) + input(2,1)*kernel(1,1)); - VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0,0) + input(1,2)*kernel(0,1) + - input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1)); - - sycl_device.deallocate(d_input); - sycl_device.deallocate(d_kernel); - sycl_device.deallocate(d_result); -} - - -template <typename DataType, int DataLayout, typename IndexType> -static void test_modes(const Eigen::SyclDevice& sycl_device){ - -Eigen::array<IndexType, 1> input_dims = {{3}}; -Eigen::array<IndexType, 1> kernel_dims = {{3}}; - -Tensor<DataType, 1, DataLayout, IndexType> input(input_dims); -Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims); - -input.setRandom(); -kernel.setRandom(); -Eigen::array<IndexType, 1> dims; -dims[0] = 0; - - input(0) = 1.0f; - input(1) = 2.0f; - input(2) = 3.0f; - kernel(0) = 0.5f; - kernel(1) = 1.0f; - kernel(2) = 0.0f; - - Eigen::array<std::pair<IndexType, IndexType>, 1> padding; - - // Emulate VALID mode (as defined in - // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). 
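// Output-size arithmetic for the three modes below: a 1-D convolve produces
// input + pad_left + pad_right - kernel + 1 elements, so with the length-3
// input and length-3 kernel used here, VALID (no padding) gives 1, SAME
// (pad 1/1) gives 3, and FULL (pad 2/2) gives 5 -- matching numpy.convolve.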
- padding[0] = std::make_pair(0, 0); - Tensor<DataType, 1, DataLayout, IndexType> valid(1); - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t valid_bytes = valid.size() * sizeof(DataType); - - DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); - DataType * d_valid = static_cast<DataType*>(sycl_device.allocate(valid_bytes)); - - Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_valid(d_valid, valid.dimensions()); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_valid.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); - sycl_device.memcpyDeviceToHost(valid.data(), d_valid, valid_bytes); - - VERIFY_IS_EQUAL(valid.dimension(0), 1); - VERIFY_IS_APPROX(valid(0), 2.5f); - - // Emulate SAME mode (as defined in - // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). - padding[0] = std::make_pair(1, 1); - Tensor<DataType, 1, DataLayout, IndexType> same(3); - std::size_t same_bytes = same.size() * sizeof(DataType); - DataType * d_same = static_cast<DataType*>(sycl_device.allocate(same_bytes)); - Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_same(d_same, same.dimensions()); - gpu_same.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); - sycl_device.memcpyDeviceToHost(same.data(), d_same, same_bytes); - - VERIFY_IS_EQUAL(same.dimension(0), 3); - VERIFY_IS_APPROX(same(0), 1.0f); - VERIFY_IS_APPROX(same(1), 2.5f); - VERIFY_IS_APPROX(same(2), 4.0f); - - // Emulate FULL mode (as defined in - // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). 
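// Worked FULL-mode expectation: the padded input is [0, 0, 1, 2, 3, 0, 0] and
// out(i) = sum_k padded(i + k) * kernel(k) with kernel [0.5, 1, 0], giving
// [0, 1, 2.5, 4, 1.5] -- exactly the values verified below.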
- padding[0] = std::make_pair(2, 2); - - Tensor<DataType, 1, DataLayout, IndexType> full(5); - std::size_t full_bytes = full.size() * sizeof(DataType); - DataType * d_full = static_cast<DataType*>(sycl_device.allocate(full_bytes)); - Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_full(d_full, full.dimensions()); - gpu_full.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); - sycl_device.memcpyDeviceToHost(full.data(), d_full, full_bytes); - - VERIFY_IS_EQUAL(full.dimension(0), 5); - VERIFY_IS_APPROX(full(0), 0.0f); - VERIFY_IS_APPROX(full(1), 1.0f); - VERIFY_IS_APPROX(full(2), 2.5f); - VERIFY_IS_APPROX(full(3), 4.0f); - VERIFY_IS_APPROX(full(4), 1.5f); - - sycl_device.deallocate(d_input); - sycl_device.deallocate(d_kernel); - sycl_device.deallocate(d_valid); - sycl_device.deallocate(d_same); - sycl_device.deallocate(d_full); - -} - -template <typename DataType, int DataLayout, typename IndexType> -static void test_strides(const Eigen::SyclDevice& sycl_device){ - - Eigen::array<IndexType, 1> input_dims = {{13}}; - Eigen::array<IndexType, 1> kernel_dims = {{3}}; - - Tensor<DataType, 1, DataLayout, IndexType> input(input_dims); - Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims); - Tensor<DataType, 1, DataLayout, IndexType> result(2); - - input.setRandom(); - kernel.setRandom(); - Eigen::array<IndexType, 1> dims; - dims[0] = 0; - - Eigen::array<IndexType, 1> stride_of_3; - stride_of_3[0] = 3; - Eigen::array<IndexType, 1> stride_of_2; - stride_of_2[0] = 2; - - std::size_t input_bytes = input.size() * sizeof(DataType); - std::size_t kernel_bytes = kernel.size() * sizeof(DataType); - std::size_t result_bytes = result.size() * sizeof(DataType); - - DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); - DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); - DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); - - Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_result(d_result, result.dimensions()); - sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); - sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); - - gpu_result.device(sycl_device)=gpu_input.stride(stride_of_3).convolve(gpu_kernel, dims).stride(stride_of_2); - sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); - - VERIFY_IS_EQUAL(result.dimension(0), 2); - VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) + - input(6)*kernel(2))); - VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) + - input(12)*kernel(2))); -} - -template <typename Dev_selector> void tensorConvolutionPerDevice(Dev_selector& s){ - QueueInterface queueInterface(s); - auto sycl_device=Eigen::SyclDevice(&queueInterface); - test_larg_expr1D<float, RowMajor, int64_t>(sycl_device); - test_larg_expr1D<float, ColMajor, int64_t>(sycl_device); - test_larg_expr2D<float, RowMajor, int64_t>(sycl_device); - test_larg_expr2D<float, ColMajor, int64_t>(sycl_device); - test_larg_expr3D<float, RowMajor, int64_t>(sycl_device); - test_larg_expr3D<float, ColMajor, int64_t>(sycl_device); - test_evals<float, ColMajor, int64_t>(sycl_device); - test_evals<float, RowMajor, int64_t>(sycl_device); - test_expr<float, ColMajor, 
int64_t>(sycl_device); - test_expr<float, RowMajor, int64_t>(sycl_device); - test_modes<float, ColMajor, int64_t>(sycl_device); - test_modes<float, RowMajor, int64_t>(sycl_device); - test_strides<float, ColMajor, int64_t>(sycl_device); - test_strides<float, RowMajor, int64_t>(sycl_device); -} - -void test_cxx11_tensor_convolution_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(tensorConvolutionPerDevice(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp index 3ecc68d..7f79753 100644 --- a/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp +++ b/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp @@ -14,64 +14,18 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_device_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_SYCL #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> -#include <stdint.h> -#include <iostream> -template <typename DataType, int DataLayout, typename IndexType> -void test_device_memory(const Eigen::SyclDevice &sycl_device) { - std::cout << "Running on : " - << sycl_device.sycl_queue().get_device(). template get_info<cl::sycl::info::device::name>() - <<std::endl; - IndexType sizeDim1 = 100; - array<IndexType, 1> tensorRange = {{sizeDim1}}; - Tensor<DataType, 1, DataLayout,IndexType> in(tensorRange); - Tensor<DataType, 1, DataLayout,IndexType> in1(tensorRange); - memset(in1.data(), 1, in1.size() * sizeof(DataType)); - DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.size()*sizeof(DataType))); - sycl_device.memset(gpu_in_data, 1, in.size()*sizeof(DataType)); - sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType)); - for (IndexType i=0; i<in.size(); i++) { - VERIFY_IS_EQUAL(in(i), in1(i)); - } - sycl_device.deallocate(gpu_in_data); +void test_device_sycl(const Eigen::SyclDevice &sycl_device) { + std::cout <<"Helo from ComputeCpp: the requested device exists and the device name is : " + << sycl_device.m_queue.get_device(). template get_info<cl::sycl::info::device::name>() <<std::endl;; } - -template <typename DataType, int DataLayout, typename IndexType> -void test_device_exceptions(const Eigen::SyclDevice &sycl_device) { - VERIFY(sycl_device.ok()); - IndexType sizeDim1 = 100; - array<IndexType, 1> tensorDims = {{sizeDim1}}; - DataType* gpu_data = static_cast<DataType*>(sycl_device.allocate(sizeDim1*sizeof(DataType))); - sycl_device.memset(gpu_data, 1, sizeDim1*sizeof(DataType)); - - TensorMap<Tensor<DataType, 1, DataLayout,IndexType>> in(gpu_data, tensorDims); - TensorMap<Tensor<DataType, 1, DataLayout,IndexType>> out(gpu_data, tensorDims); - out.device(sycl_device) = in / in.constant(0); - - sycl_device.synchronize(); - VERIFY(!sycl_device.ok()); - sycl_device.deallocate(gpu_data); -} - -template<typename DataType> void sycl_device_test_per_device(const cl::sycl::device& d){ - std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl; - QueueInterface queueInterface(d); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_device_memory<DataType, RowMajor, int64_t>(sycl_device); - test_device_memory<DataType, ColMajor, int64_t>(sycl_device); - /// this test throw an exception. enable it if you want to see the exception - //test_device_exceptions<DataType, RowMajor>(sycl_device); - /// this test throw an exception. 
enable it if you want to see the exception - //test_device_exceptions<DataType, ColMajor>(sycl_device); -} - void test_cxx11_tensor_device_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_device_test_per_device<float>(device)); - } + cl::sycl::gpu_selector s; + Eigen::SyclDevice sycl_device(s); + CALL_SUBTEST(test_device_sycl(sycl_device)); } diff --git a/eigen/unsupported/test/cxx11_tensor_expr.cpp b/eigen/unsupported/test/cxx11_tensor_expr.cpp index 129b4e6..77e24cb 100644 --- a/eigen/unsupported/test/cxx11_tensor_expr.cpp +++ b/eigen/unsupported/test/cxx11_tensor_expr.cpp @@ -300,51 +300,6 @@ static void test_select() } } -template <typename Scalar> -void test_minmax_nan_propagation_templ() { - for (int size = 1; size < 17; ++size) { - const Scalar kNan = std::numeric_limits<Scalar>::quiet_NaN(); - Tensor<Scalar, 1> vec_nan(size); - Tensor<Scalar, 1> vec_zero(size); - Tensor<Scalar, 1> vec_res(size); - vec_nan.setConstant(kNan); - vec_zero.setZero(); - vec_res.setZero(); - - // Test that we propagate NaNs in the tensor when applying the - // cwiseMax(scalar) operator, which is used for the Relu operator. - vec_res = vec_nan.cwiseMax(Scalar(0)); - for (int i = 0; i < size; ++i) { - VERIFY((numext::isnan)(vec_res(i))); - } - - // Test that NaNs do not propagate if we reverse the arguments. - vec_res = vec_zero.cwiseMax(kNan); - for (int i = 0; i < size; ++i) { - VERIFY_IS_EQUAL(vec_res(i), Scalar(0)); - } - - // Test that we propagate NaNs in the tensor when applying the - // cwiseMin(scalar) operator. - vec_res.setZero(); - vec_res = vec_nan.cwiseMin(Scalar(0)); - for (int i = 0; i < size; ++i) { - VERIFY((numext::isnan)(vec_res(i))); - } - - // Test that NaNs do not propagate if we reverse the arguments. 
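// (Any ordered comparison against NaN is false, so a min/max implemented as a
// compare-and-select that falls back to its first operand when the comparison
// fails returns the non-NaN tensor element unchanged in this reversed case.)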
- vec_res = vec_zero.cwiseMin(kNan); - for (int i = 0; i < size; ++i) { - VERIFY_IS_EQUAL(vec_res(i), Scalar(0)); - } - } -} - -static void test_minmax_nan_propagation() -{ - test_minmax_nan_propagation_templ<float>(); - test_minmax_nan_propagation_templ<double>(); -} void test_cxx11_tensor_expr() { @@ -356,5 +311,4 @@ void test_cxx11_tensor_expr() CALL_SUBTEST(test_functors()); CALL_SUBTEST(test_type_casting()); CALL_SUBTEST(test_select()); - CALL_SUBTEST(test_minmax_nan_propagation()); } diff --git a/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp b/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp index e6274f8..4c660de 100644 --- a/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp +++ b/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp @@ -21,7 +21,7 @@ static void test_0d() TensorFixedSize<float, Sizes<>, RowMajor> scalar2; VERIFY_IS_EQUAL(scalar1.rank(), 0); VERIFY_IS_EQUAL(scalar1.size(), 1); - VERIFY_IS_EQUAL(internal::array_prod(scalar1.dimensions()), 1); + VERIFY_IS_EQUAL(array_prod(scalar1.dimensions()), 1); scalar1() = 7.0; scalar2() = 13.0; diff --git a/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp index aca036c..5690da7 100644 --- a/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp +++ b/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp @@ -14,43 +14,43 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_forced_eval_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_SYCL #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> using Eigen::Tensor; -template <typename DataType, int DataLayout, typename IndexType> + void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) { - IndexType sizeDim1 = 100; - IndexType sizeDim2 = 20; - IndexType sizeDim3 = 20; - Eigen::array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; - Eigen::Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange); - Eigen::Tensor<DataType, 3, DataLayout, IndexType> in2(tensorRange); - Eigen::Tensor<DataType, 3, DataLayout, IndexType> out(tensorRange); + int sizeDim1 = 100; + int sizeDim2 = 200; + int sizeDim3 = 200; + Eigen::array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + Eigen::Tensor<float, 3> in1(tensorRange); + Eigen::Tensor<float, 3> in2(tensorRange); + Eigen::Tensor<float, 3> out(tensorRange); - DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType))); - DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(DataType))); - DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType))); + float * gpu_in1_data = static_cast<float*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(float))); + float * gpu_in2_data = static_cast<float*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(float))); + float * gpu_out_data = static_cast<float*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float))); in1 = in1.random() + in1.constant(10.0f); in2 = in2.random() + in2.constant(10.0f); // creating TensorMap from tensor - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, tensorRange); - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, tensorRange); - Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, 
IndexType>> gpu_out(gpu_out_data, tensorRange); - sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(DataType)); - sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in1.dimensions().TotalSize())*sizeof(DataType)); + Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in1(gpu_in1_data, tensorRange); + Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in2(gpu_in2_data, tensorRange); + Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_out(gpu_out_data, tensorRange); + sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(float)); + sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in1.dimensions().TotalSize())*sizeof(float)); /// c=(a+b)*b gpu_out.device(sycl_device) =(gpu_in1 + gpu_in2).eval() * gpu_in2; - sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType)); - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float)); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { VERIFY_IS_APPROX(out(i, j, k), (in1(i, j, k) + in2(i, j, k)) * in2(i, j, k)); } @@ -63,14 +63,8 @@ void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) { } -template <typename DataType, typename Dev_selector> void tensorForced_evalperDevice(Dev_selector s){ - QueueInterface queueInterface(s); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_forced_eval_sycl<DataType, RowMajor, int64_t>(sycl_device); - test_forced_eval_sycl<DataType, ColMajor, int64_t>(sycl_device); -} void test_cxx11_tensor_forced_eval_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(tensorForced_evalperDevice<float>(device)); - } + cl::sycl::gpu_selector s; + Eigen::SyclDevice sycl_device(s); + CALL_SUBTEST(test_forced_eval_sycl(sycl_device)); } diff --git a/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp deleted file mode 100644 index 9b521bc..0000000 --- a/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp +++ /dev/null @@ -1,248 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
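Before the morphing file below: the forced-evaluation expression kept in the test above, in host-only form (a sketch; names and sizes are illustrative). The point of .eval() is to materialize the sum into a temporary once, so the multiply reads concrete values rather than re-evaluating a + b for every output element:

    #include <unsupported/Eigen/CXX11/Tensor>
    using Eigen::Tensor;

    void forced_eval_demo() {
      Tensor<float, 3> a(4, 5, 6), b(4, 5, 6);
      a.setRandom();
      b.setRandom();
      Tensor<float, 3> c = (a + b).eval() * b;  // c = (a + b) * b
    }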
- - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_morphing_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - - -#include "main.h" -#include <unsupported/Eigen/CXX11/Tensor> - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; - -template <typename DataType, int DataLayout, typename IndexType> -static void test_simple_reshape(const Eigen::SyclDevice& sycl_device) -{ - typename Tensor<DataType, 5 ,DataLayout, IndexType>::Dimensions dim1(2,3,1,7,1); - typename Tensor<DataType, 3 ,DataLayout, IndexType>::Dimensions dim2(2,3,7); - typename Tensor<DataType, 2 ,DataLayout, IndexType>::Dimensions dim3(6,7); - typename Tensor<DataType, 2 ,DataLayout, IndexType>::Dimensions dim4(2,21); - - Tensor<DataType, 5, DataLayout, IndexType> tensor1(dim1); - Tensor<DataType, 3, DataLayout, IndexType> tensor2(dim2); - Tensor<DataType, 2, DataLayout, IndexType> tensor3(dim3); - Tensor<DataType, 2, DataLayout, IndexType> tensor4(dim4); - - tensor1.setRandom(); - - DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor1.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType))); - DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(tensor3.size()*sizeof(DataType))); - DataType* gpu_data4 = static_cast<DataType*>(sycl_device.allocate(tensor4.size()*sizeof(DataType))); - - TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu1(gpu_data1, dim1); - TensorMap<Tensor<DataType, 3,DataLayout, IndexType>> gpu2(gpu_data2, dim2); - TensorMap<Tensor<DataType, 2,DataLayout, IndexType>> gpu3(gpu_data3, dim3); - TensorMap<Tensor<DataType, 2,DataLayout, IndexType>> gpu4(gpu_data4, dim4); - - sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(),(tensor1.size())*sizeof(DataType)); - - gpu2.device(sycl_device)=gpu1.reshape(dim2); - sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor1.size())*sizeof(DataType)); - - gpu3.device(sycl_device)=gpu1.reshape(dim3); - sycl_device.memcpyDeviceToHost(tensor3.data(), gpu_data3,(tensor3.size())*sizeof(DataType)); - - gpu4.device(sycl_device)=gpu1.reshape(dim2).reshape(dim4); - sycl_device.memcpyDeviceToHost(tensor4.data(), gpu_data4,(tensor4.size())*sizeof(DataType)); - for (IndexType i = 0; i < 2; ++i){ - for (IndexType j = 0; j < 3; ++j){ - for (IndexType k = 0; k < 7; ++k){ - VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k)); ///ColMajor - if (static_cast<int>(DataLayout) == static_cast<int>(ColMajor)) { - VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i+2*j,k)); ///ColMajor - VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j+3*k)); ///ColMajor - } - else{ - //VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k)); /// RowMajor - VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j*7 +k)); /// RowMajor - VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i*3 +j,k)); /// RowMajor - } - } - } - } - sycl_device.deallocate(gpu_data1); - sycl_device.deallocate(gpu_data2); - sycl_device.deallocate(gpu_data3); - sycl_device.deallocate(gpu_data4); -} - - -template<typename DataType, int DataLayout, typename IndexType> -static void test_reshape_as_lvalue(const Eigen::SyclDevice& sycl_device) -{ - typename Tensor<DataType, 3, DataLayout, IndexType>::Dimensions dim1(2,3,7); - typename Tensor<DataType, 2, DataLayout, IndexType>::Dimensions dim2(6,7); - typename Tensor<DataType, 5, DataLayout, IndexType>::Dimensions dim3(2,3,1,7,1); - Tensor<DataType, 3, DataLayout, 
IndexType> tensor(dim1); - Tensor<DataType, 2, DataLayout, IndexType> tensor2d(dim2); - Tensor<DataType, 5, DataLayout, IndexType> tensor5d(dim3); - - tensor.setRandom(); - - DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2d.size()*sizeof(DataType))); - DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(tensor5d.size()*sizeof(DataType))); - - TensorMap< Tensor<DataType, 3, DataLayout, IndexType> > gpu1(gpu_data1, dim1); - TensorMap< Tensor<DataType, 2, DataLayout, IndexType> > gpu2(gpu_data2, dim2); - TensorMap< Tensor<DataType, 5, DataLayout, IndexType> > gpu3(gpu_data3, dim3); - - sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); - - gpu2.reshape(dim1).device(sycl_device)=gpu1; - sycl_device.memcpyDeviceToHost(tensor2d.data(), gpu_data2,(tensor2d.size())*sizeof(DataType)); - - gpu3.reshape(dim1).device(sycl_device)=gpu1; - sycl_device.memcpyDeviceToHost(tensor5d.data(), gpu_data3,(tensor5d.size())*sizeof(DataType)); - - - for (IndexType i = 0; i < 2; ++i){ - for (IndexType j = 0; j < 3; ++j){ - for (IndexType k = 0; k < 7; ++k){ - VERIFY_IS_EQUAL(tensor5d(i,j,0,k,0), tensor(i,j,k)); - if (static_cast<int>(DataLayout) == static_cast<int>(ColMajor)) { - VERIFY_IS_EQUAL(tensor2d(i+2*j,k), tensor(i,j,k)); ///ColMajor - } - else{ - VERIFY_IS_EQUAL(tensor2d(i*3 +j,k),tensor(i,j,k)); /// RowMajor - } - } - } - } - sycl_device.deallocate(gpu_data1); - sycl_device.deallocate(gpu_data2); - sycl_device.deallocate(gpu_data3); -} - - -template <typename DataType, int DataLayout, typename IndexType> -static void test_simple_slice(const Eigen::SyclDevice &sycl_device) -{ - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - IndexType sizeDim5 = 11; - array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; - Tensor<DataType, 5,DataLayout, IndexType> tensor(tensorRange); - tensor.setRandom(); - array<IndexType, 5> slice1_range ={{1, 1, 1, 1, 1}}; - Tensor<DataType, 5,DataLayout, IndexType> slice1(slice1_range); - - DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(slice1.size()*sizeof(DataType))); - TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu1(gpu_data1, tensorRange); - TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu2(gpu_data2, slice1_range); - Eigen::DSizes<IndexType, 5> indices(1,2,3,4,5); - Eigen::DSizes<IndexType, 5> sizes(1,1,1,1,1); - sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); - gpu2.device(sycl_device)=gpu1.slice(indices, sizes); - sycl_device.memcpyDeviceToHost(slice1.data(), gpu_data2,(slice1.size())*sizeof(DataType)); - VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5)); - - - array<IndexType, 5> slice2_range ={{1,1,2,2,3}}; - Tensor<DataType, 5,DataLayout, IndexType> slice2(slice2_range); - DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(slice2.size()*sizeof(DataType))); - TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu3(gpu_data3, slice2_range); - Eigen::DSizes<IndexType, 5> indices2(1,1,3,4,5); - Eigen::DSizes<IndexType, 5> sizes2(1,1,2,2,3); - gpu3.device(sycl_device)=gpu1.slice(indices2, sizes2); - sycl_device.memcpyDeviceToHost(slice2.data(), gpu_data3,(slice2.size())*sizeof(DataType)); - for (IndexType i = 0; i 
< 2; ++i) { - for (IndexType j = 0; j < 2; ++j) { - for (IndexType k = 0; k < 3; ++k) { - VERIFY_IS_EQUAL(slice2(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k)); - } - } - } - sycl_device.deallocate(gpu_data1); - sycl_device.deallocate(gpu_data2); - sycl_device.deallocate(gpu_data3); -} - -template<typename DataType, int DataLayout, typename IndexType> -static void test_strided_slice_write_sycl(const Eigen::SyclDevice& sycl_device) -{ - typedef Tensor<DataType, 2, DataLayout, IndexType> Tensor2f; - typedef Eigen::DSizes<IndexType, 2> Index2; - IndexType sizeDim1 = 7L; - IndexType sizeDim2 = 11L; - array<IndexType, 2> tensorRange = {{sizeDim1, sizeDim2}}; - Tensor<DataType, 2, DataLayout, IndexType> tensor(tensorRange),tensor2(tensorRange); - IndexType sliceDim1 = 2; - IndexType sliceDim2 = 3; - array<IndexType, 2> sliceRange = {{sliceDim1, sliceDim2}}; - Tensor2f slice(sliceRange); - Index2 strides(1L,1L); - Index2 indicesStart(3L,4L); - Index2 indicesStop(5L,7L); - Index2 lengths(2L,3L); - - DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType))); - DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(slice.size()*sizeof(DataType))); - TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange); - TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu2(gpu_data2, tensorRange); - TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu3(gpu_data3, sliceRange); - - - tensor.setRandom(); - sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); - gpu2.device(sycl_device)=gpu1; - - slice.setRandom(); - sycl_device.memcpyHostToDevice(gpu_data3, slice.data(),(slice.size())*sizeof(DataType)); - - - gpu1.slice(indicesStart,lengths).device(sycl_device)=gpu3; - gpu2.stridedSlice(indicesStart,indicesStop,strides).device(sycl_device)=gpu3; - sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data1,(tensor.size())*sizeof(DataType)); - sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor2.size())*sizeof(DataType)); - - for(IndexType i=0;i<sizeDim1;i++) - for(IndexType j=0;j<sizeDim2;j++){ - VERIFY_IS_EQUAL(tensor(i,j), tensor2(i,j)); - } - sycl_device.deallocate(gpu_data1); - sycl_device.deallocate(gpu_data2); - sycl_device.deallocate(gpu_data3); -} - -template<typename DataType, typename dev_Selector> void sycl_morphing_test_per_device(dev_Selector s){ - QueueInterface queueInterface(s); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_simple_slice<DataType, RowMajor, int64_t>(sycl_device); - test_simple_slice<DataType, ColMajor, int64_t>(sycl_device); - test_simple_reshape<DataType, RowMajor, int64_t>(sycl_device); - test_simple_reshape<DataType, ColMajor, int64_t>(sycl_device); - test_reshape_as_lvalue<DataType, RowMajor, int64_t>(sycl_device); - test_reshape_as_lvalue<DataType, ColMajor, int64_t>(sycl_device); - test_strided_slice_write_sycl<DataType, ColMajor, int64_t>(sycl_device); - test_strided_slice_write_sycl<DataType, RowMajor, int64_t>(sycl_device); -} -void test_cxx11_tensor_morphing_sycl() -{ - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_morphing_test_per_device<float>(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_notification.cpp b/eigen/unsupported/test/cxx11_tensor_notification.cpp index 183ef02..c946007 100644 --- a/eigen/unsupported/test/cxx11_tensor_notification.cpp +++ 
b/eigen/unsupported/test/cxx11_tensor_notification.cpp @@ -13,6 +13,15 @@ #include "main.h" #include <Eigen/CXX11/Tensor> +#if EIGEN_OS_WIN || EIGEN_OS_WIN64 +#include <windows.h> +void sleep(int seconds) { + Sleep(seconds*1000); +} +#else +#include <unistd.h> +#endif + namespace { @@ -31,7 +40,7 @@ static void test_notification_single() Eigen::Notification n; std::function<void()> func = std::bind(&WaitAndAdd, &n, &counter); thread_pool.Schedule(func); - EIGEN_SLEEP(1000); + sleep(1); // The thread should be waiting for the notification. VERIFY_IS_EQUAL(counter, 0); @@ -39,7 +48,7 @@ static void test_notification_single() // Unblock the thread n.Notify(); - EIGEN_SLEEP(1000); + sleep(1); // Verify the counter has been incremented VERIFY_IS_EQUAL(counter, 1); @@ -58,10 +67,10 @@ static void test_notification_multiple() thread_pool.Schedule(func); thread_pool.Schedule(func); thread_pool.Schedule(func); - EIGEN_SLEEP(1000); + sleep(1); VERIFY_IS_EQUAL(counter, 0); n.Notify(); - EIGEN_SLEEP(1000); + sleep(1); VERIFY_IS_EQUAL(counter, 4); } diff --git a/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu index 908a5e5..2f86980 100644 --- a/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -200,8 +200,6 @@ void test_cuda_trancendental() { Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem); Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_half(d_res3_half, num_elem); Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem); - Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res4_half(d_res3_half, num_elem); - Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res4_float(d_res3_float, num_elem); gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f); gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f); @@ -209,7 +207,6 @@ void test_cuda_trancendental() { gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::half>(); gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::half>(); gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast<Eigen::half>(); - gpu_res4_float.device(gpu_device) = gpu_float3.expm1().cast<Eigen::half>(); gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>(); gpu_res1_half.device(gpu_device) = gpu_res1_half.exp(); @@ -220,9 +217,6 @@ void test_cuda_trancendental() { gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>(); gpu_res3_half.device(gpu_device) = gpu_res3_half.log1p(); - gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>(); - gpu_res3_half.device(gpu_device) = gpu_res3_half.expm1(); - Tensor<float, 1> input1(num_elem); Tensor<Eigen::half, 1> half_prec1(num_elem); Tensor<Eigen::half, 1> full_prec1(num_elem); diff --git a/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp deleted file mode 100644 index dc748b7..0000000 --- a/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp +++ /dev/null @@ -1,157 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. 
-// Contact: <eigen@codeplay.com> -// Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_padding_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - - -#include "main.h" -#include <unsupported/Eigen/CXX11/Tensor> - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; - - -template<typename DataType, int DataLayout, typename IndexType> -static void test_simple_padding(const Eigen::SyclDevice& sycl_device) -{ - - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; - - Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange); - tensor.setRandom(); - - array<std::pair<IndexType, IndexType>, 4> paddings; - paddings[0] = std::make_pair(0, 0); - paddings[1] = std::make_pair(2, 1); - paddings[2] = std::make_pair(3, 4); - paddings[3] = std::make_pair(0, 0); - - IndexType padedSizeDim1 = 2; - IndexType padedSizeDim2 = 6; - IndexType padedSizeDim3 = 12; - IndexType padedSizeDim4 = 7; - array<IndexType, 4> padedtensorRange = {{padedSizeDim1, padedSizeDim2, padedSizeDim3, padedSizeDim4}}; - - Tensor<DataType, 4, DataLayout, IndexType> padded(padedtensorRange); - - - DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(padded.size()*sizeof(DataType))); - TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange); - TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu2(gpu_data2, padedtensorRange); - - VERIFY_IS_EQUAL(padded.dimension(0), 2+0); - VERIFY_IS_EQUAL(padded.dimension(1), 3+3); - VERIFY_IS_EQUAL(padded.dimension(2), 5+7); - VERIFY_IS_EQUAL(padded.dimension(3), 7+0); - sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); - gpu2.device(sycl_device)=gpu1.pad(paddings); - sycl_device.memcpyDeviceToHost(padded.data(), gpu_data2,(padded.size())*sizeof(DataType)); - for (IndexType i = 0; i < padedSizeDim1; ++i) { - for (IndexType j = 0; j < padedSizeDim2; ++j) { - for (IndexType k = 0; k < padedSizeDim3; ++k) { - for (IndexType l = 0; l < padedSizeDim4; ++l) { - if (j >= 2 && j < 5 && k >= 3 && k < 8) { - VERIFY_IS_EQUAL(padded(i,j,k,l), tensor(i,j-2,k-3,l)); - } else { - VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f); - } - } - } - } - } - sycl_device.deallocate(gpu_data1); - sycl_device.deallocate(gpu_data2); -} - -template<typename DataType, int DataLayout, typename IndexType> -static void test_padded_expr(const Eigen::SyclDevice& sycl_device) -{ - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; - - Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange); - tensor.setRandom(); - - array<std::pair<IndexType, IndexType>, 4> paddings; - paddings[0] = std::make_pair(0, 0); - paddings[1] = std::make_pair(2, 1); - paddings[2] = std::make_pair(3, 4); - paddings[3] = std::make_pair(0, 0); - - Eigen::DSizes<IndexType, 2> reshape_dims; - reshape_dims[0] = 12; - reshape_dims[1] = 84; - - - 
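The deleted padding tests in this file hinge on pad()'s convention that each entry is a (pad_before, pad_after) pair per dimension. A host-only sketch of those semantics, reusing the extents and pad pairs from test_simple_padding above:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <utility>

    int main() {
      Eigen::Tensor<float, 4> t(2, 3, 5, 7);
      t.setRandom();
      // Dimension d grows by pad_before + pad_after, so {2,3,5,7} padded
      // with {(0,0),(2,1),(3,4),(0,0)} becomes {2,6,12,7}; the new cells
      // are zero-filled, which is what the deleted test verified.
      Eigen::array<std::pair<int, int>, 4> paddings;
      paddings[0] = std::make_pair(0, 0);
      paddings[1] = std::make_pair(2, 1);
      paddings[2] = std::make_pair(3, 4);
      paddings[3] = std::make_pair(0, 0);
      Eigen::Tensor<float, 4> padded = t.pad(paddings);
      return padded.dimension(2) == 12 ? 0 : 1;
    }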
Tensor<DataType, 2, DataLayout, IndexType> result(reshape_dims); - - DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(result.size()*sizeof(DataType))); - TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange); - TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu2(gpu_data2, reshape_dims); - - - sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); - gpu2.device(sycl_device)=gpu1.pad(paddings).reshape(reshape_dims); - sycl_device.memcpyDeviceToHost(result.data(), gpu_data2,(result.size())*sizeof(DataType)); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 6; ++j) { - for (IndexType k = 0; k < 12; ++k) { - for (IndexType l = 0; l < 7; ++l) { - const float result_value = DataLayout == ColMajor ? - result(i+2*j,k+12*l) : result(j+6*i,l+7*k); - if (j >= 2 && j < 5 && k >= 3 && k < 8) { - VERIFY_IS_EQUAL(result_value, tensor(i,j-2,k-3,l)); - } else { - VERIFY_IS_EQUAL(result_value, 0.0f); - } - } - } - } - } - sycl_device.deallocate(gpu_data1); - sycl_device.deallocate(gpu_data2); -} - -template<typename DataType, typename dev_Selector> void sycl_padding_test_per_device(dev_Selector s){ - QueueInterface queueInterface(s); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_simple_padding<DataType, RowMajor, int64_t>(sycl_device); - test_simple_padding<DataType, ColMajor, int64_t>(sycl_device); - test_padded_expr<DataType, RowMajor, int64_t>(sycl_device); - test_padded_expr<DataType, ColMajor, int64_t>(sycl_device); - -} -void test_cxx11_tensor_padding_sycl() -{ - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_padding_test_per_device<float>(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp index 440d48b..a9ef829 100644 --- a/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp +++ b/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp @@ -14,168 +14,125 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_reduction_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_SYCL #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> -template <typename DataType, int DataLayout, typename IndexType> -static void test_full_reductions_mean_sycl(const Eigen::SyclDevice& sycl_device) { - const IndexType num_rows = 452; - const IndexType num_cols = 765; - array<IndexType, 2> tensorRange = {{num_rows, num_cols}}; +static void test_full_reductions_sycl(const Eigen::SyclDevice& sycl_device) { - Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange); - Tensor<DataType, 0, DataLayout, IndexType> full_redux; - Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu; - - in.setRandom(); - - full_redux = in.mean(); - - DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data =(DataType*)sycl_device.allocate(sizeof(DataType)); - - TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); - TensorMap<Tensor<DataType, 0, DataLayout, IndexType> > out_gpu(gpu_out_data); - - sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); - out_gpu.device(sycl_device) = in_gpu.mean(); - 
sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(DataType)); - // Check that the CPU and GPU reductions return the same result. - VERIFY_IS_APPROX(full_redux_gpu(), full_redux()); - sycl_device.deallocate(gpu_in_data); - sycl_device.deallocate(gpu_out_data); -} + const int num_rows = 452; + const int num_cols = 765; + array<int, 2> tensorRange = {{num_rows, num_cols}}; - -template <typename DataType, int DataLayout, typename IndexType> -static void test_full_reductions_min_sycl(const Eigen::SyclDevice& sycl_device) { - - const IndexType num_rows = 876; - const IndexType num_cols = 953; - array<IndexType, 2> tensorRange = {{num_rows, num_cols}}; - - Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange); - Tensor<DataType, 0, DataLayout, IndexType> full_redux; - Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu; + Tensor<float, 2> in(tensorRange); + Tensor<float, 0> full_redux; + Tensor<float, 0> full_redux_gpu; in.setRandom(); - full_redux = in.minimum(); + full_redux = in.sum(); - DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data =(DataType*)sycl_device.allocate(sizeof(DataType)); + float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float))); + float* gpu_out_data =(float*)sycl_device.allocate(sizeof(float)); - TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); - TensorMap<Tensor<DataType, 0, DataLayout, IndexType> > out_gpu(gpu_out_data); + TensorMap<Tensor<float, 2> > in_gpu(gpu_in_data, tensorRange); + TensorMap<Tensor<float, 0> > out_gpu(gpu_out_data); - sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); - out_gpu.device(sycl_device) = in_gpu.minimum(); - sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float)); + out_gpu.device(sycl_device) = in_gpu.sum(); + sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(float)); // Check that the CPU and GPU reductions return the same result. 
VERIFY_IS_APPROX(full_redux_gpu(), full_redux()); + sycl_device.deallocate(gpu_in_data); sycl_device.deallocate(gpu_out_data); } +static void test_first_dim_reductions_sycl(const Eigen::SyclDevice& sycl_device) { -template <typename DataType, int DataLayout, typename IndexType> -static void test_first_dim_reductions_max_sycl(const Eigen::SyclDevice& sycl_device) { - - IndexType dim_x = 145; - IndexType dim_y = 1; - IndexType dim_z = 67; + int dim_x = 145; + int dim_y = 1; + int dim_z = 67; - array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}}; - Eigen::array<IndexType, 1> red_axis; + array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}}; + Eigen::array<int, 1> red_axis; red_axis[0] = 0; - array<IndexType, 2> reduced_tensorRange = {{dim_y, dim_z}}; + array<int, 2> reduced_tensorRange = {{dim_y, dim_z}}; - Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange); - Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange); - Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange); + Tensor<float, 3> in(tensorRange); + Tensor<float, 2> redux(reduced_tensorRange); + Tensor<float, 2> redux_gpu(reduced_tensorRange); in.setRandom(); - redux= in.maximum(red_axis); + redux= in.sum(red_axis); - DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType))); + float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float))); + float* gpu_out_data = static_cast<float*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float))); - TensorMap<Tensor<DataType, 3, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); - TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > out_gpu(gpu_out_data, reduced_tensorRange); + TensorMap<Tensor<float, 3> > in_gpu(gpu_in_data, tensorRange); + TensorMap<Tensor<float, 2> > out_gpu(gpu_out_data, reduced_tensorRange); - sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); - out_gpu.device(sycl_device) = in_gpu.maximum(red_axis); - sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float)); + out_gpu.device(sycl_device) = in_gpu.sum(red_axis); + sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float)); // Check that the CPU and GPU reductions return the same result. 
- for(IndexType j=0; j<reduced_tensorRange[0]; j++ ) - for(IndexType k=0; k<reduced_tensorRange[1]; k++ ) + for(int j=0; j<reduced_tensorRange[0]; j++ ) + for(int k=0; k<reduced_tensorRange[1]; k++ ) VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k)); sycl_device.deallocate(gpu_in_data); sycl_device.deallocate(gpu_out_data); } -template <typename DataType, int DataLayout, typename IndexType> -static void test_last_dim_reductions_sum_sycl(const Eigen::SyclDevice &sycl_device) { +static void test_last_dim_reductions_sycl(const Eigen::SyclDevice &sycl_device) { - IndexType dim_x = 567; - IndexType dim_y = 1; - IndexType dim_z = 47; + int dim_x = 567; + int dim_y = 1; + int dim_z = 47; - array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}}; - Eigen::array<IndexType, 1> red_axis; + array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}}; + Eigen::array<int, 1> red_axis; red_axis[0] = 2; - array<IndexType, 2> reduced_tensorRange = {{dim_x, dim_y}}; + array<int, 2> reduced_tensorRange = {{dim_x, dim_y}}; - Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange); - Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange); - Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange); + Tensor<float, 3> in(tensorRange); + Tensor<float, 2> redux(reduced_tensorRange); + Tensor<float, 2> redux_gpu(reduced_tensorRange); in.setRandom(); redux= in.sum(red_axis); - DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType))); + float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float))); + float* gpu_out_data = static_cast<float*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float))); - TensorMap<Tensor<DataType, 3, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); - TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > out_gpu(gpu_out_data, reduced_tensorRange); + TensorMap<Tensor<float, 3> > in_gpu(gpu_in_data, tensorRange); + TensorMap<Tensor<float, 2> > out_gpu(gpu_out_data, reduced_tensorRange); - sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float)); out_gpu.device(sycl_device) = in_gpu.sum(red_axis); - sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(DataType)); + sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float)); // Check that the CPU and GPU reductions return the same result. 
- for(IndexType j=0; j<reduced_tensorRange[0]; j++ ) - for(IndexType k=0; k<reduced_tensorRange[1]; k++ ) + for(int j=0; j<reduced_tensorRange[0]; j++ ) + for(int k=0; k<reduced_tensorRange[1]; k++ ) VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k)); sycl_device.deallocate(gpu_in_data); sycl_device.deallocate(gpu_out_data); } -template<typename DataType> void sycl_reduction_test_per_device(const cl::sycl::device& d){ - std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl; - QueueInterface queueInterface(d); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - - test_full_reductions_mean_sycl<DataType, RowMajor, int64_t>(sycl_device); - test_full_reductions_min_sycl<DataType, RowMajor, int64_t>(sycl_device); - test_first_dim_reductions_max_sycl<DataType, RowMajor, int64_t>(sycl_device); - test_last_dim_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device); - test_full_reductions_mean_sycl<DataType, ColMajor, int64_t>(sycl_device); - test_full_reductions_min_sycl<DataType, ColMajor, int64_t>(sycl_device); - test_first_dim_reductions_max_sycl<DataType, ColMajor, int64_t>(sycl_device); - test_last_dim_reductions_sum_sycl<DataType, ColMajor, int64_t>(sycl_device); -} + void test_cxx11_tensor_reduction_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_reduction_test_per_device<float>(device)); - } + cl::sycl::gpu_selector s; + Eigen::SyclDevice sycl_device(s); + CALL_SUBTEST((test_full_reductions_sycl(sycl_device))); + CALL_SUBTEST((test_first_dim_reductions_sycl(sycl_device))); + CALL_SUBTEST((test_last_dim_reductions_sycl(sycl_device))); + } diff --git a/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp deleted file mode 100644 index 2f54844..0000000 --- a/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp +++ /dev/null @@ -1,221 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
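The reduction hunks above all exercise the same two shape rules: a full sum() yields a rank-0 tensor whose value is read back with operator() and no indices, while sum(axes) removes exactly the reduced dimensions. A compact host-only sketch of both, using the extents from the first-dim test; everything here is a plain CPU evaluation.

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 3> in(145, 1, 67);
      in.setRandom();
      // Full reduction: rank-0 result, read as full() with no indices.
      Eigen::Tensor<float, 0> full = in.sum();
      float total = full();
      (void)total;
      // Axis reduction: reducing dimension 0 of {145,1,67} leaves {1,67}.
      Eigen::array<int, 1> red_axis{{0}};
      Eigen::Tensor<float, 2> first = in.sum(red_axis);
      return first.dimension(1) == 67 ? 0 : 1;
    }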
- -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_reverse_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - -#include "main.h" -#include <unsupported/Eigen/CXX11/Tensor> - - -template <typename DataType, int DataLayout, typename IndexType> -static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) { - - IndexType dim1 = 2; - IndexType dim2 = 3; - IndexType dim3 = 5; - IndexType dim4 = 7; - - array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}}; - Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange); - Tensor<DataType, 4, DataLayout, IndexType> reversed_tensor(tensorRange); - tensor.setRandom(); - - array<bool, 4> dim_rev; - dim_rev[0] = false; - dim_rev[1] = true; - dim_rev[2] = true; - dim_rev[3] = false; - - DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(tensor.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data =static_cast<DataType*>(sycl_device.allocate(reversed_tensor.dimensions().TotalSize()*sizeof(DataType))); - - TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); - TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu(gpu_out_data, tensorRange); - - sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(),(tensor.dimensions().TotalSize())*sizeof(DataType)); - out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); - sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType)); - // Check that the CPU and GPU reductions return the same result. - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - for (IndexType k = 0; k < 5; ++k) { - for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(i,2-j,4-k,l)); - } - } - } - } - dim_rev[0] = true; - dim_rev[1] = false; - dim_rev[2] = false; - dim_rev[3] = false; - - out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); - sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType)); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - for (IndexType k = 0; k < 5; ++k) { - for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,l)); - } - } - } - } - - dim_rev[0] = true; - dim_rev[1] = false; - dim_rev[2] = false; - dim_rev[3] = true; - out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); - sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType)); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - for (IndexType k = 0; k < 5; ++k) { - for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,6-l)); - } - } - } - } - - sycl_device.deallocate(gpu_in_data); - sycl_device.deallocate(gpu_out_data); -} - - - -template <typename DataType, int DataLayout, typename IndexType> -static void test_expr_reverse(const Eigen::SyclDevice& sycl_device, bool LValue) -{ - IndexType dim1 = 2; - IndexType dim2 = 3; - IndexType dim3 = 5; - IndexType dim4 = 7; - - array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}}; - Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange); - Tensor<DataType, 4, DataLayout, IndexType> expected(tensorRange); - Tensor<DataType, 4, DataLayout, IndexType> result(tensorRange); - tensor.setRandom(); - - array<bool, 4> dim_rev; - 
dim_rev[0] = false; - dim_rev[1] = true; - dim_rev[2] = false; - dim_rev[3] = true; - - DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(tensor.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data_expected =static_cast<DataType*>(sycl_device.allocate(expected.dimensions().TotalSize()*sizeof(DataType))); - DataType* gpu_out_data_result =static_cast<DataType*>(sycl_device.allocate(result.dimensions().TotalSize()*sizeof(DataType))); - - TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); - TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_expected(gpu_out_data_expected, tensorRange); - TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_result(gpu_out_data_result, tensorRange); - - - sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(),(tensor.dimensions().TotalSize())*sizeof(DataType)); - - if (LValue) { - out_gpu_expected.reverse(dim_rev).device(sycl_device) = in_gpu; - } else { - out_gpu_expected.device(sycl_device) = in_gpu.reverse(dim_rev); - } - sycl_device.memcpyDeviceToHost(expected.data(), gpu_out_data_expected, expected.dimensions().TotalSize()*sizeof(DataType)); - - - array<IndexType, 4> src_slice_dim; - src_slice_dim[0] = 2; - src_slice_dim[1] = 3; - src_slice_dim[2] = 1; - src_slice_dim[3] = 7; - array<IndexType, 4> src_slice_start; - src_slice_start[0] = 0; - src_slice_start[1] = 0; - src_slice_start[2] = 0; - src_slice_start[3] = 0; - array<IndexType, 4> dst_slice_dim = src_slice_dim; - array<IndexType, 4> dst_slice_start = src_slice_start; - - for (IndexType i = 0; i < 5; ++i) { - if (LValue) { - out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) = - in_gpu.slice(src_slice_start, src_slice_dim); - } else { - out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) = - in_gpu.slice(src_slice_start, src_slice_dim).reverse(dim_rev); - } - src_slice_start[2] += 1; - dst_slice_start[2] += 1; - } - sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result, result.dimensions().TotalSize()*sizeof(DataType)); - - for (IndexType i = 0; i < expected.dimension(0); ++i) { - for (IndexType j = 0; j < expected.dimension(1); ++j) { - for (IndexType k = 0; k < expected.dimension(2); ++k) { - for (IndexType l = 0; l < expected.dimension(3); ++l) { - VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l)); - } - } - } - } - - dst_slice_start[2] = 0; - result.setRandom(); - sycl_device.memcpyHostToDevice(gpu_out_data_result, result.data(),(result.dimensions().TotalSize())*sizeof(DataType)); - for (IndexType i = 0; i < 5; ++i) { - if (LValue) { - out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) = - in_gpu.slice(dst_slice_start, dst_slice_dim); - } else { - out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) = - in_gpu.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim); - } - dst_slice_start[2] += 1; - } - sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result, result.dimensions().TotalSize()*sizeof(DataType)); - - for (IndexType i = 0; i < expected.dimension(0); ++i) { - for (IndexType j = 0; j < expected.dimension(1); ++j) { - for (IndexType k = 0; k < expected.dimension(2); ++k) { - for (IndexType l = 0; l < expected.dimension(3); ++l) { - VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l)); - } - } - } - } -} - - - -template<typename DataType> void sycl_reverse_test_per_device(const cl::sycl::device& d){ - std::cout << "Running on " << d.template 
get_info<cl::sycl::info::device::name>() << std::endl; - QueueInterface queueInterface(d); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_simple_reverse<DataType, RowMajor, int64_t>(sycl_device); - test_simple_reverse<DataType, ColMajor, int64_t>(sycl_device); - test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, false); - test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, false); - test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, true); - test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, true); -} -void test_cxx11_tensor_reverse_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_reverse_test_per_device<float>(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp deleted file mode 100644 index c88db7c..0000000 --- a/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// Benoit Steiner <benoit.steiner.goog@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_shuffling_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - - -#include "main.h" -#include <unsupported/Eigen/CXX11/Tensor> - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; - -template <typename DataType, int DataLayout, typename IndexType> -static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device) -{ - IndexType sizeDim1 = 2; - IndexType sizeDim2 = 3; - IndexType sizeDim3 = 5; - IndexType sizeDim4 = 7; - array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; - Tensor<DataType, 4, DataLayout,IndexType> tensor(tensorRange); - Tensor<DataType, 4, DataLayout,IndexType> no_shuffle(tensorRange); - tensor.setRandom(); - - const size_t buffSize =tensor.size()*sizeof(DataType); - array<IndexType, 4> shuffles; - shuffles[0] = 0; - shuffles[1] = 1; - shuffles[2] = 2; - shuffles[3] = 3; - DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(buffSize)); - DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(buffSize)); - - - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu1(gpu_data1, tensorRange); - TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu2(gpu_data2, tensorRange); - - sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(), buffSize); - - gpu2.device(sycl_device)=gpu1.shuffle(shuffles); - sycl_device.memcpyDeviceToHost(no_shuffle.data(), gpu_data2, buffSize); - sycl_device.synchronize(); - - VERIFY_IS_EQUAL(no_shuffle.dimension(0), sizeDim1); - VERIFY_IS_EQUAL(no_shuffle.dimension(1), sizeDim2); - VERIFY_IS_EQUAL(no_shuffle.dimension(2), sizeDim3); - VERIFY_IS_EQUAL(no_shuffle.dimension(3), sizeDim4); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { - for (IndexType l = 0; l < sizeDim4; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), 
no_shuffle(i,j,k,l)); - } - } - } - } - - shuffles[0] = 2; - shuffles[1] = 3; - shuffles[2] = 1; - shuffles[3] = 0; - array<IndexType, 4> tensorrangeShuffle = {{sizeDim3, sizeDim4, sizeDim2, sizeDim1}}; - Tensor<DataType, 4, DataLayout,IndexType> shuffle(tensorrangeShuffle); - DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(buffSize)); - TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu3(gpu_data3, tensorrangeShuffle); - - gpu3.device(sycl_device)=gpu1.shuffle(shuffles); - sycl_device.memcpyDeviceToHost(shuffle.data(), gpu_data3, buffSize); - sycl_device.synchronize(); - - VERIFY_IS_EQUAL(shuffle.dimension(0), sizeDim3); - VERIFY_IS_EQUAL(shuffle.dimension(1), sizeDim4); - VERIFY_IS_EQUAL(shuffle.dimension(2), sizeDim2); - VERIFY_IS_EQUAL(shuffle.dimension(3), sizeDim1); - - for (IndexType i = 0; i < sizeDim1; ++i) { - for (IndexType j = 0; j < sizeDim2; ++j) { - for (IndexType k = 0; k < sizeDim3; ++k) { - for (IndexType l = 0; l < sizeDim4; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i)); - } - } - } - } -} - - -template<typename DataType, typename dev_Selector> void sycl_shuffling_test_per_device(dev_Selector s){ - QueueInterface queueInterface(s); - auto sycl_device = Eigen::SyclDevice(&queueInterface); - test_simple_shuffling_sycl<DataType, RowMajor, int64_t>(sycl_device); - test_simple_shuffling_sycl<DataType, ColMajor, int64_t>(sycl_device); - -} -void test_cxx11_tensor_shuffling_sycl() -{ - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(sycl_shuffling_test_per_device<float>(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp deleted file mode 100644 index 603c374..0000000 --- a/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp +++ /dev/null @@ -1,203 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: <eigen@codeplay.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
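The deleted shuffling test above pins down the permutation convention: shuffles[n] names the input dimension that becomes output dimension n. A host-only sketch with the same {2, 3, 1, 0} permutation:

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 4> t(2, 3, 5, 7);
      t.setRandom();
      // Output dimension n takes input dimension shuffles[n], so the result
      // has extents {5, 7, 3, 2} and t(i,j,k,l) == s(k,l,j,i), the identity
      // the deleted test verified.
      Eigen::array<int, 4> shuffles{{2, 3, 1, 0}};
      Eigen::Tensor<float, 4> s = t.shuffle(shuffles);
      return s(4, 6, 2, 1) == t(1, 2, 4, 6) ? 0 : 1;
    }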
- -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_striding_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t -#define EIGEN_USE_SYCL - -#include <iostream> -#include <chrono> -#include <ctime> - -#include "main.h" -#include <unsupported/Eigen/CXX11/Tensor> - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; - - -template <typename DataType, int DataLayout, typename IndexType> -static void test_simple_striding(const Eigen::SyclDevice& sycl_device) -{ - - Eigen::array<IndexType, 4> tensor_dims = {{2,3,5,7}}; - Eigen::array<IndexType, 4> stride_dims = {{1,1,3,3}}; - - - Tensor<DataType, 4, DataLayout, IndexType> tensor(tensor_dims); - Tensor<DataType, 4, DataLayout,IndexType> no_stride(tensor_dims); - Tensor<DataType, 4, DataLayout,IndexType> stride(stride_dims); - - - std::size_t tensor_bytes = tensor.size() * sizeof(DataType); - std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType); - std::size_t stride_bytes = stride.size() * sizeof(DataType); - DataType * d_tensor = static_cast<DataType*>(sycl_device.allocate(tensor_bytes)); - DataType * d_no_stride = static_cast<DataType*>(sycl_device.allocate(no_stride_bytes)); - DataType * d_stride = static_cast<DataType*>(sycl_device.allocate(stride_bytes)); - - Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_tensor(d_tensor, tensor_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_no_stride(d_no_stride, tensor_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_stride(d_stride, stride_dims); - - - tensor.setRandom(); - array<IndexType, 4> strides; - strides[0] = 1; - strides[1] = 1; - strides[2] = 1; - strides[3] = 1; - sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes); - gpu_no_stride.device(sycl_device)=gpu_tensor.stride(strides); - sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes); - - //no_stride = tensor.stride(strides); - - VERIFY_IS_EQUAL(no_stride.dimension(0), 2); - VERIFY_IS_EQUAL(no_stride.dimension(1), 3); - VERIFY_IS_EQUAL(no_stride.dimension(2), 5); - VERIFY_IS_EQUAL(no_stride.dimension(3), 7); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - for (IndexType k = 0; k < 5; ++k) { - for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l)); - } - } - } - } - - strides[0] = 2; - strides[1] = 4; - strides[2] = 2; - strides[3] = 3; -//Tensor<float, 4, DataLayout> stride; -// stride = tensor.stride(strides); - - gpu_stride.device(sycl_device)=gpu_tensor.stride(strides); - sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes); - - VERIFY_IS_EQUAL(stride.dimension(0), 1); - VERIFY_IS_EQUAL(stride.dimension(1), 1); - VERIFY_IS_EQUAL(stride.dimension(2), 3); - VERIFY_IS_EQUAL(stride.dimension(3), 3); - - for (IndexType i = 0; i < 1; ++i) { - for (IndexType j = 0; j < 1; ++j) { - for (IndexType k = 0; k < 3; ++k) { - for (IndexType l = 0; l < 3; ++l) { - VERIFY_IS_EQUAL(tensor(2*i,4*j,2*k,3*l), stride(i,j,k,l)); - } - } - } - } - - sycl_device.deallocate(d_tensor); - sycl_device.deallocate(d_no_stride); - sycl_device.deallocate(d_stride); -} - -template <typename DataType, int DataLayout, typename IndexType> -static void test_striding_as_lvalue(const Eigen::SyclDevice& sycl_device) -{ - - Eigen::array<IndexType, 4> tensor_dims = {{2,3,5,7}}; - Eigen::array<IndexType, 4> stride_dims = {{3,12,10,21}}; - - - Tensor<DataType, 4, 
DataLayout, IndexType> tensor(tensor_dims); - Tensor<DataType, 4, DataLayout,IndexType> no_stride(stride_dims); - Tensor<DataType, 4, DataLayout,IndexType> stride(stride_dims); - - - std::size_t tensor_bytes = tensor.size() * sizeof(DataType); - std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType); - std::size_t stride_bytes = stride.size() * sizeof(DataType); - - DataType * d_tensor = static_cast<DataType*>(sycl_device.allocate(tensor_bytes)); - DataType * d_no_stride = static_cast<DataType*>(sycl_device.allocate(no_stride_bytes)); - DataType * d_stride = static_cast<DataType*>(sycl_device.allocate(stride_bytes)); - - Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_tensor(d_tensor, tensor_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_no_stride(d_no_stride, stride_dims); - Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_stride(d_stride, stride_dims); - - //Tensor<float, 4, DataLayout> tensor(2,3,5,7); - tensor.setRandom(); - array<IndexType, 4> strides; - strides[0] = 2; - strides[1] = 4; - strides[2] = 2; - strides[3] = 3; - -// Tensor<float, 4, DataLayout> result(3, 12, 10, 21); -// result.stride(strides) = tensor; - sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes); - gpu_stride.stride(strides).device(sycl_device)=gpu_tensor; - sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - for (IndexType k = 0; k < 5; ++k) { - for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), stride(2*i,4*j,2*k,3*l)); - } - } - } - } - - array<IndexType, 4> no_strides; - no_strides[0] = 1; - no_strides[1] = 1; - no_strides[2] = 1; - no_strides[3] = 1; -// Tensor<float, 4, DataLayout> result2(3, 12, 10, 21); -// result2.stride(strides) = tensor.stride(no_strides); - - gpu_no_stride.stride(strides).device(sycl_device)=gpu_tensor.stride(no_strides); - sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes); - - for (IndexType i = 0; i < 2; ++i) { - for (IndexType j = 0; j < 3; ++j) { - for (IndexType k = 0; k < 5; ++k) { - for (IndexType l = 0; l < 7; ++l) { - VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(2*i,4*j,2*k,3*l)); - } - } - } - } - sycl_device.deallocate(d_tensor); - sycl_device.deallocate(d_no_stride); - sycl_device.deallocate(d_stride); -} - - -template <typename Dev_selector> void tensorStridingPerDevice(Dev_selector& s){ - QueueInterface queueInterface(s); - auto sycl_device=Eigen::SyclDevice(&queueInterface); - test_simple_striding<float, ColMajor, int64_t>(sycl_device); - test_simple_striding<float, RowMajor, int64_t>(sycl_device); - test_striding_as_lvalue<float, ColMajor, int64_t>(sycl_device); - test_striding_as_lvalue<float, RowMajor, int64_t>(sycl_device); -} - -void test_cxx11_tensor_striding_sycl() { - for (const auto& device :Eigen::get_sycl_supported_devices()) { - CALL_SUBTEST(tensorStridingPerDevice(device)); - } -} diff --git a/eigen/unsupported/test/cxx11_tensor_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_sycl.cpp index 5cd0f4c..6a9c334 100644 --- a/eigen/unsupported/test/cxx11_tensor_sycl.cpp +++ b/eigen/unsupported/test/cxx11_tensor_sycl.cpp @@ -16,7 +16,7 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_sycl -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_SYCL #include "main.h" @@ -27,105 +27,36 @@ using 
Eigen::SyclDevice; using Eigen::Tensor; using Eigen::TensorMap; -template <typename DataType, int DataLayout, typename IndexType> -void test_sycl_mem_transfers(const Eigen::SyclDevice &sycl_device) { - IndexType sizeDim1 = 100; - IndexType sizeDim2 = 10; - IndexType sizeDim3 = 20; - array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; - Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange); - Tensor<DataType, 3, DataLayout, IndexType> out1(tensorRange); - Tensor<DataType, 3, DataLayout, IndexType> out2(tensorRange); - Tensor<DataType, 3, DataLayout, IndexType> out3(tensorRange); +void test_sycl_cpu(const Eigen::SyclDevice &sycl_device) { - in1 = in1.random(); - - DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(in1.size()*sizeof(DataType))); - DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(out1.size()*sizeof(DataType))); - - TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu1(gpu_data1, tensorRange); - TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu2(gpu_data2, tensorRange); - - sycl_device.memcpyHostToDevice(gpu_data1, in1.data(),(in1.size())*sizeof(DataType)); - sycl_device.memcpyHostToDevice(gpu_data2, in1.data(),(in1.size())*sizeof(DataType)); - gpu1.device(sycl_device) = gpu1 * 3.14f; - gpu2.device(sycl_device) = gpu2 * 2.7f; - sycl_device.memcpyDeviceToHost(out1.data(), gpu_data1,(out1.size())*sizeof(DataType)); - sycl_device.memcpyDeviceToHost(out2.data(), gpu_data1,(out2.size())*sizeof(DataType)); - sycl_device.memcpyDeviceToHost(out3.data(), gpu_data2,(out3.size())*sizeof(DataType)); - sycl_device.synchronize(); - - for (IndexType i = 0; i < in1.size(); ++i) { - VERIFY_IS_APPROX(out1(i), in1(i) * 3.14f); - VERIFY_IS_APPROX(out2(i), in1(i) * 3.14f); - VERIFY_IS_APPROX(out3(i), in1(i) * 2.7f); - } - - sycl_device.deallocate(gpu_data1); - sycl_device.deallocate(gpu_data2); -} - -template <typename DataType, int DataLayout, typename IndexType> -void test_sycl_mem_sync(const Eigen::SyclDevice &sycl_device) { - IndexType size = 20; - array<IndexType, 1> tensorRange = {{size}}; - Tensor<DataType, 1, DataLayout, IndexType> in1(tensorRange); - Tensor<DataType, 1, DataLayout, IndexType> in2(tensorRange); - Tensor<DataType, 1, DataLayout, IndexType> out(tensorRange); - - in1 = in1.random(); - in2 = in1; - - DataType* gpu_data = static_cast<DataType*>(sycl_device.allocate(in1.size()*sizeof(DataType))); - - TensorMap<Tensor<DataType, 1, DataLayout, IndexType>> gpu1(gpu_data, tensorRange); - sycl_device.memcpyHostToDevice(gpu_data, in1.data(),(in1.size())*sizeof(DataType)); - sycl_device.synchronize(); - in1.setZero(); - - sycl_device.memcpyDeviceToHost(out.data(), gpu_data, out.size()*sizeof(DataType)); - sycl_device.synchronize(); - - for (IndexType i = 0; i < in1.size(); ++i) { - VERIFY_IS_APPROX(out(i), in2(i)); - } - - sycl_device.deallocate(gpu_data); -} - -template <typename DataType, int DataLayout, typename IndexType> -void test_sycl_computations(const Eigen::SyclDevice &sycl_device) { - - IndexType sizeDim1 = 100; - IndexType sizeDim2 = 10; - IndexType sizeDim3 = 20; - array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; - Tensor<DataType, 3,DataLayout, IndexType> in1(tensorRange); - Tensor<DataType, 3,DataLayout, IndexType> in2(tensorRange); - Tensor<DataType, 3,DataLayout, IndexType> in3(tensorRange); - Tensor<DataType, 3,DataLayout, IndexType> out(tensorRange); + int sizeDim1 = 100; + int sizeDim2 = 100; + int sizeDim3 = 100; + array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + 
  Tensor<float, 3> in1(tensorRange);
+  Tensor<float, 3> in2(tensorRange);
+  Tensor<float, 3> in3(tensorRange);
+  Tensor<float, 3> out(tensorRange);

  in2 = in2.random();
  in3 = in3.random();

-  DataType * gpu_in1_data  = static_cast<DataType*>(sycl_device.allocate(in1.size()*sizeof(DataType)));
-  DataType * gpu_in2_data  = static_cast<DataType*>(sycl_device.allocate(in2.size()*sizeof(DataType)));
-  DataType * gpu_in3_data  = static_cast<DataType*>(sycl_device.allocate(in3.size()*sizeof(DataType)));
-  DataType * gpu_out_data  = static_cast<DataType*>(sycl_device.allocate(out.size()*sizeof(DataType)));
+  float * gpu_in1_data  = static_cast<float*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(float)));
+  float * gpu_in2_data  = static_cast<float*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(float)));
+  float * gpu_in3_data  = static_cast<float*>(sycl_device.allocate(in3.dimensions().TotalSize()*sizeof(float)));
+  float * gpu_out_data  = static_cast<float*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float)));

-  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, tensorRange);
-  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, tensorRange);
-  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in3(gpu_in3_data, tensorRange);
-  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange);
+  TensorMap<Tensor<float, 3>> gpu_in1(gpu_in1_data, tensorRange);
+  TensorMap<Tensor<float, 3>> gpu_in2(gpu_in2_data, tensorRange);
+  TensorMap<Tensor<float, 3>> gpu_in3(gpu_in3_data, tensorRange);
+  TensorMap<Tensor<float, 3>> gpu_out(gpu_out_data, tensorRange);

  /// a=1.2f
  gpu_in1.device(sycl_device) = gpu_in1.constant(1.2f);
-  sycl_device.memcpyDeviceToHost(in1.data(), gpu_in1_data ,(in1.size())*sizeof(DataType));
-  sycl_device.synchronize();
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(in1.data(), gpu_in1_data ,(in1.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
        VERIFY_IS_APPROX(in1(i,j,k), 1.2f);
      }
    }
@@ -134,12 +65,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {

  /// a=b*1.2f
  gpu_out.device(sycl_device) = gpu_in1 * 1.2f;
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data ,(out.size())*sizeof(DataType));
-  sycl_device.synchronize();
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data ,(out.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
        VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) * 1.2f);
      }
@@ -148,14 +77,12 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
  printf("a=b*1.2f Test Passed\n");

  /// c=a*b
-  sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.size())*sizeof(DataType));
+  sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.dimensions().TotalSize())*sizeof(float));
  gpu_out.device(sycl_device) = gpu_in1 * gpu_in2;
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType));
-  sycl_device.synchronize();
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
        VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) * in2(i,j,k));
@@ -166,11 +93,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {

  /// c=a+b
  gpu_out.device(sycl_device) = gpu_in1 + gpu_in2;
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType));
-  sycl_device.synchronize();
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
        VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k));
@@ -181,11 +107,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {

  /// c=a*a
  gpu_out.device(sycl_device) = gpu_in1 * gpu_in1;
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType));
-  sycl_device.synchronize();
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
        VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) * in1(i,j,k));
@@ -196,11 +121,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {

  //a*3.14f + b*2.7f
  gpu_out.device(sycl_device) = gpu_in1 * gpu_in1.constant(3.14f) + gpu_in2 * gpu_in2.constant(2.7f);
-  sycl_device.memcpyDeviceToHost(out.data(),gpu_out_data,(out.size())*sizeof(DataType));
-  sycl_device.synchronize();
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(out.data(),gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
        VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) * 3.14f + in2(i,j,k) * 2.7f);
@@ -210,13 +134,12 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
  printf("a*3.14f + b*2.7f Test Passed\n");

  ///d= (a>0.5? b:c)
-  sycl_device.memcpyHostToDevice(gpu_in3_data, in3.data(),(in3.size())*sizeof(DataType));
+  sycl_device.memcpyHostToDevice(gpu_in3_data, in3.data(),(in3.dimensions().TotalSize())*sizeof(float));
  gpu_out.device(sycl_device) =(gpu_in1 > gpu_in1.constant(0.5f)).select(gpu_in2, gpu_in3);
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType));
-  sycl_device.synchronize();
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
        VERIFY_IS_APPROX(out(i, j, k), (in1(i, j, k) > 0.5f) ? in2(i, j, k) : in3(i, j, k));
@@ -229,48 +152,8 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
  sycl_device.deallocate(gpu_in3_data);
  sycl_device.deallocate(gpu_out_data);
}
-template<typename Scalar1, typename Scalar2, int DataLayout, typename IndexType>
-static void test_sycl_cast(const Eigen::SyclDevice& sycl_device){
-    IndexType size = 20;
-    array<IndexType, 1> tensorRange = {{size}};
-    Tensor<Scalar1, 1, DataLayout, IndexType> in(tensorRange);
-    Tensor<Scalar2, 1, DataLayout, IndexType> out(tensorRange);
-    Tensor<Scalar2, 1, DataLayout, IndexType> out_host(tensorRange);
-
-    in = in.random();
-
-    Scalar1* gpu_in_data  = static_cast<Scalar1*>(sycl_device.allocate(in.size()*sizeof(Scalar1)));
-    Scalar2 * gpu_out_data =  static_cast<Scalar2*>(sycl_device.allocate(out.size()*sizeof(Scalar2)));
-
-    TensorMap<Tensor<Scalar1, 1, DataLayout, IndexType>> gpu_in(gpu_in_data, tensorRange);
-    TensorMap<Tensor<Scalar2, 1, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange);
-    sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.size())*sizeof(Scalar1));
-    gpu_out.device(sycl_device) = gpu_in. template cast<Scalar2>();
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, out.size()*sizeof(Scalar2));
-    out_host = in. template cast<Scalar2>();
-    for(IndexType i=0; i< size; i++)
-    {
-      VERIFY_IS_APPROX(out(i), out_host(i));
-    }
-    printf("cast Test Passed\n");
-    sycl_device.deallocate(gpu_in_data);
-    sycl_device.deallocate(gpu_out_data);
-}
-template<typename DataType, typename dev_Selector> void sycl_computing_test_per_device(dev_Selector s){
-  QueueInterface queueInterface(s);
-  auto sycl_device = Eigen::SyclDevice(&queueInterface);
-  test_sycl_mem_transfers<DataType, RowMajor, int64_t>(sycl_device);
-  test_sycl_computations<DataType, RowMajor, int64_t>(sycl_device);
-  test_sycl_mem_sync<DataType, RowMajor, int64_t>(sycl_device);
-  test_sycl_mem_transfers<DataType, ColMajor, int64_t>(sycl_device);
-  test_sycl_computations<DataType, ColMajor, int64_t>(sycl_device);
-  test_sycl_mem_sync<DataType, ColMajor, int64_t>(sycl_device);
-  test_sycl_cast<DataType, int, RowMajor, int64_t>(sycl_device);
-  test_sycl_cast<DataType, int, ColMajor, int64_t>(sycl_device);
-}
-
void test_cxx11_tensor_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(sycl_computing_test_per_device<float>(device));
-  }
+  cl::sycl::gpu_selector s;
+  Eigen::SyclDevice sycl_device(s);
+  CALL_SUBTEST(test_sycl_cpu(sycl_device));
}
diff --git a/eigen/unsupported/test/polynomialsolver.cpp b/eigen/unsupported/test/polynomialsolver.cpp
index 7ad4aa6..0c87478 100644
--- a/eigen/unsupported/test/polynomialsolver.cpp
+++ b/eigen/unsupported/test/polynomialsolver.cpp
@@ -32,10 +32,9 @@ bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve )
{
  typedef typename POLYNOMIAL::Index Index;
  typedef typename POLYNOMIAL::Scalar Scalar;
-  typedef typename POLYNOMIAL::RealScalar RealScalar;

  typedef typename SOLVER::RootsType RootsType;
-  typedef Matrix<RealScalar,Deg,1> EvalRootsType;
+  typedef Matrix<Scalar,Deg,1>     EvalRootsType;

  const Index deg = pols.size()-1;

@@ -58,7 +57,7 @@ bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve )
    cerr << endl;
  }

-  std::vector<RealScalar> rootModuli( roots.size() );
+  std::vector<Scalar> rootModuli( roots.size() );
  Map< EvalRootsType > aux( &rootModuli[0], roots.size() );
  aux = roots.array().abs();
  std::sort( rootModuli.begin(), rootModuli.end() );
@@ -84,7 +83,7 @@ void evalSolver( const POLYNOMIAL& pols )
{
  typedef typename POLYNOMIAL::Scalar Scalar;
-  typedef PolynomialSolver<Scalar, Deg > PolynomialSolverType;
+  typedef PolynomialSolver<Scalar, Deg >              PolynomialSolverType;

  PolynomialSolverType psolve;
  aux_evalSolver<Deg, POLYNOMIAL, PolynomialSolverType>( pols, psolve );
@@ -98,7 +97,6 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
{
  using std::sqrt;
  typedef typename POLYNOMIAL::Scalar Scalar;
-  typedef typename POLYNOMIAL::RealScalar RealScalar;

  typedef PolynomialSolver<Scalar, Deg >              PolynomialSolverType;

@@ -109,12 +107,15 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
  // 1) the roots found are correct
  // 2) the roots have distinct moduli

+  typedef typename POLYNOMIAL::Scalar Scalar;
+  typedef typename REAL_ROOTS::Scalar Real;
+
  //Test realRoots
-  std::vector< RealScalar > calc_realRoots;
-  psolve.realRoots( calc_realRoots, test_precision<RealScalar>());
-  VERIFY_IS_EQUAL( calc_realRoots.size() , (size_t)real_roots.size() );
+  std::vector< Real > calc_realRoots;
+  psolve.realRoots( calc_realRoots );
+  VERIFY( calc_realRoots.size() == (size_t)real_roots.size() );

-  const RealScalar psPrec = sqrt( test_precision<RealScalar>() );
+  const Scalar psPrec = sqrt( test_precision<Scalar>() );

  for( size_t i=0; i<calc_realRoots.size(); ++i )
  {
@@ -137,7 +138,7 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
  bool hasRealRoot;
  //Test absGreatestRealRoot
-  RealScalar r = psolve.absGreatestRealRoot( hasRealRoot );
+  Real r = psolve.absGreatestRealRoot( hasRealRoot );
  VERIFY( hasRealRoot == (real_roots.size() > 0 ) );
  if( hasRealRoot ){
    VERIFY( internal::isApprox( real_roots.array().abs().maxCoeff(), abs(r), psPrec ) ); }
@@ -166,11 +167,9 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
template<typename _Scalar, int _Deg>
void polynomialsolver(int deg)
{
-  typedef typename NumTraits<_Scalar>::Real RealScalar;
-  typedef internal::increment_if_fixed_size<_Deg> Dim;
+  typedef internal::increment_if_fixed_size<_Deg>     Dim;
  typedef Matrix<_Scalar,Dim::ret,1>                  PolynomialType;
  typedef Matrix<_Scalar,_Deg,1>                      EvalRootsType;
-  typedef Matrix<RealScalar,_Deg,1>                   RealRootsType;

  cout << "Standard cases" << endl;
  PolynomialType pols = PolynomialType::Random(deg+1);
@@ -183,11 +182,15 @@ void polynomialsolver(int deg)
  evalSolver<_Deg,PolynomialType>( pols );

  cout << "Test sugar" << endl;
-  RealRootsType realRoots = RealRootsType::Random(deg);
+  EvalRootsType realRoots = EvalRootsType::Random(deg);
  roots_to_monicPolynomial( realRoots, pols );
  evalSolverSugarFunction<_Deg>(
      pols,
-      realRoots.template cast <std::complex<RealScalar> >().eval(),
+      realRoots.template cast <
+                    std::complex<
+                         typename NumTraits<_Scalar>::Real
+                         >
+                    >(),
      realRoots );
}

@@ -211,6 +214,5 @@ void test_polynomialsolver()
                            internal::random<int>(9,13)
                            )) );
    CALL_SUBTEST_11((polynomialsolver<float,Dynamic>(1)) );
-    CALL_SUBTEST_12((polynomialsolver<std::complex<double>,Dynamic>(internal::random<int>(2,13))) );
  }
}
diff --git a/eigen/unsupported/test/sparse_extra.cpp b/eigen/unsupported/test/sparse_extra.cpp
index 4f6723d..a010ceb 100644
--- a/eigen/unsupported/test/sparse_extra.cpp
+++ b/eigen/unsupported/test/sparse_extra.cpp
@@ -129,19 +129,6 @@ template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& re
}

-template<typename SparseMatrixType>
-void check_marketio()
-{
-  typedef Matrix<typename SparseMatrixType::Scalar, Dynamic, Dynamic> DenseMatrix;
-  Index rows = internal::random<Index>(1,100);
-  Index cols = internal::random<Index>(1,100);
-  SparseMatrixType m1, m2;
-  m1 = DenseMatrix::Random(rows, cols).sparseView();
-  saveMarket(m1, "sparse_extra.mtx");
-  loadMarket(m2, "sparse_extra.mtx");
-  VERIFY_IS_EQUAL(DenseMatrix(m1),DenseMatrix(m2));
-}
-
void test_sparse_extra()
{
  for(int i = 0; i < g_repeat; i++) {
@@ -156,15 +143,5 @@ void test_sparse_extra()
    CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, ColMajor> >()) );
    CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, RowMajor> >()) );

-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,long int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,long int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,long int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,long int> >()) );
-
    TEST_SET_BUT_UNUSED_VARIABLE(s);
  }
}
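
For context, both the removed templated tensor tests and the float-only versions they revert to exercise the same host/device round trip: allocate a device buffer, map it, evaluate an expression on the device, copy back, verify on the host. Below is a minimal standalone sketch of that pattern, not part of the commit; it uses only device calls that appear in the hunks above, and the function name and extents are illustrative.

// Sketch only: round-trip one tensor expression through the SYCL device.
#define EIGEN_USE_SYCL
#include <unsupported/Eigen/CXX11/Tensor>
#include <cstddef>

static void sycl_roundtrip_sketch(const Eigen::SyclDevice& sycl_device) {
  Eigen::array<int, 3> tensorRange = {{2, 3, 5}};   // illustrative extents
  Eigen::Tensor<float, 3> in(tensorRange), out(tensorRange);
  in = in.random();

  // Device-side buffers and maps, as in the hunks above.
  const std::size_t bytes = in.dimensions().TotalSize() * sizeof(float);
  float* gpu_in  = static_cast<float*>(sycl_device.allocate(bytes));
  float* gpu_out = static_cast<float*>(sycl_device.allocate(bytes));
  Eigen::TensorMap<Eigen::Tensor<float, 3>> map_in(gpu_in, tensorRange);
  Eigen::TensorMap<Eigen::Tensor<float, 3>> map_out(gpu_out, tensorRange);

  // Copy in, evaluate on the device, copy back; the tests then VERIFY per element.
  sycl_device.memcpyHostToDevice(gpu_in, in.data(), bytes);
  map_out.device(sycl_device) = map_in * 1.2f;
  sycl_device.memcpyDeviceToHost(out.data(), gpu_out, bytes);
  sycl_device.synchronize();

  sycl_device.deallocate(gpu_in);
  sycl_device.deallocate(gpu_out);
}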
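The polynomialsolver.cpp hunks replace the RealScalar typedefs with plain Scalar/Real ones and drop the complex-coefficient subtest (CALL_SUBTEST_12). For reference, a small sketch of the solver API those tests drive, for a real scalar type; the concrete roots are illustrative and this is not code from the commit.

// Sketch only: build a monic polynomial from chosen roots, then recover them.
#include <unsupported/Eigen/Polynomials>
#include <iostream>
#include <vector>

int main() {
  Eigen::Vector3d realRoots(0.5, -2.0, 1.0);
  Eigen::Matrix<double, 4, 1> pols;            // degree 3 -> 4 coefficients
  Eigen::roots_to_monicPolynomial(realRoots, pols);

  Eigen::PolynomialSolver<double, 3> psolve(pols);
  std::vector<double> roots;
  psolve.realRoots(roots);                     // the call tested above
  for (double r : roots) std::cout << r << '\n';

  bool hasRealRoot = false;
  double g = psolve.absGreatestRealRoot(hasRealRoot);
  if (hasRealRoot) std::cout << "greatest |real root|: " << g << '\n';
}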
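Similarly, the deleted check_marketio covered the Matrix Market round trip provided by the SparseExtra module. A minimal standalone sketch of that saveMarket/loadMarket usage (the file name mirrors the removed test; the matrix size is illustrative):

// Sketch only: write a sparse matrix to a Matrix Market file and read it back.
#include <Eigen/Dense>
#include <Eigen/SparseCore>
#include <unsupported/Eigen/SparseExtra>
#include <cassert>

int main() {
  Eigen::MatrixXd dense = Eigen::MatrixXd::Random(10, 10);
  Eigen::SparseMatrix<double> m1 = dense.sparseView();
  Eigen::SparseMatrix<double> m2;
  Eigen::saveMarket(m1, "sparse_extra.mtx");
  Eigen::loadMarket(m2, "sparse_extra.mtx");
  // The removed test required exact equality of the dense views.
  assert(Eigen::MatrixXd(m2).isApprox(Eigen::MatrixXd(m1)));
}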