author     Stanislaw Halik <sthalik@misaki.pl>      2018-07-03 07:37:12 +0200
committer  Stanislaw Halik <sthalik@misaki.pl>      2018-07-03 08:13:09 +0200
commit     88534ba623421c956d8ffcda2d27f41d704d15ef (patch)
tree       fccc55245aec3f7381cd525a1355568e10ea37f4 /eigen/unsupported/test
parent     3ee09beb3f0458fbeb0b0e816f854b9d5b406e6b (diff)
update eigen
Diffstat (limited to 'eigen/unsupported/test')
-rw-r--r--  eigen/unsupported/test/CMakeLists.txt                      |  21
-rw-r--r--  eigen/unsupported/test/EulerAngles.cpp                     | 296
-rw-r--r--  eigen/unsupported/test/autodiff_scalar.cpp                 |  15
-rw-r--r--  eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp  |  24
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp     | 114
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp      | 267
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_chipping.cpp           |   8
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp      | 622
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp | 180
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp      | 290
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp   | 469
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_device_sycl.cpp        |  60
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_expr.cpp               |  46
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_fixed_size.cpp         |   2
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp   |  54
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp      | 248
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_notification.cpp       |  17
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu     |   6
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp       | 157
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp     | 167
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp       | 221
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp     | 119
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp      | 203
-rw-r--r--  eigen/unsupported/test/cxx11_tensor_sycl.cpp               | 219
-rw-r--r--  eigen/unsupported/test/polynomialsolver.cpp                |  34
-rw-r--r--  eigen/unsupported/test/sparse_extra.cpp                    |  23
26 files changed, 326 insertions(+), 3556 deletions(-)
diff --git a/eigen/unsupported/test/CMakeLists.txt b/eigen/unsupported/test/CMakeLists.txt
index 003c9de..b5fa1c8 100644
--- a/eigen/unsupported/test/CMakeLists.txt
+++ b/eigen/unsupported/test/CMakeLists.txt
@@ -21,17 +21,6 @@ include_directories(../../test ../../unsupported ../../Eigen
find_package (Threads)
-find_package(Xsmm)
-if(XSMM_FOUND)
- add_definitions("-DEIGEN_USE_LIBXSMM")
- include_directories(${XSMM_INCLUDES})
- link_directories(${XSMM_LIBRARIES})
- set(EXTERNAL_LIBS ${EXTERNAL_LIBS} xsmm)
- ei_add_property(EIGEN_TESTED_BACKENDS "Xsmm, ")
-else(XSMM_FOUND)
- ei_add_property(EIGEN_MISSING_BACKENDS "Xsmm, ")
-endif(XSMM_FOUND)
-
find_package(GoogleHash)
if(GOOGLEHASH_FOUND)
add_definitions("-DEIGEN_GOOGLEHASH_SUPPORT")
@@ -157,16 +146,6 @@ if(EIGEN_TEST_CXX11)
ei_add_test_sycl(cxx11_tensor_broadcast_sycl "-std=c++11")
ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11")
ei_add_test_sycl(cxx11_tensor_reduction_sycl "-std=c++11")
- ei_add_test_sycl(cxx11_tensor_morphing_sycl "-std=c++11")
- ei_add_test_sycl(cxx11_tensor_shuffling_sycl "-std=c++11")
- ei_add_test_sycl(cxx11_tensor_padding_sycl "-std=c++11")
- ei_add_test_sycl(cxx11_tensor_builtins_sycl "-std=c++11")
- ei_add_test_sycl(cxx11_tensor_contract_sycl "-std=c++11")
- ei_add_test_sycl(cxx11_tensor_concatenation_sycl "-std=c++11")
- ei_add_test_sycl(cxx11_tensor_reverse_sycl "-std=c++11")
- ei_add_test_sycl(cxx11_tensor_convolution_sycl "-std=c++11")
- ei_add_test_sycl(cxx11_tensor_striding_sycl "-std=c++11")
- ei_add_test_sycl(cxx11_tensor_chipping_sycl "-std=c++11")
endif(EIGEN_TEST_SYCL)
# It should be safe to always run these tests as there is some fallback code for
# older compiler that don't support cxx11.
diff --git a/eigen/unsupported/test/EulerAngles.cpp b/eigen/unsupported/test/EulerAngles.cpp
index 79ee728..a8cb528 100644
--- a/eigen/unsupported/test/EulerAngles.cpp
+++ b/eigen/unsupported/test/EulerAngles.cpp
@@ -13,219 +13,146 @@
using namespace Eigen;
-// Unfortunately, we need to specialize it in order to work. (We could add it in main.h test framework)
-template <typename Scalar, class System>
-bool verifyIsApprox(const Eigen::EulerAngles<Scalar, System>& a, const Eigen::EulerAngles<Scalar, System>& b)
-{
- return verifyIsApprox(a.angles(), b.angles());
-}
-
-// Verify that x is in the approxed range [a, b]
-#define VERIFY_APPROXED_RANGE(a, x, b) \
- do { \
- VERIFY_IS_APPROX_OR_LESS_THAN(a, x); \
- VERIFY_IS_APPROX_OR_LESS_THAN(x, b); \
- } while(0)
-
-const char X = EULER_X;
-const char Y = EULER_Y;
-const char Z = EULER_Z;
-
-template<typename Scalar, class EulerSystem>
-void verify_euler(const EulerAngles<Scalar, EulerSystem>& e)
+template<typename EulerSystem, typename Scalar>
+void verify_euler_ranged(const Matrix<Scalar,3,1>& ea,
+ bool positiveRangeAlpha, bool positiveRangeBeta, bool positiveRangeGamma)
{
typedef EulerAngles<Scalar, EulerSystem> EulerAnglesType;
typedef Matrix<Scalar,3,3> Matrix3;
typedef Matrix<Scalar,3,1> Vector3;
typedef Quaternion<Scalar> QuaternionType;
typedef AngleAxis<Scalar> AngleAxisType;
+ using std::abs;
- const Scalar ONE = Scalar(1);
- const Scalar HALF_PI = Scalar(EIGEN_PI / 2);
- const Scalar PI = Scalar(EIGEN_PI);
+ Scalar alphaRangeStart, alphaRangeEnd;
+ Scalar betaRangeStart, betaRangeEnd;
+ Scalar gammaRangeStart, gammaRangeEnd;
- // It's very important calc the acceptable precision depending on the distance from the pole.
- const Scalar longitudeRadius = std::abs(
- EulerSystem::IsTaitBryan ?
- std::cos(e.beta()) :
- std::sin(e.beta())
- );
- Scalar precision = test_precision<Scalar>() / longitudeRadius;
+ if (positiveRangeAlpha)
+ {
+ alphaRangeStart = Scalar(0);
+ alphaRangeEnd = Scalar(2 * EIGEN_PI);
+ }
+ else
+ {
+ alphaRangeStart = -Scalar(EIGEN_PI);
+ alphaRangeEnd = Scalar(EIGEN_PI);
+ }
- Scalar betaRangeStart, betaRangeEnd;
- if (EulerSystem::IsTaitBryan)
+ if (positiveRangeBeta)
+ {
+ betaRangeStart = Scalar(0);
+ betaRangeEnd = Scalar(2 * EIGEN_PI);
+ }
+ else
+ {
+ betaRangeStart = -Scalar(EIGEN_PI);
+ betaRangeEnd = Scalar(EIGEN_PI);
+ }
+
+ if (positiveRangeGamma)
{
- betaRangeStart = -HALF_PI;
- betaRangeEnd = HALF_PI;
+ gammaRangeStart = Scalar(0);
+ gammaRangeEnd = Scalar(2 * EIGEN_PI);
}
else
{
- if (!EulerSystem::IsBetaOpposite)
- {
- betaRangeStart = 0;
- betaRangeEnd = PI;
- }
- else
- {
- betaRangeStart = -PI;
- betaRangeEnd = 0;
- }
+ gammaRangeStart = -Scalar(EIGEN_PI);
+ gammaRangeEnd = Scalar(EIGEN_PI);
}
+ const int i = EulerSystem::AlphaAxisAbs - 1;
+ const int j = EulerSystem::BetaAxisAbs - 1;
+ const int k = EulerSystem::GammaAxisAbs - 1;
+
+ const int iFactor = EulerSystem::IsAlphaOpposite ? -1 : 1;
+ const int jFactor = EulerSystem::IsBetaOpposite ? -1 : 1;
+ const int kFactor = EulerSystem::IsGammaOpposite ? -1 : 1;
+
const Vector3 I = EulerAnglesType::AlphaAxisVector();
const Vector3 J = EulerAnglesType::BetaAxisVector();
const Vector3 K = EulerAnglesType::GammaAxisVector();
- // Is approx checks
- VERIFY(e.isApprox(e));
- VERIFY_IS_APPROX(e, e);
- VERIFY_IS_NOT_APPROX(e, EulerAnglesType(e.alpha() + ONE, e.beta() + ONE, e.gamma() + ONE));
-
- const Matrix3 m(e);
- VERIFY_IS_APPROX(Scalar(m.determinant()), ONE);
-
- EulerAnglesType ebis(m);
+ EulerAnglesType e(ea[0], ea[1], ea[2]);
- // When no roll(acting like polar representation), we have the best precision.
- // One of those cases is when the Euler angles are on the pole, and because it's singular case,
- // the computation returns no roll.
- if (ebis.beta() == 0)
- precision = test_precision<Scalar>();
+ Matrix3 m(e);
+ Vector3 eabis = EulerAnglesType(m, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles();
// Check that eabis is in range
- VERIFY_APPROXED_RANGE(-PI, ebis.alpha(), PI);
- VERIFY_APPROXED_RANGE(betaRangeStart, ebis.beta(), betaRangeEnd);
- VERIFY_APPROXED_RANGE(-PI, ebis.gamma(), PI);
-
- const Matrix3 mbis(AngleAxisType(ebis.alpha(), I) * AngleAxisType(ebis.beta(), J) * AngleAxisType(ebis.gamma(), K));
- VERIFY_IS_APPROX(Scalar(mbis.determinant()), ONE);
- VERIFY_IS_APPROX(mbis, ebis.toRotationMatrix());
- /*std::cout << "===================\n" <<
- "e: " << e << std::endl <<
- "eabis: " << eabis.transpose() << std::endl <<
- "m: " << m << std::endl <<
- "mbis: " << mbis << std::endl <<
- "X: " << (m * Vector3::UnitX()).transpose() << std::endl <<
- "X: " << (mbis * Vector3::UnitX()).transpose() << std::endl;*/
- VERIFY(m.isApprox(mbis, precision));
-
- // Test if ea and eabis are the same
- // Need to check both singular and non-singular cases
- // There are two singular cases.
- // 1. When I==K and sin(ea(1)) == 0
- // 2. When I!=K and cos(ea(1)) == 0
-
- // TODO: Make this test work well, and use range saturation function.
- /*// If I==K, and ea[1]==0, then there no unique solution.
- // The remark apply in the case where I!=K, and |ea[1]| is close to +-pi/2.
- if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision<Scalar>())) )
- VERIFY_IS_APPROX(ea, eabis);*/
+ VERIFY(alphaRangeStart <= eabis[0] && eabis[0] <= alphaRangeEnd);
+ VERIFY(betaRangeStart <= eabis[1] && eabis[1] <= betaRangeEnd);
+ VERIFY(gammaRangeStart <= eabis[2] && eabis[2] <= gammaRangeEnd);
- // Quaternions
- const QuaternionType q(e);
- ebis = q;
- const QuaternionType qbis(ebis);
- VERIFY(internal::isApprox<Scalar>(std::abs(q.dot(qbis)), ONE, precision));
- //VERIFY_IS_APPROX(eabis, eabis2);// Verify that the euler angles are still the same
+ Vector3 eabis2 = m.eulerAngles(i, j, k);
- // A suggestion for simple product test when will be supported.
- /*EulerAnglesType e2(PI/2, PI/2, PI/2);
- Matrix3 m2(e2);
- VERIFY_IS_APPROX(e*e2, m*m2);*/
-}
-
-template<signed char A, signed char B, signed char C, typename Scalar>
-void verify_euler_vec(const Matrix<Scalar,3,1>& ea)
-{
- verify_euler(EulerAngles<Scalar, EulerSystem<A, B, C> >(ea[0], ea[1], ea[2]));
-}
-
-template<signed char A, signed char B, signed char C, typename Scalar>
-void verify_euler_all_neg(const Matrix<Scalar,3,1>& ea)
-{
- verify_euler_vec<+A,+B,+C>(ea);
- verify_euler_vec<+A,+B,-C>(ea);
- verify_euler_vec<+A,-B,+C>(ea);
- verify_euler_vec<+A,-B,-C>(ea);
+ // Invert the relevant axes
+ eabis2[0] *= iFactor;
+ eabis2[1] *= jFactor;
+ eabis2[2] *= kFactor;
- verify_euler_vec<-A,+B,+C>(ea);
- verify_euler_vec<-A,+B,-C>(ea);
- verify_euler_vec<-A,-B,+C>(ea);
- verify_euler_vec<-A,-B,-C>(ea);
-}
-
-template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)
-{
- verify_euler_all_neg<X,Y,Z>(ea);
- verify_euler_all_neg<X,Y,X>(ea);
- verify_euler_all_neg<X,Z,Y>(ea);
- verify_euler_all_neg<X,Z,X>(ea);
+ // Saturate the angles to the correct range
+ if (positiveRangeAlpha && (eabis2[0] < 0))
+ eabis2[0] += Scalar(2 * EIGEN_PI);
+ if (positiveRangeBeta && (eabis2[1] < 0))
+ eabis2[1] += Scalar(2 * EIGEN_PI);
+ if (positiveRangeGamma && (eabis2[2] < 0))
+ eabis2[2] += Scalar(2 * EIGEN_PI);
- verify_euler_all_neg<Y,Z,X>(ea);
- verify_euler_all_neg<Y,Z,Y>(ea);
- verify_euler_all_neg<Y,X,Z>(ea);
- verify_euler_all_neg<Y,X,Y>(ea);
+ VERIFY_IS_APPROX(eabis, eabis2); // Verify that our estimate matches m.eulerAngles()
- verify_euler_all_neg<Z,X,Y>(ea);
- verify_euler_all_neg<Z,X,Z>(ea);
- verify_euler_all_neg<Z,Y,X>(ea);
- verify_euler_all_neg<Z,Y,Z>(ea);
-}
-
-template<typename Scalar> void check_singular_cases(const Scalar& singularBeta)
-{
- typedef Matrix<Scalar,3,1> Vector3;
- const Scalar PI = Scalar(EIGEN_PI);
+ Matrix3 mbis(AngleAxisType(eabis[0], I) * AngleAxisType(eabis[1], J) * AngleAxisType(eabis[2], K));
+ VERIFY_IS_APPROX(m, mbis);
- for (Scalar epsilon = NumTraits<Scalar>::epsilon(); epsilon < 1; epsilon *= Scalar(1.2))
+ // Tests that are only relevant when no positive range is requested
+ if (!(positiveRangeAlpha || positiveRangeBeta || positiveRangeGamma))
{
- check_all_var(Vector3(PI/4, singularBeta, PI/3));
- check_all_var(Vector3(PI/4, singularBeta - epsilon, PI/3));
- check_all_var(Vector3(PI/4, singularBeta - Scalar(1.5)*epsilon, PI/3));
- check_all_var(Vector3(PI/4, singularBeta - 2*epsilon, PI/3));
- check_all_var(Vector3(PI*Scalar(0.8), singularBeta - epsilon, Scalar(0.9)*PI));
- check_all_var(Vector3(PI*Scalar(-0.9), singularBeta + epsilon, PI*Scalar(0.3)));
- check_all_var(Vector3(PI*Scalar(-0.6), singularBeta + Scalar(1.5)*epsilon, PI*Scalar(0.3)));
- check_all_var(Vector3(PI*Scalar(-0.5), singularBeta + 2*epsilon, PI*Scalar(0.4)));
- check_all_var(Vector3(PI*Scalar(0.9), singularBeta + epsilon, Scalar(0.8)*PI));
+ /* If I==K and ea[1]==0, then there is no unique solution. */
+ /* The same remark applies when I!=K and |ea[1]| is close to pi/2. */
+ if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision<Scalar>())) )
+ VERIFY((ea-eabis).norm() <= test_precision<Scalar>());
+
+ // approx_or_less_than does not work for 0
+ VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1)));
}
- // This one for sanity, it had a problem with near pole cases in float scalar.
- check_all_var(Vector3(PI*Scalar(0.8), singularBeta - Scalar(1E-6), Scalar(0.9)*PI));
+ // Quaternions
+ QuaternionType q(e);
+ eabis = EulerAnglesType(q, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles();
+ VERIFY_IS_APPROX(eabis, eabis2); // Verify that the Euler angles are still the same
}
-template<typename Scalar> void eulerangles_manual()
+template<typename EulerSystem, typename Scalar>
+void verify_euler(const Matrix<Scalar,3,1>& ea)
{
- typedef Matrix<Scalar,3,1> Vector3;
- const Vector3 Zero = Vector3::Zero();
- const Scalar PI = Scalar(EIGEN_PI);
-
- check_all_var(Zero);
-
- // singular cases
- check_singular_cases(PI/2);
- check_singular_cases(-PI/2);
-
- check_singular_cases(Scalar(0));
- check_singular_cases(Scalar(-0));
-
- check_singular_cases(PI);
- check_singular_cases(-PI);
-
- // non-singular cases
- VectorXd alpha = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI);
- VectorXd beta = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.49) * PI, Scalar(0.49) * PI);
- VectorXd gamma = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI);
- for (int i = 0; i < alpha.size(); ++i) {
- for (int j = 0; j < beta.size(); ++j) {
- for (int k = 0; k < gamma.size(); ++k) {
- check_all_var(Vector3d(alpha(i), beta(j), gamma(k)));
- }
- }
- }
+ verify_euler_ranged<EulerSystem>(ea, false, false, false);
+ verify_euler_ranged<EulerSystem>(ea, false, false, true);
+ verify_euler_ranged<EulerSystem>(ea, false, true, false);
+ verify_euler_ranged<EulerSystem>(ea, false, true, true);
+ verify_euler_ranged<EulerSystem>(ea, true, false, false);
+ verify_euler_ranged<EulerSystem>(ea, true, false, true);
+ verify_euler_ranged<EulerSystem>(ea, true, true, false);
+ verify_euler_ranged<EulerSystem>(ea, true, true, true);
}
-template<typename Scalar> void eulerangles_rand()
+template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)
+{
+ verify_euler<EulerSystemXYZ>(ea);
+ verify_euler<EulerSystemXYX>(ea);
+ verify_euler<EulerSystemXZY>(ea);
+ verify_euler<EulerSystemXZX>(ea);
+
+ verify_euler<EulerSystemYZX>(ea);
+ verify_euler<EulerSystemYZY>(ea);
+ verify_euler<EulerSystemYXZ>(ea);
+ verify_euler<EulerSystemYXY>(ea);
+
+ verify_euler<EulerSystemZXY>(ea);
+ verify_euler<EulerSystemZXZ>(ea);
+ verify_euler<EulerSystemZYX>(ea);
+ verify_euler<EulerSystemZYZ>(ea);
+}
+
+template<typename Scalar> void eulerangles()
{
typedef Matrix<Scalar,3,3> Matrix3;
typedef Matrix<Scalar,3,1> Vector3;
@@ -274,19 +201,8 @@ template<typename Scalar> void eulerangles_rand()
void test_EulerAngles()
{
- // Simple cast test
- EulerAnglesXYZd onesEd(1, 1, 1);
- EulerAnglesXYZf onesEf = onesEd.cast<float>();
- VERIFY_IS_APPROX(onesEd, onesEf.cast<double>());
-
- CALL_SUBTEST_1( eulerangles_manual<float>() );
- CALL_SUBTEST_2( eulerangles_manual<double>() );
-
for(int i = 0; i < g_repeat; i++) {
- CALL_SUBTEST_3( eulerangles_rand<float>() );
- CALL_SUBTEST_4( eulerangles_rand<double>() );
+ CALL_SUBTEST_1( eulerangles<float>() );
+ CALL_SUBTEST_2( eulerangles<double>() );
}
-
- // TODO: Add tests for auto diff
- // TODO: Add tests for complex numbers
}
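
The rewritten EulerAngles.cpp above round-trips Euler angles through rotation matrices and quaternions and checks the recovered angles against m.eulerAngles(). A minimal standalone sketch of those conversions, not part of the patch — it assumes only that the unsupported EulerAngles module is on the include path, and the angle values are arbitrary:

// Standalone sketch of the conversions exercised by EulerAngles.cpp.
#include <unsupported/Eigen/EulerAngles>
#include <iostream>

int main() {
  // Alpha about X, beta about Y, gamma about Z, as in EulerAnglesXYZd above.
  Eigen::EulerAnglesXYZd e(0.5, -0.3, 1.2);

  // Round-trip through a rotation matrix, as verify_euler_ranged() does.
  Eigen::Matrix3d m = e.toRotationMatrix();
  Eigen::EulerAnglesXYZd e2(m);            // recover the angles from the matrix

  // Round-trip through a quaternion.
  Eigen::Quaterniond q(e);
  Eigen::EulerAnglesXYZd e3(q);

  std::cout << e2.angles().transpose() << "\n"
            << e3.angles().transpose() << "\n";
  return 0;
}
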
diff --git a/eigen/unsupported/test/autodiff_scalar.cpp b/eigen/unsupported/test/autodiff_scalar.cpp
index 4df2f5c..9cf1128 100644
--- a/eigen/unsupported/test/autodiff_scalar.cpp
+++ b/eigen/unsupported/test/autodiff_scalar.cpp
@@ -72,6 +72,20 @@ template<typename Scalar> void check_hyperbolic_functions()
VERIFY_IS_APPROX(res3.derivatives().x(), Scalar(0.339540557256150));
}
+template <typename Scalar>
+void check_limits_specialization()
+{
+ typedef Eigen::Matrix<Scalar, 1, 1> Deriv;
+ typedef Eigen::AutoDiffScalar<Deriv> AD;
+
+ typedef std::numeric_limits<AD> A;
+ typedef std::numeric_limits<Scalar> B;
+
+#if EIGEN_HAS_CXX11
+ VERIFY(bool(std::is_base_of<B, A>::value));
+#endif
+}
+
void test_autodiff_scalar()
{
for(int i = 0; i < g_repeat; i++) {
@@ -79,5 +93,6 @@ void test_autodiff_scalar()
CALL_SUBTEST_2( check_atan2<double>() );
CALL_SUBTEST_3( check_hyperbolic_functions<float>() );
CALL_SUBTEST_4( check_hyperbolic_functions<double>() );
+ CALL_SUBTEST_5( check_limits_specialization<double>());
}
}
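
The new check_limits_specialization() only asserts that std::numeric_limits<AD> derives from the underlying scalar's limits. For context, a short sketch of what AutoDiffScalar itself does, using the same Deriv/AD typedefs as the test; the seed and polynomial are illustrative assumptions:

// Sketch: forward-mode differentiation with AutoDiffScalar.
#include <unsupported/Eigen/AutoDiff>
#include <iostream>

int main() {
  typedef Eigen::Matrix<double, 1, 1> Deriv;
  typedef Eigen::AutoDiffScalar<Deriv> AD;

  AD x(2.0, Deriv::Ones());   // seed x = 2 with dx/dx = 1

  AD f = x * x + 3.0 * x;     // f(x) = x^2 + 3x; the derivative is tracked automatically

  std::cout << "f(2)  = " << f.value() << "\n";          // 10
  std::cout << "f'(2) = " << f.derivatives()(0) << "\n"; // 2*2 + 3 = 7
  return 0;
}
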
diff --git a/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp b/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp
index 48cd2d4..5f9bb93 100644
--- a/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp
+++ b/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp
@@ -11,7 +11,6 @@
#define EIGEN_USE_THREADS
#include "main.h"
#include "Eigen/CXX11/ThreadPool"
-#include "Eigen/CXX11/Tensor"
static void test_create_destroy_empty_pool()
{
@@ -23,11 +22,11 @@ static void test_create_destroy_empty_pool()
}
-static void test_parallelism(bool allow_spinning)
+static void test_parallelism()
{
// Test we never-ever fail to match available tasks with idle threads.
const int kThreads = 16; // code below expects that this is a multiple of 4
- NonBlockingThreadPool tp(kThreads, allow_spinning);
+ NonBlockingThreadPool tp(kThreads);
VERIFY_IS_EQUAL(tp.NumThreads(), kThreads);
VERIFY_IS_EQUAL(tp.CurrentThreadId(), -1);
for (int iter = 0; iter < 100; ++iter) {
@@ -101,25 +100,8 @@ static void test_parallelism(bool allow_spinning)
}
}
-
-static void test_cancel()
-{
- NonBlockingThreadPool tp(2);
-
- // Schedule a large number of closure that each sleeps for one second. This
- // will keep the thread pool busy for much longer than the default test timeout.
- for (int i = 0; i < 1000; ++i) {
- tp.Schedule([]() { EIGEN_SLEEP(2000); });
- }
-
- // Cancel the processing of all the closures that are still pending.
- tp.Cancel();
-}
-
void test_cxx11_non_blocking_thread_pool()
{
CALL_SUBTEST(test_create_destroy_empty_pool());
- CALL_SUBTEST(test_parallelism(true));
- CALL_SUBTEST(test_parallelism(false));
- CALL_SUBTEST(test_cancel());
+ CALL_SUBTEST(test_parallelism());
}
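
test_parallelism() above proves that idle threads always pick up pending tasks, synchronizing with barriers elided by the diff context. A minimal sketch of driving the pool through the Schedule() interface shown above — the busy-wait is an illustrative stand-in, since the pool has no join():

// Sketch: scheduling closures on the non-blocking pool tested above.
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/ThreadPool>
#include <atomic>
#include <cassert>

int main() {
  Eigen::NonBlockingThreadPool tp(4);      // four worker threads
  std::atomic<int> done(0);

  for (int i = 0; i < 100; ++i)
    tp.Schedule([&done] { done.fetch_add(1); });

  // The real test synchronizes with barriers; this spin-wait is for
  // illustration only.
  while (done.load() != 100) { }
  assert(tp.NumThreads() == 4);
  return 0;
}
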
diff --git a/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
index 21fdfca..7201bfe 100644
--- a/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
@@ -14,7 +14,7 @@
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_broadcast_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_SYCL
#include "main.h"
@@ -25,99 +25,39 @@ using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_broadcast_sycl_fixed(const Eigen::SyclDevice &sycl_device){
-
- // BROADCAST test:
- IndexType inDim1=2;
- IndexType inDim2=3;
- IndexType inDim3=5;
- IndexType inDim4=7;
- IndexType bDim1=2;
- IndexType bDim2=3;
- IndexType bDim3=1;
- IndexType bDim4=4;
- array<IndexType, 4> in_range = {{inDim1, inDim2, inDim3, inDim4}};
- array<IndexType, 4> broadcasts = {{bDim1, bDim2, bDim3, bDim4}};
- array<IndexType, 4> out_range; // = in_range * broadcasts
- for (size_t i = 0; i < out_range.size(); ++i)
- out_range[i] = in_range[i] * broadcasts[i];
-
- Tensor<DataType, 4, DataLayout, IndexType> input(in_range);
- Tensor<DataType, 4, DataLayout, IndexType> out(out_range);
-
- for (size_t i = 0; i < in_range.size(); ++i)
- VERIFY_IS_EQUAL(out.dimension(i), out_range[i]);
-
-
- for (IndexType i = 0; i < input.size(); ++i)
- input(i) = static_cast<DataType>(i);
-
- DataType * gpu_in_data = static_cast<DataType*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(DataType)));
- DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
-
- TensorMap<TensorFixedSize<DataType, Sizes<2, 3, 5, 7>, DataLayout, IndexType>> gpu_in(gpu_in_data, in_range);
- TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_out(gpu_out_data, out_range);
- sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(DataType));
- gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
- sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
-
- for (IndexType i = 0; i < inDim1*bDim1; ++i) {
- for (IndexType j = 0; j < inDim2*bDim2; ++j) {
- for (IndexType k = 0; k < inDim3*bDim3; ++k) {
- for (IndexType l = 0; l < inDim4*bDim4; ++l) {
- VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), out(i,j,k,l));
- }
- }
- }
- }
- printf("Broadcast Test with fixed size Passed\n");
- sycl_device.deallocate(gpu_in_data);
- sycl_device.deallocate(gpu_out_data);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
static void test_broadcast_sycl(const Eigen::SyclDevice &sycl_device){
// BROADCAST test:
- IndexType inDim1=2;
- IndexType inDim2=3;
- IndexType inDim3=5;
- IndexType inDim4=7;
- IndexType bDim1=2;
- IndexType bDim2=3;
- IndexType bDim3=1;
- IndexType bDim4=4;
- array<IndexType, 4> in_range = {{inDim1, inDim2, inDim3, inDim4}};
- array<IndexType, 4> broadcasts = {{bDim1, bDim2, bDim3, bDim4}};
- array<IndexType, 4> out_range; // = in_range * broadcasts
+ array<int, 4> in_range = {{2, 3, 5, 7}};
+ array<int, 4> broadcasts = {{2, 3, 1, 4}};
+ array<int, 4> out_range; // = in_range * broadcasts
for (size_t i = 0; i < out_range.size(); ++i)
out_range[i] = in_range[i] * broadcasts[i];
- Tensor<DataType, 4, DataLayout, IndexType> input(in_range);
- Tensor<DataType, 4, DataLayout, IndexType> out(out_range);
+ Tensor<float, 4> input(in_range);
+ Tensor<float, 4> out(out_range);
for (size_t i = 0; i < in_range.size(); ++i)
VERIFY_IS_EQUAL(out.dimension(i), out_range[i]);
- for (IndexType i = 0; i < input.size(); ++i)
- input(i) = static_cast<DataType>(i);
+ for (int i = 0; i < input.size(); ++i)
+ input(i) = static_cast<float>(i);
- DataType * gpu_in_data = static_cast<DataType*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(DataType)));
- DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
+ float * gpu_in_data = static_cast<float*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(float)));
+ float * gpu_out_data = static_cast<float*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float)));
- TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_in(gpu_in_data, in_range);
- TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_out(gpu_out_data, out_range);
- sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(DataType));
+ TensorMap<Tensor<float, 4>> gpu_in(gpu_in_data, in_range);
+ TensorMap<Tensor<float, 4>> gpu_out(gpu_out_data, out_range);
+ sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(float));
gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
- sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
- for (IndexType i = 0; i < inDim1*bDim1; ++i) {
- for (IndexType j = 0; j < inDim2*bDim2; ++j) {
- for (IndexType k = 0; k < inDim3*bDim3; ++k) {
- for (IndexType l = 0; l < inDim4*bDim4; ++l) {
- VERIFY_IS_APPROX(input(i%inDim1,j%inDim2,k%inDim3,l%inDim4), out(i,j,k,l));
+ for (int i = 0; i < 4; ++i) {
+ for (int j = 0; j < 9; ++j) {
+ for (int k = 0; k < 5; ++k) {
+ for (int l = 0; l < 28; ++l) {
+ VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), out(i,j,k,l));
}
}
}
@@ -127,18 +67,8 @@ static void test_broadcast_sycl(const Eigen::SyclDevice &sycl_device){
sycl_device.deallocate(gpu_out_data);
}
-template<typename DataType> void sycl_broadcast_test_per_device(const cl::sycl::device& d){
- std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
- QueueInterface queueInterface(d);
- auto sycl_device = Eigen::SyclDevice(&queueInterface);
- test_broadcast_sycl<DataType, RowMajor, int64_t>(sycl_device);
- test_broadcast_sycl<DataType, ColMajor, int64_t>(sycl_device);
- test_broadcast_sycl_fixed<DataType, RowMajor, int64_t>(sycl_device);
- test_broadcast_sycl_fixed<DataType, ColMajor, int64_t>(sycl_device);
-}
-
void test_cxx11_tensor_broadcast_sycl() {
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(sycl_broadcast_test_per_device<float>(device));
- }
+ cl::sycl::gpu_selector s;
+ Eigen::SyclDevice sycl_device(s);
+ CALL_SUBTEST(test_broadcast_sycl(sycl_device));
}
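
The broadcast check above verifies out(i,j,k,l) == input(i % inDim1, j % inDim2, k % inDim3, l % inDim4). The same semantics can be observed on the CPU without any SYCL device; a small sketch with assumed 2x3 dimensions:

// CPU-only sketch of the broadcast semantics verified above.
#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Tensor<float, 2> in(2, 3);
  in.setRandom();

  // Tile the tensor twice along each dimension: the result is 4 x 6.
  Eigen::array<int, 2> bcast = {{2, 2}};
  Eigen::Tensor<float, 2> out = in.broadcast(bcast);

  for (int i = 0; i < 4; ++i)
    for (int j = 0; j < 6; ++j)
      assert(out(i, j) == in(i % 2, j % 3));
  return 0;
}
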
diff --git a/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp
deleted file mode 100644
index 400a31d..0000000
--- a/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp
+++ /dev/null
@@ -1,267 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_builtins_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-
-namespace std {
-template <typename T> T rsqrt(T x) { return 1 / std::sqrt(x); }
-template <typename T> T square(T x) { return x * x; }
-template <typename T> T cube(T x) { return x * x * x; }
-template <typename T> T inverse(T x) { return 1 / x; }
-}
-
-#define TEST_UNARY_BUILTINS_FOR_SCALAR(FUNC, SCALAR, OPERATOR, Layout) \
- { \
- /* out OPERATOR in.FUNC() */ \
- Tensor<SCALAR, 3, Layout, int64_t> in(tensorRange); \
- Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
- in = in.random() + static_cast<SCALAR>(0.01); \
- out = out.random() + static_cast<SCALAR>(0.01); \
- Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
- SCALAR *gpu_data = static_cast<SCALAR *>( \
- sycl_device.allocate(in.size() * sizeof(SCALAR))); \
- SCALAR *gpu_data_out = static_cast<SCALAR *>( \
- sycl_device.allocate(out.size() * sizeof(SCALAR))); \
- TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu(gpu_data, tensorRange); \
- TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
- sycl_device.memcpyHostToDevice(gpu_data, in.data(), \
- (in.size()) * sizeof(SCALAR)); \
- sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), \
- (out.size()) * sizeof(SCALAR)); \
- gpu_out.device(sycl_device) OPERATOR gpu.FUNC(); \
- sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
- (out.size()) * sizeof(SCALAR)); \
- for (int64_t i = 0; i < out.size(); ++i) { \
- SCALAR ver = reference(i); \
- ver OPERATOR std::FUNC(in(i)); \
- VERIFY_IS_APPROX(out(i), ver); \
- } \
- sycl_device.deallocate(gpu_data); \
- sycl_device.deallocate(gpu_data_out); \
- } \
- { \
- /* out OPERATOR out.FUNC() */ \
- Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
- out = out.random() + static_cast<SCALAR>(0.01); \
- Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
- SCALAR *gpu_data_out = static_cast<SCALAR *>( \
- sycl_device.allocate(out.size() * sizeof(SCALAR))); \
- TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
- sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), \
- (out.size()) * sizeof(SCALAR)); \
- gpu_out.device(sycl_device) OPERATOR gpu_out.FUNC(); \
- sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
- (out.size()) * sizeof(SCALAR)); \
- for (int64_t i = 0; i < out.size(); ++i) { \
- SCALAR ver = reference(i); \
- ver OPERATOR std::FUNC(reference(i)); \
- VERIFY_IS_APPROX(out(i), ver); \
- } \
- sycl_device.deallocate(gpu_data_out); \
- }
-
-#define TEST_UNARY_BUILTINS_OPERATOR(SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(sqrt, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(rsqrt, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(square, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(cube, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(inverse, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(tanh, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(exp, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(expm1, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(log, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(ceil, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(floor, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(round, SCALAR, OPERATOR , Layout) \
- TEST_UNARY_BUILTINS_FOR_SCALAR(log1p, SCALAR, OPERATOR , Layout)
-
-#define TEST_IS_THAT_RETURNS_BOOL(SCALAR, FUNC, Layout) \
- { \
- /* out = in.FUNC() */ \
- Tensor<SCALAR, 3, Layout, int64_t> in(tensorRange); \
- Tensor<bool, 3, Layout, int64_t> out(tensorRange); \
- in = in.random() + static_cast<SCALAR>(0.01); \
- SCALAR *gpu_data = static_cast<SCALAR *>( \
- sycl_device.allocate(in.size() * sizeof(SCALAR))); \
- bool *gpu_data_out = \
- static_cast<bool *>(sycl_device.allocate(out.size() * sizeof(bool))); \
- TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu(gpu_data, tensorRange); \
- TensorMap<Tensor<bool, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
- sycl_device.memcpyHostToDevice(gpu_data, in.data(), \
- (in.size()) * sizeof(SCALAR)); \
- gpu_out.device(sycl_device) = gpu.FUNC(); \
- sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
- (out.size()) * sizeof(bool)); \
- for (int64_t i = 0; i < out.size(); ++i) { \
- VERIFY_IS_EQUAL(out(i), std::FUNC(in(i))); \
- } \
- sycl_device.deallocate(gpu_data); \
- sycl_device.deallocate(gpu_data_out); \
- }
-
-#define TEST_UNARY_BUILTINS(SCALAR, Layout) \
- TEST_UNARY_BUILTINS_OPERATOR(SCALAR, +=, Layout) \
- TEST_UNARY_BUILTINS_OPERATOR(SCALAR, =, Layout) \
- TEST_IS_THAT_RETURNS_BOOL(SCALAR, isnan, Layout) \
- TEST_IS_THAT_RETURNS_BOOL(SCALAR, isfinite, Layout) \
- TEST_IS_THAT_RETURNS_BOOL(SCALAR, isinf, Layout)
-
-static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device) {
- int64_t sizeDim1 = 10;
- int64_t sizeDim2 = 10;
- int64_t sizeDim3 = 10;
- array<int64_t, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
-
- TEST_UNARY_BUILTINS(float, RowMajor)
- TEST_UNARY_BUILTINS(float, ColMajor)
-}
-
-namespace std {
-template <typename T> T cwiseMax(T x, T y) { return std::max(x, y); }
-template <typename T> T cwiseMin(T x, T y) { return std::min(x, y); }
-}
-
-#define TEST_BINARY_BUILTINS_FUNC(SCALAR, FUNC, Layout) \
- { \
- /* out = in_1.FUNC(in_2) */ \
- Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange); \
- Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange); \
- Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
- in_1 = in_1.random() + static_cast<SCALAR>(0.01); \
- in_2 = in_2.random() + static_cast<SCALAR>(0.01); \
- Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
- SCALAR *gpu_data_1 = static_cast<SCALAR *>( \
- sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \
- SCALAR *gpu_data_2 = static_cast<SCALAR *>( \
- sycl_device.allocate(in_2.size() * sizeof(SCALAR))); \
- SCALAR *gpu_data_out = static_cast<SCALAR *>( \
- sycl_device.allocate(out.size() * sizeof(SCALAR))); \
- TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange); \
- TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2, tensorRange); \
- TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
- sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \
- (in_1.size()) * sizeof(SCALAR)); \
- sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(), \
- (in_2.size()) * sizeof(SCALAR)); \
- gpu_out.device(sycl_device) = gpu_1.FUNC(gpu_2); \
- sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
- (out.size()) * sizeof(SCALAR)); \
- for (int64_t i = 0; i < out.size(); ++i) { \
- SCALAR ver = reference(i); \
- ver = std::FUNC(in_1(i), in_2(i)); \
- VERIFY_IS_APPROX(out(i), ver); \
- } \
- sycl_device.deallocate(gpu_data_1); \
- sycl_device.deallocate(gpu_data_2); \
- sycl_device.deallocate(gpu_data_out); \
- }
-
-#define TEST_BINARY_BUILTINS_OPERATORS(SCALAR, OPERATOR, Layout) \
- { \
- /* out = in_1 OPERATOR in_2 */ \
- Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange); \
- Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange); \
- Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
- in_1 = in_1.random() + static_cast<SCALAR>(0.01); \
- in_2 = in_2.random() + static_cast<SCALAR>(0.01); \
- Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
- SCALAR *gpu_data_1 = static_cast<SCALAR *>( \
- sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \
- SCALAR *gpu_data_2 = static_cast<SCALAR *>( \
- sycl_device.allocate(in_2.size() * sizeof(SCALAR))); \
- SCALAR *gpu_data_out = static_cast<SCALAR *>( \
- sycl_device.allocate(out.size() * sizeof(SCALAR))); \
- TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange); \
- TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2, tensorRange); \
- TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
- sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \
- (in_1.size()) * sizeof(SCALAR)); \
- sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(), \
- (in_2.size()) * sizeof(SCALAR)); \
- gpu_out.device(sycl_device) = gpu_1 OPERATOR gpu_2; \
- sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
- (out.size()) * sizeof(SCALAR)); \
- for (int64_t i = 0; i < out.size(); ++i) { \
- VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR in_2(i)); \
- } \
- sycl_device.deallocate(gpu_data_1); \
- sycl_device.deallocate(gpu_data_2); \
- sycl_device.deallocate(gpu_data_out); \
- }
-
-#define TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(SCALAR, OPERATOR, Layout) \
- { \
- /* out = in_1 OPERATOR 2 */ \
- Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange); \
- Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \
- in_1 = in_1.random() + static_cast<SCALAR>(0.01); \
- Tensor<SCALAR, 3, Layout, int64_t> reference(out); \
- SCALAR *gpu_data_1 = static_cast<SCALAR *>( \
- sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \
- SCALAR *gpu_data_out = static_cast<SCALAR *>( \
- sycl_device.allocate(out.size() * sizeof(SCALAR))); \
- TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange); \
- TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \
- sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \
- (in_1.size()) * sizeof(SCALAR)); \
- gpu_out.device(sycl_device) = gpu_1 OPERATOR 2; \
- sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \
- (out.size()) * sizeof(SCALAR)); \
- for (int64_t i = 0; i < out.size(); ++i) { \
- VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR 2); \
- } \
- sycl_device.deallocate(gpu_data_1); \
- sycl_device.deallocate(gpu_data_out); \
- }
-
-#define TEST_BINARY_BUILTINS(SCALAR, Layout) \
- TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMax , Layout) \
- TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMin , Layout) \
- TEST_BINARY_BUILTINS_OPERATORS(SCALAR, + , Layout) \
- TEST_BINARY_BUILTINS_OPERATORS(SCALAR, - , Layout) \
- TEST_BINARY_BUILTINS_OPERATORS(SCALAR, * , Layout) \
- TEST_BINARY_BUILTINS_OPERATORS(SCALAR, / , Layout)
-
-static void test_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
- int64_t sizeDim1 = 10;
- int64_t sizeDim2 = 10;
- int64_t sizeDim3 = 10;
- array<int64_t, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
- TEST_BINARY_BUILTINS(float, RowMajor)
- TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, RowMajor)
- TEST_BINARY_BUILTINS(float, ColMajor)
- TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, ColMajor)
-}
-
-void test_cxx11_tensor_builtins_sycl() {
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- QueueInterface queueInterface(device);
- Eigen::SyclDevice sycl_device(&queueInterface);
- CALL_SUBTEST(test_builtin_unary_sycl(sycl_device));
- CALL_SUBTEST(test_builtin_binary_sycl(sycl_device));
- }
-}
diff --git a/eigen/unsupported/test/cxx11_tensor_chipping.cpp b/eigen/unsupported/test/cxx11_tensor_chipping.cpp
index 89cf5c7..1832dec 100644
--- a/eigen/unsupported/test/cxx11_tensor_chipping.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_chipping.cpp
@@ -43,7 +43,7 @@ static void test_simple_chip()
VERIFY_IS_EQUAL(chip2.dimension(2), 7);
VERIFY_IS_EQUAL(chip2.dimension(3), 11);
for (int i = 0; i < 2; ++i) {
- for (int j = 0; j < 5; ++j) {
+ for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
for (int l = 0; l < 11; ++l) {
VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l));
@@ -75,7 +75,7 @@ static void test_simple_chip()
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
- for (int l = 0; l < 11; ++l) {
+ for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l));
}
}
@@ -126,7 +126,7 @@ static void test_dynamic_chip()
VERIFY_IS_EQUAL(chip2.dimension(2), 7);
VERIFY_IS_EQUAL(chip2.dimension(3), 11);
for (int i = 0; i < 2; ++i) {
- for (int j = 0; j < 5; ++j) {
+ for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 7; ++k) {
for (int l = 0; l < 11; ++l) {
VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l));
@@ -158,7 +158,7 @@ static void test_dynamic_chip()
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
for (int k = 0; k < 5; ++k) {
- for (int l = 0; l < 11; ++l) {
+ for (int l = 0; l < 7; ++l) {
VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l));
}
}
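
The loop-bound fixes above matter because chip() removes the chipped dimension, so each remaining loop must iterate over the surviving extents (3 and 7, not 5 and 11). A short sketch of those semantics, with assumed 2x3x5 dimensions:

// Sketch: chip(offset, dim) fixes dimension `dim` at `offset` and drops it.
#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Tensor<float, 3> t(2, 3, 5);
  t.setRandom();

  // Chip dimension 1 at offset 1: the result is 2 x 5.
  Eigen::Tensor<float, 2> slice = t.chip(1, 1);
  assert(slice.dimension(0) == 2 && slice.dimension(1) == 5);

  for (int i = 0; i < 2; ++i)
    for (int k = 0; k < 5; ++k)
      assert(slice(i, k) == t(i, 1, k));
  return 0;
}
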
diff --git a/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp
deleted file mode 100644
index 39e4f0a..0000000
--- a/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp
+++ /dev/null
@@ -1,622 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-// Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_chipping_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include "main.h"
-
-#include <Eigen/CXX11/Tensor>
-
-using Eigen::Tensor;
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_static_chip_sycl(const Eigen::SyclDevice& sycl_device)
-{
- IndexType sizeDim1 = 2;
- IndexType sizeDim2 = 3;
- IndexType sizeDim3 = 5;
- IndexType sizeDim4 = 7;
- IndexType sizeDim5 = 11;
-
- array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
- array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
-
- Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
- Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
-
- tensor.setRandom();
-
- const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
- const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
- DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
- DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
-
- TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
-
- sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
- gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(1l);
- sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
-
- VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
- VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
- VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
- VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);
-
- for (IndexType i = 0; i < sizeDim2; ++i) {
- for (IndexType j = 0; j < sizeDim3; ++j) {
- for (IndexType k = 0; k < sizeDim4; ++k) {
- for (IndexType l = 0; l < sizeDim5; ++l) {
- VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
- }
- }
- }
- }
-
- array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
- Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
- const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
- DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
-
- gpu_chip2.device(sycl_device)=gpu_tensor.template chip<1l>(1l);
- sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
-
- VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
- VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
- VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
- VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);
-
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim3; ++j) {
- for (IndexType k = 0; k < sizeDim4; ++k) {
- for (IndexType l = 0; l < sizeDim5; ++l) {
- VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
- }
- }
- }
- }
-
- array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
- Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
- const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
- DataType* gpu_data_chip3 = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);
-
- gpu_chip3.device(sycl_device)=gpu_tensor.template chip<2l>(2l);
- sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);
-
- VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
- VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
- VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
- VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);
-
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim4; ++k) {
- for (IndexType l = 0; l < sizeDim5; ++l) {
- VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
- }
- }
- }
- }
-
- array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
- Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
- const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
- DataType* gpu_data_chip4 = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);
-
- gpu_chip4.device(sycl_device)=gpu_tensor.template chip<3l>(5l);
- sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);
-
- VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
- VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
- VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
- VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);
-
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
- for (IndexType l = 0; l < sizeDim5; ++l) {
- VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
- }
- }
- }
- }
-
-
- array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
- Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
- const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
- DataType* gpu_data_chip5 = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);
-
- gpu_chip5.device(sycl_device)=gpu_tensor.template chip<4l>(7l);
- sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);
-
- VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
- VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
- VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
- VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);
-
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
- for (IndexType l = 0; l < sizeDim4; ++l) {
- VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
- }
- }
- }
- }
-
- sycl_device.deallocate(gpu_data_tensor);
- sycl_device.deallocate(gpu_data_chip1);
- sycl_device.deallocate(gpu_data_chip2);
- sycl_device.deallocate(gpu_data_chip3);
- sycl_device.deallocate(gpu_data_chip4);
- sycl_device.deallocate(gpu_data_chip5);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_dynamic_chip_sycl(const Eigen::SyclDevice& sycl_device)
-{
- IndexType sizeDim1 = 2;
- IndexType sizeDim2 = 3;
- IndexType sizeDim3 = 5;
- IndexType sizeDim4 = 7;
- IndexType sizeDim5 = 11;
-
- array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
- array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
-
- Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
- Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
-
- tensor.setRandom();
-
- const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
- const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
- DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
- DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
-
- TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
-
- sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
- gpu_chip1.device(sycl_device)=gpu_tensor.chip(1l,0l);
- sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
-
- VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
- VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
- VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
- VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);
-
- for (IndexType i = 0; i < sizeDim2; ++i) {
- for (IndexType j = 0; j < sizeDim3; ++j) {
- for (IndexType k = 0; k < sizeDim4; ++k) {
- for (IndexType l = 0; l < sizeDim5; ++l) {
- VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
- }
- }
- }
- }
-
- array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
- Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
- const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
- DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
-
- gpu_chip2.device(sycl_device)=gpu_tensor.chip(1l,1l);
- sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
-
- VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
- VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
- VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
- VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);
-
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim3; ++j) {
- for (IndexType k = 0; k < sizeDim4; ++k) {
- for (IndexType l = 0; l < sizeDim5; ++l) {
- VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
- }
- }
- }
- }
-
- array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
- Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
- const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
- DataType* gpu_data_chip3 = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);
-
- gpu_chip3.device(sycl_device)=gpu_tensor.chip(2l,2l);
- sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);
-
- VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
- VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
- VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
- VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);
-
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim4; ++k) {
- for (IndexType l = 0; l < sizeDim5; ++l) {
- VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
- }
- }
- }
- }
-
- array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
- Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
- const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
- DataType* gpu_data_chip4 = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);
-
- gpu_chip4.device(sycl_device)=gpu_tensor.chip(5l,3l);
- sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);
-
- VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
- VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
- VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
- VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);
-
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
- for (IndexType l = 0; l < sizeDim5; ++l) {
- VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
- }
- }
- }
- }
-
-
- array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
- Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
- const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
- DataType* gpu_data_chip5 = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);
-
- gpu_chip5.device(sycl_device)=gpu_tensor.chip(7l,4l);
- sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);
-
- VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
- VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
- VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
- VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);
-
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
- for (IndexType l = 0; l < sizeDim4; ++l) {
- VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
- }
- }
- }
- }
- sycl_device.deallocate(gpu_data_tensor);
- sycl_device.deallocate(gpu_data_chip1);
- sycl_device.deallocate(gpu_data_chip2);
- sycl_device.deallocate(gpu_data_chip3);
- sycl_device.deallocate(gpu_data_chip4);
- sycl_device.deallocate(gpu_data_chip5);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_chip_in_expr(const Eigen::SyclDevice& sycl_device) {
-
- IndexType sizeDim1 = 2;
- IndexType sizeDim2 = 3;
- IndexType sizeDim3 = 5;
- IndexType sizeDim4 = 7;
- IndexType sizeDim5 = 11;
-
- array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
- array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
-
- Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
-
- Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
- Tensor<DataType, 4, DataLayout,IndexType> tensor1(chip1TensorRange);
- tensor.setRandom();
- tensor1.setRandom();
-
- const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
- const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
- DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
- DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
- DataType* gpu_data_tensor1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
-
- TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_tensor1(gpu_data_tensor1, chip1TensorRange);
-
-
- sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
- sycl_device.memcpyHostToDevice(gpu_data_tensor1, tensor1.data(), chip1TensorBuffSize);
- gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(0l) + gpu_tensor1;
- sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
-
- for (int i = 0; i < sizeDim2; ++i) {
- for (int j = 0; j < sizeDim3; ++j) {
- for (int k = 0; k < sizeDim4; ++k) {
- for (int l = 0; l < sizeDim5; ++l) {
- float expected = tensor(0l,i,j,k,l) + tensor1(i,j,k,l);
- VERIFY_IS_EQUAL(chip1(i,j,k,l), expected);
- }
- }
- }
- }
-
- array<IndexType, 3> chip2TensorRange = {{sizeDim2, sizeDim4, sizeDim5}};
- Tensor<DataType, 3, DataLayout,IndexType> tensor2(chip2TensorRange);
- Tensor<DataType, 3, DataLayout,IndexType> chip2(chip2TensorRange);
- tensor2.setRandom();
- const size_t chip2TensorBuffSize =tensor2.size()*sizeof(DataType);
- DataType* gpu_data_tensor2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
- DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
- TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_tensor2(gpu_data_tensor2, chip2TensorRange);
- TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
-
- sycl_device.memcpyHostToDevice(gpu_data_tensor2, tensor2.data(), chip2TensorBuffSize);
- gpu_chip2.device(sycl_device)=gpu_tensor.template chip<0l>(0l).template chip<1l>(2l) + gpu_tensor2;
- sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
-
- for (int i = 0; i < sizeDim2; ++i) {
- for (int j = 0; j < sizeDim4; ++j) {
- for (int k = 0; k < sizeDim5; ++k) {
- float expected = tensor(0l,i,2l,j,k) + tensor2(i,j,k);
- VERIFY_IS_EQUAL(chip2(i,j,k), expected);
- }
- }
- }
- sycl_device.deallocate(gpu_data_tensor);
- sycl_device.deallocate(gpu_data_tensor1);
- sycl_device.deallocate(gpu_data_chip1);
- sycl_device.deallocate(gpu_data_tensor2);
- sycl_device.deallocate(gpu_data_chip2);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_chip_as_lvalue_sycl(const Eigen::SyclDevice& sycl_device)
-{
-
- IndexType sizeDim1 = 2;
- IndexType sizeDim2 = 3;
- IndexType sizeDim3 = 5;
- IndexType sizeDim4 = 7;
- IndexType sizeDim5 = 11;
-
- array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
- array<IndexType, 4> input2TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
-
- Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
- Tensor<DataType, 5, DataLayout,IndexType> input1(tensorRange);
- Tensor<DataType, 4, DataLayout,IndexType> input2(input2TensorRange);
- input1.setRandom();
- input2.setRandom();
-
-
- const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
- const size_t input2TensorBuffSize =input2.size()*sizeof(DataType);
- DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
- DataType* gpu_data_input1 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
- DataType* gpu_data_input2 = static_cast<DataType*>(sycl_device.allocate(input2TensorBuffSize));
-
- TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
- TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input1(gpu_data_input1, tensorRange);
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input2(gpu_data_input2, input2TensorRange);
-
- sycl_device.memcpyHostToDevice(gpu_data_input1, input1.data(), tensorBuffSize);
- gpu_tensor.device(sycl_device)=gpu_input1;
- sycl_device.memcpyHostToDevice(gpu_data_input2, input2.data(), input2TensorBuffSize);
- gpu_tensor.template chip<0l>(1l).device(sycl_device)=gpu_input2;
- sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
-
- for (int i = 0; i < sizeDim1; ++i) {
- for (int j = 0; j < sizeDim2; ++j) {
- for (int k = 0; k < sizeDim3; ++k) {
- for (int l = 0; l < sizeDim4; ++l) {
- for (int m = 0; m < sizeDim5; ++m) {
- if (i != 1) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
- } else {
- VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input2(j,k,l,m));
- }
- }
- }
- }
- }
- }
-
- gpu_tensor.device(sycl_device)=gpu_input1;
- array<IndexType, 4> input3TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
- Tensor<DataType, 4, DataLayout,IndexType> input3(input3TensorRange);
- input3.setRandom();
-
- const size_t input3TensorBuffSize =input3.size()*sizeof(DataType);
- DataType* gpu_data_input3 = static_cast<DataType*>(sycl_device.allocate(input3TensorBuffSize));
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input3(gpu_data_input3, input3TensorRange);
-
- sycl_device.memcpyHostToDevice(gpu_data_input3, input3.data(), input3TensorBuffSize);
- gpu_tensor.template chip<1l>(1l).device(sycl_device)=gpu_input3;
- sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
-
- for (int i = 0; i < sizeDim1; ++i) {
- for (int j = 0; j < sizeDim2; ++j) {
- for (int k = 0; k <sizeDim3; ++k) {
- for (int l = 0; l < sizeDim4; ++l) {
- for (int m = 0; m < sizeDim5; ++m) {
- if (j != 1) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
- } else {
- VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input3(i,k,l,m));
- }
- }
- }
- }
- }
- }
-
- gpu_tensor.device(sycl_device)=gpu_input1;
- array<IndexType, 4> input4TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
- Tensor<DataType, 4, DataLayout,IndexType> input4(input4TensorRange);
- input4.setRandom();
-
- const size_t input4TensorBuffSize =input4.size()*sizeof(DataType);
- DataType* gpu_data_input4 = static_cast<DataType*>(sycl_device.allocate(input4TensorBuffSize));
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input4(gpu_data_input4, input4TensorRange);
-
- sycl_device.memcpyHostToDevice(gpu_data_input4, input4.data(), input4TensorBuffSize);
- gpu_tensor.template chip<2l>(3l).device(sycl_device)=gpu_input4;
- sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
-
- for (int i = 0; i < sizeDim1; ++i) {
- for (int j = 0; j < sizeDim2; ++j) {
- for (int k = 0; k <sizeDim3; ++k) {
- for (int l = 0; l < sizeDim4; ++l) {
- for (int m = 0; m < sizeDim5; ++m) {
- if (k != 3) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
- } else {
- VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input4(i,j,l,m));
- }
- }
- }
- }
- }
- }
-
- gpu_tensor.device(sycl_device)=gpu_input1;
- array<IndexType, 4> input5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
- Tensor<DataType, 4, DataLayout,IndexType> input5(input5TensorRange);
- input5.setRandom();
-
- const size_t input5TensorBuffSize =input5.size()*sizeof(DataType);
- DataType* gpu_data_input5 = static_cast<DataType*>(sycl_device.allocate(input5TensorBuffSize));
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input5(gpu_data_input5, input5TensorRange);
-
- sycl_device.memcpyHostToDevice(gpu_data_input5, input5.data(), input5TensorBuffSize);
- gpu_tensor.template chip<3l>(4l).device(sycl_device)=gpu_input5;
- sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
-
- for (int i = 0; i < sizeDim1; ++i) {
- for (int j = 0; j < sizeDim2; ++j) {
- for (int k = 0; k <sizeDim3; ++k) {
- for (int l = 0; l < sizeDim4; ++l) {
- for (int m = 0; m < sizeDim5; ++m) {
- if (l != 4) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
- } else {
- VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input5(i,j,k,m));
- }
- }
- }
- }
- }
- }
- gpu_tensor.device(sycl_device)=gpu_input1;
- array<IndexType, 4> input6TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
- Tensor<DataType, 4, DataLayout,IndexType> input6(input6TensorRange);
- input6.setRandom();
-
- const size_t input6TensorBuffSize =input6.size()*sizeof(DataType);
- DataType* gpu_data_input6 = static_cast<DataType*>(sycl_device.allocate(input6TensorBuffSize));
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input6(gpu_data_input6, input6TensorRange);
-
- sycl_device.memcpyHostToDevice(gpu_data_input6, input6.data(), input6TensorBuffSize);
- gpu_tensor.template chip<4l>(5l).device(sycl_device)=gpu_input6;
- sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
-
- for (int i = 0; i < sizeDim1; ++i) {
- for (int j = 0; j < sizeDim2; ++j) {
- for (int k = 0; k <sizeDim3; ++k) {
- for (int l = 0; l < sizeDim4; ++l) {
- for (int m = 0; m < sizeDim5; ++m) {
- if (m != 5) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
- } else {
- VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input6(i,j,k,l));
- }
- }
- }
- }
- }
- }
-
-
- gpu_tensor.device(sycl_device)=gpu_input1;
- Tensor<DataType, 5, DataLayout,IndexType> input7(tensorRange);
- input7.setRandom();
-
- DataType* gpu_data_input7 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
- TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input7(gpu_data_input7, tensorRange);
-
- sycl_device.memcpyHostToDevice(gpu_data_input7, input7.data(), tensorBuffSize);
- gpu_tensor.chip(0l,0l).device(sycl_device)=gpu_input7.chip(0l,0l);
- sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
-
- for (int i = 0; i < sizeDim1; ++i) {
- for (int j = 0; j < sizeDim2; ++j) {
- for (int k = 0; k <sizeDim3; ++k) {
- for (int l = 0; l < sizeDim4; ++l) {
- for (int m = 0; m < sizeDim5; ++m) {
- if (i != 0) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
- } else {
- VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input7(i,j,k,l,m));
- }
- }
- }
- }
- }
- }
- sycl_device.deallocate(gpu_data_tensor);
- sycl_device.deallocate(gpu_data_input1);
- sycl_device.deallocate(gpu_data_input2);
- sycl_device.deallocate(gpu_data_input3);
- sycl_device.deallocate(gpu_data_input4);
- sycl_device.deallocate(gpu_data_input5);
- sycl_device.deallocate(gpu_data_input6);
- sycl_device.deallocate(gpu_data_input7);
-
-}
-
-template<typename DataType, typename dev_Selector> void sycl_chipping_test_per_device(dev_Selector s){
- QueueInterface queueInterface(s);
- auto sycl_device = Eigen::SyclDevice(&queueInterface);
- test_static_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
- test_static_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
- test_dynamic_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
- test_dynamic_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
- test_chip_in_expr<DataType, RowMajor, int64_t>(sycl_device);
- test_chip_in_expr<DataType, ColMajor, int64_t>(sycl_device);
- test_chip_as_lvalue_sycl<DataType, RowMajor, int64_t>(sycl_device);
- test_chip_as_lvalue_sycl<DataType, ColMajor, int64_t>(sycl_device);
-}
-void test_cxx11_tensor_chipping_sycl()
-{
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(sycl_chipping_test_per_device<float>(device));
- }
-}
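The chipping semantics these deleted tests exercised do not need a SYCL device to demonstrate; the host-side behavior can be checked with plain Eigen tensors. A minimal sketch, assuming only the unsupported Tensor module and illustrative sizes:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <cassert>

    int main() {
      Eigen::Tensor<float, 3> t(2, 3, 5);
      t.setRandom();

      // chip<0>(1) fixes dimension 0 at index 1 and drops it from the rank,
      // yielding a rank-2 view of shape (3, 5).
      Eigen::Tensor<float, 2> slice = t.chip<0>(1);
      assert(slice.dimension(0) == 3 && slice.dimension(1) == 5);

      // The dynamic form chip(offset, dim) is equivalent, and a chip is also
      // assignable, which is what test_chip_as_lvalue_sycl verified on device.
      Eigen::Tensor<float, 2> zeros(3, 5);
      zeros.setZero();
      t.chip(1, 0) = zeros;  // overwrite the slice t(1, :, :)

      for (int j = 0; j < 3; ++j)
        for (int k = 0; k < 5; ++k)
          assert(t(1, j, k) == 0.0f);
      return 0;
    }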
diff --git a/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp
deleted file mode 100644
index e3023a3..0000000
--- a/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_concatenation_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::Tensor;
-
-template<typename DataType, int DataLayout, typename IndexType>
-static void test_simple_concatenation(const Eigen::SyclDevice& sycl_device)
-{
- IndexType leftDim1 = 2;
- IndexType leftDim2 = 3;
- IndexType leftDim3 = 1;
- Eigen::array<IndexType, 3> leftRange = {{leftDim1, leftDim2, leftDim3}};
- IndexType rightDim1 = 2;
- IndexType rightDim2 = 3;
- IndexType rightDim3 = 1;
- Eigen::array<IndexType, 3> rightRange = {{rightDim1, rightDim2, rightDim3}};
-
-  // IndexType concatDim1 = 3;
-  // IndexType concatDim2 = 3;
-  // IndexType concatDim3 = 1;
-  // Eigen::array<IndexType, 3> concatRange = {{concatDim1, concatDim2, concatDim3}};
-
- Tensor<DataType, 3, DataLayout, IndexType> left(leftRange);
- Tensor<DataType, 3, DataLayout, IndexType> right(rightRange);
- left.setRandom();
- right.setRandom();
-
- DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType)));
- DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType)));
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, leftRange);
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, rightRange);
- sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType));
- sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType));
- ///
- Tensor<DataType, 3, DataLayout, IndexType> concatenation1(leftDim1+rightDim1, leftDim2, leftDim3);
- DataType * gpu_out_data1 = static_cast<DataType*>(sycl_device.allocate(concatenation1.dimensions().TotalSize()*sizeof(DataType)));
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out1(gpu_out_data1, concatenation1.dimensions());
-
- //concatenation = left.concatenate(right, 0);
- gpu_out1.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 0);
- sycl_device.memcpyDeviceToHost(concatenation1.data(), gpu_out_data1,(concatenation1.dimensions().TotalSize())*sizeof(DataType));
-
- VERIFY_IS_EQUAL(concatenation1.dimension(0), 4);
- VERIFY_IS_EQUAL(concatenation1.dimension(1), 3);
- VERIFY_IS_EQUAL(concatenation1.dimension(2), 1);
- for (IndexType j = 0; j < 3; ++j) {
- for (IndexType i = 0; i < 2; ++i) {
- VERIFY_IS_EQUAL(concatenation1(i, j, 0), left(i, j, 0));
- }
- for (IndexType i = 2; i < 4; ++i) {
- VERIFY_IS_EQUAL(concatenation1(i, j, 0), right(i - 2, j, 0));
- }
- }
-
- sycl_device.deallocate(gpu_out_data1);
- Tensor<DataType, 3, DataLayout, IndexType> concatenation2(leftDim1, leftDim2 +rightDim2, leftDim3);
- DataType * gpu_out_data2 = static_cast<DataType*>(sycl_device.allocate(concatenation2.dimensions().TotalSize()*sizeof(DataType)));
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out2(gpu_out_data2, concatenation2.dimensions());
- gpu_out2.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 1);
- sycl_device.memcpyDeviceToHost(concatenation2.data(), gpu_out_data2,(concatenation2.dimensions().TotalSize())*sizeof(DataType));
-
- //concatenation = left.concatenate(right, 1);
- VERIFY_IS_EQUAL(concatenation2.dimension(0), 2);
- VERIFY_IS_EQUAL(concatenation2.dimension(1), 6);
- VERIFY_IS_EQUAL(concatenation2.dimension(2), 1);
- for (IndexType i = 0; i < 2; ++i) {
- for (IndexType j = 0; j < 3; ++j) {
- VERIFY_IS_EQUAL(concatenation2(i, j, 0), left(i, j, 0));
- }
- for (IndexType j = 3; j < 6; ++j) {
- VERIFY_IS_EQUAL(concatenation2(i, j, 0), right(i, j - 3, 0));
- }
- }
- sycl_device.deallocate(gpu_out_data2);
- Tensor<DataType, 3, DataLayout, IndexType> concatenation3(leftDim1, leftDim2, leftDim3+rightDim3);
- DataType * gpu_out_data3 = static_cast<DataType*>(sycl_device.allocate(concatenation3.dimensions().TotalSize()*sizeof(DataType)));
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out3(gpu_out_data3, concatenation3.dimensions());
- gpu_out3.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 2);
- sycl_device.memcpyDeviceToHost(concatenation3.data(), gpu_out_data3,(concatenation3.dimensions().TotalSize())*sizeof(DataType));
-
- //concatenation = left.concatenate(right, 2);
- VERIFY_IS_EQUAL(concatenation3.dimension(0), 2);
- VERIFY_IS_EQUAL(concatenation3.dimension(1), 3);
- VERIFY_IS_EQUAL(concatenation3.dimension(2), 2);
- for (IndexType i = 0; i < 2; ++i) {
- for (IndexType j = 0; j < 3; ++j) {
- VERIFY_IS_EQUAL(concatenation3(i, j, 0), left(i, j, 0));
- VERIFY_IS_EQUAL(concatenation3(i, j, 1), right(i, j, 0));
- }
- }
- sycl_device.deallocate(gpu_out_data3);
- sycl_device.deallocate(gpu_in1_data);
- sycl_device.deallocate(gpu_in2_data);
-}
-template<typename DataType, int DataLayout, typename IndexType>
-static void test_concatenation_as_lvalue(const Eigen::SyclDevice& sycl_device)
-{
-
- IndexType leftDim1 = 2;
- IndexType leftDim2 = 3;
- Eigen::array<IndexType, 2> leftRange = {{leftDim1, leftDim2}};
-
- IndexType rightDim1 = 2;
- IndexType rightDim2 = 3;
- Eigen::array<IndexType, 2> rightRange = {{rightDim1, rightDim2}};
-
- IndexType concatDim1 = 4;
- IndexType concatDim2 = 3;
- Eigen::array<IndexType, 2> resRange = {{concatDim1, concatDim2}};
-
- Tensor<DataType, 2, DataLayout, IndexType> left(leftRange);
- Tensor<DataType, 2, DataLayout, IndexType> right(rightRange);
- Tensor<DataType, 2, DataLayout, IndexType> result(resRange);
-
- left.setRandom();
- right.setRandom();
- result.setRandom();
-
- DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType)));
- DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType)));
- DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(result.dimensions().TotalSize()*sizeof(DataType)));
-
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_in1(gpu_in1_data, leftRange);
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_in2(gpu_in2_data, rightRange);
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_out(gpu_out_data, resRange);
-
- sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType));
- sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType));
- sycl_device.memcpyHostToDevice(gpu_out_data, result.data(),(result.dimensions().TotalSize())*sizeof(DataType));
-
-// t1.concatenate(t2, 0) = result;
- gpu_in1.concatenate(gpu_in2, 0).device(sycl_device) =gpu_out;
- sycl_device.memcpyDeviceToHost(left.data(), gpu_in1_data,(left.dimensions().TotalSize())*sizeof(DataType));
- sycl_device.memcpyDeviceToHost(right.data(), gpu_in2_data,(right.dimensions().TotalSize())*sizeof(DataType));
-
- for (IndexType i = 0; i < 2; ++i) {
- for (IndexType j = 0; j < 3; ++j) {
- VERIFY_IS_EQUAL(left(i, j), result(i, j));
- VERIFY_IS_EQUAL(right(i, j), result(i+2, j));
- }
- }
- sycl_device.deallocate(gpu_in1_data);
- sycl_device.deallocate(gpu_in2_data);
- sycl_device.deallocate(gpu_out_data);
-}
-
-
-template <typename DataType, typename Dev_selector> void tensorConcat_perDevice(Dev_selector s){
- QueueInterface queueInterface(s);
- auto sycl_device = Eigen::SyclDevice(&queueInterface);
- test_simple_concatenation<DataType, RowMajor, int64_t>(sycl_device);
- test_simple_concatenation<DataType, ColMajor, int64_t>(sycl_device);
- test_concatenation_as_lvalue<DataType, ColMajor, int64_t>(sycl_device);
-}
-void test_cxx11_tensor_concatenation_sycl() {
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(tensorConcat_perDevice<float>(device));
- }
-}
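For reference, the concatenation behavior the removed test verified on the device matches the host-side API exactly. A minimal sketch, assuming illustrative 2x3 inputs:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <cassert>

    int main() {
      Eigen::Tensor<float, 2> left(2, 3), right(2, 3);
      left.setRandom();
      right.setRandom();

      // Concatenating along dimension 0 stacks the inputs: 2 + 2 rows, 3 cols.
      Eigen::Tensor<float, 2> cat = left.concatenate(right, 0);
      assert(cat.dimension(0) == 4 && cat.dimension(1) == 3);

      for (int j = 0; j < 3; ++j) {
        for (int i = 0; i < 2; ++i) {
          assert(cat(i, j) == left(i, j));       // first block comes from left
          assert(cat(i + 2, j) == right(i, j));  // second block from right
        }
      }
      return 0;
    }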
diff --git a/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp
deleted file mode 100644
index 5bace66..0000000
--- a/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp
+++ /dev/null
@@ -1,290 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_contract_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include <iostream>
-#include <chrono>
-#include <ctime>
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-template<int DataLayout, typename DataType, typename IndexType, typename Device>
-void static test_sycl_contraction(const Device& sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)
-{
- typedef typename Tensor<DataType, 1, DataLayout, IndexType>::DimensionPair DimPair;
- static const DataType error_threshold =1e-4f;
-// std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
- // with these dimensions, the output has 300 * 140 elements, which is
- // more than 30 * 1024, which is the number of threads in blocks on
- // a 15 SM GK110 GPU
- Tensor<DataType, 2, DataLayout, IndexType> t_left(m_size, k_size);
- Tensor<DataType, 2, DataLayout, IndexType> t_right(k_size, n_size);
- Tensor<DataType, 2, DataLayout, IndexType> t_result(m_size, n_size);
- Tensor<DataType, 2, DataLayout, IndexType> t_result_gpu(m_size, n_size);
-// Eigen::array<DimPair, 1> dims(DimPair(1, 0));
- Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
- Eigen::array<IndexType, 2> left_dims = {{m_size, k_size}};
- Eigen::array<IndexType, 2> right_dims = {{k_size, n_size}};
- Eigen::array<IndexType, 2> result_dims = {{m_size, n_size}};
-
- t_left.setRandom();
- t_right.setRandom();
-
- std::size_t t_left_bytes = t_left.size() * sizeof(DataType);
- std::size_t t_right_bytes = t_right.size() * sizeof(DataType);
- std::size_t t_result_bytes = t_result.size() * sizeof(DataType);
-
- DataType * d_t_left = static_cast<DataType*>(sycl_device.allocate(t_left_bytes));
- DataType * d_t_right = static_cast<DataType*>(sycl_device.allocate(t_right_bytes));
- DataType * d_t_result = static_cast<DataType*>(sycl_device.allocate(t_result_bytes));
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_left(d_t_left, left_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_right(d_t_right, right_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_result(d_t_result, result_dims);
-
- sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes);
- sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes);
-
- gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
- sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes);
-
- t_result = t_left.contract(t_right, dims);
-
- for (IndexType i = 0; i < t_result.size(); i++) {
- if (static_cast<DataType>(fabs(t_result(i) - t_result_gpu(i))) < error_threshold) {
- continue;
- }
- if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) {
- continue;
- }
- std::cout << "mismatch detected at IndexType " << i << ": " << t_result(i)
- << " vs " << t_result_gpu(i) << std::endl;
- assert(false);
- }
- sycl_device.deallocate(d_t_left);
- sycl_device.deallocate(d_t_right);
- sycl_device.deallocate(d_t_result);
-}
-
-template<int DataLayout, typename DataType, typename IndexType, typename Device>
-void test_TF(const Device& sycl_device)
-{
- typedef typename Tensor<DataType, 1, DataLayout, IndexType>::DimensionPair DimPair;
- static const DataType error_threshold =1e-4f;
- Eigen::array<IndexType, 2> left_dims = {{2, 3}};
- Eigen::array<IndexType, 2> right_dims = {{3, 1}};
- Eigen::array<IndexType, 2> res_dims = {{2, 1}};
- Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
-
-
- Tensor<DataType, 2, DataLayout, IndexType> t_left(left_dims);
- Tensor<DataType, 2, DataLayout, IndexType> t_right(right_dims);
- Tensor<DataType, 2, DataLayout, IndexType> t_result_gpu(res_dims);
- Tensor<DataType, 2, DataLayout, IndexType> t_result(res_dims);
-
- t_left.data()[0] = 1.0f;
- t_left.data()[1] = 2.0f;
- t_left.data()[2] = 3.0f;
- t_left.data()[3] = 4.0f;
- t_left.data()[4] = 5.0f;
- t_left.data()[5] = 6.0f;
-
- t_right.data()[0] = -1.0f;
- t_right.data()[1] = 0.5f;
- t_right.data()[2] = 2.0f;
-
- std::size_t t_left_bytes = t_left.size() * sizeof(DataType);
- std::size_t t_right_bytes = t_right.size() * sizeof(DataType);
- std::size_t t_result_bytes = t_result.size()*sizeof(DataType);
-
-
- DataType * d_t_left = static_cast<DataType*>(sycl_device.allocate(t_left_bytes));
- DataType * d_t_right = static_cast<DataType*>(sycl_device.allocate(t_right_bytes));
- DataType * d_t_result = static_cast<DataType*>(sycl_device.allocate(t_result_bytes));
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_left(d_t_left, left_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_right(d_t_right, right_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_result(d_t_result, res_dims);
-
- sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes);
- sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes);
-
- gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
- sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes);
-
- t_result = t_left.contract(t_right, dims);
-
- for (IndexType i = 0; i < t_result.size(); i++) {
- if (static_cast<DataType>(fabs(t_result(i) - t_result_gpu(i))) < error_threshold) {
- continue;
- }
- if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) {
- continue;
- }
- std::cout << "mismatch detected at IndexType " << i << ": " << t_result(i)
- << " vs " << t_result_gpu(i) << std::endl;
- assert(false);
- }
- sycl_device.deallocate(d_t_left);
- sycl_device.deallocate(d_t_right);
- sycl_device.deallocate(d_t_result);
-
-
-}
-
-template<int DataLayout, typename DataType, typename IndexType, typename Device>
-void test_scalar(const Device& sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)
-{
- //std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
- // with these dimensions, the output has 300 * 140 elements, which is
- // more than 30 * 1024, which is the number of threads in blocks on
- // a 15 SM GK110 GPU
- typedef typename Tensor<DataType, 1, DataLayout, IndexType>::DimensionPair DimPair;
- static const DataType error_threshold =1e-4f;
- Tensor<DataType, 2, DataLayout, IndexType> t_left(m_size, k_size);
- Tensor<DataType, 2, DataLayout, IndexType> t_right(k_size, n_size);
- Tensor<DataType, 0, DataLayout, IndexType> t_result;
- Tensor<DataType, 0, DataLayout, IndexType> t_result_gpu;
- Eigen::array<DimPair, 2> dims = {{DimPair(0, 0), DimPair(1, 1)}};
- Eigen::array<IndexType, 2> left_dims = {{m_size, k_size}};
- Eigen::array<IndexType, 2> right_dims = {{k_size, n_size}};
- t_left.setRandom();
- t_right.setRandom();
-
- std::size_t t_left_bytes = t_left.size() * sizeof(DataType);
- std::size_t t_right_bytes = t_right.size() * sizeof(DataType);
- std::size_t t_result_bytes = sizeof(DataType);
-
-
- DataType * d_t_left = static_cast<DataType*>(sycl_device.allocate(t_left_bytes));
- DataType * d_t_right = static_cast<DataType*>(sycl_device.allocate(t_right_bytes));
- DataType * d_t_result = static_cast<DataType*>(sycl_device.allocate(t_result_bytes));
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_left(d_t_left, left_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_right(d_t_right, right_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 0, DataLayout, IndexType> > gpu_t_result(d_t_result);
-
- sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes);
- sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes);
-
- gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
- sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes);
-
- t_result = t_left.contract(t_right, dims);
-
- if (static_cast<DataType>(fabs(t_result() - t_result_gpu())) > error_threshold &&
- !Eigen::internal::isApprox(t_result(), t_result_gpu(), error_threshold)) {
- std::cout << "mismatch detected: " << t_result()
- << " vs " << t_result_gpu() << std::endl;
- assert(false);
- }
-
- sycl_device.deallocate(d_t_left);
- sycl_device.deallocate(d_t_right);
- sycl_device.deallocate(d_t_result);
-}
-
-
-template<int DataLayout, typename DataType, typename IndexType, typename Device>
-void test_sycl_contraction_m(const Device& sycl_device) {
- for (IndexType k = 32; k < 256; k++) {
- test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, k, 128, 128);
- }
-}
-
-template<int DataLayout, typename DataType, typename IndexType, typename Device>
-void test_sycl_contraction_k(const Device& sycl_device) {
- for (IndexType k = 32; k < 256; k++) {
- test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, 128, k, 128);
- }
-}
-
-template<int DataLayout, typename DataType, typename IndexType, typename Device>
-void test_sycl_contraction_n(const Device& sycl_device) {
- for (IndexType k = 32; k < 256; k++) {
- test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, 128, 128, k);
- }
-}
-
-
-template<int DataLayout, typename DataType, typename IndexType, typename Device>
-void test_sycl_contraction_sizes(const Device& sycl_device) {
- IndexType m_sizes[] = { 31, 39, 63, 64, 65,
- 127, 129, 255, 257 , 511,
- 512, 513, 1023, 1024, 1025};
-
- IndexType n_sizes[] = { 31, 39, 63, 64, 65,
- 127, 129, 255, 257, 511,
- 512, 513, 1023, 1024, 1025};
-
- IndexType k_sizes[] = { 31, 39, 63, 64, 65,
- 95, 96, 127, 129, 255,
- 257, 511, 512, 513, 1023,
- 1024, 1025};
-
- for (IndexType i = 0; i < 15; i++) {
- for (IndexType j = 0; j < 15; j++) {
- for (IndexType k = 0; k < 17; k++) {
- test_sycl_contraction<DataLayout, DataType,IndexType>(sycl_device, m_sizes[i], n_sizes[j], k_sizes[k]);
- }
- }
- }
-}
-
-template <typename Dev_selector> void tensorContractionPerDevice(Dev_selector& s){
- QueueInterface queueInterface(s);
- auto sycl_device=Eigen::SyclDevice(&queueInterface);
- test_sycl_contraction<ColMajor, float,int64_t>(sycl_device, 32, 32, 32);
- test_sycl_contraction<RowMajor,float,int64_t>(sycl_device, 32, 32, 32);
- test_scalar<ColMajor,float,int64_t>(sycl_device, 32, 32, 32);
- test_scalar<RowMajor,float,int64_t>(sycl_device, 32, 32, 32);
- std::chrono::time_point<std::chrono::system_clock> start, end;
- start = std::chrono::system_clock::now();
- test_sycl_contraction<ColMajor,float,int64_t>(sycl_device, 128, 128, 128);
- test_sycl_contraction<RowMajor,float,int64_t>(sycl_device, 128, 128, 128);
- test_scalar<ColMajor,float,int64_t>(sycl_device, 128, 128, 128);
- test_scalar<RowMajor,float,int64_t>(sycl_device, 128, 128, 128);
- test_sycl_contraction_m<ColMajor, float, int64_t>(sycl_device);
- test_sycl_contraction_m<RowMajor, float, int64_t>(sycl_device);
- test_sycl_contraction_n<ColMajor, float, int64_t>(sycl_device);
- test_sycl_contraction_n<RowMajor, float, int64_t>(sycl_device);
- test_sycl_contraction_k<ColMajor, float, int64_t>(sycl_device);
- test_sycl_contraction_k<RowMajor, float, int64_t>(sycl_device);
- test_sycl_contraction_sizes<ColMajor, float, int64_t>(sycl_device);
- test_sycl_contraction_sizes<RowMajor, float, int64_t>(sycl_device);
- test_TF<RowMajor, float, int64_t>(sycl_device);
- test_TF<ColMajor, float, int64_t>(sycl_device);
-
- end = std::chrono::system_clock::now();
- std::chrono::duration<double> elapsed_seconds = end-start;
- std::time_t end_time = std::chrono::system_clock::to_time_t(end);
- std::cout << "finished computation at " << std::ctime(&end_time)
- << "elapsed time: " << elapsed_seconds.count() << "s\n";
-
-}
-
-void test_cxx11_tensor_contract_sycl() {
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(tensorContractionPerDevice(device));
- }
-}
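The contraction pairs used throughout the removed test map directly onto matrix products. A host-side sketch of the DimPair convention, with illustrative sizes:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <cassert>
    #include <cmath>

    int main() {
      typedef Eigen::Tensor<float, 2>::DimensionPair DimPair;

      Eigen::Tensor<float, 2> a(4, 3), b(3, 5);
      a.setRandom();
      b.setRandom();

      // DimPair(1, 0) contracts dimension 1 of `a` against dimension 0 of `b`,
      // i.e. an ordinary matrix product with result shape (4, 5).
      Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
      Eigen::Tensor<float, 2> c = a.contract(b, dims);

      for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 5; ++j) {
          float ref = 0.0f;
          for (int k = 0; k < 3; ++k) ref += a(i, k) * b(k, j);
          assert(std::fabs(c(i, j) - ref) < 1e-4f);
        }
      }
      return 0;
    }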
diff --git a/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp
deleted file mode 100644
index a4226a6..0000000
--- a/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp
+++ /dev/null
@@ -1,469 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_convolution_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include <iostream>
-#include <chrono>
-#include <ctime>
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-#include <iomanip>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-static const float error_threshold =1e-4f;
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_larg_expr1D(const Eigen::SyclDevice& sycl_device)
-{
- IndexType indim0 =53;
- IndexType indim1= 55;
- IndexType indim2= 51;
- IndexType outdim0=50;
- IndexType outdim1=55;
- IndexType outdim2=51;
- Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
- Eigen::array<IndexType, 1> kernel_dims = {{4}};
- Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
-
- Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
- Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims);
- Tensor<DataType, 3, DataLayout,IndexType> result(result_dims);
- Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims);
-
- Eigen::array<IndexType, 1> dims3{{0}};
-
- input.setRandom();
- kernel.setRandom();
- result.setZero();
- result_host.setZero();
-
- std::size_t input_bytes = input.size() * sizeof(DataType);
- std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
- std::size_t result_bytes = result.size() * sizeof(DataType);
-
- DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
- DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
- DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
- sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
- sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
- gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3);
- sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
-
- result_host=input.convolve(kernel, dims3);
-
-for(IndexType i=0; i< outdim0; i++ ){
- for(IndexType j=0; j< outdim1; j++ ){
- for(IndexType k=0; k< outdim2; k++ ){
- if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) {
- std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl;
- assert(false);
- }
- }
- }
-}
- sycl_device.deallocate(d_input);
- sycl_device.deallocate(d_kernel);
- sycl_device.deallocate(d_result);
-
-}
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_larg_expr2D(const Eigen::SyclDevice& sycl_device)
-{
- IndexType indim0 =53;
- IndexType indim1= 55;
- IndexType indim2= 51;
- IndexType outdim0=50;
- IndexType outdim1=51;
- IndexType outdim2=51;
- Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
- Eigen::array<IndexType, 2> kernel_dims = {{4,5}};
- Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
-
- Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
- Tensor<DataType, 2, DataLayout,IndexType> kernel(kernel_dims);
- Tensor<DataType, 3, DataLayout,IndexType> result(result_dims);
- Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims);
-
- Eigen::array<IndexType, 2> dims3{{0,1}};
-
- input.setRandom();
- kernel.setRandom();
- result.setZero();
- result_host.setZero();
-
- std::size_t input_bytes = input.size() * sizeof(DataType);
- std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
- std::size_t result_bytes = result.size() * sizeof(DataType);
-
- DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
- DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
- DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
- sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
- sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
- gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3);
- sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
-
- result_host=input.convolve(kernel, dims3);
-
-for(IndexType i=0; i< outdim0; i++ ){
- for(IndexType j=0; j< outdim1; j++ ){
- for(IndexType k=0; k< outdim2; k++ ){
- if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) {
- std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl;
- assert(false);
- }
- }
- }
-}
- sycl_device.deallocate(d_input);
- sycl_device.deallocate(d_kernel);
- sycl_device.deallocate(d_result);
-
-}
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_larg_expr3D(const Eigen::SyclDevice& sycl_device)
-{
- IndexType indim0 =53;
- IndexType indim1= 55;
- IndexType indim2= 51;
- IndexType outdim0=50;
- IndexType outdim1=51;
- IndexType outdim2=49;
- Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
- Eigen::array<IndexType, 3> kernel_dims = {{4,5,3}};
- Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
-
- Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
- Tensor<DataType, 3, DataLayout,IndexType> kernel(kernel_dims);
- Tensor<DataType, 3, DataLayout,IndexType> result(result_dims);
- Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims);
-
- Eigen::array<IndexType, 3> dims3{{0,1,2}};
-
- input.setRandom();
- kernel.setRandom();
- result.setZero();
- result_host.setZero();
-
- std::size_t input_bytes = input.size() * sizeof(DataType);
- std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
- std::size_t result_bytes = result.size() * sizeof(DataType);
-
- DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
- DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
- DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
- sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
- sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
- gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3);
- sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
-
- result_host=input.convolve(kernel, dims3);
-
-for(IndexType i=0; i< outdim0; i++ ){
- for(IndexType j=0; j< outdim1; j++ ){
- for(IndexType k=0; k< outdim2; k++ ){
- if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) {
- std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl;
- assert(false);
- }
- }
- }
-}
- sycl_device.deallocate(d_input);
- sycl_device.deallocate(d_kernel);
- sycl_device.deallocate(d_result);
-
-}
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_evals(const Eigen::SyclDevice& sycl_device)
-{
- Eigen::array<IndexType, 2> input_dims = {{3, 3}};
- Eigen::array<IndexType, 1> kernel_dims = {{2}};
- Eigen::array<IndexType, 2> result_dims = {{2, 3}};
-
- Tensor<DataType, 2, DataLayout, IndexType> input(input_dims);
- Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims);
- Tensor<DataType, 2, DataLayout,IndexType> result(result_dims);
-
- Eigen::array<IndexType, 1> dims3{{0}};
-
- input.setRandom();
- kernel.setRandom();
- result.setZero();
-
- std::size_t input_bytes = input.size() * sizeof(DataType);
- std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
- std::size_t result_bytes = result.size() * sizeof(DataType);
-
- DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
- DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
- DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_input(d_input, input_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_result(d_result, result_dims);
- sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
- sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
- gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3);
- sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
-
- VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0) + input(1,0)*kernel(1)); // index 0
- VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0) + input(1,1)*kernel(1)); // index 2
- VERIFY_IS_APPROX(result(0,2), input(0,2)*kernel(0) + input(1,2)*kernel(1)); // index 4
- VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0) + input(2,0)*kernel(1)); // index 1
- VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0) + input(2,1)*kernel(1)); // index 3
- VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5
-
- sycl_device.deallocate(d_input);
- sycl_device.deallocate(d_kernel);
- sycl_device.deallocate(d_result);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_expr(const Eigen::SyclDevice& sycl_device)
-{
- Eigen::array<IndexType, 2> input_dims = {{3, 3}};
- Eigen::array<IndexType, 2> kernel_dims = {{2, 2}};
- Eigen::array<IndexType, 2> result_dims = {{2, 2}};
-
- Tensor<DataType, 2, DataLayout, IndexType> input(input_dims);
- Tensor<DataType, 2, DataLayout, IndexType> kernel(kernel_dims);
- Tensor<DataType, 2, DataLayout, IndexType> result(result_dims);
-
- input.setRandom();
- kernel.setRandom();
- Eigen::array<IndexType, 2> dims;
- dims[0] = 0;
- dims[1] = 1;
-
- std::size_t input_bytes = input.size() * sizeof(DataType);
- std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
- std::size_t result_bytes = result.size() * sizeof(DataType);
-
- DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
- DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
- DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_input(d_input, input_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_result(d_result, result_dims);
- sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
- sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
- gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims);
- sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
-
- VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0,0) + input(0,1)*kernel(0,1) +
- input(1,0)*kernel(1,0) + input(1,1)*kernel(1,1));
- VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0,0) + input(0,2)*kernel(0,1) +
- input(1,1)*kernel(1,0) + input(1,2)*kernel(1,1));
- VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0,0) + input(1,1)*kernel(0,1) +
- input(2,0)*kernel(1,0) + input(2,1)*kernel(1,1));
- VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0,0) + input(1,2)*kernel(0,1) +
- input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1));
-
- sycl_device.deallocate(d_input);
- sycl_device.deallocate(d_kernel);
- sycl_device.deallocate(d_result);
-}
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_modes(const Eigen::SyclDevice& sycl_device){
-
-Eigen::array<IndexType, 1> input_dims = {{3}};
-Eigen::array<IndexType, 1> kernel_dims = {{3}};
-
-Tensor<DataType, 1, DataLayout, IndexType> input(input_dims);
-Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims);
-
-input.setRandom();
-kernel.setRandom();
-Eigen::array<IndexType, 1> dims;
-dims[0] = 0;
-
- input(0) = 1.0f;
- input(1) = 2.0f;
- input(2) = 3.0f;
- kernel(0) = 0.5f;
- kernel(1) = 1.0f;
- kernel(2) = 0.0f;
-
- Eigen::array<std::pair<IndexType, IndexType>, 1> padding;
-
- // Emulate VALID mode (as defined in
- // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
- padding[0] = std::make_pair(0, 0);
- Tensor<DataType, 1, DataLayout, IndexType> valid(1);
-
- std::size_t input_bytes = input.size() * sizeof(DataType);
- std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
- std::size_t valid_bytes = valid.size() * sizeof(DataType);
-
- DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
- DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
- DataType * d_valid = static_cast<DataType*>(sycl_device.allocate(valid_bytes));
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_valid(d_valid, valid.dimensions());
- sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
- sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
- gpu_valid.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims);
- sycl_device.memcpyDeviceToHost(valid.data(), d_valid, valid_bytes);
-
- VERIFY_IS_EQUAL(valid.dimension(0), 1);
- VERIFY_IS_APPROX(valid(0), 2.5f);
-
- // Emulate SAME mode (as defined in
- // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
- padding[0] = std::make_pair(1, 1);
- Tensor<DataType, 1, DataLayout, IndexType> same(3);
- std::size_t same_bytes = same.size() * sizeof(DataType);
- DataType * d_same = static_cast<DataType*>(sycl_device.allocate(same_bytes));
- Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_same(d_same, same.dimensions());
- gpu_same.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims);
- sycl_device.memcpyDeviceToHost(same.data(), d_same, same_bytes);
-
- VERIFY_IS_EQUAL(same.dimension(0), 3);
- VERIFY_IS_APPROX(same(0), 1.0f);
- VERIFY_IS_APPROX(same(1), 2.5f);
- VERIFY_IS_APPROX(same(2), 4.0f);
-
- // Emulate FULL mode (as defined in
- // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
- padding[0] = std::make_pair(2, 2);
-
- Tensor<DataType, 1, DataLayout, IndexType> full(5);
- std::size_t full_bytes = full.size() * sizeof(DataType);
- DataType * d_full = static_cast<DataType*>(sycl_device.allocate(full_bytes));
- Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_full(d_full, full.dimensions());
- gpu_full.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims);
- sycl_device.memcpyDeviceToHost(full.data(), d_full, full_bytes);
-
- VERIFY_IS_EQUAL(full.dimension(0), 5);
- VERIFY_IS_APPROX(full(0), 0.0f);
- VERIFY_IS_APPROX(full(1), 1.0f);
- VERIFY_IS_APPROX(full(2), 2.5f);
- VERIFY_IS_APPROX(full(3), 4.0f);
- VERIFY_IS_APPROX(full(4), 1.5f);
-
- sycl_device.deallocate(d_input);
- sycl_device.deallocate(d_kernel);
- sycl_device.deallocate(d_valid);
- sycl_device.deallocate(d_same);
- sycl_device.deallocate(d_full);
-
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_strides(const Eigen::SyclDevice& sycl_device){
-
- Eigen::array<IndexType, 1> input_dims = {{13}};
- Eigen::array<IndexType, 1> kernel_dims = {{3}};
-
- Tensor<DataType, 1, DataLayout, IndexType> input(input_dims);
- Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims);
- Tensor<DataType, 1, DataLayout, IndexType> result(2);
-
- input.setRandom();
- kernel.setRandom();
- Eigen::array<IndexType, 1> dims;
- dims[0] = 0;
-
- Eigen::array<IndexType, 1> stride_of_3;
- stride_of_3[0] = 3;
- Eigen::array<IndexType, 1> stride_of_2;
- stride_of_2[0] = 2;
-
- std::size_t input_bytes = input.size() * sizeof(DataType);
- std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
- std::size_t result_bytes = result.size() * sizeof(DataType);
-
- DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes));
- DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
- DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes));
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_result(d_result, result.dimensions());
- sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
- sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
- gpu_result.device(sycl_device)=gpu_input.stride(stride_of_3).convolve(gpu_kernel, dims).stride(stride_of_2);
- sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
-
- VERIFY_IS_EQUAL(result.dimension(0), 2);
- VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) +
- input(6)*kernel(2)));
- VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) +
- input(12)*kernel(2)));
-}
-
-template <typename Dev_selector> void tensorConvolutionPerDevice(Dev_selector& s){
- QueueInterface queueInterface(s);
- auto sycl_device=Eigen::SyclDevice(&queueInterface);
- test_larg_expr1D<float, RowMajor, int64_t>(sycl_device);
- test_larg_expr1D<float, ColMajor, int64_t>(sycl_device);
- test_larg_expr2D<float, RowMajor, int64_t>(sycl_device);
- test_larg_expr2D<float, ColMajor, int64_t>(sycl_device);
- test_larg_expr3D<float, RowMajor, int64_t>(sycl_device);
- test_larg_expr3D<float, ColMajor, int64_t>(sycl_device);
- test_evals<float, ColMajor, int64_t>(sycl_device);
- test_evals<float, RowMajor, int64_t>(sycl_device);
- test_expr<float, ColMajor, int64_t>(sycl_device);
- test_expr<float, RowMajor, int64_t>(sycl_device);
- test_modes<float, ColMajor, int64_t>(sycl_device);
- test_modes<float, RowMajor, int64_t>(sycl_device);
- test_strides<float, ColMajor, int64_t>(sycl_device);
- test_strides<float, RowMajor, int64_t>(sycl_device);
-}
-
-void test_cxx11_tensor_convolution_sycl() {
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(tensorConvolutionPerDevice(device));
- }
-}
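The VALID/SAME/FULL emulation in the removed test_modes relies only on pad() composed with convolve(), which can be reproduced on the host. A minimal sketch using the same input and kernel values as the deleted test:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <cassert>
    #include <cmath>
    #include <utility>

    int main() {
      Eigen::Tensor<float, 1> input(3), kernel(3);
      input.setValues({1.0f, 2.0f, 3.0f});
      kernel.setValues({0.5f, 1.0f, 0.0f});

      Eigen::array<Eigen::Index, 1> dims = {{0}};
      Eigen::array<std::pair<Eigen::Index, Eigen::Index>, 1> padding;

      // VALID mode: no padding, output length 3 - 3 + 1 = 1.
      padding[0] = std::make_pair(0, 0);
      Eigen::Tensor<float, 1> valid = input.pad(padding).convolve(kernel, dims);
      assert(valid.dimension(0) == 1);
      assert(std::fabs(valid(0) - 2.5f) < 1e-4f);  // 0.5*1 + 1.0*2 + 0.0*3

      // FULL mode: pad by kernel_size - 1 on each side, output length 5.
      padding[0] = std::make_pair(2, 2);
      Eigen::Tensor<float, 1> full = input.pad(padding).convolve(kernel, dims);
      assert(full.dimension(0) == 5);
      assert(std::fabs(full(2) - 2.5f) < 1e-4f);   // center tap matches VALID
      return 0;
    }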
diff --git a/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp
index 3ecc68d..7f79753 100644
--- a/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp
@@ -14,64 +14,18 @@
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_device_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
-#include <stdint.h>
-#include <iostream>
-template <typename DataType, int DataLayout, typename IndexType>
-void test_device_memory(const Eigen::SyclDevice &sycl_device) {
- std::cout << "Running on : "
- << sycl_device.sycl_queue().get_device(). template get_info<cl::sycl::info::device::name>()
- <<std::endl;
- IndexType sizeDim1 = 100;
- array<IndexType, 1> tensorRange = {{sizeDim1}};
- Tensor<DataType, 1, DataLayout,IndexType> in(tensorRange);
- Tensor<DataType, 1, DataLayout,IndexType> in1(tensorRange);
- memset(in1.data(), 1, in1.size() * sizeof(DataType));
- DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.size()*sizeof(DataType)));
- sycl_device.memset(gpu_in_data, 1, in.size()*sizeof(DataType));
- sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType));
- for (IndexType i=0; i<in.size(); i++) {
- VERIFY_IS_EQUAL(in(i), in1(i));
- }
- sycl_device.deallocate(gpu_in_data);
+void test_device_sycl(const Eigen::SyclDevice &sycl_device) {
+ std::cout <<"Helo from ComputeCpp: the requested device exists and the device name is : "
+ << sycl_device.m_queue.get_device(). template get_info<cl::sycl::info::device::name>() <<std::endl;;
}
-
-template <typename DataType, int DataLayout, typename IndexType>
-void test_device_exceptions(const Eigen::SyclDevice &sycl_device) {
- VERIFY(sycl_device.ok());
- IndexType sizeDim1 = 100;
- array<IndexType, 1> tensorDims = {{sizeDim1}};
- DataType* gpu_data = static_cast<DataType*>(sycl_device.allocate(sizeDim1*sizeof(DataType)));
- sycl_device.memset(gpu_data, 1, sizeDim1*sizeof(DataType));
-
- TensorMap<Tensor<DataType, 1, DataLayout,IndexType>> in(gpu_data, tensorDims);
- TensorMap<Tensor<DataType, 1, DataLayout,IndexType>> out(gpu_data, tensorDims);
- out.device(sycl_device) = in / in.constant(0);
-
- sycl_device.synchronize();
- VERIFY(!sycl_device.ok());
- sycl_device.deallocate(gpu_data);
-}
-
-template<typename DataType> void sycl_device_test_per_device(const cl::sycl::device& d){
- std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
- QueueInterface queueInterface(d);
- auto sycl_device = Eigen::SyclDevice(&queueInterface);
- test_device_memory<DataType, RowMajor, int64_t>(sycl_device);
- test_device_memory<DataType, ColMajor, int64_t>(sycl_device);
-  /// This test throws an exception. Enable it if you want to see the exception.
-  //test_device_exceptions<DataType, RowMajor>(sycl_device);
-  /// This test throws an exception. Enable it if you want to see the exception.
-  //test_device_exceptions<DataType, ColMajor>(sycl_device);
-}
-
void test_cxx11_tensor_device_sycl() {
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(sycl_device_test_per_device<float>(device));
- }
+ cl::sycl::gpu_selector s;
+ Eigen::SyclDevice sycl_device(s);
+ CALL_SUBTEST(test_device_sycl(sycl_device));
}
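The replaced per-device loop followed a pattern that is easy to restate in isolation. A sketch of that pattern, assuming the QueueInterface-based SyclDevice constructor the removed lines used and a SYCL toolchain such as ComputeCpp:

    #define EIGEN_USE_SYCL
    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      // Enumerate every SYCL device Eigen can see and build a SyclDevice for
      // each one, mirroring the per-device loop the old test ran subtests in.
      for (const auto& d : Eigen::get_sycl_supported_devices()) {
        Eigen::QueueInterface queueInterface(d);
        Eigen::SyclDevice sycl_device(&queueInterface);
        std::cout << d.get_info<cl::sycl::info::device::name>() << std::endl;
      }
      return 0;
    }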
diff --git a/eigen/unsupported/test/cxx11_tensor_expr.cpp b/eigen/unsupported/test/cxx11_tensor_expr.cpp
index 129b4e6..77e24cb 100644
--- a/eigen/unsupported/test/cxx11_tensor_expr.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_expr.cpp
@@ -300,51 +300,6 @@ static void test_select()
}
}
-template <typename Scalar>
-void test_minmax_nan_propagation_templ() {
- for (int size = 1; size < 17; ++size) {
- const Scalar kNan = std::numeric_limits<Scalar>::quiet_NaN();
- Tensor<Scalar, 1> vec_nan(size);
- Tensor<Scalar, 1> vec_zero(size);
- Tensor<Scalar, 1> vec_res(size);
- vec_nan.setConstant(kNan);
- vec_zero.setZero();
- vec_res.setZero();
-
- // Test that we propagate NaNs in the tensor when applying the
- // cwiseMax(scalar) operator, which is used for the Relu operator.
- vec_res = vec_nan.cwiseMax(Scalar(0));
- for (int i = 0; i < size; ++i) {
- VERIFY((numext::isnan)(vec_res(i)));
- }
-
- // Test that NaNs do not propagate if we reverse the arguments.
- vec_res = vec_zero.cwiseMax(kNan);
- for (int i = 0; i < size; ++i) {
- VERIFY_IS_EQUAL(vec_res(i), Scalar(0));
- }
-
- // Test that we propagate NaNs in the tensor when applying the
- // cwiseMin(scalar) operator.
- vec_res.setZero();
- vec_res = vec_nan.cwiseMin(Scalar(0));
- for (int i = 0; i < size; ++i) {
- VERIFY((numext::isnan)(vec_res(i)));
- }
-
- // Test that NaNs do not propagate if we reverse the arguments.
- vec_res = vec_zero.cwiseMin(kNan);
- for (int i = 0; i < size; ++i) {
- VERIFY_IS_EQUAL(vec_res(i), Scalar(0));
- }
- }
-}
-
-static void test_minmax_nan_propagation()
-{
- test_minmax_nan_propagation_templ<float>();
- test_minmax_nan_propagation_templ<double>();
-}
void test_cxx11_tensor_expr()
{
@@ -356,5 +311,4 @@ void test_cxx11_tensor_expr()
CALL_SUBTEST(test_functors());
CALL_SUBTEST(test_type_casting());
CALL_SUBTEST(test_select());
- CALL_SUBTEST(test_minmax_nan_propagation());
}
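The removed test asserted an asymmetry in NaN handling: a NaN stored in the tensor propagates through cwiseMax/cwiseMin, while a NaN scalar argument does not. A host-only sketch of the same checks (whether they pass depends on the Eigen revision in the tree):

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <cassert>
    #include <limits>

    int main() {
      const float kNan = std::numeric_limits<float>::quiet_NaN();
      Eigen::Tensor<float, 1> vec_nan(4), vec_zero(4), res(4);
      vec_nan.setConstant(kNan);
      vec_zero.setZero();

      // NaN in the tensor argument propagates through cwiseMax(scalar)...
      res = vec_nan.cwiseMax(0.0f);
      for (int i = 0; i < 4; ++i) assert(Eigen::numext::isnan(res(i)));

      // ...but a NaN scalar argument does not: the tensor value wins.
      res = vec_zero.cwiseMax(kNan);
      for (int i = 0; i < 4; ++i) assert(res(i) == 0.0f);
      return 0;
    }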
diff --git a/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp b/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp
index e6274f8..4c660de 100644
--- a/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp
@@ -21,7 +21,7 @@ static void test_0d()
TensorFixedSize<float, Sizes<>, RowMajor> scalar2;
VERIFY_IS_EQUAL(scalar1.rank(), 0);
VERIFY_IS_EQUAL(scalar1.size(), 1);
- VERIFY_IS_EQUAL(internal::array_prod(scalar1.dimensions()), 1);
+ VERIFY_IS_EQUAL(array_prod(scalar1.dimensions()), 1);
scalar1() = 7.0;
scalar2() = 13.0;
diff --git a/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp
index aca036c..5690da7 100644
--- a/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp
@@ -14,43 +14,43 @@
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_forced_eval_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
-template <typename DataType, int DataLayout, typename IndexType>
+
void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) {
- IndexType sizeDim1 = 100;
- IndexType sizeDim2 = 20;
- IndexType sizeDim3 = 20;
- Eigen::array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
- Eigen::Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange);
- Eigen::Tensor<DataType, 3, DataLayout, IndexType> in2(tensorRange);
- Eigen::Tensor<DataType, 3, DataLayout, IndexType> out(tensorRange);
+ int sizeDim1 = 100;
+ int sizeDim2 = 200;
+ int sizeDim3 = 200;
+ Eigen::array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+ Eigen::Tensor<float, 3> in1(tensorRange);
+ Eigen::Tensor<float, 3> in2(tensorRange);
+ Eigen::Tensor<float, 3> out(tensorRange);
- DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType)));
- DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(DataType)));
- DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
+ float * gpu_in1_data = static_cast<float*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(float)));
+ float * gpu_in2_data = static_cast<float*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(float)));
+ float * gpu_out_data = static_cast<float*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float)));
in1 = in1.random() + in1.constant(10.0f);
in2 = in2.random() + in2.constant(10.0f);
// creating TensorMap from tensor
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, tensorRange);
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, tensorRange);
- Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange);
- sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(DataType));
- sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in1.dimensions().TotalSize())*sizeof(DataType));
+ Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in1(gpu_in1_data, tensorRange);
+ Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in2(gpu_in2_data, tensorRange);
+ Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_out(gpu_out_data, tensorRange);
+ sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(float));
+ sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in1.dimensions().TotalSize())*sizeof(float));
/// c=(a+b)*b
gpu_out.device(sycl_device) =(gpu_in1 + gpu_in2).eval() * gpu_in2;
- sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
VERIFY_IS_APPROX(out(i, j, k),
(in1(i, j, k) + in2(i, j, k)) * in2(i, j, k));
}
@@ -63,14 +63,8 @@ void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) {
}
-template <typename DataType, typename Dev_selector> void tensorForced_evalperDevice(Dev_selector s){
- QueueInterface queueInterface(s);
- auto sycl_device = Eigen::SyclDevice(&queueInterface);
- test_forced_eval_sycl<DataType, RowMajor, int64_t>(sycl_device);
- test_forced_eval_sycl<DataType, ColMajor, int64_t>(sycl_device);
-}
void test_cxx11_tensor_forced_eval_sycl() {
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(tensorForced_evalperDevice<float>(device));
- }
+ cl::sycl::gpu_selector s;
+ Eigen::SyclDevice sycl_device(s);
+ CALL_SUBTEST(test_forced_eval_sycl(sycl_device));
}
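
The expression under test is c = (a + b) * b, where .eval() forces the sum into a temporary instead of letting the whole expression fuse into a single kernel. The same pattern on the host, independent of the SYCL plumbing above (shapes illustrative):

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 3> a(2, 3, 4), b(2, 3, 4);
      a.setRandom();
      b.setRandom();
      // .eval() materializes (a + b) before the multiply runs.
      Eigen::Tensor<float, 3> c = (a + b).eval() * b;
      return 0;
    }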
diff --git a/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp
deleted file mode 100644
index 9b521bc..0000000
--- a/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-// Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_morphing_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_simple_reshape(const Eigen::SyclDevice& sycl_device)
-{
- typename Tensor<DataType, 5 ,DataLayout, IndexType>::Dimensions dim1(2,3,1,7,1);
- typename Tensor<DataType, 3 ,DataLayout, IndexType>::Dimensions dim2(2,3,7);
- typename Tensor<DataType, 2 ,DataLayout, IndexType>::Dimensions dim3(6,7);
- typename Tensor<DataType, 2 ,DataLayout, IndexType>::Dimensions dim4(2,21);
-
- Tensor<DataType, 5, DataLayout, IndexType> tensor1(dim1);
- Tensor<DataType, 3, DataLayout, IndexType> tensor2(dim2);
- Tensor<DataType, 2, DataLayout, IndexType> tensor3(dim3);
- Tensor<DataType, 2, DataLayout, IndexType> tensor4(dim4);
-
- tensor1.setRandom();
-
- DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor1.size()*sizeof(DataType)));
- DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
- DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(tensor3.size()*sizeof(DataType)));
- DataType* gpu_data4 = static_cast<DataType*>(sycl_device.allocate(tensor4.size()*sizeof(DataType)));
-
- TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu1(gpu_data1, dim1);
- TensorMap<Tensor<DataType, 3,DataLayout, IndexType>> gpu2(gpu_data2, dim2);
- TensorMap<Tensor<DataType, 2,DataLayout, IndexType>> gpu3(gpu_data3, dim3);
- TensorMap<Tensor<DataType, 2,DataLayout, IndexType>> gpu4(gpu_data4, dim4);
-
- sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(),(tensor1.size())*sizeof(DataType));
-
- gpu2.device(sycl_device)=gpu1.reshape(dim2);
- sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor1.size())*sizeof(DataType));
-
- gpu3.device(sycl_device)=gpu1.reshape(dim3);
- sycl_device.memcpyDeviceToHost(tensor3.data(), gpu_data3,(tensor3.size())*sizeof(DataType));
-
- gpu4.device(sycl_device)=gpu1.reshape(dim2).reshape(dim4);
- sycl_device.memcpyDeviceToHost(tensor4.data(), gpu_data4,(tensor4.size())*sizeof(DataType));
- for (IndexType i = 0; i < 2; ++i){
- for (IndexType j = 0; j < 3; ++j){
- for (IndexType k = 0; k < 7; ++k){
- VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k)); ///ColMajor
- if (static_cast<int>(DataLayout) == static_cast<int>(ColMajor)) {
- VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i+2*j,k)); ///ColMajor
- VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j+3*k)); ///ColMajor
- }
- else{
- //VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k)); /// RowMajor
- VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j*7 +k)); /// RowMajor
- VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i*3 +j,k)); /// RowMajor
- }
- }
- }
- }
- sycl_device.deallocate(gpu_data1);
- sycl_device.deallocate(gpu_data2);
- sycl_device.deallocate(gpu_data3);
- sycl_device.deallocate(gpu_data4);
-}
-
-
-template<typename DataType, int DataLayout, typename IndexType>
-static void test_reshape_as_lvalue(const Eigen::SyclDevice& sycl_device)
-{
- typename Tensor<DataType, 3, DataLayout, IndexType>::Dimensions dim1(2,3,7);
- typename Tensor<DataType, 2, DataLayout, IndexType>::Dimensions dim2(6,7);
- typename Tensor<DataType, 5, DataLayout, IndexType>::Dimensions dim3(2,3,1,7,1);
- Tensor<DataType, 3, DataLayout, IndexType> tensor(dim1);
- Tensor<DataType, 2, DataLayout, IndexType> tensor2d(dim2);
- Tensor<DataType, 5, DataLayout, IndexType> tensor5d(dim3);
-
- tensor.setRandom();
-
- DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
- DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2d.size()*sizeof(DataType)));
- DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(tensor5d.size()*sizeof(DataType)));
-
- TensorMap< Tensor<DataType, 3, DataLayout, IndexType> > gpu1(gpu_data1, dim1);
- TensorMap< Tensor<DataType, 2, DataLayout, IndexType> > gpu2(gpu_data2, dim2);
- TensorMap< Tensor<DataType, 5, DataLayout, IndexType> > gpu3(gpu_data3, dim3);
-
- sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
-
- gpu2.reshape(dim1).device(sycl_device)=gpu1;
- sycl_device.memcpyDeviceToHost(tensor2d.data(), gpu_data2,(tensor2d.size())*sizeof(DataType));
-
- gpu3.reshape(dim1).device(sycl_device)=gpu1;
- sycl_device.memcpyDeviceToHost(tensor5d.data(), gpu_data3,(tensor5d.size())*sizeof(DataType));
-
-
- for (IndexType i = 0; i < 2; ++i){
- for (IndexType j = 0; j < 3; ++j){
- for (IndexType k = 0; k < 7; ++k){
- VERIFY_IS_EQUAL(tensor5d(i,j,0,k,0), tensor(i,j,k));
- if (static_cast<int>(DataLayout) == static_cast<int>(ColMajor)) {
- VERIFY_IS_EQUAL(tensor2d(i+2*j,k), tensor(i,j,k)); ///ColMajor
- }
- else{
- VERIFY_IS_EQUAL(tensor2d(i*3 +j,k),tensor(i,j,k)); /// RowMajor
- }
- }
- }
- }
- sycl_device.deallocate(gpu_data1);
- sycl_device.deallocate(gpu_data2);
- sycl_device.deallocate(gpu_data3);
-}
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_simple_slice(const Eigen::SyclDevice &sycl_device)
-{
- IndexType sizeDim1 = 2;
- IndexType sizeDim2 = 3;
- IndexType sizeDim3 = 5;
- IndexType sizeDim4 = 7;
- IndexType sizeDim5 = 11;
- array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
- Tensor<DataType, 5,DataLayout, IndexType> tensor(tensorRange);
- tensor.setRandom();
- array<IndexType, 5> slice1_range ={{1, 1, 1, 1, 1}};
- Tensor<DataType, 5,DataLayout, IndexType> slice1(slice1_range);
-
- DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
- DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(slice1.size()*sizeof(DataType)));
- TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu1(gpu_data1, tensorRange);
- TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu2(gpu_data2, slice1_range);
- Eigen::DSizes<IndexType, 5> indices(1,2,3,4,5);
- Eigen::DSizes<IndexType, 5> sizes(1,1,1,1,1);
- sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
- gpu2.device(sycl_device)=gpu1.slice(indices, sizes);
- sycl_device.memcpyDeviceToHost(slice1.data(), gpu_data2,(slice1.size())*sizeof(DataType));
- VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5));
-
-
- array<IndexType, 5> slice2_range ={{1,1,2,2,3}};
- Tensor<DataType, 5,DataLayout, IndexType> slice2(slice2_range);
- DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(slice2.size()*sizeof(DataType)));
- TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu3(gpu_data3, slice2_range);
- Eigen::DSizes<IndexType, 5> indices2(1,1,3,4,5);
- Eigen::DSizes<IndexType, 5> sizes2(1,1,2,2,3);
- gpu3.device(sycl_device)=gpu1.slice(indices2, sizes2);
- sycl_device.memcpyDeviceToHost(slice2.data(), gpu_data3,(slice2.size())*sizeof(DataType));
- for (IndexType i = 0; i < 2; ++i) {
- for (IndexType j = 0; j < 2; ++j) {
- for (IndexType k = 0; k < 3; ++k) {
- VERIFY_IS_EQUAL(slice2(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k));
- }
- }
- }
- sycl_device.deallocate(gpu_data1);
- sycl_device.deallocate(gpu_data2);
- sycl_device.deallocate(gpu_data3);
-}
-
-template<typename DataType, int DataLayout, typename IndexType>
-static void test_strided_slice_write_sycl(const Eigen::SyclDevice& sycl_device)
-{
- typedef Tensor<DataType, 2, DataLayout, IndexType> Tensor2f;
- typedef Eigen::DSizes<IndexType, 2> Index2;
- IndexType sizeDim1 = 7L;
- IndexType sizeDim2 = 11L;
- array<IndexType, 2> tensorRange = {{sizeDim1, sizeDim2}};
- Tensor<DataType, 2, DataLayout, IndexType> tensor(tensorRange),tensor2(tensorRange);
- IndexType sliceDim1 = 2;
- IndexType sliceDim2 = 3;
- array<IndexType, 2> sliceRange = {{sliceDim1, sliceDim2}};
- Tensor2f slice(sliceRange);
- Index2 strides(1L,1L);
- Index2 indicesStart(3L,4L);
- Index2 indicesStop(5L,7L);
- Index2 lengths(2L,3L);
-
- DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
- DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
- DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(slice.size()*sizeof(DataType)));
- TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
- TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu2(gpu_data2, tensorRange);
- TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu3(gpu_data3, sliceRange);
-
-
- tensor.setRandom();
- sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
- gpu2.device(sycl_device)=gpu1;
-
- slice.setRandom();
- sycl_device.memcpyHostToDevice(gpu_data3, slice.data(),(slice.size())*sizeof(DataType));
-
-
- gpu1.slice(indicesStart,lengths).device(sycl_device)=gpu3;
- gpu2.stridedSlice(indicesStart,indicesStop,strides).device(sycl_device)=gpu3;
- sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data1,(tensor.size())*sizeof(DataType));
- sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor2.size())*sizeof(DataType));
-
- for(IndexType i=0;i<sizeDim1;i++)
- for(IndexType j=0;j<sizeDim2;j++){
- VERIFY_IS_EQUAL(tensor(i,j), tensor2(i,j));
- }
- sycl_device.deallocate(gpu_data1);
- sycl_device.deallocate(gpu_data2);
- sycl_device.deallocate(gpu_data3);
-}
-
-template<typename DataType, typename dev_Selector> void sycl_morphing_test_per_device(dev_Selector s){
- QueueInterface queueInterface(s);
- auto sycl_device = Eigen::SyclDevice(&queueInterface);
- test_simple_slice<DataType, RowMajor, int64_t>(sycl_device);
- test_simple_slice<DataType, ColMajor, int64_t>(sycl_device);
- test_simple_reshape<DataType, RowMajor, int64_t>(sycl_device);
- test_simple_reshape<DataType, ColMajor, int64_t>(sycl_device);
- test_reshape_as_lvalue<DataType, RowMajor, int64_t>(sycl_device);
- test_reshape_as_lvalue<DataType, ColMajor, int64_t>(sycl_device);
- test_strided_slice_write_sycl<DataType, ColMajor, int64_t>(sycl_device);
- test_strided_slice_write_sycl<DataType, RowMajor, int64_t>(sycl_device);
-}
-void test_cxx11_tensor_morphing_sycl()
-{
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(sycl_morphing_test_per_device<float>(device));
- }
-}
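
The deleted file covered reshape and slice, both of which build expression views that are only materialized on assignment. A compact host-side sketch of the two operations (sizes and offsets illustrative):

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 3> t(2, 3, 7);
      t.setRandom();
      // Reshape: the same 42 elements viewed as a rank-2 tensor.
      Eigen::array<Eigen::Index, 2> dim2{{6, 7}};
      Eigen::Tensor<float, 2> r = t.reshape(dim2);
      // Slice: a 2x1x3 window starting at offset (0, 1, 2).
      Eigen::array<Eigen::Index, 3> off{{0, 1, 2}}, ext{{2, 1, 3}};
      Eigen::Tensor<float, 3> s = t.slice(off, ext);
      return 0;
    }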
diff --git a/eigen/unsupported/test/cxx11_tensor_notification.cpp b/eigen/unsupported/test/cxx11_tensor_notification.cpp
index 183ef02..c946007 100644
--- a/eigen/unsupported/test/cxx11_tensor_notification.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_notification.cpp
@@ -13,6 +13,15 @@
#include "main.h"
#include <Eigen/CXX11/Tensor>
+#if EIGEN_OS_WIN || EIGEN_OS_WIN64
+#include <windows.h>
+void sleep(int seconds) {
+ Sleep(seconds*1000);
+}
+#else
+#include <unistd.h>
+#endif
+
namespace {
@@ -31,7 +40,7 @@ static void test_notification_single()
Eigen::Notification n;
std::function<void()> func = std::bind(&WaitAndAdd, &n, &counter);
thread_pool.Schedule(func);
- EIGEN_SLEEP(1000);
+ sleep(1);
// The thread should be waiting for the notification.
VERIFY_IS_EQUAL(counter, 0);
@@ -39,7 +48,7 @@ static void test_notification_single()
// Unblock the thread
n.Notify();
- EIGEN_SLEEP(1000);
+ sleep(1);
// Verify the counter has been incremented
VERIFY_IS_EQUAL(counter, 1);
@@ -58,10 +67,10 @@ static void test_notification_multiple()
thread_pool.Schedule(func);
thread_pool.Schedule(func);
thread_pool.Schedule(func);
- EIGEN_SLEEP(1000);
+ sleep(1);
VERIFY_IS_EQUAL(counter, 0);
n.Notify();
- EIGEN_SLEEP(1000);
+ sleep(1);
VERIFY_IS_EQUAL(counter, 4);
}
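
The sleep() shim above exists only to give the pool threads time to block before the counter is inspected. The primitive itself is one-shot and sticky: once Notify() fires, current and future Wait() calls return. A sketch with the same Notification/ThreadPool API the test uses (the EIGEN_USE_THREADS define and the lambda are assumptions of this sketch, not part of the diff):

    #define EIGEN_USE_THREADS
    #include <Eigen/CXX11/Tensor>

    int main() {
      Eigen::Notification n;
      Eigen::ThreadPool pool(2);
      int counter = 0;
      pool.Schedule([&n, &counter] {
        n.Wait();    // blocks until Notify() has been called
        ++counter;
      });
      n.Notify();    // releases the waiter; later Wait() calls return at once
      return 0;      // the pool destructor joins its worker threads
    }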
diff --git a/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu
index 908a5e5..2f86980 100644
--- a/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu
+++ b/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu
@@ -200,8 +200,6 @@ void test_cuda_trancendental() {
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_half(d_res3_half, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem);
- Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res4_half(d_res3_half, num_elem);
- Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res4_float(d_res3_float, num_elem);
gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f);
@@ -209,7 +207,6 @@ void test_cuda_trancendental() {
gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::half>();
gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::half>();
gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast<Eigen::half>();
- gpu_res4_float.device(gpu_device) = gpu_float3.expm1().cast<Eigen::half>();
gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>();
gpu_res1_half.device(gpu_device) = gpu_res1_half.exp();
@@ -220,9 +217,6 @@ void test_cuda_trancendental() {
gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>();
gpu_res3_half.device(gpu_device) = gpu_res3_half.log1p();
- gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>();
- gpu_res3_half.device(gpu_device) = gpu_res3_half.expm1();
-
Tensor<float, 1> input1(num_elem);
Tensor<Eigen::half, 1> half_prec1(num_elem);
Tensor<Eigen::half, 1> full_prec1(num_elem);
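
The removed lines were stray res4 maps aliasing the res3 buffers, plus an expm1 check built on them. The float/half cast round-trip that the remaining checks depend on also works on the host, e.g.:

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 1> f(8);
      f.setRandom();
      // Down-casting to half loses precision; casting back gives a nearby float.
      Eigen::Tensor<Eigen::half, 1> h = f.cast<Eigen::half>();
      Eigen::Tensor<float, 1> back = h.cast<float>();
      return 0;
    }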
diff --git a/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp
deleted file mode 100644
index dc748b7..0000000
--- a/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-// Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_padding_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-
-
-template<typename DataType, int DataLayout, typename IndexType>
-static void test_simple_padding(const Eigen::SyclDevice& sycl_device)
-{
-
- IndexType sizeDim1 = 2;
- IndexType sizeDim2 = 3;
- IndexType sizeDim3 = 5;
- IndexType sizeDim4 = 7;
- array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
-
- Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
- tensor.setRandom();
-
- array<std::pair<IndexType, IndexType>, 4> paddings;
- paddings[0] = std::make_pair(0, 0);
- paddings[1] = std::make_pair(2, 1);
- paddings[2] = std::make_pair(3, 4);
- paddings[3] = std::make_pair(0, 0);
-
- IndexType padedSizeDim1 = 2;
- IndexType padedSizeDim2 = 6;
- IndexType padedSizeDim3 = 12;
- IndexType padedSizeDim4 = 7;
- array<IndexType, 4> padedtensorRange = {{padedSizeDim1, padedSizeDim2, padedSizeDim3, padedSizeDim4}};
-
- Tensor<DataType, 4, DataLayout, IndexType> padded(padedtensorRange);
-
-
- DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
- DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(padded.size()*sizeof(DataType)));
- TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
- TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu2(gpu_data2, padedtensorRange);
-
- VERIFY_IS_EQUAL(padded.dimension(0), 2+0);
- VERIFY_IS_EQUAL(padded.dimension(1), 3+3);
- VERIFY_IS_EQUAL(padded.dimension(2), 5+7);
- VERIFY_IS_EQUAL(padded.dimension(3), 7+0);
- sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
- gpu2.device(sycl_device)=gpu1.pad(paddings);
- sycl_device.memcpyDeviceToHost(padded.data(), gpu_data2,(padded.size())*sizeof(DataType));
- for (IndexType i = 0; i < padedSizeDim1; ++i) {
- for (IndexType j = 0; j < padedSizeDim2; ++j) {
- for (IndexType k = 0; k < padedSizeDim3; ++k) {
- for (IndexType l = 0; l < padedSizeDim4; ++l) {
- if (j >= 2 && j < 5 && k >= 3 && k < 8) {
- VERIFY_IS_EQUAL(padded(i,j,k,l), tensor(i,j-2,k-3,l));
- } else {
- VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f);
- }
- }
- }
- }
- }
- sycl_device.deallocate(gpu_data1);
- sycl_device.deallocate(gpu_data2);
-}
-
-template<typename DataType, int DataLayout, typename IndexType>
-static void test_padded_expr(const Eigen::SyclDevice& sycl_device)
-{
- IndexType sizeDim1 = 2;
- IndexType sizeDim2 = 3;
- IndexType sizeDim3 = 5;
- IndexType sizeDim4 = 7;
- array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
-
- Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
- tensor.setRandom();
-
- array<std::pair<IndexType, IndexType>, 4> paddings;
- paddings[0] = std::make_pair(0, 0);
- paddings[1] = std::make_pair(2, 1);
- paddings[2] = std::make_pair(3, 4);
- paddings[3] = std::make_pair(0, 0);
-
- Eigen::DSizes<IndexType, 2> reshape_dims;
- reshape_dims[0] = 12;
- reshape_dims[1] = 84;
-
-
- Tensor<DataType, 2, DataLayout, IndexType> result(reshape_dims);
-
- DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
- DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(result.size()*sizeof(DataType)));
- TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
- TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu2(gpu_data2, reshape_dims);
-
-
- sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
- gpu2.device(sycl_device)=gpu1.pad(paddings).reshape(reshape_dims);
- sycl_device.memcpyDeviceToHost(result.data(), gpu_data2,(result.size())*sizeof(DataType));
-
- for (IndexType i = 0; i < 2; ++i) {
- for (IndexType j = 0; j < 6; ++j) {
- for (IndexType k = 0; k < 12; ++k) {
- for (IndexType l = 0; l < 7; ++l) {
- const float result_value = DataLayout == ColMajor ?
- result(i+2*j,k+12*l) : result(j+6*i,l+7*k);
- if (j >= 2 && j < 5 && k >= 3 && k < 8) {
- VERIFY_IS_EQUAL(result_value, tensor(i,j-2,k-3,l));
- } else {
- VERIFY_IS_EQUAL(result_value, 0.0f);
- }
- }
- }
- }
- }
- sycl_device.deallocate(gpu_data1);
- sycl_device.deallocate(gpu_data2);
-}
-
-template<typename DataType, typename dev_Selector> void sycl_padding_test_per_device(dev_Selector s){
- QueueInterface queueInterface(s);
- auto sycl_device = Eigen::SyclDevice(&queueInterface);
- test_simple_padding<DataType, RowMajor, int64_t>(sycl_device);
- test_simple_padding<DataType, ColMajor, int64_t>(sycl_device);
- test_padded_expr<DataType, RowMajor, int64_t>(sycl_device);
- test_padded_expr<DataType, ColMajor, int64_t>(sycl_device);
-
-}
-void test_cxx11_tensor_padding_sycl()
-{
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(sycl_padding_test_per_device<float>(device));
- }
-}
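
The deleted padding test checked that pad() grows each dimension by the requested (before, after) amounts and zero-fills everything outside the source tensor. In miniature:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <utility>

    int main() {
      Eigen::Tensor<float, 2> t(2, 3);
      t.setRandom();
      Eigen::array<std::pair<Eigen::Index, Eigen::Index>, 2> pads;
      pads[0] = std::make_pair(1, 0);  // one zero row before dim 0
      pads[1] = std::make_pair(0, 2);  // two zero columns after dim 1
      Eigen::Tensor<float, 2> p = t.pad(pads);  // 3 x 5; zeros outside t
      return 0;
    }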
diff --git a/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp
index 440d48b..a9ef829 100644
--- a/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp
@@ -14,168 +14,125 @@
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_reduction_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_SYCL
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_full_reductions_mean_sycl(const Eigen::SyclDevice& sycl_device) {
- const IndexType num_rows = 452;
- const IndexType num_cols = 765;
- array<IndexType, 2> tensorRange = {{num_rows, num_cols}};
+static void test_full_reductions_sycl(const Eigen::SyclDevice& sycl_device) {
- Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
- Tensor<DataType, 0, DataLayout, IndexType> full_redux;
- Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu;
-
- in.setRandom();
-
- full_redux = in.mean();
-
- DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType)));
- DataType* gpu_out_data =(DataType*)sycl_device.allocate(sizeof(DataType));
-
- TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange);
- TensorMap<Tensor<DataType, 0, DataLayout, IndexType> > out_gpu(gpu_out_data);
-
- sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType));
- out_gpu.device(sycl_device) = in_gpu.mean();
- sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(DataType));
- // Check that the CPU and GPU reductions return the same result.
- VERIFY_IS_APPROX(full_redux_gpu(), full_redux());
- sycl_device.deallocate(gpu_in_data);
- sycl_device.deallocate(gpu_out_data);
-}
+ const int num_rows = 452;
+ const int num_cols = 765;
+ array<int, 2> tensorRange = {{num_rows, num_cols}};
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_full_reductions_min_sycl(const Eigen::SyclDevice& sycl_device) {
-
- const IndexType num_rows = 876;
- const IndexType num_cols = 953;
- array<IndexType, 2> tensorRange = {{num_rows, num_cols}};
-
- Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
- Tensor<DataType, 0, DataLayout, IndexType> full_redux;
- Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu;
+ Tensor<float, 2> in(tensorRange);
+ Tensor<float, 0> full_redux;
+ Tensor<float, 0> full_redux_gpu;
in.setRandom();
- full_redux = in.minimum();
+ full_redux = in.sum();
- DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType)));
- DataType* gpu_out_data =(DataType*)sycl_device.allocate(sizeof(DataType));
+ float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float)));
+ float* gpu_out_data =(float*)sycl_device.allocate(sizeof(float));
- TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange);
- TensorMap<Tensor<DataType, 0, DataLayout, IndexType> > out_gpu(gpu_out_data);
+ TensorMap<Tensor<float, 2> > in_gpu(gpu_in_data, tensorRange);
+ TensorMap<Tensor<float, 0> > out_gpu(gpu_out_data);
- sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType));
- out_gpu.device(sycl_device) = in_gpu.minimum();
- sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(DataType));
+ sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float));
+ out_gpu.device(sycl_device) = in_gpu.sum();
+ sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(float));
// Check that the CPU and GPU reductions return the same result.
VERIFY_IS_APPROX(full_redux_gpu(), full_redux());
+
sycl_device.deallocate(gpu_in_data);
sycl_device.deallocate(gpu_out_data);
}
+static void test_first_dim_reductions_sycl(const Eigen::SyclDevice& sycl_device) {
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_first_dim_reductions_max_sycl(const Eigen::SyclDevice& sycl_device) {
-
- IndexType dim_x = 145;
- IndexType dim_y = 1;
- IndexType dim_z = 67;
+ int dim_x = 145;
+ int dim_y = 1;
+ int dim_z = 67;
- array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}};
- Eigen::array<IndexType, 1> red_axis;
+ array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}};
+ Eigen::array<int, 1> red_axis;
red_axis[0] = 0;
- array<IndexType, 2> reduced_tensorRange = {{dim_y, dim_z}};
+ array<int, 2> reduced_tensorRange = {{dim_y, dim_z}};
- Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
- Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange);
- Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange);
+ Tensor<float, 3> in(tensorRange);
+ Tensor<float, 2> redux(reduced_tensorRange);
+ Tensor<float, 2> redux_gpu(reduced_tensorRange);
in.setRandom();
- redux= in.maximum(red_axis);
+ redux= in.sum(red_axis);
- DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType)));
- DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType)));
+ float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float)));
+ float* gpu_out_data = static_cast<float*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float)));
- TensorMap<Tensor<DataType, 3, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange);
- TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > out_gpu(gpu_out_data, reduced_tensorRange);
+ TensorMap<Tensor<float, 3> > in_gpu(gpu_in_data, tensorRange);
+ TensorMap<Tensor<float, 2> > out_gpu(gpu_out_data, reduced_tensorRange);
- sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType));
- out_gpu.device(sycl_device) = in_gpu.maximum(red_axis);
- sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(DataType));
+ sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float));
+ out_gpu.device(sycl_device) = in_gpu.sum(red_axis);
+ sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float));
// Check that the CPU and GPU reductions return the same result.
- for(IndexType j=0; j<reduced_tensorRange[0]; j++ )
- for(IndexType k=0; k<reduced_tensorRange[1]; k++ )
+ for(int j=0; j<reduced_tensorRange[0]; j++ )
+ for(int k=0; k<reduced_tensorRange[1]; k++ )
VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k));
sycl_device.deallocate(gpu_in_data);
sycl_device.deallocate(gpu_out_data);
}
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_last_dim_reductions_sum_sycl(const Eigen::SyclDevice &sycl_device) {
+static void test_last_dim_reductions_sycl(const Eigen::SyclDevice &sycl_device) {
- IndexType dim_x = 567;
- IndexType dim_y = 1;
- IndexType dim_z = 47;
+ int dim_x = 567;
+ int dim_y = 1;
+ int dim_z = 47;
- array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}};
- Eigen::array<IndexType, 1> red_axis;
+ array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}};
+ Eigen::array<int, 1> red_axis;
red_axis[0] = 2;
- array<IndexType, 2> reduced_tensorRange = {{dim_x, dim_y}};
+ array<int, 2> reduced_tensorRange = {{dim_x, dim_y}};
- Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
- Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange);
- Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange);
+ Tensor<float, 3> in(tensorRange);
+ Tensor<float, 2> redux(reduced_tensorRange);
+ Tensor<float, 2> redux_gpu(reduced_tensorRange);
in.setRandom();
redux= in.sum(red_axis);
- DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType)));
- DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType)));
+ float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float)));
+ float* gpu_out_data = static_cast<float*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float)));
- TensorMap<Tensor<DataType, 3, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange);
- TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > out_gpu(gpu_out_data, reduced_tensorRange);
+ TensorMap<Tensor<float, 3> > in_gpu(gpu_in_data, tensorRange);
+ TensorMap<Tensor<float, 2> > out_gpu(gpu_out_data, reduced_tensorRange);
- sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType));
+ sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float));
out_gpu.device(sycl_device) = in_gpu.sum(red_axis);
- sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(DataType));
+ sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float));
// Check that the CPU and GPU reductions return the same result.
- for(IndexType j=0; j<reduced_tensorRange[0]; j++ )
- for(IndexType k=0; k<reduced_tensorRange[1]; k++ )
+ for(int j=0; j<reduced_tensorRange[0]; j++ )
+ for(int k=0; k<reduced_tensorRange[1]; k++ )
VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k));
sycl_device.deallocate(gpu_in_data);
sycl_device.deallocate(gpu_out_data);
}
-template<typename DataType> void sycl_reduction_test_per_device(const cl::sycl::device& d){
- std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
- QueueInterface queueInterface(d);
- auto sycl_device = Eigen::SyclDevice(&queueInterface);
-
- test_full_reductions_mean_sycl<DataType, RowMajor, int64_t>(sycl_device);
- test_full_reductions_min_sycl<DataType, RowMajor, int64_t>(sycl_device);
- test_first_dim_reductions_max_sycl<DataType, RowMajor, int64_t>(sycl_device);
- test_last_dim_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device);
- test_full_reductions_mean_sycl<DataType, ColMajor, int64_t>(sycl_device);
- test_full_reductions_min_sycl<DataType, ColMajor, int64_t>(sycl_device);
- test_first_dim_reductions_max_sycl<DataType, ColMajor, int64_t>(sycl_device);
- test_last_dim_reductions_sum_sycl<DataType, ColMajor, int64_t>(sycl_device);
-}
+
void test_cxx11_tensor_reduction_sycl() {
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(sycl_reduction_test_per_device<float>(device));
- }
+ cl::sycl::gpu_selector s;
+ Eigen::SyclDevice sycl_device(s);
+ CALL_SUBTEST((test_full_reductions_sycl(sycl_device)));
+ CALL_SUBTEST((test_first_dim_reductions_sycl(sycl_device)));
+ CALL_SUBTEST((test_last_dim_reductions_sycl(sycl_device)));
+
}
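
The rewritten test keeps three flavours of the same comparison: a full reduction down to a rank-0 tensor, and partial reductions over the first and last axes. On the host the three look like this (shape illustrative):

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 3> in(4, 1, 5);
      in.setRandom();
      Eigen::Tensor<float, 0> total = in.sum();       // full reduction
      float value = total();                          // rank-0 access
      (void)value;
      Eigen::array<int, 1> axis0{{0}};
      Eigen::Tensor<float, 2> first = in.sum(axis0);  // shape 1 x 5
      Eigen::array<int, 1> axis2{{2}};
      Eigen::Tensor<float, 2> last = in.sum(axis2);   // shape 4 x 1
      return 0;
    }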
diff --git a/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp
deleted file mode 100644
index 2f54844..0000000
--- a/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp
+++ /dev/null
@@ -1,221 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_reverse_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) {
-
- IndexType dim1 = 2;
- IndexType dim2 = 3;
- IndexType dim3 = 5;
- IndexType dim4 = 7;
-
- array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}};
- Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
- Tensor<DataType, 4, DataLayout, IndexType> reversed_tensor(tensorRange);
- tensor.setRandom();
-
- array<bool, 4> dim_rev;
- dim_rev[0] = false;
- dim_rev[1] = true;
- dim_rev[2] = true;
- dim_rev[3] = false;
-
- DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(tensor.dimensions().TotalSize()*sizeof(DataType)));
- DataType* gpu_out_data =static_cast<DataType*>(sycl_device.allocate(reversed_tensor.dimensions().TotalSize()*sizeof(DataType)));
-
- TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange);
- TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu(gpu_out_data, tensorRange);
-
- sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(),(tensor.dimensions().TotalSize())*sizeof(DataType));
- out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
- sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType));
- // Check that the GPU reverse matches the expected host-side indexing.
- for (IndexType i = 0; i < 2; ++i) {
- for (IndexType j = 0; j < 3; ++j) {
- for (IndexType k = 0; k < 5; ++k) {
- for (IndexType l = 0; l < 7; ++l) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(i,2-j,4-k,l));
- }
- }
- }
- }
- dim_rev[0] = true;
- dim_rev[1] = false;
- dim_rev[2] = false;
- dim_rev[3] = false;
-
- out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
- sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType));
-
- for (IndexType i = 0; i < 2; ++i) {
- for (IndexType j = 0; j < 3; ++j) {
- for (IndexType k = 0; k < 5; ++k) {
- for (IndexType l = 0; l < 7; ++l) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,l));
- }
- }
- }
- }
-
- dim_rev[0] = true;
- dim_rev[1] = false;
- dim_rev[2] = false;
- dim_rev[3] = true;
- out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
- sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType));
-
- for (IndexType i = 0; i < 2; ++i) {
- for (IndexType j = 0; j < 3; ++j) {
- for (IndexType k = 0; k < 5; ++k) {
- for (IndexType l = 0; l < 7; ++l) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,6-l));
- }
- }
- }
- }
-
- sycl_device.deallocate(gpu_in_data);
- sycl_device.deallocate(gpu_out_data);
-}
-
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_expr_reverse(const Eigen::SyclDevice& sycl_device, bool LValue)
-{
- IndexType dim1 = 2;
- IndexType dim2 = 3;
- IndexType dim3 = 5;
- IndexType dim4 = 7;
-
- array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}};
- Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
- Tensor<DataType, 4, DataLayout, IndexType> expected(tensorRange);
- Tensor<DataType, 4, DataLayout, IndexType> result(tensorRange);
- tensor.setRandom();
-
- array<bool, 4> dim_rev;
- dim_rev[0] = false;
- dim_rev[1] = true;
- dim_rev[2] = false;
- dim_rev[3] = true;
-
- DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(tensor.dimensions().TotalSize()*sizeof(DataType)));
- DataType* gpu_out_data_expected =static_cast<DataType*>(sycl_device.allocate(expected.dimensions().TotalSize()*sizeof(DataType)));
- DataType* gpu_out_data_result =static_cast<DataType*>(sycl_device.allocate(result.dimensions().TotalSize()*sizeof(DataType)));
-
- TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange);
- TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_expected(gpu_out_data_expected, tensorRange);
- TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_result(gpu_out_data_result, tensorRange);
-
-
- sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(),(tensor.dimensions().TotalSize())*sizeof(DataType));
-
- if (LValue) {
- out_gpu_expected.reverse(dim_rev).device(sycl_device) = in_gpu;
- } else {
- out_gpu_expected.device(sycl_device) = in_gpu.reverse(dim_rev);
- }
- sycl_device.memcpyDeviceToHost(expected.data(), gpu_out_data_expected, expected.dimensions().TotalSize()*sizeof(DataType));
-
-
- array<IndexType, 4> src_slice_dim;
- src_slice_dim[0] = 2;
- src_slice_dim[1] = 3;
- src_slice_dim[2] = 1;
- src_slice_dim[3] = 7;
- array<IndexType, 4> src_slice_start;
- src_slice_start[0] = 0;
- src_slice_start[1] = 0;
- src_slice_start[2] = 0;
- src_slice_start[3] = 0;
- array<IndexType, 4> dst_slice_dim = src_slice_dim;
- array<IndexType, 4> dst_slice_start = src_slice_start;
-
- for (IndexType i = 0; i < 5; ++i) {
- if (LValue) {
- out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) =
- in_gpu.slice(src_slice_start, src_slice_dim);
- } else {
- out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
- in_gpu.slice(src_slice_start, src_slice_dim).reverse(dim_rev);
- }
- src_slice_start[2] += 1;
- dst_slice_start[2] += 1;
- }
- sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result, result.dimensions().TotalSize()*sizeof(DataType));
-
- for (IndexType i = 0; i < expected.dimension(0); ++i) {
- for (IndexType j = 0; j < expected.dimension(1); ++j) {
- for (IndexType k = 0; k < expected.dimension(2); ++k) {
- for (IndexType l = 0; l < expected.dimension(3); ++l) {
- VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
- }
- }
- }
- }
-
- dst_slice_start[2] = 0;
- result.setRandom();
- sycl_device.memcpyHostToDevice(gpu_out_data_result, result.data(),(result.dimensions().TotalSize())*sizeof(DataType));
- for (IndexType i = 0; i < 5; ++i) {
- if (LValue) {
- out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) =
- in_gpu.slice(dst_slice_start, dst_slice_dim);
- } else {
- out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
- in_gpu.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim);
- }
- dst_slice_start[2] += 1;
- }
- sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result, result.dimensions().TotalSize()*sizeof(DataType));
-
- for (IndexType i = 0; i < expected.dimension(0); ++i) {
- for (IndexType j = 0; j < expected.dimension(1); ++j) {
- for (IndexType k = 0; k < expected.dimension(2); ++k) {
- for (IndexType l = 0; l < expected.dimension(3); ++l) {
- VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
- }
- }
- }
- }
-}
-
-
-
-template<typename DataType> void sycl_reverse_test_per_device(const cl::sycl::device& d){
- std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
- QueueInterface queueInterface(d);
- auto sycl_device = Eigen::SyclDevice(&queueInterface);
- test_simple_reverse<DataType, RowMajor, int64_t>(sycl_device);
- test_simple_reverse<DataType, ColMajor, int64_t>(sycl_device);
- test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, false);
- test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, false);
- test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, true);
- test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, true);
-}
-void test_cxx11_tensor_reverse_sycl() {
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(sycl_reverse_test_per_device<float>(device));
- }
-}
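
reverse() takes one boolean per dimension and flips only the flagged ones; the deleted test walked several flag patterns and also used a reversed expression as an lvalue. Minimal form (size and flags illustrative):

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <cassert>

    int main() {
      Eigen::Tensor<float, 2> t(2, 3);
      t.setRandom();
      Eigen::array<bool, 2> rev{{false, true}};   // flip only dim 1
      Eigen::Tensor<float, 2> r = t.reverse(rev);
      assert(r(0, 0) == t(0, 2));                 // r(i, j) == t(i, 2 - j)
      return 0;
    }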
diff --git a/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp
deleted file mode 100644
index c88db7c..0000000
--- a/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-// Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_shuffling_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device)
-{
- IndexType sizeDim1 = 2;
- IndexType sizeDim2 = 3;
- IndexType sizeDim3 = 5;
- IndexType sizeDim4 = 7;
- array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
- Tensor<DataType, 4, DataLayout,IndexType> tensor(tensorRange);
- Tensor<DataType, 4, DataLayout,IndexType> no_shuffle(tensorRange);
- tensor.setRandom();
-
- const size_t buffSize =tensor.size()*sizeof(DataType);
- array<IndexType, 4> shuffles;
- shuffles[0] = 0;
- shuffles[1] = 1;
- shuffles[2] = 2;
- shuffles[3] = 3;
- DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(buffSize));
- DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(buffSize));
-
-
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
- TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu2(gpu_data2, tensorRange);
-
- sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(), buffSize);
-
- gpu2.device(sycl_device)=gpu1.shuffle(shuffles);
- sycl_device.memcpyDeviceToHost(no_shuffle.data(), gpu_data2, buffSize);
- sycl_device.synchronize();
-
- VERIFY_IS_EQUAL(no_shuffle.dimension(0), sizeDim1);
- VERIFY_IS_EQUAL(no_shuffle.dimension(1), sizeDim2);
- VERIFY_IS_EQUAL(no_shuffle.dimension(2), sizeDim3);
- VERIFY_IS_EQUAL(no_shuffle.dimension(3), sizeDim4);
-
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
- for (IndexType l = 0; l < sizeDim4; ++l) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l), no_shuffle(i,j,k,l));
- }
- }
- }
- }
-
- shuffles[0] = 2;
- shuffles[1] = 3;
- shuffles[2] = 1;
- shuffles[3] = 0;
- array<IndexType, 4> tensorrangeShuffle = {{sizeDim3, sizeDim4, sizeDim2, sizeDim1}};
- Tensor<DataType, 4, DataLayout,IndexType> shuffle(tensorrangeShuffle);
- DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(buffSize));
- TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu3(gpu_data3, tensorrangeShuffle);
-
- gpu3.device(sycl_device)=gpu1.shuffle(shuffles);
- sycl_device.memcpyDeviceToHost(shuffle.data(), gpu_data3, buffSize);
- sycl_device.synchronize();
-
- VERIFY_IS_EQUAL(shuffle.dimension(0), sizeDim3);
- VERIFY_IS_EQUAL(shuffle.dimension(1), sizeDim4);
- VERIFY_IS_EQUAL(shuffle.dimension(2), sizeDim2);
- VERIFY_IS_EQUAL(shuffle.dimension(3), sizeDim1);
-
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
- for (IndexType l = 0; l < sizeDim4; ++l) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i));
- }
- }
- }
- }
-}
-
-
-template<typename DataType, typename dev_Selector> void sycl_shuffling_test_per_device(dev_Selector s){
- QueueInterface queueInterface(s);
- auto sycl_device = Eigen::SyclDevice(&queueInterface);
- test_simple_shuffling_sycl<DataType, RowMajor, int64_t>(sycl_device);
- test_simple_shuffling_sycl<DataType, ColMajor, int64_t>(sycl_device);
-
-}
-void test_cxx11_tensor_shuffling_sycl()
-{
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(sycl_shuffling_test_per_device<float>(device));
- }
-}
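
shuffle() permutes dimensions: output dimension d takes its extent and data from input dimension shuffles[d], which is why the test above could verify tensor(i,j,k,l) == shuffle(k,l,j,i) for the permutation {2,3,1,0}. A smaller sketch:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <cassert>

    int main() {
      Eigen::Tensor<float, 3> t(2, 3, 5);
      t.setRandom();
      Eigen::array<int, 3> shuf{{2, 0, 1}};         // dims become (5, 2, 3)
      Eigen::Tensor<float, 3> s = t.shuffle(shuf);
      assert(s(4, 1, 2) == t(1, 2, 4));             // s(k, i, j) == t(i, j, k)
      return 0;
    }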
diff --git a/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp
deleted file mode 100644
index 603c374..0000000
--- a/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp
+++ /dev/null
@@ -1,203 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli Codeplay Software Ltd.
-// Ralph Potter Codeplay Software Ltd.
-// Luke Iwanski Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_striding_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include <iostream>
-#include <chrono>
-#include <ctime>
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_simple_striding(const Eigen::SyclDevice& sycl_device)
-{
-
- Eigen::array<IndexType, 4> tensor_dims = {{2,3,5,7}};
- Eigen::array<IndexType, 4> stride_dims = {{1,1,3,3}};
-
-
- Tensor<DataType, 4, DataLayout, IndexType> tensor(tensor_dims);
- Tensor<DataType, 4, DataLayout,IndexType> no_stride(tensor_dims);
- Tensor<DataType, 4, DataLayout,IndexType> stride(stride_dims);
-
-
- std::size_t tensor_bytes = tensor.size() * sizeof(DataType);
- std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType);
- std::size_t stride_bytes = stride.size() * sizeof(DataType);
- DataType * d_tensor = static_cast<DataType*>(sycl_device.allocate(tensor_bytes));
- DataType * d_no_stride = static_cast<DataType*>(sycl_device.allocate(no_stride_bytes));
- DataType * d_stride = static_cast<DataType*>(sycl_device.allocate(stride_bytes));
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_tensor(d_tensor, tensor_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_no_stride(d_no_stride, tensor_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_stride(d_stride, stride_dims);
-
-
- tensor.setRandom();
- array<IndexType, 4> strides;
- strides[0] = 1;
- strides[1] = 1;
- strides[2] = 1;
- strides[3] = 1;
- sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes);
- gpu_no_stride.device(sycl_device)=gpu_tensor.stride(strides);
- sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes);
-
- //no_stride = tensor.stride(strides);
-
- VERIFY_IS_EQUAL(no_stride.dimension(0), 2);
- VERIFY_IS_EQUAL(no_stride.dimension(1), 3);
- VERIFY_IS_EQUAL(no_stride.dimension(2), 5);
- VERIFY_IS_EQUAL(no_stride.dimension(3), 7);
-
- for (IndexType i = 0; i < 2; ++i) {
- for (IndexType j = 0; j < 3; ++j) {
- for (IndexType k = 0; k < 5; ++k) {
- for (IndexType l = 0; l < 7; ++l) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l));
- }
- }
- }
- }
-
- strides[0] = 2;
- strides[1] = 4;
- strides[2] = 2;
- strides[3] = 3;
-//Tensor<float, 4, DataLayout> stride;
-// stride = tensor.stride(strides);
-
- gpu_stride.device(sycl_device)=gpu_tensor.stride(strides);
- sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes);
-
- VERIFY_IS_EQUAL(stride.dimension(0), 1);
- VERIFY_IS_EQUAL(stride.dimension(1), 1);
- VERIFY_IS_EQUAL(stride.dimension(2), 3);
- VERIFY_IS_EQUAL(stride.dimension(3), 3);
-
- for (IndexType i = 0; i < 1; ++i) {
- for (IndexType j = 0; j < 1; ++j) {
- for (IndexType k = 0; k < 3; ++k) {
- for (IndexType l = 0; l < 3; ++l) {
- VERIFY_IS_EQUAL(tensor(2*i,4*j,2*k,3*l), stride(i,j,k,l));
- }
- }
- }
- }
-
- sycl_device.deallocate(d_tensor);
- sycl_device.deallocate(d_no_stride);
- sycl_device.deallocate(d_stride);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_striding_as_lvalue(const Eigen::SyclDevice& sycl_device)
-{
-
- Eigen::array<IndexType, 4> tensor_dims = {{2,3,5,7}};
- Eigen::array<IndexType, 4> stride_dims = {{3,12,10,21}};
-
-
- Tensor<DataType, 4, DataLayout, IndexType> tensor(tensor_dims);
- Tensor<DataType, 4, DataLayout,IndexType> no_stride(stride_dims);
- Tensor<DataType, 4, DataLayout,IndexType> stride(stride_dims);
-
-
- std::size_t tensor_bytes = tensor.size() * sizeof(DataType);
- std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType);
- std::size_t stride_bytes = stride.size() * sizeof(DataType);
-
- DataType * d_tensor = static_cast<DataType*>(sycl_device.allocate(tensor_bytes));
- DataType * d_no_stride = static_cast<DataType*>(sycl_device.allocate(no_stride_bytes));
- DataType * d_stride = static_cast<DataType*>(sycl_device.allocate(stride_bytes));
-
- Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_tensor(d_tensor, tensor_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_no_stride(d_no_stride, stride_dims);
- Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_stride(d_stride, stride_dims);
-
- //Tensor<float, 4, DataLayout> tensor(2,3,5,7);
- tensor.setRandom();
- array<IndexType, 4> strides;
- strides[0] = 2;
- strides[1] = 4;
- strides[2] = 2;
- strides[3] = 3;
-
-// Tensor<float, 4, DataLayout> result(3, 12, 10, 21);
-// result.stride(strides) = tensor;
- sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes);
- gpu_stride.stride(strides).device(sycl_device)=gpu_tensor;
- sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes);
-
- for (IndexType i = 0; i < 2; ++i) {
- for (IndexType j = 0; j < 3; ++j) {
- for (IndexType k = 0; k < 5; ++k) {
- for (IndexType l = 0; l < 7; ++l) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l), stride(2*i,4*j,2*k,3*l));
- }
- }
- }
- }
-
- array<IndexType, 4> no_strides;
- no_strides[0] = 1;
- no_strides[1] = 1;
- no_strides[2] = 1;
- no_strides[3] = 1;
-// Tensor<float, 4, DataLayout> result2(3, 12, 10, 21);
-// result2.stride(strides) = tensor.stride(no_strides);
-
- gpu_no_stride.stride(strides).device(sycl_device)=gpu_tensor.stride(no_strides);
- sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes);
-
- for (IndexType i = 0; i < 2; ++i) {
- for (IndexType j = 0; j < 3; ++j) {
- for (IndexType k = 0; k < 5; ++k) {
- for (IndexType l = 0; l < 7; ++l) {
- VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(2*i,4*j,2*k,3*l));
- }
- }
- }
- }
- sycl_device.deallocate(d_tensor);
- sycl_device.deallocate(d_no_stride);
- sycl_device.deallocate(d_stride);
-}
-
-
-template <typename Dev_selector> void tensorStridingPerDevice(Dev_selector& s){
- QueueInterface queueInterface(s);
- auto sycl_device=Eigen::SyclDevice(&queueInterface);
- test_simple_striding<float, ColMajor, int64_t>(sycl_device);
- test_simple_striding<float, RowMajor, int64_t>(sycl_device);
- test_striding_as_lvalue<float, ColMajor, int64_t>(sycl_device);
- test_striding_as_lvalue<float, RowMajor, int64_t>(sycl_device);
-}
-
-void test_cxx11_tensor_striding_sycl() {
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(tensorStridingPerDevice(device));
- }
-}
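
stride() keeps every n-th element along each dimension, so each output extent is ceil(size/stride); the deleted test also used stride() on the left-hand side to scatter into a larger tensor. The read side, in miniature:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <cassert>

    int main() {
      Eigen::Tensor<float, 2> t(6, 9);
      t.setRandom();
      Eigen::array<Eigen::Index, 2> strides{{2, 3}};
      Eigen::Tensor<float, 2> s = t.stride(strides);  // shape 3 x 3
      assert(s(1, 2) == t(2, 6));                     // s(i, j) == t(2*i, 3*j)
      return 0;
    }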
diff --git a/eigen/unsupported/test/cxx11_tensor_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_sycl.cpp
index 5cd0f4c..6a9c334 100644
--- a/eigen/unsupported/test/cxx11_tensor_sycl.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_sycl.cpp
@@ -16,7 +16,7 @@
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_SYCL
#include "main.h"
@@ -27,105 +27,36 @@ using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
-template <typename DataType, int DataLayout, typename IndexType>
-void test_sycl_mem_transfers(const Eigen::SyclDevice &sycl_device) {
- IndexType sizeDim1 = 100;
- IndexType sizeDim2 = 10;
- IndexType sizeDim3 = 20;
- array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
- Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange);
- Tensor<DataType, 3, DataLayout, IndexType> out1(tensorRange);
- Tensor<DataType, 3, DataLayout, IndexType> out2(tensorRange);
- Tensor<DataType, 3, DataLayout, IndexType> out3(tensorRange);
+void test_sycl_cpu(const Eigen::SyclDevice &sycl_device) {
- in1 = in1.random();
-
- DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(in1.size()*sizeof(DataType)));
- DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(out1.size()*sizeof(DataType)));
-
- TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu1(gpu_data1, tensorRange);
- TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu2(gpu_data2, tensorRange);
-
- sycl_device.memcpyHostToDevice(gpu_data1, in1.data(),(in1.size())*sizeof(DataType));
- sycl_device.memcpyHostToDevice(gpu_data2, in1.data(),(in1.size())*sizeof(DataType));
- gpu1.device(sycl_device) = gpu1 * 3.14f;
- gpu2.device(sycl_device) = gpu2 * 2.7f;
- sycl_device.memcpyDeviceToHost(out1.data(), gpu_data1,(out1.size())*sizeof(DataType));
- sycl_device.memcpyDeviceToHost(out2.data(), gpu_data1,(out2.size())*sizeof(DataType));
- sycl_device.memcpyDeviceToHost(out3.data(), gpu_data2,(out3.size())*sizeof(DataType));
- sycl_device.synchronize();
-
- for (IndexType i = 0; i < in1.size(); ++i) {
- VERIFY_IS_APPROX(out1(i), in1(i) * 3.14f);
- VERIFY_IS_APPROX(out2(i), in1(i) * 3.14f);
- VERIFY_IS_APPROX(out3(i), in1(i) * 2.7f);
- }
-
- sycl_device.deallocate(gpu_data1);
- sycl_device.deallocate(gpu_data2);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-void test_sycl_mem_sync(const Eigen::SyclDevice &sycl_device) {
- IndexType size = 20;
- array<IndexType, 1> tensorRange = {{size}};
- Tensor<DataType, 1, DataLayout, IndexType> in1(tensorRange);
- Tensor<DataType, 1, DataLayout, IndexType> in2(tensorRange);
- Tensor<DataType, 1, DataLayout, IndexType> out(tensorRange);
-
- in1 = in1.random();
- in2 = in1;
-
- DataType* gpu_data = static_cast<DataType*>(sycl_device.allocate(in1.size()*sizeof(DataType)));
-
- TensorMap<Tensor<DataType, 1, DataLayout, IndexType>> gpu1(gpu_data, tensorRange);
- sycl_device.memcpyHostToDevice(gpu_data, in1.data(),(in1.size())*sizeof(DataType));
- sycl_device.synchronize();
- in1.setZero();
-
- sycl_device.memcpyDeviceToHost(out.data(), gpu_data, out.size()*sizeof(DataType));
- sycl_device.synchronize();
-
- for (IndexType i = 0; i < in1.size(); ++i) {
- VERIFY_IS_APPROX(out(i), in2(i));
- }
-
- sycl_device.deallocate(gpu_data);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
-
- IndexType sizeDim1 = 100;
- IndexType sizeDim2 = 10;
- IndexType sizeDim3 = 20;
- array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
- Tensor<DataType, 3,DataLayout, IndexType> in1(tensorRange);
- Tensor<DataType, 3,DataLayout, IndexType> in2(tensorRange);
- Tensor<DataType, 3,DataLayout, IndexType> in3(tensorRange);
- Tensor<DataType, 3,DataLayout, IndexType> out(tensorRange);
+ int sizeDim1 = 100;
+ int sizeDim2 = 100;
+ int sizeDim3 = 100;
+ array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+ Tensor<float, 3> in1(tensorRange);
+ Tensor<float, 3> in2(tensorRange);
+ Tensor<float, 3> in3(tensorRange);
+ Tensor<float, 3> out(tensorRange);
in2 = in2.random();
in3 = in3.random();
- DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.size()*sizeof(DataType)));
- DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(in2.size()*sizeof(DataType)));
- DataType * gpu_in3_data = static_cast<DataType*>(sycl_device.allocate(in3.size()*sizeof(DataType)));
- DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.size()*sizeof(DataType)));
+ float * gpu_in1_data = static_cast<float*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(float)));
+ float * gpu_in2_data = static_cast<float*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(float)));
+ float * gpu_in3_data = static_cast<float*>(sycl_device.allocate(in3.dimensions().TotalSize()*sizeof(float)));
+ float * gpu_out_data = static_cast<float*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float)));
- TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, tensorRange);
- TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, tensorRange);
- TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in3(gpu_in3_data, tensorRange);
- TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange);
+ TensorMap<Tensor<float, 3>> gpu_in1(gpu_in1_data, tensorRange);
+ TensorMap<Tensor<float, 3>> gpu_in2(gpu_in2_data, tensorRange);
+ TensorMap<Tensor<float, 3>> gpu_in3(gpu_in3_data, tensorRange);
+ TensorMap<Tensor<float, 3>> gpu_out(gpu_out_data, tensorRange);
/// a=1.2f
gpu_in1.device(sycl_device) = gpu_in1.constant(1.2f);
- sycl_device.memcpyDeviceToHost(in1.data(), gpu_in1_data ,(in1.size())*sizeof(DataType));
- sycl_device.synchronize();
-
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
+ sycl_device.memcpyDeviceToHost(in1.data(), gpu_in1_data ,(in1.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
VERIFY_IS_APPROX(in1(i,j,k), 1.2f);
}
}
@@ -134,12 +65,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
/// a=b*1.2f
gpu_out.device(sycl_device) = gpu_in1 * 1.2f;
- sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data ,(out.size())*sizeof(DataType));
- sycl_device.synchronize();
-
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data ,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
VERIFY_IS_APPROX(out(i,j,k),
in1(i,j,k) * 1.2f);
}
@@ -148,14 +77,12 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
printf("a=b*1.2f Test Passed\n");
/// c=a*b
- sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.size())*sizeof(DataType));
+ sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.dimensions().TotalSize())*sizeof(float));
gpu_out.device(sycl_device) = gpu_in1 * gpu_in2;
- sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType));
- sycl_device.synchronize();
-
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
VERIFY_IS_APPROX(out(i,j,k),
in1(i,j,k) *
in2(i,j,k));
@@ -166,11 +93,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
/// c=a+b
gpu_out.device(sycl_device) = gpu_in1 + gpu_in2;
- sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType));
- sycl_device.synchronize();
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
VERIFY_IS_APPROX(out(i,j,k),
in1(i,j,k) +
in2(i,j,k));
@@ -181,11 +107,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
/// c=a*a
gpu_out.device(sycl_device) = gpu_in1 * gpu_in1;
- sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType));
- sycl_device.synchronize();
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
VERIFY_IS_APPROX(out(i,j,k),
in1(i,j,k) *
in1(i,j,k));
@@ -196,11 +121,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
//a*3.14f + b*2.7f
gpu_out.device(sycl_device) = gpu_in1 * gpu_in1.constant(3.14f) + gpu_in2 * gpu_in2.constant(2.7f);
- sycl_device.memcpyDeviceToHost(out.data(),gpu_out_data,(out.size())*sizeof(DataType));
- sycl_device.synchronize();
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
+ sycl_device.memcpyDeviceToHost(out.data(),gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
VERIFY_IS_APPROX(out(i,j,k),
in1(i,j,k) * 3.14f
+ in2(i,j,k) * 2.7f);
@@ -210,13 +134,12 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
printf("a*3.14f + b*2.7f Test Passed\n");
///d= (a>0.5? b:c)
- sycl_device.memcpyHostToDevice(gpu_in3_data, in3.data(),(in3.size())*sizeof(DataType));
+ sycl_device.memcpyHostToDevice(gpu_in3_data, in3.data(),(in3.dimensions().TotalSize())*sizeof(float));
gpu_out.device(sycl_device) =(gpu_in1 > gpu_in1.constant(0.5f)).select(gpu_in2, gpu_in3);
- sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType));
- sycl_device.synchronize();
- for (IndexType i = 0; i < sizeDim1; ++i) {
- for (IndexType j = 0; j < sizeDim2; ++j) {
- for (IndexType k = 0; k < sizeDim3; ++k) {
+ sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+ for (int i = 0; i < sizeDim1; ++i) {
+ for (int j = 0; j < sizeDim2; ++j) {
+ for (int k = 0; k < sizeDim3; ++k) {
VERIFY_IS_APPROX(out(i, j, k), (in1(i, j, k) > 0.5f)
? in2(i, j, k)
: in3(i, j, k));
@@ -229,48 +152,8 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
sycl_device.deallocate(gpu_in3_data);
sycl_device.deallocate(gpu_out_data);
}
-template<typename Scalar1, typename Scalar2, int DataLayout, typename IndexType>
-static void test_sycl_cast(const Eigen::SyclDevice& sycl_device){
- IndexType size = 20;
- array<IndexType, 1> tensorRange = {{size}};
- Tensor<Scalar1, 1, DataLayout, IndexType> in(tensorRange);
- Tensor<Scalar2, 1, DataLayout, IndexType> out(tensorRange);
- Tensor<Scalar2, 1, DataLayout, IndexType> out_host(tensorRange);
-
- in = in.random();
-
- Scalar1* gpu_in_data = static_cast<Scalar1*>(sycl_device.allocate(in.size()*sizeof(Scalar1)));
- Scalar2 * gpu_out_data = static_cast<Scalar2*>(sycl_device.allocate(out.size()*sizeof(Scalar2)));
-
- TensorMap<Tensor<Scalar1, 1, DataLayout, IndexType>> gpu_in(gpu_in_data, tensorRange);
- TensorMap<Tensor<Scalar2, 1, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange);
- sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.size())*sizeof(Scalar1));
- gpu_out.device(sycl_device) = gpu_in. template cast<Scalar2>();
- sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, out.size()*sizeof(Scalar2));
- out_host = in. template cast<Scalar2>();
- for(IndexType i=0; i< size; i++)
- {
- VERIFY_IS_APPROX(out(i), out_host(i));
- }
- printf("cast Test Passed\n");
- sycl_device.deallocate(gpu_in_data);
- sycl_device.deallocate(gpu_out_data);
-}
-template<typename DataType, typename dev_Selector> void sycl_computing_test_per_device(dev_Selector s){
- QueueInterface queueInterface(s);
- auto sycl_device = Eigen::SyclDevice(&queueInterface);
- test_sycl_mem_transfers<DataType, RowMajor, int64_t>(sycl_device);
- test_sycl_computations<DataType, RowMajor, int64_t>(sycl_device);
- test_sycl_mem_sync<DataType, RowMajor, int64_t>(sycl_device);
- test_sycl_mem_transfers<DataType, ColMajor, int64_t>(sycl_device);
- test_sycl_computations<DataType, ColMajor, int64_t>(sycl_device);
- test_sycl_mem_sync<DataType, ColMajor, int64_t>(sycl_device);
- test_sycl_cast<DataType, int, RowMajor, int64_t>(sycl_device);
- test_sycl_cast<DataType, int, ColMajor, int64_t>(sycl_device);
-}
-
void test_cxx11_tensor_sycl() {
- for (const auto& device :Eigen::get_sycl_supported_devices()) {
- CALL_SUBTEST(sycl_computing_test_per_device<float>(device));
- }
+ cl::sycl::gpu_selector s;
+ Eigen::SyclDevice sycl_device(s);
+ CALL_SUBTEST(test_sycl_cpu(sycl_device));
}
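Note: the rewritten test pins a single cl::sycl::gpu_selector instead of
iterating Eigen::get_sycl_supported_devices(), so it aborts on machines with
no SYCL-visible GPU. A compressed sketch of the allocate/memcpy/compute
pattern the post-revert test uses, built only from calls visible in the hunks
above; it assumes the SYCL toolchain this snapshot targets and is not
buildable without it:

#define EIGEN_USE_SYCL
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  cl::sycl::gpu_selector s;          // fixed GPU selector, as in the new test
  Eigen::SyclDevice sycl_device(s);  // this snapshot builds the device directly
  Eigen::Tensor<float, 1> t(10);
  t.setConstant(1.0f);
  const auto bytes = t.dimensions().TotalSize() * sizeof(float);
  float* d = static_cast<float*>(sycl_device.allocate(bytes));
  Eigen::TensorMap<Eigen::Tensor<float, 1>> gpu(d, 10);
  sycl_device.memcpyHostToDevice(d, t.data(), bytes);
  gpu.device(sycl_device) = gpu * 2.0f;  // expression evaluated on the device
  sycl_device.memcpyDeviceToHost(t.data(), d, bytes);
  sycl_device.deallocate(d);
  return 0;
}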
diff --git a/eigen/unsupported/test/polynomialsolver.cpp b/eigen/unsupported/test/polynomialsolver.cpp
index 7ad4aa6..0c87478 100644
--- a/eigen/unsupported/test/polynomialsolver.cpp
+++ b/eigen/unsupported/test/polynomialsolver.cpp
@@ -32,10 +32,9 @@ bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve )
{
typedef typename POLYNOMIAL::Index Index;
typedef typename POLYNOMIAL::Scalar Scalar;
- typedef typename POLYNOMIAL::RealScalar RealScalar;
typedef typename SOLVER::RootsType RootsType;
- typedef Matrix<RealScalar,Deg,1> EvalRootsType;
+ typedef Matrix<Scalar,Deg,1> EvalRootsType;
const Index deg = pols.size()-1;
@@ -58,7 +57,7 @@ bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve )
cerr << endl;
}
- std::vector<RealScalar> rootModuli( roots.size() );
+ std::vector<Scalar> rootModuli( roots.size() );
Map< EvalRootsType > aux( &rootModuli[0], roots.size() );
aux = roots.array().abs();
std::sort( rootModuli.begin(), rootModuli.end() );
@@ -84,7 +83,7 @@ void evalSolver( const POLYNOMIAL& pols )
{
typedef typename POLYNOMIAL::Scalar Scalar;
- typedef PolynomialSolver<Scalar, Deg > PolynomialSolverType;
+ typedef PolynomialSolver<Scalar, Deg > PolynomialSolverType;
PolynomialSolverType psolve;
aux_evalSolver<Deg, POLYNOMIAL, PolynomialSolverType>( pols, psolve );
@@ -98,7 +97,6 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
{
using std::sqrt;
typedef typename POLYNOMIAL::Scalar Scalar;
- typedef typename POLYNOMIAL::RealScalar RealScalar;
typedef PolynomialSolver<Scalar, Deg > PolynomialSolverType;
@@ -109,12 +107,15 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
// 1) the roots found are correct
// 2) the roots have distinct moduli
+ typedef typename POLYNOMIAL::Scalar Scalar;
+ typedef typename REAL_ROOTS::Scalar Real;
+
//Test realRoots
- std::vector< RealScalar > calc_realRoots;
- psolve.realRoots( calc_realRoots, test_precision<RealScalar>());
- VERIFY_IS_EQUAL( calc_realRoots.size() , (size_t)real_roots.size() );
+ std::vector< Real > calc_realRoots;
+ psolve.realRoots( calc_realRoots );
+ VERIFY( calc_realRoots.size() == (size_t)real_roots.size() );
- const RealScalar psPrec = sqrt( test_precision<RealScalar>() );
+ const Scalar psPrec = sqrt( test_precision<Scalar>() );
for( size_t i=0; i<calc_realRoots.size(); ++i )
{
@@ -137,7 +138,7 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
bool hasRealRoot;
//Test absGreatestRealRoot
- RealScalar r = psolve.absGreatestRealRoot( hasRealRoot );
+ Real r = psolve.absGreatestRealRoot( hasRealRoot );
VERIFY( hasRealRoot == (real_roots.size() > 0 ) );
if( hasRealRoot ){
VERIFY( internal::isApprox( real_roots.array().abs().maxCoeff(), abs(r), psPrec ) ); }
@@ -166,11 +167,9 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
template<typename _Scalar, int _Deg>
void polynomialsolver(int deg)
{
- typedef typename NumTraits<_Scalar>::Real RealScalar;
- typedef internal::increment_if_fixed_size<_Deg> Dim;
+ typedef internal::increment_if_fixed_size<_Deg> Dim;
typedef Matrix<_Scalar,Dim::ret,1> PolynomialType;
typedef Matrix<_Scalar,_Deg,1> EvalRootsType;
- typedef Matrix<RealScalar,_Deg,1> RealRootsType;
cout << "Standard cases" << endl;
PolynomialType pols = PolynomialType::Random(deg+1);
@@ -183,11 +182,15 @@ void polynomialsolver(int deg)
evalSolver<_Deg,PolynomialType>( pols );
cout << "Test sugar" << endl;
- RealRootsType realRoots = RealRootsType::Random(deg);
+ EvalRootsType realRoots = EvalRootsType::Random(deg);
roots_to_monicPolynomial( realRoots, pols );
evalSolverSugarFunction<_Deg>(
pols,
- realRoots.template cast <std::complex<RealScalar> >().eval(),
+ realRoots.template cast <
+ std::complex<
+ typename NumTraits<_Scalar>::Real
+ >
+ >(),
realRoots );
}
@@ -211,6 +214,5 @@ void test_polynomialsolver()
internal::random<int>(9,13)
)) );
CALL_SUBTEST_11((polynomialsolver<float,Dynamic>(1)) );
- CALL_SUBTEST_12((polynomialsolver<std::complex<double>,Dynamic>(internal::random<int>(2,13))) );
}
}
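Note: these hunks revert the solver test from RealScalar back to Scalar and
drop the explicit precision argument to realRoots(). A self-contained sketch
of the sugar functions the test drives, using only calls visible in the diff
(roots_to_monicPolynomial, realRoots, absGreatestRealRoot); the root values
are illustrative, not taken from the test:

#include <unsupported/Eigen/Polynomials>
#include <iostream>
#include <vector>

int main() {
  // Build a monic cubic with known real roots, as the test does.
  Eigen::Vector3d roots(1.0, 2.0, 3.0);
  Eigen::Vector4d pols;
  Eigen::roots_to_monicPolynomial(roots, pols);

  Eigen::PolynomialSolver<double, 3> psolve(pols);
  std::vector<double> realRoots;
  psolve.realRoots(realRoots);  // post-revert form: default precision
  bool hasRealRoot = false;
  double r = psolve.absGreatestRealRoot(hasRealRoot);
  std::cout << realRoots.size() << " real roots, |largest| = " << r << "\n";
  return 0;
}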
diff --git a/eigen/unsupported/test/sparse_extra.cpp b/eigen/unsupported/test/sparse_extra.cpp
index 4f6723d..a010ceb 100644
--- a/eigen/unsupported/test/sparse_extra.cpp
+++ b/eigen/unsupported/test/sparse_extra.cpp
@@ -129,19 +129,6 @@ template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& re
}
-template<typename SparseMatrixType>
-void check_marketio()
-{
- typedef Matrix<typename SparseMatrixType::Scalar, Dynamic, Dynamic> DenseMatrix;
- Index rows = internal::random<Index>(1,100);
- Index cols = internal::random<Index>(1,100);
- SparseMatrixType m1, m2;
- m1 = DenseMatrix::Random(rows, cols).sparseView();
- saveMarket(m1, "sparse_extra.mtx");
- loadMarket(m2, "sparse_extra.mtx");
- VERIFY_IS_EQUAL(DenseMatrix(m1),DenseMatrix(m2));
-}
-
void test_sparse_extra()
{
for(int i = 0; i < g_repeat; i++) {
@@ -156,15 +143,5 @@ void test_sparse_extra()
CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, ColMajor> >()) );
CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, RowMajor> >()) );
-
- CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,int> >()) );
- CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,int> >()) );
- CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,int> >()) );
- CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,int> >()) );
- CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,long int> >()) );
- CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,long int> >()) );
- CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,long int> >()) );
- CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,long int> >()) );
- TEST_SET_BUT_UNUSED_VARIABLE(s);
}
}
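Note: the removed check_marketio helper round-tripped a random sparse matrix
through a Matrix Market file. A minimal sketch of that round trip, using
saveMarket/loadMarket from unsupported/Eigen/SparseExtra, with fixed entries
in place of the random matrix the helper generated:

#include <Eigen/Sparse>
#include <unsupported/Eigen/SparseExtra>

int main() {
  Eigen::SparseMatrix<double> m1(4, 4), m2;
  m1.insert(0, 1) = 3.0;
  m1.insert(2, 3) = -1.5;
  m1.makeCompressed();
  Eigen::saveMarket(m1, "sparse_extra.mtx");  // write Matrix Market file
  Eigen::loadMarket(m2, "sparse_extra.mtx");  // read it back
  // Compare via dense conversion, as the deleted helper did.
  return Eigen::MatrixXd(m1).isApprox(Eigen::MatrixXd(m2)) ? 0 : 1;
}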