From 88534ba623421c956d8ffcda2d27f41d704d15ef Mon Sep 17 00:00:00 2001
From: Stanislaw Halik <sthalik@misaki.pl>
Date: Tue, 3 Jul 2018 07:37:12 +0200
Subject: update eigen

---
 eigen/unsupported/test/CMakeLists.txt              |  21 -
 eigen/unsupported/test/EulerAngles.cpp             | 296 ++++------
 eigen/unsupported/test/autodiff_scalar.cpp         |  15 +
 .../test/cxx11_non_blocking_thread_pool.cpp        |  24 +-
 .../test/cxx11_tensor_broadcast_sycl.cpp           | 114 +---
 .../test/cxx11_tensor_builtins_sycl.cpp            | 267 ---------
 eigen/unsupported/test/cxx11_tensor_chipping.cpp   |   8 +-
 .../test/cxx11_tensor_chipping_sycl.cpp            | 622 ---------------------
 .../test/cxx11_tensor_concatenation_sycl.cpp       | 180 ------
 .../test/cxx11_tensor_contract_sycl.cpp            | 290 ----------
 .../test/cxx11_tensor_convolution_sycl.cpp         | 469 ----------------
 .../unsupported/test/cxx11_tensor_device_sycl.cpp  |  60 +-
 eigen/unsupported/test/cxx11_tensor_expr.cpp       |  46 --
 eigen/unsupported/test/cxx11_tensor_fixed_size.cpp |   2 +-
 .../test/cxx11_tensor_forced_eval_sycl.cpp         |  54 +-
 .../test/cxx11_tensor_morphing_sycl.cpp            | 248 --------
 .../unsupported/test/cxx11_tensor_notification.cpp |  17 +-
 .../test/cxx11_tensor_of_float16_cuda.cu           |   6 -
 .../unsupported/test/cxx11_tensor_padding_sycl.cpp | 157 ------
 .../test/cxx11_tensor_reduction_sycl.cpp           | 167 ++----
 .../unsupported/test/cxx11_tensor_reverse_sycl.cpp | 221 --------
 .../test/cxx11_tensor_shuffling_sycl.cpp           | 119 ----
 .../test/cxx11_tensor_striding_sycl.cpp            | 203 -------
 eigen/unsupported/test/cxx11_tensor_sycl.cpp       | 219 ++------
 eigen/unsupported/test/polynomialsolver.cpp        |  34 +-
 eigen/unsupported/test/sparse_extra.cpp            |  23 -
 26 files changed, 326 insertions(+), 3556 deletions(-)
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp
 delete mode 100644 eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp

(limited to 'eigen/unsupported/test')

diff --git a/eigen/unsupported/test/CMakeLists.txt b/eigen/unsupported/test/CMakeLists.txt
index 003c9de..b5fa1c8 100644
--- a/eigen/unsupported/test/CMakeLists.txt
+++ b/eigen/unsupported/test/CMakeLists.txt
@@ -21,17 +21,6 @@ include_directories(../../test ../../unsupported ../../Eigen
 
 find_package (Threads)
 
-find_package(Xsmm)
-if(XSMM_FOUND)
-  add_definitions("-DEIGEN_USE_LIBXSMM")
-  include_directories(${XSMM_INCLUDES})
-  link_directories(${XSMM_LIBRARIES})
-  set(EXTERNAL_LIBS ${EXTERNAL_LIBS} xsmm)
-  ei_add_property(EIGEN_TESTED_BACKENDS  "Xsmm, ")
-else(XSMM_FOUND)
-  ei_add_property(EIGEN_MISSING_BACKENDS  "Xsmm, ")
-endif(XSMM_FOUND)
-
 find_package(GoogleHash)
 if(GOOGLEHASH_FOUND)
   add_definitions("-DEIGEN_GOOGLEHASH_SUPPORT")
@@ -157,16 +146,6 @@ if(EIGEN_TEST_CXX11)
     ei_add_test_sycl(cxx11_tensor_broadcast_sycl "-std=c++11")
     ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11")
     ei_add_test_sycl(cxx11_tensor_reduction_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_morphing_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_shuffling_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_padding_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_builtins_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_contract_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_concatenation_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_reverse_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_convolution_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_striding_sycl "-std=c++11")
-    ei_add_test_sycl(cxx11_tensor_chipping_sycl "-std=c++11")
   endif(EIGEN_TEST_SYCL)
   # It should be safe to always run these tests as there is some fallback code for
   # older compiler that don't support cxx11.
diff --git a/eigen/unsupported/test/EulerAngles.cpp b/eigen/unsupported/test/EulerAngles.cpp
index 79ee728..a8cb528 100644
--- a/eigen/unsupported/test/EulerAngles.cpp
+++ b/eigen/unsupported/test/EulerAngles.cpp
@@ -13,219 +13,146 @@
 
 using namespace Eigen;
 
-// Unfortunately, we need to specialize it in order to work. (We could add it in main.h test framework)
-template <typename Scalar, class System>
-bool verifyIsApprox(const Eigen::EulerAngles<Scalar, System>& a, const Eigen::EulerAngles<Scalar, System>& b)
-{
-  return verifyIsApprox(a.angles(), b.angles());
-}
-
-// Verify that x is in the approxed range [a, b]
-#define VERIFY_APPROXED_RANGE(a, x, b) \
-  do { \
-  VERIFY_IS_APPROX_OR_LESS_THAN(a, x); \
-  VERIFY_IS_APPROX_OR_LESS_THAN(x, b); \
-  } while(0)
-
-const char X = EULER_X;
-const char Y = EULER_Y;
-const char Z = EULER_Z;
-
-template<typename Scalar, class EulerSystem>
-void verify_euler(const EulerAngles<Scalar, EulerSystem>& e)
+template<typename EulerSystem, typename Scalar>
+void verify_euler_ranged(const Matrix<Scalar,3,1>& ea,
+  bool positiveRangeAlpha, bool positiveRangeBeta, bool positiveRangeGamma)
 {
   typedef EulerAngles<Scalar, EulerSystem> EulerAnglesType;
   typedef Matrix<Scalar,3,3> Matrix3;
   typedef Matrix<Scalar,3,1> Vector3;
   typedef Quaternion<Scalar> QuaternionType;
   typedef AngleAxis<Scalar> AngleAxisType;
+  using std::abs;
   
-  const Scalar ONE = Scalar(1);
-  const Scalar HALF_PI = Scalar(EIGEN_PI / 2);
-  const Scalar PI = Scalar(EIGEN_PI);
+  Scalar alphaRangeStart, alphaRangeEnd;
+  Scalar betaRangeStart, betaRangeEnd;
+  Scalar gammaRangeStart, gammaRangeEnd;
   
-  // It's very important calc the acceptable precision depending on the distance from the pole.
-  const Scalar longitudeRadius = std::abs(
-    EulerSystem::IsTaitBryan ?
-    std::cos(e.beta()) :
-    std::sin(e.beta())
-    );
-  Scalar precision = test_precision<Scalar>() / longitudeRadius;
+  if (positiveRangeAlpha)
+  {
+    alphaRangeStart = Scalar(0);
+    alphaRangeEnd = Scalar(2 * EIGEN_PI);
+  }
+  else
+  {
+    alphaRangeStart = -Scalar(EIGEN_PI);
+    alphaRangeEnd = Scalar(EIGEN_PI);
+  }
   
-  Scalar betaRangeStart, betaRangeEnd;
-  if (EulerSystem::IsTaitBryan)
+  if (positiveRangeBeta)
+  {
+    betaRangeStart = Scalar(0);
+    betaRangeEnd = Scalar(2 * EIGEN_PI);
+  }
+  else
+  {
+    betaRangeStart = -Scalar(EIGEN_PI);
+    betaRangeEnd = Scalar(EIGEN_PI);
+  }
+  
+  if (positiveRangeGamma)
   {
-    betaRangeStart = -HALF_PI;
-    betaRangeEnd = HALF_PI;
+    gammaRangeStart = Scalar(0);
+    gammaRangeEnd = Scalar(2 * EIGEN_PI);
   }
   else
   {
-    if (!EulerSystem::IsBetaOpposite)
-    {
-      betaRangeStart = 0;
-      betaRangeEnd = PI;
-    }
-    else
-    {
-      betaRangeStart = -PI;
-      betaRangeEnd = 0;
-    }
+    gammaRangeStart = -Scalar(EIGEN_PI);
+    gammaRangeEnd = Scalar(EIGEN_PI);
   }
   
+  const int i = EulerSystem::AlphaAxisAbs - 1;
+  const int j = EulerSystem::BetaAxisAbs - 1;
+  const int k = EulerSystem::GammaAxisAbs - 1;
+  
+  const int iFactor = EulerSystem::IsAlphaOpposite ? -1 : 1;
+  const int jFactor = EulerSystem::IsBetaOpposite ? -1 : 1;
+  const int kFactor = EulerSystem::IsGammaOpposite ? -1 : 1;
+  
   const Vector3 I = EulerAnglesType::AlphaAxisVector();
   const Vector3 J = EulerAnglesType::BetaAxisVector();
   const Vector3 K = EulerAnglesType::GammaAxisVector();
   
-  // Is approx checks
-  VERIFY(e.isApprox(e));
-  VERIFY_IS_APPROX(e, e);
-  VERIFY_IS_NOT_APPROX(e, EulerAnglesType(e.alpha() + ONE, e.beta() + ONE, e.gamma() + ONE));
-
-  const Matrix3 m(e);
-  VERIFY_IS_APPROX(Scalar(m.determinant()), ONE);
-
-  EulerAnglesType ebis(m);
+  EulerAnglesType e(ea[0], ea[1], ea[2]);
   
-  // When no roll(acting like polar representation), we have the best precision.
-  // One of those cases is when the Euler angles are on the pole, and because it's singular case,
-  //  the computation returns no roll.
-  if (ebis.beta() == 0)
-    precision = test_precision<Scalar>();
+  Matrix3 m(e);
+  Vector3 eabis = EulerAnglesType(m, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles();
   
   // Check that eabis in range
-  VERIFY_APPROXED_RANGE(-PI, ebis.alpha(), PI);
-  VERIFY_APPROXED_RANGE(betaRangeStart, ebis.beta(), betaRangeEnd);
-  VERIFY_APPROXED_RANGE(-PI, ebis.gamma(), PI);
-
-  const Matrix3 mbis(AngleAxisType(ebis.alpha(), I) * AngleAxisType(ebis.beta(), J) * AngleAxisType(ebis.gamma(), K));
-  VERIFY_IS_APPROX(Scalar(mbis.determinant()), ONE);
-  VERIFY_IS_APPROX(mbis, ebis.toRotationMatrix());
-  /*std::cout << "===================\n" <<
-    "e: " << e << std::endl <<
-    "eabis: " << eabis.transpose() << std::endl <<
-    "m: " << m << std::endl <<
-    "mbis: " << mbis << std::endl <<
-    "X: " << (m * Vector3::UnitX()).transpose() << std::endl <<
-    "X: " << (mbis * Vector3::UnitX()).transpose() << std::endl;*/
-  VERIFY(m.isApprox(mbis, precision));
-
-  // Test if ea and eabis are the same
-  // Need to check both singular and non-singular cases
-  // There are two singular cases.
-  // 1. When I==K and sin(ea(1)) == 0
-  // 2. When I!=K and cos(ea(1)) == 0
-
-  // TODO: Make this test work well, and use range saturation function.
-  /*// If I==K, and ea[1]==0, then there no unique solution.
-  // The remark apply in the case where I!=K, and |ea[1]| is close to +-pi/2.
-  if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision<Scalar>())) ) 
-      VERIFY_IS_APPROX(ea, eabis);*/
+  VERIFY(alphaRangeStart <= eabis[0] && eabis[0] <= alphaRangeEnd);
+  VERIFY(betaRangeStart <= eabis[1] && eabis[1] <= betaRangeEnd);
+  VERIFY(gammaRangeStart <= eabis[2] && eabis[2] <= gammaRangeEnd);
   
-  // Quaternions
-  const QuaternionType q(e);
-  ebis = q;
-  const QuaternionType qbis(ebis);
-  VERIFY(internal::isApprox<Scalar>(std::abs(q.dot(qbis)), ONE, precision));
-  //VERIFY_IS_APPROX(eabis, eabis2);// Verify that the euler angles are still the same
+  Vector3 eabis2 = m.eulerAngles(i, j, k);
   
-  // A suggestion for simple product test when will be supported.
-  /*EulerAnglesType e2(PI/2, PI/2, PI/2);
-  Matrix3 m2(e2);
-  VERIFY_IS_APPROX(e*e2, m*m2);*/
-}
-
-template<signed char A, signed char B, signed char C, typename Scalar>
-void verify_euler_vec(const Matrix<Scalar,3,1>& ea)
-{
-  verify_euler(EulerAngles<Scalar, EulerSystem<A, B, C> >(ea[0], ea[1], ea[2]));
-}
-
-template<signed char A, signed char B, signed char C, typename Scalar>
-void verify_euler_all_neg(const Matrix<Scalar,3,1>& ea)
-{
-  verify_euler_vec<+A,+B,+C>(ea);
-  verify_euler_vec<+A,+B,-C>(ea);
-  verify_euler_vec<+A,-B,+C>(ea);
-  verify_euler_vec<+A,-B,-C>(ea);
+  // Invert the relevant axes
+  eabis2[0] *= iFactor;
+  eabis2[1] *= jFactor;
+  eabis2[2] *= kFactor;
   
-  verify_euler_vec<-A,+B,+C>(ea);
-  verify_euler_vec<-A,+B,-C>(ea);
-  verify_euler_vec<-A,-B,+C>(ea);
-  verify_euler_vec<-A,-B,-C>(ea);
-}
-
-template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)
-{
-  verify_euler_all_neg<X,Y,Z>(ea);
-  verify_euler_all_neg<X,Y,X>(ea);
-  verify_euler_all_neg<X,Z,Y>(ea);
-  verify_euler_all_neg<X,Z,X>(ea);
+  // Saturate the angles to the correct range
+  if (positiveRangeAlpha && (eabis2[0] < 0))
+    eabis2[0] += Scalar(2 * EIGEN_PI);
+  if (positiveRangeBeta && (eabis2[1] < 0))
+    eabis2[1] += Scalar(2 * EIGEN_PI);
+  if (positiveRangeGamma && (eabis2[2] < 0))
+    eabis2[2] += Scalar(2 * EIGEN_PI);
   
-  verify_euler_all_neg<Y,Z,X>(ea);
-  verify_euler_all_neg<Y,Z,Y>(ea);
-  verify_euler_all_neg<Y,X,Z>(ea);
-  verify_euler_all_neg<Y,X,Y>(ea);
+  VERIFY_IS_APPROX(eabis, eabis2);// Verify that our estimation is the same as m.eulerAngles() is
   
-  verify_euler_all_neg<Z,X,Y>(ea);
-  verify_euler_all_neg<Z,X,Z>(ea);
-  verify_euler_all_neg<Z,Y,X>(ea);
-  verify_euler_all_neg<Z,Y,Z>(ea);
-}
-
-template<typename Scalar> void check_singular_cases(const Scalar& singularBeta)
-{
-  typedef Matrix<Scalar,3,1> Vector3;
-  const Scalar PI = Scalar(EIGEN_PI);
+  Matrix3 mbis(AngleAxisType(eabis[0], I) * AngleAxisType(eabis[1], J) * AngleAxisType(eabis[2], K));
+  VERIFY_IS_APPROX(m,  mbis);
   
-  for (Scalar epsilon = NumTraits<Scalar>::epsilon(); epsilon < 1; epsilon *= Scalar(1.2))
+  // Tests that are only relevant when no angle uses the positive range
+  if (!(positiveRangeAlpha || positiveRangeBeta || positiveRangeGamma))
   {
-    check_all_var(Vector3(PI/4, singularBeta, PI/3));
-    check_all_var(Vector3(PI/4, singularBeta - epsilon, PI/3));
-    check_all_var(Vector3(PI/4, singularBeta - Scalar(1.5)*epsilon, PI/3));
-    check_all_var(Vector3(PI/4, singularBeta - 2*epsilon, PI/3));
-    check_all_var(Vector3(PI*Scalar(0.8), singularBeta - epsilon, Scalar(0.9)*PI));
-    check_all_var(Vector3(PI*Scalar(-0.9), singularBeta + epsilon, PI*Scalar(0.3)));
-    check_all_var(Vector3(PI*Scalar(-0.6), singularBeta + Scalar(1.5)*epsilon, PI*Scalar(0.3)));
-    check_all_var(Vector3(PI*Scalar(-0.5), singularBeta + 2*epsilon, PI*Scalar(0.4)));
-    check_all_var(Vector3(PI*Scalar(0.9), singularBeta + epsilon, Scalar(0.8)*PI));
+    /* If I==K and ea[1]==0, then there is no unique solution. */
+    /* The same remark applies when I!=K and |ea[1]| is close to pi/2. */
+    if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision<Scalar>())) ) 
+      VERIFY((ea-eabis).norm() <= test_precision<Scalar>());
+    
+    // approx_or_less_than does not work for 0
+    VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1)));
   }
   
-  // This one for sanity, it had a problem with near pole cases in float scalar.
-  check_all_var(Vector3(PI*Scalar(0.8), singularBeta - Scalar(1E-6), Scalar(0.9)*PI));
+  // Quaternions
+  QuaternionType q(e);
+  eabis = EulerAnglesType(q, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles();
+  VERIFY_IS_APPROX(eabis, eabis2);// Verify that the euler angles are still the same
 }
 
-template<typename Scalar> void eulerangles_manual()
+template<typename EulerSystem, typename Scalar>
+void verify_euler(const Matrix<Scalar,3,1>& ea)
 {
-  typedef Matrix<Scalar,3,1> Vector3;
-  const Vector3 Zero = Vector3::Zero();
-  const Scalar PI = Scalar(EIGEN_PI);
-  
-  check_all_var(Zero);
-  
-  // singular cases
-  check_singular_cases(PI/2);
-  check_singular_cases(-PI/2);
-  
-  check_singular_cases(Scalar(0));
-  check_singular_cases(Scalar(-0));
-  
-  check_singular_cases(PI);
-  check_singular_cases(-PI);
-  
-  // non-singular cases
-  VectorXd alpha = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI);
-  VectorXd beta = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.49) * PI, Scalar(0.49) * PI);
-  VectorXd gamma = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI);
-  for (int i = 0; i < alpha.size(); ++i) {
-    for (int j = 0; j < beta.size(); ++j) {
-      for (int k = 0; k < gamma.size(); ++k) {
-        check_all_var(Vector3d(alpha(i), beta(j), gamma(k)));
-      }
-    }
-  }
+  verify_euler_ranged<EulerSystem>(ea, false, false, false);
+  verify_euler_ranged<EulerSystem>(ea, false, false, true);
+  verify_euler_ranged<EulerSystem>(ea, false, true, false);
+  verify_euler_ranged<EulerSystem>(ea, false, true, true);
+  verify_euler_ranged<EulerSystem>(ea, true, false, false);
+  verify_euler_ranged<EulerSystem>(ea, true, false, true);
+  verify_euler_ranged<EulerSystem>(ea, true, true, false);
+  verify_euler_ranged<EulerSystem>(ea, true, true, true);
 }
 
-template<typename Scalar> void eulerangles_rand()
+template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)
+{
+  verify_euler<EulerSystemXYZ>(ea);
+  verify_euler<EulerSystemXYX>(ea);
+  verify_euler<EulerSystemXZY>(ea);
+  verify_euler<EulerSystemXZX>(ea);
+  
+  verify_euler<EulerSystemYZX>(ea);
+  verify_euler<EulerSystemYZY>(ea);
+  verify_euler<EulerSystemYXZ>(ea);
+  verify_euler<EulerSystemYXY>(ea);
+  
+  verify_euler<EulerSystemZXY>(ea);
+  verify_euler<EulerSystemZXZ>(ea);
+  verify_euler<EulerSystemZYX>(ea);
+  verify_euler<EulerSystemZYZ>(ea);
+}
+
+template<typename Scalar> void eulerangles()
 {
   typedef Matrix<Scalar,3,3> Matrix3;
   typedef Matrix<Scalar,3,1> Vector3;
@@ -274,19 +201,8 @@ template<typename Scalar> void eulerangles_rand()
 
 void test_EulerAngles()
 {
-  // Simple cast test
-  EulerAnglesXYZd onesEd(1, 1, 1);
-  EulerAnglesXYZf onesEf = onesEd.cast<float>();
-  VERIFY_IS_APPROX(onesEd, onesEf.cast<double>());
-  
-  CALL_SUBTEST_1( eulerangles_manual<float>() );
-  CALL_SUBTEST_2( eulerangles_manual<double>() );
-  
   for(int i = 0; i < g_repeat; i++) {
-    CALL_SUBTEST_3( eulerangles_rand<float>() );
-    CALL_SUBTEST_4( eulerangles_rand<double>() );
+    CALL_SUBTEST_1( eulerangles<float>() );
+    CALL_SUBTEST_2( eulerangles<double>() );
   }
-  
-  // TODO: Add tests for auto diff
-  // TODO: Add tests for complex numbers
 }
diff --git a/eigen/unsupported/test/autodiff_scalar.cpp b/eigen/unsupported/test/autodiff_scalar.cpp
index 4df2f5c..9cf1128 100644
--- a/eigen/unsupported/test/autodiff_scalar.cpp
+++ b/eigen/unsupported/test/autodiff_scalar.cpp
@@ -72,6 +72,20 @@ template<typename Scalar> void check_hyperbolic_functions()
   VERIFY_IS_APPROX(res3.derivatives().x(), Scalar(0.339540557256150));
 }
 
+template <typename Scalar>
+void check_limits_specialization()
+{
+  typedef Eigen::Matrix<Scalar, 1, 1> Deriv;
+  typedef Eigen::AutoDiffScalar<Deriv> AD;
+
+  typedef std::numeric_limits<AD> A;
+  typedef std::numeric_limits<Scalar> B;
+
+#if EIGEN_HAS_CXX11
+  VERIFY(bool(std::is_base_of<B, A>::value));
+#endif
+}
+
 void test_autodiff_scalar()
 {
   for(int i = 0; i < g_repeat; i++) {
@@ -79,5 +93,6 @@ void test_autodiff_scalar()
     CALL_SUBTEST_2( check_atan2<double>() );
     CALL_SUBTEST_3( check_hyperbolic_functions<float>() );
     CALL_SUBTEST_4( check_hyperbolic_functions<double>() );
+    CALL_SUBTEST_5( check_limits_specialization<double>());
   }
 }
diff --git a/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp b/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp
index 48cd2d4..5f9bb93 100644
--- a/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp
+++ b/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp
@@ -11,7 +11,6 @@
 #define EIGEN_USE_THREADS
 #include "main.h"
 #include "Eigen/CXX11/ThreadPool"
-#include "Eigen/CXX11/Tensor"
 
 static void test_create_destroy_empty_pool()
 {
@@ -23,11 +22,11 @@ static void test_create_destroy_empty_pool()
 }
 
 
-static void test_parallelism(bool allow_spinning)
+static void test_parallelism()
 {
   // Test we never-ever fail to match available tasks with idle threads.
   const int kThreads = 16;  // code below expects that this is a multiple of 4
-  NonBlockingThreadPool tp(kThreads, allow_spinning);
+  NonBlockingThreadPool tp(kThreads);
   VERIFY_IS_EQUAL(tp.NumThreads(), kThreads);
   VERIFY_IS_EQUAL(tp.CurrentThreadId(), -1);
   for (int iter = 0; iter < 100; ++iter) {
@@ -101,25 +100,8 @@ static void test_parallelism(bool allow_spinning)
   }
 }
 
-
-static void test_cancel()
-{
-  NonBlockingThreadPool tp(2);
-
-  // Schedule a large number of closure that each sleeps for one second. This
-  // will keep the thread pool busy for much longer than the default test timeout.
-  for (int i = 0; i < 1000; ++i) {
-    tp.Schedule([]() { EIGEN_SLEEP(2000); });
-  }
-
-  // Cancel the processing of all the closures that are still pending.
-  tp.Cancel();
-}
-
 void test_cxx11_non_blocking_thread_pool()
 {
   CALL_SUBTEST(test_create_destroy_empty_pool());
-  CALL_SUBTEST(test_parallelism(true));
-  CALL_SUBTEST(test_parallelism(false));
-  CALL_SUBTEST(test_cancel());
+  CALL_SUBTEST(test_parallelism());
 }
diff --git a/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
index 21fdfca..7201bfe 100644
--- a/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
@@ -14,7 +14,7 @@
 #define EIGEN_TEST_NO_LONGDOUBLE
 #define EIGEN_TEST_NO_COMPLEX
 #define EIGEN_TEST_FUNC cxx11_tensor_broadcast_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_SYCL
 
 #include "main.h"
@@ -25,99 +25,39 @@ using Eigen::SyclDevice;
 using Eigen::Tensor;
 using Eigen::TensorMap;
 
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_broadcast_sycl_fixed(const Eigen::SyclDevice &sycl_device){
-
-  // BROADCAST test:
-  IndexType inDim1=2;
-  IndexType inDim2=3;
-  IndexType inDim3=5;
-  IndexType inDim4=7;
-  IndexType bDim1=2;
-  IndexType bDim2=3;
-  IndexType bDim3=1;
-  IndexType bDim4=4;
-  array<IndexType, 4> in_range   = {{inDim1, inDim2, inDim3, inDim4}};
-  array<IndexType, 4> broadcasts = {{bDim1, bDim2, bDim3, bDim4}};
-  array<IndexType, 4> out_range;  // = in_range * broadcasts
-  for (size_t i = 0; i < out_range.size(); ++i)
-    out_range[i] = in_range[i] * broadcasts[i];
-
-  Tensor<DataType, 4, DataLayout, IndexType>  input(in_range);
-  Tensor<DataType, 4, DataLayout, IndexType> out(out_range);
-
-  for (size_t i = 0; i < in_range.size(); ++i)
-    VERIFY_IS_EQUAL(out.dimension(i), out_range[i]);
-
-
-  for (IndexType i = 0; i < input.size(); ++i)
-    input(i) = static_cast<DataType>(i);
-
-  DataType * gpu_in_data  = static_cast<DataType*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(DataType)));
-  DataType * gpu_out_data  = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
-
-  TensorMap<TensorFixedSize<DataType, Sizes<2, 3, 5, 7>, DataLayout, IndexType>> gpu_in(gpu_in_data, in_range);
-  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_out(gpu_out_data, out_range);
-  sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(DataType));
-  gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
-
-  for (IndexType i = 0; i < inDim1*bDim1; ++i) {
-    for (IndexType j = 0; j < inDim2*bDim2; ++j) {
-      for (IndexType k = 0; k < inDim3*bDim3; ++k) {
-        for (IndexType l = 0; l < inDim4*bDim4; ++l) {
-          VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), out(i,j,k,l));
-        }
-      }
-    }
-  }
-  printf("Broadcast Test with fixed size Passed\n");
-  sycl_device.deallocate(gpu_in_data);
-  sycl_device.deallocate(gpu_out_data);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
 static void test_broadcast_sycl(const Eigen::SyclDevice &sycl_device){
 
   // BROADCAST test:
-  IndexType inDim1=2;
-  IndexType inDim2=3;
-  IndexType inDim3=5;
-  IndexType inDim4=7;
-  IndexType bDim1=2;
-  IndexType bDim2=3;
-  IndexType bDim3=1;
-  IndexType bDim4=4;
-  array<IndexType, 4> in_range   = {{inDim1, inDim2, inDim3, inDim4}};
-  array<IndexType, 4> broadcasts = {{bDim1, bDim2, bDim3, bDim4}};
-  array<IndexType, 4> out_range;  // = in_range * broadcasts
+  array<int, 4> in_range   = {{2, 3, 5, 7}};
+  array<int, 4> broadcasts = {{2, 3, 1, 4}};
+  array<int, 4> out_range;  // = in_range * broadcasts
   for (size_t i = 0; i < out_range.size(); ++i)
     out_range[i] = in_range[i] * broadcasts[i];
 
-  Tensor<DataType, 4, DataLayout, IndexType>  input(in_range);
-  Tensor<DataType, 4, DataLayout, IndexType> out(out_range);
+  Tensor<float, 4>  input(in_range);
+  Tensor<float, 4> out(out_range);
 
   for (size_t i = 0; i < in_range.size(); ++i)
     VERIFY_IS_EQUAL(out.dimension(i), out_range[i]);
 
 
-  for (IndexType i = 0; i < input.size(); ++i)
-    input(i) = static_cast<DataType>(i);
+  for (int i = 0; i < input.size(); ++i)
+    input(i) = static_cast<float>(i);
 
-  DataType * gpu_in_data  = static_cast<DataType*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(DataType)));
-  DataType * gpu_out_data  = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
+  float * gpu_in_data  = static_cast<float*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(float)));
+  float * gpu_out_data  = static_cast<float*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float)));
 
-  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>>  gpu_in(gpu_in_data, in_range);
-  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_out(gpu_out_data, out_range);
-  sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(DataType));
+  TensorMap<Tensor<float, 4>>  gpu_in(gpu_in_data, in_range);
+  TensorMap<Tensor<float, 4>> gpu_out(gpu_out_data, out_range);
+  sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(float));
   gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts);
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
 
-  for (IndexType i = 0; i < inDim1*bDim1; ++i) {
-    for (IndexType j = 0; j < inDim2*bDim2; ++j) {
-      for (IndexType k = 0; k < inDim3*bDim3; ++k) {
-        for (IndexType l = 0; l < inDim4*bDim4; ++l) {
-          VERIFY_IS_APPROX(input(i%inDim1,j%inDim2,k%inDim3,l%inDim4), out(i,j,k,l));
+  for (int i = 0; i < 4; ++i) {
+    for (int j = 0; j < 9; ++j) {
+      for (int k = 0; k < 5; ++k) {
+        for (int l = 0; l < 28; ++l) {
+          VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), out(i,j,k,l));
         }
       }
     }
@@ -127,18 +67,8 @@ static void test_broadcast_sycl(const Eigen::SyclDevice &sycl_device){
   sycl_device.deallocate(gpu_out_data);
 }
 
-template<typename DataType> void sycl_broadcast_test_per_device(const cl::sycl::device& d){
-  std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
-  QueueInterface queueInterface(d);
-  auto sycl_device = Eigen::SyclDevice(&queueInterface);
-  test_broadcast_sycl<DataType, RowMajor, int64_t>(sycl_device);
-  test_broadcast_sycl<DataType, ColMajor, int64_t>(sycl_device);
-  test_broadcast_sycl_fixed<DataType, RowMajor, int64_t>(sycl_device);
-  test_broadcast_sycl_fixed<DataType, ColMajor, int64_t>(sycl_device);
-}
-
 void test_cxx11_tensor_broadcast_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(sycl_broadcast_test_per_device<float>(device));
-  }
+  cl::sycl::gpu_selector s;
+  Eigen::SyclDevice sycl_device(s);
+  CALL_SUBTEST(test_broadcast_sycl(sycl_device));
 }
diff --git a/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp
deleted file mode 100644
index 400a31d..0000000
--- a/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp
+++ /dev/null
@@ -1,267 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli    Codeplay Software Ltd.
-// Ralph Potter  Codeplay Software Ltd.
-// Luke Iwanski  Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_builtins_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-
-namespace std {
-template <typename T> T rsqrt(T x) { return 1 / std::sqrt(x); }
-template <typename T> T square(T x) { return x * x; }
-template <typename T> T cube(T x) { return x * x * x; }
-template <typename T> T inverse(T x) { return 1 / x; }
-}
-
-#define TEST_UNARY_BUILTINS_FOR_SCALAR(FUNC, SCALAR, OPERATOR, Layout)         \
-  {                                                                            \
-    /* out OPERATOR in.FUNC() */                                               \
-    Tensor<SCALAR, 3, Layout, int64_t> in(tensorRange);                        \
-    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange);                       \
-    in = in.random() + static_cast<SCALAR>(0.01);                              \
-    out = out.random() + static_cast<SCALAR>(0.01);                            \
-    Tensor<SCALAR, 3, Layout, int64_t> reference(out);                         \
-    SCALAR *gpu_data = static_cast<SCALAR *>(                                  \
-        sycl_device.allocate(in.size() * sizeof(SCALAR)));                     \
-    SCALAR *gpu_data_out = static_cast<SCALAR *>(                              \
-        sycl_device.allocate(out.size() * sizeof(SCALAR)));                    \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu(gpu_data, tensorRange);          \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange);  \
-    sycl_device.memcpyHostToDevice(gpu_data, in.data(),                        \
-                                   (in.size()) * sizeof(SCALAR));              \
-    sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),                   \
-                                   (out.size()) * sizeof(SCALAR));             \
-    gpu_out.device(sycl_device) OPERATOR gpu.FUNC();                           \
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                   \
-                                   (out.size()) * sizeof(SCALAR));             \
-    for (int64_t i = 0; i < out.size(); ++i) {                                 \
-      SCALAR ver = reference(i);                                               \
-      ver OPERATOR std::FUNC(in(i));                                           \
-      VERIFY_IS_APPROX(out(i), ver);                                           \
-    }                                                                          \
-    sycl_device.deallocate(gpu_data);                                          \
-    sycl_device.deallocate(gpu_data_out);                                      \
-  }                                                                            \
-  {                                                                            \
-    /* out OPERATOR out.FUNC() */                                              \
-    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange);                       \
-    out = out.random() + static_cast<SCALAR>(0.01);                            \
-    Tensor<SCALAR, 3, Layout, int64_t> reference(out);                         \
-    SCALAR *gpu_data_out = static_cast<SCALAR *>(                              \
-        sycl_device.allocate(out.size() * sizeof(SCALAR)));                    \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange);  \
-    sycl_device.memcpyHostToDevice(gpu_data_out, out.data(),                   \
-                                   (out.size()) * sizeof(SCALAR));             \
-    gpu_out.device(sycl_device) OPERATOR gpu_out.FUNC();                       \
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                   \
-                                   (out.size()) * sizeof(SCALAR));             \
-    for (int64_t i = 0; i < out.size(); ++i) {                                 \
-      SCALAR ver = reference(i);                                               \
-      ver OPERATOR std::FUNC(reference(i));                                    \
-      VERIFY_IS_APPROX(out(i), ver);                                           \
-    }                                                                          \
-    sycl_device.deallocate(gpu_data_out);                                      \
-  }
-
-#define TEST_UNARY_BUILTINS_OPERATOR(SCALAR, OPERATOR , Layout)                \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR , Layout)               \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(sqrt, SCALAR, OPERATOR , Layout)              \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(rsqrt, SCALAR, OPERATOR , Layout)             \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(square, SCALAR, OPERATOR , Layout)            \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(cube, SCALAR, OPERATOR , Layout)              \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(inverse, SCALAR, OPERATOR , Layout)           \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(tanh, SCALAR, OPERATOR , Layout)              \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(exp, SCALAR, OPERATOR , Layout)               \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(expm1, SCALAR, OPERATOR , Layout)             \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(log, SCALAR, OPERATOR , Layout)               \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR , Layout)               \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(ceil, SCALAR, OPERATOR , Layout)              \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(floor, SCALAR, OPERATOR , Layout)             \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(round, SCALAR, OPERATOR , Layout)             \
-  TEST_UNARY_BUILTINS_FOR_SCALAR(log1p, SCALAR, OPERATOR , Layout)
-
-#define TEST_IS_THAT_RETURNS_BOOL(SCALAR, FUNC, Layout)                        \
-  {                                                                            \
-    /* out = in.FUNC() */                                                      \
-    Tensor<SCALAR, 3, Layout, int64_t> in(tensorRange);                        \
-    Tensor<bool, 3, Layout, int64_t> out(tensorRange);                         \
-    in = in.random() + static_cast<SCALAR>(0.01);                              \
-    SCALAR *gpu_data = static_cast<SCALAR *>(                                  \
-        sycl_device.allocate(in.size() * sizeof(SCALAR)));                     \
-    bool *gpu_data_out =                                                       \
-        static_cast<bool *>(sycl_device.allocate(out.size() * sizeof(bool)));  \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu(gpu_data, tensorRange);          \
-    TensorMap<Tensor<bool, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange);    \
-    sycl_device.memcpyHostToDevice(gpu_data, in.data(),                        \
-                                   (in.size()) * sizeof(SCALAR));              \
-    gpu_out.device(sycl_device) = gpu.FUNC();                                  \
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                   \
-                                   (out.size()) * sizeof(bool));               \
-    for (int64_t i = 0; i < out.size(); ++i) {                                 \
-      VERIFY_IS_EQUAL(out(i), std::FUNC(in(i)));                               \
-    }                                                                          \
-    sycl_device.deallocate(gpu_data);                                          \
-    sycl_device.deallocate(gpu_data_out);                                      \
-  }
-
-#define TEST_UNARY_BUILTINS(SCALAR, Layout)                                    \
-  TEST_UNARY_BUILTINS_OPERATOR(SCALAR, +=, Layout)                             \
-  TEST_UNARY_BUILTINS_OPERATOR(SCALAR, =, Layout)                              \
-  TEST_IS_THAT_RETURNS_BOOL(SCALAR, isnan, Layout)                             \
-  TEST_IS_THAT_RETURNS_BOOL(SCALAR, isfinite, Layout)                          \
-  TEST_IS_THAT_RETURNS_BOOL(SCALAR, isinf, Layout)
-
-static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device) {
-  int64_t sizeDim1 = 10;
-  int64_t sizeDim2 = 10;
-  int64_t sizeDim3 = 10;
-  array<int64_t, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
-
-  TEST_UNARY_BUILTINS(float, RowMajor)
-  TEST_UNARY_BUILTINS(float, ColMajor)
-}
-
-namespace std {
-template <typename T> T cwiseMax(T x, T y) { return std::max(x, y); }
-template <typename T> T cwiseMin(T x, T y) { return std::min(x, y); }
-}
-
-#define TEST_BINARY_BUILTINS_FUNC(SCALAR, FUNC, Layout)                        \
-  {                                                                            \
-    /* out = in_1.FUNC(in_2) */                                                \
-    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange);                      \
-    Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange);                      \
-    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange);                       \
-    in_1 = in_1.random() + static_cast<SCALAR>(0.01);                          \
-    in_2 = in_2.random() + static_cast<SCALAR>(0.01);                          \
-    Tensor<SCALAR, 3, Layout, int64_t> reference(out);                         \
-    SCALAR *gpu_data_1 = static_cast<SCALAR *>(                                \
-        sycl_device.allocate(in_1.size() * sizeof(SCALAR)));                   \
-    SCALAR *gpu_data_2 = static_cast<SCALAR *>(                                \
-        sycl_device.allocate(in_2.size() * sizeof(SCALAR)));                   \
-    SCALAR *gpu_data_out = static_cast<SCALAR *>(                              \
-        sycl_device.allocate(out.size() * sizeof(SCALAR)));                    \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange);      \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2, tensorRange);      \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange);  \
-    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),                    \
-                                   (in_1.size()) * sizeof(SCALAR));            \
-    sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(),                    \
-                                   (in_2.size()) * sizeof(SCALAR));            \
-    gpu_out.device(sycl_device) = gpu_1.FUNC(gpu_2);                           \
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                   \
-                                   (out.size()) * sizeof(SCALAR));             \
-    for (int64_t i = 0; i < out.size(); ++i) {                                 \
-      SCALAR ver = reference(i);                                               \
-      ver = std::FUNC(in_1(i), in_2(i));                                       \
-      VERIFY_IS_APPROX(out(i), ver);                                           \
-    }                                                                          \
-    sycl_device.deallocate(gpu_data_1);                                        \
-    sycl_device.deallocate(gpu_data_2);                                        \
-    sycl_device.deallocate(gpu_data_out);                                      \
-  }
-
-#define TEST_BINARY_BUILTINS_OPERATORS(SCALAR, OPERATOR, Layout)               \
-  {                                                                            \
-    /* out = in_1 OPERATOR in_2 */                                             \
-    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange);                      \
-    Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange);                      \
-    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange);                       \
-    in_1 = in_1.random() + static_cast<SCALAR>(0.01);                          \
-    in_2 = in_2.random() + static_cast<SCALAR>(0.01);                          \
-    Tensor<SCALAR, 3, Layout, int64_t> reference(out);                         \
-    SCALAR *gpu_data_1 = static_cast<SCALAR *>(                                \
-        sycl_device.allocate(in_1.size() * sizeof(SCALAR)));                   \
-    SCALAR *gpu_data_2 = static_cast<SCALAR *>(                                \
-        sycl_device.allocate(in_2.size() * sizeof(SCALAR)));                   \
-    SCALAR *gpu_data_out = static_cast<SCALAR *>(                              \
-        sycl_device.allocate(out.size() * sizeof(SCALAR)));                    \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange);      \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2, tensorRange);      \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange);  \
-    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),                    \
-                                   (in_1.size()) * sizeof(SCALAR));            \
-    sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(),                    \
-                                   (in_2.size()) * sizeof(SCALAR));            \
-    gpu_out.device(sycl_device) = gpu_1 OPERATOR gpu_2;                        \
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                   \
-                                   (out.size()) * sizeof(SCALAR));             \
-    for (int64_t i = 0; i < out.size(); ++i) {                                 \
-      VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR in_2(i));                      \
-    }                                                                          \
-    sycl_device.deallocate(gpu_data_1);                                        \
-    sycl_device.deallocate(gpu_data_2);                                        \
-    sycl_device.deallocate(gpu_data_out);                                      \
-  }
-
-#define TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(SCALAR, OPERATOR, Layout)     \
-  {                                                                            \
-    /* out = in_1 OPERATOR 2 */                                                \
-    Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange);                      \
-    Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange);                       \
-    in_1 = in_1.random() + static_cast<SCALAR>(0.01);                          \
-    Tensor<SCALAR, 3, Layout, int64_t> reference(out);                         \
-    SCALAR *gpu_data_1 = static_cast<SCALAR *>(                                \
-        sycl_device.allocate(in_1.size() * sizeof(SCALAR)));                   \
-    SCALAR *gpu_data_out = static_cast<SCALAR *>(                              \
-        sycl_device.allocate(out.size() * sizeof(SCALAR)));                    \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange);      \
-    TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange);  \
-    sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(),                    \
-                                   (in_1.size()) * sizeof(SCALAR));            \
-    gpu_out.device(sycl_device) = gpu_1 OPERATOR 2;                            \
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out,                   \
-                                   (out.size()) * sizeof(SCALAR));             \
-    for (int64_t i = 0; i < out.size(); ++i) {                                 \
-      VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR 2);                            \
-    }                                                                          \
-    sycl_device.deallocate(gpu_data_1);                                        \
-    sycl_device.deallocate(gpu_data_out);                                      \
-  }
-
-#define TEST_BINARY_BUILTINS(SCALAR, Layout)                                   \
-  TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMax , Layout)                         \
-  TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMin , Layout)                         \
-  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, + , Layout)                           \
-  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, - , Layout)                           \
-  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, * , Layout)                           \
-  TEST_BINARY_BUILTINS_OPERATORS(SCALAR, / , Layout)
-
-static void test_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
-  int64_t sizeDim1 = 10;
-  int64_t sizeDim2 = 10;
-  int64_t sizeDim3 = 10;
-  array<int64_t, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
-  TEST_BINARY_BUILTINS(float, RowMajor)
-  TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, RowMajor)
-  TEST_BINARY_BUILTINS(float, ColMajor)
-  TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, ColMajor)
-}
-
-void test_cxx11_tensor_builtins_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    QueueInterface queueInterface(device);
-    Eigen::SyclDevice sycl_device(&queueInterface);
-    CALL_SUBTEST(test_builtin_unary_sycl(sycl_device));
-    CALL_SUBTEST(test_builtin_binary_sycl(sycl_device));
-  }
-}
diff --git a/eigen/unsupported/test/cxx11_tensor_chipping.cpp b/eigen/unsupported/test/cxx11_tensor_chipping.cpp
index 89cf5c7..1832dec 100644
--- a/eigen/unsupported/test/cxx11_tensor_chipping.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_chipping.cpp
@@ -43,7 +43,7 @@ static void test_simple_chip()
   VERIFY_IS_EQUAL(chip2.dimension(2), 7);
   VERIFY_IS_EQUAL(chip2.dimension(3), 11);
   for (int i = 0; i < 2; ++i) {
-    for (int j = 0; j < 5; ++j) {
+    for (int j = 0; j < 3; ++j) {
       for (int k = 0; k < 7; ++k) {
         for (int l = 0; l < 11; ++l) {
           VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l));
@@ -75,7 +75,7 @@ static void test_simple_chip()
   for (int i = 0; i < 2; ++i) {
     for (int j = 0; j < 3; ++j) {
       for (int k = 0; k < 5; ++k) {
-        for (int l = 0; l < 11; ++l) {
+        for (int l = 0; l < 7; ++l) {
           VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l));
         }
       }
@@ -126,7 +126,7 @@ static void test_dynamic_chip()
   VERIFY_IS_EQUAL(chip2.dimension(2), 7);
   VERIFY_IS_EQUAL(chip2.dimension(3), 11);
   for (int i = 0; i < 2; ++i) {
-    for (int j = 0; j < 5; ++j) {
+    for (int j = 0; j < 3; ++j) {
       for (int k = 0; k < 7; ++k) {
         for (int l = 0; l < 11; ++l) {
           VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l));
@@ -158,7 +158,7 @@ static void test_dynamic_chip()
   for (int i = 0; i < 2; ++i) {
     for (int j = 0; j < 3; ++j) {
       for (int k = 0; k < 5; ++k) {
-        for (int l = 0; l < 11; ++l) {
+        for (int l = 0; l < 7; ++l) {
           VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l));
         }
       }
diff --git a/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp
deleted file mode 100644
index 39e4f0a..0000000
--- a/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp
+++ /dev/null
@@ -1,622 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli    Codeplay Software Ltd.
-// Ralph Potter  Codeplay Software Ltd.
-// Luke Iwanski  Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-// Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_chipping_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include "main.h"
-
-#include <Eigen/CXX11/Tensor>
-
-using Eigen::Tensor;
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_static_chip_sycl(const Eigen::SyclDevice& sycl_device)
-{
-  IndexType sizeDim1 = 2;
-  IndexType sizeDim2 = 3;
-  IndexType sizeDim3 = 5;
-  IndexType sizeDim4 = 7;
-  IndexType sizeDim5 = 11;
-
-  array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
-  array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
-
-  Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
-  Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
-
-  tensor.setRandom();
-
-  const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
-  const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
-  DataType* gpu_data_tensor  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
-  DataType* gpu_data_chip1  = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
-
-  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
-
-  sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
-  gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(1l);
-  sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
-
-  VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
-  VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
-  VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
-  VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);
-
-  for (IndexType i = 0; i < sizeDim2; ++i) {
-    for (IndexType j = 0; j < sizeDim3; ++j) {
-      for (IndexType k = 0; k < sizeDim4; ++k) {
-        for (IndexType l = 0; l < sizeDim5; ++l) {
-          VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
-        }
-      }
-    }
-  }
-
-  array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
-  Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
-  const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
-  DataType* gpu_data_chip2  = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
-
-  gpu_chip2.device(sycl_device)=gpu_tensor.template chip<1l>(1l);
-  sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
-
-  VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
-  VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
-  VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
-  VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim3; ++j) {
-      for (IndexType k = 0; k < sizeDim4; ++k) {
-        for (IndexType l = 0; l < sizeDim5; ++l) {
-          VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
-        }
-      }
-    }
-  }
-
-  array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
-  Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
-  const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
-  DataType* gpu_data_chip3  = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);
-
-  gpu_chip3.device(sycl_device)=gpu_tensor.template chip<2l>(2l);
-  sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);
-
-  VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
-  VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
-  VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
-  VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim4; ++k) {
-        for (IndexType l = 0; l < sizeDim5; ++l) {
-          VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
-        }
-      }
-    }
-  }
-
-  array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
-  Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
-  const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
-  DataType* gpu_data_chip4  = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);
-
-  gpu_chip4.device(sycl_device)=gpu_tensor.template chip<3l>(5l);
-  sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);
-
-  VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
-  VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
-  VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
-  VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
-        for (IndexType l = 0; l < sizeDim5; ++l) {
-          VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
-        }
-      }
-    }
-  }
-
-
-  array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
-  Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
-  const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
-  DataType* gpu_data_chip5  = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);
-
-  gpu_chip5.device(sycl_device)=gpu_tensor.template chip<4l>(7l);
-  sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);
-
-  VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
-  VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
-  VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
-  VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
-        for (IndexType l = 0; l < sizeDim4; ++l) {
-          VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
-        }
-      }
-    }
-  }
-
-  sycl_device.deallocate(gpu_data_tensor);
-  sycl_device.deallocate(gpu_data_chip1);
-  sycl_device.deallocate(gpu_data_chip2);
-  sycl_device.deallocate(gpu_data_chip3);
-  sycl_device.deallocate(gpu_data_chip4);
-  sycl_device.deallocate(gpu_data_chip5);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_dynamic_chip_sycl(const Eigen::SyclDevice& sycl_device)
-{
-  IndexType sizeDim1 = 2;
-  IndexType sizeDim2 = 3;
-  IndexType sizeDim3 = 5;
-  IndexType sizeDim4 = 7;
-  IndexType sizeDim5 = 11;
-
-  array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
-  array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
-
-  Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
-  Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
-
-  tensor.setRandom();
-
-  const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
-  const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
-  DataType* gpu_data_tensor  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
-  DataType* gpu_data_chip1  = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
-
-  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
-
-  sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
-  gpu_chip1.device(sycl_device)=gpu_tensor.chip(1l,0l);
-  sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
-
-  VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2);
-  VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3);
-  VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4);
-  VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5);
-
-  for (IndexType i = 0; i < sizeDim2; ++i) {
-    for (IndexType j = 0; j < sizeDim3; ++j) {
-      for (IndexType k = 0; k < sizeDim4; ++k) {
-        for (IndexType l = 0; l < sizeDim5; ++l) {
-          VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l));
-        }
-      }
-    }
-  }
-
-  array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
-  Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange);
-  const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType);
-  DataType* gpu_data_chip2  = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
-
-  gpu_chip2.device(sycl_device)=gpu_tensor.chip(1l,1l);
-  sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
-
-  VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1);
-  VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3);
-  VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4);
-  VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5);
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim3; ++j) {
-      for (IndexType k = 0; k < sizeDim4; ++k) {
-        for (IndexType l = 0; l < sizeDim5; ++l) {
-          VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l));
-        }
-      }
-    }
-  }
-
-  array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
-  Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange);
-  const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType);
-  DataType* gpu_data_chip3  = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize));
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange);
-
-  gpu_chip3.device(sycl_device)=gpu_tensor.chip(2l,2l);
-  sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize);
-
-  VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1);
-  VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2);
-  VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4);
-  VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5);
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim4; ++k) {
-        for (IndexType l = 0; l < sizeDim5; ++l) {
-          VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l));
-        }
-      }
-    }
-  }
-
-  array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
-  Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange);
-  const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType);
-  DataType* gpu_data_chip4  = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize));
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange);
-
-  gpu_chip4.device(sycl_device)=gpu_tensor.chip(5l,3l);
-  sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize);
-
-  VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1);
-  VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2);
-  VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3);
-  VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5);
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
-        for (IndexType l = 0; l < sizeDim5; ++l) {
-          VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l));
-        }
-      }
-    }
-  }
-
-
-  array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
-  Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange);
-  const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType);
-  DataType* gpu_data_chip5  = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize));
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange);
-
-  gpu_chip5.device(sycl_device)=gpu_tensor.chip(7l,4l);
-  sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize);
-
-  VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1);
-  VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2);
-  VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3);
-  VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4);
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
-        for (IndexType l = 0; l < sizeDim4; ++l) {
-          VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l));
-        }
-      }
-    }
-  }
-  sycl_device.deallocate(gpu_data_tensor);
-  sycl_device.deallocate(gpu_data_chip1);
-  sycl_device.deallocate(gpu_data_chip2);
-  sycl_device.deallocate(gpu_data_chip3);
-  sycl_device.deallocate(gpu_data_chip4);
-  sycl_device.deallocate(gpu_data_chip5);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_chip_in_expr(const Eigen::SyclDevice& sycl_device) {
-
-  IndexType sizeDim1 = 2;
-  IndexType sizeDim2 = 3;
-  IndexType sizeDim3 = 5;
-  IndexType sizeDim4 = 7;
-  IndexType sizeDim5 = 11;
-
-  array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
-  array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
-
-  Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
-
-  Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange);
-  Tensor<DataType, 4, DataLayout,IndexType> tensor1(chip1TensorRange);
-  tensor.setRandom();
-  tensor1.setRandom();
-
-  const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
-  const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType);
-  DataType* gpu_data_tensor  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
-  DataType* gpu_data_chip1  = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
-  DataType* gpu_data_tensor1  = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize));
-
-  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange);
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_tensor1(gpu_data_tensor1, chip1TensorRange);
-
-
-  sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize);
-  sycl_device.memcpyHostToDevice(gpu_data_tensor1, tensor1.data(), chip1TensorBuffSize);
-  gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(0l) + gpu_tensor1;
-  sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize);
-
-  for (int i = 0; i < sizeDim2; ++i) {
-    for (int j = 0; j < sizeDim3; ++j) {
-      for (int k = 0; k < sizeDim4; ++k) {
-        for (int l = 0; l < sizeDim5; ++l) {
-          float expected = tensor(0l,i,j,k,l) + tensor1(i,j,k,l);
-          VERIFY_IS_EQUAL(chip1(i,j,k,l), expected);
-        }
-      }
-    }
-  }
-
-  array<IndexType, 3> chip2TensorRange = {{sizeDim2, sizeDim4, sizeDim5}};
-  Tensor<DataType, 3, DataLayout,IndexType> tensor2(chip2TensorRange);
-  Tensor<DataType, 3, DataLayout,IndexType> chip2(chip2TensorRange);
-  tensor2.setRandom();
-  const size_t chip2TensorBuffSize =tensor2.size()*sizeof(DataType);
-  DataType* gpu_data_tensor2  = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
-  DataType* gpu_data_chip2  = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize));
-  TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_tensor2(gpu_data_tensor2, chip2TensorRange);
-  TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange);
-
-  sycl_device.memcpyHostToDevice(gpu_data_tensor2, tensor2.data(), chip2TensorBuffSize);
-  gpu_chip2.device(sycl_device)=gpu_tensor.template chip<0l>(0l).template chip<1l>(2l) + gpu_tensor2;
-  sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize);
-
-  for (int i = 0; i < sizeDim2; ++i) {
-    for (int j = 0; j < sizeDim4; ++j) {
-      for (int k = 0; k < sizeDim5; ++k) {
-        float expected = tensor(0l,i,2l,j,k) + tensor2(i,j,k);
-        VERIFY_IS_EQUAL(chip2(i,j,k), expected);
-      }
-    }
-  }
-  sycl_device.deallocate(gpu_data_tensor);
-  sycl_device.deallocate(gpu_data_tensor1);
-  sycl_device.deallocate(gpu_data_chip1);
-  sycl_device.deallocate(gpu_data_tensor2);
-  sycl_device.deallocate(gpu_data_chip2);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_chip_as_lvalue_sycl(const Eigen::SyclDevice& sycl_device)
-{
-
-  IndexType sizeDim1 = 2;
-  IndexType sizeDim2 = 3;
-  IndexType sizeDim3 = 5;
-  IndexType sizeDim4 = 7;
-  IndexType sizeDim5 = 11;
-
-  array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
-  array<IndexType, 4> input2TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
-
-  Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange);
-  Tensor<DataType, 5, DataLayout,IndexType> input1(tensorRange);
-  Tensor<DataType, 4, DataLayout,IndexType> input2(input2TensorRange);
-  input1.setRandom();
-  input2.setRandom();
-
-
-  const size_t tensorBuffSize =tensor.size()*sizeof(DataType);
-  const size_t input2TensorBuffSize =input2.size()*sizeof(DataType);
-  DataType* gpu_data_tensor  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
-  DataType* gpu_data_input1  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
-  DataType* gpu_data_input2  = static_cast<DataType*>(sycl_device.allocate(input2TensorBuffSize));
-
-  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange);
-  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input1(gpu_data_input1, tensorRange);
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input2(gpu_data_input2, input2TensorRange);
-
-  sycl_device.memcpyHostToDevice(gpu_data_input1, input1.data(), tensorBuffSize);
-  gpu_tensor.device(sycl_device)=gpu_input1;
-  sycl_device.memcpyHostToDevice(gpu_data_input2, input2.data(), input2TensorBuffSize);
-  gpu_tensor.template chip<0l>(1l).device(sycl_device)=gpu_input2;
-  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
-
-  for (int i = 0; i < sizeDim1; ++i) {
-    for (int j = 0; j < sizeDim2; ++j) {
-      for (int k = 0; k < sizeDim3; ++k) {
-        for (int l = 0; l < sizeDim4; ++l) {
-          for (int m = 0; m < sizeDim5; ++m) {
-            if (i != 1) {
-              VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
-            } else {
-              VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input2(j,k,l,m));
-            }
-          }
-        }
-      }
-    }
-  }
-
-  gpu_tensor.device(sycl_device)=gpu_input1;
-  array<IndexType, 4> input3TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}};
-  Tensor<DataType, 4, DataLayout,IndexType> input3(input3TensorRange);
-  input3.setRandom();
-
-  const size_t input3TensorBuffSize =input3.size()*sizeof(DataType);
-  DataType* gpu_data_input3  = static_cast<DataType*>(sycl_device.allocate(input3TensorBuffSize));
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input3(gpu_data_input3, input3TensorRange);
-
-  sycl_device.memcpyHostToDevice(gpu_data_input3, input3.data(), input3TensorBuffSize);
-  gpu_tensor.template chip<1l>(1l).device(sycl_device)=gpu_input3;
-  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
-
-  for (int i = 0; i < sizeDim1; ++i) {
-    for (int j = 0; j < sizeDim2; ++j) {
-      for (int k = 0; k <sizeDim3; ++k) {
-        for (int l = 0; l < sizeDim4; ++l) {
-          for (int m = 0; m < sizeDim5; ++m) {
-            if (j != 1) {
-              VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
-            } else {
-              VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input3(i,k,l,m));
-            }
-          }
-        }
-      }
-    }
-  }
-
-  gpu_tensor.device(sycl_device)=gpu_input1;
-  array<IndexType, 4> input4TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}};
-  Tensor<DataType, 4, DataLayout,IndexType> input4(input4TensorRange);
-  input4.setRandom();
-
-  const size_t input4TensorBuffSize =input4.size()*sizeof(DataType);
-  DataType* gpu_data_input4  = static_cast<DataType*>(sycl_device.allocate(input4TensorBuffSize));
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input4(gpu_data_input4, input4TensorRange);
-
-  sycl_device.memcpyHostToDevice(gpu_data_input4, input4.data(), input4TensorBuffSize);
-  gpu_tensor.template chip<2l>(3l).device(sycl_device)=gpu_input4;
-  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
-
-  for (int i = 0; i < sizeDim1; ++i) {
-    for (int j = 0; j < sizeDim2; ++j) {
-      for (int k = 0; k <sizeDim3; ++k) {
-        for (int l = 0; l < sizeDim4; ++l) {
-          for (int m = 0; m < sizeDim5; ++m) {
-            if (k != 3) {
-              VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
-            } else {
-              VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input4(i,j,l,m));
-            }
-          }
-        }
-      }
-    }
-  }
-
-  gpu_tensor.device(sycl_device)=gpu_input1;
-  array<IndexType, 4> input5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}};
-  Tensor<DataType, 4, DataLayout,IndexType> input5(input5TensorRange);
-  input5.setRandom();
-
-  const size_t input5TensorBuffSize =input5.size()*sizeof(DataType);
-  DataType* gpu_data_input5  = static_cast<DataType*>(sycl_device.allocate(input5TensorBuffSize));
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input5(gpu_data_input5, input5TensorRange);
-
-  sycl_device.memcpyHostToDevice(gpu_data_input5, input5.data(), input5TensorBuffSize);
-  gpu_tensor.template chip<3l>(4l).device(sycl_device)=gpu_input5;
-  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
-
-  for (int i = 0; i < sizeDim1; ++i) {
-    for (int j = 0; j < sizeDim2; ++j) {
-      for (int k = 0; k <sizeDim3; ++k) {
-        for (int l = 0; l < sizeDim4; ++l) {
-          for (int m = 0; m < sizeDim5; ++m) {
-            if (l != 4) {
-              VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
-            } else {
-              VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input5(i,j,k,m));
-            }
-          }
-        }
-      }
-    }
-  }
-  gpu_tensor.device(sycl_device)=gpu_input1;
-  array<IndexType, 4> input6TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
-  Tensor<DataType, 4, DataLayout,IndexType> input6(input6TensorRange);
-  input6.setRandom();
-
-  const size_t input6TensorBuffSize =input6.size()*sizeof(DataType);
-  DataType* gpu_data_input6  = static_cast<DataType*>(sycl_device.allocate(input6TensorBuffSize));
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input6(gpu_data_input6, input6TensorRange);
-
-  sycl_device.memcpyHostToDevice(gpu_data_input6, input6.data(), input6TensorBuffSize);
-  gpu_tensor.template chip<4l>(5l).device(sycl_device)=gpu_input6;
-  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
-
-  for (int i = 0; i < sizeDim1; ++i) {
-    for (int j = 0; j < sizeDim2; ++j) {
-      for (int k = 0; k <sizeDim3; ++k) {
-        for (int l = 0; l < sizeDim4; ++l) {
-          for (int m = 0; m < sizeDim5; ++m) {
-            if (m != 5) {
-              VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
-            } else {
-              VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input6(i,j,k,l));
-            }
-          }
-        }
-      }
-    }
-  }
-
-
-  gpu_tensor.device(sycl_device)=gpu_input1;
-  Tensor<DataType, 5, DataLayout,IndexType> input7(tensorRange);
-  input7.setRandom();
-
-  DataType* gpu_data_input7  = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize));
-  TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input7(gpu_data_input7, tensorRange);
-
-  sycl_device.memcpyHostToDevice(gpu_data_input7, input7.data(), tensorBuffSize);
-  gpu_tensor.chip(0l,0l).device(sycl_device)=gpu_input7.chip(0l,0l);
-  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize);
-
-  for (int i = 0; i < sizeDim1; ++i) {
-    for (int j = 0; j < sizeDim2; ++j) {
-      for (int k = 0; k <sizeDim3; ++k) {
-        for (int l = 0; l < sizeDim4; ++l) {
-          for (int m = 0; m < sizeDim5; ++m) {
-            if (i != 0) {
-              VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m));
-            } else {
-              VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input7(i,j,k,l,m));
-            }
-          }
-        }
-      }
-    }
-  }
-  sycl_device.deallocate(gpu_data_tensor);
-  sycl_device.deallocate(gpu_data_input1);
-  sycl_device.deallocate(gpu_data_input2);
-  sycl_device.deallocate(gpu_data_input3);
-  sycl_device.deallocate(gpu_data_input4);
-  sycl_device.deallocate(gpu_data_input5);
-  sycl_device.deallocate(gpu_data_input6);
-  sycl_device.deallocate(gpu_data_input7);
-
-}
-
-template<typename DataType, typename dev_Selector> void sycl_chipping_test_per_device(dev_Selector s){
-  QueueInterface queueInterface(s);
-  auto sycl_device = Eigen::SyclDevice(&queueInterface);
-  test_static_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
-  test_static_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
-  test_dynamic_chip_sycl<DataType, RowMajor, int64_t>(sycl_device);
-  test_dynamic_chip_sycl<DataType, ColMajor, int64_t>(sycl_device);
-  test_chip_in_expr<DataType, RowMajor, int64_t>(sycl_device);
-  test_chip_in_expr<DataType, ColMajor, int64_t>(sycl_device);
-  test_chip_as_lvalue_sycl<DataType, RowMajor, int64_t>(sycl_device);
-  test_chip_as_lvalue_sycl<DataType, ColMajor, int64_t>(sycl_device);
-}
-void test_cxx11_tensor_chipping_sycl()
-{
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(sycl_chipping_test_per_device<float>(device));
-  }
-}
diff --git a/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp
deleted file mode 100644
index e3023a3..0000000
--- a/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli    Codeplay Software Ltd.
-// Ralph Potter  Codeplay Software Ltd.
-// Luke Iwanski  Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_concatenation_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::Tensor;
-
-template<typename DataType, int DataLayout, typename IndexType>
-static void test_simple_concatenation(const Eigen::SyclDevice& sycl_device)
-{
-  IndexType leftDim1 = 2;
-  IndexType leftDim2 = 3;
-  IndexType leftDim3 = 1;
-  Eigen::array<IndexType, 3> leftRange = {{leftDim1, leftDim2, leftDim3}};
-  IndexType rightDim1 = 2;
-  IndexType rightDim2 = 3;
-  IndexType rightDim3 = 1;
-  Eigen::array<IndexType, 3> rightRange = {{rightDim1, rightDim2, rightDim3}};
-
-  //IndexType concatDim1 = 3;
-//	IndexType concatDim2 = 3;
-//	IndexType concatDim3 = 1;
-  //Eigen::array<IndexType, 3> concatRange = {{concatDim1, concatDim2, concatDim3}};
-
-  Tensor<DataType, 3, DataLayout, IndexType> left(leftRange);
-  Tensor<DataType, 3, DataLayout, IndexType> right(rightRange);
-  left.setRandom();
-  right.setRandom();
-
-  DataType * gpu_in1_data  = static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType)));
-  DataType * gpu_in2_data  = static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType)));
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, leftRange);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, rightRange);
-  sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType));
-  sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType));
-  ///
-  Tensor<DataType, 3, DataLayout, IndexType> concatenation1(leftDim1+rightDim1, leftDim2, leftDim3);
-  DataType * gpu_out_data1 =  static_cast<DataType*>(sycl_device.allocate(concatenation1.dimensions().TotalSize()*sizeof(DataType)));
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out1(gpu_out_data1, concatenation1.dimensions());
-
-  //concatenation = left.concatenate(right, 0);
-  gpu_out1.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 0);
-  sycl_device.memcpyDeviceToHost(concatenation1.data(), gpu_out_data1,(concatenation1.dimensions().TotalSize())*sizeof(DataType));
-
-  VERIFY_IS_EQUAL(concatenation1.dimension(0), 4);
-  VERIFY_IS_EQUAL(concatenation1.dimension(1), 3);
-  VERIFY_IS_EQUAL(concatenation1.dimension(2), 1);
-  for (IndexType j = 0; j < 3; ++j) {
-    for (IndexType i = 0; i < 2; ++i) {
-      VERIFY_IS_EQUAL(concatenation1(i, j, 0), left(i, j, 0));
-    }
-    for (IndexType i = 2; i < 4; ++i) {
-      VERIFY_IS_EQUAL(concatenation1(i, j, 0), right(i - 2, j, 0));
-    }
-  }
-
-  sycl_device.deallocate(gpu_out_data1);
-  Tensor<DataType, 3, DataLayout, IndexType> concatenation2(leftDim1, leftDim2 +rightDim2, leftDim3);
-  DataType * gpu_out_data2 =  static_cast<DataType*>(sycl_device.allocate(concatenation2.dimensions().TotalSize()*sizeof(DataType)));
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out2(gpu_out_data2, concatenation2.dimensions());
-  gpu_out2.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 1);
-  sycl_device.memcpyDeviceToHost(concatenation2.data(), gpu_out_data2,(concatenation2.dimensions().TotalSize())*sizeof(DataType));
-
-  //concatenation = left.concatenate(right, 1);
-  VERIFY_IS_EQUAL(concatenation2.dimension(0), 2);
-  VERIFY_IS_EQUAL(concatenation2.dimension(1), 6);
-  VERIFY_IS_EQUAL(concatenation2.dimension(2), 1);
-  for (IndexType i = 0; i < 2; ++i) {
-    for (IndexType j = 0; j < 3; ++j) {
-      VERIFY_IS_EQUAL(concatenation2(i, j, 0), left(i, j, 0));
-    }
-    for (IndexType j = 3; j < 6; ++j) {
-      VERIFY_IS_EQUAL(concatenation2(i, j, 0), right(i, j - 3, 0));
-    }
-  }
-  sycl_device.deallocate(gpu_out_data2);
-  Tensor<DataType, 3, DataLayout, IndexType> concatenation3(leftDim1, leftDim2, leftDim3+rightDim3);
-  DataType * gpu_out_data3 =  static_cast<DataType*>(sycl_device.allocate(concatenation3.dimensions().TotalSize()*sizeof(DataType)));
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out3(gpu_out_data3, concatenation3.dimensions());
-  gpu_out3.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 2);
-  sycl_device.memcpyDeviceToHost(concatenation3.data(), gpu_out_data3,(concatenation3.dimensions().TotalSize())*sizeof(DataType));
-
-  //concatenation = left.concatenate(right, 2);
-  VERIFY_IS_EQUAL(concatenation3.dimension(0), 2);
-  VERIFY_IS_EQUAL(concatenation3.dimension(1), 3);
-  VERIFY_IS_EQUAL(concatenation3.dimension(2), 2);
-  for (IndexType i = 0; i < 2; ++i) {
-    for (IndexType j = 0; j < 3; ++j) {
-      VERIFY_IS_EQUAL(concatenation3(i, j, 0), left(i, j, 0));
-      VERIFY_IS_EQUAL(concatenation3(i, j, 1), right(i, j, 0));
-    }
-  }
-  sycl_device.deallocate(gpu_out_data3);
-  sycl_device.deallocate(gpu_in1_data);
-  sycl_device.deallocate(gpu_in2_data);
-}
-template<typename DataType, int DataLayout, typename IndexType>
-static void test_concatenation_as_lvalue(const Eigen::SyclDevice& sycl_device)
-{
-
-  IndexType leftDim1 = 2;
-  IndexType leftDim2 = 3;
-  Eigen::array<IndexType, 2> leftRange = {{leftDim1, leftDim2}};
-
-  IndexType rightDim1 = 2;
-  IndexType rightDim2 = 3;
-  Eigen::array<IndexType, 2> rightRange = {{rightDim1, rightDim2}};
-
-  IndexType concatDim1 = 4;
-  IndexType concatDim2 = 3;
-  Eigen::array<IndexType, 2> resRange = {{concatDim1, concatDim2}};
-
-  Tensor<DataType, 2, DataLayout, IndexType> left(leftRange);
-  Tensor<DataType, 2, DataLayout, IndexType> right(rightRange);
-  Tensor<DataType, 2, DataLayout, IndexType> result(resRange);
-
-  left.setRandom();
-  right.setRandom();
-  result.setRandom();
-
-  DataType * gpu_in1_data  = static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType)));
-  DataType * gpu_in2_data  = static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType)));
-  DataType * gpu_out_data =  static_cast<DataType*>(sycl_device.allocate(result.dimensions().TotalSize()*sizeof(DataType)));
-
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_in1(gpu_in1_data, leftRange);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_in2(gpu_in2_data, rightRange);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_out(gpu_out_data, resRange);
-
-  sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType));
-  sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType));
-  sycl_device.memcpyHostToDevice(gpu_out_data, result.data(),(result.dimensions().TotalSize())*sizeof(DataType));
-
-//  t1.concatenate(t2, 0) = result;
- gpu_in1.concatenate(gpu_in2, 0).device(sycl_device) =gpu_out;
- sycl_device.memcpyDeviceToHost(left.data(), gpu_in1_data,(left.dimensions().TotalSize())*sizeof(DataType));
- sycl_device.memcpyDeviceToHost(right.data(), gpu_in2_data,(right.dimensions().TotalSize())*sizeof(DataType));
-
-  for (IndexType i = 0; i < 2; ++i) {
-    for (IndexType j = 0; j < 3; ++j) {
-      VERIFY_IS_EQUAL(left(i, j), result(i, j));
-      VERIFY_IS_EQUAL(right(i, j), result(i+2, j));
-    }
-  }
-  sycl_device.deallocate(gpu_in1_data);
-  sycl_device.deallocate(gpu_in2_data);
-  sycl_device.deallocate(gpu_out_data);
-}
-
-
-template <typename DataType, typename Dev_selector> void tensorConcat_perDevice(Dev_selector s){
-  QueueInterface queueInterface(s);
-  auto sycl_device = Eigen::SyclDevice(&queueInterface);
-  test_simple_concatenation<DataType, RowMajor, int64_t>(sycl_device);
-  test_simple_concatenation<DataType, ColMajor, int64_t>(sycl_device);
-  test_concatenation_as_lvalue<DataType, ColMajor, int64_t>(sycl_device);
-}
-void test_cxx11_tensor_concatenation_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(tensorConcat_perDevice<float>(device));
-  }
-}
diff --git a/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp
deleted file mode 100644
index 5bace66..0000000
--- a/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp
+++ /dev/null
@@ -1,290 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli    Codeplay Software Ltd.
-// Ralph Potter  Codeplay Software Ltd.
-// Luke Iwanski  Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_contract_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include <iostream>
-#include <chrono>
-#include <ctime>
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-template<int DataLayout, typename DataType, typename IndexType, typename Device>
-void static test_sycl_contraction(const Device& sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)
-{
-  typedef typename Tensor<DataType, 1, DataLayout, IndexType>::DimensionPair DimPair;
-  static const DataType error_threshold =1e-4f;
-//  std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
-  // with these dimensions, the output has 300 * 140 elements, which is
-  // more than 30 * 1024, which is the number of threads in blocks on
-  // a 15 SM GK110 GPU
-  Tensor<DataType, 2, DataLayout, IndexType> t_left(m_size, k_size);
-  Tensor<DataType, 2, DataLayout, IndexType> t_right(k_size, n_size);
-  Tensor<DataType, 2, DataLayout, IndexType> t_result(m_size, n_size);
-  Tensor<DataType, 2, DataLayout, IndexType> t_result_gpu(m_size, n_size);
-//  Eigen::array<DimPair, 1> dims(DimPair(1, 0));
-  Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
-  Eigen::array<IndexType, 2> left_dims = {{m_size, k_size}};
-  Eigen::array<IndexType, 2> right_dims = {{k_size, n_size}};
-  Eigen::array<IndexType, 2> result_dims = {{m_size, n_size}};
-
-  t_left.setRandom();
-  t_right.setRandom();
-
-  std::size_t t_left_bytes = t_left.size()  * sizeof(DataType);
-  std::size_t t_right_bytes = t_right.size() * sizeof(DataType);
-  std::size_t t_result_bytes = t_result.size() * sizeof(DataType);
-
-  DataType * d_t_left  = static_cast<DataType*>(sycl_device.allocate(t_left_bytes));
-  DataType * d_t_right  = static_cast<DataType*>(sycl_device.allocate(t_right_bytes));
-  DataType * d_t_result =  static_cast<DataType*>(sycl_device.allocate(t_result_bytes));
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_left(d_t_left, left_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_right(d_t_right, right_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_result(d_t_result, result_dims);
-
-  sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes);
-  sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes);
-
-  gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
-  sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes);
-
-  t_result = t_left.contract(t_right, dims);
-
-  for (IndexType i = 0; i < t_result.size(); i++) {
-    if (static_cast<DataType>(fabs(t_result(i) - t_result_gpu(i))) < error_threshold) {
-      continue;
-    }
-    if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) {
-      continue;
-    }
-    std::cout << "mismatch detected at IndexType " << i << ": " << t_result(i)
-              << " vs " <<  t_result_gpu(i) << std::endl;
-    assert(false);
-  }
-  sycl_device.deallocate(d_t_left);
-  sycl_device.deallocate(d_t_right);
-  sycl_device.deallocate(d_t_result);
-}
-
-template<int DataLayout, typename DataType, typename IndexType, typename Device>
-void test_TF(const Device& sycl_device)
-{
-  typedef typename Tensor<DataType, 1, DataLayout, IndexType>::DimensionPair DimPair;
-  static const DataType error_threshold =1e-4f;
-  Eigen::array<IndexType, 2> left_dims = {{2, 3}};
-  Eigen::array<IndexType, 2> right_dims = {{3, 1}};
-  Eigen::array<IndexType, 2> res_dims = {{2, 1}};
-  Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
-
-
-  Tensor<DataType, 2, DataLayout, IndexType> t_left(left_dims);
-  Tensor<DataType, 2, DataLayout, IndexType> t_right(right_dims);
-  Tensor<DataType, 2, DataLayout, IndexType> t_result_gpu(res_dims);
-  Tensor<DataType, 2, DataLayout, IndexType> t_result(res_dims);
-
-  t_left.data()[0] = 1.0f;
-  t_left.data()[1] = 2.0f;
-  t_left.data()[2] = 3.0f;
-  t_left.data()[3] = 4.0f;
-  t_left.data()[4] = 5.0f;
-  t_left.data()[5] = 6.0f;
-
-  t_right.data()[0] = -1.0f;
-  t_right.data()[1] = 0.5f;
-  t_right.data()[2] = 2.0f;
-
-  std::size_t t_left_bytes = t_left.size()  * sizeof(DataType);
-  std::size_t t_right_bytes = t_right.size() * sizeof(DataType);
-  std::size_t t_result_bytes = t_result.size()*sizeof(DataType);
-
-
-  DataType * d_t_left  = static_cast<DataType*>(sycl_device.allocate(t_left_bytes));
-  DataType * d_t_right  = static_cast<DataType*>(sycl_device.allocate(t_right_bytes));
-  DataType * d_t_result =  static_cast<DataType*>(sycl_device.allocate(t_result_bytes));
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_left(d_t_left, left_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_right(d_t_right, right_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_result(d_t_result, res_dims);
-
-  sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes);
-  sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes);
-
-  gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
-  sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes);
-
-  t_result = t_left.contract(t_right, dims);
-
-  for (IndexType i = 0; i < t_result.size(); i++) {
-    if (static_cast<DataType>(fabs(t_result(i) - t_result_gpu(i))) < error_threshold) {
-      continue;
-    }
-    if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) {
-      continue;
-    }
-    std::cout << "mismatch detected at IndexType " << i << ": " << t_result(i)
-              << " vs " <<  t_result_gpu(i) << std::endl;
-    assert(false);
-  }
-  sycl_device.deallocate(d_t_left);
-  sycl_device.deallocate(d_t_right);
-  sycl_device.deallocate(d_t_result);
-
-
-}
-
-template<int DataLayout,  typename DataType, typename IndexType, typename Device>
-void test_scalar(const Device& sycl_device, IndexType m_size, IndexType k_size, IndexType n_size)
-{
-  //std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
-  // with these dimensions, the output has 300 * 140 elements, which is
-  // more than 30 * 1024, which is the number of threads in blocks on
-  // a 15 SM GK110 GPU
-  typedef typename Tensor<DataType, 1, DataLayout, IndexType>::DimensionPair DimPair;
-  static const DataType error_threshold =1e-4f;
-  Tensor<DataType, 2, DataLayout, IndexType> t_left(m_size, k_size);
-  Tensor<DataType, 2, DataLayout, IndexType> t_right(k_size, n_size);
-  Tensor<DataType, 0, DataLayout, IndexType> t_result;
-  Tensor<DataType, 0, DataLayout, IndexType> t_result_gpu;
-  Eigen::array<DimPair, 2> dims = {{DimPair(0, 0), DimPair(1, 1)}};
-  Eigen::array<IndexType, 2> left_dims = {{m_size, k_size}};
-  Eigen::array<IndexType, 2> right_dims = {{k_size, n_size}};
-  t_left.setRandom();
-  t_right.setRandom();
-
-  std::size_t t_left_bytes = t_left.size()  * sizeof(DataType);
-  std::size_t t_right_bytes = t_right.size() * sizeof(DataType);
-  std::size_t t_result_bytes = sizeof(DataType);
-
-
-  DataType * d_t_left  = static_cast<DataType*>(sycl_device.allocate(t_left_bytes));
-  DataType * d_t_right  = static_cast<DataType*>(sycl_device.allocate(t_right_bytes));
-  DataType * d_t_result =  static_cast<DataType*>(sycl_device.allocate(t_result_bytes));
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_left(d_t_left, left_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_right(d_t_right, right_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 0, DataLayout, IndexType> > gpu_t_result(d_t_result);
-
-  sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes);
-  sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes);
-
-  gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims);
-  sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes);
-
-  t_result = t_left.contract(t_right, dims);
-
-  if (static_cast<DataType>(fabs(t_result() - t_result_gpu())) > error_threshold &&
-      !Eigen::internal::isApprox(t_result(), t_result_gpu(), error_threshold)) {
-    std::cout << "mismatch detected: " << t_result()
-              << " vs " <<  t_result_gpu() << std::endl;
-    assert(false);
-  }
-
-  sycl_device.deallocate(d_t_left);
-  sycl_device.deallocate(d_t_right);
-  sycl_device.deallocate(d_t_result);
-}
-
-
-template<int DataLayout, typename DataType, typename IndexType, typename Device>
-void test_sycl_contraction_m(const Device& sycl_device) {
-  for (IndexType k = 32; k < 256; k++) {
-    test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, k, 128, 128);
-  }
-}
-
-template<int DataLayout,  typename DataType, typename IndexType, typename Device>
-void test_sycl_contraction_k(const Device& sycl_device) {
-  for (IndexType k = 32; k < 256; k++) {
-    test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, 128, k, 128);
-  }
-}
-
-template<int DataLayout,  typename DataType, typename IndexType, typename Device>
-void test_sycl_contraction_n(const Device& sycl_device) {
-  for (IndexType k = 32; k < 256; k++) {
-    test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, 128, 128, k);
-  }
-}
-
-
-template<int DataLayout,  typename DataType, typename IndexType, typename Device>
-void test_sycl_contraction_sizes(const Device& sycl_device) {
-  IndexType m_sizes[] = { 31,  39,   63,   64,   65,
-                   127, 129,  255,  257 , 511,
-                   512, 513, 1023, 1024, 1025};
-
-  IndexType n_sizes[] = { 31,  39,   63,   64,   65,
-                   127, 129,  255,  257,  511,
-                   512, 513, 1023, 1024, 1025};
-
-  IndexType k_sizes[] = {  31,   39,  63,  64,   65,
-                     95,   96, 127, 129,  255,
-                    257,  511, 512, 513, 1023,
-                   1024, 1025};
-
-  for (IndexType i = 0; i < 15; i++) {
-    for (IndexType j = 0; j < 15; j++) {
-      for (IndexType k = 0; k < 17; k++) {
-        test_sycl_contraction<DataLayout, DataType,IndexType>(sycl_device, m_sizes[i], n_sizes[j], k_sizes[k]);
-      }
-    }
-  }
-}
-
-template <typename Dev_selector> void tensorContractionPerDevice(Dev_selector& s){
-  QueueInterface queueInterface(s);
-  auto sycl_device=Eigen::SyclDevice(&queueInterface);
-  test_sycl_contraction<ColMajor, float,int64_t>(sycl_device, 32, 32, 32);
-  test_sycl_contraction<RowMajor,float,int64_t>(sycl_device, 32, 32, 32);
-  test_scalar<ColMajor,float,int64_t>(sycl_device, 32, 32, 32);
-  test_scalar<RowMajor,float,int64_t>(sycl_device, 32, 32, 32);
-  std::chrono::time_point<std::chrono::system_clock> start, end;
-  start = std::chrono::system_clock::now();
-  test_sycl_contraction<ColMajor,float,int64_t>(sycl_device, 128, 128, 128);
-  test_sycl_contraction<RowMajor,float,int64_t>(sycl_device, 128, 128, 128);
-  test_scalar<ColMajor,float,int64_t>(sycl_device, 128, 128, 128);
-  test_scalar<RowMajor,float,int64_t>(sycl_device, 128, 128, 128);
-  test_sycl_contraction_m<ColMajor, float, int64_t>(sycl_device);
-  test_sycl_contraction_m<RowMajor, float, int64_t>(sycl_device);
-  test_sycl_contraction_n<ColMajor, float, int64_t>(sycl_device);
-  test_sycl_contraction_n<RowMajor, float, int64_t>(sycl_device);
-  test_sycl_contraction_k<ColMajor, float, int64_t>(sycl_device);
-  test_sycl_contraction_k<RowMajor, float, int64_t>(sycl_device);
-  test_sycl_contraction_sizes<ColMajor, float, int64_t>(sycl_device);
-  test_sycl_contraction_sizes<RowMajor, float, int64_t>(sycl_device);
-  test_TF<RowMajor, float, int64_t>(sycl_device);
-  test_TF<ColMajor, float, int64_t>(sycl_device);
-
-  end = std::chrono::system_clock::now();
-  std::chrono::duration<double> elapsed_seconds = end-start;
-  std::time_t end_time = std::chrono::system_clock::to_time_t(end);
-  std::cout << "finished computation at " << std::ctime(&end_time)
-            << "elapsed time: " << elapsed_seconds.count() << "s\n";
-
-}
-
-void test_cxx11_tensor_contract_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(tensorContractionPerDevice(device));
-  }
-}
diff --git a/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp
deleted file mode 100644
index a4226a6..0000000
--- a/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp
+++ /dev/null
@@ -1,469 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli    Codeplay Software Ltd.
-// Ralph Potter  Codeplay Software Ltd.
-// Luke Iwanski  Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_convolution_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include <iostream>
-#include <chrono>
-#include <ctime>
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-#include <iomanip>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-static const float error_threshold =1e-4f;
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_larg_expr1D(const Eigen::SyclDevice& sycl_device)
-{
-  IndexType indim0 =53;
-  IndexType indim1= 55;
-  IndexType indim2= 51;
-  IndexType outdim0=50;
-  IndexType outdim1=55;
-  IndexType outdim2=51;
-  Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
-  Eigen::array<IndexType, 1> kernel_dims = {{4}};
-  Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
-
-  Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
-  Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims);
-  Tensor<DataType, 3, DataLayout,IndexType> result(result_dims);
-  Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims);
-
-  Eigen::array<IndexType, 1> dims3{{0}};
-
-  input.setRandom();
-  kernel.setRandom();
-  result.setZero();
-  result_host.setZero();
-
-  std::size_t input_bytes = input.size()  * sizeof(DataType);
-  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
-  std::size_t result_bytes = result.size() * sizeof(DataType);
-
-  DataType * d_input  = static_cast<DataType*>(sycl_device.allocate(input_bytes));
-  DataType * d_kernel  = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
-  DataType * d_result =  static_cast<DataType*>(sycl_device.allocate(result_bytes));
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
-  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
-  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
-  gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3);
-  sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
-
-  result_host=input.convolve(kernel, dims3);
-
-for(IndexType i=0; i< outdim0; i++ ){
-  for(IndexType j=0; j< outdim1; j++ ){
-    for(IndexType k=0; k< outdim2; k++ ){
-      if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) {
-        std::cout <<std::setprecision(16)<< "mismatch detected at index  ( "<< i  << " , "  << j  << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<<  result_host(i,j,k) << std::endl;
-        assert(false);
-      }
-    }
-  }
-}
-  sycl_device.deallocate(d_input);
-  sycl_device.deallocate(d_kernel);
-  sycl_device.deallocate(d_result);
-
-}
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_larg_expr2D(const Eigen::SyclDevice& sycl_device)
-{
-  IndexType indim0 =53;
-  IndexType indim1= 55;
-  IndexType indim2= 51;
-  IndexType outdim0=50;
-  IndexType outdim1=51;
-  IndexType outdim2=51;
-  Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
-  Eigen::array<IndexType, 2> kernel_dims = {{4,5}};
-  Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
-
-  Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
-  Tensor<DataType, 2, DataLayout,IndexType> kernel(kernel_dims);
-  Tensor<DataType, 3, DataLayout,IndexType> result(result_dims);
-  Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims);
-
-  Eigen::array<IndexType, 2> dims3{{0,1}};
-
-  input.setRandom();
-  kernel.setRandom();
-  result.setZero();
-  result_host.setZero();
-
-  std::size_t input_bytes = input.size()  * sizeof(DataType);
-  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
-  std::size_t result_bytes = result.size() * sizeof(DataType);
-
-  DataType * d_input  = static_cast<DataType*>(sycl_device.allocate(input_bytes));
-  DataType * d_kernel  = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
-  DataType * d_result =  static_cast<DataType*>(sycl_device.allocate(result_bytes));
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
-  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
-  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
-  gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3);
-  sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
-
-  result_host=input.convolve(kernel, dims3);
-
-for(IndexType i=0; i< outdim0; i++ ){
-  for(IndexType j=0; j< outdim1; j++ ){
-    for(IndexType k=0; k< outdim2; k++ ){
-      if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) {
-        std::cout <<std::setprecision(16)<< "mismatch detected at index  ( "<< i  << " , "  << j  << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<<  result_host(i,j,k) << std::endl;
-        assert(false);
-      }
-    }
-  }
-}
-  sycl_device.deallocate(d_input);
-  sycl_device.deallocate(d_kernel);
-  sycl_device.deallocate(d_result);
-
-}
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_larg_expr3D(const Eigen::SyclDevice& sycl_device)
-{
-  IndexType indim0 =53;
-  IndexType indim1= 55;
-  IndexType indim2= 51;
-  IndexType outdim0=50;
-  IndexType outdim1=51;
-  IndexType outdim2=49;
-  Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}};
-  Eigen::array<IndexType, 3> kernel_dims = {{4,5,3}};
-  Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}};
-
-  Tensor<DataType, 3, DataLayout, IndexType> input(input_dims);
-  Tensor<DataType, 3, DataLayout,IndexType> kernel(kernel_dims);
-  Tensor<DataType, 3, DataLayout,IndexType> result(result_dims);
-  Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims);
-
-  Eigen::array<IndexType, 3> dims3{{0,1,2}};
-
-  input.setRandom();
-  kernel.setRandom();
-  result.setZero();
-  result_host.setZero();
-
-  std::size_t input_bytes = input.size()  * sizeof(DataType);
-  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
-  std::size_t result_bytes = result.size() * sizeof(DataType);
-
-  DataType * d_input  = static_cast<DataType*>(sycl_device.allocate(input_bytes));
-  DataType * d_kernel  = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
-  DataType * d_result =  static_cast<DataType*>(sycl_device.allocate(result_bytes));
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims);
-  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
-  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
-  gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3);
-  sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
-
-  result_host=input.convolve(kernel, dims3);
-
-for(IndexType i=0; i< outdim0; i++ ){
-  for(IndexType j=0; j< outdim1; j++ ){
-    for(IndexType k=0; k< outdim2; k++ ){
-      if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) {
-        std::cout <<std::setprecision(16)<< "mismatch detected at index  ( "<< i  << " , "  << j  << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<<  result_host(i,j,k) << std::endl;
-        assert(false);
-      }
-    }
-  }
-}
-  sycl_device.deallocate(d_input);
-  sycl_device.deallocate(d_kernel);
-  sycl_device.deallocate(d_result);
-
-}
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_evals(const Eigen::SyclDevice& sycl_device)
-{
-  Eigen::array<IndexType, 2> input_dims = {{3, 3}};
-  Eigen::array<IndexType, 1> kernel_dims = {{2}};
-  Eigen::array<IndexType, 2> result_dims = {{2, 3}};
-
-  Tensor<DataType, 2, DataLayout, IndexType> input(input_dims);
-  Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims);
-  Tensor<DataType, 2, DataLayout,IndexType> result(result_dims);
-
-  Eigen::array<IndexType, 1> dims3{{0}};
-
-  input.setRandom();
-  kernel.setRandom();
-  result.setZero();
-
-  std::size_t input_bytes = input.size()  * sizeof(DataType);
-  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
-  std::size_t result_bytes = result.size() * sizeof(DataType);
-
-  DataType * d_input  = static_cast<DataType*>(sycl_device.allocate(input_bytes));
-  DataType * d_kernel  = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
-  DataType * d_result =  static_cast<DataType*>(sycl_device.allocate(result_bytes));
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_input(d_input, input_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_result(d_result, result_dims);
-  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
-  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
-  gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3);
-  sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
-
-  VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0) + input(1,0)*kernel(1));  // index 0
-  VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0) + input(1,1)*kernel(1));  // index 2
-  VERIFY_IS_APPROX(result(0,2), input(0,2)*kernel(0) + input(1,2)*kernel(1));  // index 4
-  VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0) + input(2,0)*kernel(1));  // index 1
-  VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0) + input(2,1)*kernel(1));  // index 3
-  VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1));  // index 5
-
-  sycl_device.deallocate(d_input);
-  sycl_device.deallocate(d_kernel);
-  sycl_device.deallocate(d_result);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_expr(const Eigen::SyclDevice& sycl_device)
-{
-  Eigen::array<IndexType, 2> input_dims = {{3, 3}};
-  Eigen::array<IndexType, 2> kernel_dims = {{2, 2}};
-  Eigen::array<IndexType, 2> result_dims = {{2, 2}};
-
-  Tensor<DataType, 2, DataLayout, IndexType> input(input_dims);
-  Tensor<DataType, 2, DataLayout, IndexType> kernel(kernel_dims);
-  Tensor<DataType, 2, DataLayout, IndexType> result(result_dims);
-
-  input.setRandom();
-  kernel.setRandom();
-  Eigen::array<IndexType, 2> dims;
-  dims[0] = 0;
-  dims[1] = 1;
-
-  std::size_t input_bytes = input.size()  * sizeof(DataType);
-  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
-  std::size_t result_bytes = result.size() * sizeof(DataType);
-
-  DataType * d_input  = static_cast<DataType*>(sycl_device.allocate(input_bytes));
-  DataType * d_kernel  = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
-  DataType * d_result =  static_cast<DataType*>(sycl_device.allocate(result_bytes));
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_input(d_input, input_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_result(d_result, result_dims);
-  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
-  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
-  gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims);
-  sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
-
-  VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0,0) + input(0,1)*kernel(0,1) +
-                                input(1,0)*kernel(1,0) + input(1,1)*kernel(1,1));
-  VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0,0) + input(0,2)*kernel(0,1) +
-                                input(1,1)*kernel(1,0) + input(1,2)*kernel(1,1));
-  VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0,0) + input(1,1)*kernel(0,1) +
-                                input(2,0)*kernel(1,0) + input(2,1)*kernel(1,1));
-  VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0,0) + input(1,2)*kernel(0,1) +
-                                input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1));
-
-  sycl_device.deallocate(d_input);
-  sycl_device.deallocate(d_kernel);
-  sycl_device.deallocate(d_result);
-}
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_modes(const Eigen::SyclDevice& sycl_device){
-
-Eigen::array<IndexType, 1> input_dims = {{3}};
-Eigen::array<IndexType, 1> kernel_dims = {{3}};
-
-Tensor<DataType, 1, DataLayout, IndexType> input(input_dims);
-Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims);
-
-input.setRandom();
-kernel.setRandom();
-Eigen::array<IndexType, 1> dims;
-dims[0] = 0;
-
-  input(0) = 1.0f;
-  input(1) = 2.0f;
-  input(2) = 3.0f;
-  kernel(0) = 0.5f;
-  kernel(1) = 1.0f;
-  kernel(2) = 0.0f;
-
-  Eigen::array<std::pair<IndexType, IndexType>, 1> padding;
-
-  // Emulate VALID mode (as defined in
-  // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
-  padding[0] = std::make_pair(0, 0);
-  Tensor<DataType, 1, DataLayout, IndexType> valid(1);
-
-  std::size_t input_bytes = input.size()  * sizeof(DataType);
-  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
-  std::size_t valid_bytes = valid.size() * sizeof(DataType);
-
-  DataType * d_input  = static_cast<DataType*>(sycl_device.allocate(input_bytes));
-  DataType * d_kernel  = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
-  DataType * d_valid =  static_cast<DataType*>(sycl_device.allocate(valid_bytes));
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_valid(d_valid, valid.dimensions());
-  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
-  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
-  gpu_valid.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims);
-  sycl_device.memcpyDeviceToHost(valid.data(), d_valid, valid_bytes);
-
-  VERIFY_IS_EQUAL(valid.dimension(0), 1);
-  VERIFY_IS_APPROX(valid(0), 2.5f);
-
-  // Emulate SAME mode (as defined in
-  // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
-  padding[0] = std::make_pair(1, 1);
-  Tensor<DataType, 1, DataLayout, IndexType> same(3);
-  std::size_t same_bytes = same.size() * sizeof(DataType);
-  DataType * d_same =  static_cast<DataType*>(sycl_device.allocate(same_bytes));
-  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_same(d_same, same.dimensions());
-  gpu_same.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims);
-  sycl_device.memcpyDeviceToHost(same.data(), d_same, same_bytes);
-
-  VERIFY_IS_EQUAL(same.dimension(0), 3);
-  VERIFY_IS_APPROX(same(0), 1.0f);
-  VERIFY_IS_APPROX(same(1), 2.5f);
-  VERIFY_IS_APPROX(same(2), 4.0f);
-
-  // Emulate FULL mode (as defined in
-  // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html).
-  padding[0] = std::make_pair(2, 2);
-
-  Tensor<DataType, 1, DataLayout, IndexType> full(5);
-  std::size_t full_bytes = full.size() * sizeof(DataType);
-  DataType * d_full =  static_cast<DataType*>(sycl_device.allocate(full_bytes));
-  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_full(d_full, full.dimensions());
-  gpu_full.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims);
-  sycl_device.memcpyDeviceToHost(full.data(), d_full, full_bytes);
-
-  VERIFY_IS_EQUAL(full.dimension(0), 5);
-  VERIFY_IS_APPROX(full(0), 0.0f);
-  VERIFY_IS_APPROX(full(1), 1.0f);
-  VERIFY_IS_APPROX(full(2), 2.5f);
-  VERIFY_IS_APPROX(full(3), 4.0f);
-  VERIFY_IS_APPROX(full(4), 1.5f);
-
-  sycl_device.deallocate(d_input);
-  sycl_device.deallocate(d_kernel);
-  sycl_device.deallocate(d_valid);
-  sycl_device.deallocate(d_same);
-  sycl_device.deallocate(d_full);
-
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_strides(const Eigen::SyclDevice& sycl_device){
-
-  Eigen::array<IndexType, 1> input_dims = {{13}};
-  Eigen::array<IndexType, 1> kernel_dims = {{3}};
-
-  Tensor<DataType, 1, DataLayout, IndexType> input(input_dims);
-  Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims);
-  Tensor<DataType, 1, DataLayout, IndexType> result(2);
-
-  input.setRandom();
-  kernel.setRandom();
-  Eigen::array<IndexType, 1> dims;
-  dims[0] = 0;
-
-  Eigen::array<IndexType, 1> stride_of_3;
-  stride_of_3[0] = 3;
-  Eigen::array<IndexType, 1> stride_of_2;
-  stride_of_2[0] = 2;
-
-  std::size_t input_bytes = input.size()  * sizeof(DataType);
-  std::size_t kernel_bytes = kernel.size() * sizeof(DataType);
-  std::size_t result_bytes = result.size() * sizeof(DataType);
-
-  DataType * d_input  = static_cast<DataType*>(sycl_device.allocate(input_bytes));
-  DataType * d_kernel  = static_cast<DataType*>(sycl_device.allocate(kernel_bytes));
-  DataType * d_result =  static_cast<DataType*>(sycl_device.allocate(result_bytes));
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_result(d_result, result.dimensions());
-  sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes);
-  sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes);
-
-  gpu_result.device(sycl_device)=gpu_input.stride(stride_of_3).convolve(gpu_kernel, dims).stride(stride_of_2);
-  sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes);
-
-  VERIFY_IS_EQUAL(result.dimension(0), 2);
-  VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) +
-                               input(6)*kernel(2)));
-  VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) +
-                               input(12)*kernel(2)));
-}
-
-template <typename Dev_selector> void tensorConvolutionPerDevice(Dev_selector& s){
-  QueueInterface queueInterface(s);
-  auto sycl_device=Eigen::SyclDevice(&queueInterface);
-  test_larg_expr1D<float, RowMajor, int64_t>(sycl_device);
-  test_larg_expr1D<float, ColMajor, int64_t>(sycl_device);
-  test_larg_expr2D<float, RowMajor, int64_t>(sycl_device);
-  test_larg_expr2D<float, ColMajor, int64_t>(sycl_device);
-  test_larg_expr3D<float, RowMajor, int64_t>(sycl_device);
-  test_larg_expr3D<float, ColMajor, int64_t>(sycl_device);
-  test_evals<float, ColMajor, int64_t>(sycl_device);
-  test_evals<float, RowMajor, int64_t>(sycl_device);
-  test_expr<float, ColMajor, int64_t>(sycl_device);
-  test_expr<float, RowMajor, int64_t>(sycl_device);
-  test_modes<float, ColMajor, int64_t>(sycl_device);
-  test_modes<float, RowMajor, int64_t>(sycl_device);
-  test_strides<float, ColMajor, int64_t>(sycl_device);
-  test_strides<float, RowMajor, int64_t>(sycl_device);
-}
-
-void test_cxx11_tensor_convolution_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(tensorConvolutionPerDevice(device));
-  }
-}
diff --git a/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp
index 3ecc68d..7f79753 100644
--- a/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp
@@ -14,64 +14,18 @@
 #define EIGEN_TEST_NO_LONGDOUBLE
 #define EIGEN_TEST_NO_COMPLEX
 #define EIGEN_TEST_FUNC cxx11_tensor_device_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_SYCL
 
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
-#include <stdint.h>
-#include <iostream>
 
-template <typename DataType, int DataLayout, typename IndexType>
-void test_device_memory(const Eigen::SyclDevice &sycl_device) {
-  std::cout << "Running on : "
-            << sycl_device.sycl_queue().get_device(). template get_info<cl::sycl::info::device::name>()
-            <<std::endl;
-  IndexType sizeDim1 = 100;
-  array<IndexType, 1> tensorRange = {{sizeDim1}};
-  Tensor<DataType, 1, DataLayout,IndexType> in(tensorRange);
-  Tensor<DataType, 1, DataLayout,IndexType> in1(tensorRange);
-  memset(in1.data(), 1, in1.size() * sizeof(DataType));
-  DataType* gpu_in_data  = static_cast<DataType*>(sycl_device.allocate(in.size()*sizeof(DataType)));
-  sycl_device.memset(gpu_in_data, 1, in.size()*sizeof(DataType));
-  sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType));
-  for (IndexType i=0; i<in.size(); i++) {
-    VERIFY_IS_EQUAL(in(i), in1(i));
-  }
-  sycl_device.deallocate(gpu_in_data);
+void test_device_sycl(const Eigen::SyclDevice &sycl_device) {
+  std::cout <<"Helo from ComputeCpp: the requested device exists and the device name is : "
+    << sycl_device.m_queue.get_device(). template get_info<cl::sycl::info::device::name>() <<std::endl;;
 }
-
-template <typename DataType, int DataLayout, typename IndexType>
-void test_device_exceptions(const Eigen::SyclDevice &sycl_device) {
-  VERIFY(sycl_device.ok());
-  IndexType sizeDim1 = 100;
-  array<IndexType, 1> tensorDims = {{sizeDim1}};
-  DataType* gpu_data = static_cast<DataType*>(sycl_device.allocate(sizeDim1*sizeof(DataType)));
-  sycl_device.memset(gpu_data, 1, sizeDim1*sizeof(DataType));
-
-  TensorMap<Tensor<DataType, 1, DataLayout,IndexType>> in(gpu_data, tensorDims);
-  TensorMap<Tensor<DataType, 1, DataLayout,IndexType>> out(gpu_data, tensorDims);
-  out.device(sycl_device) = in / in.constant(0);
-
-  sycl_device.synchronize();
-  VERIFY(!sycl_device.ok());
-  sycl_device.deallocate(gpu_data);
-}
-
-template<typename DataType> void sycl_device_test_per_device(const cl::sycl::device& d){
-  std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
-  QueueInterface queueInterface(d);
-  auto sycl_device = Eigen::SyclDevice(&queueInterface);
-  test_device_memory<DataType, RowMajor, int64_t>(sycl_device);
-  test_device_memory<DataType, ColMajor, int64_t>(sycl_device);
-  /// this test throw an exception. enable it if you want to see the exception
-  //test_device_exceptions<DataType, RowMajor>(sycl_device);
-  /// this test throw an exception. enable it if you want to see the exception
-  //test_device_exceptions<DataType, ColMajor>(sycl_device);
-}
-
 void test_cxx11_tensor_device_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(sycl_device_test_per_device<float>(device));
-  }
+  cl::sycl::gpu_selector s;
+  Eigen::SyclDevice sycl_device(s);
+  CALL_SUBTEST(test_device_sycl(sycl_device));
 }
diff --git a/eigen/unsupported/test/cxx11_tensor_expr.cpp b/eigen/unsupported/test/cxx11_tensor_expr.cpp
index 129b4e6..77e24cb 100644
--- a/eigen/unsupported/test/cxx11_tensor_expr.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_expr.cpp
@@ -300,51 +300,6 @@ static void test_select()
   }
 }
 
-template <typename Scalar>
-void test_minmax_nan_propagation_templ() {
-  for (int size = 1; size < 17; ++size) {
-    const Scalar kNan = std::numeric_limits<Scalar>::quiet_NaN();
-    Tensor<Scalar, 1> vec_nan(size);
-    Tensor<Scalar, 1> vec_zero(size);
-    Tensor<Scalar, 1> vec_res(size);
-    vec_nan.setConstant(kNan);
-    vec_zero.setZero();
-    vec_res.setZero();
-
-    // Test that we propagate NaNs in the tensor when applying the
-    // cwiseMax(scalar) operator, which is used for the Relu operator.
-    vec_res = vec_nan.cwiseMax(Scalar(0));
-    for (int i = 0; i < size; ++i) {
-      VERIFY((numext::isnan)(vec_res(i)));
-    }
-
-    // Test that NaNs do not propagate if we reverse the arguments.
-    vec_res = vec_zero.cwiseMax(kNan);
-    for (int i = 0; i < size; ++i) {
-      VERIFY_IS_EQUAL(vec_res(i), Scalar(0));
-    }
-
-    // Test that we propagate NaNs in the tensor when applying the
-    // cwiseMin(scalar) operator.
-    vec_res.setZero();
-    vec_res = vec_nan.cwiseMin(Scalar(0));
-    for (int i = 0; i < size; ++i) {
-      VERIFY((numext::isnan)(vec_res(i)));
-    }
-
-    // Test that NaNs do not propagate if we reverse the arguments.
-    vec_res = vec_zero.cwiseMin(kNan);
-    for (int i = 0; i < size; ++i) {
-      VERIFY_IS_EQUAL(vec_res(i), Scalar(0));
-    }
-  }
-}
-
-static void test_minmax_nan_propagation()
-{
-  test_minmax_nan_propagation_templ<float>();
-  test_minmax_nan_propagation_templ<double>();
-}
 
 void test_cxx11_tensor_expr()
 {
@@ -356,5 +311,4 @@ void test_cxx11_tensor_expr()
   CALL_SUBTEST(test_functors());
   CALL_SUBTEST(test_type_casting());
   CALL_SUBTEST(test_select());
-  CALL_SUBTEST(test_minmax_nan_propagation());
 }
diff --git a/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp b/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp
index e6274f8..4c660de 100644
--- a/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp
@@ -21,7 +21,7 @@ static void test_0d()
   TensorFixedSize<float, Sizes<>, RowMajor> scalar2;
   VERIFY_IS_EQUAL(scalar1.rank(), 0);
   VERIFY_IS_EQUAL(scalar1.size(), 1);
-  VERIFY_IS_EQUAL(internal::array_prod(scalar1.dimensions()), 1);
+  VERIFY_IS_EQUAL(array_prod(scalar1.dimensions()), 1);
 
   scalar1() = 7.0;
   scalar2() = 13.0;
diff --git a/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp
index aca036c..5690da7 100644
--- a/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp
@@ -14,43 +14,43 @@
 #define EIGEN_TEST_NO_LONGDOUBLE
 #define EIGEN_TEST_NO_COMPLEX
 #define EIGEN_TEST_FUNC cxx11_tensor_forced_eval_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_SYCL
 
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
 
 using Eigen::Tensor;
-template <typename DataType, int DataLayout, typename IndexType>
+
 void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) {
 
-  IndexType sizeDim1 = 100;
-  IndexType sizeDim2 = 20;
-  IndexType sizeDim3 = 20;
-  Eigen::array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
-  Eigen::Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange);
-  Eigen::Tensor<DataType, 3, DataLayout, IndexType> in2(tensorRange);
-  Eigen::Tensor<DataType, 3, DataLayout, IndexType> out(tensorRange);
+  int sizeDim1 = 100;
+  int sizeDim2 = 200;
+  int sizeDim3 = 200;
+  Eigen::array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+  Eigen::Tensor<float, 3> in1(tensorRange);
+  Eigen::Tensor<float, 3> in2(tensorRange);
+  Eigen::Tensor<float, 3> out(tensorRange);
 
-  DataType * gpu_in1_data  = static_cast<DataType*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType)));
-  DataType * gpu_in2_data  = static_cast<DataType*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(DataType)));
-  DataType * gpu_out_data =  static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType)));
+  float * gpu_in1_data  = static_cast<float*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(float)));
+  float * gpu_in2_data  = static_cast<float*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(float)));
+  float * gpu_out_data =  static_cast<float*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float)));
 
   in1 = in1.random() + in1.constant(10.0f);
   in2 = in2.random() + in2.constant(10.0f);
 
   // creating TensorMap from tensor
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, tensorRange);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, tensorRange);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange);
-  sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(DataType));
-  sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in1.dimensions().TotalSize())*sizeof(DataType));
+  Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in1(gpu_in1_data, tensorRange);
+  Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_in2(gpu_in2_data, tensorRange);
+  Eigen::TensorMap<Eigen::Tensor<float, 3>> gpu_out(gpu_out_data, tensorRange);
+  sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(float));
+  sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in1.dimensions().TotalSize())*sizeof(float));
   /// c=(a+b)*b
   gpu_out.device(sycl_device) =(gpu_in1 + gpu_in2).eval() * gpu_in2;
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType));
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
         VERIFY_IS_APPROX(out(i, j, k),
                          (in1(i, j, k) + in2(i, j, k)) * in2(i, j, k));
       }
@@ -63,14 +63,8 @@ void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) {
 
 }
 
-template <typename DataType, typename Dev_selector> void tensorForced_evalperDevice(Dev_selector s){
-  QueueInterface queueInterface(s);
-  auto sycl_device = Eigen::SyclDevice(&queueInterface);
-  test_forced_eval_sycl<DataType, RowMajor, int64_t>(sycl_device);
-  test_forced_eval_sycl<DataType, ColMajor, int64_t>(sycl_device);
-}
 void test_cxx11_tensor_forced_eval_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(tensorForced_evalperDevice<float>(device));
-  }
+  cl::sycl::gpu_selector s;
+  Eigen::SyclDevice sycl_device(s);
+  CALL_SUBTEST(test_forced_eval_sycl(sycl_device));
 }
diff --git a/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp
deleted file mode 100644
index 9b521bc..0000000
--- a/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli    Codeplay Software Ltd.
-// Ralph Potter  Codeplay Software Ltd.
-// Luke Iwanski  Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-// Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_morphing_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_simple_reshape(const Eigen::SyclDevice& sycl_device)
-{
-  typename Tensor<DataType, 5 ,DataLayout, IndexType>::Dimensions dim1(2,3,1,7,1);
-  typename Tensor<DataType, 3 ,DataLayout, IndexType>::Dimensions dim2(2,3,7);
-  typename Tensor<DataType, 2 ,DataLayout, IndexType>::Dimensions dim3(6,7);
-  typename Tensor<DataType, 2 ,DataLayout, IndexType>::Dimensions dim4(2,21);
-
-  Tensor<DataType, 5, DataLayout, IndexType> tensor1(dim1);
-  Tensor<DataType, 3, DataLayout, IndexType> tensor2(dim2);
-  Tensor<DataType, 2, DataLayout, IndexType> tensor3(dim3);
-  Tensor<DataType, 2, DataLayout, IndexType> tensor4(dim4);
-
-  tensor1.setRandom();
-
-  DataType* gpu_data1  = static_cast<DataType*>(sycl_device.allocate(tensor1.size()*sizeof(DataType)));
-  DataType* gpu_data2  = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
-  DataType* gpu_data3  = static_cast<DataType*>(sycl_device.allocate(tensor3.size()*sizeof(DataType)));
-  DataType* gpu_data4  = static_cast<DataType*>(sycl_device.allocate(tensor4.size()*sizeof(DataType)));
-
-  TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu1(gpu_data1, dim1);
-  TensorMap<Tensor<DataType, 3,DataLayout, IndexType>> gpu2(gpu_data2, dim2);
-  TensorMap<Tensor<DataType, 2,DataLayout, IndexType>> gpu3(gpu_data3, dim3);
-  TensorMap<Tensor<DataType, 2,DataLayout, IndexType>> gpu4(gpu_data4, dim4);
-
-  sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(),(tensor1.size())*sizeof(DataType));
-
-  gpu2.device(sycl_device)=gpu1.reshape(dim2);
-  sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor1.size())*sizeof(DataType));
-
-  gpu3.device(sycl_device)=gpu1.reshape(dim3);
-  sycl_device.memcpyDeviceToHost(tensor3.data(), gpu_data3,(tensor3.size())*sizeof(DataType));
-
-  gpu4.device(sycl_device)=gpu1.reshape(dim2).reshape(dim4);
-  sycl_device.memcpyDeviceToHost(tensor4.data(), gpu_data4,(tensor4.size())*sizeof(DataType));
-  for (IndexType i = 0; i < 2; ++i){
-    for (IndexType j = 0; j < 3; ++j){
-      for (IndexType k = 0; k < 7; ++k){
-        VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k));      ///ColMajor
-        if (static_cast<int>(DataLayout) == static_cast<int>(ColMajor)) {
-          VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i+2*j,k));    ///ColMajor
-          VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j+3*k));    ///ColMajor
-        }
-        else{
-          //VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k));      /// RowMajor
-          VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j*7 +k));   /// RowMajor
-          VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i*3 +j,k));   /// RowMajor
-        }
-      }
-    }
-  }
-  sycl_device.deallocate(gpu_data1);
-  sycl_device.deallocate(gpu_data2);
-  sycl_device.deallocate(gpu_data3);
-  sycl_device.deallocate(gpu_data4);
-}
-
-
-template<typename DataType, int DataLayout, typename IndexType>
-static void test_reshape_as_lvalue(const Eigen::SyclDevice& sycl_device)
-{
-  typename Tensor<DataType, 3, DataLayout, IndexType>::Dimensions dim1(2,3,7);
-  typename Tensor<DataType, 2, DataLayout, IndexType>::Dimensions dim2(6,7);
-  typename Tensor<DataType, 5, DataLayout, IndexType>::Dimensions dim3(2,3,1,7,1);
-  Tensor<DataType, 3, DataLayout, IndexType> tensor(dim1);
-  Tensor<DataType, 2, DataLayout, IndexType> tensor2d(dim2);
-  Tensor<DataType, 5, DataLayout, IndexType> tensor5d(dim3);
-
-  tensor.setRandom();
-
-  DataType* gpu_data1  = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
-  DataType* gpu_data2  = static_cast<DataType*>(sycl_device.allocate(tensor2d.size()*sizeof(DataType)));
-  DataType* gpu_data3  = static_cast<DataType*>(sycl_device.allocate(tensor5d.size()*sizeof(DataType)));
-
-  TensorMap< Tensor<DataType, 3, DataLayout, IndexType> > gpu1(gpu_data1, dim1);
-  TensorMap< Tensor<DataType, 2, DataLayout, IndexType> > gpu2(gpu_data2, dim2);
-  TensorMap< Tensor<DataType, 5, DataLayout, IndexType> > gpu3(gpu_data3, dim3);
-
-  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
-
-  gpu2.reshape(dim1).device(sycl_device)=gpu1;
-  sycl_device.memcpyDeviceToHost(tensor2d.data(), gpu_data2,(tensor2d.size())*sizeof(DataType));
-
-  gpu3.reshape(dim1).device(sycl_device)=gpu1;
-  sycl_device.memcpyDeviceToHost(tensor5d.data(), gpu_data3,(tensor5d.size())*sizeof(DataType));
-
-
-  for (IndexType i = 0; i < 2; ++i){
-    for (IndexType j = 0; j < 3; ++j){
-      for (IndexType k = 0; k < 7; ++k){
-        VERIFY_IS_EQUAL(tensor5d(i,j,0,k,0), tensor(i,j,k));
-        if (static_cast<int>(DataLayout) == static_cast<int>(ColMajor)) {
-          VERIFY_IS_EQUAL(tensor2d(i+2*j,k), tensor(i,j,k));    ///ColMajor
-        }
-        else{
-          VERIFY_IS_EQUAL(tensor2d(i*3 +j,k),tensor(i,j,k));   /// RowMajor
-        }
-      }
-    }
-  }
-  sycl_device.deallocate(gpu_data1);
-  sycl_device.deallocate(gpu_data2);
-  sycl_device.deallocate(gpu_data3);
-}
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_simple_slice(const Eigen::SyclDevice &sycl_device)
-{
-  IndexType sizeDim1 = 2;
-  IndexType sizeDim2 = 3;
-  IndexType sizeDim3 = 5;
-  IndexType sizeDim4 = 7;
-  IndexType sizeDim5 = 11;
-  array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}};
-  Tensor<DataType, 5,DataLayout, IndexType> tensor(tensorRange);
-  tensor.setRandom();
-  array<IndexType, 5> slice1_range ={{1, 1, 1, 1, 1}};
-  Tensor<DataType, 5,DataLayout, IndexType> slice1(slice1_range);
-
-  DataType* gpu_data1  = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
-  DataType* gpu_data2  = static_cast<DataType*>(sycl_device.allocate(slice1.size()*sizeof(DataType)));
-  TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu1(gpu_data1, tensorRange);
-  TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu2(gpu_data2, slice1_range);
-  Eigen::DSizes<IndexType, 5> indices(1,2,3,4,5);
-  Eigen::DSizes<IndexType, 5> sizes(1,1,1,1,1);
-  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
-  gpu2.device(sycl_device)=gpu1.slice(indices, sizes);
-  sycl_device.memcpyDeviceToHost(slice1.data(), gpu_data2,(slice1.size())*sizeof(DataType));
-  VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5));
-
-
-  array<IndexType, 5> slice2_range ={{1,1,2,2,3}};
-  Tensor<DataType, 5,DataLayout, IndexType> slice2(slice2_range);
-  DataType* gpu_data3  = static_cast<DataType*>(sycl_device.allocate(slice2.size()*sizeof(DataType)));
-  TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu3(gpu_data3, slice2_range);
-  Eigen::DSizes<IndexType, 5> indices2(1,1,3,4,5);
-  Eigen::DSizes<IndexType, 5> sizes2(1,1,2,2,3);
-  gpu3.device(sycl_device)=gpu1.slice(indices2, sizes2);
-  sycl_device.memcpyDeviceToHost(slice2.data(), gpu_data3,(slice2.size())*sizeof(DataType));
-  for (IndexType i = 0; i < 2; ++i) {
-    for (IndexType j = 0; j < 2; ++j) {
-      for (IndexType k = 0; k < 3; ++k) {
-        VERIFY_IS_EQUAL(slice2(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k));
-      }
-    }
-  }
-  sycl_device.deallocate(gpu_data1);
-  sycl_device.deallocate(gpu_data2);
-  sycl_device.deallocate(gpu_data3);
-}
-
-template<typename DataType, int DataLayout, typename IndexType>
-static void test_strided_slice_write_sycl(const Eigen::SyclDevice& sycl_device)
-{
-  typedef Tensor<DataType, 2, DataLayout, IndexType> Tensor2f;
-  typedef Eigen::DSizes<IndexType, 2> Index2;
-  IndexType sizeDim1 = 7L;
-  IndexType sizeDim2 = 11L;
-  array<IndexType, 2> tensorRange = {{sizeDim1, sizeDim2}};
-  Tensor<DataType, 2, DataLayout, IndexType> tensor(tensorRange),tensor2(tensorRange);
-  IndexType sliceDim1 = 2;
-  IndexType sliceDim2 = 3;
-  array<IndexType, 2> sliceRange = {{sliceDim1, sliceDim2}};
-  Tensor2f slice(sliceRange);
-  Index2 strides(1L,1L);
-  Index2 indicesStart(3L,4L);
-  Index2 indicesStop(5L,7L);
-  Index2 lengths(2L,3L);
-
-  DataType* gpu_data1  = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
-  DataType* gpu_data2  = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
-  DataType* gpu_data3  = static_cast<DataType*>(sycl_device.allocate(slice.size()*sizeof(DataType)));
-  TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
-  TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu2(gpu_data2, tensorRange);
-  TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu3(gpu_data3, sliceRange);
-
-
-  tensor.setRandom();
-  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
-  gpu2.device(sycl_device)=gpu1;
-
-  slice.setRandom();
-  sycl_device.memcpyHostToDevice(gpu_data3, slice.data(),(slice.size())*sizeof(DataType));
-
-
-  gpu1.slice(indicesStart,lengths).device(sycl_device)=gpu3;
-  gpu2.stridedSlice(indicesStart,indicesStop,strides).device(sycl_device)=gpu3;
-  sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data1,(tensor.size())*sizeof(DataType));
-  sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor2.size())*sizeof(DataType));
-
-  for(IndexType i=0;i<sizeDim1;i++)
-    for(IndexType j=0;j<sizeDim2;j++){
-    VERIFY_IS_EQUAL(tensor(i,j), tensor2(i,j));
-  }
-  sycl_device.deallocate(gpu_data1);
-  sycl_device.deallocate(gpu_data2);
-  sycl_device.deallocate(gpu_data3);
-}
-
-template<typename DataType, typename dev_Selector> void sycl_morphing_test_per_device(dev_Selector s){
-  QueueInterface queueInterface(s);
-  auto sycl_device = Eigen::SyclDevice(&queueInterface);
-  test_simple_slice<DataType, RowMajor, int64_t>(sycl_device);
-  test_simple_slice<DataType, ColMajor, int64_t>(sycl_device);
-  test_simple_reshape<DataType, RowMajor, int64_t>(sycl_device);
-  test_simple_reshape<DataType, ColMajor, int64_t>(sycl_device);
-  test_reshape_as_lvalue<DataType, RowMajor, int64_t>(sycl_device);
-  test_reshape_as_lvalue<DataType, ColMajor, int64_t>(sycl_device);
-  test_strided_slice_write_sycl<DataType, ColMajor, int64_t>(sycl_device);
-  test_strided_slice_write_sycl<DataType, RowMajor, int64_t>(sycl_device);
-}
-void test_cxx11_tensor_morphing_sycl()
-{
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(sycl_morphing_test_per_device<float>(device));
-  }
-}
diff --git a/eigen/unsupported/test/cxx11_tensor_notification.cpp b/eigen/unsupported/test/cxx11_tensor_notification.cpp
index 183ef02..c946007 100644
--- a/eigen/unsupported/test/cxx11_tensor_notification.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_notification.cpp
@@ -13,6 +13,15 @@
 #include "main.h"
 #include <Eigen/CXX11/Tensor>
 
+#if EIGEN_OS_WIN || EIGEN_OS_WIN64
+#include <windows.h>
+void sleep(int seconds) {
+  Sleep(seconds*1000);
+}
+#else
+#include <unistd.h>
+#endif
+
 
 namespace {
 
@@ -31,7 +40,7 @@ static void test_notification_single()
   Eigen::Notification n;
   std::function<void()> func = std::bind(&WaitAndAdd, &n, &counter);
   thread_pool.Schedule(func);
-  EIGEN_SLEEP(1000);
+  sleep(1);
 
   // The thread should be waiting for the notification.
   VERIFY_IS_EQUAL(counter, 0);
@@ -39,7 +48,7 @@ static void test_notification_single()
   // Unblock the thread
   n.Notify();
 
-  EIGEN_SLEEP(1000);
+  sleep(1);
 
   // Verify the counter has been incremented
   VERIFY_IS_EQUAL(counter, 1);
@@ -58,10 +67,10 @@ static void test_notification_multiple()
   thread_pool.Schedule(func);
   thread_pool.Schedule(func);
   thread_pool.Schedule(func);
-  EIGEN_SLEEP(1000);
+  sleep(1);
   VERIFY_IS_EQUAL(counter, 0);
   n.Notify();
-  EIGEN_SLEEP(1000);
+  sleep(1);
   VERIFY_IS_EQUAL(counter, 4);
 }
 
diff --git a/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu
index 908a5e5..2f86980 100644
--- a/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu
+++ b/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu
@@ -200,8 +200,6 @@ void test_cuda_trancendental() {
   Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem);
   Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_half(d_res3_half, num_elem);
   Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem);
-  Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res4_half(d_res3_half, num_elem);
-  Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res4_float(d_res3_float, num_elem);
 
   gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
   gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f);
@@ -209,7 +207,6 @@ void test_cuda_trancendental() {
   gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::half>();
   gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::half>();
   gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast<Eigen::half>();
-  gpu_res4_float.device(gpu_device) = gpu_float3.expm1().cast<Eigen::half>();
 
   gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>();
   gpu_res1_half.device(gpu_device) = gpu_res1_half.exp();
@@ -220,9 +217,6 @@ void test_cuda_trancendental() {
   gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>();
   gpu_res3_half.device(gpu_device) = gpu_res3_half.log1p();
 
-  gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>();
-  gpu_res3_half.device(gpu_device) = gpu_res3_half.expm1();
-
   Tensor<float, 1> input1(num_elem);
   Tensor<Eigen::half, 1> half_prec1(num_elem);
   Tensor<Eigen::half, 1> full_prec1(num_elem);
diff --git a/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp
deleted file mode 100644
index dc748b7..0000000
--- a/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli    Codeplay Software Ltd.
-// Ralph Potter  Codeplay Software Ltd.
-// Luke Iwanski  Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-// Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_padding_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-
-
-template<typename DataType, int DataLayout, typename IndexType>
-static void test_simple_padding(const Eigen::SyclDevice& sycl_device)
-{
-
-  IndexType sizeDim1 = 2;
-  IndexType sizeDim2 = 3;
-  IndexType sizeDim3 = 5;
-  IndexType sizeDim4 = 7;
-  array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
-
-  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
-  tensor.setRandom();
-
-  array<std::pair<IndexType, IndexType>, 4> paddings;
-  paddings[0] = std::make_pair(0, 0);
-  paddings[1] = std::make_pair(2, 1);
-  paddings[2] = std::make_pair(3, 4);
-  paddings[3] = std::make_pair(0, 0);
-
-  IndexType padedSizeDim1 = 2;
-  IndexType padedSizeDim2 = 6;
-  IndexType padedSizeDim3 = 12;
-  IndexType padedSizeDim4 = 7;
-  array<IndexType, 4> padedtensorRange = {{padedSizeDim1, padedSizeDim2, padedSizeDim3, padedSizeDim4}};
-
-  Tensor<DataType, 4, DataLayout, IndexType> padded(padedtensorRange);
-
-
-  DataType* gpu_data1  = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
-  DataType* gpu_data2  = static_cast<DataType*>(sycl_device.allocate(padded.size()*sizeof(DataType)));
-  TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
-  TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu2(gpu_data2, padedtensorRange);
-
-  VERIFY_IS_EQUAL(padded.dimension(0), 2+0);
-  VERIFY_IS_EQUAL(padded.dimension(1), 3+3);
-  VERIFY_IS_EQUAL(padded.dimension(2), 5+7);
-  VERIFY_IS_EQUAL(padded.dimension(3), 7+0);
-  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
-  gpu2.device(sycl_device)=gpu1.pad(paddings);
-  sycl_device.memcpyDeviceToHost(padded.data(), gpu_data2,(padded.size())*sizeof(DataType));
-  for (IndexType i = 0; i < padedSizeDim1; ++i) {
-    for (IndexType j = 0; j < padedSizeDim2; ++j) {
-      for (IndexType k = 0; k < padedSizeDim3; ++k) {
-        for (IndexType l = 0; l < padedSizeDim4; ++l) {
-          if (j >= 2 && j < 5 && k >= 3 && k < 8) {
-            VERIFY_IS_EQUAL(padded(i,j,k,l), tensor(i,j-2,k-3,l));
-          } else {
-            VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f);
-          }
-        }
-      }
-    }
-  }
-  sycl_device.deallocate(gpu_data1);
-  sycl_device.deallocate(gpu_data2);
-}
-
-template<typename DataType, int DataLayout, typename IndexType>
-static void test_padded_expr(const Eigen::SyclDevice& sycl_device)
-{
-  IndexType sizeDim1 = 2;
-  IndexType sizeDim2 = 3;
-  IndexType sizeDim3 = 5;
-  IndexType sizeDim4 = 7;
-  array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
-
-  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
-  tensor.setRandom();
-
-  array<std::pair<IndexType, IndexType>, 4> paddings;
-  paddings[0] = std::make_pair(0, 0);
-  paddings[1] = std::make_pair(2, 1);
-  paddings[2] = std::make_pair(3, 4);
-  paddings[3] = std::make_pair(0, 0);
-
-  Eigen::DSizes<IndexType, 2> reshape_dims;
-  reshape_dims[0] = 12;
-  reshape_dims[1] = 84;
-
-
-  Tensor<DataType, 2, DataLayout, IndexType>  result(reshape_dims);
-
-  DataType* gpu_data1  = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType)));
-  DataType* gpu_data2  = static_cast<DataType*>(sycl_device.allocate(result.size()*sizeof(DataType)));
-  TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
-  TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu2(gpu_data2, reshape_dims);
-
-
-  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType));
-  gpu2.device(sycl_device)=gpu1.pad(paddings).reshape(reshape_dims);
-  sycl_device.memcpyDeviceToHost(result.data(), gpu_data2,(result.size())*sizeof(DataType));
-
-  for (IndexType i = 0; i < 2; ++i) {
-    for (IndexType j = 0; j < 6; ++j) {
-      for (IndexType k = 0; k < 12; ++k) {
-        for (IndexType l = 0; l < 7; ++l) {
-          const float result_value = DataLayout == ColMajor ?
-              result(i+2*j,k+12*l) : result(j+6*i,l+7*k);
-          if (j >= 2 && j < 5 && k >= 3 && k < 8) {
-            VERIFY_IS_EQUAL(result_value, tensor(i,j-2,k-3,l));
-          } else {
-            VERIFY_IS_EQUAL(result_value, 0.0f);
-          }
-        }
-      }
-    }
-  }
-  sycl_device.deallocate(gpu_data1);
-  sycl_device.deallocate(gpu_data2);
-}
-
-template<typename DataType, typename dev_Selector> void sycl_padding_test_per_device(dev_Selector s){
-  QueueInterface queueInterface(s);
-  auto sycl_device = Eigen::SyclDevice(&queueInterface);
-  test_simple_padding<DataType, RowMajor, int64_t>(sycl_device);
-  test_simple_padding<DataType, ColMajor, int64_t>(sycl_device);
-  test_padded_expr<DataType, RowMajor, int64_t>(sycl_device);
-  test_padded_expr<DataType, ColMajor, int64_t>(sycl_device);
-
-}
-void test_cxx11_tensor_padding_sycl()
-{
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(sycl_padding_test_per_device<float>(device));
-  }
-}
diff --git a/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp
index 440d48b..a9ef829 100644
--- a/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp
@@ -14,168 +14,125 @@
 #define EIGEN_TEST_NO_LONGDOUBLE
 #define EIGEN_TEST_NO_COMPLEX
 #define EIGEN_TEST_FUNC cxx11_tensor_reduction_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_SYCL
 
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
 
 
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_full_reductions_mean_sycl(const Eigen::SyclDevice&  sycl_device) {
 
-  const IndexType num_rows = 452;
-  const IndexType num_cols = 765;
-  array<IndexType, 2> tensorRange = {{num_rows, num_cols}};
+static void test_full_reductions_sycl(const Eigen::SyclDevice&  sycl_device) {
 
-  Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
-  Tensor<DataType, 0, DataLayout, IndexType> full_redux;
-  Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu;
-
-  in.setRandom();
-
-  full_redux = in.mean();
-
-  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType)));
-  DataType* gpu_out_data =(DataType*)sycl_device.allocate(sizeof(DataType));
-
-  TensorMap<Tensor<DataType, 2, DataLayout, IndexType> >  in_gpu(gpu_in_data, tensorRange);
-  TensorMap<Tensor<DataType, 0, DataLayout, IndexType> >  out_gpu(gpu_out_data);
-
-  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType));
-  out_gpu.device(sycl_device) = in_gpu.mean();
-  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(DataType));
-  // Check that the CPU and GPU reductions return the same result.
-  VERIFY_IS_APPROX(full_redux_gpu(), full_redux());
-  sycl_device.deallocate(gpu_in_data);
-  sycl_device.deallocate(gpu_out_data);
-}
+  const int num_rows = 452;
+  const int num_cols = 765;
+  array<int, 2> tensorRange = {{num_rows, num_cols}};
 
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_full_reductions_min_sycl(const Eigen::SyclDevice&  sycl_device) {
-
-  const IndexType num_rows = 876;
-  const IndexType num_cols = 953;
-  array<IndexType, 2> tensorRange = {{num_rows, num_cols}};
-
-  Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange);
-  Tensor<DataType, 0, DataLayout, IndexType> full_redux;
-  Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu;
+  Tensor<float, 2> in(tensorRange);
+  Tensor<float, 0> full_redux;
+  Tensor<float, 0> full_redux_gpu;
 
   in.setRandom();
 
-  full_redux = in.minimum();
+  full_redux = in.sum();
 
-  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType)));
-  DataType* gpu_out_data =(DataType*)sycl_device.allocate(sizeof(DataType));
+  float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float)));
+  float* gpu_out_data =(float*)sycl_device.allocate(sizeof(float));
 
-  TensorMap<Tensor<DataType, 2, DataLayout, IndexType> >  in_gpu(gpu_in_data, tensorRange);
-  TensorMap<Tensor<DataType, 0, DataLayout, IndexType> >  out_gpu(gpu_out_data);
+  TensorMap<Tensor<float, 2> >  in_gpu(gpu_in_data, tensorRange);
+  TensorMap<Tensor<float, 0> >  out_gpu(gpu_out_data);
 
-  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType));
-  out_gpu.device(sycl_device) = in_gpu.minimum();
-  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(DataType));
+  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float));
+  out_gpu.device(sycl_device) = in_gpu.sum();
+  sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(float));
   // Check that the CPU and GPU reductions return the same result.
   VERIFY_IS_APPROX(full_redux_gpu(), full_redux());
+
   sycl_device.deallocate(gpu_in_data);
   sycl_device.deallocate(gpu_out_data);
 }
 
+static void test_first_dim_reductions_sycl(const Eigen::SyclDevice& sycl_device) {
 
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_first_dim_reductions_max_sycl(const Eigen::SyclDevice& sycl_device) {
-
-  IndexType dim_x = 145;
-  IndexType dim_y = 1;
-  IndexType dim_z = 67;
+  int dim_x = 145;
+  int dim_y = 1;
+  int dim_z = 67;
 
-  array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}};
-  Eigen::array<IndexType, 1> red_axis;
+  array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}};
+  Eigen::array<int, 1> red_axis;
   red_axis[0] = 0;
-  array<IndexType, 2> reduced_tensorRange = {{dim_y, dim_z}};
+  array<int, 2> reduced_tensorRange = {{dim_y, dim_z}};
 
-  Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
-  Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange);
-  Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange);
+  Tensor<float, 3> in(tensorRange);
+  Tensor<float, 2> redux(reduced_tensorRange);
+  Tensor<float, 2> redux_gpu(reduced_tensorRange);
 
   in.setRandom();
 
-  redux= in.maximum(red_axis);
+  redux= in.sum(red_axis);
 
-  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType)));
-  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType)));
+  float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float)));
+  float* gpu_out_data = static_cast<float*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float)));
 
-  TensorMap<Tensor<DataType, 3, DataLayout, IndexType> >  in_gpu(gpu_in_data, tensorRange);
-  TensorMap<Tensor<DataType, 2, DataLayout, IndexType> >  out_gpu(gpu_out_data, reduced_tensorRange);
+  TensorMap<Tensor<float, 3> >  in_gpu(gpu_in_data, tensorRange);
+  TensorMap<Tensor<float, 2> >  out_gpu(gpu_out_data, reduced_tensorRange);
 
-  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType));
-  out_gpu.device(sycl_device) = in_gpu.maximum(red_axis);
-  sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(DataType));
+  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float));
+  out_gpu.device(sycl_device) = in_gpu.sum(red_axis);
+  sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float));
 
   // Check that the CPU and GPU reductions return the same result.
-  for(IndexType j=0; j<reduced_tensorRange[0]; j++ )
-    for(IndexType k=0; k<reduced_tensorRange[1]; k++ )
+  for(int j=0; j<reduced_tensorRange[0]; j++ )
+    for(int k=0; k<reduced_tensorRange[1]; k++ )
       VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k));
 
   sycl_device.deallocate(gpu_in_data);
   sycl_device.deallocate(gpu_out_data);
 }
 
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_last_dim_reductions_sum_sycl(const Eigen::SyclDevice &sycl_device) {
+static void test_last_dim_reductions_sycl(const Eigen::SyclDevice &sycl_device) {
 
-  IndexType dim_x = 567;
-  IndexType dim_y = 1;
-  IndexType dim_z = 47;
+  int dim_x = 567;
+  int dim_y = 1;
+  int dim_z = 47;
 
-  array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}};
-  Eigen::array<IndexType, 1> red_axis;
+  array<int, 3> tensorRange = {{dim_x, dim_y, dim_z}};
+  Eigen::array<int, 1> red_axis;
   red_axis[0] = 2;
-  array<IndexType, 2> reduced_tensorRange = {{dim_x, dim_y}};
+  array<int, 2> reduced_tensorRange = {{dim_x, dim_y}};
 
-  Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange);
-  Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange);
-  Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange);
+  Tensor<float, 3> in(tensorRange);
+  Tensor<float, 2> redux(reduced_tensorRange);
+  Tensor<float, 2> redux_gpu(reduced_tensorRange);
 
   in.setRandom();
 
   redux= in.sum(red_axis);
 
-  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType)));
-  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType)));
+  float* gpu_in_data = static_cast<float*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(float)));
+  float* gpu_out_data = static_cast<float*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(float)));
 
-  TensorMap<Tensor<DataType, 3, DataLayout, IndexType> >  in_gpu(gpu_in_data, tensorRange);
-  TensorMap<Tensor<DataType, 2, DataLayout, IndexType> >  out_gpu(gpu_out_data, reduced_tensorRange);
+  TensorMap<Tensor<float, 3> >  in_gpu(gpu_in_data, tensorRange);
+  TensorMap<Tensor<float, 2> >  out_gpu(gpu_out_data, reduced_tensorRange);
 
-  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType));
+  sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(float));
   out_gpu.device(sycl_device) = in_gpu.sum(red_axis);
-  sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(DataType));
+  sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(float));
   // Check that the CPU and GPU reductions return the same result.
-  for(IndexType j=0; j<reduced_tensorRange[0]; j++ )
-    for(IndexType k=0; k<reduced_tensorRange[1]; k++ )
+  for(int j=0; j<reduced_tensorRange[0]; j++ )
+    for(int k=0; k<reduced_tensorRange[1]; k++ )
       VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k));
 
   sycl_device.deallocate(gpu_in_data);
   sycl_device.deallocate(gpu_out_data);
 
 }
-template<typename DataType> void sycl_reduction_test_per_device(const cl::sycl::device& d){
-  std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
-  QueueInterface queueInterface(d);
-  auto sycl_device = Eigen::SyclDevice(&queueInterface);
-
-  test_full_reductions_mean_sycl<DataType, RowMajor, int64_t>(sycl_device);
-  test_full_reductions_min_sycl<DataType, RowMajor, int64_t>(sycl_device);
-  test_first_dim_reductions_max_sycl<DataType, RowMajor, int64_t>(sycl_device);
-  test_last_dim_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device);
-  test_full_reductions_mean_sycl<DataType, ColMajor, int64_t>(sycl_device);
-  test_full_reductions_min_sycl<DataType, ColMajor, int64_t>(sycl_device);
-  test_first_dim_reductions_max_sycl<DataType, ColMajor, int64_t>(sycl_device);
-  test_last_dim_reductions_sum_sycl<DataType, ColMajor, int64_t>(sycl_device);
-}
+
 void test_cxx11_tensor_reduction_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(sycl_reduction_test_per_device<float>(device));
-  }
+  cl::sycl::gpu_selector s;
+  Eigen::SyclDevice sycl_device(s);
+  CALL_SUBTEST((test_full_reductions_sycl(sycl_device)));
+  CALL_SUBTEST((test_first_dim_reductions_sycl(sycl_device)));
+  CALL_SUBTEST((test_last_dim_reductions_sycl(sycl_device)));
+
 }
diff --git a/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp
deleted file mode 100644
index 2f54844..0000000
--- a/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp
+++ /dev/null
@@ -1,221 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015
-// Mehdi Goli    Codeplay Software Ltd.
-// Ralph Potter  Codeplay Software Ltd.
-// Luke Iwanski  Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_reverse_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_simple_reverse(const Eigen::SyclDevice&  sycl_device) {
-
-  IndexType dim1 = 2;
-  IndexType dim2 = 3;
-  IndexType dim3 = 5;
-  IndexType dim4 = 7;
-
-  array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}};
-  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
-  Tensor<DataType, 4, DataLayout, IndexType> reversed_tensor(tensorRange);
-  tensor.setRandom();
-
-  array<bool, 4> dim_rev;
-  dim_rev[0] = false;
-  dim_rev[1] = true;
-  dim_rev[2] = true;
-  dim_rev[3] = false;
-
-  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(tensor.dimensions().TotalSize()*sizeof(DataType)));
-  DataType* gpu_out_data =static_cast<DataType*>(sycl_device.allocate(reversed_tensor.dimensions().TotalSize()*sizeof(DataType)));
-
-  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> >  in_gpu(gpu_in_data, tensorRange);
-  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> >  out_gpu(gpu_out_data, tensorRange);
-
-  sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(),(tensor.dimensions().TotalSize())*sizeof(DataType));
-  out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
-  sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType));
-  // Check that the CPU and GPU reductions return the same result.
-  for (IndexType i = 0; i < 2; ++i) {
-    for (IndexType j = 0; j < 3; ++j) {
-      for (IndexType k = 0; k < 5; ++k) {
-        for (IndexType l = 0; l < 7; ++l) {
-          VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(i,2-j,4-k,l));
-        }
-      }
-    }
-  }
-  dim_rev[0] = true;
-  dim_rev[1] = false;
-  dim_rev[2] = false;
-  dim_rev[3] = false;
-
-  out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
-  sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType));
-
-  for (IndexType i = 0; i < 2; ++i) {
-    for (IndexType j = 0; j < 3; ++j) {
-      for (IndexType k = 0; k < 5; ++k) {
-        for (IndexType l = 0; l < 7; ++l) {
-          VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,l));
-        }
-      }
-    }
-  }
-
-  dim_rev[0] = true;
-  dim_rev[1] = false;
-  dim_rev[2] = false;
-  dim_rev[3] = true;
-  out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
-  sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType));
-
-  for (IndexType i = 0; i < 2; ++i) {
-    for (IndexType j = 0; j < 3; ++j) {
-      for (IndexType k = 0; k < 5; ++k) {
-        for (IndexType l = 0; l < 7; ++l) {
-          VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,6-l));
-        }
-      }
-    }
-  }
-
-  sycl_device.deallocate(gpu_in_data);
-  sycl_device.deallocate(gpu_out_data);
-}
-
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_expr_reverse(const Eigen::SyclDevice&  sycl_device, bool LValue)
-{
-  IndexType dim1 = 2;
-  IndexType dim2 = 3;
-  IndexType dim3 = 5;
-  IndexType dim4 = 7;
-
-  array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}};
-  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
-  Tensor<DataType, 4, DataLayout, IndexType> expected(tensorRange);
-  Tensor<DataType, 4, DataLayout, IndexType> result(tensorRange);
-  tensor.setRandom();
-
-  array<bool, 4> dim_rev;
-  dim_rev[0] = false;
-  dim_rev[1] = true;
-  dim_rev[2] = false;
-  dim_rev[3] = true;
-
-  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(tensor.dimensions().TotalSize()*sizeof(DataType)));
-  DataType* gpu_out_data_expected =static_cast<DataType*>(sycl_device.allocate(expected.dimensions().TotalSize()*sizeof(DataType)));
-  DataType* gpu_out_data_result =static_cast<DataType*>(sycl_device.allocate(result.dimensions().TotalSize()*sizeof(DataType)));
-
-  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> >  in_gpu(gpu_in_data, tensorRange);
-  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> >  out_gpu_expected(gpu_out_data_expected, tensorRange);
-  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> >  out_gpu_result(gpu_out_data_result, tensorRange);
-
-
-  sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(),(tensor.dimensions().TotalSize())*sizeof(DataType));
-
-  if (LValue) {
-    out_gpu_expected.reverse(dim_rev).device(sycl_device) = in_gpu;
-  } else {
-    out_gpu_expected.device(sycl_device) = in_gpu.reverse(dim_rev);
-  }
-  sycl_device.memcpyDeviceToHost(expected.data(), gpu_out_data_expected, expected.dimensions().TotalSize()*sizeof(DataType));
-
-
-  array<IndexType, 4> src_slice_dim;
-  src_slice_dim[0] = 2;
-  src_slice_dim[1] = 3;
-  src_slice_dim[2] = 1;
-  src_slice_dim[3] = 7;
-  array<IndexType, 4> src_slice_start;
-  src_slice_start[0] = 0;
-  src_slice_start[1] = 0;
-  src_slice_start[2] = 0;
-  src_slice_start[3] = 0;
-  array<IndexType, 4> dst_slice_dim = src_slice_dim;
-  array<IndexType, 4> dst_slice_start = src_slice_start;
-
-  for (IndexType i = 0; i < 5; ++i) {
-    if (LValue) {
-        out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) =
-          in_gpu.slice(src_slice_start, src_slice_dim);
-    } else {
-      out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
-          in_gpu.slice(src_slice_start, src_slice_dim).reverse(dim_rev);
-    }
-    src_slice_start[2] += 1;
-    dst_slice_start[2] += 1;
-  }
-  sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result, result.dimensions().TotalSize()*sizeof(DataType));
-
-  for (IndexType i = 0; i < expected.dimension(0); ++i) {
-    for (IndexType j = 0; j < expected.dimension(1); ++j) {
-      for (IndexType k = 0; k < expected.dimension(2); ++k) {
-        for (IndexType l = 0; l < expected.dimension(3); ++l) {
-          VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
-        }
-      }
-    }
-  }
-
-  dst_slice_start[2] = 0;
-  result.setRandom();
-  sycl_device.memcpyHostToDevice(gpu_out_data_result, result.data(),(result.dimensions().TotalSize())*sizeof(DataType));
-  for (IndexType i = 0; i < 5; ++i) {
-     if (LValue) {
-       out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) =
-           in_gpu.slice(dst_slice_start, dst_slice_dim);
-     } else {
-       out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) =
-           in_gpu.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim);
-     }
-    dst_slice_start[2] += 1;
-  }
-  sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result, result.dimensions().TotalSize()*sizeof(DataType));
-
-  for (IndexType i = 0; i < expected.dimension(0); ++i) {
-    for (IndexType j = 0; j < expected.dimension(1); ++j) {
-      for (IndexType k = 0; k < expected.dimension(2); ++k) {
-        for (IndexType l = 0; l < expected.dimension(3); ++l) {
-          VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
-        }
-      }
-    }
-  }
-}
-
-
-
-template<typename DataType> void sycl_reverse_test_per_device(const cl::sycl::device& d){
-  std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
-  QueueInterface queueInterface(d);
-  auto sycl_device = Eigen::SyclDevice(&queueInterface);
-  test_simple_reverse<DataType, RowMajor, int64_t>(sycl_device);
-  test_simple_reverse<DataType, ColMajor, int64_t>(sycl_device);
-  test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, false);
-  test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, false);
-  test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, true);
-  test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, true);
-}
-void test_cxx11_tensor_reverse_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(sycl_reverse_test_per_device<float>(device));
-  }
-}
diff --git a/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp
deleted file mode 100644
index c88db7c..0000000
--- a/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli    Codeplay Software Ltd.
-// Ralph Potter  Codeplay Software Ltd.
-// Luke Iwanski  Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-// Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_shuffling_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device)
-{
-  IndexType sizeDim1 = 2;
-  IndexType sizeDim2 = 3;
-  IndexType sizeDim3 = 5;
-  IndexType sizeDim4 = 7;
-  array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
-  Tensor<DataType, 4, DataLayout,IndexType> tensor(tensorRange);
-  Tensor<DataType, 4, DataLayout,IndexType> no_shuffle(tensorRange);
-  tensor.setRandom();
-
-  const size_t buffSize =tensor.size()*sizeof(DataType);
-  array<IndexType, 4> shuffles;
-  shuffles[0] = 0;
-  shuffles[1] = 1;
-  shuffles[2] = 2;
-  shuffles[3] = 3;
-  DataType* gpu_data1  = static_cast<DataType*>(sycl_device.allocate(buffSize));
-  DataType* gpu_data2  = static_cast<DataType*>(sycl_device.allocate(buffSize));
-
-
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu1(gpu_data1, tensorRange);
-  TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu2(gpu_data2, tensorRange);
-
-  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(), buffSize);
-
-  gpu2.device(sycl_device)=gpu1.shuffle(shuffles);
-  sycl_device.memcpyDeviceToHost(no_shuffle.data(), gpu_data2, buffSize);
-  sycl_device.synchronize();
-
-  VERIFY_IS_EQUAL(no_shuffle.dimension(0), sizeDim1);
-  VERIFY_IS_EQUAL(no_shuffle.dimension(1), sizeDim2);
-  VERIFY_IS_EQUAL(no_shuffle.dimension(2), sizeDim3);
-  VERIFY_IS_EQUAL(no_shuffle.dimension(3), sizeDim4);
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
-        for (IndexType l = 0; l < sizeDim4; ++l) {
-          VERIFY_IS_EQUAL(tensor(i,j,k,l), no_shuffle(i,j,k,l));
-        }
-      }
-    }
-  }
-
-  shuffles[0] = 2;
-  shuffles[1] = 3;
-  shuffles[2] = 1;
-  shuffles[3] = 0;
-  array<IndexType, 4> tensorrangeShuffle = {{sizeDim3, sizeDim4, sizeDim2, sizeDim1}};
-  Tensor<DataType, 4, DataLayout,IndexType> shuffle(tensorrangeShuffle);
-  DataType* gpu_data3  = static_cast<DataType*>(sycl_device.allocate(buffSize));
-  TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu3(gpu_data3, tensorrangeShuffle);
-
-  gpu3.device(sycl_device)=gpu1.shuffle(shuffles);
-  sycl_device.memcpyDeviceToHost(shuffle.data(), gpu_data3, buffSize);
-  sycl_device.synchronize();
-
-  VERIFY_IS_EQUAL(shuffle.dimension(0), sizeDim3);
-  VERIFY_IS_EQUAL(shuffle.dimension(1), sizeDim4);
-  VERIFY_IS_EQUAL(shuffle.dimension(2), sizeDim2);
-  VERIFY_IS_EQUAL(shuffle.dimension(3), sizeDim1);
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
-        for (IndexType l = 0; l < sizeDim4; ++l) {
-          VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i));
-        }
-      }
-    }
-  }
-}
-
-
-template<typename DataType, typename dev_Selector> void sycl_shuffling_test_per_device(dev_Selector s){
-  QueueInterface queueInterface(s);
-  auto sycl_device = Eigen::SyclDevice(&queueInterface);
-  test_simple_shuffling_sycl<DataType, RowMajor, int64_t>(sycl_device);
-  test_simple_shuffling_sycl<DataType, ColMajor, int64_t>(sycl_device);
-
-}
-void test_cxx11_tensor_shuffling_sycl()
-{
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(sycl_shuffling_test_per_device<float>(device));
-  }
-}
diff --git a/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp
deleted file mode 100644
index 603c374..0000000
--- a/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp
+++ /dev/null
@@ -1,203 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2016
-// Mehdi Goli    Codeplay Software Ltd.
-// Ralph Potter  Codeplay Software Ltd.
-// Luke Iwanski  Codeplay Software Ltd.
-// Contact: <eigen@codeplay.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_striding_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
-#define EIGEN_USE_SYCL
-
-#include <iostream>
-#include <chrono>
-#include <ctime>
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-using Eigen::array;
-using Eigen::SyclDevice;
-using Eigen::Tensor;
-using Eigen::TensorMap;
-
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_simple_striding(const Eigen::SyclDevice& sycl_device)
-{
-
-  Eigen::array<IndexType, 4> tensor_dims = {{2,3,5,7}};
-  Eigen::array<IndexType, 4> stride_dims = {{1,1,3,3}};
-
-
-  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensor_dims);
-  Tensor<DataType, 4, DataLayout,IndexType> no_stride(tensor_dims);
-  Tensor<DataType, 4, DataLayout,IndexType> stride(stride_dims);
-
-
-  std::size_t tensor_bytes = tensor.size()  * sizeof(DataType);
-  std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType);
-  std::size_t stride_bytes = stride.size() * sizeof(DataType);
-  DataType * d_tensor = static_cast<DataType*>(sycl_device.allocate(tensor_bytes));
-  DataType * d_no_stride = static_cast<DataType*>(sycl_device.allocate(no_stride_bytes));
-  DataType * d_stride = static_cast<DataType*>(sycl_device.allocate(stride_bytes));
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_tensor(d_tensor, tensor_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_no_stride(d_no_stride, tensor_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_stride(d_stride, stride_dims);
-
-
-  tensor.setRandom();
-  array<IndexType, 4> strides;
-  strides[0] = 1;
-  strides[1] = 1;
-  strides[2] = 1;
-  strides[3] = 1;
-  sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes);
-  gpu_no_stride.device(sycl_device)=gpu_tensor.stride(strides);
-  sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes);
-
-  //no_stride = tensor.stride(strides);
-
-  VERIFY_IS_EQUAL(no_stride.dimension(0), 2);
-  VERIFY_IS_EQUAL(no_stride.dimension(1), 3);
-  VERIFY_IS_EQUAL(no_stride.dimension(2), 5);
-  VERIFY_IS_EQUAL(no_stride.dimension(3), 7);
-
-  for (IndexType i = 0; i < 2; ++i) {
-    for (IndexType j = 0; j < 3; ++j) {
-      for (IndexType k = 0; k < 5; ++k) {
-        for (IndexType l = 0; l < 7; ++l) {
-          VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l));
-        }
-      }
-    }
-  }
-
-  strides[0] = 2;
-  strides[1] = 4;
-  strides[2] = 2;
-  strides[3] = 3;
-//Tensor<float, 4, DataLayout> stride;
-//  stride = tensor.stride(strides);
-
-  gpu_stride.device(sycl_device)=gpu_tensor.stride(strides);
-  sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes);
-
-  VERIFY_IS_EQUAL(stride.dimension(0), 1);
-  VERIFY_IS_EQUAL(stride.dimension(1), 1);
-  VERIFY_IS_EQUAL(stride.dimension(2), 3);
-  VERIFY_IS_EQUAL(stride.dimension(3), 3);
-
-  for (IndexType i = 0; i < 1; ++i) {
-    for (IndexType j = 0; j < 1; ++j) {
-      for (IndexType k = 0; k < 3; ++k) {
-        for (IndexType l = 0; l < 3; ++l) {
-          VERIFY_IS_EQUAL(tensor(2*i,4*j,2*k,3*l), stride(i,j,k,l));
-        }
-      }
-    }
-  }
-
-  sycl_device.deallocate(d_tensor);
-  sycl_device.deallocate(d_no_stride);
-  sycl_device.deallocate(d_stride);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-static void test_striding_as_lvalue(const Eigen::SyclDevice& sycl_device)
-{
-
-  Eigen::array<IndexType, 4> tensor_dims = {{2,3,5,7}};
-  Eigen::array<IndexType, 4> stride_dims = {{3,12,10,21}};
-
-
-  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensor_dims);
-  Tensor<DataType, 4, DataLayout,IndexType> no_stride(stride_dims);
-  Tensor<DataType, 4, DataLayout,IndexType> stride(stride_dims);
-
-
-  std::size_t tensor_bytes = tensor.size()  * sizeof(DataType);
-  std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType);
-  std::size_t stride_bytes = stride.size() * sizeof(DataType);
-
-  DataType * d_tensor = static_cast<DataType*>(sycl_device.allocate(tensor_bytes));
-  DataType * d_no_stride = static_cast<DataType*>(sycl_device.allocate(no_stride_bytes));
-  DataType * d_stride = static_cast<DataType*>(sycl_device.allocate(stride_bytes));
-
-  Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_tensor(d_tensor, tensor_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_no_stride(d_no_stride, stride_dims);
-  Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_stride(d_stride, stride_dims);
-
-  //Tensor<float, 4, DataLayout> tensor(2,3,5,7);
-  tensor.setRandom();
-  array<IndexType, 4> strides;
-  strides[0] = 2;
-  strides[1] = 4;
-  strides[2] = 2;
-  strides[3] = 3;
-
-//  Tensor<float, 4, DataLayout> result(3, 12, 10, 21);
-//  result.stride(strides) = tensor;
-  sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes);
-  gpu_stride.stride(strides).device(sycl_device)=gpu_tensor;
-  sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes);
-
-  for (IndexType i = 0; i < 2; ++i) {
-    for (IndexType j = 0; j < 3; ++j) {
-      for (IndexType k = 0; k < 5; ++k) {
-        for (IndexType l = 0; l < 7; ++l) {
-          VERIFY_IS_EQUAL(tensor(i,j,k,l), stride(2*i,4*j,2*k,3*l));
-        }
-      }
-    }
-  }
-
-  array<IndexType, 4> no_strides;
-  no_strides[0] = 1;
-  no_strides[1] = 1;
-  no_strides[2] = 1;
-  no_strides[3] = 1;
-//  Tensor<float, 4, DataLayout> result2(3, 12, 10, 21);
-//  result2.stride(strides) = tensor.stride(no_strides);
-
-  gpu_no_stride.stride(strides).device(sycl_device)=gpu_tensor.stride(no_strides);
-  sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes);
-
-  for (IndexType i = 0; i < 2; ++i) {
-    for (IndexType j = 0; j < 3; ++j) {
-      for (IndexType k = 0; k < 5; ++k) {
-        for (IndexType l = 0; l < 7; ++l) {
-          VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(2*i,4*j,2*k,3*l));
-        }
-      }
-    }
-  }
-  sycl_device.deallocate(d_tensor);
-  sycl_device.deallocate(d_no_stride);
-  sycl_device.deallocate(d_stride);
-}
-
-
-template <typename Dev_selector> void tensorStridingPerDevice(Dev_selector& s){
-  QueueInterface queueInterface(s);
-  auto sycl_device=Eigen::SyclDevice(&queueInterface);
-  test_simple_striding<float, ColMajor, int64_t>(sycl_device);
-  test_simple_striding<float, RowMajor, int64_t>(sycl_device);
-  test_striding_as_lvalue<float, ColMajor, int64_t>(sycl_device);
-  test_striding_as_lvalue<float, RowMajor, int64_t>(sycl_device);
-}
-
-void test_cxx11_tensor_striding_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(tensorStridingPerDevice(device));
-  }
-}
diff --git a/eigen/unsupported/test/cxx11_tensor_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_sycl.cpp
index 5cd0f4c..6a9c334 100644
--- a/eigen/unsupported/test/cxx11_tensor_sycl.cpp
+++ b/eigen/unsupported/test/cxx11_tensor_sycl.cpp
@@ -16,7 +16,7 @@
 #define EIGEN_TEST_NO_LONGDOUBLE
 #define EIGEN_TEST_NO_COMPLEX
 #define EIGEN_TEST_FUNC cxx11_tensor_sycl
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_SYCL
 
 #include "main.h"
@@ -27,105 +27,36 @@ using Eigen::SyclDevice;
 using Eigen::Tensor;
 using Eigen::TensorMap;
 
-template <typename DataType, int DataLayout, typename IndexType>
-void test_sycl_mem_transfers(const Eigen::SyclDevice &sycl_device) {
-  IndexType sizeDim1 = 100;
-  IndexType sizeDim2 = 10;
-  IndexType sizeDim3 = 20;
-  array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
-  Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange);
-  Tensor<DataType, 3, DataLayout, IndexType> out1(tensorRange);
-  Tensor<DataType, 3, DataLayout, IndexType> out2(tensorRange);
-  Tensor<DataType, 3, DataLayout, IndexType> out3(tensorRange);
+void test_sycl_cpu(const Eigen::SyclDevice &sycl_device) {
 
-  in1 = in1.random();
-
-  DataType* gpu_data1  = static_cast<DataType*>(sycl_device.allocate(in1.size()*sizeof(DataType)));
-  DataType* gpu_data2  = static_cast<DataType*>(sycl_device.allocate(out1.size()*sizeof(DataType)));
-
-  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu1(gpu_data1, tensorRange);
-  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu2(gpu_data2, tensorRange);
-
-  sycl_device.memcpyHostToDevice(gpu_data1, in1.data(),(in1.size())*sizeof(DataType));
-  sycl_device.memcpyHostToDevice(gpu_data2, in1.data(),(in1.size())*sizeof(DataType));
-  gpu1.device(sycl_device) = gpu1 * 3.14f;
-  gpu2.device(sycl_device) = gpu2 * 2.7f;
-  sycl_device.memcpyDeviceToHost(out1.data(), gpu_data1,(out1.size())*sizeof(DataType));
-  sycl_device.memcpyDeviceToHost(out2.data(), gpu_data1,(out2.size())*sizeof(DataType));
-  sycl_device.memcpyDeviceToHost(out3.data(), gpu_data2,(out3.size())*sizeof(DataType));
-  sycl_device.synchronize();
-
-  for (IndexType i = 0; i < in1.size(); ++i) {
-    VERIFY_IS_APPROX(out1(i), in1(i) * 3.14f);
-    VERIFY_IS_APPROX(out2(i), in1(i) * 3.14f);
-    VERIFY_IS_APPROX(out3(i), in1(i) * 2.7f);
-  }
-
-  sycl_device.deallocate(gpu_data1);
-  sycl_device.deallocate(gpu_data2);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-void test_sycl_mem_sync(const Eigen::SyclDevice &sycl_device) {
-  IndexType size = 20;
-  array<IndexType, 1> tensorRange = {{size}};
-  Tensor<DataType, 1, DataLayout, IndexType> in1(tensorRange);
-  Tensor<DataType, 1, DataLayout, IndexType> in2(tensorRange);
-  Tensor<DataType, 1, DataLayout, IndexType> out(tensorRange);
-
-  in1 = in1.random();
-  in2 = in1;
-
-  DataType* gpu_data  = static_cast<DataType*>(sycl_device.allocate(in1.size()*sizeof(DataType)));
-
-  TensorMap<Tensor<DataType, 1, DataLayout, IndexType>> gpu1(gpu_data, tensorRange);
-  sycl_device.memcpyHostToDevice(gpu_data, in1.data(),(in1.size())*sizeof(DataType));
-  sycl_device.synchronize();
-  in1.setZero();
-
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_data, out.size()*sizeof(DataType));
-  sycl_device.synchronize();
-
-  for (IndexType i = 0; i < in1.size(); ++i) {
-    VERIFY_IS_APPROX(out(i), in2(i));
-  }
-
-  sycl_device.deallocate(gpu_data);
-}
-
-template <typename DataType, int DataLayout, typename IndexType>
-void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
-
-  IndexType sizeDim1 = 100;
-  IndexType sizeDim2 = 10;
-  IndexType sizeDim3 = 20;
-  array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
-  Tensor<DataType, 3,DataLayout, IndexType> in1(tensorRange);
-  Tensor<DataType, 3,DataLayout, IndexType> in2(tensorRange);
-  Tensor<DataType, 3,DataLayout, IndexType> in3(tensorRange);
-  Tensor<DataType, 3,DataLayout, IndexType> out(tensorRange);
+  int sizeDim1 = 100;
+  int sizeDim2 = 100;
+  int sizeDim3 = 100;
+  array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};
+  Tensor<float, 3> in1(tensorRange);
+  Tensor<float, 3> in2(tensorRange);
+  Tensor<float, 3> in3(tensorRange);
+  Tensor<float, 3> out(tensorRange);
 
   in2 = in2.random();
   in3 = in3.random();
 
-  DataType * gpu_in1_data  = static_cast<DataType*>(sycl_device.allocate(in1.size()*sizeof(DataType)));
-  DataType * gpu_in2_data  = static_cast<DataType*>(sycl_device.allocate(in2.size()*sizeof(DataType)));
-  DataType * gpu_in3_data  = static_cast<DataType*>(sycl_device.allocate(in3.size()*sizeof(DataType)));
-  DataType * gpu_out_data =  static_cast<DataType*>(sycl_device.allocate(out.size()*sizeof(DataType)));
+  float * gpu_in1_data  = static_cast<float*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(float)));
+  float * gpu_in2_data  = static_cast<float*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(float)));
+  float * gpu_in3_data  = static_cast<float*>(sycl_device.allocate(in3.dimensions().TotalSize()*sizeof(float)));
+  float * gpu_out_data =  static_cast<float*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float)));
 
-  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, tensorRange);
-  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, tensorRange);
-  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in3(gpu_in3_data, tensorRange);
-  TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange);
+  TensorMap<Tensor<float, 3>> gpu_in1(gpu_in1_data, tensorRange);
+  TensorMap<Tensor<float, 3>> gpu_in2(gpu_in2_data, tensorRange);
+  TensorMap<Tensor<float, 3>> gpu_in3(gpu_in3_data, tensorRange);
+  TensorMap<Tensor<float, 3>> gpu_out(gpu_out_data, tensorRange);
 
   /// a=1.2f
   gpu_in1.device(sycl_device) = gpu_in1.constant(1.2f);
-  sycl_device.memcpyDeviceToHost(in1.data(), gpu_in1_data ,(in1.size())*sizeof(DataType));
-  sycl_device.synchronize();
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(in1.data(), gpu_in1_data ,(in1.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
         VERIFY_IS_APPROX(in1(i,j,k), 1.2f);
       }
     }
@@ -134,12 +65,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
 
   /// a=b*1.2f
   gpu_out.device(sycl_device) = gpu_in1 * 1.2f;
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data ,(out.size())*sizeof(DataType));
-  sycl_device.synchronize();
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data ,(out.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
         VERIFY_IS_APPROX(out(i,j,k),
                          in1(i,j,k) * 1.2f);
       }
@@ -148,14 +77,12 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
   printf("a=b*1.2f Test Passed\n");
 
   /// c=a*b
-  sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.size())*sizeof(DataType));
+  sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.dimensions().TotalSize())*sizeof(float));
   gpu_out.device(sycl_device) = gpu_in1 * gpu_in2;
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType));
-  sycl_device.synchronize();
-
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
         VERIFY_IS_APPROX(out(i,j,k),
                          in1(i,j,k) *
                              in2(i,j,k));
@@ -166,11 +93,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
 
   /// c=a+b
   gpu_out.device(sycl_device) = gpu_in1 + gpu_in2;
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType));
-  sycl_device.synchronize();
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
         VERIFY_IS_APPROX(out(i,j,k),
                          in1(i,j,k) +
                              in2(i,j,k));
@@ -181,11 +107,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
 
   /// c=a*a
   gpu_out.device(sycl_device) = gpu_in1 * gpu_in1;
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType));
-  sycl_device.synchronize();
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
         VERIFY_IS_APPROX(out(i,j,k),
                          in1(i,j,k) *
                              in1(i,j,k));
@@ -196,11 +121,10 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
 
   //a*3.14f + b*2.7f
   gpu_out.device(sycl_device) =  gpu_in1 * gpu_in1.constant(3.14f) + gpu_in2 * gpu_in2.constant(2.7f);
-  sycl_device.memcpyDeviceToHost(out.data(),gpu_out_data,(out.size())*sizeof(DataType));
-  sycl_device.synchronize();
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(out.data(),gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
         VERIFY_IS_APPROX(out(i,j,k),
                          in1(i,j,k) * 3.14f
                        + in2(i,j,k) * 2.7f);
@@ -210,13 +134,12 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
   printf("a*3.14f + b*2.7f Test Passed\n");
 
   ///d= (a>0.5? b:c)
-  sycl_device.memcpyHostToDevice(gpu_in3_data, in3.data(),(in3.size())*sizeof(DataType));
+  sycl_device.memcpyHostToDevice(gpu_in3_data, in3.data(),(in3.dimensions().TotalSize())*sizeof(float));
   gpu_out.device(sycl_device) =(gpu_in1 > gpu_in1.constant(0.5f)).select(gpu_in2, gpu_in3);
-  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType));
-  sycl_device.synchronize();
-  for (IndexType i = 0; i < sizeDim1; ++i) {
-    for (IndexType j = 0; j < sizeDim2; ++j) {
-      for (IndexType k = 0; k < sizeDim3; ++k) {
+  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(float));
+  for (int i = 0; i < sizeDim1; ++i) {
+    for (int j = 0; j < sizeDim2; ++j) {
+      for (int k = 0; k < sizeDim3; ++k) {
         VERIFY_IS_APPROX(out(i, j, k), (in1(i, j, k) > 0.5f)
                                                 ? in2(i, j, k)
                                                 : in3(i, j, k));
@@ -229,48 +152,8 @@ void test_sycl_computations(const Eigen::SyclDevice &sycl_device) {
   sycl_device.deallocate(gpu_in3_data);
   sycl_device.deallocate(gpu_out_data);
 }
-template<typename Scalar1, typename Scalar2,  int DataLayout, typename IndexType>
-static void test_sycl_cast(const Eigen::SyclDevice& sycl_device){
-    IndexType size = 20;
-    array<IndexType, 1> tensorRange = {{size}};
-    Tensor<Scalar1, 1, DataLayout, IndexType> in(tensorRange);
-    Tensor<Scalar2, 1, DataLayout, IndexType> out(tensorRange);
-    Tensor<Scalar2, 1, DataLayout, IndexType> out_host(tensorRange);
-
-    in = in.random();
-
-    Scalar1* gpu_in_data  = static_cast<Scalar1*>(sycl_device.allocate(in.size()*sizeof(Scalar1)));
-    Scalar2 * gpu_out_data =  static_cast<Scalar2*>(sycl_device.allocate(out.size()*sizeof(Scalar2)));
-
-    TensorMap<Tensor<Scalar1, 1, DataLayout, IndexType>> gpu_in(gpu_in_data, tensorRange);
-    TensorMap<Tensor<Scalar2, 1, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange);
-    sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.size())*sizeof(Scalar1));
-    gpu_out.device(sycl_device) = gpu_in. template cast<Scalar2>();
-    sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, out.size()*sizeof(Scalar2));
-    out_host = in. template cast<Scalar2>();
-    for(IndexType i=0; i< size; i++)
-    {
-      VERIFY_IS_APPROX(out(i), out_host(i));
-    }
-    printf("cast Test Passed\n");
-    sycl_device.deallocate(gpu_in_data);
-    sycl_device.deallocate(gpu_out_data);
-}
-template<typename DataType, typename dev_Selector> void sycl_computing_test_per_device(dev_Selector s){
-  QueueInterface queueInterface(s);
-  auto sycl_device = Eigen::SyclDevice(&queueInterface);
-  test_sycl_mem_transfers<DataType, RowMajor, int64_t>(sycl_device);
-  test_sycl_computations<DataType, RowMajor, int64_t>(sycl_device);
-  test_sycl_mem_sync<DataType, RowMajor, int64_t>(sycl_device);
-  test_sycl_mem_transfers<DataType, ColMajor, int64_t>(sycl_device);
-  test_sycl_computations<DataType, ColMajor, int64_t>(sycl_device);
-  test_sycl_mem_sync<DataType, ColMajor, int64_t>(sycl_device);
-  test_sycl_cast<DataType, int, RowMajor, int64_t>(sycl_device);
-  test_sycl_cast<DataType, int, ColMajor, int64_t>(sycl_device);
-}
-
 void test_cxx11_tensor_sycl() {
-  for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(sycl_computing_test_per_device<float>(device));
-  }
+  cl::sycl::gpu_selector s;
+  Eigen::SyclDevice sycl_device(s);
+  CALL_SUBTEST(test_sycl_cpu(sycl_device));
 }
diff --git a/eigen/unsupported/test/polynomialsolver.cpp b/eigen/unsupported/test/polynomialsolver.cpp
index 7ad4aa6..0c87478 100644
--- a/eigen/unsupported/test/polynomialsolver.cpp
+++ b/eigen/unsupported/test/polynomialsolver.cpp
@@ -32,10 +32,9 @@ bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve )
 {
   typedef typename POLYNOMIAL::Index Index;
   typedef typename POLYNOMIAL::Scalar Scalar;
-  typedef typename POLYNOMIAL::RealScalar RealScalar;
 
   typedef typename SOLVER::RootsType    RootsType;
-  typedef Matrix<RealScalar,Deg,1>      EvalRootsType;
+  typedef Matrix<Scalar,Deg,1>          EvalRootsType;
 
   const Index deg = pols.size()-1;
 
@@ -58,7 +57,7 @@ bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve )
     cerr << endl;
   }
 
-  std::vector<RealScalar> rootModuli( roots.size() );
+  std::vector<Scalar> rootModuli( roots.size() );
   Map< EvalRootsType > aux( &rootModuli[0], roots.size() );
   aux = roots.array().abs();
   std::sort( rootModuli.begin(), rootModuli.end() );
@@ -84,7 +83,7 @@ void evalSolver( const POLYNOMIAL& pols )
 {
   typedef typename POLYNOMIAL::Scalar Scalar;
 
-  typedef PolynomialSolver<Scalar, Deg > PolynomialSolverType;
+  typedef PolynomialSolver<Scalar, Deg >              PolynomialSolverType;
 
   PolynomialSolverType psolve;
   aux_evalSolver<Deg, POLYNOMIAL, PolynomialSolverType>( pols, psolve );
@@ -98,7 +97,6 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
 {
   using std::sqrt;
   typedef typename POLYNOMIAL::Scalar Scalar;
-  typedef typename POLYNOMIAL::RealScalar RealScalar;
 
   typedef PolynomialSolver<Scalar, Deg >              PolynomialSolverType;
 
@@ -109,12 +107,15 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
     // 1) the roots found are correct
     // 2) the roots have distinct moduli
 
+    typedef typename POLYNOMIAL::Scalar                 Scalar;
+    typedef typename REAL_ROOTS::Scalar                 Real;
+
     //Test realRoots
-    std::vector< RealScalar > calc_realRoots;
-    psolve.realRoots( calc_realRoots,  test_precision<RealScalar>());
-    VERIFY_IS_EQUAL( calc_realRoots.size() , (size_t)real_roots.size() );
+    std::vector< Real > calc_realRoots;
+    psolve.realRoots( calc_realRoots );
+    VERIFY( calc_realRoots.size() == (size_t)real_roots.size() );
 
-    const RealScalar psPrec = sqrt( test_precision<RealScalar>() );
+    const Scalar psPrec = sqrt( test_precision<Scalar>() );
 
     for( size_t i=0; i<calc_realRoots.size(); ++i )
     {
@@ -137,7 +138,7 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
 
     bool hasRealRoot;
     //Test absGreatestRealRoot
-    RealScalar r = psolve.absGreatestRealRoot( hasRealRoot );
+    Real r = psolve.absGreatestRealRoot( hasRealRoot );
     VERIFY( hasRealRoot == (real_roots.size() > 0 ) );
     if( hasRealRoot ){
       VERIFY( internal::isApprox( real_roots.array().abs().maxCoeff(), abs(r), psPrec ) );  }
@@ -166,11 +167,9 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
 template<typename _Scalar, int _Deg>
 void polynomialsolver(int deg)
 {
-  typedef typename NumTraits<_Scalar>::Real RealScalar;
-  typedef internal::increment_if_fixed_size<_Deg>     Dim;
+  typedef internal::increment_if_fixed_size<_Deg>            Dim;
   typedef Matrix<_Scalar,Dim::ret,1>                  PolynomialType;
   typedef Matrix<_Scalar,_Deg,1>                      EvalRootsType;
-  typedef Matrix<RealScalar,_Deg,1>                   RealRootsType;
 
   cout << "Standard cases" << endl;
   PolynomialType pols = PolynomialType::Random(deg+1);
@@ -183,11 +182,15 @@ void polynomialsolver(int deg)
   evalSolver<_Deg,PolynomialType>( pols );
 
   cout << "Test sugar" << endl;
-  RealRootsType realRoots = RealRootsType::Random(deg);
+  EvalRootsType realRoots = EvalRootsType::Random(deg);
   roots_to_monicPolynomial( realRoots, pols );
   evalSolverSugarFunction<_Deg>(
       pols,
-      realRoots.template cast <std::complex<RealScalar> >().eval(),
+      realRoots.template cast <
+                    std::complex<
+                         typename NumTraits<_Scalar>::Real
+                         >
+                    >(),
       realRoots );
 }
 
@@ -211,6 +214,5 @@ void test_polynomialsolver()
             internal::random<int>(9,13)
             )) );
     CALL_SUBTEST_11((polynomialsolver<float,Dynamic>(1)) );
-    CALL_SUBTEST_12((polynomialsolver<std::complex<double>,Dynamic>(internal::random<int>(2,13))) );
   }
 }
diff --git a/eigen/unsupported/test/sparse_extra.cpp b/eigen/unsupported/test/sparse_extra.cpp
index 4f6723d..a010ceb 100644
--- a/eigen/unsupported/test/sparse_extra.cpp
+++ b/eigen/unsupported/test/sparse_extra.cpp
@@ -129,19 +129,6 @@ template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& re
 
 }
 
-template<typename SparseMatrixType>
-void check_marketio()
-{
-  typedef Matrix<typename SparseMatrixType::Scalar, Dynamic, Dynamic> DenseMatrix;
-  Index rows = internal::random<Index>(1,100);
-  Index cols = internal::random<Index>(1,100);
-  SparseMatrixType m1, m2;
-  m1 = DenseMatrix::Random(rows, cols).sparseView();
-  saveMarket(m1, "sparse_extra.mtx");
-  loadMarket(m2, "sparse_extra.mtx");
-  VERIFY_IS_EQUAL(DenseMatrix(m1),DenseMatrix(m2));
-}
-
 void test_sparse_extra()
 {
   for(int i = 0; i < g_repeat; i++) {
@@ -156,15 +143,5 @@ void test_sparse_extra()
 
     CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, ColMajor> >()) );
     CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, RowMajor> >()) );
-
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,long int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,long int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,long int> >()) );
-    CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,long int> >()) );
-    TEST_SET_BUT_UNUSED_VARIABLE(s);
   }
 }
-- 
cgit v1.2.3