| field | value | date |
|---|---|---|
| author | Stanislaw Halik <sthalik@misaki.pl> | 2017-03-25 14:17:07 +0100 |
| committer | Stanislaw Halik <sthalik@misaki.pl> | 2017-03-25 14:17:07 +0100 |
| commit | 35f7829af10c61e33dd2e2a7a015058e11a11ea0 (patch) | |
| tree | 7135010dcf8fd0a49f3020d52112709bcb883bd6 /eigen/unsupported/test | |
| parent | 6e8724193e40a932faf9064b664b529e7301c578 (diff) | |
update
Diffstat (limited to 'eigen/unsupported/test')
104 files changed, 22875 insertions, 3952 deletions
diff --git a/eigen/unsupported/test/CMakeLists.txt b/eigen/unsupported/test/CMakeLists.txt index 2e4cfdb..003c9de 100644 --- a/eigen/unsupported/test/CMakeLists.txt +++ b/eigen/unsupported/test/CMakeLists.txt @@ -1,10 +1,37 @@ +# generate split test header file only if it does not yet exist +# in order to prevent a rebuild everytime cmake is configured +if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h) + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h "") + foreach(i RANGE 1 999) + file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h + "#ifdef EIGEN_TEST_PART_${i}\n" + "#define CALL_SUBTEST_${i}(FUNC) CALL_SUBTEST(FUNC)\n" + "#else\n" + "#define CALL_SUBTEST_${i}(FUNC)\n" + "#endif\n\n" + ) + endforeach() +endif() set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Unsupported") add_custom_target(BuildUnsupported) -include_directories(../../test ../../unsupported ../../Eigen +include_directories(../../test ../../unsupported ../../Eigen ${CMAKE_CURRENT_BINARY_DIR}/../../test) +find_package (Threads) + +find_package(Xsmm) +if(XSMM_FOUND) + add_definitions("-DEIGEN_USE_LIBXSMM") + include_directories(${XSMM_INCLUDES}) + link_directories(${XSMM_LIBRARIES}) + set(EXTERNAL_LIBS ${EXTERNAL_LIBS} xsmm) + ei_add_property(EIGEN_TESTED_BACKENDS "Xsmm, ") +else(XSMM_FOUND) + ei_add_property(EIGEN_MISSING_BACKENDS "Xsmm, ") +endif(XSMM_FOUND) + find_package(GoogleHash) if(GOOGLEHASH_FOUND) add_definitions("-DEIGEN_GOOGLEHASH_SUPPORT") @@ -28,22 +55,30 @@ endif(ADOLC_FOUND) ei_add_test(NonLinearOptimization) ei_add_test(NumericalDiff) +ei_add_test(autodiff_scalar) ei_add_test(autodiff) + +if (NOT CMAKE_CXX_COMPILER MATCHES "clang\\+\\+$") ei_add_test(BVH) +endif() + ei_add_test(matrix_exponential) ei_add_test(matrix_function) ei_add_test(matrix_power) ei_add_test(matrix_square_root) ei_add_test(alignedvector3) + ei_add_test(FFT) +ei_add_test(EulerAngles) + find_package(MPFR 2.3.0) find_package(GMP) -if(MPFR_FOUND) +if(MPFR_FOUND AND EIGEN_COMPILER_SUPPORT_CXX11) include_directories(${MPFR_INCLUDES} ./mpreal) ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ") set(EIGEN_MPFR_TEST_LIBRARIES ${MPFR_LIBRARIES} ${GMP_LIBRARIES}) - ei_add_test(mpreal_support "" "${EIGEN_MPFR_TEST_LIBRARIES}" ) + ei_add_test(mpreal_support "-std=c++11" "${EIGEN_MPFR_TEST_LIBRARIES}" ) else() ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ") endif() @@ -82,9 +117,162 @@ endif() ei_add_test(polynomialsolver) ei_add_test(polynomialutils) -ei_add_test(kronecker_product) ei_add_test(splines) ei_add_test(gmres) ei_add_test(minres) ei_add_test(levenberg_marquardt) -ei_add_test(bdcsvd) +ei_add_test(kronecker_product) +ei_add_test(special_functions) + +# TODO: The following test names are prefixed with the cxx11 string, since historically +# the tests depended on c++11. This isn't the case anymore so we ought to rename them. +# FIXME: Old versions of MSVC fail to compile this code, so we just disable these tests +# when using visual studio. We should make the check more strict to enable the tests for +# newer versions of MSVC. 
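The `split_test_helper.h` hunk at the top of this file's diff is easier to follow with its output in view: for each `i` from 1 to 999 the `foreach` loop emits an `#ifdef EIGEN_TEST_PART_${i}` block, so a test binary compiled with `-DEIGEN_TEST_PART_1` runs only the subtests wrapped in `CALL_SUBTEST_1` while every other `CALL_SUBTEST_<i>` expands to nothing. A self-contained sketch (the `some_case` functions and the `CALL_SUBTEST` stand-in are hypothetical; the real `CALL_SUBTEST` comes from the test framework's `main.h`):

```cpp
// What the generated header contains for i == 1 and i == 2 (through i == 999):
#ifdef EIGEN_TEST_PART_1
#define CALL_SUBTEST_1(FUNC) CALL_SUBTEST(FUNC)
#else
#define CALL_SUBTEST_1(FUNC)
#endif

#ifdef EIGEN_TEST_PART_2
#define CALL_SUBTEST_2(FUNC) CALL_SUBTEST(FUNC)
#else
#define CALL_SUBTEST_2(FUNC)
#endif

// Trivial stand-in for the framework's CALL_SUBTEST so the sketch compiles:
#define CALL_SUBTEST(FUNC) do { FUNC; } while (0)

template <typename T> void some_case() {}  // hypothetical subtest

// Building with -DEIGEN_TEST_PART_1 runs only the float case,
// -DEIGEN_TEST_PART_2 only the double case.
int main()
{
  CALL_SUBTEST_1( some_case<float>() );
  CALL_SUBTEST_2( some_case<double>() );
  return 0;
}
```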
+if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") +ei_add_test(cxx11_tensor_dimension) +ei_add_test(cxx11_tensor_map) +ei_add_test(cxx11_tensor_assign) +ei_add_test(cxx11_tensor_comparisons) +ei_add_test(cxx11_tensor_forced_eval) +ei_add_test(cxx11_tensor_math) +ei_add_test(cxx11_tensor_const) +ei_add_test(cxx11_tensor_intdiv) +ei_add_test(cxx11_tensor_casts) +ei_add_test(cxx11_tensor_empty) +ei_add_test(cxx11_tensor_sugar) +ei_add_test(cxx11_tensor_roundings) +ei_add_test(cxx11_tensor_layout_swap) +ei_add_test(cxx11_tensor_io) +if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8") + # This test requires __uint128_t which is only available on 64bit systems + ei_add_test(cxx11_tensor_uint128) +endif() +endif() + +if(EIGEN_TEST_CXX11) + if(EIGEN_TEST_SYCL) + ei_add_test_sycl(cxx11_tensor_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_forced_eval_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_broadcast_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_reduction_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_morphing_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_shuffling_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_padding_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_builtins_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_contract_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_concatenation_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_reverse_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_convolution_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_striding_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_chipping_sycl "-std=c++11") + endif(EIGEN_TEST_SYCL) + # It should be safe to always run these tests as there is some fallback code for + # older compiler that don't support cxx11. 
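The `CMAKE_SIZEOF_VOID_P` guard above encodes an assumption worth making explicit: `__uint128_t` is a GCC/Clang extension that is, in practice, only provided on 64-bit targets. A minimal compile-time probe of the same assumption (an illustration, not part of the patch; `__SIZEOF_INT128__` is the compilers' own availability macro):

```cpp
#include <cstdint>

int main()
{
  // Mirrors the CMake check: cxx11_tensor_uint128 needs a 64-bit target.
  static_assert(sizeof(void*) == 8, "requires a 64-bit target");
#if defined(__SIZEOF_INT128__)
  __uint128_t x = (__uint128_t)UINT64_MAX + 1;  // 2^64 fits only in 128 bits
  (void)x;
#endif
  return 0;
}
```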
+ set(CMAKE_CXX_STANDARD 11) + + ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}") + ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}") + ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}") + + ei_add_test(cxx11_meta) + ei_add_test(cxx11_tensor_simple) +# ei_add_test(cxx11_tensor_symmetry) + ei_add_test(cxx11_tensor_index_list) + ei_add_test(cxx11_tensor_mixed_indices) + ei_add_test(cxx11_tensor_contraction) + ei_add_test(cxx11_tensor_convolution) + ei_add_test(cxx11_tensor_expr) + ei_add_test(cxx11_tensor_fixed_size) + ei_add_test(cxx11_tensor_of_const_values) + ei_add_test(cxx11_tensor_of_complex) + ei_add_test(cxx11_tensor_of_strings) + ei_add_test(cxx11_tensor_lvalue) + ei_add_test(cxx11_tensor_broadcasting) + ei_add_test(cxx11_tensor_chipping) + ei_add_test(cxx11_tensor_concatenation) + ei_add_test(cxx11_tensor_inflation) + ei_add_test(cxx11_tensor_morphing) + ei_add_test(cxx11_tensor_padding) + ei_add_test(cxx11_tensor_patch) + ei_add_test(cxx11_tensor_image_patch) + ei_add_test(cxx11_tensor_volume_patch) + ei_add_test(cxx11_tensor_reduction) + ei_add_test(cxx11_tensor_argmax) + ei_add_test(cxx11_tensor_shuffling) + ei_add_test(cxx11_tensor_striding) + ei_add_test(cxx11_tensor_notification "-pthread" "${CMAKE_THREAD_LIBS_INIT}") + ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}") + ei_add_test(cxx11_tensor_ref) + ei_add_test(cxx11_tensor_random) + ei_add_test(cxx11_tensor_generator) + ei_add_test(cxx11_tensor_custom_op) + ei_add_test(cxx11_tensor_custom_index) + ei_add_test(cxx11_tensor_fft) + ei_add_test(cxx11_tensor_ifft) + ei_add_test(cxx11_tensor_scan) + +endif() + +# These tests needs nvcc +find_package(CUDA 7.0) +if(CUDA_FOUND AND EIGEN_TEST_CUDA) + # Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor + # and -fno-check-new flags since they trigger thousands of compilation warnings + # in the CUDA runtime + # Also remove -ansi that is incompatible with std=c++11. 
+ string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + + message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS}) + + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE) + endif() + if(EIGEN_TEST_CUDA_CLANG) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 --cuda-gpu-arch=sm_${EIGEN_CUDA_COMPUTE_ARCH}") + endif() + + set(EIGEN_CUDA_RELAXED_CONSTEXPR "--expt-relaxed-constexpr") + if (${CUDA_VERSION} STREQUAL "7.0") + set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr") + endif() + + if( (NOT EIGEN_TEST_CXX11) OR (CMAKE_VERSION VERSION_LESS 3.3)) + set(EIGEN_CUDA_CXX11_FLAG "-std=c++11") + else() + # otherwise the flag has already been added because of the above set(CMAKE_CXX_STANDARD 11) + set(EIGEN_CUDA_CXX11_FLAG "") + endif() + + set(CUDA_NVCC_FLAGS "${EIGEN_CUDA_CXX11_FLAG} ${EIGEN_CUDA_RELAXED_CONSTEXPR} -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\" ${CUDA_NVCC_FLAGS}") + cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") + set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") + + ei_add_test(cxx11_tensor_complex_cuda) + ei_add_test(cxx11_tensor_complex_cwise_ops_cuda) + ei_add_test(cxx11_tensor_reduction_cuda) + ei_add_test(cxx11_tensor_argmax_cuda) + ei_add_test(cxx11_tensor_cast_float16_cuda) + ei_add_test(cxx11_tensor_scan_cuda) + + # Contractions require arch 3.0 or higher + if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 29) + ei_add_test(cxx11_tensor_device) + ei_add_test(cxx11_tensor_cuda) + ei_add_test(cxx11_tensor_contract_cuda) + ei_add_test(cxx11_tensor_of_float16_cuda) + endif() + + # The random number generation code requires arch 3.5 or greater. + if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 34) + ei_add_test(cxx11_tensor_random_cuda) + endif() + + + unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) +endif() diff --git a/eigen/unsupported/test/EulerAngles.cpp b/eigen/unsupported/test/EulerAngles.cpp new file mode 100644 index 0000000..79ee728 --- /dev/null +++ b/eigen/unsupported/test/EulerAngles.cpp @@ -0,0 +1,292 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <unsupported/Eigen/EulerAngles> + +using namespace Eigen; + +// Unfortunately, we need to specialize it in order to work. 
(We could add it in main.h test framework) +template <typename Scalar, class System> +bool verifyIsApprox(const Eigen::EulerAngles<Scalar, System>& a, const Eigen::EulerAngles<Scalar, System>& b) +{ + return verifyIsApprox(a.angles(), b.angles()); +} + +// Verify that x is in the approxed range [a, b] +#define VERIFY_APPROXED_RANGE(a, x, b) \ + do { \ + VERIFY_IS_APPROX_OR_LESS_THAN(a, x); \ + VERIFY_IS_APPROX_OR_LESS_THAN(x, b); \ + } while(0) + +const char X = EULER_X; +const char Y = EULER_Y; +const char Z = EULER_Z; + +template<typename Scalar, class EulerSystem> +void verify_euler(const EulerAngles<Scalar, EulerSystem>& e) +{ + typedef EulerAngles<Scalar, EulerSystem> EulerAnglesType; + typedef Matrix<Scalar,3,3> Matrix3; + typedef Matrix<Scalar,3,1> Vector3; + typedef Quaternion<Scalar> QuaternionType; + typedef AngleAxis<Scalar> AngleAxisType; + + const Scalar ONE = Scalar(1); + const Scalar HALF_PI = Scalar(EIGEN_PI / 2); + const Scalar PI = Scalar(EIGEN_PI); + + // It's very important calc the acceptable precision depending on the distance from the pole. + const Scalar longitudeRadius = std::abs( + EulerSystem::IsTaitBryan ? + std::cos(e.beta()) : + std::sin(e.beta()) + ); + Scalar precision = test_precision<Scalar>() / longitudeRadius; + + Scalar betaRangeStart, betaRangeEnd; + if (EulerSystem::IsTaitBryan) + { + betaRangeStart = -HALF_PI; + betaRangeEnd = HALF_PI; + } + else + { + if (!EulerSystem::IsBetaOpposite) + { + betaRangeStart = 0; + betaRangeEnd = PI; + } + else + { + betaRangeStart = -PI; + betaRangeEnd = 0; + } + } + + const Vector3 I = EulerAnglesType::AlphaAxisVector(); + const Vector3 J = EulerAnglesType::BetaAxisVector(); + const Vector3 K = EulerAnglesType::GammaAxisVector(); + + // Is approx checks + VERIFY(e.isApprox(e)); + VERIFY_IS_APPROX(e, e); + VERIFY_IS_NOT_APPROX(e, EulerAnglesType(e.alpha() + ONE, e.beta() + ONE, e.gamma() + ONE)); + + const Matrix3 m(e); + VERIFY_IS_APPROX(Scalar(m.determinant()), ONE); + + EulerAnglesType ebis(m); + + // When no roll(acting like polar representation), we have the best precision. + // One of those cases is when the Euler angles are on the pole, and because it's singular case, + // the computation returns no roll. + if (ebis.beta() == 0) + precision = test_precision<Scalar>(); + + // Check that eabis in range + VERIFY_APPROXED_RANGE(-PI, ebis.alpha(), PI); + VERIFY_APPROXED_RANGE(betaRangeStart, ebis.beta(), betaRangeEnd); + VERIFY_APPROXED_RANGE(-PI, ebis.gamma(), PI); + + const Matrix3 mbis(AngleAxisType(ebis.alpha(), I) * AngleAxisType(ebis.beta(), J) * AngleAxisType(ebis.gamma(), K)); + VERIFY_IS_APPROX(Scalar(mbis.determinant()), ONE); + VERIFY_IS_APPROX(mbis, ebis.toRotationMatrix()); + /*std::cout << "===================\n" << + "e: " << e << std::endl << + "eabis: " << eabis.transpose() << std::endl << + "m: " << m << std::endl << + "mbis: " << mbis << std::endl << + "X: " << (m * Vector3::UnitX()).transpose() << std::endl << + "X: " << (mbis * Vector3::UnitX()).transpose() << std::endl;*/ + VERIFY(m.isApprox(mbis, precision)); + + // Test if ea and eabis are the same + // Need to check both singular and non-singular cases + // There are two singular cases. + // 1. When I==K and sin(ea(1)) == 0 + // 2. When I!=K and cos(ea(1)) == 0 + + // TODO: Make this test work well, and use range saturation function. + /*// If I==K, and ea[1]==0, then there no unique solution. + // The remark apply in the case where I!=K, and |ea[1]| is close to +-pi/2. 
+ if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision<Scalar>())) ) + VERIFY_IS_APPROX(ea, eabis);*/ + + // Quaternions + const QuaternionType q(e); + ebis = q; + const QuaternionType qbis(ebis); + VERIFY(internal::isApprox<Scalar>(std::abs(q.dot(qbis)), ONE, precision)); + //VERIFY_IS_APPROX(eabis, eabis2);// Verify that the euler angles are still the same + + // A suggestion for simple product test when will be supported. + /*EulerAnglesType e2(PI/2, PI/2, PI/2); + Matrix3 m2(e2); + VERIFY_IS_APPROX(e*e2, m*m2);*/ +} + +template<signed char A, signed char B, signed char C, typename Scalar> +void verify_euler_vec(const Matrix<Scalar,3,1>& ea) +{ + verify_euler(EulerAngles<Scalar, EulerSystem<A, B, C> >(ea[0], ea[1], ea[2])); +} + +template<signed char A, signed char B, signed char C, typename Scalar> +void verify_euler_all_neg(const Matrix<Scalar,3,1>& ea) +{ + verify_euler_vec<+A,+B,+C>(ea); + verify_euler_vec<+A,+B,-C>(ea); + verify_euler_vec<+A,-B,+C>(ea); + verify_euler_vec<+A,-B,-C>(ea); + + verify_euler_vec<-A,+B,+C>(ea); + verify_euler_vec<-A,+B,-C>(ea); + verify_euler_vec<-A,-B,+C>(ea); + verify_euler_vec<-A,-B,-C>(ea); +} + +template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea) +{ + verify_euler_all_neg<X,Y,Z>(ea); + verify_euler_all_neg<X,Y,X>(ea); + verify_euler_all_neg<X,Z,Y>(ea); + verify_euler_all_neg<X,Z,X>(ea); + + verify_euler_all_neg<Y,Z,X>(ea); + verify_euler_all_neg<Y,Z,Y>(ea); + verify_euler_all_neg<Y,X,Z>(ea); + verify_euler_all_neg<Y,X,Y>(ea); + + verify_euler_all_neg<Z,X,Y>(ea); + verify_euler_all_neg<Z,X,Z>(ea); + verify_euler_all_neg<Z,Y,X>(ea); + verify_euler_all_neg<Z,Y,Z>(ea); +} + +template<typename Scalar> void check_singular_cases(const Scalar& singularBeta) +{ + typedef Matrix<Scalar,3,1> Vector3; + const Scalar PI = Scalar(EIGEN_PI); + + for (Scalar epsilon = NumTraits<Scalar>::epsilon(); epsilon < 1; epsilon *= Scalar(1.2)) + { + check_all_var(Vector3(PI/4, singularBeta, PI/3)); + check_all_var(Vector3(PI/4, singularBeta - epsilon, PI/3)); + check_all_var(Vector3(PI/4, singularBeta - Scalar(1.5)*epsilon, PI/3)); + check_all_var(Vector3(PI/4, singularBeta - 2*epsilon, PI/3)); + check_all_var(Vector3(PI*Scalar(0.8), singularBeta - epsilon, Scalar(0.9)*PI)); + check_all_var(Vector3(PI*Scalar(-0.9), singularBeta + epsilon, PI*Scalar(0.3))); + check_all_var(Vector3(PI*Scalar(-0.6), singularBeta + Scalar(1.5)*epsilon, PI*Scalar(0.3))); + check_all_var(Vector3(PI*Scalar(-0.5), singularBeta + 2*epsilon, PI*Scalar(0.4))); + check_all_var(Vector3(PI*Scalar(0.9), singularBeta + epsilon, Scalar(0.8)*PI)); + } + + // This one for sanity, it had a problem with near pole cases in float scalar. 
+ check_all_var(Vector3(PI*Scalar(0.8), singularBeta - Scalar(1E-6), Scalar(0.9)*PI)); +} + +template<typename Scalar> void eulerangles_manual() +{ + typedef Matrix<Scalar,3,1> Vector3; + const Vector3 Zero = Vector3::Zero(); + const Scalar PI = Scalar(EIGEN_PI); + + check_all_var(Zero); + + // singular cases + check_singular_cases(PI/2); + check_singular_cases(-PI/2); + + check_singular_cases(Scalar(0)); + check_singular_cases(Scalar(-0)); + + check_singular_cases(PI); + check_singular_cases(-PI); + + // non-singular cases + VectorXd alpha = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI); + VectorXd beta = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.49) * PI, Scalar(0.49) * PI); + VectorXd gamma = VectorXd::LinSpaced(Eigen::Sequential, 20, Scalar(-0.99) * PI, PI); + for (int i = 0; i < alpha.size(); ++i) { + for (int j = 0; j < beta.size(); ++j) { + for (int k = 0; k < gamma.size(); ++k) { + check_all_var(Vector3d(alpha(i), beta(j), gamma(k))); + } + } + } +} + +template<typename Scalar> void eulerangles_rand() +{ + typedef Matrix<Scalar,3,3> Matrix3; + typedef Matrix<Scalar,3,1> Vector3; + typedef Array<Scalar,3,1> Array3; + typedef Quaternion<Scalar> Quaternionx; + typedef AngleAxis<Scalar> AngleAxisType; + + Scalar a = internal::random<Scalar>(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); + Quaternionx q1; + q1 = AngleAxisType(a, Vector3::Random().normalized()); + Matrix3 m; + m = q1; + + Vector3 ea = m.eulerAngles(0,1,2); + check_all_var(ea); + ea = m.eulerAngles(0,1,0); + check_all_var(ea); + + // Check with purely random Quaternion: + q1.coeffs() = Quaternionx::Coefficients::Random().normalized(); + m = q1; + ea = m.eulerAngles(0,1,2); + check_all_var(ea); + ea = m.eulerAngles(0,1,0); + check_all_var(ea); + + // Check with random angles in range [0:pi]x[-pi:pi]x[-pi:pi]. 
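`eulerangles_rand()` relies on the documented contract of Eigen's built-in `eulerAngles`: `m.eulerAngles(0,1,2)` returns angles that rebuild `m` as a product of `AngleAxis` rotations about X, then Y, then Z. A self-contained round-trip check of that contract (standard Eigen API; the angle 0.3 rad is arbitrary), after which the diff resumes with the range-restricted random draws announced in the comment above:

```cpp
#include <Eigen/Geometry>
#include <iostream>

int main()
{
  using namespace Eigen;
  Matrix3d m = AngleAxisd(0.3, Vector3d::UnitZ()).toRotationMatrix();
  Vector3d ea = m.eulerAngles(0, 1, 2);  // X, Y, Z convention
  Matrix3d n = (AngleAxisd(ea[0], Vector3d::UnitX())
              * AngleAxisd(ea[1], Vector3d::UnitY())
              * AngleAxisd(ea[2], Vector3d::UnitZ())).toRotationMatrix();
  std::cout << "round-trip error: " << (m - n).norm() << "\n";  // ~1e-16
}
```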
+ ea = (Array3::Random() + Array3(1,0,0))*Scalar(EIGEN_PI)*Array3(0.5,1,1); + check_all_var(ea); + + ea[2] = ea[0] = internal::random<Scalar>(0,Scalar(EIGEN_PI)); + check_all_var(ea); + + ea[0] = ea[1] = internal::random<Scalar>(0,Scalar(EIGEN_PI)); + check_all_var(ea); + + ea[1] = 0; + check_all_var(ea); + + ea.head(2).setZero(); + check_all_var(ea); + + ea.setZero(); + check_all_var(ea); +} + +void test_EulerAngles() +{ + // Simple cast test + EulerAnglesXYZd onesEd(1, 1, 1); + EulerAnglesXYZf onesEf = onesEd.cast<float>(); + VERIFY_IS_APPROX(onesEd, onesEf.cast<double>()); + + CALL_SUBTEST_1( eulerangles_manual<float>() ); + CALL_SUBTEST_2( eulerangles_manual<double>() ); + + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_3( eulerangles_rand<float>() ); + CALL_SUBTEST_4( eulerangles_rand<double>() ); + } + + // TODO: Add tests for auto diff + // TODO: Add tests for complex numbers +} diff --git a/eigen/unsupported/test/FFTW.cpp b/eigen/unsupported/test/FFTW.cpp index d3718e2..8b7528f 100644 --- a/eigen/unsupported/test/FFTW.cpp +++ b/eigen/unsupported/test/FFTW.cpp @@ -18,11 +18,11 @@ using namespace Eigen; template < typename T> -complex<long double> promote(complex<T> x) { return complex<long double>(x.real(),x.imag()); } +complex<long double> promote(complex<T> x) { return complex<long double>((long double)x.real(),(long double)x.imag()); } -complex<long double> promote(float x) { return complex<long double>( x); } -complex<long double> promote(double x) { return complex<long double>( x); } -complex<long double> promote(long double x) { return complex<long double>( x); } +complex<long double> promote(float x) { return complex<long double>((long double)x); } +complex<long double> promote(double x) { return complex<long double>((long double)x); } +complex<long double> promote(long double x) { return complex<long double>((long double)x); } template <typename VT1,typename VT2> @@ -33,7 +33,7 @@ complex<long double> promote(long double x) { return complex<long double>( x); long double pi = acos((long double)-1 ); for (size_t k0=0;k0<(size_t)fftbuf.size();++k0) { complex<long double> acc = 0; - long double phinc = -2.*k0* pi / timebuf.size(); + long double phinc = (long double)(-2.)*k0* pi / timebuf.size(); for (size_t k1=0;k1<(size_t)timebuf.size();++k1) { acc += promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) ); } @@ -54,8 +54,8 @@ complex<long double> promote(long double x) { return complex<long double>( x); long double difpower=0; size_t n = (min)( buf1.size(),buf2.size() ); for (size_t k=0;k<n;++k) { - totalpower += (numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2.; - difpower += numext::abs2(buf1[k] - buf2[k]); + totalpower += (long double)((numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2); + difpower += (long double)(numext::abs2(buf1[k] - buf2[k])); } return sqrt(difpower/totalpower); } @@ -93,19 +93,19 @@ void test_scalar_generic(int nfft) fft.SetFlag(fft.HalfSpectrum ); fft.fwd( freqBuf,tbuf); VERIFY((size_t)freqBuf.size() == (size_t)( (nfft>>1)+1) ); - VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>() );// gross check + VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check fft.ClearFlag(fft.HalfSpectrum ); fft.fwd( freqBuf,tbuf); VERIFY( (size_t)freqBuf.size() == (size_t)nfft); - VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>() );// gross check + VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check if (nfft&1) return; // odd FFTs get the wrong size inverse FFT ScalarVector tbuf2; fft.inv( tbuf2 , freqBuf); 
- VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>() );// gross check + VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check // verify that the Unscaled flag takes effect @@ -121,12 +121,12 @@ void test_scalar_generic(int nfft) //for (size_t i=0;i<(size_t) tbuf.size();++i) // cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " - in=" << tbuf[i] << " => " << (tbuf3[i] - tbuf[i] ) << endl; - VERIFY( dif_rmse(tbuf,tbuf3) < test_precision<T>() );// gross check + VERIFY( T(dif_rmse(tbuf,tbuf3)) < test_precision<T>() );// gross check // verify that ClearFlag works fft.ClearFlag(fft.Unscaled); fft.inv( tbuf2 , freqBuf); - VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>() );// gross check + VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check } template <typename T> @@ -152,10 +152,10 @@ void test_complex_generic(int nfft) inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) ); fft.fwd( outbuf , inbuf); - VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>() );// gross check + VERIFY( T(fft_rmse(outbuf,inbuf)) < test_precision<T>() );// gross check fft.inv( buf3 , outbuf); - VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>() );// gross check + VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check // verify that the Unscaled flag takes effect ComplexVector buf4; @@ -163,12 +163,12 @@ void test_complex_generic(int nfft) fft.inv( buf4 , outbuf); for (int k=0;k<nfft;++k) buf4[k] *= T(1./nfft); - VERIFY( dif_rmse(inbuf,buf4) < test_precision<T>() );// gross check + VERIFY( T(dif_rmse(inbuf,buf4)) < test_precision<T>() );// gross check // verify that ClearFlag works fft.ClearFlag(fft.Unscaled); fft.inv( buf3 , outbuf); - VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>() );// gross check + VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check } template <typename T> diff --git a/eigen/unsupported/test/NonLinearOptimization.cpp b/eigen/unsupported/test/NonLinearOptimization.cpp index d7376b0..1d682dd 100644 --- a/eigen/unsupported/test/NonLinearOptimization.cpp +++ b/eigen/unsupported/test/NonLinearOptimization.cpp @@ -12,7 +12,8 @@ // It is intended to be done for this test only. #include <Eigen/src/Core/util/DisableStupidWarnings.h> -using std::sqrt; +// tolerance for chekcing number of iterations +#define LM_EVAL_COUNT_TOL 4/3 int fcn_chkder(const VectorXd &x, VectorXd &fvec, MatrixXd &fjac, int iflag) { @@ -246,9 +247,9 @@ struct hybrj_functor : Functor<double> int operator()(const VectorXd &x, VectorXd &fvec) { double temp, temp1, temp2; - const int n = x.size(); + const VectorXd::Index n = x.size(); assert(fvec.size()==n); - for (int k = 0; k < n; k++) + for (VectorXd::Index k = 0; k < n; k++) { temp = (3. - 2.*x[k])*x[k]; temp1 = 0.; @@ -261,12 +262,12 @@ struct hybrj_functor : Functor<double> } int df(const VectorXd &x, MatrixXd &fjac) { - const int n = x.size(); + const VectorXd::Index n = x.size(); assert(fjac.rows()==n); assert(fjac.cols()==n); - for (int k = 0; k < n; k++) + for (VectorXd::Index k = 0; k < n; k++) { - for (int j = 0; j < n; j++) + for (VectorXd::Index j = 0; j < n; j++) fjac(k,j) = 0.; fjac(k,k) = 3.- 4.*x[k]; if (k) fjac(k,k-1) = -1.; @@ -351,10 +352,10 @@ struct hybrd_functor : Functor<double> int operator()(const VectorXd &x, VectorXd &fvec) const { double temp, temp1, temp2; - const int n = x.size(); + const VectorXd::Index n = x.size(); assert(fvec.size()==n); - for (int k=0; k < n; k++) + for (VectorXd::Index k=0; k < n; k++) { temp = (3. 
- 2.*x[k])*x[k]; temp1 = 0.; @@ -455,7 +456,7 @@ struct lmstr_functor : Functor<double> assert(jac_row.size()==x.size()); double tmp1, tmp2, tmp3, tmp4; - int i = rownb-2; + VectorXd::Index i = rownb-2; tmp1 = i+1; tmp2 = 16 - i - 1; tmp3 = (i>=8)? tmp2 : tmp1; @@ -1022,7 +1023,9 @@ void testNistLanczos1(void) VERIFY_IS_EQUAL(lm.nfev, 79); VERIFY_IS_EQUAL(lm.njev, 72); // check norm^2 - VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.430899764097e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats + std::cout.precision(30); + std::cout << lm.fvec.squaredNorm() << "\n"; + VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25); // check x VERIFY_IS_APPROX(x[0], 9.5100000027E-02); VERIFY_IS_APPROX(x[1], 1.0000000001E+00); @@ -1043,7 +1046,7 @@ void testNistLanczos1(void) VERIFY_IS_EQUAL(lm.nfev, 9); VERIFY_IS_EQUAL(lm.njev, 8); // check norm^2 - VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.428595533845e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats + VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25); // check x VERIFY_IS_APPROX(x[0], 9.5100000027E-02); VERIFY_IS_APPROX(x[1], 1.0000000001E+00); @@ -1262,8 +1265,8 @@ void testNistBoxBOD(void) // check return value VERIFY_IS_EQUAL(info, 1); - VERIFY_IS_EQUAL(lm.nfev, 31); - VERIFY_IS_EQUAL(lm.njev, 25); + VERIFY(lm.nfev < 31); // 31 + VERIFY(lm.njev < 25); // 25 // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.1680088766E+03); // check x @@ -1342,10 +1345,6 @@ void testNistMGH17(void) lm.parameters.maxfev = 1000; info = lm.minimize(x); - // check return value - VERIFY_IS_EQUAL(info, 2); - VERIFY_IS_EQUAL(lm.nfev, 602 ); - VERIFY_IS_EQUAL(lm.njev, 545 ); // check norm^2 VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.4648946975E-05); // check x @@ -1354,6 +1353,15 @@ void testNistMGH17(void) VERIFY_IS_APPROX(x[2], -1.4646871366E+00); VERIFY_IS_APPROX(x[3], 1.2867534640E-02); VERIFY_IS_APPROX(x[4], 2.2122699662E-02); + + // check return value + VERIFY_IS_EQUAL(info, 2); + ++g_test_level; + VERIFY_IS_EQUAL(lm.nfev, 602); // 602 + VERIFY_IS_EQUAL(lm.njev, 545); // 545 + --g_test_level; + VERIFY(lm.nfev < 602 * LM_EVAL_COUNT_TOL); + VERIFY(lm.njev < 545 * LM_EVAL_COUNT_TOL); /* * Second try @@ -1832,8 +1840,8 @@ void test_NonLinearOptimization() // NIST tests, level of difficulty = "Average" CALL_SUBTEST/*_5*/(testNistHahn1()); CALL_SUBTEST/*_6*/(testNistMisra1d()); -// CALL_SUBTEST/*_7*/(testNistMGH17()); -// CALL_SUBTEST/*_8*/(testNistLanczos1()); + CALL_SUBTEST/*_7*/(testNistMGH17()); + CALL_SUBTEST/*_8*/(testNistLanczos1()); // // NIST tests, level of difficulty = "Higher" CALL_SUBTEST/*_9*/(testNistRat42()); diff --git a/eigen/unsupported/test/alignedvector3.cpp b/eigen/unsupported/test/alignedvector3.cpp index fc2bc21..252cb1d 100644 --- a/eigen/unsupported/test/alignedvector3.cpp +++ b/eigen/unsupported/test/alignedvector3.cpp @@ -10,6 +10,16 @@ #include "main.h" #include <unsupported/Eigen/AlignedVector3> +namespace Eigen { + +template<typename T,typename Derived> +T test_relative_error(const AlignedVector3<T> &a, const MatrixBase<Derived> &b) +{ + return test_relative_error(a.coeffs().template head<3>(), b); +} + +} + template<typename Scalar> void alignedvector3() { @@ -19,8 +29,8 @@ void alignedvector3() typedef Matrix<Scalar,3,3> Mat33; typedef AlignedVector3<Scalar> FastType; RefType r1(RefType::Random()), r2(RefType::Random()), r3(RefType::Random()), - r4(RefType::Random()), r5(RefType::Random()), r6(RefType::Random()); - FastType f1(r1), f2(r2), f3(r3), f4(r4), f5(r5), f6(r6); + 
r4(RefType::Random()), r5(RefType::Random()); + FastType f1(r1), f2(r2), f3(r3), f4(r4), f5(r5); Mat33 m1(Mat33::Random()); VERIFY_IS_APPROX(f1,r1); @@ -49,6 +59,21 @@ void alignedvector3() f2.normalize(); r2.normalize(); VERIFY_IS_APPROX(f2,r2); + + { + FastType f6 = RefType::Zero(); + FastType f7 = FastType::Zero(); + VERIFY_IS_APPROX(f6,f7); + f6 = r4+r1; + VERIFY_IS_APPROX(f6,r4+r1); + f6 -= Scalar(2)*r4; + VERIFY_IS_APPROX(f6,r1-r4); + } + + std::stringstream ss1, ss2; + ss1 << f1; + ss2 << r1; + VERIFY(ss1.str()==ss2.str()); } void test_alignedvector3() diff --git a/eigen/unsupported/test/autodiff.cpp b/eigen/unsupported/test/autodiff.cpp index 7c112a1..8574313 100644 --- a/eigen/unsupported/test/autodiff.cpp +++ b/eigen/unsupported/test/autodiff.cpp @@ -16,7 +16,8 @@ EIGEN_DONT_INLINE Scalar foo(const Scalar& x, const Scalar& y) using namespace std; // return x+std::sin(y); EIGEN_ASM_COMMENT("mybegin"); - return static_cast<Scalar>(x*2 - pow(x,2) + 2*sqrt(y*y) - 4 * sin(x) + 2 * cos(y) - exp(-0.5*x*x)); + // pow(float, int) promotes to pow(double, double) + return x*2 - 1 + static_cast<Scalar>(pow(1+x,2)) + 2*sqrt(y*y+0) - 4 * sin(0+x) + 2 * cos(y+0) - exp(Scalar(-0.5)*x*x+0); //return x+2*y*x;//x*2 -std::pow(x,2);//(2*y/x);// - y*2; EIGEN_ASM_COMMENT("myend"); } @@ -104,6 +105,89 @@ struct TestFunc1 } }; + +#if EIGEN_HAS_VARIADIC_TEMPLATES +/* Test functor for the C++11 features. */ +template <typename Scalar> +struct integratorFunctor +{ + typedef Matrix<Scalar, 2, 1> InputType; + typedef Matrix<Scalar, 2, 1> ValueType; + + /* + * Implementation starts here. + */ + integratorFunctor(const Scalar gain) : _gain(gain) {} + integratorFunctor(const integratorFunctor& f) : _gain(f._gain) {} + const Scalar _gain; + + template <typename T1, typename T2> + void operator() (const T1 &input, T2 *output, const Scalar dt) const + { + T2 &o = *output; + + /* Integrator to test the AD. */ + o[0] = input[0] + input[1] * dt * _gain; + o[1] = input[1] * _gain; + } + + /* Only needed for the test */ + template <typename T1, typename T2, typename T3> + void operator() (const T1 &input, T2 *output, T3 *jacobian, const Scalar dt) const + { + T2 &o = *output; + + /* Integrator to test the AD. 
*/ + o[0] = input[0] + input[1] * dt * _gain; + o[1] = input[1] * _gain; + + if (jacobian) + { + T3 &j = *jacobian; + + j(0, 0) = 1; + j(0, 1) = dt * _gain; + j(1, 0) = 0; + j(1, 1) = _gain; + } + } + +}; + +template<typename Func> void forward_jacobian_cpp11(const Func& f) +{ + typedef typename Func::ValueType::Scalar Scalar; + typedef typename Func::ValueType ValueType; + typedef typename Func::InputType InputType; + typedef typename AutoDiffJacobian<Func>::JacobianType JacobianType; + + InputType x = InputType::Random(InputType::RowsAtCompileTime); + ValueType y, yref; + JacobianType j, jref; + + const Scalar dt = internal::random<double>(); + + jref.setZero(); + yref.setZero(); + f(x, &yref, &jref, dt); + + //std::cerr << "y, yref, jref: " << "\n"; + //std::cerr << y.transpose() << "\n\n"; + //std::cerr << yref << "\n\n"; + //std::cerr << jref << "\n\n"; + + AutoDiffJacobian<Func> autoj(f); + autoj(x, &y, &j, dt); + + //std::cerr << "y j (via autodiff): " << "\n"; + //std::cerr << y.transpose() << "\n\n"; + //std::cerr << j << "\n\n"; + + VERIFY_IS_APPROX(y, yref); + VERIFY_IS_APPROX(j, jref); +} +#endif + template<typename Func> void forward_jacobian(const Func& f) { typename Func::InputType x = Func::InputType::Random(f.inputs()); @@ -127,8 +211,8 @@ template<typename Func> void forward_jacobian(const Func& f) VERIFY_IS_APPROX(j, jref); } - // TODO also check actual derivatives! +template <int> void test_autodiff_scalar() { Vector2f p = Vector2f::Random(); @@ -139,7 +223,9 @@ void test_autodiff_scalar() VERIFY_IS_APPROX(res.value(), foo(p.x(),p.y())); } + // TODO also check actual derivatives! +template <int> void test_autodiff_vector() { Vector2f p = Vector2f::Random(); @@ -148,11 +234,12 @@ void test_autodiff_vector() VectorAD ap = p.cast<AD>(); ap.x().derivatives() = Vector2f::UnitX(); ap.y().derivatives() = Vector2f::UnitY(); - + AD res = foo<VectorAD>(ap); VERIFY_IS_APPROX(res.value(), foo(p)); } +template <int> void test_autodiff_jacobian() { CALL_SUBTEST(( forward_jacobian(TestFunc1<double,2,2>()) )); @@ -160,6 +247,54 @@ void test_autodiff_jacobian() CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,2>()) )); CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,3>()) )); CALL_SUBTEST(( forward_jacobian(TestFunc1<double>(3,3)) )); +#if EIGEN_HAS_VARIADIC_TEMPLATES + CALL_SUBTEST(( forward_jacobian_cpp11(integratorFunctor<double>(10)) )); +#endif +} + + +template <int> +void test_autodiff_hessian() +{ + typedef AutoDiffScalar<VectorXd> AD; + typedef Matrix<AD,Eigen::Dynamic,1> VectorAD; + typedef AutoDiffScalar<VectorAD> ADD; + typedef Matrix<ADD,Eigen::Dynamic,1> VectorADD; + VectorADD x(2); + double s1 = internal::random<double>(), s2 = internal::random<double>(), s3 = internal::random<double>(), s4 = internal::random<double>(); + x(0).value()=s1; + x(1).value()=s2; + + //set unit vectors for the derivative directions (partial derivatives of the input vector) + x(0).derivatives().resize(2); + x(0).derivatives().setZero(); + x(0).derivatives()(0)= 1; + x(1).derivatives().resize(2); + x(1).derivatives().setZero(); + x(1).derivatives()(1)=1; + + //repeat partial derivatives for the inner AutoDiffScalar + x(0).value().derivatives() = VectorXd::Unit(2,0); + x(1).value().derivatives() = VectorXd::Unit(2,1); + + //set the hessian matrix to zero + for(int idx=0; idx<2; idx++) { + x(0).derivatives()(idx).derivatives() = VectorXd::Zero(2); + x(1).derivatives()(idx).derivatives() = VectorXd::Zero(2); + } + + ADD y = sin(AD(s3)*x(0) + AD(s4)*x(1)); + + 
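For reference, the checks that follow are the hand derivatives of `y = sin(s3*x0 + s4*x1)` at `(x0, x1) = (s1, s2)`: the gradient is `(s3*cos(s1*s3 + s2*s4), s4*cos(s1*s3 + s2*s4))`, and the Hessian columns are `-sin(s1*s3 + s2*s4)*(s3*s3, s4*s3)` and `-sin(s1*s3 + s2*s4)*(s3*s4, s4*s4)`. The nested AutoDiffScalar exposes the gradient twice (once as `y.value().derivatives()`, once as the values of `y.derivatives()`), which is what the first two VERIFYs compare.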
VERIFY_IS_APPROX(y.value().derivatives()(0), y.derivatives()(0).value()); + VERIFY_IS_APPROX(y.value().derivatives()(1), y.derivatives()(1).value()); + VERIFY_IS_APPROX(y.value().derivatives()(0), s3*std::cos(s1*s3+s2*s4)); + VERIFY_IS_APPROX(y.value().derivatives()(1), s4*std::cos(s1*s3+s2*s4)); + VERIFY_IS_APPROX(y.derivatives()(0).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s3,s4*s3)); + VERIFY_IS_APPROX(y.derivatives()(1).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s4,s4*s4)); + + ADD z = x(0)*x(1); + VERIFY_IS_APPROX(z.derivatives()(0).derivatives(), Vector2d(0,1)); + VERIFY_IS_APPROX(z.derivatives()(1).derivatives(), Vector2d(1,0)); } double bug_1222() { @@ -171,14 +306,62 @@ double bug_1222() { return denom.value(); } +double bug_1223() { + using std::min; + typedef Eigen::AutoDiffScalar<Eigen::Vector3d> AD; + + const double _cv1_3 = 1.0; + const AD chi_3 = 1.0; + const AD denom = 1.0; + + // failed because implementation of min attempts to construct ADS<DerType&> via constructor AutoDiffScalar(const Real& value) + // without initializing m_derivatives (which is a reference in this case) + #define EIGEN_TEST_SPACE + const AD t = min EIGEN_TEST_SPACE (denom / chi_3, 1.0); + + const AD t2 = min EIGEN_TEST_SPACE (denom / (chi_3 * _cv1_3), 1.0); + + return t.value() + t2.value(); +} + +// regression test for some compilation issues with specializations of ScalarBinaryOpTraits +void bug_1260() { + Matrix4d A; + Vector4d v; + A*v; +} + +// check a compilation issue with numext::max +double bug_1261() { + typedef AutoDiffScalar<Matrix2d> AD; + typedef Matrix<AD,2,1> VectorAD; + + VectorAD v; + const AD maxVal = v.maxCoeff(); + const AD minVal = v.minCoeff(); + return maxVal.value() + minVal.value(); +} + +double bug_1264() { + typedef AutoDiffScalar<Vector2d> AD; + const AD s; + const Matrix<AD, 3, 1> v1; + const Matrix<AD, 3, 1> v2 = (s + 3.0) * v1; + return v2(0).value(); +} + void test_autodiff() { for(int i = 0; i < g_repeat; i++) { - CALL_SUBTEST_1( test_autodiff_scalar() ); - CALL_SUBTEST_2( test_autodiff_vector() ); - CALL_SUBTEST_3( test_autodiff_jacobian() ); + CALL_SUBTEST_1( test_autodiff_scalar<1>() ); + CALL_SUBTEST_2( test_autodiff_vector<1>() ); + CALL_SUBTEST_3( test_autodiff_jacobian<1>() ); + CALL_SUBTEST_4( test_autodiff_hessian<1>() ); } bug_1222(); + bug_1223(); + bug_1260(); + bug_1261(); } diff --git a/eigen/unsupported/test/autodiff_scalar.cpp b/eigen/unsupported/test/autodiff_scalar.cpp new file mode 100644 index 0000000..4df2f5c --- /dev/null +++ b/eigen/unsupported/test/autodiff_scalar.cpp @@ -0,0 +1,83 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Christoph Hertzberg <chtz@informatik.uni-bremen.de> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include <unsupported/Eigen/AutoDiff> + +/* + * In this file scalar derivations are tested for correctness. + * TODO add more tests! 
+ */ + +template<typename Scalar> void check_atan2() +{ + typedef Matrix<Scalar, 1, 1> Deriv1; + typedef AutoDiffScalar<Deriv1> AD; + + AD x(internal::random<Scalar>(-3.0, 3.0), Deriv1::UnitX()); + + using std::exp; + Scalar r = exp(internal::random<Scalar>(-10, 10)); + + AD s = sin(x), c = cos(x); + AD res = atan2(r*s, r*c); + + VERIFY_IS_APPROX(res.value(), x.value()); + VERIFY_IS_APPROX(res.derivatives(), x.derivatives()); + + res = atan2(r*s+0, r*c+0); + VERIFY_IS_APPROX(res.value(), x.value()); + VERIFY_IS_APPROX(res.derivatives(), x.derivatives()); +} + +template<typename Scalar> void check_hyperbolic_functions() +{ + using std::sinh; + using std::cosh; + using std::tanh; + typedef Matrix<Scalar, 1, 1> Deriv1; + typedef AutoDiffScalar<Deriv1> AD; + Deriv1 p = Deriv1::Random(); + AD val(p.x(),Deriv1::UnitX()); + + Scalar cosh_px = std::cosh(p.x()); + AD res1 = tanh(val); + VERIFY_IS_APPROX(res1.value(), std::tanh(p.x())); + VERIFY_IS_APPROX(res1.derivatives().x(), Scalar(1.0) / (cosh_px * cosh_px)); + + AD res2 = sinh(val); + VERIFY_IS_APPROX(res2.value(), std::sinh(p.x())); + VERIFY_IS_APPROX(res2.derivatives().x(), cosh_px); + + AD res3 = cosh(val); + VERIFY_IS_APPROX(res3.value(), cosh_px); + VERIFY_IS_APPROX(res3.derivatives().x(), std::sinh(p.x())); + + // Check constant values. + const Scalar sample_point = Scalar(1) / Scalar(3); + val = AD(sample_point,Deriv1::UnitX()); + res1 = tanh(val); + VERIFY_IS_APPROX(res1.derivatives().x(), Scalar(0.896629559604914)); + + res2 = sinh(val); + VERIFY_IS_APPROX(res2.derivatives().x(), Scalar(1.056071867829939)); + + res3 = cosh(val); + VERIFY_IS_APPROX(res3.derivatives().x(), Scalar(0.339540557256150)); +} + +void test_autodiff_scalar() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( check_atan2<float>() ); + CALL_SUBTEST_2( check_atan2<double>() ); + CALL_SUBTEST_3( check_hyperbolic_functions<float>() ); + CALL_SUBTEST_4( check_hyperbolic_functions<double>() ); + } +} diff --git a/eigen/unsupported/test/bdcsvd.cpp b/eigen/unsupported/test/bdcsvd.cpp deleted file mode 100644 index 115a649..0000000 --- a/eigen/unsupported/test/bdcsvd.cpp +++ /dev/null @@ -1,213 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com> -// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr> -// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr> -// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/ - -#include "svd_common.h" -#include <iostream> -#include <Eigen/LU> - -// check if "svd" is the good image of "m" -template<typename MatrixType> -void bdcsvd_check_full(const MatrixType& m, const BDCSVD<MatrixType>& svd) -{ - svd_check_full< MatrixType, BDCSVD< MatrixType > >(m, svd); -} - -// Compare to a reference value -template<typename MatrixType> -void bdcsvd_compare_to_full(const MatrixType& m, - unsigned int computationOptions, - const BDCSVD<MatrixType>& referenceSvd) -{ - svd_compare_to_full< MatrixType, BDCSVD< MatrixType > >(m, computationOptions, referenceSvd); -} // end bdcsvd_compare_to_full - - -template<typename MatrixType> -void bdcsvd_solve(const MatrixType& m, unsigned int computationOptions) -{ - svd_solve< MatrixType, BDCSVD< MatrixType > >(m, computationOptions); -} // end template bdcsvd_solve - - -// test the computations options -template<typename MatrixType> -void bdcsvd_test_all_computation_options(const MatrixType& m) -{ - BDCSVD<MatrixType> fullSvd(m, ComputeFullU|ComputeFullV); - svd_test_computation_options_1< MatrixType, BDCSVD< MatrixType > >(m, fullSvd); - svd_test_computation_options_2< MatrixType, BDCSVD< MatrixType > >(m, fullSvd); -} // end bdcsvd_test_all_computation_options - - -// Call a test with all the computations options -template<typename MatrixType> -void bdcsvd(const MatrixType& a = MatrixType(), bool pickrandom = true) -{ - MatrixType m = pickrandom ? MatrixType::Random(a.rows(), a.cols()) : a; - bdcsvd_test_all_computation_options<MatrixType>(m); -} // end template bdcsvd - - -// verify assert -template<typename MatrixType> -void bdcsvd_verify_assert(const MatrixType& m) -{ - svd_verify_assert< MatrixType, BDCSVD< MatrixType > >(m); -}// end template bdcsvd_verify_assert - - -// test weird values -template<typename MatrixType> -void bdcsvd_inf_nan() -{ - svd_inf_nan< MatrixType, BDCSVD< MatrixType > >(); -}// end template bdcsvd_inf_nan - - - -void bdcsvd_preallocate() -{ - svd_preallocate< BDCSVD< MatrixXf > >(); -} // end bdcsvd_preallocate - - -// compare the Singular values returned with Jacobi and Bdc -template<typename MatrixType> -void compare_bdc_jacobi(const MatrixType& a = MatrixType(), unsigned int computationOptions = 0) -{ - std::cout << "debut compare" << std::endl; - MatrixType m = MatrixType::Random(a.rows(), a.cols()); - BDCSVD<MatrixType> bdc_svd(m); - JacobiSVD<MatrixType> jacobi_svd(m); - VERIFY_IS_APPROX(bdc_svd.singularValues(), jacobi_svd.singularValues()); - if(computationOptions & ComputeFullU) - VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU()); - if(computationOptions & ComputeThinU) - VERIFY_IS_APPROX(bdc_svd.matrixU(), jacobi_svd.matrixU()); - if(computationOptions & ComputeFullV) - VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV()); - if(computationOptions & ComputeThinV) - VERIFY_IS_APPROX(bdc_svd.matrixV(), jacobi_svd.matrixV()); - std::cout << "fin compare" << std::endl; -} // end template compare_bdc_jacobi - - -// call the tests -void test_bdcsvd() -{ - // test of Dynamic defined Matrix (42, 42) of float - CALL_SUBTEST_11(( bdcsvd_verify_assert<Matrix<float,Dynamic,Dynamic> > - (Matrix<float,Dynamic,Dynamic>(42,42)) )); - CALL_SUBTEST_11(( compare_bdc_jacobi<Matrix<float,Dynamic,Dynamic> > - (Matrix<float,Dynamic,Dynamic>(42,42), 0) )); - CALL_SUBTEST_11(( bdcsvd<Matrix<float,Dynamic,Dynamic> > - (Matrix<float,Dynamic,Dynamic>(42,42)) )); - - // test of Dynamic defined Matrix 
(50, 50) of double - CALL_SUBTEST_13(( bdcsvd_verify_assert<Matrix<double,Dynamic,Dynamic> > - (Matrix<double,Dynamic,Dynamic>(50,50)) )); - CALL_SUBTEST_13(( compare_bdc_jacobi<Matrix<double,Dynamic,Dynamic> > - (Matrix<double,Dynamic,Dynamic>(50,50), 0) )); - CALL_SUBTEST_13(( bdcsvd<Matrix<double,Dynamic,Dynamic> > - (Matrix<double,Dynamic,Dynamic>(50, 50)) )); - - // test of Dynamic defined Matrix (22, 22) of complex double - CALL_SUBTEST_14(( bdcsvd_verify_assert<Matrix<std::complex<double>,Dynamic,Dynamic> > - (Matrix<std::complex<double>,Dynamic,Dynamic>(22,22)) )); - CALL_SUBTEST_14(( compare_bdc_jacobi<Matrix<std::complex<double>,Dynamic,Dynamic> > - (Matrix<std::complex<double>, Dynamic, Dynamic> (22,22), 0) )); - CALL_SUBTEST_14(( bdcsvd<Matrix<std::complex<double>,Dynamic,Dynamic> > - (Matrix<std::complex<double>,Dynamic,Dynamic>(22, 22)) )); - - // test of Dynamic defined Matrix (10, 10) of int - //CALL_SUBTEST_15(( bdcsvd_verify_assert<Matrix<int,Dynamic,Dynamic> > - // (Matrix<int,Dynamic,Dynamic>(10,10)) )); - //CALL_SUBTEST_15(( compare_bdc_jacobi<Matrix<int,Dynamic,Dynamic> > - // (Matrix<int,Dynamic,Dynamic>(10,10), 0) )); - //CALL_SUBTEST_15(( bdcsvd<Matrix<int,Dynamic,Dynamic> > - // (Matrix<int,Dynamic,Dynamic>(10, 10)) )); - - - // test of Dynamic defined Matrix (8, 6) of double - - CALL_SUBTEST_16(( bdcsvd_verify_assert<Matrix<double,Dynamic,Dynamic> > - (Matrix<double,Dynamic,Dynamic>(8,6)) )); - CALL_SUBTEST_16(( compare_bdc_jacobi<Matrix<double,Dynamic,Dynamic> > - (Matrix<double,Dynamic,Dynamic>(8, 6), 0) )); - CALL_SUBTEST_16(( bdcsvd<Matrix<double,Dynamic,Dynamic> > - (Matrix<double,Dynamic,Dynamic>(8, 6)) )); - - - - // test of Dynamic defined Matrix (36, 12) of float - CALL_SUBTEST_17(( compare_bdc_jacobi<Matrix<float,Dynamic,Dynamic> > - (Matrix<float,Dynamic,Dynamic>(36, 12), 0) )); - CALL_SUBTEST_17(( bdcsvd<Matrix<float,Dynamic,Dynamic> > - (Matrix<float,Dynamic,Dynamic>(36, 12)) )); - - // test of Dynamic defined Matrix (5, 8) of double - CALL_SUBTEST_18(( compare_bdc_jacobi<Matrix<double,Dynamic,Dynamic> > - (Matrix<double,Dynamic,Dynamic>(5, 8), 0) )); - CALL_SUBTEST_18(( bdcsvd<Matrix<double,Dynamic,Dynamic> > - (Matrix<double,Dynamic,Dynamic>(5, 8)) )); - - - // non regression tests - CALL_SUBTEST_3(( bdcsvd_verify_assert(Matrix3f()) )); - CALL_SUBTEST_4(( bdcsvd_verify_assert(Matrix4d()) )); - CALL_SUBTEST_7(( bdcsvd_verify_assert(MatrixXf(10,12)) )); - CALL_SUBTEST_8(( bdcsvd_verify_assert(MatrixXcd(7,5)) )); - - // SUBTESTS 1 and 2 on specifics matrix - for(int i = 0; i < g_repeat; i++) { - Matrix2cd m; - m << 0, 1, - 0, 1; - CALL_SUBTEST_1(( bdcsvd(m, false) )); - m << 1, 0, - 1, 0; - CALL_SUBTEST_1(( bdcsvd(m, false) )); - - Matrix2d n; - n << 0, 0, - 0, 0; - CALL_SUBTEST_2(( bdcsvd(n, false) )); - n << 0, 0, - 0, 1; - CALL_SUBTEST_2(( bdcsvd(n, false) )); - - // Statics matrix don't work with BDSVD yet - // bdc algo on a random 3x3 float matrix - // CALL_SUBTEST_3(( bdcsvd<Matrix3f>() )); - // bdc algo on a random 4x4 double matrix - // CALL_SUBTEST_4(( bdcsvd<Matrix4d>() )); - // bdc algo on a random 3x5 float matrix - // CALL_SUBTEST_5(( bdcsvd<Matrix<float,3,5> >() )); - - int r = internal::random<int>(1, 30), - c = internal::random<int>(1, 30); - CALL_SUBTEST_7(( bdcsvd<MatrixXf>(MatrixXf(r,c)) )); - CALL_SUBTEST_8(( bdcsvd<MatrixXcd>(MatrixXcd(r,c)) )); - (void) r; - (void) c; - - // Test on inf/nan matrix - CALL_SUBTEST_7( bdcsvd_inf_nan<MatrixXf>() ); - } - - CALL_SUBTEST_7(( 
bdcsvd<MatrixXf>(MatrixXf(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); - CALL_SUBTEST_8(( bdcsvd<MatrixXcd>(MatrixXcd(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3))) )); - - // Test problem size constructors - CALL_SUBTEST_7( BDCSVD<MatrixXf>(10,10) ); - -} // end test_bdcsvd diff --git a/eigen/unsupported/test/cxx11_eventcount.cpp b/eigen/unsupported/test/cxx11_eventcount.cpp new file mode 100644 index 0000000..3b598bf --- /dev/null +++ b/eigen/unsupported/test/cxx11_eventcount.cpp @@ -0,0 +1,142 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com> +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_USE_THREADS +#include "main.h" +#include <Eigen/CXX11/ThreadPool> + +// Visual studio doesn't implement a rand_r() function since its +// implementation of rand() is already thread safe +int rand_reentrant(unsigned int* s) { +#ifdef EIGEN_COMP_MSVC_STRICT + EIGEN_UNUSED_VARIABLE(s); + return rand(); +#else + return rand_r(s); +#endif +} + +static void test_basic_eventcount() +{ + MaxSizeVector<EventCount::Waiter> waiters(1); + waiters.resize(1); + EventCount ec(waiters); + EventCount::Waiter& w = waiters[0]; + ec.Notify(false); + ec.Prewait(&w); + ec.Notify(true); + ec.CommitWait(&w); + ec.Prewait(&w); + ec.CancelWait(&w); +} + +// Fake bounded counter-based queue. +struct TestQueue { + std::atomic<int> val_; + static const int kQueueSize = 10; + + TestQueue() : val_() {} + + ~TestQueue() { VERIFY_IS_EQUAL(val_.load(), 0); } + + bool Push() { + int val = val_.load(std::memory_order_relaxed); + for (;;) { + VERIFY_GE(val, 0); + VERIFY_LE(val, kQueueSize); + if (val == kQueueSize) return false; + if (val_.compare_exchange_weak(val, val + 1, std::memory_order_relaxed)) + return true; + } + } + + bool Pop() { + int val = val_.load(std::memory_order_relaxed); + for (;;) { + VERIFY_GE(val, 0); + VERIFY_LE(val, kQueueSize); + if (val == 0) return false; + if (val_.compare_exchange_weak(val, val - 1, std::memory_order_relaxed)) + return true; + } + } + + bool Empty() { return val_.load(std::memory_order_relaxed) == 0; } +}; + +const int TestQueue::kQueueSize; + +// A number of producers send messages to a set of consumers using a set of +// fake queues. Ensure that it does not crash, consumers don't deadlock and +// number of blocked and unblocked threads match. 
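The stress test below follows the canonical `EventCount` waiting protocol, which is worth spelling out: a consumer announces intent to block with `Prewait()`, re-checks its wakeup condition, and then either calls `CancelWait()` (work arrived inside the window, so blocking could miss a notification) or `CommitWait()` to sleep until a producer calls `Notify()`. A minimal sketch against the same API the tests in this file use (the `work` counter is a stand-in for the fake queues):

```cpp
#define EIGEN_USE_THREADS
#include <Eigen/CXX11/ThreadPool>
#include <atomic>

int main()
{
  Eigen::MaxSizeVector<Eigen::EventCount::Waiter> waiters(1);
  waiters.resize(1);
  Eigen::EventCount ec(waiters);
  std::atomic<int> work(1);  // stand-in for the fake queues: one item pending

  Eigen::EventCount::Waiter& w = waiters[0];
  ec.Prewait(&w);                // announce intent to block
  if (work.load() != 0)
    ec.CancelWait(&w);           // work showed up in the window: do not block
  else
    ec.CommitWait(&w);           // sleep until a producer calls ec.Notify()
  return 0;
}
```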
+static void test_stress_eventcount() +{ + const int kThreads = std::thread::hardware_concurrency(); + static const int kEvents = 1 << 16; + static const int kQueues = 10; + + MaxSizeVector<EventCount::Waiter> waiters(kThreads); + waiters.resize(kThreads); + EventCount ec(waiters); + TestQueue queues[kQueues]; + + std::vector<std::unique_ptr<std::thread>> producers; + for (int i = 0; i < kThreads; i++) { + producers.emplace_back(new std::thread([&ec, &queues]() { + unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id())); + for (int j = 0; j < kEvents; j++) { + unsigned idx = rand_reentrant(&rnd) % kQueues; + if (queues[idx].Push()) { + ec.Notify(false); + continue; + } + EIGEN_THREAD_YIELD(); + j--; + } + })); + } + + std::vector<std::unique_ptr<std::thread>> consumers; + for (int i = 0; i < kThreads; i++) { + consumers.emplace_back(new std::thread([&ec, &queues, &waiters, i]() { + EventCount::Waiter& w = waiters[i]; + unsigned int rnd = static_cast<unsigned int>(std::hash<std::thread::id>()(std::this_thread::get_id())); + for (int j = 0; j < kEvents; j++) { + unsigned idx = rand_reentrant(&rnd) % kQueues; + if (queues[idx].Pop()) continue; + j--; + ec.Prewait(&w); + bool empty = true; + for (int q = 0; q < kQueues; q++) { + if (!queues[q].Empty()) { + empty = false; + break; + } + } + if (!empty) { + ec.CancelWait(&w); + continue; + } + ec.CommitWait(&w); + } + })); + } + + for (int i = 0; i < kThreads; i++) { + producers[i]->join(); + consumers[i]->join(); + } +} + +void test_cxx11_eventcount() +{ + CALL_SUBTEST(test_basic_eventcount()); + CALL_SUBTEST(test_stress_eventcount()); +} diff --git a/eigen/unsupported/test/cxx11_meta.cpp b/eigen/unsupported/test/cxx11_meta.cpp new file mode 100644 index 0000000..8911c59 --- /dev/null +++ b/eigen/unsupported/test/cxx11_meta.cpp @@ -0,0 +1,357 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
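The meta-programming tests that follow reduce to `is_same` checks over compile-time lists, so it helps to see the core primitive in isolation: `gen_numeric_list<T, n, start>` yields `numeric_list<T, start, ..., start+n-1>`. A standalone C++11 sketch of the same recursion (an illustrative re-implementation, not Eigen's internal code):

```cpp
#include <type_traits>

// Compile-time integer list, analogous to Eigen's internal numeric_list.
template <typename T, T... nn> struct numeric_list {};

// Build numeric_list<T, start, ..., start+n-1> by prepending one value
// per recursion step, analogous to gen_numeric_list<T, n, start>.
template <typename T, std::size_t n, T start = 0>
struct gen_numeric_list {
  template <typename List> struct prepend;
  template <T... nn> struct prepend<numeric_list<T, nn...> > {
    typedef numeric_list<T, start, nn...> type;
  };
  typedef typename prepend<
      typename gen_numeric_list<T, n - 1, start + 1>::type>::type type;
};

template <typename T, T start>
struct gen_numeric_list<T, 0, start> {
  typedef numeric_list<T> type;
};

// The same shape of assertion the tests below make via VERIFY((is_same<...>)):
static_assert(std::is_same<gen_numeric_list<int, 3>::type,
                           numeric_list<int, 0, 1, 2> >::value,
              "gen_numeric_list<int, 3> is numeric_list<int, 0, 1, 2>");

int main() { return 0; }
```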
+ +#include "main.h" + +#include <array> +#include <Eigen/CXX11/src/util/CXX11Meta.h> + +using Eigen::internal::is_same; +using Eigen::internal::type_list; +using Eigen::internal::numeric_list; +using Eigen::internal::gen_numeric_list; +using Eigen::internal::gen_numeric_list_reversed; +using Eigen::internal::gen_numeric_list_swapped_pair; +using Eigen::internal::gen_numeric_list_repeated; +using Eigen::internal::concat; +using Eigen::internal::mconcat; +using Eigen::internal::take; +using Eigen::internal::skip; +using Eigen::internal::slice; +using Eigen::internal::get; +using Eigen::internal::id_numeric; +using Eigen::internal::id_type; +using Eigen::internal::is_same_gf; +using Eigen::internal::apply_op_from_left; +using Eigen::internal::apply_op_from_right; +using Eigen::internal::contained_in_list; +using Eigen::internal::contained_in_list_gf; +using Eigen::internal::arg_prod; +using Eigen::internal::arg_sum; +using Eigen::internal::sum_op; +using Eigen::internal::product_op; +using Eigen::internal::array_reverse; +using Eigen::internal::array_sum; +using Eigen::internal::array_prod; +using Eigen::internal::array_reduce; +using Eigen::internal::array_zip; +using Eigen::internal::array_zip_and_reduce; +using Eigen::internal::array_apply; +using Eigen::internal::array_apply_and_reduce; +using Eigen::internal::repeat; +using Eigen::internal::instantiate_by_c_array; + +struct dummy_a {}; +struct dummy_b {}; +struct dummy_c {}; +struct dummy_d {}; +struct dummy_e {}; + +// dummy operation for testing apply +template<typename A, typename B> struct dummy_op; +template<> struct dummy_op<dummy_a, dummy_b> { typedef dummy_c type; }; +template<> struct dummy_op<dummy_b, dummy_a> { typedef dummy_d type; }; +template<> struct dummy_op<dummy_b, dummy_c> { typedef dummy_a type; }; +template<> struct dummy_op<dummy_c, dummy_b> { typedef dummy_d type; }; +template<> struct dummy_op<dummy_c, dummy_a> { typedef dummy_b type; }; +template<> struct dummy_op<dummy_a, dummy_c> { typedef dummy_d type; }; +template<> struct dummy_op<dummy_a, dummy_a> { typedef dummy_e type; }; +template<> struct dummy_op<dummy_b, dummy_b> { typedef dummy_e type; }; +template<> struct dummy_op<dummy_c, dummy_c> { typedef dummy_e type; }; + +template<typename A, typename B> struct dummy_test { constexpr static bool value = false; constexpr static int global_flags = 0; }; +template<> struct dummy_test<dummy_a, dummy_a> { constexpr static bool value = true; constexpr static int global_flags = 1; }; +template<> struct dummy_test<dummy_b, dummy_b> { constexpr static bool value = true; constexpr static int global_flags = 2; }; +template<> struct dummy_test<dummy_c, dummy_c> { constexpr static bool value = true; constexpr static int global_flags = 4; }; + +struct times2_op { template<typename A> static A run(A v) { return v * 2; } }; + +struct dummy_inst +{ + int c; + + dummy_inst() : c(0) {} + explicit dummy_inst(int) : c(1) {} + dummy_inst(int, int) : c(2) {} + dummy_inst(int, int, int) : c(3) {} + dummy_inst(int, int, int, int) : c(4) {} + dummy_inst(int, int, int, int, int) : c(5) {} +}; + +static void test_gen_numeric_list() +{ + VERIFY((is_same<typename gen_numeric_list<int, 0>::type, numeric_list<int>>::value)); + VERIFY((is_same<typename gen_numeric_list<int, 1>::type, numeric_list<int, 0>>::value)); + VERIFY((is_same<typename gen_numeric_list<int, 2>::type, numeric_list<int, 0, 1>>::value)); + VERIFY((is_same<typename gen_numeric_list<int, 5>::type, numeric_list<int, 0, 1, 2, 3, 4>>::value)); + VERIFY((is_same<typename 
gen_numeric_list<int, 10>::type, numeric_list<int, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9>>::value)); + + VERIFY((is_same<typename gen_numeric_list<int, 0, 42>::type, numeric_list<int>>::value)); + VERIFY((is_same<typename gen_numeric_list<int, 1, 42>::type, numeric_list<int, 42>>::value)); + VERIFY((is_same<typename gen_numeric_list<int, 2, 42>::type, numeric_list<int, 42, 43>>::value)); + VERIFY((is_same<typename gen_numeric_list<int, 5, 42>::type, numeric_list<int, 42, 43, 44, 45, 46>>::value)); + VERIFY((is_same<typename gen_numeric_list<int, 10, 42>::type, numeric_list<int, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51>>::value)); + + VERIFY((is_same<typename gen_numeric_list_reversed<int, 0>::type, numeric_list<int>>::value)); + VERIFY((is_same<typename gen_numeric_list_reversed<int, 1>::type, numeric_list<int, 0>>::value)); + VERIFY((is_same<typename gen_numeric_list_reversed<int, 2>::type, numeric_list<int, 1, 0>>::value)); + VERIFY((is_same<typename gen_numeric_list_reversed<int, 5>::type, numeric_list<int, 4, 3, 2, 1, 0>>::value)); + VERIFY((is_same<typename gen_numeric_list_reversed<int, 10>::type, numeric_list<int, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0>>::value)); + + VERIFY((is_same<typename gen_numeric_list_reversed<int, 0, 42>::type, numeric_list<int>>::value)); + VERIFY((is_same<typename gen_numeric_list_reversed<int, 1, 42>::type, numeric_list<int, 42>>::value)); + VERIFY((is_same<typename gen_numeric_list_reversed<int, 2, 42>::type, numeric_list<int, 43, 42>>::value)); + VERIFY((is_same<typename gen_numeric_list_reversed<int, 5, 42>::type, numeric_list<int, 46, 45, 44, 43, 42>>::value)); + VERIFY((is_same<typename gen_numeric_list_reversed<int, 10, 42>::type, numeric_list<int, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42>>::value)); + + VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 0, 2, 3>::type, numeric_list<int>>::value)); + VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 1, 2, 3>::type, numeric_list<int, 0>>::value)); + VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 2, 2, 3>::type, numeric_list<int, 0, 1>>::value)); + VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 5, 2, 3>::type, numeric_list<int, 0, 1, 3, 2, 4>>::value)); + VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 10, 2, 3>::type, numeric_list<int, 0, 1, 3, 2, 4, 5, 6, 7, 8, 9>>::value)); + + VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 0, 44, 45, 42>::type, numeric_list<int>>::value)); + VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 1, 44, 45, 42>::type, numeric_list<int, 42>>::value)); + VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 2, 44, 45, 42>::type, numeric_list<int, 42, 43>>::value)); + VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 5, 44, 45, 42>::type, numeric_list<int, 42, 43, 45, 44, 46>>::value)); + VERIFY((is_same<typename gen_numeric_list_swapped_pair<int, 10, 44, 45, 42>::type, numeric_list<int, 42, 43, 45, 44, 46, 47, 48, 49, 50, 51>>::value)); + + VERIFY((is_same<typename gen_numeric_list_repeated<int, 0, 0>::type, numeric_list<int>>::value)); + VERIFY((is_same<typename gen_numeric_list_repeated<int, 1, 0>::type, numeric_list<int, 0>>::value)); + VERIFY((is_same<typename gen_numeric_list_repeated<int, 2, 0>::type, numeric_list<int, 0, 0>>::value)); + VERIFY((is_same<typename gen_numeric_list_repeated<int, 5, 0>::type, numeric_list<int, 0, 0, 0, 0, 0>>::value)); + VERIFY((is_same<typename gen_numeric_list_repeated<int, 10, 0>::type, numeric_list<int, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>>::value)); +} + +static 
void test_concat() +{ + VERIFY((is_same<typename concat<type_list<dummy_a, dummy_a>, type_list<>>::type, type_list<dummy_a, dummy_a>>::value)); + VERIFY((is_same<typename concat<type_list<>, type_list<dummy_a, dummy_a>>::type, type_list<dummy_a, dummy_a>>::value)); + VERIFY((is_same<typename concat<type_list<dummy_a, dummy_a>, type_list<dummy_a, dummy_a>>::type, type_list<dummy_a, dummy_a, dummy_a, dummy_a>>::value)); + VERIFY((is_same<typename concat<type_list<dummy_a, dummy_a>, type_list<dummy_b, dummy_c>>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_c>>::value)); + VERIFY((is_same<typename concat<type_list<dummy_a>, type_list<dummy_b, dummy_c>>::type, type_list<dummy_a, dummy_b, dummy_c>>::value)); + + VERIFY((is_same<typename concat<numeric_list<int, 0, 0>, numeric_list<int>>::type, numeric_list<int, 0, 0>>::value)); + VERIFY((is_same<typename concat<numeric_list<int>, numeric_list<int, 0, 0>>::type, numeric_list<int, 0, 0>>::value)); + VERIFY((is_same<typename concat<numeric_list<int, 0, 0>, numeric_list<int, 0, 0>>::type, numeric_list<int, 0, 0, 0, 0>>::value)); + VERIFY((is_same<typename concat<numeric_list<int, 0, 0>, numeric_list<int, 1, 2>>::type, numeric_list<int, 0, 0, 1, 2>>::value)); + VERIFY((is_same<typename concat<numeric_list<int, 0>, numeric_list<int, 1, 2>>::type, numeric_list<int, 0, 1, 2>>::value)); + + VERIFY((is_same<typename mconcat<type_list<dummy_a>>::type, type_list<dummy_a>>::value)); + VERIFY((is_same<typename mconcat<type_list<dummy_a>, type_list<dummy_b>>::type, type_list<dummy_a, dummy_b>>::value)); + VERIFY((is_same<typename mconcat<type_list<dummy_a>, type_list<dummy_b>, type_list<dummy_c>>::type, type_list<dummy_a, dummy_b, dummy_c>>::value)); + VERIFY((is_same<typename mconcat<type_list<dummy_a>, type_list<dummy_b, dummy_c>>::type, type_list<dummy_a, dummy_b, dummy_c>>::value)); + VERIFY((is_same<typename mconcat<type_list<dummy_a, dummy_b>, type_list<dummy_c>>::type, type_list<dummy_a, dummy_b, dummy_c>>::value)); + + VERIFY((is_same<typename mconcat<numeric_list<int, 0>>::type, numeric_list<int, 0>>::value)); + VERIFY((is_same<typename mconcat<numeric_list<int, 0>, numeric_list<int, 1>>::type, numeric_list<int, 0, 1>>::value)); + VERIFY((is_same<typename mconcat<numeric_list<int, 0>, numeric_list<int, 1>, numeric_list<int, 2>>::type, numeric_list<int, 0, 1, 2>>::value)); + VERIFY((is_same<typename mconcat<numeric_list<int, 0>, numeric_list<int, 1, 2>>::type, numeric_list<int, 0, 1, 2>>::value)); + VERIFY((is_same<typename mconcat<numeric_list<int, 0, 1>, numeric_list<int, 2>>::type, numeric_list<int, 0, 1, 2>>::value)); +} + +static void test_slice() +{ + typedef type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c, dummy_c> tl; + typedef numeric_list<int, 0, 1, 2, 3, 4, 5> il; + + VERIFY((is_same<typename take<0, tl>::type, type_list<>>::value)); + VERIFY((is_same<typename take<1, tl>::type, type_list<dummy_a>>::value)); + VERIFY((is_same<typename take<2, tl>::type, type_list<dummy_a, dummy_a>>::value)); + VERIFY((is_same<typename take<3, tl>::type, type_list<dummy_a, dummy_a, dummy_b>>::value)); + VERIFY((is_same<typename take<4, tl>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_b>>::value)); + VERIFY((is_same<typename take<5, tl>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c>>::value)); + VERIFY((is_same<typename take<6, tl>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c, dummy_c>>::value)); + + VERIFY((is_same<typename take<0, il>::type, numeric_list<int>>::value)); + VERIFY((is_same<typename take<1, 
il>::type, numeric_list<int, 0>>::value)); + VERIFY((is_same<typename take<2, il>::type, numeric_list<int, 0, 1>>::value)); + VERIFY((is_same<typename take<3, il>::type, numeric_list<int, 0, 1, 2>>::value)); + VERIFY((is_same<typename take<4, il>::type, numeric_list<int, 0, 1, 2, 3>>::value)); + VERIFY((is_same<typename take<5, il>::type, numeric_list<int, 0, 1, 2, 3, 4>>::value)); + VERIFY((is_same<typename take<6, il>::type, numeric_list<int, 0, 1, 2, 3, 4, 5>>::value)); + + VERIFY((is_same<typename skip<0, tl>::type, type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c, dummy_c>>::value)); + VERIFY((is_same<typename skip<1, tl>::type, type_list<dummy_a, dummy_b, dummy_b, dummy_c, dummy_c>>::value)); + VERIFY((is_same<typename skip<2, tl>::type, type_list<dummy_b, dummy_b, dummy_c, dummy_c>>::value)); + VERIFY((is_same<typename skip<3, tl>::type, type_list<dummy_b, dummy_c, dummy_c>>::value)); + VERIFY((is_same<typename skip<4, tl>::type, type_list<dummy_c, dummy_c>>::value)); + VERIFY((is_same<typename skip<5, tl>::type, type_list<dummy_c>>::value)); + VERIFY((is_same<typename skip<6, tl>::type, type_list<>>::value)); + + VERIFY((is_same<typename skip<0, il>::type, numeric_list<int, 0, 1, 2, 3, 4, 5>>::value)); + VERIFY((is_same<typename skip<1, il>::type, numeric_list<int, 1, 2, 3, 4, 5>>::value)); + VERIFY((is_same<typename skip<2, il>::type, numeric_list<int, 2, 3, 4, 5>>::value)); + VERIFY((is_same<typename skip<3, il>::type, numeric_list<int, 3, 4, 5>>::value)); + VERIFY((is_same<typename skip<4, il>::type, numeric_list<int, 4, 5>>::value)); + VERIFY((is_same<typename skip<5, il>::type, numeric_list<int, 5>>::value)); + VERIFY((is_same<typename skip<6, il>::type, numeric_list<int>>::value)); + + VERIFY((is_same<typename slice<0, 3, tl>::type, typename take<3, tl>::type>::value)); + VERIFY((is_same<typename slice<0, 3, il>::type, typename take<3, il>::type>::value)); + VERIFY((is_same<typename slice<1, 3, tl>::type, type_list<dummy_a, dummy_b, dummy_b>>::value)); + VERIFY((is_same<typename slice<1, 3, il>::type, numeric_list<int, 1, 2, 3>>::value)); +} + +static void test_get() +{ + typedef type_list<dummy_a, dummy_a, dummy_b, dummy_b, dummy_c, dummy_c> tl; + typedef numeric_list<int, 4, 8, 15, 16, 23, 42> il; + + VERIFY((is_same<typename get<0, tl>::type, dummy_a>::value)); + VERIFY((is_same<typename get<1, tl>::type, dummy_a>::value)); + VERIFY((is_same<typename get<2, tl>::type, dummy_b>::value)); + VERIFY((is_same<typename get<3, tl>::type, dummy_b>::value)); + VERIFY((is_same<typename get<4, tl>::type, dummy_c>::value)); + VERIFY((is_same<typename get<5, tl>::type, dummy_c>::value)); + + VERIFY_IS_EQUAL(((int)get<0, il>::value), 4); + VERIFY_IS_EQUAL(((int)get<1, il>::value), 8); + VERIFY_IS_EQUAL(((int)get<2, il>::value), 15); + VERIFY_IS_EQUAL(((int)get<3, il>::value), 16); + VERIFY_IS_EQUAL(((int)get<4, il>::value), 23); + VERIFY_IS_EQUAL(((int)get<5, il>::value), 42); +} + +static void test_id_helper(dummy_a a, dummy_a b, dummy_a c) +{ + (void)a; + (void)b; + (void)c; +} + +template<int... ii> +static void test_id_numeric() +{ + test_id_helper(typename id_numeric<int, ii, dummy_a>::type()...); +} + +template<typename... 
tt> +static void test_id_type() +{ + test_id_helper(typename id_type<tt, dummy_a>::type()...); +} + +static void test_id() +{ + // don't call VERIFY here, just assume it works if it compiles + // (otherwise it will complain that it can't find the function) + test_id_numeric<1, 4, 6>(); + test_id_type<dummy_a, dummy_b, dummy_c>(); +} + +static void test_is_same_gf() +{ + VERIFY((!is_same_gf<dummy_a, dummy_b>::value)); + VERIFY((!!is_same_gf<dummy_a, dummy_a>::value)); + VERIFY_IS_EQUAL((!!is_same_gf<dummy_a, dummy_b>::global_flags), false); + VERIFY_IS_EQUAL((!!is_same_gf<dummy_a, dummy_a>::global_flags), false); +} + +static void test_apply_op() +{ + typedef type_list<dummy_a, dummy_b, dummy_c> tl; + VERIFY((!!is_same<typename apply_op_from_left<dummy_op, dummy_a, tl>::type, type_list<dummy_e, dummy_c, dummy_d>>::value)); + VERIFY((!!is_same<typename apply_op_from_right<dummy_op, dummy_a, tl>::type, type_list<dummy_e, dummy_d, dummy_b>>::value)); +} + +static void test_contained_in_list() +{ + typedef type_list<dummy_a, dummy_b, dummy_c> tl; + + VERIFY((!!contained_in_list<is_same, dummy_a, tl>::value)); + VERIFY((!!contained_in_list<is_same, dummy_b, tl>::value)); + VERIFY((!!contained_in_list<is_same, dummy_c, tl>::value)); + VERIFY((!contained_in_list<is_same, dummy_d, tl>::value)); + VERIFY((!contained_in_list<is_same, dummy_e, tl>::value)); + + VERIFY((!!contained_in_list_gf<dummy_test, dummy_a, tl>::value)); + VERIFY((!!contained_in_list_gf<dummy_test, dummy_b, tl>::value)); + VERIFY((!!contained_in_list_gf<dummy_test, dummy_c, tl>::value)); + VERIFY((!contained_in_list_gf<dummy_test, dummy_d, tl>::value)); + VERIFY((!contained_in_list_gf<dummy_test, dummy_e, tl>::value)); + + VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_a, tl>::global_flags), 1); + VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_b, tl>::global_flags), 2); + VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_c, tl>::global_flags), 4); + VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_d, tl>::global_flags), 0); + VERIFY_IS_EQUAL(((int)contained_in_list_gf<dummy_test, dummy_e, tl>::global_flags), 0); +} + +static void test_arg_reductions() +{ + VERIFY_IS_EQUAL(arg_sum(1,2,3,4), 10); + VERIFY_IS_EQUAL(arg_prod(1,2,3,4), 24); + VERIFY_IS_APPROX(arg_sum(0.5, 2, 5), 7.5); + VERIFY_IS_APPROX(arg_prod(0.5, 2, 5), 5.0); +} + +static void test_array_reverse_and_reduce() +{ + array<int, 6> a{{4, 8, 15, 16, 23, 42}}; + array<int, 6> b{{42, 23, 16, 15, 8, 4}}; + + // there is no operator<< for std::array, so VERIFY_IS_EQUAL will + // not compile + VERIFY((array_reverse(a) == b)); + VERIFY((array_reverse(b) == a)); + VERIFY_IS_EQUAL((array_sum(a)), 108); + VERIFY_IS_EQUAL((array_sum(b)), 108); + VERIFY_IS_EQUAL((array_prod(a)), 7418880); + VERIFY_IS_EQUAL((array_prod(b)), 7418880); +} + +static void test_array_zip_and_apply() +{ + array<int, 6> a{{4, 8, 15, 16, 23, 42}}; + array<int, 6> b{{0, 1, 2, 3, 4, 5}}; + array<int, 6> c{{4, 9, 17, 19, 27, 47}}; + array<int, 6> d{{0, 8, 30, 48, 92, 210}}; + array<int, 6> e{{0, 2, 4, 6, 8, 10}}; + + VERIFY((array_zip<sum_op>(a, b) == c)); + VERIFY((array_zip<product_op>(a, b) == d)); + VERIFY((array_apply<times2_op>(b) == e)); + VERIFY_IS_EQUAL((array_apply_and_reduce<sum_op, times2_op>(a)), 216); + VERIFY_IS_EQUAL((array_apply_and_reduce<sum_op, times2_op>(b)), 30); + VERIFY_IS_EQUAL((array_zip_and_reduce<product_op, sum_op>(a, b)), 14755932); + VERIFY_IS_EQUAL((array_zip_and_reduce<sum_op, product_op>(a, b)), 388); +} + +static void 
test_array_misc()
+{
+  array<int, 3> a3{{1, 1, 1}};
+  array<int, 6> a6{{2, 2, 2, 2, 2, 2}};
+  VERIFY((repeat<3, int>(1) == a3));
+  VERIFY((repeat<6, int>(2) == a6));
+
+  int data[5] = { 0, 1, 2, 3, 4 };
+  VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 0>(data).c), 0);
+  VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 1>(data).c), 1);
+  VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 2>(data).c), 2);
+  VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 3>(data).c), 3);
+  VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 4>(data).c), 4);
+  VERIFY_IS_EQUAL((instantiate_by_c_array<dummy_inst, int, 5>(data).c), 5);
+}
+
+void test_cxx11_meta()
+{
+  CALL_SUBTEST(test_gen_numeric_list());
+  CALL_SUBTEST(test_concat());
+  CALL_SUBTEST(test_slice());
+  CALL_SUBTEST(test_get());
+  CALL_SUBTEST(test_id());
+  CALL_SUBTEST(test_is_same_gf());
+  CALL_SUBTEST(test_apply_op());
+  CALL_SUBTEST(test_contained_in_list());
+  CALL_SUBTEST(test_arg_reductions());
+  CALL_SUBTEST(test_array_reverse_and_reduce());
+  CALL_SUBTEST(test_array_zip_and_apply());
+  CALL_SUBTEST(test_array_misc());
+}
diff --git a/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp b/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp
new file mode 100644
index 0000000..48cd2d4
--- /dev/null
+++ b/eigen/unsupported/test/cxx11_non_blocking_thread_pool.cpp
@@ -0,0 +1,125 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+#include "main.h"
+#include "Eigen/CXX11/ThreadPool"
+#include "Eigen/CXX11/Tensor"
+
+static void test_create_destroy_empty_pool()
+{
+  // Just create and destroy the pool. This spins up and tears down the
+  // worker threads; ensure there are no issues in that logic.
+  for (int i = 0; i < 16; ++i) {
+    NonBlockingThreadPool tp(i);
+  }
+}
+
+
+static void test_parallelism(bool allow_spinning)
+{
+  // Test that we never fail to match available tasks with idle threads.
+  const int kThreads = 16;  // code below expects this to be a multiple of 4
+  NonBlockingThreadPool tp(kThreads, allow_spinning);
+  VERIFY_IS_EQUAL(tp.NumThreads(), kThreads);
+  VERIFY_IS_EQUAL(tp.CurrentThreadId(), -1);
+  for (int iter = 0; iter < 100; ++iter) {
+    std::atomic<int> running(0);
+    std::atomic<int> done(0);
+    std::atomic<int> phase(0);
+    // Schedule kThreads tasks and ensure that they all are running.
+    for (int i = 0; i < kThreads; ++i) {
+      tp.Schedule([&]() {
+        const int thread_id = tp.CurrentThreadId();
+        VERIFY_GE(thread_id, 0);
+        VERIFY_LE(thread_id, kThreads - 1);
+        running++;
+        while (phase < 1) {
+        }
+        done++;
+      });
+    }
+    while (running != kThreads) {
+    }
+    running = 0;
+    phase = 1;
+    // Now, while the previous tasks exit, schedule another kThreads tasks and
+    // ensure that they are running.
+    for (int i = 0; i < kThreads; ++i) {
+      tp.Schedule([&, i]() {
+        running++;
+        while (phase < 2) {
+        }
+        // When all tasks are running, half of the tasks exit, a quarter keep
+        // running, and a quarter schedule another 2 tasks each. Concurrently,
+        // the main thread schedules another quarter of the tasks.
+        // This gives us another kThreads tasks, and we ensure that they are
+        // all running.
+        if (i < kThreads / 2) {
+        } else if (i < 3 * kThreads / 4) {
+          running++;
+          while (phase < 3) {
+          }
+          done++;
+        } else {
+          for (int j = 0; j < 2; ++j) {
+            tp.Schedule([&]() {
+              running++;
+              while (phase < 3) {
+              }
+              done++;
+            });
+          }
+        }
+        done++;
+      });
+    }
+    while (running != kThreads) {
+    }
+    running = 0;
+    phase = 2;
+    for (int i = 0; i < kThreads / 4; ++i) {
+      tp.Schedule([&]() {
+        running++;
+        while (phase < 3) {
+        }
+        done++;
+      });
+    }
+    while (running != kThreads) {
+    }
+    phase = 3;
+    while (done != 3 * kThreads) {
+    }
+  }
+}
+
+
+static void test_cancel()
+{
+  NonBlockingThreadPool tp(2);
+
+  // Schedule a large number of closures, each of which sleeps for two
+  // seconds. This keeps the thread pool busy for much longer than the
+  // default test timeout.
+  for (int i = 0; i < 1000; ++i) {
+    tp.Schedule([]() { EIGEN_SLEEP(2000); });
+  }
+
+  // Cancel the processing of all the closures that are still pending.
+  tp.Cancel();
+}
+
+void test_cxx11_non_blocking_thread_pool()
+{
+  CALL_SUBTEST(test_create_destroy_empty_pool());
+  CALL_SUBTEST(test_parallelism(true));
+  CALL_SUBTEST(test_parallelism(false));
+  CALL_SUBTEST(test_cancel());
+}
diff --git a/eigen/unsupported/test/cxx11_runqueue.cpp b/eigen/unsupported/test/cxx11_runqueue.cpp
new file mode 100644
index 0000000..91f6901
--- /dev/null
+++ b/eigen/unsupported/test/cxx11_runqueue.cpp
@@ -0,0 +1,235 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Dmitry Vyukov <dvyukov@google.com>
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_USE_THREADS
+#include <cstdlib>
+#include "main.h"
+#include <Eigen/CXX11/ThreadPool>
+
+
+// Visual Studio doesn't implement a rand_r() function since its
+// implementation of rand() is already thread safe.
+int rand_reentrant(unsigned int* s) {
+#ifdef EIGEN_COMP_MSVC_STRICT
+  EIGEN_UNUSED_VARIABLE(s);
+  return rand();
+#else
+  return rand_r(s);
+#endif
+}
+
+void test_basic_runqueue()
+{
+  RunQueue<int, 4> q;
+  // Check empty state.
+  VERIFY(q.Empty());
+  VERIFY_IS_EQUAL(0u, q.Size());
+  VERIFY_IS_EQUAL(0, q.PopFront());
+  std::vector<int> stolen;
+  VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen));
+  VERIFY_IS_EQUAL(0u, stolen.size());
+  // Push one front, pop one front.
+  VERIFY_IS_EQUAL(0, q.PushFront(1));
+  VERIFY_IS_EQUAL(1u, q.Size());
+  VERIFY_IS_EQUAL(1, q.PopFront());
+  VERIFY_IS_EQUAL(0u, q.Size());
+  // Push front to overflow.
+  VERIFY_IS_EQUAL(0, q.PushFront(2));
+  VERIFY_IS_EQUAL(1u, q.Size());
+  VERIFY_IS_EQUAL(0, q.PushFront(3));
+  VERIFY_IS_EQUAL(2u, q.Size());
+  VERIFY_IS_EQUAL(0, q.PushFront(4));
+  VERIFY_IS_EQUAL(3u, q.Size());
+  VERIFY_IS_EQUAL(0, q.PushFront(5));
+  VERIFY_IS_EQUAL(4u, q.Size());
+  VERIFY_IS_EQUAL(6, q.PushFront(6));
+  VERIFY_IS_EQUAL(4u, q.Size());
+  VERIFY_IS_EQUAL(5, q.PopFront());
+  VERIFY_IS_EQUAL(3u, q.Size());
+  VERIFY_IS_EQUAL(4, q.PopFront());
+  VERIFY_IS_EQUAL(2u, q.Size());
+  VERIFY_IS_EQUAL(3, q.PopFront());
+  VERIFY_IS_EQUAL(1u, q.Size());
+  VERIFY_IS_EQUAL(2, q.PopFront());
+  VERIFY_IS_EQUAL(0u, q.Size());
+  VERIFY_IS_EQUAL(0, q.PopFront());
+  // Push one back, pop one back.
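+  // (The back API follows the same convention as the front API exercised
+  // above: Push* returns 0 on success and hands the element back when the
+  // queue is full, while Pop* returns 0 when there is nothing to pop.)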
+  VERIFY_IS_EQUAL(0, q.PushBack(7));
+  VERIFY_IS_EQUAL(1u, q.Size());
+  VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
+  VERIFY_IS_EQUAL(1u, stolen.size());
+  VERIFY_IS_EQUAL(7, stolen[0]);
+  VERIFY_IS_EQUAL(0u, q.Size());
+  stolen.clear();
+  // Push back to overflow.
+  VERIFY_IS_EQUAL(0, q.PushBack(8));
+  VERIFY_IS_EQUAL(1u, q.Size());
+  VERIFY_IS_EQUAL(0, q.PushBack(9));
+  VERIFY_IS_EQUAL(2u, q.Size());
+  VERIFY_IS_EQUAL(0, q.PushBack(10));
+  VERIFY_IS_EQUAL(3u, q.Size());
+  VERIFY_IS_EQUAL(0, q.PushBack(11));
+  VERIFY_IS_EQUAL(4u, q.Size());
+  VERIFY_IS_EQUAL(12, q.PushBack(12));
+  VERIFY_IS_EQUAL(4u, q.Size());
+  // Pop back in halves.
+  VERIFY_IS_EQUAL(2u, q.PopBackHalf(&stolen));
+  VERIFY_IS_EQUAL(2u, stolen.size());
+  VERIFY_IS_EQUAL(10, stolen[0]);
+  VERIFY_IS_EQUAL(11, stolen[1]);
+  VERIFY_IS_EQUAL(2u, q.Size());
+  stolen.clear();
+  VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
+  VERIFY_IS_EQUAL(1u, stolen.size());
+  VERIFY_IS_EQUAL(9, stolen[0]);
+  VERIFY_IS_EQUAL(1u, q.Size());
+  stolen.clear();
+  VERIFY_IS_EQUAL(1u, q.PopBackHalf(&stolen));
+  VERIFY_IS_EQUAL(1u, stolen.size());
+  VERIFY_IS_EQUAL(8, stolen[0]);
+  stolen.clear();
+  VERIFY_IS_EQUAL(0u, q.PopBackHalf(&stolen));
+  VERIFY_IS_EQUAL(0u, stolen.size());
+  // Empty again.
+  VERIFY(q.Empty());
+  VERIFY_IS_EQUAL(0u, q.Size());
+  VERIFY_IS_EQUAL(0, q.PushFront(1));
+  VERIFY_IS_EQUAL(0, q.PushFront(2));
+  VERIFY_IS_EQUAL(0, q.PushFront(3));
+  VERIFY_IS_EQUAL(1, q.PopBack());
+  VERIFY_IS_EQUAL(2, q.PopBack());
+  VERIFY_IS_EQUAL(3, q.PopBack());
+  VERIFY(q.Empty());
+  VERIFY_IS_EQUAL(0u, q.Size());
+}
+
+// test_empty_runqueue verifies that the queue is never claimed to be empty
+// when it is in fact not. The emptiness property is a crucial part of the
+// thread pool's blocking scheme, so we go to great lengths to ensure it
+// holds. We create a queue with 1 element, then repeatedly push 1 element
+// (either front or back at random) and pop 1 element (either front or back
+// at random). The queue therefore always contains at least 1 element, but
+// otherwise changes chaotically. Another thread constantly checks that the
+// queue is never claimed to be empty.
+void test_empty_runqueue()
+{
+  RunQueue<int, 4> q;
+  q.PushFront(1);
+  std::atomic<bool> done(false);
+  std::thread mutator([&q, &done]() {
+    unsigned rnd = 0;
+    std::vector<int> stolen;
+    for (int i = 0; i < 1 << 18; i++) {
+      if (rand_reentrant(&rnd) % 2)
+        VERIFY_IS_EQUAL(0, q.PushFront(1));
+      else
+        VERIFY_IS_EQUAL(0, q.PushBack(1));
+      if (rand_reentrant(&rnd) % 2)
+        VERIFY_IS_EQUAL(1, q.PopFront());
+      else {
+        for (;;) {
+          if (q.PopBackHalf(&stolen) == 1) {
+            stolen.clear();
+            break;
+          }
+          VERIFY_IS_EQUAL(0u, stolen.size());
+        }
+      }
+    }
+    done = true;
+  });
+  while (!done) {
+    VERIFY(!q.Empty());
+    int size = q.Size();
+    VERIFY_GE(size, 1);
+    VERIFY_LE(size, 2);
+  }
+  VERIFY_IS_EQUAL(1, q.PopFront());
+  mutator.join();
+}
+
+// test_stress_runqueue is a chaotic random test: one thread (the owner)
+// calls PushFront/PopFront while the other threads call PushBack/PopBack.
+// Ensure that we don't crash, don't deadlock, and that all sanity checks
+// pass.
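+// Bookkeeping: every producer adds the values it successfully pushes to the
+// shared `total` (the owner also subtracts what it pops), and the stealing
+// threads subtract everything they pop, so the final check that total == 0
+// passes only if every pushed element was popped exactly once.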
+void test_stress_runqueue() +{ + static const int kEvents = 1 << 18; + RunQueue<int, 8> q; + std::atomic<int> total(0); + std::vector<std::unique_ptr<std::thread>> threads; + threads.emplace_back(new std::thread([&q, &total]() { + int sum = 0; + int pushed = 1; + int popped = 1; + while (pushed < kEvents || popped < kEvents) { + if (pushed < kEvents) { + if (q.PushFront(pushed) == 0) { + sum += pushed; + pushed++; + } + } + if (popped < kEvents) { + int v = q.PopFront(); + if (v != 0) { + sum -= v; + popped++; + } + } + } + total += sum; + })); + for (int i = 0; i < 2; i++) { + threads.emplace_back(new std::thread([&q, &total]() { + int sum = 0; + for (int j = 1; j < kEvents; j++) { + if (q.PushBack(j) == 0) { + sum += j; + continue; + } + EIGEN_THREAD_YIELD(); + j--; + } + total += sum; + })); + threads.emplace_back(new std::thread([&q, &total]() { + int sum = 0; + std::vector<int> stolen; + for (int j = 1; j < kEvents;) { + if (q.PopBackHalf(&stolen) == 0) { + EIGEN_THREAD_YIELD(); + continue; + } + while (stolen.size() && j < kEvents) { + int v = stolen.back(); + stolen.pop_back(); + VERIFY_IS_NOT_EQUAL(v, 0); + sum += v; + j++; + } + } + while (stolen.size()) { + int v = stolen.back(); + stolen.pop_back(); + VERIFY_IS_NOT_EQUAL(v, 0); + while ((v = q.PushBack(v)) != 0) EIGEN_THREAD_YIELD(); + } + total -= sum; + })); + } + for (size_t i = 0; i < threads.size(); i++) threads[i]->join(); + VERIFY(q.Empty()); + VERIFY(total.load() == 0); +} + +void test_cxx11_runqueue() +{ + CALL_SUBTEST_1(test_basic_runqueue()); + CALL_SUBTEST_2(test_empty_runqueue()); + CALL_SUBTEST_3(test_stress_runqueue()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_argmax.cpp b/eigen/unsupported/test/cxx11_tensor_argmax.cpp new file mode 100644 index 0000000..0377672 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_argmax.cpp @@ -0,0 +1,294 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Eugene Brevdo <ebrevdo@google.com> +// Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
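+
+// Exercises index_tuples(), the ArgMax/ArgMin tuple reducers, and the
+// argmax()/argmin() reductions, both over all dimensions and along each
+// single dimension, in row-major and column-major layouts.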
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::array; +using Eigen::Tuple; + +template <int DataLayout> +static void test_simple_index_tuples() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7); + index_tuples = tensor.index_tuples(); + + for (DenseIndex n = 0; n < 2*3*5*7; ++n) { + const Tuple<DenseIndex, float>& v = index_tuples.coeff(n); + VERIFY_IS_EQUAL(v.first, n); + VERIFY_IS_EQUAL(v.second, tensor.coeff(n)); + } +} + +template <int DataLayout> +static void test_index_tuples_dim() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7); + + index_tuples = tensor.index_tuples(); + + for (Eigen::DenseIndex n = 0; n < tensor.size(); ++n) { + const Tuple<DenseIndex, float>& v = index_tuples(n); //(i, j, k, l); + VERIFY_IS_EQUAL(v.first, n); + VERIFY_IS_EQUAL(v.second, tensor(n)); + } +} + +template <int DataLayout> +static void test_argmax_tuple_reducer() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7); + index_tuples = tensor.index_tuples(); + + Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced; + DimensionList<DenseIndex, 4> dims; + reduced = index_tuples.reduce( + dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >()); + + Tensor<float, 0, DataLayout> maxi = tensor.maximum(); + + VERIFY_IS_EQUAL(maxi(), reduced(0).second); + + array<DenseIndex, 3> reduce_dims; + for (int d = 0; d < 3; ++d) reduce_dims[d] = d; + Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7); + reduced_by_dims = index_tuples.reduce( + reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >()); + + Tensor<float, 1, DataLayout> max_by_dims = tensor.maximum(reduce_dims); + + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(max_by_dims(l), reduced_by_dims(l).second); + } +} + +template <int DataLayout> +static void test_argmin_tuple_reducer() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7); + index_tuples = tensor.index_tuples(); + + Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced; + DimensionList<DenseIndex, 4> dims; + reduced = index_tuples.reduce( + dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >()); + + Tensor<float, 0, DataLayout> mini = tensor.minimum(); + + VERIFY_IS_EQUAL(mini(), reduced(0).second); + + array<DenseIndex, 3> reduce_dims; + for (int d = 0; d < 3; ++d) reduce_dims[d] = d; + Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7); + reduced_by_dims = index_tuples.reduce( + reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >()); + + Tensor<float, 1, DataLayout> min_by_dims = tensor.minimum(reduce_dims); + + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(min_by_dims(l), reduced_by_dims(l).second); + } +} + +template <int DataLayout> +static void test_simple_argmax() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + tensor(0,0,0,0) = 10.0; + + Tensor<DenseIndex, 0, DataLayout> tensor_argmax; + + tensor_argmax = 
tensor.argmax(); + + VERIFY_IS_EQUAL(tensor_argmax(0), 0); + + tensor(1,2,4,6) = 20.0; + + tensor_argmax = tensor.argmax(); + + VERIFY_IS_EQUAL(tensor_argmax(0), 2*3*5*7 - 1); +} + +template <int DataLayout> +static void test_simple_argmin() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + tensor(0,0,0,0) = -10.0; + + Tensor<DenseIndex, 0, DataLayout> tensor_argmin; + + tensor_argmin = tensor.argmin(); + + VERIFY_IS_EQUAL(tensor_argmin(0), 0); + + tensor(1,2,4,6) = -20.0; + + tensor_argmin = tensor.argmin(); + + VERIFY_IS_EQUAL(tensor_argmin(0), 2*3*5*7 - 1); +} + +template <int DataLayout> +static void test_argmax_dim() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + std::vector<int> dims {2, 3, 5, 7}; + + for (int dim = 0; dim < 4; ++dim) { + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor<DenseIndex, 3, DataLayout> tensor_argmax; + array<DenseIndex, 4> ix; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != 0) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = 10.0 + tensor(ix) = 10.0; + } + } + } + } + + tensor_argmax = tensor.argmax(dim); + + VERIFY_IS_EQUAL(tensor_argmax.size(), + ptrdiff_t(2*3*5*7 / tensor.dimension(dim))); + for (ptrdiff_t n = 0; n < tensor_argmax.size(); ++n) { + // Expect max to be in the first index of the reduced dimension + VERIFY_IS_EQUAL(tensor_argmax.data()[n], 0); + } + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != tensor.dimension(dim) - 1) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0 + tensor(ix) = 20.0; + } + } + } + } + + tensor_argmax = tensor.argmax(dim); + + VERIFY_IS_EQUAL(tensor_argmax.size(), + ptrdiff_t(2*3*5*7 / tensor.dimension(dim))); + for (ptrdiff_t n = 0; n < tensor_argmax.size(); ++n) { + // Expect max to be in the last index of the reduced dimension + VERIFY_IS_EQUAL(tensor_argmax.data()[n], tensor.dimension(dim) - 1); + } + } +} + +template <int DataLayout> +static void test_argmin_dim() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + std::vector<int> dims {2, 3, 5, 7}; + + for (int dim = 0; dim < 4; ++dim) { + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + Tensor<DenseIndex, 3, DataLayout> tensor_argmin; + array<DenseIndex, 4> ix; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != 0) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = -10.0 + tensor(ix) = -10.0; + } + } + } + } + + tensor_argmin = tensor.argmin(dim); + + VERIFY_IS_EQUAL(tensor_argmin.size(), + ptrdiff_t(2*3*5*7 / tensor.dimension(dim))); + for (ptrdiff_t n = 0; n < tensor_argmin.size(); ++n) { + // Expect min to be in the first index of the reduced dimension + VERIFY_IS_EQUAL(tensor_argmin.data()[n], 0); + } + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != tensor.dimension(dim) - 1) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = -20.0 + 
tensor(ix) = -20.0; + } + } + } + } + + tensor_argmin = tensor.argmin(dim); + + VERIFY_IS_EQUAL(tensor_argmin.size(), + ptrdiff_t(2*3*5*7 / tensor.dimension(dim))); + for (ptrdiff_t n = 0; n < tensor_argmin.size(); ++n) { + // Expect min to be in the last index of the reduced dimension + VERIFY_IS_EQUAL(tensor_argmin.data()[n], tensor.dimension(dim) - 1); + } + } +} + +void test_cxx11_tensor_argmax() +{ + CALL_SUBTEST(test_simple_index_tuples<RowMajor>()); + CALL_SUBTEST(test_simple_index_tuples<ColMajor>()); + CALL_SUBTEST(test_index_tuples_dim<RowMajor>()); + CALL_SUBTEST(test_index_tuples_dim<ColMajor>()); + CALL_SUBTEST(test_argmax_tuple_reducer<RowMajor>()); + CALL_SUBTEST(test_argmax_tuple_reducer<ColMajor>()); + CALL_SUBTEST(test_argmin_tuple_reducer<RowMajor>()); + CALL_SUBTEST(test_argmin_tuple_reducer<ColMajor>()); + CALL_SUBTEST(test_simple_argmax<RowMajor>()); + CALL_SUBTEST(test_simple_argmax<ColMajor>()); + CALL_SUBTEST(test_simple_argmin<RowMajor>()); + CALL_SUBTEST(test_simple_argmin<ColMajor>()); + CALL_SUBTEST(test_argmax_dim<RowMajor>()); + CALL_SUBTEST(test_argmax_dim<ColMajor>()); + CALL_SUBTEST(test_argmin_dim<RowMajor>()); + CALL_SUBTEST(test_argmin_dim<ColMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_argmax_cuda.cu b/eigen/unsupported/test/cxx11_tensor_argmax_cuda.cu new file mode 100644 index 0000000..653443d --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_argmax_cuda.cu @@ -0,0 +1,254 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
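+
+// GPU counterpart of cxx11_tensor_argmax.cpp: evaluates argmax()/argmin()
+// through a GpuDevice and copies the results back to the host for checking.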
+ + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_FUNC cxx11_tensor_cuda +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template <int Layout> +void test_cuda_simple_argmax() +{ + Tensor<double, 3, Layout> in(Eigen::array<DenseIndex, 3>(72,53,97)); + Tensor<DenseIndex, 1, Layout> out_max(Eigen::array<DenseIndex, 1>(1)); + Tensor<DenseIndex, 1, Layout> out_min(Eigen::array<DenseIndex, 1>(1)); + in.setRandom(); + in *= in.constant(100.0); + in(0, 0, 0) = -1000.0; + in(71, 52, 96) = 1000.0; + + std::size_t in_bytes = in.size() * sizeof(double); + std::size_t out_bytes = out_max.size() * sizeof(DenseIndex); + + double* d_in; + DenseIndex* d_out_max; + DenseIndex* d_out_min; + cudaMalloc((void**)(&d_in), in_bytes); + cudaMalloc((void**)(&d_out_max), out_bytes); + cudaMalloc((void**)(&d_out_min), out_bytes); + + cudaMemcpy(d_in, in.data(), in_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<double, 3, Layout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 3>(72,53,97)); + Eigen::TensorMap<Eigen::Tensor<DenseIndex, 1, Layout>, Aligned > gpu_out_max(d_out_max, Eigen::array<DenseIndex, 1>(1)); + Eigen::TensorMap<Eigen::Tensor<DenseIndex, 1, Layout>, Aligned > gpu_out_min(d_out_min, Eigen::array<DenseIndex, 1>(1)); + + gpu_out_max.device(gpu_device) = gpu_in.argmax(); + gpu_out_min.device(gpu_device) = gpu_in.argmin(); + + assert(cudaMemcpyAsync(out_max.data(), d_out_max, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaMemcpyAsync(out_min.data(), d_out_min, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + VERIFY_IS_EQUAL(out_max(Eigen::array<DenseIndex, 1>(0)), 72*53*97 - 1); + VERIFY_IS_EQUAL(out_min(Eigen::array<DenseIndex, 1>(0)), 0); + + cudaFree(d_in); + cudaFree(d_out_max); + cudaFree(d_out_min); +} + +template <int DataLayout> +void test_cuda_argmax_dim() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + std::vector<int> dims; + dims.push_back(2); dims.push_back(3); dims.push_back(5); dims.push_back(7); + + for (int dim = 0; dim < 4; ++dim) { + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + array<DenseIndex, 3> out_shape; + for (int d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? 
dims[d] : dims[d+1]; + + Tensor<DenseIndex, 3, DataLayout> tensor_arg(out_shape); + + array<DenseIndex, 4> ix; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != 0) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = 10.0 + tensor(ix) = 10.0; + } + } + } + } + + std::size_t in_bytes = tensor.size() * sizeof(float); + std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex); + + float* d_in; + DenseIndex* d_out; + cudaMalloc((void**)(&d_in), in_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 4>(2, 3, 5, 7)); + Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout>, Aligned > gpu_out(d_out, out_shape); + + gpu_out.device(gpu_device) = gpu_in.argmax(dim); + + assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + VERIFY_IS_EQUAL(tensor_arg.size(), + size_t(2*3*5*7 / tensor.dimension(dim))); + + for (DenseIndex n = 0; n < tensor_arg.size(); ++n) { + // Expect max to be in the first index of the reduced dimension + VERIFY_IS_EQUAL(tensor_arg.data()[n], 0); + } + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l; + if (ix[dim] != tensor.dimension(dim) - 1) continue; + // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0 + tensor(ix) = 20.0; + } + } + } + } + + cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice); + + gpu_out.device(gpu_device) = gpu_in.argmax(dim); + + assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (DenseIndex n = 0; n < tensor_arg.size(); ++n) { + // Expect max to be in the last index of the reduced dimension + VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1); + } + + cudaFree(d_in); + cudaFree(d_out); + } +} + +template <int DataLayout> +void test_cuda_argmin_dim() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + std::vector<int> dims; + dims.push_back(2); dims.push_back(3); dims.push_back(5); dims.push_back(7); + + for (int dim = 0; dim < 4; ++dim) { + tensor.setRandom(); + tensor = (tensor + tensor.constant(0.5)).log(); + + array<DenseIndex, 3> out_shape; + for (int d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? 
dims[d] : dims[d+1];
+
+    Tensor<DenseIndex, 3, DataLayout> tensor_arg(out_shape);
+
+    array<DenseIndex, 4> ix;
+    for (int i = 0; i < 2; ++i) {
+      for (int j = 0; j < 3; ++j) {
+        for (int k = 0; k < 5; ++k) {
+          for (int l = 0; l < 7; ++l) {
+            ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
+            if (ix[dim] != 0) continue;
+            // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = -10.0
+            tensor(ix) = -10.0;
+          }
+        }
+      }
+    }
+
+    std::size_t in_bytes = tensor.size() * sizeof(float);
+    std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex);
+
+    float* d_in;
+    DenseIndex* d_out;
+    cudaMalloc((void**)(&d_in), in_bytes);
+    cudaMalloc((void**)(&d_out), out_bytes);
+
+    cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice);
+
+    Eigen::CudaStreamDevice stream;
+    Eigen::GpuDevice gpu_device(&stream);
+
+    Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout>, Aligned > gpu_in(d_in, Eigen::array<DenseIndex, 4>(2, 3, 5, 7));
+    Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout>, Aligned > gpu_out(d_out, out_shape);
+
+    gpu_out.device(gpu_device) = gpu_in.argmin(dim);
+
+    assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+    assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+    VERIFY_IS_EQUAL(tensor_arg.size(),
+                    size_t(2*3*5*7 / tensor.dimension(dim)));
+
+    for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
+      // Expect min to be in the first index of the reduced dimension
+      VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
+    }
+
+    for (int i = 0; i < 2; ++i) {
+      for (int j = 0; j < 3; ++j) {
+        for (int k = 0; k < 5; ++k) {
+          for (int l = 0; l < 7; ++l) {
+            ix[0] = i; ix[1] = j; ix[2] = k; ix[3] = l;
+            if (ix[dim] != tensor.dimension(dim) - 1) continue;
+            // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = -20.0
+            tensor(ix) = -20.0;
+          }
+        }
+      }
+    }
+
+    cudaMemcpy(d_in, tensor.data(), in_bytes, cudaMemcpyHostToDevice);
+
+    gpu_out.device(gpu_device) = gpu_in.argmin(dim);
+
+    assert(cudaMemcpyAsync(tensor_arg.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+    assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+    for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
+      // Expect min to be in the last index of the reduced dimension
+      VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
+    }
+
+    cudaFree(d_in);
+    cudaFree(d_out);
+  }
+}
+
+void test_cxx11_tensor_cuda()
+{
+  CALL_SUBTEST_1(test_cuda_simple_argmax<RowMajor>());
+  CALL_SUBTEST_1(test_cuda_simple_argmax<ColMajor>());
+  CALL_SUBTEST_2(test_cuda_argmax_dim<RowMajor>());
+  CALL_SUBTEST_2(test_cuda_argmax_dim<ColMajor>());
+  CALL_SUBTEST_3(test_cuda_argmin_dim<RowMajor>());
+  CALL_SUBTEST_3(test_cuda_argmin_dim<ColMajor>());
+}
diff --git a/eigen/unsupported/test/cxx11_tensor_assign.cpp b/eigen/unsupported/test/cxx11_tensor_assign.cpp
new file mode 100644
index 0000000..8fe85d8
--- /dev/null
+++ b/eigen/unsupported/test/cxx11_tensor_assign.cpp
@@ -0,0 +1,370 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
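+
+// Checks tensor assignment between column-major and row-major tensors and
+// TensorMaps, plus automatic resizing, compound assignment operators, and
+// std::initializer_list-based setValues().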
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+using Eigen::RowMajor;
+
+static void test_1d()
+{
+  Tensor<int, 1> vec1(6);
+  Tensor<int, 1, RowMajor> vec2(6);
+  vec1(0) = 4; vec2(0) = 0;
+  vec1(1) = 8; vec2(1) = 1;
+  vec1(2) = 15; vec2(2) = 2;
+  vec1(3) = 16; vec2(3) = 3;
+  vec1(4) = 23; vec2(4) = 4;
+  vec1(5) = 42; vec2(5) = 5;
+
+  int col_major[6];
+  int row_major[6];
+  memset(col_major, 0, 6*sizeof(int));
+  memset(row_major, 0, 6*sizeof(int));
+  TensorMap<Tensor<int, 1> > vec3(col_major, 6);
+  TensorMap<Tensor<int, 1, RowMajor> > vec4(row_major, 6);
+
+  vec3 = vec1;
+  vec4 = vec2;
+
+  VERIFY_IS_EQUAL(vec3(0), 4);
+  VERIFY_IS_EQUAL(vec3(1), 8);
+  VERIFY_IS_EQUAL(vec3(2), 15);
+  VERIFY_IS_EQUAL(vec3(3), 16);
+  VERIFY_IS_EQUAL(vec3(4), 23);
+  VERIFY_IS_EQUAL(vec3(5), 42);
+
+  VERIFY_IS_EQUAL(vec4(0), 0);
+  VERIFY_IS_EQUAL(vec4(1), 1);
+  VERIFY_IS_EQUAL(vec4(2), 2);
+  VERIFY_IS_EQUAL(vec4(3), 3);
+  VERIFY_IS_EQUAL(vec4(4), 4);
+  VERIFY_IS_EQUAL(vec4(5), 5);
+
+  vec1.setZero();
+  vec2.setZero();
+  vec1 = vec3;
+  vec2 = vec4;
+
+  VERIFY_IS_EQUAL(vec1(0), 4);
+  VERIFY_IS_EQUAL(vec1(1), 8);
+  VERIFY_IS_EQUAL(vec1(2), 15);
+  VERIFY_IS_EQUAL(vec1(3), 16);
+  VERIFY_IS_EQUAL(vec1(4), 23);
+  VERIFY_IS_EQUAL(vec1(5), 42);
+
+  VERIFY_IS_EQUAL(vec2(0), 0);
+  VERIFY_IS_EQUAL(vec2(1), 1);
+  VERIFY_IS_EQUAL(vec2(2), 2);
+  VERIFY_IS_EQUAL(vec2(3), 3);
+  VERIFY_IS_EQUAL(vec2(4), 4);
+  VERIFY_IS_EQUAL(vec2(5), 5);
+}
+
+static void test_2d()
+{
+  Tensor<int, 2> mat1(2,3);
+  Tensor<int, 2, RowMajor> mat2(2,3);
+
+  mat1(0,0) = 0;
+  mat1(0,1) = 1;
+  mat1(0,2) = 2;
+  mat1(1,0) = 3;
+  mat1(1,1) = 4;
+  mat1(1,2) = 5;
+
+  mat2(0,0) = 0;
+  mat2(0,1) = 1;
+  mat2(0,2) = 2;
+  mat2(1,0) = 3;
+  mat2(1,1) = 4;
+  mat2(1,2) = 5;
+
+  int col_major[6];
+  int row_major[6];
+  memset(col_major, 0, 6*sizeof(int));
+  memset(row_major, 0, 6*sizeof(int));
+  // mat3 is a column-major map and mat4 a row-major one, so each gets the
+  // matching buffer, as in test_1d and test_3d.
+  TensorMap<Tensor<int, 2> > mat3(col_major, 2, 3);
+  TensorMap<Tensor<int, 2, RowMajor> > mat4(row_major, 2, 3);
+
+  mat3 = mat1;
+  mat4 = mat2;
+
+  VERIFY_IS_EQUAL(mat3(0,0), 0);
+  VERIFY_IS_EQUAL(mat3(0,1), 1);
+  VERIFY_IS_EQUAL(mat3(0,2), 2);
+  VERIFY_IS_EQUAL(mat3(1,0), 3);
+  VERIFY_IS_EQUAL(mat3(1,1), 4);
+  VERIFY_IS_EQUAL(mat3(1,2), 5);
+
+  VERIFY_IS_EQUAL(mat4(0,0), 0);
+  VERIFY_IS_EQUAL(mat4(0,1), 1);
+  VERIFY_IS_EQUAL(mat4(0,2), 2);
+  VERIFY_IS_EQUAL(mat4(1,0), 3);
+  VERIFY_IS_EQUAL(mat4(1,1), 4);
+  VERIFY_IS_EQUAL(mat4(1,2), 5);
+
+  mat1.setZero();
+  mat2.setZero();
+  mat1 = mat3;
+  mat2 = mat4;
+
+  VERIFY_IS_EQUAL(mat1(0,0), 0);
+  VERIFY_IS_EQUAL(mat1(0,1), 1);
+  VERIFY_IS_EQUAL(mat1(0,2), 2);
+  VERIFY_IS_EQUAL(mat1(1,0), 3);
+  VERIFY_IS_EQUAL(mat1(1,1), 4);
+  VERIFY_IS_EQUAL(mat1(1,2), 5);
+
+  VERIFY_IS_EQUAL(mat2(0,0), 0);
+  VERIFY_IS_EQUAL(mat2(0,1), 1);
+  VERIFY_IS_EQUAL(mat2(0,2), 2);
+  VERIFY_IS_EQUAL(mat2(1,0), 3);
+  VERIFY_IS_EQUAL(mat2(1,1), 4);
+  VERIFY_IS_EQUAL(mat2(1,2), 5);
+}
+
+static void test_3d()
+{
+  Tensor<int, 3> mat1(2,3,7);
+  Tensor<int, 3, RowMajor> mat2(2,3,7);
+
+  int val = 0;
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 7; ++k) {
+        mat1(i,j,k) = val;
+        mat2(i,j,k) = val;
+        val++;
+      }
+    }
+  }
+
+  int col_major[2*3*7];
+  int row_major[2*3*7];
+  memset(col_major, 0, 2*3*7*sizeof(int));
+  memset(row_major, 0, 2*3*7*sizeof(int));
+  TensorMap<Tensor<int, 3> > mat3(col_major, 2, 3, 7);
+  TensorMap<Tensor<int, 3, RowMajor> > mat4(row_major, 2, 3, 7);
+
+  mat3 = mat1;
+  mat4 = mat2;
+
+  val = 0;
+  for (int i = 0; i < 2; ++i) {
+    for (int j = 0; j < 3; ++j) {
+      for (int k = 0; k < 
7; ++k) { + VERIFY_IS_EQUAL(mat3(i,j,k), val); + VERIFY_IS_EQUAL(mat4(i,j,k), val); + val++; + } + } + } + + mat1.setZero(); + mat2.setZero(); + mat1 = mat3; + mat2 = mat4; + + val = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(mat1(i,j,k), val); + VERIFY_IS_EQUAL(mat2(i,j,k), val); + val++; + } + } + } +} + +static void test_same_type() +{ + Tensor<int, 1> orig_tensor(5); + Tensor<int, 1> dest_tensor(5); + orig_tensor.setRandom(); + dest_tensor.setRandom(); + int* orig_data = orig_tensor.data(); + int* dest_data = dest_tensor.data(); + dest_tensor = orig_tensor; + VERIFY_IS_EQUAL(orig_tensor.data(), orig_data); + VERIFY_IS_EQUAL(dest_tensor.data(), dest_data); + for (int i = 0; i < 5; ++i) { + VERIFY_IS_EQUAL(dest_tensor(i), orig_tensor(i)); + } + + TensorFixedSize<int, Sizes<5> > orig_array; + TensorFixedSize<int, Sizes<5> > dest_array; + orig_array.setRandom(); + dest_array.setRandom(); + orig_data = orig_array.data(); + dest_data = dest_array.data(); + dest_array = orig_array; + VERIFY_IS_EQUAL(orig_array.data(), orig_data); + VERIFY_IS_EQUAL(dest_array.data(), dest_data); + for (int i = 0; i < 5; ++i) { + VERIFY_IS_EQUAL(dest_array(i), orig_array(i)); + } + + int orig[5] = {1, 2, 3, 4, 5}; + int dest[5] = {6, 7, 8, 9, 10}; + TensorMap<Tensor<int, 1> > orig_map(orig, 5); + TensorMap<Tensor<int, 1> > dest_map(dest, 5); + orig_data = orig_map.data(); + dest_data = dest_map.data(); + dest_map = orig_map; + VERIFY_IS_EQUAL(orig_map.data(), orig_data); + VERIFY_IS_EQUAL(dest_map.data(), dest_data); + for (int i = 0; i < 5; ++i) { + VERIFY_IS_EQUAL(dest[i], i+1); + } +} + +static void test_auto_resize() +{ + Tensor<int, 1> tensor1; + Tensor<int, 1> tensor2(3); + Tensor<int, 1> tensor3(5); + Tensor<int, 1> tensor4(7); + + Tensor<int, 1> new_tensor(5); + new_tensor.setRandom(); + + tensor1 = tensor2 = tensor3 = tensor4 = new_tensor; + + VERIFY_IS_EQUAL(tensor1.dimension(0), new_tensor.dimension(0)); + VERIFY_IS_EQUAL(tensor2.dimension(0), new_tensor.dimension(0)); + VERIFY_IS_EQUAL(tensor3.dimension(0), new_tensor.dimension(0)); + VERIFY_IS_EQUAL(tensor4.dimension(0), new_tensor.dimension(0)); + for (int i = 0; i < new_tensor.dimension(0); ++i) { + VERIFY_IS_EQUAL(tensor1(i), new_tensor(i)); + VERIFY_IS_EQUAL(tensor2(i), new_tensor(i)); + VERIFY_IS_EQUAL(tensor3(i), new_tensor(i)); + VERIFY_IS_EQUAL(tensor4(i), new_tensor(i)); + } +} + + +static void test_compound_assign() +{ + Tensor<int, 1> start_tensor(10); + Tensor<int, 1> offset_tensor(10); + start_tensor.setRandom(); + offset_tensor.setRandom(); + + Tensor<int, 1> tensor = start_tensor; + tensor += offset_tensor; + for (int i = 0; i < 10; ++i) { + VERIFY_IS_EQUAL(tensor(i), start_tensor(i) + offset_tensor(i)); + } + + tensor = start_tensor; + tensor -= offset_tensor; + for (int i = 0; i < 10; ++i) { + VERIFY_IS_EQUAL(tensor(i), start_tensor(i) - offset_tensor(i)); + } + + tensor = start_tensor; + tensor *= offset_tensor; + for (int i = 0; i < 10; ++i) { + VERIFY_IS_EQUAL(tensor(i), start_tensor(i) * offset_tensor(i)); + } + + tensor = start_tensor; + tensor /= offset_tensor; + for (int i = 0; i < 10; ++i) { + VERIFY_IS_EQUAL(tensor(i), start_tensor(i) / offset_tensor(i)); + } +} + +static void test_std_initializers_tensor() { +#if EIGEN_HAS_VARIADIC_TEMPLATES + Tensor<int, 1> a(3); + a.setValues({0, 1, 2}); + VERIFY_IS_EQUAL(a(0), 0); + VERIFY_IS_EQUAL(a(1), 1); + VERIFY_IS_EQUAL(a(2), 2); + + // It fills the top-left slice. 
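+  // (With fewer values than the tensor holds, only the leading coefficients
+  // are overwritten; the remaining ones keep their previous values, as the
+  // checks below confirm.)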
+ a.setValues({10, 20}); + VERIFY_IS_EQUAL(a(0), 10); + VERIFY_IS_EQUAL(a(1), 20); + VERIFY_IS_EQUAL(a(2), 2); + + // Chaining. + Tensor<int, 1> a2(3); + a2 = a.setValues({100, 200, 300}); + VERIFY_IS_EQUAL(a(0), 100); + VERIFY_IS_EQUAL(a(1), 200); + VERIFY_IS_EQUAL(a(2), 300); + VERIFY_IS_EQUAL(a2(0), 100); + VERIFY_IS_EQUAL(a2(1), 200); + VERIFY_IS_EQUAL(a2(2), 300); + + Tensor<int, 2> b(2, 3); + b.setValues({{0, 1, 2}, {3, 4, 5}}); + VERIFY_IS_EQUAL(b(0, 0), 0); + VERIFY_IS_EQUAL(b(0, 1), 1); + VERIFY_IS_EQUAL(b(0, 2), 2); + VERIFY_IS_EQUAL(b(1, 0), 3); + VERIFY_IS_EQUAL(b(1, 1), 4); + VERIFY_IS_EQUAL(b(1, 2), 5); + + // It fills the top-left slice. + b.setValues({{10, 20}, {30}}); + VERIFY_IS_EQUAL(b(0, 0), 10); + VERIFY_IS_EQUAL(b(0, 1), 20); + VERIFY_IS_EQUAL(b(0, 2), 2); + VERIFY_IS_EQUAL(b(1, 0), 30); + VERIFY_IS_EQUAL(b(1, 1), 4); + VERIFY_IS_EQUAL(b(1, 2), 5); + + Eigen::Tensor<int, 3> c(3, 2, 4); + c.setValues({{{0, 1, 2, 3}, {4, 5, 6, 7}}, + {{10, 11, 12, 13}, {14, 15, 16, 17}}, + {{20, 21, 22, 23}, {24, 25, 26, 27}}}); + VERIFY_IS_EQUAL(c(0, 0, 0), 0); + VERIFY_IS_EQUAL(c(0, 0, 1), 1); + VERIFY_IS_EQUAL(c(0, 0, 2), 2); + VERIFY_IS_EQUAL(c(0, 0, 3), 3); + VERIFY_IS_EQUAL(c(0, 1, 0), 4); + VERIFY_IS_EQUAL(c(0, 1, 1), 5); + VERIFY_IS_EQUAL(c(0, 1, 2), 6); + VERIFY_IS_EQUAL(c(0, 1, 3), 7); + VERIFY_IS_EQUAL(c(1, 0, 0), 10); + VERIFY_IS_EQUAL(c(1, 0, 1), 11); + VERIFY_IS_EQUAL(c(1, 0, 2), 12); + VERIFY_IS_EQUAL(c(1, 0, 3), 13); + VERIFY_IS_EQUAL(c(1, 1, 0), 14); + VERIFY_IS_EQUAL(c(1, 1, 1), 15); + VERIFY_IS_EQUAL(c(1, 1, 2), 16); + VERIFY_IS_EQUAL(c(1, 1, 3), 17); + VERIFY_IS_EQUAL(c(2, 0, 0), 20); + VERIFY_IS_EQUAL(c(2, 0, 1), 21); + VERIFY_IS_EQUAL(c(2, 0, 2), 22); + VERIFY_IS_EQUAL(c(2, 0, 3), 23); + VERIFY_IS_EQUAL(c(2, 1, 0), 24); + VERIFY_IS_EQUAL(c(2, 1, 1), 25); + VERIFY_IS_EQUAL(c(2, 1, 2), 26); + VERIFY_IS_EQUAL(c(2, 1, 3), 27); +#endif // EIGEN_HAS_VARIADIC_TEMPLATES +} + +void test_cxx11_tensor_assign() +{ + CALL_SUBTEST(test_1d()); + CALL_SUBTEST(test_2d()); + CALL_SUBTEST(test_3d()); + CALL_SUBTEST(test_same_type()); + CALL_SUBTEST(test_auto_resize()); + CALL_SUBTEST(test_compound_assign()); + CALL_SUBTEST(test_std_initializers_tensor()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp new file mode 100644 index 0000000..21fdfca --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_broadcast_sycl.cpp @@ -0,0 +1,144 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
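+
+// broadcast() tiles the input along every dimension, so the expected relation
+// is out(i,j,k,l) == in(i % inDim1, j % inDim2, k % inDim3, l % inDim4).
+// A minimal host-side sketch of the same operation (illustration only, not
+// part of this test):
+//   Eigen::Tensor<float, 1> t(3);
+//   t.setValues({1.f, 2.f, 3.f});
+//   Eigen::array<Eigen::DenseIndex, 1> b{{2}};
+//   Eigen::Tensor<float, 1> r = t.broadcast(b);  // size 6: 1 2 3 1 2 3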
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_broadcast_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; + +template <typename DataType, int DataLayout, typename IndexType> +static void test_broadcast_sycl_fixed(const Eigen::SyclDevice &sycl_device){ + + // BROADCAST test: + IndexType inDim1=2; + IndexType inDim2=3; + IndexType inDim3=5; + IndexType inDim4=7; + IndexType bDim1=2; + IndexType bDim2=3; + IndexType bDim3=1; + IndexType bDim4=4; + array<IndexType, 4> in_range = {{inDim1, inDim2, inDim3, inDim4}}; + array<IndexType, 4> broadcasts = {{bDim1, bDim2, bDim3, bDim4}}; + array<IndexType, 4> out_range; // = in_range * broadcasts + for (size_t i = 0; i < out_range.size(); ++i) + out_range[i] = in_range[i] * broadcasts[i]; + + Tensor<DataType, 4, DataLayout, IndexType> input(in_range); + Tensor<DataType, 4, DataLayout, IndexType> out(out_range); + + for (size_t i = 0; i < in_range.size(); ++i) + VERIFY_IS_EQUAL(out.dimension(i), out_range[i]); + + + for (IndexType i = 0; i < input.size(); ++i) + input(i) = static_cast<DataType>(i); + + DataType * gpu_in_data = static_cast<DataType*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(DataType))); + DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType))); + + TensorMap<TensorFixedSize<DataType, Sizes<2, 3, 5, 7>, DataLayout, IndexType>> gpu_in(gpu_in_data, in_range); + TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_out(gpu_out_data, out_range); + sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(DataType)); + gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts); + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType)); + + for (IndexType i = 0; i < inDim1*bDim1; ++i) { + for (IndexType j = 0; j < inDim2*bDim2; ++j) { + for (IndexType k = 0; k < inDim3*bDim3; ++k) { + for (IndexType l = 0; l < inDim4*bDim4; ++l) { + VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), out(i,j,k,l)); + } + } + } + } + printf("Broadcast Test with fixed size Passed\n"); + sycl_device.deallocate(gpu_in_data); + sycl_device.deallocate(gpu_out_data); +} + +template <typename DataType, int DataLayout, typename IndexType> +static void test_broadcast_sycl(const Eigen::SyclDevice &sycl_device){ + + // BROADCAST test: + IndexType inDim1=2; + IndexType inDim2=3; + IndexType inDim3=5; + IndexType inDim4=7; + IndexType bDim1=2; + IndexType bDim2=3; + IndexType bDim3=1; + IndexType bDim4=4; + array<IndexType, 4> in_range = {{inDim1, inDim2, inDim3, inDim4}}; + array<IndexType, 4> broadcasts = {{bDim1, bDim2, bDim3, bDim4}}; + array<IndexType, 4> out_range; // = in_range * broadcasts + for (size_t i = 0; i < out_range.size(); ++i) + out_range[i] = in_range[i] * broadcasts[i]; + + Tensor<DataType, 4, DataLayout, IndexType> input(in_range); + Tensor<DataType, 4, DataLayout, IndexType> out(out_range); + + for (size_t i = 0; i < in_range.size(); ++i) + VERIFY_IS_EQUAL(out.dimension(i), out_range[i]); + + + for (IndexType i = 0; i < input.size(); ++i) + input(i) = static_cast<DataType>(i); + + DataType * gpu_in_data = static_cast<DataType*>(sycl_device.allocate(input.dimensions().TotalSize()*sizeof(DataType))); + DataType * gpu_out_data = 
static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType))); + + TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_in(gpu_in_data, in_range); + TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_out(gpu_out_data, out_range); + sycl_device.memcpyHostToDevice(gpu_in_data, input.data(),(input.dimensions().TotalSize())*sizeof(DataType)); + gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts); + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType)); + + for (IndexType i = 0; i < inDim1*bDim1; ++i) { + for (IndexType j = 0; j < inDim2*bDim2; ++j) { + for (IndexType k = 0; k < inDim3*bDim3; ++k) { + for (IndexType l = 0; l < inDim4*bDim4; ++l) { + VERIFY_IS_APPROX(input(i%inDim1,j%inDim2,k%inDim3,l%inDim4), out(i,j,k,l)); + } + } + } + } + printf("Broadcast Test Passed\n"); + sycl_device.deallocate(gpu_in_data); + sycl_device.deallocate(gpu_out_data); +} + +template<typename DataType> void sycl_broadcast_test_per_device(const cl::sycl::device& d){ + std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl; + QueueInterface queueInterface(d); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + test_broadcast_sycl<DataType, RowMajor, int64_t>(sycl_device); + test_broadcast_sycl<DataType, ColMajor, int64_t>(sycl_device); + test_broadcast_sycl_fixed<DataType, RowMajor, int64_t>(sycl_device); + test_broadcast_sycl_fixed<DataType, ColMajor, int64_t>(sycl_device); +} + +void test_cxx11_tensor_broadcast_sycl() { + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(sycl_broadcast_test_per_device<float>(device)); + } +} diff --git a/eigen/unsupported/test/cxx11_tensor_broadcasting.cpp b/eigen/unsupported/test/cxx11_tensor_broadcasting.cpp new file mode 100644 index 0000000..5c0ea58 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_broadcasting.cpp @@ -0,0 +1,194 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
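+
+// Host-side broadcast() coverage: the degenerate all-ones broadcast, runtime
+// broadcast factors on vectorizable (8) and non-vectorizable (11) leading
+// dimensions, and compile-time factors via Eigen::IndexList.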
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template <int DataLayout> +static void test_simple_broadcasting() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + array<ptrdiff_t, 4> broadcasts; + broadcasts[0] = 1; + broadcasts[1] = 1; + broadcasts[2] = 1; + broadcasts[3] = 1; + + Tensor<float, 4, DataLayout> no_broadcast; + no_broadcast = tensor.broadcast(broadcasts); + + VERIFY_IS_EQUAL(no_broadcast.dimension(0), 2); + VERIFY_IS_EQUAL(no_broadcast.dimension(1), 3); + VERIFY_IS_EQUAL(no_broadcast.dimension(2), 5); + VERIFY_IS_EQUAL(no_broadcast.dimension(3), 7); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), no_broadcast(i,j,k,l)); + } + } + } + } + + broadcasts[0] = 2; + broadcasts[1] = 3; + broadcasts[2] = 1; + broadcasts[3] = 4; + Tensor<float, 4, DataLayout> broadcast; + broadcast = tensor.broadcast(broadcasts); + + VERIFY_IS_EQUAL(broadcast.dimension(0), 4); + VERIFY_IS_EQUAL(broadcast.dimension(1), 9); + VERIFY_IS_EQUAL(broadcast.dimension(2), 5); + VERIFY_IS_EQUAL(broadcast.dimension(3), 28); + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 28; ++l) { + VERIFY_IS_EQUAL(tensor(i%2,j%3,k%5,l%7), broadcast(i,j,k,l)); + } + } + } + } +} + + +template <int DataLayout> +static void test_vectorized_broadcasting() +{ + Tensor<float, 3, DataLayout> tensor(8,3,5); + tensor.setRandom(); + array<ptrdiff_t, 3> broadcasts; + broadcasts[0] = 2; + broadcasts[1] = 3; + broadcasts[2] = 4; + + Tensor<float, 3, DataLayout> broadcast; + broadcast = tensor.broadcast(broadcasts); + + VERIFY_IS_EQUAL(broadcast.dimension(0), 16); + VERIFY_IS_EQUAL(broadcast.dimension(1), 9); + VERIFY_IS_EQUAL(broadcast.dimension(2), 20); + + for (int i = 0; i < 16; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 20; ++k) { + VERIFY_IS_EQUAL(tensor(i%8,j%3,k%5), broadcast(i,j,k)); + } + } + } + + tensor.resize(11,3,5); + tensor.setRandom(); + broadcast = tensor.broadcast(broadcasts); + + VERIFY_IS_EQUAL(broadcast.dimension(0), 22); + VERIFY_IS_EQUAL(broadcast.dimension(1), 9); + VERIFY_IS_EQUAL(broadcast.dimension(2), 20); + + for (int i = 0; i < 22; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 20; ++k) { + VERIFY_IS_EQUAL(tensor(i%11,j%3,k%5), broadcast(i,j,k)); + } + } + } +} + + +template <int DataLayout> +static void test_static_broadcasting() +{ + Tensor<float, 3, DataLayout> tensor(8,3,5); + tensor.setRandom(); + +#if EIGEN_HAS_CONSTEXPR + Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>, Eigen::type2index<4>> broadcasts; +#else + Eigen::array<int, 3> broadcasts; + broadcasts[0] = 2; + broadcasts[1] = 3; + broadcasts[2] = 4; +#endif + + Tensor<float, 3, DataLayout> broadcast; + broadcast = tensor.broadcast(broadcasts); + + VERIFY_IS_EQUAL(broadcast.dimension(0), 16); + VERIFY_IS_EQUAL(broadcast.dimension(1), 9); + VERIFY_IS_EQUAL(broadcast.dimension(2), 20); + + for (int i = 0; i < 16; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 20; ++k) { + VERIFY_IS_EQUAL(tensor(i%8,j%3,k%5), broadcast(i,j,k)); + } + } + } + + tensor.resize(11,3,5); + tensor.setRandom(); + broadcast = tensor.broadcast(broadcasts); + + VERIFY_IS_EQUAL(broadcast.dimension(0), 22); + VERIFY_IS_EQUAL(broadcast.dimension(1), 9); + VERIFY_IS_EQUAL(broadcast.dimension(2), 20); + + for (int i = 0; i < 22; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k 
= 0; k < 20; ++k) { + VERIFY_IS_EQUAL(tensor(i%11,j%3,k%5), broadcast(i,j,k)); + } + } + } +} + + +template <int DataLayout> +static void test_fixed_size_broadcasting() +{ + // Need to add a [] operator to the Size class for this to work +#if 0 + Tensor<float, 1, DataLayout> t1(10); + t1.setRandom(); + TensorFixedSize<float, Sizes<1>, DataLayout> t2; + t2 = t2.constant(20.0f); + + Tensor<float, 1, DataLayout> t3 = t1 + t2.broadcast(Eigen::array<int, 1>{{10}}); + for (int i = 0; i < 10; ++i) { + VERIFY_IS_APPROX(t3(i), t1(i) + t2(0)); + } + + TensorMap<TensorFixedSize<float, Sizes<1>, DataLayout> > t4(t2.data(), {{1}}); + Tensor<float, 1, DataLayout> t5 = t1 + t4.broadcast(Eigen::array<int, 1>{{10}}); + for (int i = 0; i < 10; ++i) { + VERIFY_IS_APPROX(t5(i), t1(i) + t2(0)); + } +#endif +} + + +void test_cxx11_tensor_broadcasting() +{ + CALL_SUBTEST(test_simple_broadcasting<ColMajor>()); + CALL_SUBTEST(test_simple_broadcasting<RowMajor>()); + CALL_SUBTEST(test_vectorized_broadcasting<ColMajor>()); + CALL_SUBTEST(test_vectorized_broadcasting<RowMajor>()); + CALL_SUBTEST(test_static_broadcasting<ColMajor>()); + CALL_SUBTEST(test_static_broadcasting<RowMajor>()); + CALL_SUBTEST(test_fixed_size_broadcasting<ColMajor>()); + CALL_SUBTEST(test_fixed_size_broadcasting<RowMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp new file mode 100644 index 0000000..400a31d --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_builtins_sycl.cpp @@ -0,0 +1,267 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
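+
+// A minimal sketch of the round-trip each macro below expands to (buffer
+// names abbreviated, error handling omitted); the device result is checked
+// element-wise against the corresponding std:: function on the host:
+//
+//   SCALAR *d_in = static_cast<SCALAR *>(sycl_device.allocate(bytes));
+//   SCALAR *d_out = static_cast<SCALAR *>(sycl_device.allocate(bytes));
+//   TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_in(d_in, tensorRange);
+//   TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(d_out, tensorRange);
+//   sycl_device.memcpyHostToDevice(d_in, in.data(), bytes);
+//   gpu_out.device(sycl_device) = gpu_in.sqrt();            // runs on device
+//   sycl_device.memcpyDeviceToHost(out.data(), d_out, bytes);
+//   for (int64_t i = 0; i < out.size(); ++i)
+//     VERIFY_IS_APPROX(out(i), std::sqrt(in(i)));           // host reference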
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_builtins_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; + +namespace std { +template <typename T> T rsqrt(T x) { return 1 / std::sqrt(x); } +template <typename T> T square(T x) { return x * x; } +template <typename T> T cube(T x) { return x * x * x; } +template <typename T> T inverse(T x) { return 1 / x; } +} + +#define TEST_UNARY_BUILTINS_FOR_SCALAR(FUNC, SCALAR, OPERATOR, Layout) \ + { \ + /* out OPERATOR in.FUNC() */ \ + Tensor<SCALAR, 3, Layout, int64_t> in(tensorRange); \ + Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \ + in = in.random() + static_cast<SCALAR>(0.01); \ + out = out.random() + static_cast<SCALAR>(0.01); \ + Tensor<SCALAR, 3, Layout, int64_t> reference(out); \ + SCALAR *gpu_data = static_cast<SCALAR *>( \ + sycl_device.allocate(in.size() * sizeof(SCALAR))); \ + SCALAR *gpu_data_out = static_cast<SCALAR *>( \ + sycl_device.allocate(out.size() * sizeof(SCALAR))); \ + TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu(gpu_data, tensorRange); \ + TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \ + sycl_device.memcpyHostToDevice(gpu_data, in.data(), \ + (in.size()) * sizeof(SCALAR)); \ + sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), \ + (out.size()) * sizeof(SCALAR)); \ + gpu_out.device(sycl_device) OPERATOR gpu.FUNC(); \ + sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \ + (out.size()) * sizeof(SCALAR)); \ + for (int64_t i = 0; i < out.size(); ++i) { \ + SCALAR ver = reference(i); \ + ver OPERATOR std::FUNC(in(i)); \ + VERIFY_IS_APPROX(out(i), ver); \ + } \ + sycl_device.deallocate(gpu_data); \ + sycl_device.deallocate(gpu_data_out); \ + } \ + { \ + /* out OPERATOR out.FUNC() */ \ + Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \ + out = out.random() + static_cast<SCALAR>(0.01); \ + Tensor<SCALAR, 3, Layout, int64_t> reference(out); \ + SCALAR *gpu_data_out = static_cast<SCALAR *>( \ + sycl_device.allocate(out.size() * sizeof(SCALAR))); \ + TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \ + sycl_device.memcpyHostToDevice(gpu_data_out, out.data(), \ + (out.size()) * sizeof(SCALAR)); \ + gpu_out.device(sycl_device) OPERATOR gpu_out.FUNC(); \ + sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \ + (out.size()) * sizeof(SCALAR)); \ + for (int64_t i = 0; i < out.size(); ++i) { \ + SCALAR ver = reference(i); \ + ver OPERATOR std::FUNC(reference(i)); \ + VERIFY_IS_APPROX(out(i), ver); \ + } \ + sycl_device.deallocate(gpu_data_out); \ + } + +#define TEST_UNARY_BUILTINS_OPERATOR(SCALAR, OPERATOR , Layout) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(abs, SCALAR, OPERATOR , Layout) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(sqrt, SCALAR, OPERATOR , Layout) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(rsqrt, SCALAR, OPERATOR , Layout) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(square, SCALAR, OPERATOR , Layout) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(cube, SCALAR, OPERATOR , Layout) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(inverse, SCALAR, OPERATOR , Layout) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(tanh, SCALAR, OPERATOR , Layout) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(exp, SCALAR, OPERATOR , Layout) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(expm1, SCALAR, OPERATOR , Layout) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(log, SCALAR, OPERATOR , Layout) \ + 
TEST_UNARY_BUILTINS_FOR_SCALAR(ceil, SCALAR, OPERATOR , Layout) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(floor, SCALAR, OPERATOR , Layout) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(round, SCALAR, OPERATOR , Layout) \ + TEST_UNARY_BUILTINS_FOR_SCALAR(log1p, SCALAR, OPERATOR , Layout) + +#define TEST_IS_THAT_RETURNS_BOOL(SCALAR, FUNC, Layout) \ + { \ + /* out = in.FUNC() */ \ + Tensor<SCALAR, 3, Layout, int64_t> in(tensorRange); \ + Tensor<bool, 3, Layout, int64_t> out(tensorRange); \ + in = in.random() + static_cast<SCALAR>(0.01); \ + SCALAR *gpu_data = static_cast<SCALAR *>( \ + sycl_device.allocate(in.size() * sizeof(SCALAR))); \ + bool *gpu_data_out = \ + static_cast<bool *>(sycl_device.allocate(out.size() * sizeof(bool))); \ + TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu(gpu_data, tensorRange); \ + TensorMap<Tensor<bool, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \ + sycl_device.memcpyHostToDevice(gpu_data, in.data(), \ + (in.size()) * sizeof(SCALAR)); \ + gpu_out.device(sycl_device) = gpu.FUNC(); \ + sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \ + (out.size()) * sizeof(bool)); \ + for (int64_t i = 0; i < out.size(); ++i) { \ + VERIFY_IS_EQUAL(out(i), std::FUNC(in(i))); \ + } \ + sycl_device.deallocate(gpu_data); \ + sycl_device.deallocate(gpu_data_out); \ + } + +#define TEST_UNARY_BUILTINS(SCALAR, Layout) \ + TEST_UNARY_BUILTINS_OPERATOR(SCALAR, +=, Layout) \ + TEST_UNARY_BUILTINS_OPERATOR(SCALAR, =, Layout) \ + TEST_IS_THAT_RETURNS_BOOL(SCALAR, isnan, Layout) \ + TEST_IS_THAT_RETURNS_BOOL(SCALAR, isfinite, Layout) \ + TEST_IS_THAT_RETURNS_BOOL(SCALAR, isinf, Layout) + +static void test_builtin_unary_sycl(const Eigen::SyclDevice &sycl_device) { + int64_t sizeDim1 = 10; + int64_t sizeDim2 = 10; + int64_t sizeDim3 = 10; + array<int64_t, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + + TEST_UNARY_BUILTINS(float, RowMajor) + TEST_UNARY_BUILTINS(float, ColMajor) +} + +namespace std { +template <typename T> T cwiseMax(T x, T y) { return std::max(x, y); } +template <typename T> T cwiseMin(T x, T y) { return std::min(x, y); } +} + +#define TEST_BINARY_BUILTINS_FUNC(SCALAR, FUNC, Layout) \ + { \ + /* out = in_1.FUNC(in_2) */ \ + Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange); \ + Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange); \ + Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \ + in_1 = in_1.random() + static_cast<SCALAR>(0.01); \ + in_2 = in_2.random() + static_cast<SCALAR>(0.01); \ + Tensor<SCALAR, 3, Layout, int64_t> reference(out); \ + SCALAR *gpu_data_1 = static_cast<SCALAR *>( \ + sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \ + SCALAR *gpu_data_2 = static_cast<SCALAR *>( \ + sycl_device.allocate(in_2.size() * sizeof(SCALAR))); \ + SCALAR *gpu_data_out = static_cast<SCALAR *>( \ + sycl_device.allocate(out.size() * sizeof(SCALAR))); \ + TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange); \ + TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2, tensorRange); \ + TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \ + sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \ + (in_1.size()) * sizeof(SCALAR)); \ + sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(), \ + (in_2.size()) * sizeof(SCALAR)); \ + gpu_out.device(sycl_device) = gpu_1.FUNC(gpu_2); \ + sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \ + (out.size()) * sizeof(SCALAR)); \ + for (int64_t i = 0; i < out.size(); ++i) { \ + SCALAR 
ver = reference(i); \ + ver = std::FUNC(in_1(i), in_2(i)); \ + VERIFY_IS_APPROX(out(i), ver); \ + } \ + sycl_device.deallocate(gpu_data_1); \ + sycl_device.deallocate(gpu_data_2); \ + sycl_device.deallocate(gpu_data_out); \ + } + +#define TEST_BINARY_BUILTINS_OPERATORS(SCALAR, OPERATOR, Layout) \ + { \ + /* out = in_1 OPERATOR in_2 */ \ + Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange); \ + Tensor<SCALAR, 3, Layout, int64_t> in_2(tensorRange); \ + Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \ + in_1 = in_1.random() + static_cast<SCALAR>(0.01); \ + in_2 = in_2.random() + static_cast<SCALAR>(0.01); \ + Tensor<SCALAR, 3, Layout, int64_t> reference(out); \ + SCALAR *gpu_data_1 = static_cast<SCALAR *>( \ + sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \ + SCALAR *gpu_data_2 = static_cast<SCALAR *>( \ + sycl_device.allocate(in_2.size() * sizeof(SCALAR))); \ + SCALAR *gpu_data_out = static_cast<SCALAR *>( \ + sycl_device.allocate(out.size() * sizeof(SCALAR))); \ + TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange); \ + TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_2(gpu_data_2, tensorRange); \ + TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \ + sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \ + (in_1.size()) * sizeof(SCALAR)); \ + sycl_device.memcpyHostToDevice(gpu_data_2, in_2.data(), \ + (in_2.size()) * sizeof(SCALAR)); \ + gpu_out.device(sycl_device) = gpu_1 OPERATOR gpu_2; \ + sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \ + (out.size()) * sizeof(SCALAR)); \ + for (int64_t i = 0; i < out.size(); ++i) { \ + VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR in_2(i)); \ + } \ + sycl_device.deallocate(gpu_data_1); \ + sycl_device.deallocate(gpu_data_2); \ + sycl_device.deallocate(gpu_data_out); \ + } + +#define TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(SCALAR, OPERATOR, Layout) \ + { \ + /* out = in_1 OPERATOR 2 */ \ + Tensor<SCALAR, 3, Layout, int64_t> in_1(tensorRange); \ + Tensor<SCALAR, 3, Layout, int64_t> out(tensorRange); \ + in_1 = in_1.random() + static_cast<SCALAR>(0.01); \ + Tensor<SCALAR, 3, Layout, int64_t> reference(out); \ + SCALAR *gpu_data_1 = static_cast<SCALAR *>( \ + sycl_device.allocate(in_1.size() * sizeof(SCALAR))); \ + SCALAR *gpu_data_out = static_cast<SCALAR *>( \ + sycl_device.allocate(out.size() * sizeof(SCALAR))); \ + TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_1(gpu_data_1, tensorRange); \ + TensorMap<Tensor<SCALAR, 3, Layout, int64_t>> gpu_out(gpu_data_out, tensorRange); \ + sycl_device.memcpyHostToDevice(gpu_data_1, in_1.data(), \ + (in_1.size()) * sizeof(SCALAR)); \ + gpu_out.device(sycl_device) = gpu_1 OPERATOR 2; \ + sycl_device.memcpyDeviceToHost(out.data(), gpu_data_out, \ + (out.size()) * sizeof(SCALAR)); \ + for (int64_t i = 0; i < out.size(); ++i) { \ + VERIFY_IS_APPROX(out(i), in_1(i) OPERATOR 2); \ + } \ + sycl_device.deallocate(gpu_data_1); \ + sycl_device.deallocate(gpu_data_out); \ + } + +#define TEST_BINARY_BUILTINS(SCALAR, Layout) \ + TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMax , Layout) \ + TEST_BINARY_BUILTINS_FUNC(SCALAR, cwiseMin , Layout) \ + TEST_BINARY_BUILTINS_OPERATORS(SCALAR, + , Layout) \ + TEST_BINARY_BUILTINS_OPERATORS(SCALAR, - , Layout) \ + TEST_BINARY_BUILTINS_OPERATORS(SCALAR, * , Layout) \ + TEST_BINARY_BUILTINS_OPERATORS(SCALAR, / , Layout) + +static void test_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) { + int64_t sizeDim1 = 10; + int64_t sizeDim2 = 10; + int64_t sizeDim3 = 10; + array<int64_t, 3> 
tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + TEST_BINARY_BUILTINS(float, RowMajor) + TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, RowMajor) + TEST_BINARY_BUILTINS(float, ColMajor) + TEST_BINARY_BUILTINS_OPERATORS_THAT_TAKES_SCALAR(int, %, ColMajor) +} + +void test_cxx11_tensor_builtins_sycl() { + for (const auto& device :Eigen::get_sycl_supported_devices()) { + QueueInterface queueInterface(device); + Eigen::SyclDevice sycl_device(&queueInterface); + CALL_SUBTEST(test_builtin_unary_sycl(sycl_device)); + CALL_SUBTEST(test_builtin_binary_sycl(sycl_device)); + } +} diff --git a/eigen/unsupported/test/cxx11_tensor_cast_float16_cuda.cu b/eigen/unsupported/test/cxx11_tensor_cast_float16_cuda.cu new file mode 100644 index 0000000..88c2339 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_cast_float16_cuda.cu @@ -0,0 +1,82 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_cast_float16_cuda +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +void test_cuda_conversion() { + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + Tensor<float, 1> floats(num_elem); + floats.setRandom(); + + float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + Eigen::half* d_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float)); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float( + d_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_half( + d_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv( + d_conv, num_elem); + + gpu_device.memcpyHostToDevice(d_float, floats.data(), num_elem*sizeof(float)); + + gpu_half.device(gpu_device) = gpu_float.cast<Eigen::half>(); + gpu_conv.device(gpu_device) = gpu_half.cast<float>(); + + Tensor<float, 1> initial(num_elem); + Tensor<float, 1> final(num_elem); + gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float)); + gpu_device.synchronize(); + + for (int i = 0; i < num_elem; ++i) { + VERIFY_IS_APPROX(initial(i), final(i)); + } + + gpu_device.deallocate(d_float); + gpu_device.deallocate(d_half); + gpu_device.deallocate(d_conv); +} + + +void test_fallback_conversion() { + int num_elem = 101; + Tensor<float, 1> floats(num_elem); + floats.setRandom(); + + Eigen::Tensor<Eigen::half, 1> halfs = floats.cast<Eigen::half>(); + Eigen::Tensor<float, 1> conv = halfs.cast<float>(); + + for (int i = 0; i < num_elem; ++i) { + VERIFY_IS_APPROX(floats(i), conv(i)); + } +} + + +void test_cxx11_tensor_cast_float16_cuda() +{ + CALL_SUBTEST(test_cuda_conversion()); + CALL_SUBTEST(test_fallback_conversion()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_casts.cpp b/eigen/unsupported/test/cxx11_tensor_casts.cpp new file 
mode 100644 index 0000000..3c6d0d2 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_casts.cpp @@ -0,0 +1,115 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::array; + +static void test_simple_cast() +{ + Tensor<float, 2> ftensor(20,30); + ftensor = ftensor.random() * 100.f; + Tensor<char, 2> chartensor(20,30); + chartensor.setRandom(); + Tensor<std::complex<float>, 2> cplextensor(20,30); + cplextensor.setRandom(); + + chartensor = ftensor.cast<char>(); + cplextensor = ftensor.cast<std::complex<float> >(); + + for (int i = 0; i < 20; ++i) { + for (int j = 0; j < 30; ++j) { + VERIFY_IS_EQUAL(chartensor(i,j), static_cast<char>(ftensor(i,j))); + VERIFY_IS_EQUAL(cplextensor(i,j), static_cast<std::complex<float> >(ftensor(i,j))); + } + } +} + + +static void test_vectorized_cast() +{ + Tensor<int, 2> itensor(20,30); + itensor = itensor.random() / 1000; + Tensor<float, 2> ftensor(20,30); + ftensor.setRandom(); + Tensor<double, 2> dtensor(20,30); + dtensor.setRandom(); + + ftensor = itensor.cast<float>(); + dtensor = itensor.cast<double>(); + + for (int i = 0; i < 20; ++i) { + for (int j = 0; j < 30; ++j) { + VERIFY_IS_EQUAL(itensor(i,j), static_cast<int>(ftensor(i,j))); + VERIFY_IS_EQUAL(dtensor(i,j), static_cast<double>(ftensor(i,j))); + } + } +} + + +static void test_float_to_int_cast() +{ + Tensor<float, 2> ftensor(20,30); + ftensor = ftensor.random() * 1000.0f; + Tensor<double, 2> dtensor(20,30); + dtensor = dtensor.random() * 1000.0; + + Tensor<int, 2> i1tensor = ftensor.cast<int>(); + Tensor<int, 2> i2tensor = dtensor.cast<int>(); + + for (int i = 0; i < 20; ++i) { + for (int j = 0; j < 30; ++j) { + VERIFY_IS_EQUAL(i1tensor(i,j), static_cast<int>(ftensor(i,j))); + VERIFY_IS_EQUAL(i2tensor(i,j), static_cast<int>(dtensor(i,j))); + } + } +} + + +static void test_big_to_small_type_cast() +{ + Tensor<double, 2> dtensor(20, 30); + dtensor.setRandom(); + Tensor<float, 2> ftensor(20, 30); + ftensor = dtensor.cast<float>(); + + for (int i = 0; i < 20; ++i) { + for (int j = 0; j < 30; ++j) { + VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j))); + } + } +} + + +static void test_small_to_big_type_cast() +{ + Tensor<float, 2> ftensor(20, 30); + ftensor.setRandom(); + Tensor<double, 2> dtensor(20, 30); + dtensor = ftensor.cast<double>(); + + for (int i = 0; i < 20; ++i) { + for (int j = 0; j < 30; ++j) { + VERIFY_IS_APPROX(dtensor(i,j), static_cast<double>(ftensor(i,j))); + } + } +} + + +void test_cxx11_tensor_casts() +{ + CALL_SUBTEST(test_simple_cast()); + CALL_SUBTEST(test_vectorized_cast()); + CALL_SUBTEST(test_float_to_int_cast()); + CALL_SUBTEST(test_big_to_small_type_cast()); + CALL_SUBTEST(test_small_to_big_type_cast()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_chipping.cpp b/eigen/unsupported/test/cxx11_tensor_chipping.cpp new file mode 100644 index 0000000..89cf5c7 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_chipping.cpp @@ -0,0 +1,425 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template<int DataLayout> +static void test_simple_chip() +{ + Tensor<float, 5, DataLayout> tensor(2,3,5,7,11); + tensor.setRandom(); + + Tensor<float, 4, DataLayout> chip1; + chip1 = tensor.template chip<0>(1); + + VERIFY_IS_EQUAL(chip1.dimension(0), 3); + VERIFY_IS_EQUAL(chip1.dimension(1), 5); + VERIFY_IS_EQUAL(chip1.dimension(2), 7); + VERIFY_IS_EQUAL(chip1.dimension(3), 11); + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 7; ++k) { + for (int l = 0; l < 11; ++l) { + VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1,i,j,k,l)); + } + } + } + } + + Tensor<float, 4, DataLayout> chip2 = tensor.template chip<1>(1); + VERIFY_IS_EQUAL(chip2.dimension(0), 2); + VERIFY_IS_EQUAL(chip2.dimension(1), 5); + VERIFY_IS_EQUAL(chip2.dimension(2), 7); + VERIFY_IS_EQUAL(chip2.dimension(3), 11); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 7; ++k) { + for (int l = 0; l < 11; ++l) { + VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l)); + } + } + } + } + + Tensor<float, 4, DataLayout> chip3 = tensor.template chip<2>(2); + VERIFY_IS_EQUAL(chip3.dimension(0), 2); + VERIFY_IS_EQUAL(chip3.dimension(1), 3); + VERIFY_IS_EQUAL(chip3.dimension(2), 7); + VERIFY_IS_EQUAL(chip3.dimension(3), 11); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + for (int l = 0; l < 11; ++l) { + VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2,k,l)); + } + } + } + } + + Tensor<float, 4, DataLayout> chip4(tensor.template chip<3>(5)); + VERIFY_IS_EQUAL(chip4.dimension(0), 2); + VERIFY_IS_EQUAL(chip4.dimension(1), 3); + VERIFY_IS_EQUAL(chip4.dimension(2), 5); + VERIFY_IS_EQUAL(chip4.dimension(3), 11); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 11; ++l) { + VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l)); + } + } + } + } + + Tensor<float, 4, DataLayout> chip5(tensor.template chip<4>(7)); + VERIFY_IS_EQUAL(chip5.dimension(0), 2); + VERIFY_IS_EQUAL(chip5.dimension(1), 3); + VERIFY_IS_EQUAL(chip5.dimension(2), 5); + VERIFY_IS_EQUAL(chip5.dimension(3), 7); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7)); + } + } + } + } +} + +template<int DataLayout> +static void test_dynamic_chip() +{ + Tensor<float, 5, DataLayout> tensor(2,3,5,7,11); + tensor.setRandom(); + + Tensor<float, 4, DataLayout> chip1; + chip1 = tensor.chip(1, 0); + VERIFY_IS_EQUAL(chip1.dimension(0), 3); + VERIFY_IS_EQUAL(chip1.dimension(1), 5); + VERIFY_IS_EQUAL(chip1.dimension(2), 7); + VERIFY_IS_EQUAL(chip1.dimension(3), 11); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 7; ++k) { + for (int l = 0; l < 11; ++l) { + VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1,i,j,k,l)); + } + } + } + } + + Tensor<float, 4, DataLayout> chip2 = tensor.chip(1, 1); + VERIFY_IS_EQUAL(chip2.dimension(0), 2); + VERIFY_IS_EQUAL(chip2.dimension(1), 5); + VERIFY_IS_EQUAL(chip2.dimension(2), 7); + VERIFY_IS_EQUAL(chip2.dimension(3), 11); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 5; 
++j) { + for (int k = 0; k < 7; ++k) { + for (int l = 0; l < 11; ++l) { + VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1,j,k,l)); + } + } + } + } + + Tensor<float, 4, DataLayout> chip3 = tensor.chip(2, 2); + VERIFY_IS_EQUAL(chip3.dimension(0), 2); + VERIFY_IS_EQUAL(chip3.dimension(1), 3); + VERIFY_IS_EQUAL(chip3.dimension(2), 7); + VERIFY_IS_EQUAL(chip3.dimension(3), 11); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + for (int l = 0; l < 11; ++l) { + VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2,k,l)); + } + } + } + } + + Tensor<float, 4, DataLayout> chip4(tensor.chip(5, 3)); + VERIFY_IS_EQUAL(chip4.dimension(0), 2); + VERIFY_IS_EQUAL(chip4.dimension(1), 3); + VERIFY_IS_EQUAL(chip4.dimension(2), 5); + VERIFY_IS_EQUAL(chip4.dimension(3), 11); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 11; ++l) { + VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5,l)); + } + } + } + } + + Tensor<float, 4, DataLayout> chip5(tensor.chip(7, 4)); + VERIFY_IS_EQUAL(chip5.dimension(0), 2); + VERIFY_IS_EQUAL(chip5.dimension(1), 3); + VERIFY_IS_EQUAL(chip5.dimension(2), 5); + VERIFY_IS_EQUAL(chip5.dimension(3), 7); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7)); + } + } + } + } +} + +template<int DataLayout> +static void test_chip_in_expr() { + Tensor<float, 5, DataLayout> input1(2,3,5,7,11); + input1.setRandom(); + Tensor<float, 4, DataLayout> input2(3,5,7,11); + input2.setRandom(); + + Tensor<float, 4, DataLayout> result = input1.template chip<0>(0) + input2; + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 7; ++k) { + for (int l = 0; l < 11; ++l) { + float expected = input1(0,i,j,k,l) + input2(i,j,k,l); + VERIFY_IS_EQUAL(result(i,j,k,l), expected); + } + } + } + } + + Tensor<float, 3, DataLayout> input3(3,7,11); + input3.setRandom(); + Tensor<float, 3, DataLayout> result2 = input1.template chip<0>(0).template chip<1>(2) + input3; + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 7; ++j) { + for (int k = 0; k < 11; ++k) { + float expected = input1(0,i,2,j,k) + input3(i,j,k); + VERIFY_IS_EQUAL(result2(i,j,k), expected); + } + } + } +} + +template<int DataLayout> +static void test_chip_as_lvalue() +{ + Tensor<float, 5, DataLayout> input1(2,3,5,7,11); + input1.setRandom(); + + Tensor<float, 4, DataLayout> input2(3,5,7,11); + input2.setRandom(); + Tensor<float, 5, DataLayout> tensor = input1; + tensor.template chip<0>(1) = input2; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + for (int m = 0; m < 11; ++m) { + if (i != 1) { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); + } else { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input2(j,k,l,m)); + } + } + } + } + } + } + + Tensor<float, 4, DataLayout> input3(2,5,7,11); + input3.setRandom(); + tensor = input1; + tensor.template chip<1>(1) = input3; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + for (int m = 0; m < 11; ++m) { + if (j != 1) { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); + } else { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input3(i,k,l,m)); + } + } + } + } + } + } + + Tensor<float, 4, DataLayout> input4(2,3,7,11); + input4.setRandom(); + tensor = input1; + tensor.template chip<2>(3) = input4; + for 
(int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + for (int m = 0; m < 11; ++m) { + if (k != 3) { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); + } else { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input4(i,j,l,m)); + } + } + } + } + } + } + + Tensor<float, 4, DataLayout> input5(2,3,5,11); + input5.setRandom(); + tensor = input1; + tensor.template chip<3>(4) = input5; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + for (int m = 0; m < 11; ++m) { + if (l != 4) { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); + } else { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input5(i,j,k,m)); + } + } + } + } + } + } + + Tensor<float, 4, DataLayout> input6(2,3,5,7); + input6.setRandom(); + tensor = input1; + tensor.template chip<4>(5) = input6; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + for (int m = 0; m < 11; ++m) { + if (m != 5) { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); + } else { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input6(i,j,k,l)); + } + } + } + } + } + } + + Tensor<float, 5, DataLayout> input7(2,3,5,7,11); + input7.setRandom(); + tensor = input1; + tensor.chip(0, 0) = input7.chip(0, 0); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + for (int m = 0; m < 11; ++m) { + if (i != 0) { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); + } else { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input7(i,j,k,l,m)); + } + } + } + } + } + } +} + +static void test_chip_raw_data_col_major() +{ + Tensor<float, 5, ColMajor> tensor(2,3,5,7,11); + tensor.setRandom(); + + typedef TensorEvaluator<decltype(tensor.chip<4>(3)), DefaultDevice> Evaluator4; + auto chip = Evaluator4(tensor.chip<4>(3), DefaultDevice()); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + int chip_index = i + 2 * (j + 3 * (k + 5 * l)); + VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(i,j,k,l,3)); + } + } + } + } + + typedef TensorEvaluator<decltype(tensor.chip<0>(0)), DefaultDevice> Evaluator0; + auto chip0 = Evaluator0(tensor.chip<0>(0), DefaultDevice()); + VERIFY_IS_EQUAL(chip0.data(), static_cast<float*>(0)); + + typedef TensorEvaluator<decltype(tensor.chip<1>(0)), DefaultDevice> Evaluator1; + auto chip1 = Evaluator1(tensor.chip<1>(0), DefaultDevice()); + VERIFY_IS_EQUAL(chip1.data(), static_cast<float*>(0)); + + typedef TensorEvaluator<decltype(tensor.chip<2>(0)), DefaultDevice> Evaluator2; + auto chip2 = Evaluator2(tensor.chip<2>(0), DefaultDevice()); + VERIFY_IS_EQUAL(chip2.data(), static_cast<float*>(0)); + + typedef TensorEvaluator<decltype(tensor.chip<3>(0)), DefaultDevice> Evaluator3; + auto chip3 = Evaluator3(tensor.chip<3>(0), DefaultDevice()); + VERIFY_IS_EQUAL(chip3.data(), static_cast<float*>(0)); +} + +static void test_chip_raw_data_row_major() +{ + Tensor<float, 5, RowMajor> tensor(11,7,5,3,2); + tensor.setRandom(); + + typedef TensorEvaluator<decltype(tensor.chip<0>(3)), DefaultDevice> Evaluator0; + auto chip = Evaluator0(tensor.chip<0>(3), DefaultDevice()); + for (int i = 0; i < 7; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 3; ++k) { + for (int l = 0; l < 2; ++l) { + int chip_index = l + 2 * (k + 3 * (j + 5 * i)); + VERIFY_IS_EQUAL(chip.data()[chip_index], tensor(3,i,j,k,l)); + } + } + } + 
} + + typedef TensorEvaluator<decltype(tensor.chip<1>(0)), DefaultDevice> Evaluator1; + auto chip1 = Evaluator1(tensor.chip<1>(0), DefaultDevice()); + VERIFY_IS_EQUAL(chip1.data(), static_cast<float*>(0)); + + typedef TensorEvaluator<decltype(tensor.chip<2>(0)), DefaultDevice> Evaluator2; + auto chip2 = Evaluator2(tensor.chip<2>(0), DefaultDevice()); + VERIFY_IS_EQUAL(chip2.data(), static_cast<float*>(0)); + + typedef TensorEvaluator<decltype(tensor.chip<3>(0)), DefaultDevice> Evaluator3; + auto chip3 = Evaluator3(tensor.chip<3>(0), DefaultDevice()); + VERIFY_IS_EQUAL(chip3.data(), static_cast<float*>(0)); + + typedef TensorEvaluator<decltype(tensor.chip<4>(0)), DefaultDevice> Evaluator4; + auto chip4 = Evaluator4(tensor.chip<4>(0), DefaultDevice()); + VERIFY_IS_EQUAL(chip4.data(), static_cast<float*>(0)); +} + +void test_cxx11_tensor_chipping() +{ + CALL_SUBTEST(test_simple_chip<ColMajor>()); + CALL_SUBTEST(test_simple_chip<RowMajor>()); + CALL_SUBTEST(test_dynamic_chip<ColMajor>()); + CALL_SUBTEST(test_dynamic_chip<RowMajor>()); + CALL_SUBTEST(test_chip_in_expr<ColMajor>()); + CALL_SUBTEST(test_chip_in_expr<RowMajor>()); + CALL_SUBTEST(test_chip_as_lvalue<ColMajor>()); + CALL_SUBTEST(test_chip_as_lvalue<RowMajor>()); + CALL_SUBTEST(test_chip_raw_data_col_major()); + CALL_SUBTEST(test_chip_raw_data_row_major()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp new file mode 100644 index 0000000..39e4f0a --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_chipping_sycl.cpp @@ -0,0 +1,622 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
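+
+// Chipping fixes one coordinate of a tensor and drops that dimension: for a
+// rank-5 tensor t of size (2,3,5,7,11), t.chip<0>(1) is the rank-4 slice of
+// size (3,5,7,11) with chip(i,j,k,l) == t(1,i,j,k,l). A minimal sketch of the
+// device-side pattern repeated throughout this file (buffer sizes and error
+// handling omitted):
+//
+//   TensorMap<Tensor<DataType, 5, DataLayout, IndexType>> gpu_tensor(d_in, tensorRange);
+//   TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu_chip(d_out, chipRange);
+//   sycl_device.memcpyHostToDevice(d_in, tensor.data(), tensorBytes);
+//   gpu_chip.device(sycl_device) = gpu_tensor.template chip<0l>(1l);
+//   sycl_device.memcpyDeviceToHost(chip.data(), d_out, chipBytes);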
+ + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_chipping_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template <typename DataType, int DataLayout, typename IndexType> +static void test_static_chip_sycl(const Eigen::SyclDevice& sycl_device) +{ + IndexType sizeDim1 = 2; + IndexType sizeDim2 = 3; + IndexType sizeDim3 = 5; + IndexType sizeDim4 = 7; + IndexType sizeDim5 = 11; + + array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; + array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; + + Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange); + Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange); + + tensor.setRandom(); + + const size_t tensorBuffSize =tensor.size()*sizeof(DataType); + const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType); + DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize)); + DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize)); + + TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange); + + sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize); + gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(1l); + sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize); + + VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2); + VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3); + VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4); + VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5); + + for (IndexType i = 0; i < sizeDim2; ++i) { + for (IndexType j = 0; j < sizeDim3; ++j) { + for (IndexType k = 0; k < sizeDim4; ++k) { + for (IndexType l = 0; l < sizeDim5; ++l) { + VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l)); + } + } + } + } + + array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}}; + Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange); + const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType); + DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize)); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange); + + gpu_chip2.device(sycl_device)=gpu_tensor.template chip<1l>(1l); + sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize); + + VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1); + VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3); + VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4); + VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5); + + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim3; ++j) { + for (IndexType k = 0; k < sizeDim4; ++k) { + for (IndexType l = 0; l < sizeDim5; ++l) { + VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l)); + } + } + } + } + + array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}}; + Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange); + const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType); + DataType* gpu_data_chip3 = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize)); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange); + + 
gpu_chip3.device(sycl_device)=gpu_tensor.template chip<2l>(2l); + sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize); + + VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1); + VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2); + VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4); + VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5); + + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim4; ++k) { + for (IndexType l = 0; l < sizeDim5; ++l) { + VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l)); + } + } + } + } + + array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}}; + Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange); + const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType); + DataType* gpu_data_chip4 = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize)); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange); + + gpu_chip4.device(sycl_device)=gpu_tensor.template chip<3l>(5l); + sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize); + + VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1); + VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2); + VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3); + VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5); + + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + for (IndexType l = 0; l < sizeDim5; ++l) { + VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l)); + } + } + } + } + + + array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; + Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange); + const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType); + DataType* gpu_data_chip5 = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize)); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange); + + gpu_chip5.device(sycl_device)=gpu_tensor.template chip<4l>(7l); + sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize); + + VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1); + VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2); + VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3); + VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4); + + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + for (IndexType l = 0; l < sizeDim4; ++l) { + VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l)); + } + } + } + } + + sycl_device.deallocate(gpu_data_tensor); + sycl_device.deallocate(gpu_data_chip1); + sycl_device.deallocate(gpu_data_chip2); + sycl_device.deallocate(gpu_data_chip3); + sycl_device.deallocate(gpu_data_chip4); + sycl_device.deallocate(gpu_data_chip5); +} + +template <typename DataType, int DataLayout, typename IndexType> +static void test_dynamic_chip_sycl(const Eigen::SyclDevice& sycl_device) +{ + IndexType sizeDim1 = 2; + IndexType sizeDim2 = 3; + IndexType sizeDim3 = 5; + IndexType sizeDim4 = 7; + IndexType sizeDim5 = 11; + + array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; + array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; + + Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange); + Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange); + + tensor.setRandom(); + + const size_t tensorBuffSize 
=tensor.size()*sizeof(DataType); + const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType); + DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize)); + DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize)); + + TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange); + + sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize); + gpu_chip1.device(sycl_device)=gpu_tensor.chip(1l,0l); + sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize); + + VERIFY_IS_EQUAL(chip1.dimension(0), sizeDim2); + VERIFY_IS_EQUAL(chip1.dimension(1), sizeDim3); + VERIFY_IS_EQUAL(chip1.dimension(2), sizeDim4); + VERIFY_IS_EQUAL(chip1.dimension(3), sizeDim5); + + for (IndexType i = 0; i < sizeDim2; ++i) { + for (IndexType j = 0; j < sizeDim3; ++j) { + for (IndexType k = 0; k < sizeDim4; ++k) { + for (IndexType l = 0; l < sizeDim5; ++l) { + VERIFY_IS_EQUAL(chip1(i,j,k,l), tensor(1l,i,j,k,l)); + } + } + } + } + + array<IndexType, 4> chip2TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}}; + Tensor<DataType, 4, DataLayout,IndexType> chip2(chip2TensorRange); + const size_t chip2TensorBuffSize =chip2.size()*sizeof(DataType); + DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize)); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange); + + gpu_chip2.device(sycl_device)=gpu_tensor.chip(1l,1l); + sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize); + + VERIFY_IS_EQUAL(chip2.dimension(0), sizeDim1); + VERIFY_IS_EQUAL(chip2.dimension(1), sizeDim3); + VERIFY_IS_EQUAL(chip2.dimension(2), sizeDim4); + VERIFY_IS_EQUAL(chip2.dimension(3), sizeDim5); + + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim3; ++j) { + for (IndexType k = 0; k < sizeDim4; ++k) { + for (IndexType l = 0; l < sizeDim5; ++l) { + VERIFY_IS_EQUAL(chip2(i,j,k,l), tensor(i,1l,j,k,l)); + } + } + } + } + + array<IndexType, 4> chip3TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}}; + Tensor<DataType, 4, DataLayout,IndexType> chip3(chip3TensorRange); + const size_t chip3TensorBuffSize =chip3.size()*sizeof(DataType); + DataType* gpu_data_chip3 = static_cast<DataType*>(sycl_device.allocate(chip3TensorBuffSize)); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip3(gpu_data_chip3, chip3TensorRange); + + gpu_chip3.device(sycl_device)=gpu_tensor.chip(2l,2l); + sycl_device.memcpyDeviceToHost(chip3.data(), gpu_data_chip3, chip3TensorBuffSize); + + VERIFY_IS_EQUAL(chip3.dimension(0), sizeDim1); + VERIFY_IS_EQUAL(chip3.dimension(1), sizeDim2); + VERIFY_IS_EQUAL(chip3.dimension(2), sizeDim4); + VERIFY_IS_EQUAL(chip3.dimension(3), sizeDim5); + + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim4; ++k) { + for (IndexType l = 0; l < sizeDim5; ++l) { + VERIFY_IS_EQUAL(chip3(i,j,k,l), tensor(i,j,2l,k,l)); + } + } + } + } + + array<IndexType, 4> chip4TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}}; + Tensor<DataType, 4, DataLayout,IndexType> chip4(chip4TensorRange); + const size_t chip4TensorBuffSize =chip4.size()*sizeof(DataType); + DataType* gpu_data_chip4 = static_cast<DataType*>(sycl_device.allocate(chip4TensorBuffSize)); + TensorMap<Tensor<DataType, 4, 
DataLayout,IndexType>> gpu_chip4(gpu_data_chip4, chip4TensorRange); + + gpu_chip4.device(sycl_device)=gpu_tensor.chip(5l,3l); + sycl_device.memcpyDeviceToHost(chip4.data(), gpu_data_chip4, chip4TensorBuffSize); + + VERIFY_IS_EQUAL(chip4.dimension(0), sizeDim1); + VERIFY_IS_EQUAL(chip4.dimension(1), sizeDim2); + VERIFY_IS_EQUAL(chip4.dimension(2), sizeDim3); + VERIFY_IS_EQUAL(chip4.dimension(3), sizeDim5); + + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + for (IndexType l = 0; l < sizeDim5; ++l) { + VERIFY_IS_EQUAL(chip4(i,j,k,l), tensor(i,j,k,5l,l)); + } + } + } + } + + + array<IndexType, 4> chip5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; + Tensor<DataType, 4, DataLayout,IndexType> chip5(chip5TensorRange); + const size_t chip5TensorBuffSize =chip5.size()*sizeof(DataType); + DataType* gpu_data_chip5 = static_cast<DataType*>(sycl_device.allocate(chip5TensorBuffSize)); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip5(gpu_data_chip5, chip5TensorRange); + + gpu_chip5.device(sycl_device)=gpu_tensor.chip(7l,4l); + sycl_device.memcpyDeviceToHost(chip5.data(), gpu_data_chip5, chip5TensorBuffSize); + + VERIFY_IS_EQUAL(chip5.dimension(0), sizeDim1); + VERIFY_IS_EQUAL(chip5.dimension(1), sizeDim2); + VERIFY_IS_EQUAL(chip5.dimension(2), sizeDim3); + VERIFY_IS_EQUAL(chip5.dimension(3), sizeDim4); + + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + for (IndexType l = 0; l < sizeDim4; ++l) { + VERIFY_IS_EQUAL(chip5(i,j,k,l), tensor(i,j,k,l,7l)); + } + } + } + } + sycl_device.deallocate(gpu_data_tensor); + sycl_device.deallocate(gpu_data_chip1); + sycl_device.deallocate(gpu_data_chip2); + sycl_device.deallocate(gpu_data_chip3); + sycl_device.deallocate(gpu_data_chip4); + sycl_device.deallocate(gpu_data_chip5); +} + +template <typename DataType, int DataLayout, typename IndexType> +static void test_chip_in_expr(const Eigen::SyclDevice& sycl_device) { + + IndexType sizeDim1 = 2; + IndexType sizeDim2 = 3; + IndexType sizeDim3 = 5; + IndexType sizeDim4 = 7; + IndexType sizeDim5 = 11; + + array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; + array<IndexType, 4> chip1TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; + + Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange); + + Tensor<DataType, 4, DataLayout,IndexType> chip1(chip1TensorRange); + Tensor<DataType, 4, DataLayout,IndexType> tensor1(chip1TensorRange); + tensor.setRandom(); + tensor1.setRandom(); + + const size_t tensorBuffSize =tensor.size()*sizeof(DataType); + const size_t chip1TensorBuffSize =chip1.size()*sizeof(DataType); + DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize)); + DataType* gpu_data_chip1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize)); + DataType* gpu_data_tensor1 = static_cast<DataType*>(sycl_device.allocate(chip1TensorBuffSize)); + + TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_chip1(gpu_data_chip1, chip1TensorRange); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_tensor1(gpu_data_tensor1, chip1TensorRange); + + + sycl_device.memcpyHostToDevice(gpu_data_tensor, tensor.data(), tensorBuffSize); + sycl_device.memcpyHostToDevice(gpu_data_tensor1, tensor1.data(), chip1TensorBuffSize); + 
gpu_chip1.device(sycl_device)=gpu_tensor.template chip<0l>(0l) + gpu_tensor1; + sycl_device.memcpyDeviceToHost(chip1.data(), gpu_data_chip1, chip1TensorBuffSize); + + for (int i = 0; i < sizeDim2; ++i) { + for (int j = 0; j < sizeDim3; ++j) { + for (int k = 0; k < sizeDim4; ++k) { + for (int l = 0; l < sizeDim5; ++l) { + float expected = tensor(0l,i,j,k,l) + tensor1(i,j,k,l); + VERIFY_IS_EQUAL(chip1(i,j,k,l), expected); + } + } + } + } + + array<IndexType, 3> chip2TensorRange = {{sizeDim2, sizeDim4, sizeDim5}}; + Tensor<DataType, 3, DataLayout,IndexType> tensor2(chip2TensorRange); + Tensor<DataType, 3, DataLayout,IndexType> chip2(chip2TensorRange); + tensor2.setRandom(); + const size_t chip2TensorBuffSize =tensor2.size()*sizeof(DataType); + DataType* gpu_data_tensor2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize)); + DataType* gpu_data_chip2 = static_cast<DataType*>(sycl_device.allocate(chip2TensorBuffSize)); + TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_tensor2(gpu_data_tensor2, chip2TensorRange); + TensorMap<Tensor<DataType, 3, DataLayout,IndexType>> gpu_chip2(gpu_data_chip2, chip2TensorRange); + + sycl_device.memcpyHostToDevice(gpu_data_tensor2, tensor2.data(), chip2TensorBuffSize); + gpu_chip2.device(sycl_device)=gpu_tensor.template chip<0l>(0l).template chip<1l>(2l) + gpu_tensor2; + sycl_device.memcpyDeviceToHost(chip2.data(), gpu_data_chip2, chip2TensorBuffSize); + + for (int i = 0; i < sizeDim2; ++i) { + for (int j = 0; j < sizeDim4; ++j) { + for (int k = 0; k < sizeDim5; ++k) { + float expected = tensor(0l,i,2l,j,k) + tensor2(i,j,k); + VERIFY_IS_EQUAL(chip2(i,j,k), expected); + } + } + } + sycl_device.deallocate(gpu_data_tensor); + sycl_device.deallocate(gpu_data_tensor1); + sycl_device.deallocate(gpu_data_chip1); + sycl_device.deallocate(gpu_data_tensor2); + sycl_device.deallocate(gpu_data_chip2); +} + +template <typename DataType, int DataLayout, typename IndexType> +static void test_chip_as_lvalue_sycl(const Eigen::SyclDevice& sycl_device) +{ + + IndexType sizeDim1 = 2; + IndexType sizeDim2 = 3; + IndexType sizeDim3 = 5; + IndexType sizeDim4 = 7; + IndexType sizeDim5 = 11; + + array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; + array<IndexType, 4> input2TensorRange = {{sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; + + Tensor<DataType, 5, DataLayout,IndexType> tensor(tensorRange); + Tensor<DataType, 5, DataLayout,IndexType> input1(tensorRange); + Tensor<DataType, 4, DataLayout,IndexType> input2(input2TensorRange); + input1.setRandom(); + input2.setRandom(); + + + const size_t tensorBuffSize =tensor.size()*sizeof(DataType); + const size_t input2TensorBuffSize =input2.size()*sizeof(DataType); + DataType* gpu_data_tensor = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize)); + DataType* gpu_data_input1 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize)); + DataType* gpu_data_input2 = static_cast<DataType*>(sycl_device.allocate(input2TensorBuffSize)); + + TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_tensor(gpu_data_tensor, tensorRange); + TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input1(gpu_data_input1, tensorRange); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input2(gpu_data_input2, input2TensorRange); + + sycl_device.memcpyHostToDevice(gpu_data_input1, input1.data(), tensorBuffSize); + gpu_tensor.device(sycl_device)=gpu_input1; + sycl_device.memcpyHostToDevice(gpu_data_input2, input2.data(), input2TensorBuffSize); + gpu_tensor.template 
chip<0l>(1l).device(sycl_device)=gpu_input2; + sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); + + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + for (int l = 0; l < sizeDim4; ++l) { + for (int m = 0; m < sizeDim5; ++m) { + if (i != 1) { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); + } else { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input2(j,k,l,m)); + } + } + } + } + } + } + + gpu_tensor.device(sycl_device)=gpu_input1; + array<IndexType, 4> input3TensorRange = {{sizeDim1, sizeDim3, sizeDim4, sizeDim5}}; + Tensor<DataType, 4, DataLayout,IndexType> input3(input3TensorRange); + input3.setRandom(); + + const size_t input3TensorBuffSize =input3.size()*sizeof(DataType); + DataType* gpu_data_input3 = static_cast<DataType*>(sycl_device.allocate(input3TensorBuffSize)); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input3(gpu_data_input3, input3TensorRange); + + sycl_device.memcpyHostToDevice(gpu_data_input3, input3.data(), input3TensorBuffSize); + gpu_tensor.template chip<1l>(1l).device(sycl_device)=gpu_input3; + sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); + + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k <sizeDim3; ++k) { + for (int l = 0; l < sizeDim4; ++l) { + for (int m = 0; m < sizeDim5; ++m) { + if (j != 1) { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); + } else { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input3(i,k,l,m)); + } + } + } + } + } + } + + gpu_tensor.device(sycl_device)=gpu_input1; + array<IndexType, 4> input4TensorRange = {{sizeDim1, sizeDim2, sizeDim4, sizeDim5}}; + Tensor<DataType, 4, DataLayout,IndexType> input4(input4TensorRange); + input4.setRandom(); + + const size_t input4TensorBuffSize =input4.size()*sizeof(DataType); + DataType* gpu_data_input4 = static_cast<DataType*>(sycl_device.allocate(input4TensorBuffSize)); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input4(gpu_data_input4, input4TensorRange); + + sycl_device.memcpyHostToDevice(gpu_data_input4, input4.data(), input4TensorBuffSize); + gpu_tensor.template chip<2l>(3l).device(sycl_device)=gpu_input4; + sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); + + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k <sizeDim3; ++k) { + for (int l = 0; l < sizeDim4; ++l) { + for (int m = 0; m < sizeDim5; ++m) { + if (k != 3) { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); + } else { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input4(i,j,l,m)); + } + } + } + } + } + } + + gpu_tensor.device(sycl_device)=gpu_input1; + array<IndexType, 4> input5TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim5}}; + Tensor<DataType, 4, DataLayout,IndexType> input5(input5TensorRange); + input5.setRandom(); + + const size_t input5TensorBuffSize =input5.size()*sizeof(DataType); + DataType* gpu_data_input5 = static_cast<DataType*>(sycl_device.allocate(input5TensorBuffSize)); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input5(gpu_data_input5, input5TensorRange); + + sycl_device.memcpyHostToDevice(gpu_data_input5, input5.data(), input5TensorBuffSize); + gpu_tensor.template chip<3l>(4l).device(sycl_device)=gpu_input5; + sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); + + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k <sizeDim3; ++k) { + for (int l = 0; l 
< sizeDim4; ++l) { + for (int m = 0; m < sizeDim5; ++m) { + if (l != 4) { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); + } else { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input5(i,j,k,m)); + } + } + } + } + } + } + gpu_tensor.device(sycl_device)=gpu_input1; + array<IndexType, 4> input6TensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; + Tensor<DataType, 4, DataLayout,IndexType> input6(input6TensorRange); + input6.setRandom(); + + const size_t input6TensorBuffSize =input6.size()*sizeof(DataType); + DataType* gpu_data_input6 = static_cast<DataType*>(sycl_device.allocate(input6TensorBuffSize)); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu_input6(gpu_data_input6, input6TensorRange); + + sycl_device.memcpyHostToDevice(gpu_data_input6, input6.data(), input6TensorBuffSize); + gpu_tensor.template chip<4l>(5l).device(sycl_device)=gpu_input6; + sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); + + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k <sizeDim3; ++k) { + for (int l = 0; l < sizeDim4; ++l) { + for (int m = 0; m < sizeDim5; ++m) { + if (m != 5) { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); + } else { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input6(i,j,k,l)); + } + } + } + } + } + } + + + gpu_tensor.device(sycl_device)=gpu_input1; + Tensor<DataType, 5, DataLayout,IndexType> input7(tensorRange); + input7.setRandom(); + + DataType* gpu_data_input7 = static_cast<DataType*>(sycl_device.allocate(tensorBuffSize)); + TensorMap<Tensor<DataType, 5, DataLayout,IndexType>> gpu_input7(gpu_data_input7, tensorRange); + + sycl_device.memcpyHostToDevice(gpu_data_input7, input7.data(), tensorBuffSize); + gpu_tensor.chip(0l,0l).device(sycl_device)=gpu_input7.chip(0l,0l); + sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data_tensor, tensorBuffSize); + + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k <sizeDim3; ++k) { + for (int l = 0; l < sizeDim4; ++l) { + for (int m = 0; m < sizeDim5; ++m) { + if (i != 0) { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input1(i,j,k,l,m)); + } else { + VERIFY_IS_EQUAL(tensor(i,j,k,l,m), input7(i,j,k,l,m)); + } + } + } + } + } + } + sycl_device.deallocate(gpu_data_tensor); + sycl_device.deallocate(gpu_data_input1); + sycl_device.deallocate(gpu_data_input2); + sycl_device.deallocate(gpu_data_input3); + sycl_device.deallocate(gpu_data_input4); + sycl_device.deallocate(gpu_data_input5); + sycl_device.deallocate(gpu_data_input6); + sycl_device.deallocate(gpu_data_input7); + +} + +template<typename DataType, typename dev_Selector> void sycl_chipping_test_per_device(dev_Selector s){ + QueueInterface queueInterface(s); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + test_static_chip_sycl<DataType, RowMajor, int64_t>(sycl_device); + test_static_chip_sycl<DataType, ColMajor, int64_t>(sycl_device); + test_dynamic_chip_sycl<DataType, RowMajor, int64_t>(sycl_device); + test_dynamic_chip_sycl<DataType, ColMajor, int64_t>(sycl_device); + test_chip_in_expr<DataType, RowMajor, int64_t>(sycl_device); + test_chip_in_expr<DataType, ColMajor, int64_t>(sycl_device); + test_chip_as_lvalue_sycl<DataType, RowMajor, int64_t>(sycl_device); + test_chip_as_lvalue_sycl<DataType, ColMajor, int64_t>(sycl_device); +} +void test_cxx11_tensor_chipping_sycl() +{ + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(sycl_chipping_test_per_device<float>(device)); + } +} diff --git 
a/eigen/unsupported/test/cxx11_tensor_comparisons.cpp b/eigen/unsupported/test/cxx11_tensor_comparisons.cpp new file mode 100644 index 0000000..b1ff8ae --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_comparisons.cpp @@ -0,0 +1,84 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::RowMajor; + +static void test_orderings() +{ + Tensor<float, 3> mat1(2,3,7); + Tensor<float, 3> mat2(2,3,7); + Tensor<bool, 3> lt(2,3,7); + Tensor<bool, 3> le(2,3,7); + Tensor<bool, 3> gt(2,3,7); + Tensor<bool, 3> ge(2,3,7); + + mat1.setRandom(); + mat2.setRandom(); + + lt = mat1 < mat2; + le = mat1 <= mat2; + gt = mat1 > mat2; + ge = mat1 >= mat2; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(lt(i,j,k), mat1(i,j,k) < mat2(i,j,k)); + VERIFY_IS_EQUAL(le(i,j,k), mat1(i,j,k) <= mat2(i,j,k)); + VERIFY_IS_EQUAL(gt(i,j,k), mat1(i,j,k) > mat2(i,j,k)); + VERIFY_IS_EQUAL(ge(i,j,k), mat1(i,j,k) >= mat2(i,j,k)); + } + } + } +} + + +static void test_equality() +{ + Tensor<float, 3> mat1(2,3,7); + Tensor<float, 3> mat2(2,3,7); + + mat1.setRandom(); + mat2.setRandom(); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + if (internal::random<bool>()) { + mat2(i,j,k) = mat1(i,j,k); + } + } + } + } + + Tensor<bool, 3> eq(2,3,7); + Tensor<bool, 3> ne(2,3,7); + eq = (mat1 == mat2); + ne = (mat1 != mat2); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(eq(i,j,k), mat1(i,j,k) == mat2(i,j,k)); + VERIFY_IS_EQUAL(ne(i,j,k), mat1(i,j,k) != mat2(i,j,k)); + } + } + } +} + + +void test_cxx11_tensor_comparisons() +{ + CALL_SUBTEST(test_orderings()); + CALL_SUBTEST(test_equality()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_complex_cuda.cu b/eigen/unsupported/test/cxx11_tensor_complex_cuda.cu new file mode 100644 index 0000000..d4e111f --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_complex_cuda.cu @@ -0,0 +1,153 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
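[Editorial aside: the comparison tests just above check that elementwise <, <=, >, >=, ==, != on float tensors yield bool tensors of the same shape. A minimal standalone sketch of that contract, assuming only that Eigen's unsupported Tensor headers are on the include path; it is built outside the test harness, so it uses a plain main and assert instead of CALL_SUBTEST/VERIFY.]

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Tensor<float, 2> a(2, 2), b(2, 2);
  a.setValues({{1.f, 2.f}, {3.f, 4.f}});
  b.setValues({{1.f, 0.f}, {5.f, 4.f}});

  // Comparisons are lazy tensor expressions; assigning materializes the bools.
  Eigen::Tensor<bool, 2> lt = a < b;
  Eigen::Tensor<bool, 2> eq = a == b;

  assert(!lt(0, 0) && !lt(0, 1) && lt(1, 0) && !lt(1, 1));
  assert(eq(0, 0) && !eq(0, 1) && !eq(1, 0) && eq(1, 1));
  return 0;
}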
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_FUNC cxx11_tensor_complex +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +void test_cuda_nullary() { + Tensor<std::complex<float>, 1, 0, int> in1(2); + Tensor<std::complex<float>, 1, 0, int> in2(2); + in1.setRandom(); + in2.setRandom(); + + std::size_t float_bytes = in1.size() * sizeof(float); + std::size_t complex_bytes = in1.size() * sizeof(std::complex<float>); + + std::complex<float>* d_in1; + std::complex<float>* d_in2; + float* d_out2; + cudaMalloc((void**)(&d_in1), complex_bytes); + cudaMalloc((void**)(&d_in2), complex_bytes); + cudaMalloc((void**)(&d_out2), float_bytes); + cudaMemcpy(d_in1, in1.data(), complex_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in2, in2.data(), complex_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in1( + d_in1, 2); + Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in2( + d_in2, 2); + Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_out2( + d_out2, 2); + + gpu_in1.device(gpu_device) = gpu_in1.constant(std::complex<float>(3.14f, 2.7f)); + gpu_out2.device(gpu_device) = gpu_in2.abs(); + + Tensor<std::complex<float>, 1, 0, int> new1(2); + Tensor<float, 1, 0, int> new2(2); + + assert(cudaMemcpyAsync(new1.data(), d_in1, complex_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaMemcpyAsync(new2.data(), d_out2, float_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 2; ++i) { + VERIFY_IS_APPROX(new1(i), std::complex<float>(3.14f, 2.7f)); + VERIFY_IS_APPROX(new2(i), std::abs(in2(i))); + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out2); +} + + +static void test_cuda_sum_reductions() { + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + const int num_rows = internal::random<int>(1024, 5*1024); + const int num_cols = internal::random<int>(1024, 5*1024); + + Tensor<std::complex<float>, 2> in(num_rows, num_cols); + in.setRandom(); + + Tensor<std::complex<float>, 0> full_redux; + full_redux = in.sum(); + + std::size_t in_bytes = in.size() * sizeof(std::complex<float>); + std::size_t out_bytes = full_redux.size() * sizeof(std::complex<float>); + std::complex<float>* gpu_in_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(in_bytes)); + std::complex<float>* gpu_out_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(out_bytes)); + gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes); + + TensorMap<Tensor<std::complex<float>, 2> > in_gpu(gpu_in_ptr, num_rows, num_cols); + TensorMap<Tensor<std::complex<float>, 0> > out_gpu(gpu_out_ptr); + + out_gpu.device(gpu_device) = in_gpu.sum(); + + Tensor<std::complex<float>, 0> full_redux_gpu; + gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes); + gpu_device.synchronize(); + + // Check that the CPU and GPU reductions return the same result. 
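+ // (A rank-0 tensor holds a single coefficient; full_redux() reads it back on the host.)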
+ VERIFY_IS_APPROX(full_redux(), full_redux_gpu()); + + gpu_device.deallocate(gpu_in_ptr); + gpu_device.deallocate(gpu_out_ptr); +} + + +static void test_cuda_product_reductions() { + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + const int num_rows = internal::random<int>(1024, 5*1024); + const int num_cols = internal::random<int>(1024, 5*1024); + + Tensor<std::complex<float>, 2> in(num_rows, num_cols); + in.setRandom(); + + Tensor<std::complex<float>, 0> full_redux; + full_redux = in.prod(); + + std::size_t in_bytes = in.size() * sizeof(std::complex<float>); + std::size_t out_bytes = full_redux.size() * sizeof(std::complex<float>); + std::complex<float>* gpu_in_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(in_bytes)); + std::complex<float>* gpu_out_ptr = static_cast<std::complex<float>*>(gpu_device.allocate(out_bytes)); + gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes); + + TensorMap<Tensor<std::complex<float>, 2> > in_gpu(gpu_in_ptr, num_rows, num_cols); + TensorMap<Tensor<std::complex<float>, 0> > out_gpu(gpu_out_ptr); + + out_gpu.device(gpu_device) = in_gpu.prod(); + + Tensor<std::complex<float>, 0> full_redux_gpu; + gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes); + gpu_device.synchronize(); + + // Check that the CPU and GPU reductions return the same result. + VERIFY_IS_APPROX(full_redux(), full_redux_gpu()); + + gpu_device.deallocate(gpu_in_ptr); + gpu_device.deallocate(gpu_out_ptr); +} + + +void test_cxx11_tensor_complex() +{ + CALL_SUBTEST(test_cuda_nullary()); + CALL_SUBTEST(test_cuda_sum_reductions()); + CALL_SUBTEST(test_cuda_product_reductions()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu b/eigen/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu new file mode 100644 index 0000000..2baf5ea --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu @@ -0,0 +1,97 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
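[Editorial aside: the complex-valued reduction tests above collapse a whole rank-2 tensor into a rank-0 result on the GPU and compare it with the host value. A host-only sketch of the underlying sum()/prod() semantics, assuming the unsupported Tensor headers; the inputs are small exact values, so the equality checks are exact.]

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>
#include <complex>

int main() {
  Eigen::Tensor<std::complex<float>, 1> t(4);
  t(0) = std::complex<float>(1.f, 1.f);
  t(1) = std::complex<float>(2.f, 0.f);
  t(2) = std::complex<float>(0.f, 1.f);
  t(3) = std::complex<float>(1.f, 0.f);

  // Full reductions produce rank-0 tensors; operator()() reads the value.
  Eigen::Tensor<std::complex<float>, 0> s = t.sum();
  Eigen::Tensor<std::complex<float>, 0> p = t.prod();

  assert(s() == std::complex<float>(4.f, 2.f));
  // (1+i) * 2 * i * 1 = (2+2i) * i = -2+2i
  assert(p() == std::complex<float>(-2.f, 2.f));
  return 0;
}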
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template<typename T> +void test_cuda_complex_cwise_ops() { + const int kNumItems = 2; + std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>); + + std::complex<T>* d_in1; + std::complex<T>* d_in2; + std::complex<T>* d_out; + cudaMalloc((void**)(&d_in1), complex_bytes); + cudaMalloc((void**)(&d_in2), complex_bytes); + cudaMalloc((void**)(&d_out), complex_bytes); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1( + d_in1, kNumItems); + Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2( + d_in2, kNumItems); + Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out( + d_out, kNumItems); + + const std::complex<T> a(3.14f, 2.7f); + const std::complex<T> b(-10.6f, 1.4f); + + gpu_in1.device(gpu_device) = gpu_in1.constant(a); + gpu_in2.device(gpu_device) = gpu_in2.constant(b); + + enum CwiseOp { + Add = 0, + Sub, + Mul, + Div + }; + + Tensor<std::complex<T>, 1, 0, int> actual(kNumItems); + for (int op = Add; op <= Div; op++) { + std::complex<T> expected; + switch (static_cast<CwiseOp>(op)) { + case Add: + gpu_out.device(gpu_device) = gpu_in1 + gpu_in2; + expected = a + b; + break; + case Sub: + gpu_out.device(gpu_device) = gpu_in1 - gpu_in2; + expected = a - b; + break; + case Mul: + gpu_out.device(gpu_device) = gpu_in1 * gpu_in2; + expected = a * b; + break; + case Div: + gpu_out.device(gpu_device) = gpu_in1 / gpu_in2; + expected = a / b; + break; + } + assert(cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < kNumItems; ++i) { + VERIFY_IS_APPROX(actual(i), expected); + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); +} + + +void test_cxx11_tensor_complex_cwise_ops() +{ + CALL_SUBTEST(test_cuda_complex_cwise_ops<float>()); + CALL_SUBTEST(test_cuda_complex_cwise_ops<double>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_concatenation.cpp b/eigen/unsupported/test/cxx11_tensor_concatenation.cpp new file mode 100644 index 0000000..03ef12e --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_concatenation.cpp @@ -0,0 +1,137 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template<int DataLayout> +static void test_dimension_failures() +{ + Tensor<int, 3, DataLayout> left(2, 3, 1); + Tensor<int, 3, DataLayout> right(3, 3, 1); + left.setRandom(); + right.setRandom(); + + // Okay; other dimensions are equal. + Tensor<int, 3, DataLayout> concatenation = left.concatenate(right, 0); + + // Dimension mismatches. 
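+ // Concatenating along axes 1 or 2 fails here because dimension 0 (2 vs. 3) would have to match.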
+ VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, 1)); + VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, 2)); + + // Axis > NumDims or < 0. + VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, 3)); + VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, -1)); +} + +template<int DataLayout> +static void test_static_dimension_failure() +{ + Tensor<int, 2, DataLayout> left(2, 3); + Tensor<int, 3, DataLayout> right(2, 3, 1); + +#ifdef CXX11_TENSOR_CONCATENATION_STATIC_DIMENSION_FAILURE + // Technically compatible, but we static assert that the inputs have same + // NumDims. + Tensor<int, 3, DataLayout> concatenation = left.concatenate(right, 0); +#endif + + // This can be worked around in this case. + Tensor<int, 3, DataLayout> concatenation = left + .reshape(Tensor<int, 3>::Dimensions(2, 3, 1)) + .concatenate(right, 0); + Tensor<int, 2, DataLayout> alternative = left + .concatenate(right.reshape(Tensor<int, 2>::Dimensions{{{2, 3}}}), 0); +} + +template<int DataLayout> +static void test_simple_concatenation() +{ + Tensor<int, 3, DataLayout> left(2, 3, 1); + Tensor<int, 3, DataLayout> right(2, 3, 1); + left.setRandom(); + right.setRandom(); + + Tensor<int, 3, DataLayout> concatenation = left.concatenate(right, 0); + VERIFY_IS_EQUAL(concatenation.dimension(0), 4); + VERIFY_IS_EQUAL(concatenation.dimension(1), 3); + VERIFY_IS_EQUAL(concatenation.dimension(2), 1); + for (int j = 0; j < 3; ++j) { + for (int i = 0; i < 2; ++i) { + VERIFY_IS_EQUAL(concatenation(i, j, 0), left(i, j, 0)); + } + for (int i = 2; i < 4; ++i) { + VERIFY_IS_EQUAL(concatenation(i, j, 0), right(i - 2, j, 0)); + } + } + + concatenation = left.concatenate(right, 1); + VERIFY_IS_EQUAL(concatenation.dimension(0), 2); + VERIFY_IS_EQUAL(concatenation.dimension(1), 6); + VERIFY_IS_EQUAL(concatenation.dimension(2), 1); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL(concatenation(i, j, 0), left(i, j, 0)); + } + for (int j = 3; j < 6; ++j) { + VERIFY_IS_EQUAL(concatenation(i, j, 0), right(i, j - 3, 0)); + } + } + + concatenation = left.concatenate(right, 2); + VERIFY_IS_EQUAL(concatenation.dimension(0), 2); + VERIFY_IS_EQUAL(concatenation.dimension(1), 3); + VERIFY_IS_EQUAL(concatenation.dimension(2), 2); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL(concatenation(i, j, 0), left(i, j, 0)); + VERIFY_IS_EQUAL(concatenation(i, j, 1), right(i, j, 0)); + } + } +} + + +// TODO(phli): Add test once we have a real vectorized implementation. 
+// static void test_vectorized_concatenation() {} + +static void test_concatenation_as_lvalue() +{ + Tensor<int, 2> t1(2, 3); + Tensor<int, 2> t2(2, 3); + t1.setRandom(); + t2.setRandom(); + + Tensor<int, 2> result(4, 3); + result.setRandom(); + t1.concatenate(t2, 0) = result; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL(t1(i, j), result(i, j)); + VERIFY_IS_EQUAL(t2(i, j), result(i+2, j)); + } + } +} + + +void test_cxx11_tensor_concatenation() +{ + CALL_SUBTEST(test_dimension_failures<ColMajor>()); + CALL_SUBTEST(test_dimension_failures<RowMajor>()); + CALL_SUBTEST(test_static_dimension_failure<ColMajor>()); + CALL_SUBTEST(test_static_dimension_failure<RowMajor>()); + CALL_SUBTEST(test_simple_concatenation<ColMajor>()); + CALL_SUBTEST(test_simple_concatenation<RowMajor>()); + // CALL_SUBTEST(test_vectorized_concatenation()); + CALL_SUBTEST(test_concatenation_as_lvalue()); + +} diff --git a/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp new file mode 100644 index 0000000..e3023a3 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_concatenation_sycl.cpp @@ -0,0 +1,180 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_concatenation_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template<typename DataType, int DataLayout, typename IndexType> +static void test_simple_concatenation(const Eigen::SyclDevice& sycl_device) +{ + IndexType leftDim1 = 2; + IndexType leftDim2 = 3; + IndexType leftDim3 = 1; + Eigen::array<IndexType, 3> leftRange = {{leftDim1, leftDim2, leftDim3}}; + IndexType rightDim1 = 2; + IndexType rightDim2 = 3; + IndexType rightDim3 = 1; + Eigen::array<IndexType, 3> rightRange = {{rightDim1, rightDim2, rightDim3}}; + + //IndexType concatDim1 = 3; +// IndexType concatDim2 = 3; +// IndexType concatDim3 = 1; + //Eigen::array<IndexType, 3> concatRange = {{concatDim1, concatDim2, concatDim3}}; + + Tensor<DataType, 3, DataLayout, IndexType> left(leftRange); + Tensor<DataType, 3, DataLayout, IndexType> right(rightRange); + left.setRandom(); + right.setRandom(); + + DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType))); + DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType))); + + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, leftRange); + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, rightRange); + sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType)); + /// + Tensor<DataType, 3, DataLayout, IndexType> concatenation1(leftDim1+rightDim1, leftDim2, 
leftDim3); + DataType * gpu_out_data1 = static_cast<DataType*>(sycl_device.allocate(concatenation1.dimensions().TotalSize()*sizeof(DataType))); + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out1(gpu_out_data1, concatenation1.dimensions()); + + //concatenation = left.concatenate(right, 0); + gpu_out1.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 0); + sycl_device.memcpyDeviceToHost(concatenation1.data(), gpu_out_data1,(concatenation1.dimensions().TotalSize())*sizeof(DataType)); + + VERIFY_IS_EQUAL(concatenation1.dimension(0), 4); + VERIFY_IS_EQUAL(concatenation1.dimension(1), 3); + VERIFY_IS_EQUAL(concatenation1.dimension(2), 1); + for (IndexType j = 0; j < 3; ++j) { + for (IndexType i = 0; i < 2; ++i) { + VERIFY_IS_EQUAL(concatenation1(i, j, 0), left(i, j, 0)); + } + for (IndexType i = 2; i < 4; ++i) { + VERIFY_IS_EQUAL(concatenation1(i, j, 0), right(i - 2, j, 0)); + } + } + + sycl_device.deallocate(gpu_out_data1); + Tensor<DataType, 3, DataLayout, IndexType> concatenation2(leftDim1, leftDim2 +rightDim2, leftDim3); + DataType * gpu_out_data2 = static_cast<DataType*>(sycl_device.allocate(concatenation2.dimensions().TotalSize()*sizeof(DataType))); + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out2(gpu_out_data2, concatenation2.dimensions()); + gpu_out2.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 1); + sycl_device.memcpyDeviceToHost(concatenation2.data(), gpu_out_data2,(concatenation2.dimensions().TotalSize())*sizeof(DataType)); + + //concatenation = left.concatenate(right, 1); + VERIFY_IS_EQUAL(concatenation2.dimension(0), 2); + VERIFY_IS_EQUAL(concatenation2.dimension(1), 6); + VERIFY_IS_EQUAL(concatenation2.dimension(2), 1); + for (IndexType i = 0; i < 2; ++i) { + for (IndexType j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL(concatenation2(i, j, 0), left(i, j, 0)); + } + for (IndexType j = 3; j < 6; ++j) { + VERIFY_IS_EQUAL(concatenation2(i, j, 0), right(i, j - 3, 0)); + } + } + sycl_device.deallocate(gpu_out_data2); + Tensor<DataType, 3, DataLayout, IndexType> concatenation3(leftDim1, leftDim2, leftDim3+rightDim3); + DataType * gpu_out_data3 = static_cast<DataType*>(sycl_device.allocate(concatenation3.dimensions().TotalSize()*sizeof(DataType))); + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out3(gpu_out_data3, concatenation3.dimensions()); + gpu_out3.device(sycl_device) =gpu_in1.concatenate(gpu_in2, 2); + sycl_device.memcpyDeviceToHost(concatenation3.data(), gpu_out_data3,(concatenation3.dimensions().TotalSize())*sizeof(DataType)); + + //concatenation = left.concatenate(right, 2); + VERIFY_IS_EQUAL(concatenation3.dimension(0), 2); + VERIFY_IS_EQUAL(concatenation3.dimension(1), 3); + VERIFY_IS_EQUAL(concatenation3.dimension(2), 2); + for (IndexType i = 0; i < 2; ++i) { + for (IndexType j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL(concatenation3(i, j, 0), left(i, j, 0)); + VERIFY_IS_EQUAL(concatenation3(i, j, 1), right(i, j, 0)); + } + } + sycl_device.deallocate(gpu_out_data3); + sycl_device.deallocate(gpu_in1_data); + sycl_device.deallocate(gpu_in2_data); +} +template<typename DataType, int DataLayout, typename IndexType> +static void test_concatenation_as_lvalue(const Eigen::SyclDevice& sycl_device) +{ + + IndexType leftDim1 = 2; + IndexType leftDim2 = 3; + Eigen::array<IndexType, 2> leftRange = {{leftDim1, leftDim2}}; + + IndexType rightDim1 = 2; + IndexType rightDim2 = 3; + Eigen::array<IndexType, 2> rightRange = {{rightDim1, rightDim2}}; + + IndexType concatDim1 = 4; + IndexType concatDim2 = 3; + 
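+ // concatDim1 = leftDim1 + rightDim1; the non-concatenation dimension must match.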
Eigen::array<IndexType, 2> resRange = {{concatDim1, concatDim2}}; + + Tensor<DataType, 2, DataLayout, IndexType> left(leftRange); + Tensor<DataType, 2, DataLayout, IndexType> right(rightRange); + Tensor<DataType, 2, DataLayout, IndexType> result(resRange); + + left.setRandom(); + right.setRandom(); + result.setRandom(); + + DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(left.dimensions().TotalSize()*sizeof(DataType))); + DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(right.dimensions().TotalSize()*sizeof(DataType))); + DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(result.dimensions().TotalSize()*sizeof(DataType))); + + + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_in1(gpu_in1_data, leftRange); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_in2(gpu_in2_data, rightRange); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType>> gpu_out(gpu_out_data, resRange); + + sycl_device.memcpyHostToDevice(gpu_in1_data, left.data(),(left.dimensions().TotalSize())*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in2_data, right.data(),(right.dimensions().TotalSize())*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_out_data, result.data(),(result.dimensions().TotalSize())*sizeof(DataType)); + +// t1.concatenate(t2, 0) = result; + gpu_in1.concatenate(gpu_in2, 0).device(sycl_device) =gpu_out; + sycl_device.memcpyDeviceToHost(left.data(), gpu_in1_data,(left.dimensions().TotalSize())*sizeof(DataType)); + sycl_device.memcpyDeviceToHost(right.data(), gpu_in2_data,(right.dimensions().TotalSize())*sizeof(DataType)); + + for (IndexType i = 0; i < 2; ++i) { + for (IndexType j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL(left(i, j), result(i, j)); + VERIFY_IS_EQUAL(right(i, j), result(i+2, j)); + } + } + sycl_device.deallocate(gpu_in1_data); + sycl_device.deallocate(gpu_in2_data); + sycl_device.deallocate(gpu_out_data); +} + + +template <typename DataType, typename Dev_selector> void tensorConcat_perDevice(Dev_selector s){ + QueueInterface queueInterface(s); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + test_simple_concatenation<DataType, RowMajor, int64_t>(sycl_device); + test_simple_concatenation<DataType, ColMajor, int64_t>(sycl_device); + test_concatenation_as_lvalue<DataType, ColMajor, int64_t>(sycl_device); +} +void test_cxx11_tensor_concatenation_sycl() { + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(tensorConcat_perDevice<float>(device)); + } +} diff --git a/eigen/unsupported/test/cxx11_tensor_const.cpp b/eigen/unsupported/test/cxx11_tensor_const.cpp new file mode 100644 index 0000000..ad9c9da --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_const.cpp @@ -0,0 +1,62 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
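[Editorial aside: the test above exercises the unusual direction of assignment: writing through a concatenation view scatters the right-hand side back into the two operands. A host-only sketch of the same contract, mirroring the CPU test earlier in this patch, with a plain main/assert in place of the harness.]

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>

int main() {
  Eigen::Tensor<int, 2> t1(2, 3), t2(2, 3), result(4, 3);
  t1.setZero();
  t2.setZero();
  result.setRandom();

  // Assigning INTO the concatenation view writes rows 0-1 to t1, rows 2-3 to t2.
  t1.concatenate(t2, 0) = result;

  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      assert(t1(i, j) == result(i, j));
      assert(t2(i, j) == result(i + 2, j));
    }
  }
  return 0;
}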
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> +using Eigen::Tensor; + + +static void test_simple_assign() +{ + Tensor<int, 3> random(2,3,7); + random.setRandom(); + + TensorMap<Tensor<const int, 3> > constant(random.data(), 2, 3, 7); + Tensor<int, 3> result(2,3,7); + result = constant; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL((result(i,j,k)), random(i,j,k)); + } + } + } +} + + +static void test_assign_of_const_tensor() +{ + Tensor<int, 3> random(2,3,7); + random.setRandom(); + + TensorMap<Tensor<const int, 3> > constant1(random.data(), 2, 3, 7); + TensorMap<const Tensor<int, 3> > constant2(random.data(), 2, 3, 7); + const TensorMap<Tensor<int, 3> > constant3(random.data(), 2, 3, 7); + + Tensor<int, 2> result1 = constant1.chip(0, 2); + Tensor<int, 2> result2 = constant2.chip(0, 2); + Tensor<int, 2> result3 = constant3.chip(0, 2); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL((result1(i,j)), random(i,j,0)); + VERIFY_IS_EQUAL((result2(i,j)), random(i,j,0)); + VERIFY_IS_EQUAL((result3(i,j)), random(i,j,0)); + } + } +} + + +void test_cxx11_tensor_const() +{ + CALL_SUBTEST(test_simple_assign()); + CALL_SUBTEST(test_assign_of_const_tensor()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_contract_cuda.cu b/eigen/unsupported/test/cxx11_tensor_contract_cuda.cu new file mode 100644 index 0000000..dd68430 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_contract_cuda.cu @@ -0,0 +1,216 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_cuda +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; +typedef Tensor<float, 1>::DimensionPair DimPair; + +template<int DataLayout> +void test_cuda_contraction(int m_size, int k_size, int n_size) +{ + std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; + // with these dimensions, the output has 300 * 140 elements, which is + // more than 30 * 1024, which is the number of threads in blocks on + // a 15 SM GK110 GPU + Tensor<float, 2, DataLayout> t_left(m_size, k_size); + Tensor<float, 2, DataLayout> t_right(k_size, n_size); + Tensor<float, 2, DataLayout> t_result(m_size, n_size); + Tensor<float, 2, DataLayout> t_result_gpu(m_size, n_size); + Eigen::array<DimPair, 1> dims(DimPair(1, 0)); + + t_left.setRandom(); + t_right.setRandom(); + + std::size_t t_left_bytes = t_left.size() * sizeof(float); + std::size_t t_right_bytes = t_right.size() * sizeof(float); + std::size_t t_result_bytes = t_result.size() * sizeof(float); + + float* d_t_left; + float* d_t_right; + float* d_t_result; + + cudaMalloc((void**)(&d_t_left), t_left_bytes); + cudaMalloc((void**)(&d_t_right), t_right_bytes); + cudaMalloc((void**)(&d_t_result), t_result_bytes); + + cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > + gpu_t_left(d_t_left, Eigen::array<int, 2>(m_size, k_size)); + Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > + gpu_t_right(d_t_right, Eigen::array<int, 2>(k_size, n_size)); + Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > + gpu_t_result(d_t_result, Eigen::array<int, 2>(m_size, n_size)); + + + gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims); + t_result = t_left.contract(t_right, dims); + + cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost); + for (DenseIndex i = 0; i < t_result.size(); i++) { + if (fabs(t_result(i) - t_result_gpu(i)) < 1e-4f) { + continue; + } + if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), 1e-4f)) { + continue; + } + std::cout << "mismatch detected at index " << i << ": " << t_result(i) + << " vs " << t_result_gpu(i) << std::endl; + assert(false); + } + + cudaFree((void*)d_t_left); + cudaFree((void*)d_t_right); + cudaFree((void*)d_t_result); +} + + +template<int DataLayout> +void test_scalar(int m_size, int k_size, int n_size) +{ + std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; + // with these dimensions, the output has 300 * 140 elements, which is + // more than 30 * 1024, which is the number of threads in blocks on + // a 15 SM GK110 GPU + Tensor<float, 2, DataLayout> t_left(m_size, k_size); + Tensor<float, 2, DataLayout> t_right(k_size, n_size); + Tensor<float, 0, DataLayout> t_result; + Tensor<float, 0, DataLayout> t_result_gpu; + Eigen::array<DimPair, 2> dims(DimPair(0, 0), DimPair(1, 1)); + + t_left.setRandom(); + t_right.setRandom(); + + std::size_t t_left_bytes = t_left.size() * sizeof(float); + std::size_t t_right_bytes = t_right.size() * sizeof(float); + std::size_t t_result_bytes = 
sizeof(float); + + float* d_t_left; + float* d_t_right; + float* d_t_result; + + cudaMalloc((void**)(&d_t_left), t_left_bytes); + cudaMalloc((void**)(&d_t_right), t_right_bytes); + cudaMalloc((void**)(&d_t_result), t_result_bytes); + + cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > + gpu_t_left(d_t_left, m_size, k_size); + Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > + gpu_t_right(d_t_right, k_size, n_size); + Eigen::TensorMap<Eigen::Tensor<float, 0, DataLayout> > + gpu_t_result(d_t_result); + + gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims); + t_result = t_left.contract(t_right, dims); + + cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost); + if (fabs(t_result() - t_result_gpu()) > 1e-4f && + !Eigen::internal::isApprox(t_result(), t_result_gpu(), 1e-4f)) { + std::cout << "mismatch detected: " << t_result() + << " vs " << t_result_gpu() << std::endl; + assert(false); + } + + cudaFree((void*)d_t_left); + cudaFree((void*)d_t_right); + cudaFree((void*)d_t_result); +} + + +template<int DataLayout> +void test_cuda_contraction_m() { + for (int k = 32; k < 256; k++) { + test_cuda_contraction<ColMajor>(k, 128, 128); + test_cuda_contraction<RowMajor>(k, 128, 128); + } +} + +template<int DataLayout> +void test_cuda_contraction_k() { + for (int k = 32; k < 256; k++) { + test_cuda_contraction<ColMajor>(128, k, 128); + test_cuda_contraction<RowMajor>(128, k, 128); + } +} + +template<int DataLayout> +void test_cuda_contraction_n() { + for (int k = 32; k < 256; k++) { + test_cuda_contraction<ColMajor>(128, 128, k); + test_cuda_contraction<RowMajor>(128, 128, k); + } +} + + +template<int DataLayout> +void test_cuda_contraction_sizes() { + int m_sizes[] = { 31, 39, 63, 64, 65, + 127, 129, 255, 257 , 511, + 512, 513, 1023, 1024, 1025}; + + int n_sizes[] = { 31, 39, 63, 64, 65, + 127, 129, 255, 257, 511, + 512, 513, 1023, 1024, 1025}; + + int k_sizes[] = { 31, 39, 63, 64, 65, + 95, 96, 127, 129, 255, + 257, 511, 512, 513, 1023, + 1024, 1025}; + + for (int i = 0; i < 15; i++) { + for (int j = 0; j < 15; j++) { + for (int k = 0; k < 17; k++) { + test_cuda_contraction<DataLayout>(m_sizes[i], n_sizes[j], k_sizes[k]); + } + } + } +} + +void test_cxx11_tensor_cuda() +{ + CALL_SUBTEST_1(test_cuda_contraction<ColMajor>(128, 128, 128)); + CALL_SUBTEST_1(test_cuda_contraction<RowMajor>(128, 128, 128)); + + CALL_SUBTEST_1(test_scalar<ColMajor>(128, 128, 128)); + CALL_SUBTEST_1(test_scalar<RowMajor>(128, 128, 128)); + + CALL_SUBTEST_2(test_cuda_contraction_m<ColMajor>()); + CALL_SUBTEST_3(test_cuda_contraction_m<RowMajor>()); + + CALL_SUBTEST_4(test_cuda_contraction_k<ColMajor>()); + CALL_SUBTEST_5(test_cuda_contraction_k<RowMajor>()); + + CALL_SUBTEST_6(test_cuda_contraction_n<ColMajor>()); + CALL_SUBTEST_7(test_cuda_contraction_n<RowMajor>()); + + CALL_SUBTEST_8(test_cuda_contraction_sizes<ColMajor>()); + CALL_SUBTEST_9(test_cuda_contraction_sizes<RowMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp new file mode 100644 index 0000000..5bace66 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_contract_sycl.cpp @@ -0,0 +1,290 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_contract_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + +#include <iostream> +#include <chrono> +#include <ctime> + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; +template<int DataLayout, typename DataType, typename IndexType, typename Device> +void static test_sycl_contraction(const Device& sycl_device, IndexType m_size, IndexType k_size, IndexType n_size) +{ + typedef typename Tensor<DataType, 1, DataLayout, IndexType>::DimensionPair DimPair; + static const DataType error_threshold =1e-4f; +// std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; + // with these dimensions, the output has 300 * 140 elements, which is + // more than 30 * 1024, which is the number of threads in blocks on + // a 15 SM GK110 GPU + Tensor<DataType, 2, DataLayout, IndexType> t_left(m_size, k_size); + Tensor<DataType, 2, DataLayout, IndexType> t_right(k_size, n_size); + Tensor<DataType, 2, DataLayout, IndexType> t_result(m_size, n_size); + Tensor<DataType, 2, DataLayout, IndexType> t_result_gpu(m_size, n_size); +// Eigen::array<DimPair, 1> dims(DimPair(1, 0)); + Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}}; + Eigen::array<IndexType, 2> left_dims = {{m_size, k_size}}; + Eigen::array<IndexType, 2> right_dims = {{k_size, n_size}}; + Eigen::array<IndexType, 2> result_dims = {{m_size, n_size}}; + + t_left.setRandom(); + t_right.setRandom(); + + std::size_t t_left_bytes = t_left.size() * sizeof(DataType); + std::size_t t_right_bytes = t_right.size() * sizeof(DataType); + std::size_t t_result_bytes = t_result.size() * sizeof(DataType); + + DataType * d_t_left = static_cast<DataType*>(sycl_device.allocate(t_left_bytes)); + DataType * d_t_right = static_cast<DataType*>(sycl_device.allocate(t_right_bytes)); + DataType * d_t_result = static_cast<DataType*>(sycl_device.allocate(t_result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_left(d_t_left, left_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_right(d_t_right, right_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_result(d_t_result, result_dims); + + sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes); + sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes); + + gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims); + sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes); + + t_result = t_left.contract(t_right, dims); + + for (IndexType i = 0; i < t_result.size(); i++) { + if (static_cast<DataType>(fabs(t_result(i) - t_result_gpu(i))) < error_threshold) { + continue; + } + if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) { + continue; + } + std::cout << "mismatch detected at IndexType " << i << ": " << t_result(i) + << " vs " << t_result_gpu(i) << std::endl; + assert(false); + } + 
sycl_device.deallocate(d_t_left); + sycl_device.deallocate(d_t_right); + sycl_device.deallocate(d_t_result); +} + +template<int DataLayout, typename DataType, typename IndexType, typename Device> +void test_TF(const Device& sycl_device) +{ + typedef typename Tensor<DataType, 1, DataLayout, IndexType>::DimensionPair DimPair; + static const DataType error_threshold =1e-4f; + Eigen::array<IndexType, 2> left_dims = {{2, 3}}; + Eigen::array<IndexType, 2> right_dims = {{3, 1}}; + Eigen::array<IndexType, 2> res_dims = {{2, 1}}; + Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}}; + + + Tensor<DataType, 2, DataLayout, IndexType> t_left(left_dims); + Tensor<DataType, 2, DataLayout, IndexType> t_right(right_dims); + Tensor<DataType, 2, DataLayout, IndexType> t_result_gpu(res_dims); + Tensor<DataType, 2, DataLayout, IndexType> t_result(res_dims); + + t_left.data()[0] = 1.0f; + t_left.data()[1] = 2.0f; + t_left.data()[2] = 3.0f; + t_left.data()[3] = 4.0f; + t_left.data()[4] = 5.0f; + t_left.data()[5] = 6.0f; + + t_right.data()[0] = -1.0f; + t_right.data()[1] = 0.5f; + t_right.data()[2] = 2.0f; + + std::size_t t_left_bytes = t_left.size() * sizeof(DataType); + std::size_t t_right_bytes = t_right.size() * sizeof(DataType); + std::size_t t_result_bytes = t_result.size()*sizeof(DataType); + + + DataType * d_t_left = static_cast<DataType*>(sycl_device.allocate(t_left_bytes)); + DataType * d_t_right = static_cast<DataType*>(sycl_device.allocate(t_right_bytes)); + DataType * d_t_result = static_cast<DataType*>(sycl_device.allocate(t_result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_left(d_t_left, left_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_right(d_t_right, right_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_result(d_t_result, res_dims); + + sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes); + sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes); + + gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims); + sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes); + + t_result = t_left.contract(t_right, dims); + + for (IndexType i = 0; i < t_result.size(); i++) { + if (static_cast<DataType>(fabs(t_result(i) - t_result_gpu(i))) < error_threshold) { + continue; + } + if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), error_threshold)) { + continue; + } + std::cout << "mismatch detected at IndexType " << i << ": " << t_result(i) + << " vs " << t_result_gpu(i) << std::endl; + assert(false); + } + sycl_device.deallocate(d_t_left); + sycl_device.deallocate(d_t_right); + sycl_device.deallocate(d_t_result); + + +} + +template<int DataLayout, typename DataType, typename IndexType, typename Device> +void test_scalar(const Device& sycl_device, IndexType m_size, IndexType k_size, IndexType n_size) +{ + //std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; + // with these dimensions, the output has 300 * 140 elements, which is + // more than 30 * 1024, which is the number of threads in blocks on + // a 15 SM GK110 GPU + typedef typename Tensor<DataType, 1, DataLayout, IndexType>::DimensionPair DimPair; + static const DataType error_threshold =1e-4f; + Tensor<DataType, 2, DataLayout, IndexType> t_left(m_size, k_size); + Tensor<DataType, 2, DataLayout, IndexType> t_right(k_size, n_size); + Tensor<DataType, 0, DataLayout, IndexType> t_result; + Tensor<DataType, 0, 
DataLayout, IndexType> t_result_gpu; + Eigen::array<DimPair, 2> dims = {{DimPair(0, 0), DimPair(1, 1)}}; + Eigen::array<IndexType, 2> left_dims = {{m_size, k_size}}; + Eigen::array<IndexType, 2> right_dims = {{k_size, n_size}}; + t_left.setRandom(); + t_right.setRandom(); + + std::size_t t_left_bytes = t_left.size() * sizeof(DataType); + std::size_t t_right_bytes = t_right.size() * sizeof(DataType); + std::size_t t_result_bytes = sizeof(DataType); + + + DataType * d_t_left = static_cast<DataType*>(sycl_device.allocate(t_left_bytes)); + DataType * d_t_right = static_cast<DataType*>(sycl_device.allocate(t_right_bytes)); + DataType * d_t_result = static_cast<DataType*>(sycl_device.allocate(t_result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_left(d_t_left, left_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_t_right(d_t_right, right_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 0, DataLayout, IndexType> > gpu_t_result(d_t_result); + + sycl_device.memcpyHostToDevice(d_t_left, t_left.data(),t_left_bytes); + sycl_device.memcpyHostToDevice(d_t_right, t_right.data(),t_right_bytes); + + gpu_t_result.device(sycl_device) = gpu_t_left.contract(gpu_t_right, dims); + sycl_device.memcpyDeviceToHost(t_result_gpu.data(), d_t_result, t_result_bytes); + + t_result = t_left.contract(t_right, dims); + + if (static_cast<DataType>(fabs(t_result() - t_result_gpu())) > error_threshold && + !Eigen::internal::isApprox(t_result(), t_result_gpu(), error_threshold)) { + std::cout << "mismatch detected: " << t_result() + << " vs " << t_result_gpu() << std::endl; + assert(false); + } + + sycl_device.deallocate(d_t_left); + sycl_device.deallocate(d_t_right); + sycl_device.deallocate(d_t_result); +} + + +template<int DataLayout, typename DataType, typename IndexType, typename Device> +void test_sycl_contraction_m(const Device& sycl_device) { + for (IndexType k = 32; k < 256; k++) { + test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, k, 128, 128); + } +} + +template<int DataLayout, typename DataType, typename IndexType, typename Device> +void test_sycl_contraction_k(const Device& sycl_device) { + for (IndexType k = 32; k < 256; k++) { + test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, 128, k, 128); + } +} + +template<int DataLayout, typename DataType, typename IndexType, typename Device> +void test_sycl_contraction_n(const Device& sycl_device) { + for (IndexType k = 32; k < 256; k++) { + test_sycl_contraction<DataLayout, DataType, IndexType>(sycl_device, 128, 128, k); + } +} + + +template<int DataLayout, typename DataType, typename IndexType, typename Device> +void test_sycl_contraction_sizes(const Device& sycl_device) { + IndexType m_sizes[] = { 31, 39, 63, 64, 65, + 127, 129, 255, 257 , 511, + 512, 513, 1023, 1024, 1025}; + + IndexType n_sizes[] = { 31, 39, 63, 64, 65, + 127, 129, 255, 257, 511, + 512, 513, 1023, 1024, 1025}; + + IndexType k_sizes[] = { 31, 39, 63, 64, 65, + 95, 96, 127, 129, 255, + 257, 511, 512, 513, 1023, + 1024, 1025}; + + for (IndexType i = 0; i < 15; i++) { + for (IndexType j = 0; j < 15; j++) { + for (IndexType k = 0; k < 17; k++) { + test_sycl_contraction<DataLayout, DataType,IndexType>(sycl_device, m_sizes[i], n_sizes[j], k_sizes[k]); + } + } + } +} + +template <typename Dev_selector> void tensorContractionPerDevice(Dev_selector& s){ + QueueInterface queueInterface(s); + auto sycl_device=Eigen::SyclDevice(&queueInterface); + test_sycl_contraction<ColMajor, 
float,int64_t>(sycl_device, 32, 32, 32); + test_sycl_contraction<RowMajor,float,int64_t>(sycl_device, 32, 32, 32); + test_scalar<ColMajor,float,int64_t>(sycl_device, 32, 32, 32); + test_scalar<RowMajor,float,int64_t>(sycl_device, 32, 32, 32); + std::chrono::time_point<std::chrono::system_clock> start, end; + start = std::chrono::system_clock::now(); + test_sycl_contraction<ColMajor,float,int64_t>(sycl_device, 128, 128, 128); + test_sycl_contraction<RowMajor,float,int64_t>(sycl_device, 128, 128, 128); + test_scalar<ColMajor,float,int64_t>(sycl_device, 128, 128, 128); + test_scalar<RowMajor,float,int64_t>(sycl_device, 128, 128, 128); + test_sycl_contraction_m<ColMajor, float, int64_t>(sycl_device); + test_sycl_contraction_m<RowMajor, float, int64_t>(sycl_device); + test_sycl_contraction_n<ColMajor, float, int64_t>(sycl_device); + test_sycl_contraction_n<RowMajor, float, int64_t>(sycl_device); + test_sycl_contraction_k<ColMajor, float, int64_t>(sycl_device); + test_sycl_contraction_k<RowMajor, float, int64_t>(sycl_device); + test_sycl_contraction_sizes<ColMajor, float, int64_t>(sycl_device); + test_sycl_contraction_sizes<RowMajor, float, int64_t>(sycl_device); + test_TF<RowMajor, float, int64_t>(sycl_device); + test_TF<ColMajor, float, int64_t>(sycl_device); + + end = std::chrono::system_clock::now(); + std::chrono::duration<double> elapsed_seconds = end-start; + std::time_t end_time = std::chrono::system_clock::to_time_t(end); + std::cout << "finished computation at " << std::ctime(&end_time) + << "elapsed time: " << elapsed_seconds.count() << "s\n"; + +} + +void test_cxx11_tensor_contract_sycl() { + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(tensorContractionPerDevice(device)); + } +} diff --git a/eigen/unsupported/test/cxx11_tensor_contraction.cpp b/eigen/unsupported/test/cxx11_tensor_contraction.cpp new file mode 100644 index 0000000..ace9705 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_contraction.cpp @@ -0,0 +1,545 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
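[Editorial aside: the SYCL contraction tests above and the CPU tests that follow all reduce to the same contract: pairing index 1 of the left tensor with index 0 of the right reproduces an ordinary matrix product. A minimal host-only sketch, with a plain main/assert and the unsupported Tensor headers assumed.]

#include <unsupported/Eigen/CXX11/Tensor>
#include <cassert>
#include <cmath>

int main() {
  typedef Eigen::Tensor<float, 1>::DimensionPair DimPair;

  Eigen::Tensor<float, 2> a(2, 3), b(3, 2);
  a.setRandom();
  b.setRandom();

  // Contract index 1 of "a" against index 0 of "b": a 2x2 matrix product.
  Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
  Eigen::Tensor<float, 2> c = a.contract(b, dims);

  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 2; ++j) {
      float expected = 0.f;
      for (int k = 0; k < 3; ++k) expected += a(i, k) * b(k, j);
      assert(std::fabs(c(i, j) - expected) < 1e-5f);
    }
  }
  return 0;
}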
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::DefaultDevice; +using Eigen::Tensor; + +typedef Tensor<float, 1>::DimensionPair DimPair; + +template<int DataLayout> +static void test_evals() +{ + Tensor<float, 2, DataLayout> mat1(2, 3); + Tensor<float, 2, DataLayout> mat2(2, 3); + Tensor<float, 2, DataLayout> mat3(3, 2); + + mat1.setRandom(); + mat2.setRandom(); + mat3.setRandom(); + + Tensor<float, 2, DataLayout> mat4(3,3); + mat4.setZero(); + Eigen::array<DimPair, 1> dims3 = {{DimPair(0, 0)}}; + typedef TensorEvaluator<decltype(mat1.contract(mat2, dims3)), DefaultDevice> Evaluator; + Evaluator eval(mat1.contract(mat2, dims3), DefaultDevice()); + eval.evalTo(mat4.data()); + EIGEN_STATIC_ASSERT(Evaluator::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE); + VERIFY_IS_EQUAL(eval.dimensions()[0], 3); + VERIFY_IS_EQUAL(eval.dimensions()[1], 3); + + VERIFY_IS_APPROX(mat4(0,0), mat1(0,0)*mat2(0,0) + mat1(1,0)*mat2(1,0)); + VERIFY_IS_APPROX(mat4(0,1), mat1(0,0)*mat2(0,1) + mat1(1,0)*mat2(1,1)); + VERIFY_IS_APPROX(mat4(0,2), mat1(0,0)*mat2(0,2) + mat1(1,0)*mat2(1,2)); + VERIFY_IS_APPROX(mat4(1,0), mat1(0,1)*mat2(0,0) + mat1(1,1)*mat2(1,0)); + VERIFY_IS_APPROX(mat4(1,1), mat1(0,1)*mat2(0,1) + mat1(1,1)*mat2(1,1)); + VERIFY_IS_APPROX(mat4(1,2), mat1(0,1)*mat2(0,2) + mat1(1,1)*mat2(1,2)); + VERIFY_IS_APPROX(mat4(2,0), mat1(0,2)*mat2(0,0) + mat1(1,2)*mat2(1,0)); + VERIFY_IS_APPROX(mat4(2,1), mat1(0,2)*mat2(0,1) + mat1(1,2)*mat2(1,1)); + VERIFY_IS_APPROX(mat4(2,2), mat1(0,2)*mat2(0,2) + mat1(1,2)*mat2(1,2)); + + Tensor<float, 2, DataLayout> mat5(2,2); + mat5.setZero(); + Eigen::array<DimPair, 1> dims4 = {{DimPair(1, 1)}}; + typedef TensorEvaluator<decltype(mat1.contract(mat2, dims4)), DefaultDevice> Evaluator2; + Evaluator2 eval2(mat1.contract(mat2, dims4), DefaultDevice()); + eval2.evalTo(mat5.data()); + EIGEN_STATIC_ASSERT(Evaluator2::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE); + VERIFY_IS_EQUAL(eval2.dimensions()[0], 2); + VERIFY_IS_EQUAL(eval2.dimensions()[1], 2); + + VERIFY_IS_APPROX(mat5(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(0,1) + mat1(0,2)*mat2(0,2)); + VERIFY_IS_APPROX(mat5(0,1), mat1(0,0)*mat2(1,0) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(1,2)); + VERIFY_IS_APPROX(mat5(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(0,1) + mat1(1,2)*mat2(0,2)); + VERIFY_IS_APPROX(mat5(1,1), mat1(1,0)*mat2(1,0) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(1,2)); + + Tensor<float, 2, DataLayout> mat6(2,2); + mat6.setZero(); + Eigen::array<DimPair, 1> dims6 = {{DimPair(1, 0)}}; + typedef TensorEvaluator<decltype(mat1.contract(mat3, dims6)), DefaultDevice> Evaluator3; + Evaluator3 eval3(mat1.contract(mat3, dims6), DefaultDevice()); + eval3.evalTo(mat6.data()); + EIGEN_STATIC_ASSERT(Evaluator3::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE); + VERIFY_IS_EQUAL(eval3.dimensions()[0], 2); + VERIFY_IS_EQUAL(eval3.dimensions()[1], 2); + + VERIFY_IS_APPROX(mat6(0,0), mat1(0,0)*mat3(0,0) + mat1(0,1)*mat3(1,0) + mat1(0,2)*mat3(2,0)); + VERIFY_IS_APPROX(mat6(0,1), mat1(0,0)*mat3(0,1) + mat1(0,1)*mat3(1,1) + mat1(0,2)*mat3(2,1)); + VERIFY_IS_APPROX(mat6(1,0), mat1(1,0)*mat3(0,0) + mat1(1,1)*mat3(1,0) + mat1(1,2)*mat3(2,0)); + VERIFY_IS_APPROX(mat6(1,1), mat1(1,0)*mat3(0,1) + mat1(1,1)*mat3(1,1) + mat1(1,2)*mat3(2,1)); +} + +template<int DataLayout> +static void test_scalar() +{ + Tensor<float, 1, DataLayout> vec1({6}); + Tensor<float, 1, DataLayout> vec2({6}); + + vec1.setRandom(); + vec2.setRandom(); + + Eigen::array<DimPair, 1> dims = {{DimPair(0, 0)}}; + Tensor<float, 0, DataLayout> scalar = 
vec1.contract(vec2, dims); + + float expected = 0.0f; + for (int i = 0; i < 6; ++i) { + expected += vec1(i) * vec2(i); + } + VERIFY_IS_APPROX(scalar(), expected); +} + +template<int DataLayout> +static void test_multidims() +{ + Tensor<float, 3, DataLayout> mat1(2, 2, 2); + Tensor<float, 4, DataLayout> mat2(2, 2, 2, 2); + + mat1.setRandom(); + mat2.setRandom(); + + Tensor<float, 3, DataLayout> mat3(2, 2, 2); + mat3.setZero(); + Eigen::array<DimPair, 2> dims = {{DimPair(1, 2), DimPair(2, 3)}}; + typedef TensorEvaluator<decltype(mat1.contract(mat2, dims)), DefaultDevice> Evaluator; + Evaluator eval(mat1.contract(mat2, dims), DefaultDevice()); + eval.evalTo(mat3.data()); + EIGEN_STATIC_ASSERT(Evaluator::NumDims==3ul, YOU_MADE_A_PROGRAMMING_MISTAKE); + VERIFY_IS_EQUAL(eval.dimensions()[0], 2); + VERIFY_IS_EQUAL(eval.dimensions()[1], 2); + VERIFY_IS_EQUAL(eval.dimensions()[2], 2); + + VERIFY_IS_APPROX(mat3(0,0,0), mat1(0,0,0)*mat2(0,0,0,0) + mat1(0,1,0)*mat2(0,0,1,0) + + mat1(0,0,1)*mat2(0,0,0,1) + mat1(0,1,1)*mat2(0,0,1,1)); + VERIFY_IS_APPROX(mat3(0,0,1), mat1(0,0,0)*mat2(0,1,0,0) + mat1(0,1,0)*mat2(0,1,1,0) + + mat1(0,0,1)*mat2(0,1,0,1) + mat1(0,1,1)*mat2(0,1,1,1)); + VERIFY_IS_APPROX(mat3(0,1,0), mat1(0,0,0)*mat2(1,0,0,0) + mat1(0,1,0)*mat2(1,0,1,0) + + mat1(0,0,1)*mat2(1,0,0,1) + mat1(0,1,1)*mat2(1,0,1,1)); + VERIFY_IS_APPROX(mat3(0,1,1), mat1(0,0,0)*mat2(1,1,0,0) + mat1(0,1,0)*mat2(1,1,1,0) + + mat1(0,0,1)*mat2(1,1,0,1) + mat1(0,1,1)*mat2(1,1,1,1)); + VERIFY_IS_APPROX(mat3(1,0,0), mat1(1,0,0)*mat2(0,0,0,0) + mat1(1,1,0)*mat2(0,0,1,0) + + mat1(1,0,1)*mat2(0,0,0,1) + mat1(1,1,1)*mat2(0,0,1,1)); + VERIFY_IS_APPROX(mat3(1,0,1), mat1(1,0,0)*mat2(0,1,0,0) + mat1(1,1,0)*mat2(0,1,1,0) + + mat1(1,0,1)*mat2(0,1,0,1) + mat1(1,1,1)*mat2(0,1,1,1)); + VERIFY_IS_APPROX(mat3(1,1,0), mat1(1,0,0)*mat2(1,0,0,0) + mat1(1,1,0)*mat2(1,0,1,0) + + mat1(1,0,1)*mat2(1,0,0,1) + mat1(1,1,1)*mat2(1,0,1,1)); + VERIFY_IS_APPROX(mat3(1,1,1), mat1(1,0,0)*mat2(1,1,0,0) + mat1(1,1,0)*mat2(1,1,1,0) + + mat1(1,0,1)*mat2(1,1,0,1) + mat1(1,1,1)*mat2(1,1,1,1)); + + Tensor<float, 2, DataLayout> mat4(2, 2); + Tensor<float, 3, DataLayout> mat5(2, 2, 2); + + mat4.setRandom(); + mat5.setRandom(); + + Tensor<float, 1, DataLayout> mat6(2); + mat6.setZero(); + Eigen::array<DimPair, 2> dims2({{DimPair(0, 1), DimPair(1, 0)}}); + typedef TensorEvaluator<decltype(mat4.contract(mat5, dims2)), DefaultDevice> Evaluator2; + Evaluator2 eval2(mat4.contract(mat5, dims2), DefaultDevice()); + eval2.evalTo(mat6.data()); + EIGEN_STATIC_ASSERT(Evaluator2::NumDims==1ul, YOU_MADE_A_PROGRAMMING_MISTAKE); + VERIFY_IS_EQUAL(eval2.dimensions()[0], 2); + + VERIFY_IS_APPROX(mat6(0), mat4(0,0)*mat5(0,0,0) + mat4(1,0)*mat5(0,1,0) + + mat4(0,1)*mat5(1,0,0) + mat4(1,1)*mat5(1,1,0)); + VERIFY_IS_APPROX(mat6(1), mat4(0,0)*mat5(0,0,1) + mat4(1,0)*mat5(0,1,1) + + mat4(0,1)*mat5(1,0,1) + mat4(1,1)*mat5(1,1,1)); +} + +template<int DataLayout> +static void test_holes() { + Tensor<float, 4, DataLayout> t1(2, 5, 7, 3); + Tensor<float, 5, DataLayout> t2(2, 7, 11, 13, 3); + t1.setRandom(); + t2.setRandom(); + + Eigen::array<DimPair, 2> dims = {{DimPair(0, 0), DimPair(3, 4)}}; + Tensor<float, 5, DataLayout> result = t1.contract(t2, dims); + VERIFY_IS_EQUAL(result.dimension(0), 5); + VERIFY_IS_EQUAL(result.dimension(1), 7); + VERIFY_IS_EQUAL(result.dimension(2), 7); + VERIFY_IS_EQUAL(result.dimension(3), 11); + VERIFY_IS_EQUAL(result.dimension(4), 13); + + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 5; 
++l) { + for (int m = 0; m < 5; ++m) { + VERIFY_IS_APPROX(result(i, j, k, l, m), + t1(0, i, j, 0) * t2(0, k, l, m, 0) + + t1(1, i, j, 0) * t2(1, k, l, m, 0) + + t1(0, i, j, 1) * t2(0, k, l, m, 1) + + t1(1, i, j, 1) * t2(1, k, l, m, 1) + + t1(0, i, j, 2) * t2(0, k, l, m, 2) + + t1(1, i, j, 2) * t2(1, k, l, m, 2)); + } + } + } + } + } +} + +template<int DataLayout> +static void test_full_redux() +{ + Tensor<float, 2, DataLayout> t1(2, 2); + Tensor<float, 3, DataLayout> t2(2, 2, 2); + t1.setRandom(); + t2.setRandom(); + + Eigen::array<DimPair, 2> dims = {{DimPair(0, 0), DimPair(1, 1)}}; + Tensor<float, 1, DataLayout> result = t1.contract(t2, dims); + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_APPROX(result(0), t1(0, 0) * t2(0, 0, 0) + t1(1, 0) * t2(1, 0, 0) + + t1(0, 1) * t2(0, 1, 0) + t1(1, 1) * t2(1, 1, 0)); + VERIFY_IS_APPROX(result(1), t1(0, 0) * t2(0, 0, 1) + t1(1, 0) * t2(1, 0, 1) + + t1(0, 1) * t2(0, 1, 1) + t1(1, 1) * t2(1, 1, 1)); + + dims[0] = DimPair(1, 0); + dims[1] = DimPair(2, 1); + result = t2.contract(t1, dims); + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_APPROX(result(0), t1(0, 0) * t2(0, 0, 0) + t1(1, 0) * t2(0, 1, 0) + + t1(0, 1) * t2(0, 0, 1) + t1(1, 1) * t2(0, 1, 1)); + VERIFY_IS_APPROX(result(1), t1(0, 0) * t2(1, 0, 0) + t1(1, 0) * t2(1, 1, 0) + + t1(0, 1) * t2(1, 0, 1) + t1(1, 1) * t2(1, 1, 1)); +} + +template<int DataLayout> +static void test_contraction_of_contraction() +{ + Tensor<float, 2, DataLayout> t1(2, 2); + Tensor<float, 2, DataLayout> t2(2, 2); + Tensor<float, 2, DataLayout> t3(2, 2); + Tensor<float, 2, DataLayout> t4(2, 2); + t1.setRandom(); + t2.setRandom(); + t3.setRandom(); + t4.setRandom(); + + Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}}; + auto contract1 = t1.contract(t2, dims); + auto diff = t3 - contract1; + auto contract2 = t1.contract(t4, dims); + Tensor<float, 2, DataLayout> result = contract2.contract(diff, dims); + + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_EQUAL(result.dimension(1), 2); + + Eigen::Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> + m1(t1.data(), 2, 2), m2(t2.data(), 2, 2), m3(t3.data(), 2, 2), + m4(t4.data(), 2, 2); + Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> + expected = (m1 * m4) * (m3 - m1 * m2); + + VERIFY_IS_APPROX(result(0, 0), expected(0, 0)); + VERIFY_IS_APPROX(result(0, 1), expected(0, 1)); + VERIFY_IS_APPROX(result(1, 0), expected(1, 0)); + VERIFY_IS_APPROX(result(1, 1), expected(1, 1)); +} + +template<int DataLayout> +static void test_expr() +{ + Tensor<float, 2, DataLayout> mat1(2, 3); + Tensor<float, 2, DataLayout> mat2(3, 2); + mat1.setRandom(); + mat2.setRandom(); + + Tensor<float, 2, DataLayout> mat3(2,2); + + Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}}; + mat3 = mat1.contract(mat2, dims); + + VERIFY_IS_APPROX(mat3(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(1,0) + mat1(0,2)*mat2(2,0)); + VERIFY_IS_APPROX(mat3(0,1), mat1(0,0)*mat2(0,1) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(2,1)); + VERIFY_IS_APPROX(mat3(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(1,0) + mat1(1,2)*mat2(2,0)); + VERIFY_IS_APPROX(mat3(1,1), mat1(1,0)*mat2(0,1) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(2,1)); +} + +template<int DataLayout> +static void test_out_of_order_contraction() +{ + Tensor<float, 3, DataLayout> mat1(2, 2, 2); + Tensor<float, 3, DataLayout> mat2(2, 2, 2); + + mat1.setRandom(); + mat2.setRandom(); + + Tensor<float, 2, DataLayout> mat3(2, 2); + + Eigen::array<DimPair, 2> dims = {{DimPair(2, 0), DimPair(0, 2)}}; + mat3 = mat1.contract(mat2, dims); + + VERIFY_IS_APPROX(mat3(0, 
0), + mat1(0,0,0)*mat2(0,0,0) + mat1(1,0,0)*mat2(0,0,1) + + mat1(0,0,1)*mat2(1,0,0) + mat1(1,0,1)*mat2(1,0,1)); + VERIFY_IS_APPROX(mat3(1, 0), + mat1(0,1,0)*mat2(0,0,0) + mat1(1,1,0)*mat2(0,0,1) + + mat1(0,1,1)*mat2(1,0,0) + mat1(1,1,1)*mat2(1,0,1)); + VERIFY_IS_APPROX(mat3(0, 1), + mat1(0,0,0)*mat2(0,1,0) + mat1(1,0,0)*mat2(0,1,1) + + mat1(0,0,1)*mat2(1,1,0) + mat1(1,0,1)*mat2(1,1,1)); + VERIFY_IS_APPROX(mat3(1, 1), + mat1(0,1,0)*mat2(0,1,0) + mat1(1,1,0)*mat2(0,1,1) + + mat1(0,1,1)*mat2(1,1,0) + mat1(1,1,1)*mat2(1,1,1)); + + Eigen::array<DimPair, 2> dims2 = {{DimPair(0, 2), DimPair(2, 0)}}; + mat3 = mat1.contract(mat2, dims2); + + VERIFY_IS_APPROX(mat3(0, 0), + mat1(0,0,0)*mat2(0,0,0) + mat1(1,0,0)*mat2(0,0,1) + + mat1(0,0,1)*mat2(1,0,0) + mat1(1,0,1)*mat2(1,0,1)); + VERIFY_IS_APPROX(mat3(1, 0), + mat1(0,1,0)*mat2(0,0,0) + mat1(1,1,0)*mat2(0,0,1) + + mat1(0,1,1)*mat2(1,0,0) + mat1(1,1,1)*mat2(1,0,1)); + VERIFY_IS_APPROX(mat3(0, 1), + mat1(0,0,0)*mat2(0,1,0) + mat1(1,0,0)*mat2(0,1,1) + + mat1(0,0,1)*mat2(1,1,0) + mat1(1,0,1)*mat2(1,1,1)); + VERIFY_IS_APPROX(mat3(1, 1), + mat1(0,1,0)*mat2(0,1,0) + mat1(1,1,0)*mat2(0,1,1) + + mat1(0,1,1)*mat2(1,1,0) + mat1(1,1,1)*mat2(1,1,1)); + +} + +template<int DataLayout> +static void test_consistency() +{ + // this does something like testing (A*B)^T = (B^T * A^T) + + Tensor<float, 3, DataLayout> mat1(4, 3, 5); + Tensor<float, 5, DataLayout> mat2(3, 2, 1, 5, 4); + mat1.setRandom(); + mat2.setRandom(); + + Tensor<float, 4, DataLayout> mat3(5, 2, 1, 5); + Tensor<float, 4, DataLayout> mat4(2, 1, 5, 5); + + // contract on dimensions of size 4 and 3 + Eigen::array<DimPair, 2> dims1 = {{DimPair(0, 4), DimPair(1, 0)}}; + Eigen::array<DimPair, 2> dims2 = {{DimPair(4, 0), DimPair(0, 1)}}; + + mat3 = mat1.contract(mat2, dims1); + mat4 = mat2.contract(mat1, dims2); + + // check that these are equal except for ordering of dimensions + if (DataLayout == ColMajor) { + for (size_t i = 0; i < 5; i++) { + for (size_t j = 0; j < 10; j++) { + VERIFY_IS_APPROX(mat3.data()[i + 5 * j], mat4.data()[j + 10 * i]); + } + } + } else { + // Row major + for (size_t i = 0; i < 5; i++) { + for (size_t j = 0; j < 10; j++) { + VERIFY_IS_APPROX(mat3.data()[10 * i + j], mat4.data()[i + 5 * j]); + } + } + } +} + +template<int DataLayout> +static void test_large_contraction() +{ + Tensor<float, 4, DataLayout> t_left(30, 50, 8, 31); + Tensor<float, 5, DataLayout> t_right(8, 31, 7, 20, 10); + Tensor<float, 5, DataLayout> t_result(30, 50, 7, 20, 10); + + t_left.setRandom(); + t_right.setRandom(); + + // Add a little offset so that the results won't be close to zero. 
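+ // (setRandom() draws values in roughly [-1, 1], so each output entry sums
+ // 248 products of mixed sign and can cancel to nearly zero, where the
+ // relative tolerance used by VERIFY_IS_APPROX is meaningless; shifting both
+ // operands by 1.0f keeps every result safely away from zero.)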
+ t_left += t_left.constant(1.0f); + t_right += t_right.constant(1.0f); + + typedef Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf; + MapXf m_left(t_left.data(), 1500, 248); + MapXf m_right(t_right.data(), 248, 1400); + Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400); + + // this contraction should be equivalent to a single matrix multiplication + Eigen::array<DimPair, 2> dims = {{DimPair(2, 0), DimPair(3, 1)}}; + + // compute results by separate methods + t_result = t_left.contract(t_right, dims); + m_result = m_left * m_right; + + for (int i = 0; i < t_result.dimensions().TotalSize(); i++) { + VERIFY(&t_result.data()[i] != &m_result.data()[i]); + VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]); + } +} + +template<int DataLayout> +static void test_matrix_vector() +{ + Tensor<float, 2, DataLayout> t_left(30, 50); + Tensor<float, 1, DataLayout> t_right(50); + Tensor<float, 1, DataLayout> t_result(30); + + t_left.setRandom(); + t_right.setRandom(); + + typedef Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf; + MapXf m_left(t_left.data(), 30, 50); + MapXf m_right(t_right.data(), 50, 1); + Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result(30, 1); + + // this contraction should be equivalent to a single matrix multiplication + Eigen::array<DimPair, 1> dims{{DimPair(1, 0)}}; + + // compute results by separate methods + t_result = t_left.contract(t_right, dims); + m_result = m_left * m_right; + + for (int i = 0; i < t_result.dimensions().TotalSize(); i++) { + VERIFY(internal::isApprox(t_result(i), m_result(i, 0), 1)); + } +} + + +template<int DataLayout> +static void test_tensor_vector() +{ + Tensor<float, 3, DataLayout> t_left(7, 13, 17); + Tensor<float, 2, DataLayout> t_right(1, 7); + + t_left.setRandom(); + t_right.setRandom(); + + typedef typename Tensor<float, 1, DataLayout>::DimensionPair DimensionPair; + Eigen::array<DimensionPair, 1> dim_pair01{{{0, 1}}}; + Tensor<float, 3, DataLayout> t_result = t_left.contract(t_right, dim_pair01); + + typedef Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf; + MapXf m_left(t_left.data(), 7, 13*17); + MapXf m_right(t_right.data(), 1, 7); + Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result = m_left.transpose() * m_right.transpose(); + + for (int i = 0; i < t_result.dimensions().TotalSize(); i++) { + VERIFY(internal::isApprox(t_result(i), m_result(i, 0), 1)); + } +} + + +template<int DataLayout> +static void test_small_blocking_factors() +{ + Tensor<float, 4, DataLayout> t_left(30, 5, 3, 31); + Tensor<float, 5, DataLayout> t_right(3, 31, 7, 20, 1); + t_left.setRandom(); + t_right.setRandom(); + + // Add a little offset so that the results won't be close to zero. + t_left += t_left.constant(1.0f); + t_right += t_right.constant(1.0f); + + // Force the cache sizes, which results in smaller blocking factors. 
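+ // (Eigen derives its GEMM blocking factors from the reported L1/L2/L3 cache
+ // sizes, so pinning them to tiny values below exercises the small-block code
+ // paths that the real, much larger caches would never trigger.)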
+ Eigen::setCpuCacheSizes(896, 1920, 2944); + + // this contraction should be equivalent to a single matrix multiplication + Eigen::array<DimPair, 2> dims = {{DimPair(2, 0), DimPair(3, 1)}}; + Tensor<float, 5, DataLayout> t_result; + t_result = t_left.contract(t_right, dims); + + // compute result using a simple eigen matrix product + Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> m_left(t_left.data(), 150, 93); + Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout>> m_right(t_right.data(), 93, 140); + Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result = m_left * m_right; + + for (int i = 0; i < t_result.dimensions().TotalSize(); i++) { + VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]); + } +} + +template<int DataLayout> +static void test_tensor_product() +{ + Tensor<float, 2, DataLayout> mat1(2, 3); + Tensor<float, 2, DataLayout> mat2(4, 1); + mat1.setRandom(); + mat2.setRandom(); + + Tensor<float, 4, DataLayout> result = mat1.contract(mat2, Eigen::array<DimPair, 0>{{}}); + + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_EQUAL(result.dimension(1), 3); + VERIFY_IS_EQUAL(result.dimension(2), 4); + VERIFY_IS_EQUAL(result.dimension(3), 1); + for (int i = 0; i < result.dimension(0); ++i) { + for (int j = 0; j < result.dimension(1); ++j) { + for (int k = 0; k < result.dimension(2); ++k) { + for (int l = 0; l < result.dimension(3); ++l) { + VERIFY_IS_APPROX(result(i, j, k, l), mat1(i, j) * mat2(k, l) ); + } + } + } + } +} + + +template<int DataLayout> +static void test_const_inputs() +{ + Tensor<float, 2, DataLayout> in1(2, 3); + Tensor<float, 2, DataLayout> in2(3, 2); + in1.setRandom(); + in2.setRandom(); + + TensorMap<Tensor<const float, 2, DataLayout> > mat1(in1.data(), 2, 3); + TensorMap<Tensor<const float, 2, DataLayout> > mat2(in2.data(), 3, 2); + Tensor<float, 2, DataLayout> mat3(2,2); + + Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}}; + mat3 = mat1.contract(mat2, dims); + + VERIFY_IS_APPROX(mat3(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(1,0) + mat1(0,2)*mat2(2,0)); + VERIFY_IS_APPROX(mat3(0,1), mat1(0,0)*mat2(0,1) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(2,1)); + VERIFY_IS_APPROX(mat3(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(1,0) + mat1(1,2)*mat2(2,0)); + VERIFY_IS_APPROX(mat3(1,1), mat1(1,0)*mat2(0,1) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(2,1)); +} + +void test_cxx11_tensor_contraction() +{ + CALL_SUBTEST(test_evals<ColMajor>()); + CALL_SUBTEST(test_evals<RowMajor>()); + CALL_SUBTEST(test_scalar<ColMajor>()); + CALL_SUBTEST(test_scalar<RowMajor>()); + CALL_SUBTEST(test_multidims<ColMajor>()); + CALL_SUBTEST(test_multidims<RowMajor>()); + CALL_SUBTEST(test_holes<ColMajor>()); + CALL_SUBTEST(test_holes<RowMajor>()); + CALL_SUBTEST(test_full_redux<ColMajor>()); + CALL_SUBTEST(test_full_redux<RowMajor>()); + CALL_SUBTEST(test_contraction_of_contraction<ColMajor>()); + CALL_SUBTEST(test_contraction_of_contraction<RowMajor>()); + CALL_SUBTEST(test_expr<ColMajor>()); + CALL_SUBTEST(test_expr<RowMajor>()); + CALL_SUBTEST(test_out_of_order_contraction<ColMajor>()); + CALL_SUBTEST(test_out_of_order_contraction<RowMajor>()); + CALL_SUBTEST(test_consistency<ColMajor>()); + CALL_SUBTEST(test_consistency<RowMajor>()); + CALL_SUBTEST(test_large_contraction<ColMajor>()); + CALL_SUBTEST(test_large_contraction<RowMajor>()); + CALL_SUBTEST(test_matrix_vector<ColMajor>()); + CALL_SUBTEST(test_matrix_vector<RowMajor>()); + CALL_SUBTEST(test_tensor_vector<ColMajor>()); + CALL_SUBTEST(test_tensor_vector<RowMajor>()); + 
CALL_SUBTEST(test_small_blocking_factors<ColMajor>()); + CALL_SUBTEST(test_small_blocking_factors<RowMajor>()); + CALL_SUBTEST(test_tensor_product<ColMajor>()); + CALL_SUBTEST(test_tensor_product<RowMajor>()); + CALL_SUBTEST(test_const_inputs<ColMajor>()); + CALL_SUBTEST(test_const_inputs<RowMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_convolution.cpp b/eigen/unsupported/test/cxx11_tensor_convolution.cpp new file mode 100644 index 0000000..e3d4675 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_convolution.cpp @@ -0,0 +1,149 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::DefaultDevice; + +template <int DataLayout> +static void test_evals() +{ + Tensor<float, 2, DataLayout> input(3, 3); + Tensor<float, 1, DataLayout> kernel(2); + + input.setRandom(); + kernel.setRandom(); + + Tensor<float, 2, DataLayout> result(2,3); + result.setZero(); + Eigen::array<Tensor<float, 2>::Index, 1> dims3{{0}}; + + typedef TensorEvaluator<decltype(input.convolve(kernel, dims3)), DefaultDevice> Evaluator; + Evaluator eval(input.convolve(kernel, dims3), DefaultDevice()); + eval.evalTo(result.data()); + EIGEN_STATIC_ASSERT(Evaluator::NumDims==2ul, YOU_MADE_A_PROGRAMMING_MISTAKE); + VERIFY_IS_EQUAL(eval.dimensions()[0], 2); + VERIFY_IS_EQUAL(eval.dimensions()[1], 3); + + VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0) + input(1,0)*kernel(1)); // index 0 + VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0) + input(1,1)*kernel(1)); // index 2 + VERIFY_IS_APPROX(result(0,2), input(0,2)*kernel(0) + input(1,2)*kernel(1)); // index 4 + VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0) + input(2,0)*kernel(1)); // index 1 + VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0) + input(2,1)*kernel(1)); // index 3 + VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5 +} + +template <int DataLayout> +static void test_expr() +{ + Tensor<float, 2, DataLayout> input(3, 3); + Tensor<float, 2, DataLayout> kernel(2, 2); + input.setRandom(); + kernel.setRandom(); + + Tensor<float, 2, DataLayout> result(2,2); + Eigen::array<ptrdiff_t, 2> dims; + dims[0] = 0; + dims[1] = 1; + result = input.convolve(kernel, dims); + + VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0,0) + input(0,1)*kernel(0,1) + + input(1,0)*kernel(1,0) + input(1,1)*kernel(1,1)); + VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0,0) + input(0,2)*kernel(0,1) + + input(1,1)*kernel(1,0) + input(1,2)*kernel(1,1)); + VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0,0) + input(1,1)*kernel(0,1) + + input(2,0)*kernel(1,0) + input(2,1)*kernel(1,1)); + VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0,0) + input(1,2)*kernel(0,1) + + input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1)); +} + +template <int DataLayout> +static void test_modes() { + Tensor<float, 1, DataLayout> input(3); + Tensor<float, 1, DataLayout> kernel(3); + input(0) = 1.0f; + input(1) = 2.0f; + input(2) = 3.0f; + kernel(0) = 0.5f; + kernel(1) = 1.0f; + kernel(2) = 0.0f; + + Eigen::array<ptrdiff_t, 1> dims; + dims[0] = 0; + Eigen::array<std::pair<ptrdiff_t, ptrdiff_t>, 1> padding; + + // Emulate VALID mode (as defined in + // 
http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). + padding[0] = std::make_pair(0, 0); + Tensor<float, 1, DataLayout> valid(1); + valid = input.pad(padding).convolve(kernel, dims); + VERIFY_IS_EQUAL(valid.dimension(0), 1); + VERIFY_IS_APPROX(valid(0), 2.5f); + + // Emulate SAME mode (as defined in + // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). + padding[0] = std::make_pair(1, 1); + Tensor<float, 1, DataLayout> same(3); + same = input.pad(padding).convolve(kernel, dims); + VERIFY_IS_EQUAL(same.dimension(0), 3); + VERIFY_IS_APPROX(same(0), 1.0f); + VERIFY_IS_APPROX(same(1), 2.5f); + VERIFY_IS_APPROX(same(2), 4.0f); + + // Emulate FULL mode (as defined in + // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). + padding[0] = std::make_pair(2, 2); + Tensor<float, 1, DataLayout> full(5); + full = input.pad(padding).convolve(kernel, dims); + VERIFY_IS_EQUAL(full.dimension(0), 5); + VERIFY_IS_APPROX(full(0), 0.0f); + VERIFY_IS_APPROX(full(1), 1.0f); + VERIFY_IS_APPROX(full(2), 2.5f); + VERIFY_IS_APPROX(full(3), 4.0f); + VERIFY_IS_APPROX(full(4), 1.5f); +} + +template <int DataLayout> +static void test_strides() { + Tensor<float, 1, DataLayout> input(13); + Tensor<float, 1, DataLayout> kernel(3); + input.setRandom(); + kernel.setRandom(); + + Eigen::array<ptrdiff_t, 1> dims; + dims[0] = 0; + Eigen::array<ptrdiff_t, 1> stride_of_3; + stride_of_3[0] = 3; + Eigen::array<ptrdiff_t, 1> stride_of_2; + stride_of_2[0] = 2; + + Tensor<float, 1, DataLayout> result; + result = input.stride(stride_of_3).convolve(kernel, dims).stride(stride_of_2); + + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) + + input(6)*kernel(2))); + VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) + + input(12)*kernel(2))); +} + +void test_cxx11_tensor_convolution() +{ + CALL_SUBTEST(test_evals<ColMajor>()); + CALL_SUBTEST(test_evals<RowMajor>()); + CALL_SUBTEST(test_expr<ColMajor>()); + CALL_SUBTEST(test_expr<RowMajor>()); + CALL_SUBTEST(test_modes<ColMajor>()); + CALL_SUBTEST(test_modes<RowMajor>()); + CALL_SUBTEST(test_strides<ColMajor>()); + CALL_SUBTEST(test_strides<RowMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp new file mode 100644 index 0000000..a4226a6 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_convolution_sycl.cpp @@ -0,0 +1,469 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
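+ // The SYCL cases below mirror the host convolution tests above: each one
+ // allocates device buffers, copies the operands over with
+ // memcpyHostToDevice, evaluates convolve() on the device via
+ // .device(sycl_device), copies the result back, and checks it entry-wise
+ // against a host-side reference. A minimal sketch of the per-device driver
+ // pattern (see tensorConvolutionPerDevice at the end of this file):
+ //   QueueInterface queueInterface(device_selector);
+ //   Eigen::SyclDevice sycl_device(&queueInterface);
+ //   test_evals<float, ColMajor, int64_t>(sycl_device);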
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_convolution_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + +#include <iostream> +#include <chrono> +#include <ctime> + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> +#include <iomanip> + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; +static const float error_threshold =1e-4f; + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_larg_expr1D(const Eigen::SyclDevice& sycl_device) +{ + IndexType indim0 =53; + IndexType indim1= 55; + IndexType indim2= 51; + IndexType outdim0=50; + IndexType outdim1=55; + IndexType outdim2=51; + Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}}; + Eigen::array<IndexType, 1> kernel_dims = {{4}}; + Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}}; + + Tensor<DataType, 3, DataLayout, IndexType> input(input_dims); + Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims); + Tensor<DataType, 3, DataLayout,IndexType> result(result_dims); + Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims); + + Eigen::array<IndexType, 1> dims3{{0}}; + + input.setRandom(); + kernel.setRandom(); + result.setZero(); + result_host.setZero(); + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t result_bytes = result.size() * sizeof(DataType); + + DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); + sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); + + result_host=input.convolve(kernel, dims3); + +for(IndexType i=0; i< outdim0; i++ ){ + for(IndexType j=0; j< outdim1; j++ ){ + for(IndexType k=0; k< outdim2; k++ ){ + if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { + std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl; + assert(false); + } + } + } +} + sycl_device.deallocate(d_input); + sycl_device.deallocate(d_kernel); + sycl_device.deallocate(d_result); + +} + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_larg_expr2D(const Eigen::SyclDevice& sycl_device) +{ + IndexType indim0 =53; + IndexType indim1= 55; + IndexType indim2= 51; + IndexType outdim0=50; + IndexType outdim1=51; + IndexType outdim2=51; + Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}}; + Eigen::array<IndexType, 2> kernel_dims = {{4,5}}; + Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}}; + + Tensor<DataType, 3, DataLayout, IndexType> input(input_dims); + Tensor<DataType, 
2, DataLayout,IndexType> kernel(kernel_dims); + Tensor<DataType, 3, DataLayout,IndexType> result(result_dims); + Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims); + + Eigen::array<IndexType, 2> dims3{{0,1}}; + + input.setRandom(); + kernel.setRandom(); + result.setZero(); + result_host.setZero(); + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t result_bytes = result.size() * sizeof(DataType); + + DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_result(d_result, result_dims); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); + sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); + + result_host=input.convolve(kernel, dims3); + +for(IndexType i=0; i< outdim0; i++ ){ + for(IndexType j=0; j< outdim1; j++ ){ + for(IndexType k=0; k< outdim2; k++ ){ + if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { + std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl; + assert(false); + } + } + } +} + sycl_device.deallocate(d_input); + sycl_device.deallocate(d_kernel); + sycl_device.deallocate(d_result); + +} + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_larg_expr3D(const Eigen::SyclDevice& sycl_device) +{ + IndexType indim0 =53; + IndexType indim1= 55; + IndexType indim2= 51; + IndexType outdim0=50; + IndexType outdim1=51; + IndexType outdim2=49; + Eigen::array<IndexType, 3> input_dims = {{indim0, indim1, indim2}}; + Eigen::array<IndexType, 3> kernel_dims = {{4,5,3}}; + Eigen::array<IndexType, 3> result_dims = {{outdim0, outdim1, outdim2}}; + + Tensor<DataType, 3, DataLayout, IndexType> input(input_dims); + Tensor<DataType, 3, DataLayout,IndexType> kernel(kernel_dims); + Tensor<DataType, 3, DataLayout,IndexType> result(result_dims); + Tensor<DataType, 3, DataLayout,IndexType> result_host(result_dims); + + Eigen::array<IndexType, 3> dims3{{0,1,2}}; + + input.setRandom(); + kernel.setRandom(); + result.setZero(); + result_host.setZero(); + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t result_bytes = result.size() * sizeof(DataType); + + DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType> 
> gpu_result(d_result, result_dims); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); + sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); + + result_host=input.convolve(kernel, dims3); + +for(IndexType i=0; i< outdim0; i++ ){ + for(IndexType j=0; j< outdim1; j++ ){ + for(IndexType k=0; k< outdim2; k++ ){ + if (!(Eigen::internal::isApprox(result(i,j,k), result_host(i,j,k), error_threshold))) { + std::cout <<std::setprecision(16)<< "mismatch detected at index ( "<< i << " , " << j << ", " << k << " ) " << " \t " << result(i,j,k) << " vs "<< result_host(i,j,k) << std::endl; + assert(false); + } + } + } +} + sycl_device.deallocate(d_input); + sycl_device.deallocate(d_kernel); + sycl_device.deallocate(d_result); + +} + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_evals(const Eigen::SyclDevice& sycl_device) +{ + Eigen::array<IndexType, 2> input_dims = {{3, 3}}; + Eigen::array<IndexType, 1> kernel_dims = {{2}}; + Eigen::array<IndexType, 2> result_dims = {{2, 3}}; + + Tensor<DataType, 2, DataLayout, IndexType> input(input_dims); + Tensor<DataType, 1, DataLayout,IndexType> kernel(kernel_dims); + Tensor<DataType, 2, DataLayout,IndexType> result(result_dims); + + Eigen::array<IndexType, 1> dims3{{0}}; + + input.setRandom(); + kernel.setRandom(); + result.setZero(); + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t result_bytes = result.size() * sizeof(DataType); + + DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout, IndexType> > gpu_kernel(d_kernel, kernel_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout, IndexType> > gpu_result(d_result, result_dims); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims3); + sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); + + VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0) + input(1,0)*kernel(1)); // index 0 + VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0) + input(1,1)*kernel(1)); // index 2 + VERIFY_IS_APPROX(result(0,2), input(0,2)*kernel(0) + input(1,2)*kernel(1)); // index 4 + VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0) + input(2,0)*kernel(1)); // index 1 + VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0) + input(2,1)*kernel(1)); // index 3 + VERIFY_IS_APPROX(result(1,2), input(1,2)*kernel(0) + input(2,2)*kernel(1)); // index 5 + + sycl_device.deallocate(d_input); + sycl_device.deallocate(d_kernel); + sycl_device.deallocate(d_result); +} + +template <typename DataType, int DataLayout, typename IndexType> +static void test_expr(const Eigen::SyclDevice& sycl_device) +{ + Eigen::array<IndexType, 2> input_dims = {{3, 3}}; + Eigen::array<IndexType, 2> kernel_dims = {{2, 2}}; + Eigen::array<IndexType, 2> result_dims = {{2, 2}}; + + Tensor<DataType, 2, DataLayout, IndexType> 
input(input_dims); + Tensor<DataType, 2, DataLayout, IndexType> kernel(kernel_dims); + Tensor<DataType, 2, DataLayout, IndexType> result(result_dims); + + input.setRandom(); + kernel.setRandom(); + Eigen::array<IndexType, 2> dims; + dims[0] = 0; + dims[1] = 1; + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t result_bytes = result.size() * sizeof(DataType); + + DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 2, DataLayout,IndexType> > gpu_result(d_result, result_dims); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_result.device(sycl_device)=gpu_input.convolve(gpu_kernel, dims); + sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); + + VERIFY_IS_APPROX(result(0,0), input(0,0)*kernel(0,0) + input(0,1)*kernel(0,1) + + input(1,0)*kernel(1,0) + input(1,1)*kernel(1,1)); + VERIFY_IS_APPROX(result(0,1), input(0,1)*kernel(0,0) + input(0,2)*kernel(0,1) + + input(1,1)*kernel(1,0) + input(1,2)*kernel(1,1)); + VERIFY_IS_APPROX(result(1,0), input(1,0)*kernel(0,0) + input(1,1)*kernel(0,1) + + input(2,0)*kernel(1,0) + input(2,1)*kernel(1,1)); + VERIFY_IS_APPROX(result(1,1), input(1,1)*kernel(0,0) + input(1,2)*kernel(0,1) + + input(2,1)*kernel(1,0) + input(2,2)*kernel(1,1)); + + sycl_device.deallocate(d_input); + sycl_device.deallocate(d_kernel); + sycl_device.deallocate(d_result); +} + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_modes(const Eigen::SyclDevice& sycl_device){ + +Eigen::array<IndexType, 1> input_dims = {{3}}; +Eigen::array<IndexType, 1> kernel_dims = {{3}}; + +Tensor<DataType, 1, DataLayout, IndexType> input(input_dims); +Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims); + +input.setRandom(); +kernel.setRandom(); +Eigen::array<IndexType, 1> dims; +dims[0] = 0; + + input(0) = 1.0f; + input(1) = 2.0f; + input(2) = 3.0f; + kernel(0) = 0.5f; + kernel(1) = 1.0f; + kernel(2) = 0.0f; + + Eigen::array<std::pair<IndexType, IndexType>, 1> padding; + + // Emulate VALID mode (as defined in + // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). 
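+ // (With input = [1, 2, 3] and kernel = [0.5, 1, 0], the single VALID output
+ // is 1*0.5 + 2*1 + 3*0 = 2.5, which is the value verified below.)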
+ padding[0] = std::make_pair(0, 0); + Tensor<DataType, 1, DataLayout, IndexType> valid(1); + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t valid_bytes = valid.size() * sizeof(DataType); + + DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_valid = static_cast<DataType*>(sycl_device.allocate(valid_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_valid(d_valid, valid.dimensions()); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_valid.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); + sycl_device.memcpyDeviceToHost(valid.data(), d_valid, valid_bytes); + + VERIFY_IS_EQUAL(valid.dimension(0), 1); + VERIFY_IS_APPROX(valid(0), 2.5f); + + // Emulate SAME mode (as defined in + // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). + padding[0] = std::make_pair(1, 1); + Tensor<DataType, 1, DataLayout, IndexType> same(3); + std::size_t same_bytes = same.size() * sizeof(DataType); + DataType * d_same = static_cast<DataType*>(sycl_device.allocate(same_bytes)); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_same(d_same, same.dimensions()); + gpu_same.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); + sycl_device.memcpyDeviceToHost(same.data(), d_same, same_bytes); + + VERIFY_IS_EQUAL(same.dimension(0), 3); + VERIFY_IS_APPROX(same(0), 1.0f); + VERIFY_IS_APPROX(same(1), 2.5f); + VERIFY_IS_APPROX(same(2), 4.0f); + + // Emulate FULL mode (as defined in + // http://docs.scipy.org/doc/numpy/reference/generated/numpy.convolve.html). 
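+ // (FULL pads by kernel_size - 1 = 2 on each side, giving
+ // input_size + kernel_size - 1 = 5 outputs; the edge values 0.0f and 1.5f
+ // come from products against the zero padding.)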
+ padding[0] = std::make_pair(2, 2); + + Tensor<DataType, 1, DataLayout, IndexType> full(5); + std::size_t full_bytes = full.size() * sizeof(DataType); + DataType * d_full = static_cast<DataType*>(sycl_device.allocate(full_bytes)); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_full(d_full, full.dimensions()); + gpu_full.device(sycl_device)=gpu_input.pad(padding).convolve(gpu_kernel, dims); + sycl_device.memcpyDeviceToHost(full.data(), d_full, full_bytes); + + VERIFY_IS_EQUAL(full.dimension(0), 5); + VERIFY_IS_APPROX(full(0), 0.0f); + VERIFY_IS_APPROX(full(1), 1.0f); + VERIFY_IS_APPROX(full(2), 2.5f); + VERIFY_IS_APPROX(full(3), 4.0f); + VERIFY_IS_APPROX(full(4), 1.5f); + + sycl_device.deallocate(d_input); + sycl_device.deallocate(d_kernel); + sycl_device.deallocate(d_valid); + sycl_device.deallocate(d_same); + sycl_device.deallocate(d_full); + +} + +template <typename DataType, int DataLayout, typename IndexType> +static void test_strides(const Eigen::SyclDevice& sycl_device){ + + Eigen::array<IndexType, 1> input_dims = {{13}}; + Eigen::array<IndexType, 1> kernel_dims = {{3}}; + + Tensor<DataType, 1, DataLayout, IndexType> input(input_dims); + Tensor<DataType, 1, DataLayout, IndexType> kernel(kernel_dims); + Tensor<DataType, 1, DataLayout, IndexType> result(2); + + input.setRandom(); + kernel.setRandom(); + Eigen::array<IndexType, 1> dims; + dims[0] = 0; + + Eigen::array<IndexType, 1> stride_of_3; + stride_of_3[0] = 3; + Eigen::array<IndexType, 1> stride_of_2; + stride_of_2[0] = 2; + + std::size_t input_bytes = input.size() * sizeof(DataType); + std::size_t kernel_bytes = kernel.size() * sizeof(DataType); + std::size_t result_bytes = result.size() * sizeof(DataType); + + DataType * d_input = static_cast<DataType*>(sycl_device.allocate(input_bytes)); + DataType * d_kernel = static_cast<DataType*>(sycl_device.allocate(kernel_bytes)); + DataType * d_result = static_cast<DataType*>(sycl_device.allocate(result_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_input(d_input, input_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_kernel(d_kernel, kernel_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 1, DataLayout,IndexType> > gpu_result(d_result, result.dimensions()); + sycl_device.memcpyHostToDevice(d_input, input.data(), input_bytes); + sycl_device.memcpyHostToDevice(d_kernel, kernel.data(), kernel_bytes); + + gpu_result.device(sycl_device)=gpu_input.stride(stride_of_3).convolve(gpu_kernel, dims).stride(stride_of_2); + sycl_device.memcpyDeviceToHost(result.data(), d_result, result_bytes); + + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_APPROX(result(0), (input(0)*kernel(0) + input(3)*kernel(1) + + input(6)*kernel(2))); + VERIFY_IS_APPROX(result(1), (input(6)*kernel(0) + input(9)*kernel(1) + + input(12)*kernel(2))); +} + +template <typename Dev_selector> void tensorConvolutionPerDevice(Dev_selector& s){ + QueueInterface queueInterface(s); + auto sycl_device=Eigen::SyclDevice(&queueInterface); + test_larg_expr1D<float, RowMajor, int64_t>(sycl_device); + test_larg_expr1D<float, ColMajor, int64_t>(sycl_device); + test_larg_expr2D<float, RowMajor, int64_t>(sycl_device); + test_larg_expr2D<float, ColMajor, int64_t>(sycl_device); + test_larg_expr3D<float, RowMajor, int64_t>(sycl_device); + test_larg_expr3D<float, ColMajor, int64_t>(sycl_device); + test_evals<float, ColMajor, int64_t>(sycl_device); + test_evals<float, RowMajor, int64_t>(sycl_device); + test_expr<float, ColMajor, 
int64_t>(sycl_device); + test_expr<float, RowMajor, int64_t>(sycl_device); + test_modes<float, ColMajor, int64_t>(sycl_device); + test_modes<float, RowMajor, int64_t>(sycl_device); + test_strides<float, ColMajor, int64_t>(sycl_device); + test_strides<float, RowMajor, int64_t>(sycl_device); +} + +void test_cxx11_tensor_convolution_sycl() { + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(tensorConvolutionPerDevice(device)); + } +} diff --git a/eigen/unsupported/test/cxx11_tensor_cuda.cu b/eigen/unsupported/test/cxx11_tensor_cuda.cu new file mode 100644 index 0000000..0ba9d52 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_cuda.cu @@ -0,0 +1,1287 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_cuda +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +void test_cuda_nullary() { + Tensor<float, 1, 0, int> in1(2); + Tensor<float, 1, 0, int> in2(2); + in1.setRandom(); + in2.setRandom(); + + std::size_t tensor_bytes = in1.size() * sizeof(float); + + float* d_in1; + float* d_in2; + cudaMalloc((void**)(&d_in1), tensor_bytes); + cudaMalloc((void**)(&d_in2), tensor_bytes); + cudaMemcpy(d_in1, in1.data(), tensor_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in2, in2.data(), tensor_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_in1( + d_in1, 2); + Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_in2( + d_in2, 2); + + gpu_in1.device(gpu_device) = gpu_in1.constant(3.14f); + gpu_in2.device(gpu_device) = gpu_in2.random(); + + Tensor<float, 1, 0, int> new1(2); + Tensor<float, 1, 0, int> new2(2); + + assert(cudaMemcpyAsync(new1.data(), d_in1, tensor_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaMemcpyAsync(new2.data(), d_in2, tensor_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 2; ++i) { + VERIFY_IS_APPROX(new1(i), 3.14f); + VERIFY_IS_NOT_EQUAL(new2(i), in2(i)); + } + + cudaFree(d_in1); + cudaFree(d_in2); +} + +void test_cuda_elementwise_small() { + Tensor<float, 1> in1(Eigen::array<Eigen::DenseIndex, 1>(2)); + Tensor<float, 1> in2(Eigen::array<Eigen::DenseIndex, 1>(2)); + Tensor<float, 1> out(Eigen::array<Eigen::DenseIndex, 1>(2)); + in1.setRandom(); + in2.setRandom(); + + std::size_t in1_bytes = in1.size() * sizeof(float); + std::size_t in2_bytes = in2.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_in1; + float* d_in2; + float* d_out; + cudaMalloc((void**)(&d_in1), in1_bytes); + cudaMalloc((void**)(&d_in2), in2_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in2, in2.data(), in2_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + 
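+ // (CudaStreamDevice wraps a cudaStream_t and GpuDevice adapts it to Eigen's
+ // device interface, so every expression assigned through .device(gpu_device)
+ // below is launched asynchronously on that stream.)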
Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in1( + d_in1, Eigen::array<Eigen::DenseIndex, 1>(2)); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in2( + d_in2, Eigen::array<Eigen::DenseIndex, 1>(2)); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_out( + d_out, Eigen::array<Eigen::DenseIndex, 1>(2)); + + gpu_out.device(gpu_device) = gpu_in1 + gpu_in2; + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 2; ++i) { + VERIFY_IS_APPROX( + out(Eigen::array<Eigen::DenseIndex, 1>(i)), + in1(Eigen::array<Eigen::DenseIndex, 1>(i)) + in2(Eigen::array<Eigen::DenseIndex, 1>(i))); + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out); +} + +void test_cuda_elementwise() +{ + Tensor<float, 3> in1(Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Tensor<float, 3> in2(Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Tensor<float, 3> in3(Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Tensor<float, 3> out(Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + in1.setRandom(); + in2.setRandom(); + in3.setRandom(); + + std::size_t in1_bytes = in1.size() * sizeof(float); + std::size_t in2_bytes = in2.size() * sizeof(float); + std::size_t in3_bytes = in3.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_in1; + float* d_in2; + float* d_in3; + float* d_out; + cudaMalloc((void**)(&d_in1), in1_bytes); + cudaMalloc((void**)(&d_in2), in2_bytes); + cudaMalloc((void**)(&d_in3), in3_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in2, in2.data(), in2_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in3, in3.data(), in3_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in3(d_in3, Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + + gpu_out.device(gpu_device) = gpu_in1 + gpu_in2 * gpu_in3; + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 53; ++j) { + for (int k = 0; k < 97; ++k) { + VERIFY_IS_APPROX(out(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)), in1(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)) + in2(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)) * in3(Eigen::array<Eigen::DenseIndex, 3>(i,j,k))); + } + } + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_in3); + cudaFree(d_out); +} + +void test_cuda_props() { + Tensor<float, 1> in1(200); + Tensor<bool, 1> out(200); + in1.setRandom(); + + std::size_t in1_bytes = in1.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(bool); + + float* d_in1; + bool* d_out; + cudaMalloc((void**)(&d_in1), in1_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice 
gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in1( + d_in1, 200); + Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_out( + d_out, 200); + + gpu_out.device(gpu_device) = (gpu_in1.isnan)(); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 200; ++i) { + VERIFY_IS_EQUAL(out(i), (std::isnan)(in1(i))); + } + + cudaFree(d_in1); + cudaFree(d_out); +} + +void test_cuda_reduction() +{ + Tensor<float, 4> in1(72,53,97,113); + Tensor<float, 2> out(72,97); + in1.setRandom(); + + std::size_t in1_bytes = in1.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_in1; + float* d_out; + cudaMalloc((void**)(&d_in1), in1_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, 72,53,97,113); + Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97); + + array<Eigen::DenseIndex, 2> reduction_axis; + reduction_axis[0] = 1; + reduction_axis[1] = 3; + + gpu_out.device(gpu_device) = gpu_in1.maximum(reduction_axis); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + float expected = 0; + for (int k = 0; k < 53; ++k) { + for (int l = 0; l < 113; ++l) { + expected = + std::max<float>(expected, in1(i, k, j, l)); + } + } + VERIFY_IS_APPROX(out(i,j), expected); + } + } + + cudaFree(d_in1); + cudaFree(d_out); +} + +template<int DataLayout> +void test_cuda_contraction() +{ + // with these dimensions, the output has 300 * 140 elements, which is + // more than 30 * 1024, which is the number of threads in blocks on + // a 15 SM GK110 GPU + Tensor<float, 4, DataLayout> t_left(6, 50, 3, 31); + Tensor<float, 5, DataLayout> t_right(Eigen::array<Eigen::DenseIndex, 5>(3, 31, 7, 20, 1)); + Tensor<float, 5, DataLayout> t_result(Eigen::array<Eigen::DenseIndex, 5>(6, 50, 7, 20, 1)); + + t_left.setRandom(); + t_right.setRandom(); + + std::size_t t_left_bytes = t_left.size() * sizeof(float); + std::size_t t_right_bytes = t_right.size() * sizeof(float); + std::size_t t_result_bytes = t_result.size() * sizeof(float); + + float* d_t_left; + float* d_t_right; + float* d_t_result; + + cudaMalloc((void**)(&d_t_left), t_left_bytes); + cudaMalloc((void**)(&d_t_right), t_right_bytes); + cudaMalloc((void**)(&d_t_result), t_result_bytes); + + cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_t_left(d_t_left, 6, 50, 3, 31); + Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_t_right(d_t_right, 3, 31, 7, 20, 1); + Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_t_result(d_t_result, 6, 50, 7, 20, 1); + + typedef Eigen::Map<Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> > MapXf; + MapXf m_left(t_left.data(), 300, 93); + MapXf m_right(t_right.data(), 93, 140); + Eigen::Matrix<float, Dynamic, Dynamic, DataLayout> m_result(300, 
140); + + typedef Tensor<float, 1>::DimensionPair DimPair; + Eigen::array<DimPair, 2> dims; + dims[0] = DimPair(2, 0); + dims[1] = DimPair(3, 1); + + m_result = m_left * m_right; + gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims); + + cudaMemcpy(t_result.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost); + + for (DenseIndex i = 0; i < t_result.size(); i++) { + if (fabs(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) { + std::cout << "mismatch detected at index " << i << ": " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; + assert(false); + } + } + + cudaFree(d_t_left); + cudaFree(d_t_right); + cudaFree(d_t_result); +} + +template<int DataLayout> +void test_cuda_convolution_1d() +{ + Tensor<float, 4, DataLayout> input(74,37,11,137); + Tensor<float, 1, DataLayout> kernel(4); + Tensor<float, 4, DataLayout> out(74,34,11,137); + input = input.constant(10.0f) + input.random(); + kernel = kernel.constant(7.0f) + kernel.random(); + + std::size_t input_bytes = input.size() * sizeof(float); + std::size_t kernel_bytes = kernel.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_input; + float* d_kernel; + float* d_out; + cudaMalloc((void**)(&d_input), input_bytes); + cudaMalloc((void**)(&d_kernel), kernel_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input, 74,37,11,137); + Eigen::TensorMap<Eigen::Tensor<float, 1, DataLayout> > gpu_kernel(d_kernel, 4); + Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out, 74,34,11,137); + + Eigen::array<Eigen::DenseIndex, 1> dims(1); + gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 74; ++i) { + for (int j = 0; j < 34; ++j) { + for (int k = 0; k < 11; ++k) { + for (int l = 0; l < 137; ++l) { + const float result = out(i,j,k,l); + const float expected = input(i,j+0,k,l) * kernel(0) + input(i,j+1,k,l) * kernel(1) + + input(i,j+2,k,l) * kernel(2) + input(i,j+3,k,l) * kernel(3); + VERIFY_IS_APPROX(result, expected); + } + } + } + } + + cudaFree(d_input); + cudaFree(d_kernel); + cudaFree(d_out); +} + +void test_cuda_convolution_inner_dim_col_major_1d() +{ + Tensor<float, 4, ColMajor> input(74,9,11,7); + Tensor<float, 1, ColMajor> kernel(4); + Tensor<float, 4, ColMajor> out(71,9,11,7); + input = input.constant(10.0f) + input.random(); + kernel = kernel.constant(7.0f) + kernel.random(); + + std::size_t input_bytes = input.size() * sizeof(float); + std::size_t kernel_bytes = kernel.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_input; + float* d_kernel; + float* d_out; + cudaMalloc((void**)(&d_input), input_bytes); + cudaMalloc((void**)(&d_kernel), kernel_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > 
gpu_input(d_input,74,9,11,7); + Eigen::TensorMap<Eigen::Tensor<float, 1, ColMajor> > gpu_kernel(d_kernel,4); + Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_out(d_out,71,9,11,7); + + Eigen::array<Eigen::DenseIndex, 1> dims(0); + gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 71; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 11; ++k) { + for (int l = 0; l < 7; ++l) { + const float result = out(i,j,k,l); + const float expected = input(i+0,j,k,l) * kernel(0) + input(i+1,j,k,l) * kernel(1) + + input(i+2,j,k,l) * kernel(2) + input(i+3,j,k,l) * kernel(3); + VERIFY_IS_APPROX(result, expected); + } + } + } + } + + cudaFree(d_input); + cudaFree(d_kernel); + cudaFree(d_out); +} + +void test_cuda_convolution_inner_dim_row_major_1d() +{ + Tensor<float, 4, RowMajor> input(7,9,11,74); + Tensor<float, 1, RowMajor> kernel(4); + Tensor<float, 4, RowMajor> out(7,9,11,71); + input = input.constant(10.0f) + input.random(); + kernel = kernel.constant(7.0f) + kernel.random(); + + std::size_t input_bytes = input.size() * sizeof(float); + std::size_t kernel_bytes = kernel.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_input; + float* d_kernel; + float* d_out; + cudaMalloc((void**)(&d_input), input_bytes); + cudaMalloc((void**)(&d_kernel), kernel_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_input(d_input, 7,9,11,74); + Eigen::TensorMap<Eigen::Tensor<float, 1, RowMajor> > gpu_kernel(d_kernel, 4); + Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_out(d_out, 7,9,11,71); + + Eigen::array<Eigen::DenseIndex, 1> dims(3); + gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 7; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 11; ++k) { + for (int l = 0; l < 71; ++l) { + const float result = out(i,j,k,l); + const float expected = input(i,j,k,l+0) * kernel(0) + input(i,j,k,l+1) * kernel(1) + + input(i,j,k,l+2) * kernel(2) + input(i,j,k,l+3) * kernel(3); + VERIFY_IS_APPROX(result, expected); + } + } + } + } + + cudaFree(d_input); + cudaFree(d_kernel); + cudaFree(d_out); +} + +template<int DataLayout> +void test_cuda_convolution_2d() +{ + Tensor<float, 4, DataLayout> input(74,37,11,137); + Tensor<float, 2, DataLayout> kernel(3,4); + Tensor<float, 4, DataLayout> out(74,35,8,137); + input = input.constant(10.0f) + input.random(); + kernel = kernel.constant(7.0f) + kernel.random(); + + std::size_t input_bytes = input.size() * sizeof(float); + std::size_t kernel_bytes = kernel.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_input; + float* d_kernel; + float* d_out; + cudaMalloc((void**)(&d_input), input_bytes); + cudaMalloc((void**)(&d_kernel), kernel_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_input, input.data(), input_bytes, 
cudaMemcpyHostToDevice); + cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_input(d_input,74,37,11,137); + Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_kernel(d_kernel,3,4); + Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out,74,35,8,137); + + Eigen::array<Eigen::DenseIndex, 2> dims(1,2); + gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 74; ++i) { + for (int j = 0; j < 35; ++j) { + for (int k = 0; k < 8; ++k) { + for (int l = 0; l < 137; ++l) { + const float result = out(i,j,k,l); + const float expected = input(i,j+0,k+0,l) * kernel(0,0) + + input(i,j+1,k+0,l) * kernel(1,0) + + input(i,j+2,k+0,l) * kernel(2,0) + + input(i,j+0,k+1,l) * kernel(0,1) + + input(i,j+1,k+1,l) * kernel(1,1) + + input(i,j+2,k+1,l) * kernel(2,1) + + input(i,j+0,k+2,l) * kernel(0,2) + + input(i,j+1,k+2,l) * kernel(1,2) + + input(i,j+2,k+2,l) * kernel(2,2) + + input(i,j+0,k+3,l) * kernel(0,3) + + input(i,j+1,k+3,l) * kernel(1,3) + + input(i,j+2,k+3,l) * kernel(2,3); + VERIFY_IS_APPROX(result, expected); + } + } + } + } + + cudaFree(d_input); + cudaFree(d_kernel); + cudaFree(d_out); +} + +template<int DataLayout> +void test_cuda_convolution_3d() +{ + Tensor<float, 5, DataLayout> input(Eigen::array<Eigen::DenseIndex, 5>(74,37,11,137,17)); + Tensor<float, 3, DataLayout> kernel(3,4,2); + Tensor<float, 5, DataLayout> out(Eigen::array<Eigen::DenseIndex, 5>(74,35,8,136,17)); + input = input.constant(10.0f) + input.random(); + kernel = kernel.constant(7.0f) + kernel.random(); + + std::size_t input_bytes = input.size() * sizeof(float); + std::size_t kernel_bytes = kernel.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_input; + float* d_kernel; + float* d_out; + cudaMalloc((void**)(&d_input), input_bytes); + cudaMalloc((void**)(&d_kernel), kernel_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_input, input.data(), input_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_kernel, kernel.data(), kernel_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_input(d_input,74,37,11,137,17); + Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > gpu_kernel(d_kernel,3,4,2); + Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_out(d_out,74,35,8,136,17); + + Eigen::array<Eigen::DenseIndex, 3> dims(1,2,3); + gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); + + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 74; ++i) { + for (int j = 0; j < 35; ++j) { + for (int k = 0; k < 8; ++k) { + for (int l = 0; l < 136; ++l) { + for (int m = 0; m < 17; ++m) { + const float result = out(i,j,k,l,m); + const float expected = input(i,j+0,k+0,l+0,m) * kernel(0,0,0) + + input(i,j+1,k+0,l+0,m) * kernel(1,0,0) + + input(i,j+2,k+0,l+0,m) * kernel(2,0,0) + + input(i,j+0,k+1,l+0,m) * kernel(0,1,0) + + input(i,j+1,k+1,l+0,m) * kernel(1,1,0) + + input(i,j+2,k+1,l+0,m) * kernel(2,1,0) + 
+ input(i,j+0,k+2,l+0,m) * kernel(0,2,0) + + input(i,j+1,k+2,l+0,m) * kernel(1,2,0) + + input(i,j+2,k+2,l+0,m) * kernel(2,2,0) + + input(i,j+0,k+3,l+0,m) * kernel(0,3,0) + + input(i,j+1,k+3,l+0,m) * kernel(1,3,0) + + input(i,j+2,k+3,l+0,m) * kernel(2,3,0) + + input(i,j+0,k+0,l+1,m) * kernel(0,0,1) + + input(i,j+1,k+0,l+1,m) * kernel(1,0,1) + + input(i,j+2,k+0,l+1,m) * kernel(2,0,1) + + input(i,j+0,k+1,l+1,m) * kernel(0,1,1) + + input(i,j+1,k+1,l+1,m) * kernel(1,1,1) + + input(i,j+2,k+1,l+1,m) * kernel(2,1,1) + + input(i,j+0,k+2,l+1,m) * kernel(0,2,1) + + input(i,j+1,k+2,l+1,m) * kernel(1,2,1) + + input(i,j+2,k+2,l+1,m) * kernel(2,2,1) + + input(i,j+0,k+3,l+1,m) * kernel(0,3,1) + + input(i,j+1,k+3,l+1,m) * kernel(1,3,1) + + input(i,j+2,k+3,l+1,m) * kernel(2,3,1); + VERIFY_IS_APPROX(result, expected); + } + } + } + } + } + + cudaFree(d_input); + cudaFree(d_kernel); + cudaFree(d_out); +} + + +template <typename Scalar> +void test_cuda_lgamma(const Scalar stddev) +{ + Tensor<Scalar, 2> in(72,97); + in.setRandom(); + in *= in.constant(stddev); + Tensor<Scalar, 2> out(72,97); + out.setZero(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97); + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97); + + gpu_out.device(gpu_device) = gpu_in.lgamma(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + VERIFY_IS_APPROX(out(i,j), (std::lgamma)(in(i,j))); + } + } + + cudaFree(d_in); + cudaFree(d_out); +} + +template <typename Scalar> +void test_cuda_digamma() +{ + Tensor<Scalar, 1> in(7); + Tensor<Scalar, 1> out(7); + Tensor<Scalar, 1> expected_out(7); + out.setZero(); + + in(0) = Scalar(1); + in(1) = Scalar(1.5); + in(2) = Scalar(4); + in(3) = Scalar(-10.5); + in(4) = Scalar(10000.5); + in(5) = Scalar(0); + in(6) = Scalar(-1); + + expected_out(0) = Scalar(-0.5772156649015329); + expected_out(1) = Scalar(0.03648997397857645); + expected_out(2) = Scalar(1.2561176684318); + expected_out(3) = Scalar(2.398239129535781); + expected_out(4) = Scalar(9.210340372392849); + expected_out(5) = std::numeric_limits<Scalar>::infinity(); + expected_out(6) = std::numeric_limits<Scalar>::infinity(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in(d_in, 7); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 7); + + gpu_out.device(gpu_device) = gpu_in.digamma(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 5; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + for (int i = 5; i < 7; ++i) { + VERIFY_IS_EQUAL(out(i), expected_out(i)); + } + + cudaFree(d_in); + cudaFree(d_out); +} + +template <typename Scalar> +void 
test_cuda_zeta() +{ + Tensor<Scalar, 1> in_x(6); + Tensor<Scalar, 1> in_q(6); + Tensor<Scalar, 1> out(6); + Tensor<Scalar, 1> expected_out(6); + out.setZero(); + + in_x(0) = Scalar(1); + in_x(1) = Scalar(1.5); + in_x(2) = Scalar(4); + in_x(3) = Scalar(-10.5); + in_x(4) = Scalar(10000.5); + in_x(5) = Scalar(3); + + in_q(0) = Scalar(1.2345); + in_q(1) = Scalar(2); + in_q(2) = Scalar(1.5); + in_q(3) = Scalar(3); + in_q(4) = Scalar(1.0001); + in_q(5) = Scalar(-2.5); + + expected_out(0) = std::numeric_limits<Scalar>::infinity(); + expected_out(1) = Scalar(1.61237534869); + expected_out(2) = Scalar(0.234848505667); + expected_out(3) = Scalar(1.03086757337e-5); + expected_out(4) = Scalar(0.367879440865); + expected_out(5) = Scalar(0.054102025820864097); + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in_x; + Scalar* d_in_q; + Scalar* d_out; + cudaMalloc((void**)(&d_in_x), bytes); + cudaMalloc((void**)(&d_in_q), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_q, in_q.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 6); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_q(d_in_q, 6); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 6); + + gpu_out.device(gpu_device) = gpu_in_x.zeta(gpu_in_q); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + VERIFY_IS_EQUAL(out(0), expected_out(0)); + VERIFY((std::isnan)(out(3))); + + for (int i = 1; i < 6; ++i) { + if (i != 3) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + } + + cudaFree(d_in_x); + cudaFree(d_in_q); + cudaFree(d_out); +} + +template <typename Scalar> +void test_cuda_polygamma() +{ + Tensor<Scalar, 1> in_x(7); + Tensor<Scalar, 1> in_n(7); + Tensor<Scalar, 1> out(7); + Tensor<Scalar, 1> expected_out(7); + out.setZero(); + + in_n(0) = Scalar(1); + in_n(1) = Scalar(1); + in_n(2) = Scalar(1); + in_n(3) = Scalar(17); + in_n(4) = Scalar(31); + in_n(5) = Scalar(28); + in_n(6) = Scalar(8); + + in_x(0) = Scalar(2); + in_x(1) = Scalar(3); + in_x(2) = Scalar(25.5); + in_x(3) = Scalar(4.7); + in_x(4) = Scalar(11.8); + in_x(5) = Scalar(17.7); + in_x(6) = Scalar(30.2); + + expected_out(0) = Scalar(0.644934066848); + expected_out(1) = Scalar(0.394934066848); + expected_out(2) = Scalar(0.0399946696496); + expected_out(3) = Scalar(293.334565435); + expected_out(4) = Scalar(0.445487887616); + expected_out(5) = Scalar(-2.47810300902e-07); + expected_out(6) = Scalar(-8.29668781082e-09); + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in_x; + Scalar* d_in_n; + Scalar* d_out; + cudaMalloc((void**)(&d_in_x), bytes); + cudaMalloc((void**)(&d_in_n), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_n, in_n.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 7); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_n(d_in_n, 7); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 7); + + gpu_out.device(gpu_device) = gpu_in_n.polygamma(gpu_in_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == 
cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 7; ++i) { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + + cudaFree(d_in_x); + cudaFree(d_in_n); + cudaFree(d_out); +} + +template <typename Scalar> +void test_cuda_igamma() +{ + Tensor<Scalar, 2> a(6, 6); + Tensor<Scalar, 2> x(6, 6); + Tensor<Scalar, 2> out(6, 6); + out.setZero(); + + Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + a(i, j) = a_s[i]; + x(i, j) = x_s[j]; + } + } + + Scalar nan = std::numeric_limits<Scalar>::quiet_NaN(); + Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan}, + {0.0, 0.6321205588285578, 0.7768698398515702, + 0.9816843611112658, 9.999500016666262e-05, 1.0}, + {0.0, 0.4275932955291202, 0.608374823728911, + 0.9539882943107686, 7.522076445089201e-07, 1.0}, + {0.0, 0.01898815687615381, 0.06564245437845008, + 0.5665298796332909, 4.166333347221828e-18, 1.0}, + {0.0, 0.9999780593618628, 0.9999899967080838, + 0.9999996219837988, 0.9991370418689945, 1.0}, + {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}}; + + + + std::size_t bytes = a.size() * sizeof(Scalar); + + Scalar* d_a; + Scalar* d_x; + Scalar* d_out; + assert(cudaMalloc((void**)(&d_a), bytes) == cudaSuccess); + assert(cudaMalloc((void**)(&d_x), bytes) == cudaSuccess); + assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess); + + cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_a(d_a, 6, 6); + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_x(d_x, 6, 6); + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 6, 6); + + gpu_out.device(gpu_device) = gpu_a.igamma(gpu_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + if ((std::isnan)(igamma_s[i][j])) { + VERIFY((std::isnan)(out(i, j))); + } else { + VERIFY_IS_APPROX(out(i, j), igamma_s[i][j]); + } + } + } + + cudaFree(d_a); + cudaFree(d_x); + cudaFree(d_out); +} + +template <typename Scalar> +void test_cuda_igammac() +{ + Tensor<Scalar, 2> a(6, 6); + Tensor<Scalar, 2> x(6, 6); + Tensor<Scalar, 2> out(6, 6); + out.setZero(); + + Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + a(i, j) = a_s[i]; + x(i, j) = x_s[j]; + } + } + + Scalar nan = std::numeric_limits<Scalar>::quiet_NaN(); + Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan}, + {1.0, 0.36787944117144233, 0.22313016014842982, + 0.018315638888734182, 0.9999000049998333, 0.0}, + {1.0, 0.5724067044708798, 0.3916251762710878, + 0.04601170568923136, 0.9999992477923555, 0.0}, + {1.0, 0.9810118431238462, 0.9343575456215499, + 0.4334701203667089, 1.0, 0.0}, + {1.0, 2.1940638138146658e-05, 1.0003291916285e-05, + 3.7801620118431334e-07, 0.0008629581310054535, + 0.0}, + {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}}; + + std::size_t bytes = a.size() * sizeof(Scalar); + + Scalar* d_a; + 
Scalar* d_x; + Scalar* d_out; + cudaMalloc((void**)(&d_a), bytes); + cudaMalloc((void**)(&d_x), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_a, a.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_x, x.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_a(d_a, 6, 6); + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_x(d_x, 6, 6); + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 6, 6); + + gpu_out.device(gpu_device) = gpu_a.igammac(gpu_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + if ((std::isnan)(igammac_s[i][j])) { + VERIFY((std::isnan)(out(i, j))); + } else { + VERIFY_IS_APPROX(out(i, j), igammac_s[i][j]); + } + } + } + + cudaFree(d_a); + cudaFree(d_x); + cudaFree(d_out); +} + +template <typename Scalar> +void test_cuda_erf(const Scalar stddev) +{ + Tensor<Scalar, 2> in(72,97); + in.setRandom(); + in *= in.constant(stddev); + Tensor<Scalar, 2> out(72,97); + out.setZero(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + assert(cudaMalloc((void**)(&d_in), bytes) == cudaSuccess); + assert(cudaMalloc((void**)(&d_out), bytes) == cudaSuccess); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97); + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97); + + gpu_out.device(gpu_device) = gpu_in.erf(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + VERIFY_IS_APPROX(out(i,j), (std::erf)(in(i,j))); + } + } + + cudaFree(d_in); + cudaFree(d_out); +} + +template <typename Scalar> +void test_cuda_erfc(const Scalar stddev) +{ + Tensor<Scalar, 2> in(72,97); + in.setRandom(); + in *= in.constant(stddev); + Tensor<Scalar, 2> out(72,97); + out.setZero(); + + std::size_t bytes = in.size() * sizeof(Scalar); + + Scalar* d_in; + Scalar* d_out; + cudaMalloc((void**)(&d_in), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in, in.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_in(d_in, 72, 97); + Eigen::TensorMap<Eigen::Tensor<Scalar, 2> > gpu_out(d_out, 72, 97); + + gpu_out.device(gpu_device) = gpu_in.erfc(); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + VERIFY_IS_APPROX(out(i,j), (std::erfc)(in(i,j))); + } + } + + cudaFree(d_in); + cudaFree(d_out); +} + +template <typename Scalar> +void test_cuda_betainc() +{ + Tensor<Scalar, 1> in_x(125); + Tensor<Scalar, 1> in_a(125); + Tensor<Scalar, 1> in_b(125); + Tensor<Scalar, 1> out(125); + Tensor<Scalar, 1> expected_out(125); + out.setZero(); + + Scalar nan = std::numeric_limits<Scalar>::quiet_NaN(); + + Array<Scalar, 1, Dynamic> x(125); + Array<Scalar, 1, Dynamic> 
a(125); + Array<Scalar, 1, Dynamic> b(125); + Array<Scalar, 1, Dynamic> v(125); + + a << 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999; + + b << 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, 999.999, + 999.999, 999.999, 999.999; + + x << -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, + 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, + 0.2, 
0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, + -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, + 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1; + + v << nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, 0.47972119876364683, 0.5, 0.5202788012363533, nan, nan, + 0.9518683957740043, 0.9789663010413743, 0.9931729188073435, nan, nan, + 0.999995949033062, 0.9999999999993698, 0.9999999999999999, nan, nan, + 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, nan, nan, + nan, nan, nan, nan, 0.006827081192655869, 0.0210336989586256, + 0.04813160422599567, nan, nan, 0.20014344256217678, 0.5000000000000001, + 0.7998565574378232, nan, nan, 0.9991401428435834, 0.999999999698403, + 0.9999999999999999, nan, nan, 0.9999999999999999, 0.9999999999999999, + 0.9999999999999999, nan, nan, nan, nan, nan, nan, nan, + 1.0646600232370887e-25, 6.301722877826246e-13, 4.050966937974938e-06, nan, + nan, 7.864342668429763e-23, 3.015969667594166e-10, 0.0008598571564165444, + nan, nan, 6.031987710123844e-08, 0.5000000000000007, 0.9999999396801229, + nan, nan, 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, + nan, nan, nan, nan, nan, nan, 0.0, 7.029920380986636e-306, + 2.2450728208591345e-101, nan, nan, 0.0, 9.275871147869727e-302, + 1.2232913026152827e-97, nan, nan, 0.0, 3.0891393081932924e-252, + 2.9303043666183996e-60, nan, nan, 2.248913486879199e-196, + 0.5000000000004947, 0.9999999999999999, nan; + + for (int i = 0; i < 125; ++i) { + in_x(i) = x(i); + in_a(i) = a(i); + in_b(i) = b(i); + expected_out(i) = v(i); + } + + std::size_t bytes = in_x.size() * sizeof(Scalar); + + Scalar* d_in_x; + Scalar* d_in_a; + Scalar* d_in_b; + Scalar* d_out; + cudaMalloc((void**)(&d_in_x), bytes); + cudaMalloc((void**)(&d_in_a), bytes); + cudaMalloc((void**)(&d_in_b), bytes); + cudaMalloc((void**)(&d_out), bytes); + + cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_a, in_a.data(), bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in_b, in_b.data(), bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 125); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_a(d_in_a, 125); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_b(d_in_b, 125); + Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 125); + + gpu_out.device(gpu_device) = betainc(gpu_in_a, gpu_in_b, gpu_in_x); + + assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 1; i < 125; ++i) { + if ((std::isnan)(expected_out(i))) { + VERIFY((std::isnan)(out(i))); + } else { + VERIFY_IS_APPROX(out(i), expected_out(i)); + } + } + + cudaFree(d_in_x); + cudaFree(d_in_a); + cudaFree(d_in_b); + cudaFree(d_out); +} + + +void test_cxx11_tensor_cuda() +{ + CALL_SUBTEST_1(test_cuda_nullary()); + CALL_SUBTEST_1(test_cuda_elementwise_small()); + CALL_SUBTEST_1(test_cuda_elementwise()); + CALL_SUBTEST_1(test_cuda_props()); + CALL_SUBTEST_1(test_cuda_reduction()); + CALL_SUBTEST_2(test_cuda_contraction<ColMajor>()); + 
CALL_SUBTEST_2(test_cuda_contraction<RowMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_1d<ColMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_1d<RowMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_inner_dim_col_major_1d());
+  CALL_SUBTEST_3(test_cuda_convolution_inner_dim_row_major_1d());
+  CALL_SUBTEST_3(test_cuda_convolution_2d<ColMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_2d<RowMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_3d<ColMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_3d<RowMajor>());
+
+#if __cplusplus > 199711L
+  // std::erf, std::erfc, and so on were only added in C++11. We use them
+  // as a golden reference to validate the results produced by Eigen. Therefore
+  // we can only run these tests if we use a C++11 compiler.
+  CALL_SUBTEST_4(test_cuda_lgamma<float>(1.0f));
+  CALL_SUBTEST_4(test_cuda_lgamma<float>(100.0f));
+  CALL_SUBTEST_4(test_cuda_lgamma<float>(0.01f));
+  CALL_SUBTEST_4(test_cuda_lgamma<float>(0.001f));
+
+  CALL_SUBTEST_4(test_cuda_lgamma<double>(1.0));
+  CALL_SUBTEST_4(test_cuda_lgamma<double>(100.0));
+  CALL_SUBTEST_4(test_cuda_lgamma<double>(0.01));
+  CALL_SUBTEST_4(test_cuda_lgamma<double>(0.001));
+
+  CALL_SUBTEST_4(test_cuda_erf<float>(1.0f));
+  CALL_SUBTEST_4(test_cuda_erf<float>(100.0f));
+  CALL_SUBTEST_4(test_cuda_erf<float>(0.01f));
+  CALL_SUBTEST_4(test_cuda_erf<float>(0.001f));
+
+  CALL_SUBTEST_4(test_cuda_erfc<float>(1.0f));
+  // CALL_SUBTEST(test_cuda_erfc<float>(100.0f));
+  CALL_SUBTEST_4(test_cuda_erfc<float>(5.0f));  // CUDA erfc lacks precision for large inputs
+  CALL_SUBTEST_4(test_cuda_erfc<float>(0.01f));
+  CALL_SUBTEST_4(test_cuda_erfc<float>(0.001f));
+
+  CALL_SUBTEST_4(test_cuda_erf<double>(1.0));
+  CALL_SUBTEST_4(test_cuda_erf<double>(100.0));
+  CALL_SUBTEST_4(test_cuda_erf<double>(0.01));
+  CALL_SUBTEST_4(test_cuda_erf<double>(0.001));
+
+  CALL_SUBTEST_4(test_cuda_erfc<double>(1.0));
+  // CALL_SUBTEST(test_cuda_erfc<double>(100.0));
+  CALL_SUBTEST_4(test_cuda_erfc<double>(5.0));  // CUDA erfc lacks precision for large inputs
+  CALL_SUBTEST_4(test_cuda_erfc<double>(0.01));
+  CALL_SUBTEST_4(test_cuda_erfc<double>(0.001));
+
+  CALL_SUBTEST_5(test_cuda_digamma<float>());
+  CALL_SUBTEST_5(test_cuda_digamma<double>());
+
+  CALL_SUBTEST_5(test_cuda_polygamma<float>());
+  CALL_SUBTEST_5(test_cuda_polygamma<double>());
+
+  CALL_SUBTEST_5(test_cuda_zeta<float>());
+  CALL_SUBTEST_5(test_cuda_zeta<double>());
+
+  CALL_SUBTEST_5(test_cuda_igamma<float>());
+  CALL_SUBTEST_5(test_cuda_igammac<float>());
+
+  CALL_SUBTEST_5(test_cuda_igamma<double>());
+  CALL_SUBTEST_5(test_cuda_igammac<double>());
+
+  CALL_SUBTEST_6(test_cuda_betainc<float>());
+  CALL_SUBTEST_6(test_cuda_betainc<double>());
+#endif
+}
diff --git a/eigen/unsupported/test/cxx11_tensor_custom_index.cpp b/eigen/unsupported/test/cxx11_tensor_custom_index.cpp
new file mode 100644
index 0000000..4528cc1
--- /dev/null
+++ b/eigen/unsupported/test/cxx11_tensor_custom_index.cpp
@@ -0,0 +1,100 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
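+
+// The tests below exercise coefficient access through "custom" index types.
+// As a minimal sketch of the idea (assuming EIGEN_HAS_SFINAE is defined, as
+// the #ifdef guards below require), any type from which the per-dimension
+// indices can be read may stand in for a DSizes object:
+//
+//   Tensor<float, 4> t(2, 3, 5, 7);
+//   t.setRandom();
+//   std::map<ptrdiff_t, ptrdiff_t> idx;   // a "custom" index type
+//   idx[0] = 1; idx[1] = 2; idx[2] = 4; idx[3] = 1;
+//   float v = t.coeff(idx);               // same coefficient as t.coeff({1,2,4,1})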
+ +#include "main.h" +#include <limits> +#include <map> + +#include <Eigen/Dense> +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + + +template <int DataLayout> +static void test_map_as_index() +{ +#ifdef EIGEN_HAS_SFINAE + Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7); + tensor.setRandom(); + + using NormalIndex = DSizes<ptrdiff_t, 4>; + using CustomIndex = std::map<ptrdiff_t, ptrdiff_t>; + CustomIndex coeffC; + coeffC[0] = 1; + coeffC[1] = 2; + coeffC[2] = 4; + coeffC[3] = 1; + NormalIndex coeff(1,2,4,1); + + VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff)); + VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff)); +#endif +} + + +template <int DataLayout> +static void test_matrix_as_index() +{ +#ifdef EIGEN_HAS_SFINAE + Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7); + tensor.setRandom(); + + using NormalIndex = DSizes<ptrdiff_t, 4>; + using CustomIndex = Matrix<unsigned int, 4, 1>; + CustomIndex coeffC(1,2,4,1); + NormalIndex coeff(1,2,4,1); + + VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff)); + VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff)); +#endif +} + + +template <int DataLayout> +static void test_varlist_as_index() +{ +#ifdef EIGEN_HAS_SFINAE + Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7); + tensor.setRandom(); + + DSizes<ptrdiff_t, 4> coeff(1,2,4,1); + + VERIFY_IS_EQUAL(tensor.coeff({1,2,4,1}), tensor.coeff(coeff)); + VERIFY_IS_EQUAL(tensor.coeffRef({1,2,4,1}), tensor.coeffRef(coeff)); +#endif +} + + +template <int DataLayout> +static void test_sizes_as_index() +{ +#ifdef EIGEN_HAS_SFINAE + Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7); + tensor.setRandom(); + + DSizes<ptrdiff_t, 4> coeff(1,2,4,1); + Sizes<1,2,4,1> coeffC; + + VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff)); + VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff)); +#endif +} + + +void test_cxx11_tensor_custom_index() { + test_map_as_index<ColMajor>(); + test_map_as_index<RowMajor>(); + test_matrix_as_index<ColMajor>(); + test_matrix_as_index<RowMajor>(); + test_varlist_as_index<ColMajor>(); + test_varlist_as_index<RowMajor>(); + test_sizes_as_index<ColMajor>(); + test_sizes_as_index<RowMajor>(); +} diff --git a/eigen/unsupported/test/cxx11_tensor_custom_op.cpp b/eigen/unsupported/test/cxx11_tensor_custom_op.cpp new file mode 100644 index 0000000..8baa477 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_custom_op.cpp @@ -0,0 +1,111 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
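+
+// The tests below exercise the customOp() extension point. As a minimal
+// sketch of the contract they rely on (the Doubler functor here is
+// illustrative only, not part of the test): a custom op supplies
+// dimensions() to announce the output shape, and a templated eval() that
+// writes the result through the given device:
+//
+//   struct Doubler {
+//     DSizes<DenseIndex, 2> dimensions(const Tensor<float, 2>& input) const {
+//       return DSizes<DenseIndex, 2>(input.dimension(0), input.dimension(1));
+//     }
+//     template <typename Output, typename Device>
+//     void eval(const Tensor<float, 2>& input, Output& output,
+//               const Device& device) const {
+//       output.device(device) = input * 2.0f;  // any tensor expression works
+//     }
+//   };
+//   Tensor<float, 2> r = t.customOp(Doubler());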
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + + +struct InsertZeros { + DSizes<DenseIndex, 2> dimensions(const Tensor<float, 2>& input) const { + DSizes<DenseIndex, 2> result; + result[0] = input.dimension(0) * 2; + result[1] = input.dimension(1) * 2; + return result; + } + + template <typename Output, typename Device> + void eval(const Tensor<float, 2>& input, Output& output, const Device& device) const + { + array<DenseIndex, 2> strides; + strides[0] = 2; + strides[1] = 2; + output.stride(strides).device(device) = input; + + Eigen::DSizes<DenseIndex, 2> offsets(1,1); + Eigen::DSizes<DenseIndex, 2> extents(output.dimension(0)-1, output.dimension(1)-1); + output.slice(offsets, extents).stride(strides).device(device) = input.constant(0.0f); + } +}; + +static void test_custom_unary_op() +{ + Tensor<float, 2> tensor(3,5); + tensor.setRandom(); + + Tensor<float, 2> result = tensor.customOp(InsertZeros()); + VERIFY_IS_EQUAL(result.dimension(0), 6); + VERIFY_IS_EQUAL(result.dimension(1), 10); + + for (int i = 0; i < 6; i+=2) { + for (int j = 0; j < 10; j+=2) { + VERIFY_IS_EQUAL(result(i, j), tensor(i/2, j/2)); + } + } + for (int i = 1; i < 6; i+=2) { + for (int j = 1; j < 10; j+=2) { + VERIFY_IS_EQUAL(result(i, j), 0); + } + } +} + + +struct BatchMatMul { + DSizes<DenseIndex, 3> dimensions(const Tensor<float, 3>& input1, const Tensor<float, 3>& input2) const { + DSizes<DenseIndex, 3> result; + result[0] = input1.dimension(0); + result[1] = input2.dimension(1); + result[2] = input2.dimension(2); + return result; + } + + template <typename Output, typename Device> + void eval(const Tensor<float, 3>& input1, const Tensor<float, 3>& input2, + Output& output, const Device& device) const + { + typedef Tensor<float, 3>::DimensionPair DimPair; + array<DimPair, 1> dims; + dims[0] = DimPair(1, 0); + for (int i = 0; i < output.dimension(2); ++i) { + output.template chip<2>(i).device(device) = input1.chip<2>(i).contract(input2.chip<2>(i), dims); + } + } +}; + + +static void test_custom_binary_op() +{ + Tensor<float, 3> tensor1(2,3,5); + tensor1.setRandom(); + Tensor<float, 3> tensor2(3,7,5); + tensor2.setRandom(); + + Tensor<float, 3> result = tensor1.customOp(tensor2, BatchMatMul()); + for (int i = 0; i < 5; ++i) { + typedef Tensor<float, 3>::DimensionPair DimPair; + array<DimPair, 1> dims; + dims[0] = DimPair(1, 0); + Tensor<float, 2> reference = tensor1.chip<2>(i).contract(tensor2.chip<2>(i), dims); + TensorRef<Tensor<float, 2> > val = result.chip<2>(i); + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(val(j, k), reference(j, k)); + } + } + } +} + + +void test_cxx11_tensor_custom_op() +{ + CALL_SUBTEST(test_custom_unary_op()); + CALL_SUBTEST(test_custom_binary_op()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_device.cu b/eigen/unsupported/test/cxx11_tensor_device.cu new file mode 100644 index 0000000..fde20dd --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_device.cu @@ -0,0 +1,390 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
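+
+// The tests below evaluate the same tensor expressions on the CPU and on a
+// CUDA device by swapping the device handed to device(). A minimal sketch of
+// the pattern the CPUContext/GPUContext classes below wrap:
+//
+//   Eigen::DefaultDevice cpu;               // evaluates on the host
+//   Eigen::CudaStreamDevice stream;
+//   Eigen::GpuDevice gpu(&stream);          // evaluates on a CUDA stream
+//   out.device(cpu) = in1 + in2 * 3.14f;    // same expression either way;
+//   out.device(gpu) = in1 + in2 * 3.14f;    // for the GPU, out/in1/in2 must
+//                                           // map device memory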
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_device +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::RowMajor; + +// Context for evaluation on cpu +struct CPUContext { + CPUContext(const Eigen::Tensor<float, 3>& in1, Eigen::Tensor<float, 3>& in2, Eigen::Tensor<float, 3>& out) : in1_(in1), in2_(in2), out_(out), kernel_1d_(2), kernel_2d_(2,2), kernel_3d_(2,2,2) { + kernel_1d_(0) = 3.14f; + kernel_1d_(1) = 2.7f; + + kernel_2d_(0,0) = 3.14f; + kernel_2d_(1,0) = 2.7f; + kernel_2d_(0,1) = 0.2f; + kernel_2d_(1,1) = 7.0f; + + kernel_3d_(0,0,0) = 3.14f; + kernel_3d_(0,1,0) = 2.7f; + kernel_3d_(0,0,1) = 0.2f; + kernel_3d_(0,1,1) = 7.0f; + kernel_3d_(1,0,0) = -1.0f; + kernel_3d_(1,1,0) = -0.3f; + kernel_3d_(1,0,1) = -0.7f; + kernel_3d_(1,1,1) = -0.5f; + } + + const Eigen::DefaultDevice& device() const { return cpu_device_; } + + const Eigen::Tensor<float, 3>& in1() const { return in1_; } + const Eigen::Tensor<float, 3>& in2() const { return in2_; } + Eigen::Tensor<float, 3>& out() { return out_; } + const Eigen::Tensor<float, 1>& kernel1d() const { return kernel_1d_; } + const Eigen::Tensor<float, 2>& kernel2d() const { return kernel_2d_; } + const Eigen::Tensor<float, 3>& kernel3d() const { return kernel_3d_; } + + private: + const Eigen::Tensor<float, 3>& in1_; + const Eigen::Tensor<float, 3>& in2_; + Eigen::Tensor<float, 3>& out_; + + Eigen::Tensor<float, 1> kernel_1d_; + Eigen::Tensor<float, 2> kernel_2d_; + Eigen::Tensor<float, 3> kernel_3d_; + + Eigen::DefaultDevice cpu_device_; +}; + + +// Context for evaluation on GPU +struct GPUContext { + GPUContext(const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1, Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2, Eigen::TensorMap<Eigen::Tensor<float, 3> >& out) : in1_(in1), in2_(in2), out_(out), gpu_device_(&stream_) { + assert(cudaMalloc((void**)(&kernel_1d_), 2*sizeof(float)) == cudaSuccess); + float kernel_1d_val[] = {3.14f, 2.7f}; + assert(cudaMemcpy(kernel_1d_, kernel_1d_val, 2*sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess); + + assert(cudaMalloc((void**)(&kernel_2d_), 4*sizeof(float)) == cudaSuccess); + float kernel_2d_val[] = {3.14f, 2.7f, 0.2f, 7.0f}; + assert(cudaMemcpy(kernel_2d_, kernel_2d_val, 4*sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess); + + assert(cudaMalloc((void**)(&kernel_3d_), 8*sizeof(float)) == cudaSuccess); + float kernel_3d_val[] = {3.14f, -1.0f, 2.7f, -0.3f, 0.2f, -0.7f, 7.0f, -0.5f}; + assert(cudaMemcpy(kernel_3d_, kernel_3d_val, 8*sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess); + } + ~GPUContext() { + assert(cudaFree(kernel_1d_) == cudaSuccess); + assert(cudaFree(kernel_2d_) == cudaSuccess); + assert(cudaFree(kernel_3d_) == cudaSuccess); + } + + const Eigen::GpuDevice& device() const { return gpu_device_; } + + const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1() const { return in1_; } + const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2() const { return in2_; } + Eigen::TensorMap<Eigen::Tensor<float, 3> >& out() { return out_; } + Eigen::TensorMap<Eigen::Tensor<float, 1> > kernel1d() const { return Eigen::TensorMap<Eigen::Tensor<float, 1> >(kernel_1d_, 2); } + Eigen::TensorMap<Eigen::Tensor<float, 2> > kernel2d() const { return Eigen::TensorMap<Eigen::Tensor<float, 2> >(kernel_2d_, 2, 2); } + Eigen::TensorMap<Eigen::Tensor<float, 
3> > kernel3d() const { return Eigen::TensorMap<Eigen::Tensor<float, 3> >(kernel_3d_, 2, 2, 2); } + + private: + const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in1_; + const Eigen::TensorMap<Eigen::Tensor<float, 3> >& in2_; + Eigen::TensorMap<Eigen::Tensor<float, 3> >& out_; + + float* kernel_1d_; + float* kernel_2d_; + float* kernel_3d_; + + Eigen::CudaStreamDevice stream_; + Eigen::GpuDevice gpu_device_; +}; + + +// The actual expression to evaluate +template <typename Context> +void test_contextual_eval(Context* context) +{ + context->out().device(context->device()) = context->in1() + context->in2() * 3.14f + context->in1().constant(2.718f); +} + +template <typename Context> +void test_forced_contextual_eval(Context* context) +{ + context->out().device(context->device()) = (context->in1() + context->in2()).eval() * 3.14f + context->in1().constant(2.718f); +} + +template <typename Context> +void test_compound_assignment(Context* context) +{ + context->out().device(context->device()) = context->in1().constant(2.718f); + context->out().device(context->device()) += context->in1() + context->in2() * 3.14f; +} + + +template <typename Context> +void test_contraction(Context* context) +{ + Eigen::array<std::pair<int, int>, 2> dims; + dims[0] = std::make_pair(1, 1); + dims[1] = std::make_pair(2, 2); + + Eigen::array<int, 2> shape(40, 50*70); + + Eigen::DSizes<int, 2> indices(0,0); + Eigen::DSizes<int, 2> sizes(40,40); + + context->out().reshape(shape).slice(indices, sizes).device(context->device()) = context->in1().contract(context->in2(), dims); +} + + +template <typename Context> +void test_1d_convolution(Context* context) +{ + Eigen::DSizes<int, 3> indices(0,0,0); + Eigen::DSizes<int, 3> sizes(40,49,70); + + Eigen::array<int, 1> dims(1); + context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel1d(), dims); +} + +template <typename Context> +void test_2d_convolution(Context* context) +{ + Eigen::DSizes<int, 3> indices(0,0,0); + Eigen::DSizes<int, 3> sizes(40,49,69); + + Eigen::array<int, 2> dims(1,2); + context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel2d(), dims); +} + +template <typename Context> +void test_3d_convolution(Context* context) +{ + Eigen::DSizes<int, 3> indices(0,0,0); + Eigen::DSizes<int, 3> sizes(39,49,69); + + Eigen::array<int, 3> dims(0,1,2); + context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel3d(), dims); +} + + +void test_cpu() { + Eigen::Tensor<float, 3> in1(40,50,70); + Eigen::Tensor<float, 3> in2(40,50,70); + Eigen::Tensor<float, 3> out(40,50,70); + + in1 = in1.random() + in1.constant(10.0f); + in2 = in2.random() + in2.constant(10.0f); + + CPUContext context(in1, in2, out); + test_contextual_eval(&context); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 50; ++j) { + for (int k = 0; k < 70; ++k) { + VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f); + } + } + } + + test_forced_contextual_eval(&context); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 50; ++j) { + for (int k = 0; k < 70; ++k) { + VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) + in2(i,j,k)) * 3.14f + 2.718f); + } + } + } + + test_compound_assignment(&context); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 50; ++j) { + for (int k = 0; k < 70; ++k) { + VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f); + } + } + } + + test_contraction(&context); + for (int i = 0; i < 40; ++i) { + for (int j = 
0; j < 40; ++j) { + const float result = out(i,j,0); + float expected = 0; + for (int k = 0; k < 50; ++k) { + for (int l = 0; l < 70; ++l) { + expected += in1(i, k, l) * in2(j, k, l); + } + } + VERIFY_IS_APPROX(expected, result); + } + } + + test_1d_convolution(&context); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 49; ++j) { + for (int k = 0; k < 70; ++k) { + VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f)); + } + } + } + + test_2d_convolution(&context); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 49; ++j) { + for (int k = 0; k < 69; ++k) { + const float result = out(i,j,k); + const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f) + + (in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f); + if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) { + continue; + } + VERIFY_IS_APPROX(expected, result); + } + } + } + + test_3d_convolution(&context); + for (int i = 0; i < 39; ++i) { + for (int j = 0; j < 49; ++j) { + for (int k = 0; k < 69; ++k) { + const float result = out(i,j,k); + const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f + + in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f) + + (in1(i+1,j,k) * -1.0f + in1(i+1,j+1,k) * -0.3f + + in1(i+1,j,k+1) * -0.7f + in1(i+1,j+1,k+1) * -0.5f); + if (fabs(expected) < 1e-4f && fabs(result) < 1e-4f) { + continue; + } + VERIFY_IS_APPROX(expected, result); + } + } + } +} + +void test_gpu() { + Eigen::Tensor<float, 3> in1(40,50,70); + Eigen::Tensor<float, 3> in2(40,50,70); + Eigen::Tensor<float, 3> out(40,50,70); + in1 = in1.random() + in1.constant(10.0f); + in2 = in2.random() + in2.constant(10.0f); + + std::size_t in1_bytes = in1.size() * sizeof(float); + std::size_t in2_bytes = in2.size() * sizeof(float); + std::size_t out_bytes = out.size() * sizeof(float); + + float* d_in1; + float* d_in2; + float* d_out; + cudaMalloc((void**)(&d_in1), in1_bytes); + cudaMalloc((void**)(&d_in2), in2_bytes); + cudaMalloc((void**)(&d_out), out_bytes); + + cudaMemcpy(d_in1, in1.data(), in1_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in2, in2.data(), in2_bytes, cudaMemcpyHostToDevice); + + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, 40,50,70); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, 40,50,70); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, 40,50,70); + + GPUContext context(gpu_in1, gpu_in2, gpu_out); + test_contextual_eval(&context); + assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 50; ++j) { + for (int k = 0; k < 70; ++k) { + VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f); + } + } + } + + test_forced_contextual_eval(&context); + assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 50; ++j) { + for (int k = 0; k < 70; ++k) { + VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) + in2(i,j,k)) * 3.14f + 2.718f); + } + } + } + + test_compound_assignment(&context); + assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 50; ++j) { + for (int k = 0; k < 70; ++k) { + VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f + 2.718f); + } + } + } + + test_contraction(&context); + assert(cudaMemcpy(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost) == cudaSuccess); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 40; ++j) { + const float result = out(i,j,0); + float expected 
= 0; + for (int k = 0; k < 50; ++k) { + for (int l = 0; l < 70; ++l) { + expected += in1(i, k, l) * in2(j, k, l); + } + } + VERIFY_IS_APPROX(expected, result); + } + } + + test_1d_convolution(&context); + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, context.device().stream()) == cudaSuccess); + assert(cudaStreamSynchronize(context.device().stream()) == cudaSuccess); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 49; ++j) { + for (int k = 0; k < 70; ++k) { + VERIFY_IS_APPROX(out(i,j,k), (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f)); + } + } + } + + test_2d_convolution(&context); + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, context.device().stream()) == cudaSuccess); + assert(cudaStreamSynchronize(context.device().stream()) == cudaSuccess); + for (int i = 0; i < 40; ++i) { + for (int j = 0; j < 49; ++j) { + for (int k = 0; k < 69; ++k) { + const float result = out(i,j,k); + const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f + + in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f); + VERIFY_IS_APPROX(expected, result); + } + } + } + + test_3d_convolution(&context); + assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, context.device().stream()) == cudaSuccess); + assert(cudaStreamSynchronize(context.device().stream()) == cudaSuccess); + for (int i = 0; i < 39; ++i) { + for (int j = 0; j < 49; ++j) { + for (int k = 0; k < 69; ++k) { + const float result = out(i,j,k); + const float expected = (in1(i,j,k) * 3.14f + in1(i,j+1,k) * 2.7f + + in1(i,j,k+1) * 0.2f + in1(i,j+1,k+1) * 7.0f + + in1(i+1,j,k) * -1.0f + in1(i+1,j+1,k) * -0.3f + + in1(i+1,j,k+1) * -0.7f + in1(i+1,j+1,k+1) * -0.5f); + VERIFY_IS_APPROX(expected, result); + } + } + } +} + + +void test_cxx11_tensor_device() +{ + CALL_SUBTEST_1(test_cpu()); + CALL_SUBTEST_2(test_gpu()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp new file mode 100644 index 0000000..3ecc68d --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_device_sycl.cpp @@ -0,0 +1,77 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_device_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> +#include <stdint.h> +#include <iostream> + +template <typename DataType, int DataLayout, typename IndexType> +void test_device_memory(const Eigen::SyclDevice &sycl_device) { + std::cout << "Running on : " + << sycl_device.sycl_queue().get_device(). 
template get_info<cl::sycl::info::device::name>()
+            << std::endl;
+  IndexType sizeDim1 = 100;
+  array<IndexType, 1> tensorRange = {{sizeDim1}};
+  Tensor<DataType, 1, DataLayout, IndexType> in(tensorRange);
+  Tensor<DataType, 1, DataLayout, IndexType> in1(tensorRange);
+  memset(in1.data(), 1, in1.size() * sizeof(DataType));
+  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.size()*sizeof(DataType)));
+  sycl_device.memset(gpu_in_data, 1, in.size()*sizeof(DataType));
+  sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType));
+  for (IndexType i=0; i<in.size(); i++) {
+    VERIFY_IS_EQUAL(in(i), in1(i));
+  }
+  sycl_device.deallocate(gpu_in_data);
+}
+
+template <typename DataType, int DataLayout, typename IndexType>
+void test_device_exceptions(const Eigen::SyclDevice &sycl_device) {
+  VERIFY(sycl_device.ok());
+  IndexType sizeDim1 = 100;
+  array<IndexType, 1> tensorDims = {{sizeDim1}};
+  DataType* gpu_data = static_cast<DataType*>(sycl_device.allocate(sizeDim1*sizeof(DataType)));
+  sycl_device.memset(gpu_data, 1, sizeDim1*sizeof(DataType));
+
+  TensorMap<Tensor<DataType, 1, DataLayout, IndexType>> in(gpu_data, tensorDims);
+  TensorMap<Tensor<DataType, 1, DataLayout, IndexType>> out(gpu_data, tensorDims);
+  out.device(sycl_device) = in / in.constant(0);
+
+  sycl_device.synchronize();
+  VERIFY(!sycl_device.ok());
+  sycl_device.deallocate(gpu_data);
+}
+
+template<typename DataType> void sycl_device_test_per_device(const cl::sycl::device& d){
+  std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
+  QueueInterface queueInterface(d);
+  auto sycl_device = Eigen::SyclDevice(&queueInterface);
+  test_device_memory<DataType, RowMajor, int64_t>(sycl_device);
+  test_device_memory<DataType, ColMajor, int64_t>(sycl_device);
+  /// This test throws an exception; enable it if you want to see the exception.
+  //test_device_exceptions<DataType, RowMajor>(sycl_device);
+  /// This test throws an exception; enable it if you want to see the exception.
+  //test_device_exceptions<DataType, ColMajor>(sycl_device);
+}
+
+void test_cxx11_tensor_device_sycl() {
+  for (const auto& device : Eigen::get_sycl_supported_devices()) {
+    CALL_SUBTEST(sycl_device_test_per_device<float>(device));
+  }
+}
diff --git a/eigen/unsupported/test/cxx11_tensor_dimension.cpp b/eigen/unsupported/test/cxx11_tensor_dimension.cpp
new file mode 100644
index 0000000..16f168e
--- /dev/null
+++ b/eigen/unsupported/test/cxx11_tensor_dimension.cpp
@@ -0,0 +1,69 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
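+
+// The tests below cover the two dimension containers used by the tensor
+// module: DSizes carries run-time extents, while Sizes encodes them at
+// compile time. A minimal sketch of the distinction:
+//
+//   Eigen::DSizes<int, 3> dyn(2, 3, 7);   // extents chosen at run time
+//   Eigen::Sizes<2, 3, 7> fixed;          // extents fixed at compile time
+//   // Both have rank 3 and TotalSize() == 42, and
+//   // Eigen::dimensions_match(dyn, fixed) compares them element-wise.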
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + + +static void test_dynamic_size() +{ + Eigen::DSizes<int, 3> dimensions(2,3,7); + + VERIFY_IS_EQUAL((int)Eigen::internal::array_get<0>(dimensions), 2); + VERIFY_IS_EQUAL((int)Eigen::internal::array_get<1>(dimensions), 3); + VERIFY_IS_EQUAL((int)Eigen::internal::array_get<2>(dimensions), 7); + VERIFY_IS_EQUAL((int)dimensions.TotalSize(), 2*3*7); + VERIFY_IS_EQUAL((int)dimensions[0], 2); + VERIFY_IS_EQUAL((int)dimensions[1], 3); + VERIFY_IS_EQUAL((int)dimensions[2], 7); +} + +static void test_fixed_size() +{ + Eigen::Sizes<2,3,7> dimensions; + + VERIFY_IS_EQUAL((int)Eigen::internal::array_get<0>(dimensions), 2); + VERIFY_IS_EQUAL((int)Eigen::internal::array_get<1>(dimensions), 3); + VERIFY_IS_EQUAL((int)Eigen::internal::array_get<2>(dimensions), 7); + VERIFY_IS_EQUAL((int)dimensions.TotalSize(), 2*3*7); +} + +static void test_match() +{ + Eigen::DSizes<unsigned int, 3> dyn((unsigned int)2,(unsigned int)3,(unsigned int)7); + Eigen::Sizes<2,3,7> stat; + VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn, stat), true); + + Eigen::DSizes<int, 3> dyn1(2,3,7); + Eigen::DSizes<int, 2> dyn2(2,3); + VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn1, dyn2), false); +} + +static void test_rank_zero() +{ + Eigen::Sizes<> scalar; + VERIFY_IS_EQUAL((int)scalar.TotalSize(), 1); + VERIFY_IS_EQUAL((int)scalar.rank(), 0); + VERIFY_IS_EQUAL((int)internal::array_prod(scalar), 1); + + Eigen::DSizes<ptrdiff_t, 0> dscalar; + VERIFY_IS_EQUAL((int)dscalar.TotalSize(), 1); + VERIFY_IS_EQUAL((int)dscalar.rank(), 0); +} + +void test_cxx11_tensor_dimension() +{ + CALL_SUBTEST(test_dynamic_size()); + CALL_SUBTEST(test_fixed_size()); + CALL_SUBTEST(test_match()); + CALL_SUBTEST(test_rank_zero()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_empty.cpp b/eigen/unsupported/test/cxx11_tensor_empty.cpp new file mode 100644 index 0000000..d7eea42 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_empty.cpp @@ -0,0 +1,40 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + + +static void test_empty_tensor() +{ + Tensor<float, 2> source; + Tensor<float, 2> tgt1 = source; + Tensor<float, 2> tgt2(source); + Tensor<float, 2> tgt3; + tgt3 = tgt1; + tgt3 = tgt2; +} + +static void test_empty_fixed_size_tensor() +{ + TensorFixedSize<float, Sizes<0> > source; + TensorFixedSize<float, Sizes<0> > tgt1 = source; + TensorFixedSize<float, Sizes<0> > tgt2(source); + TensorFixedSize<float, Sizes<0> > tgt3; + tgt3 = tgt1; + tgt3 = tgt2; +} + + +void test_cxx11_tensor_empty() +{ + CALL_SUBTEST(test_empty_tensor()); + CALL_SUBTEST(test_empty_fixed_size_tensor()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_expr.cpp b/eigen/unsupported/test/cxx11_tensor_expr.cpp new file mode 100644 index 0000000..129b4e6 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_expr.cpp @@ -0,0 +1,360 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::RowMajor; + +static void test_1d() +{ + Tensor<float, 1> vec1(6); + Tensor<float, 1, RowMajor> vec2(6); + + vec1(0) = 4.0; vec2(0) = 0.0; + vec1(1) = 8.0; vec2(1) = 1.0; + vec1(2) = 15.0; vec2(2) = 2.0; + vec1(3) = 16.0; vec2(3) = 3.0; + vec1(4) = 23.0; vec2(4) = 4.0; + vec1(5) = 42.0; vec2(5) = 5.0; + + float data3[6]; + TensorMap<Tensor<float, 1>> vec3(data3, 6); + vec3 = vec1.sqrt(); + float data4[6]; + TensorMap<Tensor<float, 1, RowMajor>> vec4(data4, 6); + vec4 = vec2.square(); + float data5[6]; + TensorMap<Tensor<float, 1, RowMajor>> vec5(data5, 6); + vec5 = vec2.cube(); + + VERIFY_IS_APPROX(vec3(0), sqrtf(4.0)); + VERIFY_IS_APPROX(vec3(1), sqrtf(8.0)); + VERIFY_IS_APPROX(vec3(2), sqrtf(15.0)); + VERIFY_IS_APPROX(vec3(3), sqrtf(16.0)); + VERIFY_IS_APPROX(vec3(4), sqrtf(23.0)); + VERIFY_IS_APPROX(vec3(5), sqrtf(42.0)); + + VERIFY_IS_APPROX(vec4(0), 0.0f); + VERIFY_IS_APPROX(vec4(1), 1.0f); + VERIFY_IS_APPROX(vec4(2), 2.0f * 2.0f); + VERIFY_IS_APPROX(vec4(3), 3.0f * 3.0f); + VERIFY_IS_APPROX(vec4(4), 4.0f * 4.0f); + VERIFY_IS_APPROX(vec4(5), 5.0f * 5.0f); + + VERIFY_IS_APPROX(vec5(0), 0.0f); + VERIFY_IS_APPROX(vec5(1), 1.0f); + VERIFY_IS_APPROX(vec5(2), 2.0f * 2.0f * 2.0f); + VERIFY_IS_APPROX(vec5(3), 3.0f * 3.0f * 3.0f); + VERIFY_IS_APPROX(vec5(4), 4.0f * 4.0f * 4.0f); + VERIFY_IS_APPROX(vec5(5), 5.0f * 5.0f * 5.0f); + + vec3 = vec1 + vec2; + VERIFY_IS_APPROX(vec3(0), 4.0f + 0.0f); + VERIFY_IS_APPROX(vec3(1), 8.0f + 1.0f); + VERIFY_IS_APPROX(vec3(2), 15.0f + 2.0f); + VERIFY_IS_APPROX(vec3(3), 16.0f + 3.0f); + VERIFY_IS_APPROX(vec3(4), 23.0f + 4.0f); + VERIFY_IS_APPROX(vec3(5), 42.0f + 5.0f); +} + +static void test_2d() +{ + float data1[6]; + TensorMap<Tensor<float, 2>> mat1(data1, 2, 3); + float data2[6]; + TensorMap<Tensor<float, 2, RowMajor>> mat2(data2, 2, 3); + + mat1(0,0) = 0.0; + mat1(0,1) = 1.0; + mat1(0,2) = 2.0; + mat1(1,0) = 3.0; + mat1(1,1) = 4.0; + mat1(1,2) = 5.0; + + mat2(0,0) = -0.0; + mat2(0,1) = -1.0; + mat2(0,2) = -2.0; + mat2(1,0) = -3.0; + mat2(1,1) = -4.0; + mat2(1,2) = -5.0; + + Tensor<float, 2> mat3(2,3); + Tensor<float, 2, RowMajor> mat4(2,3); + mat3 = mat1.abs(); + mat4 = mat2.abs(); + + VERIFY_IS_APPROX(mat3(0,0), 0.0f); + VERIFY_IS_APPROX(mat3(0,1), 1.0f); + VERIFY_IS_APPROX(mat3(0,2), 2.0f); + VERIFY_IS_APPROX(mat3(1,0), 3.0f); + VERIFY_IS_APPROX(mat3(1,1), 4.0f); + VERIFY_IS_APPROX(mat3(1,2), 5.0f); + + VERIFY_IS_APPROX(mat4(0,0), 0.0f); + VERIFY_IS_APPROX(mat4(0,1), 1.0f); + VERIFY_IS_APPROX(mat4(0,2), 2.0f); + VERIFY_IS_APPROX(mat4(1,0), 3.0f); + VERIFY_IS_APPROX(mat4(1,1), 4.0f); + VERIFY_IS_APPROX(mat4(1,2), 5.0f); +} + +static void test_3d() +{ + Tensor<float, 3> mat1(2,3,7); + Tensor<float, 3, RowMajor> mat2(2,3,7); + + float val = 1.0f; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + mat1(i,j,k) = val; + mat2(i,j,k) = val; + val += 1.0f; + } + } + } + + Tensor<float, 3> mat3(2,3,7); + mat3 = mat1 + mat1; + Tensor<float, 3, RowMajor> mat4(2,3,7); + mat4 = mat2 * 3.14f; + Tensor<float, 3> mat5(2,3,7); + mat5 = mat1.inverse().log(); + Tensor<float, 3, RowMajor> mat6(2,3,7); + mat6 = mat2.pow(0.5f) * 3.14f; + Tensor<float, 3> mat7(2,3,7); + mat7 = mat1.cwiseMax(mat5 * 2.0f).exp(); + Tensor<float, 3, RowMajor> mat8(2,3,7); + mat8 = (-mat2).exp() * 3.14f; + Tensor<float, 3, RowMajor> mat9(2,3,7); + mat9 = 
mat2 + 3.14f; + Tensor<float, 3, RowMajor> mat10(2,3,7); + mat10 = mat2 - 3.14f; + Tensor<float, 3, RowMajor> mat11(2,3,7); + mat11 = mat2 / 3.14f; + + val = 1.0f; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(mat3(i,j,k), val + val); + VERIFY_IS_APPROX(mat4(i,j,k), val * 3.14f); + VERIFY_IS_APPROX(mat5(i,j,k), logf(1.0f/val)); + VERIFY_IS_APPROX(mat6(i,j,k), sqrtf(val) * 3.14f); + VERIFY_IS_APPROX(mat7(i,j,k), expf((std::max)(val, mat5(i,j,k) * 2.0f))); + VERIFY_IS_APPROX(mat8(i,j,k), expf(-val) * 3.14f); + VERIFY_IS_APPROX(mat9(i,j,k), val + 3.14f); + VERIFY_IS_APPROX(mat10(i,j,k), val - 3.14f); + VERIFY_IS_APPROX(mat11(i,j,k), val / 3.14f); + val += 1.0f; + } + } + } +} + +static void test_constants() +{ + Tensor<float, 3> mat1(2,3,7); + Tensor<float, 3> mat2(2,3,7); + Tensor<float, 3> mat3(2,3,7); + + float val = 1.0f; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + mat1(i,j,k) = val; + val += 1.0f; + } + } + } + mat2 = mat1.constant(3.14f); + mat3 = mat1.cwiseMax(7.3f).exp(); + + val = 1.0f; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(mat2(i,j,k), 3.14f); + VERIFY_IS_APPROX(mat3(i,j,k), expf((std::max)(val, 7.3f))); + val += 1.0f; + } + } + } +} + +static void test_boolean() +{ + Tensor<int, 1> vec(6); + std::copy_n(std::begin({0, 1, 2, 3, 4, 5}), 6, vec.data()); + + // Test ||. + Tensor<bool, 1> bool1 = vec < vec.constant(1) || vec > vec.constant(4); + VERIFY_IS_EQUAL(bool1[0], true); + VERIFY_IS_EQUAL(bool1[1], false); + VERIFY_IS_EQUAL(bool1[2], false); + VERIFY_IS_EQUAL(bool1[3], false); + VERIFY_IS_EQUAL(bool1[4], false); + VERIFY_IS_EQUAL(bool1[5], true); + + // Test &&, including cast of operand vec. + Tensor<bool, 1> bool2 = vec.cast<bool>() && vec < vec.constant(4); + VERIFY_IS_EQUAL(bool2[0], false); + VERIFY_IS_EQUAL(bool2[1], true); + VERIFY_IS_EQUAL(bool2[2], true); + VERIFY_IS_EQUAL(bool2[3], true); + VERIFY_IS_EQUAL(bool2[4], false); + VERIFY_IS_EQUAL(bool2[5], false); + + // Compilation tests: + // Test Tensor<bool> against results of cast or comparison; verifies that + // CoeffReturnType is set to match Op return type of bool for Unary and Binary + // Ops. + Tensor<bool, 1> bool3 = vec.cast<bool>() && bool2; + bool3 = vec < vec.constant(4) && bool2; +} + +static void test_functors() +{ + Tensor<float, 3> mat1(2,3,7); + Tensor<float, 3> mat2(2,3,7); + Tensor<float, 3> mat3(2,3,7); + + float val = 1.0f; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + mat1(i,j,k) = val; + val += 1.0f; + } + } + } + mat2 = mat1.inverse().unaryExpr(&asinf); + mat3 = mat1.unaryExpr(&tanhf); + + val = 1.0f; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(mat2(i,j,k), asinf(1.0f / mat1(i,j,k))); + VERIFY_IS_APPROX(mat3(i,j,k), tanhf(mat1(i,j,k))); + val += 1.0f; + } + } + } +} + +static void test_type_casting() +{ + Tensor<bool, 3> mat1(2,3,7); + Tensor<float, 3> mat2(2,3,7); + Tensor<double, 3> mat3(2,3,7); + mat1.setRandom(); + mat2.setRandom(); + + mat3 = mat1.cast<double>(); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(mat3(i,j,k), mat1(i,j,k) ? 
1.0 : 0.0); + } + } + } + + mat3 = mat2.cast<double>(); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(mat3(i,j,k), static_cast<double>(mat2(i,j,k))); + } + } + } +} + +static void test_select() +{ + Tensor<float, 3> selector(2,3,7); + Tensor<float, 3> mat1(2,3,7); + Tensor<float, 3> mat2(2,3,7); + Tensor<float, 3> result(2,3,7); + + selector.setRandom(); + mat1.setRandom(); + mat2.setRandom(); + result = (selector > selector.constant(0.5f)).select(mat1, mat2); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(result(i,j,k), (selector(i,j,k) > 0.5f) ? mat1(i,j,k) : mat2(i,j,k)); + } + } + } +} + +template <typename Scalar> +void test_minmax_nan_propagation_templ() { + for (int size = 1; size < 17; ++size) { + const Scalar kNan = std::numeric_limits<Scalar>::quiet_NaN(); + Tensor<Scalar, 1> vec_nan(size); + Tensor<Scalar, 1> vec_zero(size); + Tensor<Scalar, 1> vec_res(size); + vec_nan.setConstant(kNan); + vec_zero.setZero(); + vec_res.setZero(); + + // Test that we propagate NaNs in the tensor when applying the + // cwiseMax(scalar) operator, which is used for the Relu operator. + vec_res = vec_nan.cwiseMax(Scalar(0)); + for (int i = 0; i < size; ++i) { + VERIFY((numext::isnan)(vec_res(i))); + } + + // Test that NaNs do not propagate if we reverse the arguments. + vec_res = vec_zero.cwiseMax(kNan); + for (int i = 0; i < size; ++i) { + VERIFY_IS_EQUAL(vec_res(i), Scalar(0)); + } + + // Test that we propagate NaNs in the tensor when applying the + // cwiseMin(scalar) operator. + vec_res.setZero(); + vec_res = vec_nan.cwiseMin(Scalar(0)); + for (int i = 0; i < size; ++i) { + VERIFY((numext::isnan)(vec_res(i))); + } + + // Test that NaNs do not propagate if we reverse the arguments. + vec_res = vec_zero.cwiseMin(kNan); + for (int i = 0; i < size; ++i) { + VERIFY_IS_EQUAL(vec_res(i), Scalar(0)); + } + } +} + +static void test_minmax_nan_propagation() +{ + test_minmax_nan_propagation_templ<float>(); + test_minmax_nan_propagation_templ<double>(); +} + +void test_cxx11_tensor_expr() +{ + CALL_SUBTEST(test_1d()); + CALL_SUBTEST(test_2d()); + CALL_SUBTEST(test_3d()); + CALL_SUBTEST(test_constants()); + CALL_SUBTEST(test_boolean()); + CALL_SUBTEST(test_functors()); + CALL_SUBTEST(test_type_casting()); + CALL_SUBTEST(test_select()); + CALL_SUBTEST(test_minmax_nan_propagation()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_fft.cpp b/eigen/unsupported/test/cxx11_tensor_fft.cpp new file mode 100644 index 0000000..2f14ebc --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_fft.cpp @@ -0,0 +1,273 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Jianwei Cui <thucjw@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
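+
+// The tests below check the tensor FFT against precomputed golden values.
+// A minimal sketch of the API they exercise: fft<ResultParts, Direction>(axes)
+// takes the list of dimensions to transform and returns either the complex
+// result or just its real/imaginary part, depending on the template
+// arguments:
+//
+//   Tensor<float, 1> signal(5);
+//   signal.setRandom();
+//   array<ptrdiff_t, 1> axes;
+//   axes[0] = 0;  // transform along dimension 0
+//   Tensor<std::complex<float>, 1> spectrum =
+//       signal.fft<Eigen::BothParts, Eigen::FFT_FORWARD>(axes);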
+ +#include "main.h" +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template <int DataLayout> +static void test_fft_2D_golden() { + Tensor<float, 2, DataLayout> input(2, 3); + input(0, 0) = 1; + input(0, 1) = 2; + input(0, 2) = 3; + input(1, 0) = 4; + input(1, 1) = 5; + input(1, 2) = 6; + + array<ptrdiff_t, 2> fft; + fft[0] = 0; + fft[1] = 1; + + Tensor<std::complex<float>, 2, DataLayout> output = input.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft); + + std::complex<float> output_golden[6]; // in ColMajor order + output_golden[0] = std::complex<float>(21, 0); + output_golden[1] = std::complex<float>(-9, 0); + output_golden[2] = std::complex<float>(-3, 1.73205); + output_golden[3] = std::complex<float>( 0, 0); + output_golden[4] = std::complex<float>(-3, -1.73205); + output_golden[5] = std::complex<float>(0 ,0); + + std::complex<float> c_offset = std::complex<float>(1.0, 1.0); + + if (DataLayout == ColMajor) { + VERIFY_IS_APPROX(output(0) + c_offset, output_golden[0] + c_offset); + VERIFY_IS_APPROX(output(1) + c_offset, output_golden[1] + c_offset); + VERIFY_IS_APPROX(output(2) + c_offset, output_golden[2] + c_offset); + VERIFY_IS_APPROX(output(3) + c_offset, output_golden[3] + c_offset); + VERIFY_IS_APPROX(output(4) + c_offset, output_golden[4] + c_offset); + VERIFY_IS_APPROX(output(5) + c_offset, output_golden[5] + c_offset); + } + else { + VERIFY_IS_APPROX(output(0)+ c_offset, output_golden[0]+ c_offset); + VERIFY_IS_APPROX(output(1)+ c_offset, output_golden[2]+ c_offset); + VERIFY_IS_APPROX(output(2)+ c_offset, output_golden[4]+ c_offset); + VERIFY_IS_APPROX(output(3)+ c_offset, output_golden[1]+ c_offset); + VERIFY_IS_APPROX(output(4)+ c_offset, output_golden[3]+ c_offset); + VERIFY_IS_APPROX(output(5)+ c_offset, output_golden[5]+ c_offset); + } +} + +static void test_fft_complex_input_golden() { + Tensor<std::complex<float>, 1, ColMajor> input(5); + input(0) = std::complex<float>(1, 1); + input(1) = std::complex<float>(2, 2); + input(2) = std::complex<float>(3, 3); + input(3) = std::complex<float>(4, 4); + input(4) = std::complex<float>(5, 5); + + array<ptrdiff_t, 1> fft; + fft[0] = 0; + + Tensor<std::complex<float>, 1, ColMajor> forward_output_both_parts = input.fft<BothParts, FFT_FORWARD>(fft); + Tensor<std::complex<float>, 1, ColMajor> reverse_output_both_parts = input.fft<BothParts, FFT_REVERSE>(fft); + + Tensor<float, 1, ColMajor> forward_output_real_part = input.fft<RealPart, FFT_FORWARD>(fft); + Tensor<float, 1, ColMajor> reverse_output_real_part = input.fft<RealPart, FFT_REVERSE>(fft); + + Tensor<float, 1, ColMajor> forward_output_imag_part = input.fft<ImagPart, FFT_FORWARD>(fft); + Tensor<float, 1, ColMajor> reverse_output_imag_part = input.fft<ImagPart, FFT_REVERSE>(fft); + + VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0)); + VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0)); + + VERIFY_IS_EQUAL(forward_output_real_part.dimension(0), input.dimension(0)); + VERIFY_IS_EQUAL(reverse_output_real_part.dimension(0), input.dimension(0)); + + VERIFY_IS_EQUAL(forward_output_imag_part.dimension(0), input.dimension(0)); + VERIFY_IS_EQUAL(reverse_output_imag_part.dimension(0), input.dimension(0)); + + std::complex<float> forward_golden_result[5]; + std::complex<float> reverse_golden_result[5]; + + forward_golden_result[0] = std::complex<float>(15.000000000000000,+15.000000000000000); + forward_golden_result[1] = std::complex<float>(-5.940954801177935, +0.940954801177934); + forward_golden_result[2] = 
std::complex<float>(-3.312299240582266, -1.687700759417735); + forward_golden_result[3] = std::complex<float>(-1.687700759417735, -3.312299240582266); + forward_golden_result[4] = std::complex<float>( 0.940954801177934, -5.940954801177935); + + reverse_golden_result[0] = std::complex<float>( 3.000000000000000, + 3.000000000000000); + reverse_golden_result[1] = std::complex<float>( 0.188190960235587, - 1.188190960235587); + reverse_golden_result[2] = std::complex<float>(-0.337540151883547, - 0.662459848116453); + reverse_golden_result[3] = std::complex<float>(-0.662459848116453, - 0.337540151883547); + reverse_golden_result[4] = std::complex<float>(-1.188190960235587, + 0.188190960235587); + + for(int i = 0; i < 5; ++i) { + VERIFY_IS_APPROX(forward_output_both_parts(i), forward_golden_result[i]); + VERIFY_IS_APPROX(forward_output_real_part(i), forward_golden_result[i].real()); + VERIFY_IS_APPROX(forward_output_imag_part(i), forward_golden_result[i].imag()); + } + + for(int i = 0; i < 5; ++i) { + VERIFY_IS_APPROX(reverse_output_both_parts(i), reverse_golden_result[i]); + VERIFY_IS_APPROX(reverse_output_real_part(i), reverse_golden_result[i].real()); + VERIFY_IS_APPROX(reverse_output_imag_part(i), reverse_golden_result[i].imag()); + } +} + +static void test_fft_real_input_golden() { + Tensor<float, 1, ColMajor> input(5); + input(0) = 1.0; + input(1) = 2.0; + input(2) = 3.0; + input(3) = 4.0; + input(4) = 5.0; + + array<ptrdiff_t, 1> fft; + fft[0] = 0; + + Tensor<std::complex<float>, 1, ColMajor> forward_output_both_parts = input.fft<BothParts, FFT_FORWARD>(fft); + Tensor<std::complex<float>, 1, ColMajor> reverse_output_both_parts = input.fft<BothParts, FFT_REVERSE>(fft); + + Tensor<float, 1, ColMajor> forward_output_real_part = input.fft<RealPart, FFT_FORWARD>(fft); + Tensor<float, 1, ColMajor> reverse_output_real_part = input.fft<RealPart, FFT_REVERSE>(fft); + + Tensor<float, 1, ColMajor> forward_output_imag_part = input.fft<ImagPart, FFT_FORWARD>(fft); + Tensor<float, 1, ColMajor> reverse_output_imag_part = input.fft<ImagPart, FFT_REVERSE>(fft); + + VERIFY_IS_EQUAL(forward_output_both_parts.dimension(0), input.dimension(0)); + VERIFY_IS_EQUAL(reverse_output_both_parts.dimension(0), input.dimension(0)); + + VERIFY_IS_EQUAL(forward_output_real_part.dimension(0), input.dimension(0)); + VERIFY_IS_EQUAL(reverse_output_real_part.dimension(0), input.dimension(0)); + + VERIFY_IS_EQUAL(forward_output_imag_part.dimension(0), input.dimension(0)); + VERIFY_IS_EQUAL(reverse_output_imag_part.dimension(0), input.dimension(0)); + + std::complex<float> forward_golden_result[5]; + std::complex<float> reverse_golden_result[5]; + + + forward_golden_result[0] = std::complex<float>( 15, 0); + forward_golden_result[1] = std::complex<float>(-2.5, +3.44095480117793); + forward_golden_result[2] = std::complex<float>(-2.5, +0.81229924058227); + forward_golden_result[3] = std::complex<float>(-2.5, -0.81229924058227); + forward_golden_result[4] = std::complex<float>(-2.5, -3.44095480117793); + + reverse_golden_result[0] = std::complex<float>( 3.0, 0); + reverse_golden_result[1] = std::complex<float>(-0.5, -0.688190960235587); + reverse_golden_result[2] = std::complex<float>(-0.5, -0.162459848116453); + reverse_golden_result[3] = std::complex<float>(-0.5, +0.162459848116453); + reverse_golden_result[4] = std::complex<float>(-0.5, +0.688190960235587); + + std::complex<float> c_offset(1.0, 1.0); + float r_offset = 1.0; + + for(int i = 0; i < 5; ++i) { + VERIFY_IS_APPROX(forward_output_both_parts(i) + c_offset, 
forward_golden_result[i] + c_offset); + VERIFY_IS_APPROX(forward_output_real_part(i) + r_offset, forward_golden_result[i].real() + r_offset); + VERIFY_IS_APPROX(forward_output_imag_part(i) + r_offset, forward_golden_result[i].imag() + r_offset); + } + + for(int i = 0; i < 5; ++i) { + VERIFY_IS_APPROX(reverse_output_both_parts(i) + c_offset, reverse_golden_result[i] + c_offset); + VERIFY_IS_APPROX(reverse_output_real_part(i) + r_offset, reverse_golden_result[i].real() + r_offset); + VERIFY_IS_APPROX(reverse_output_imag_part(i) + r_offset, reverse_golden_result[i].imag() + r_offset); + } +} + + +template <int DataLayout, typename RealScalar, bool isComplexInput, int FFTResultType, int FFTDirection, int TensorRank> +static void test_fft_real_input_energy() { + + Eigen::DSizes<ptrdiff_t, TensorRank> dimensions; + ptrdiff_t total_size = 1; + for (int i = 0; i < TensorRank; ++i) { + dimensions[i] = rand() % 20 + 1; + total_size *= dimensions[i]; + } + const DSizes<ptrdiff_t, TensorRank> arr = dimensions; + + typedef typename internal::conditional<isComplexInput == true, std::complex<RealScalar>, RealScalar>::type InputScalar; + + Tensor<InputScalar, TensorRank, DataLayout> input; + input.resize(arr); + input.setRandom(); + + array<ptrdiff_t, TensorRank> fft; + for (int i = 0; i < TensorRank; ++i) { + fft[i] = i; + } + + typedef typename internal::conditional<FFTResultType == Eigen::BothParts, std::complex<RealScalar>, RealScalar>::type OutputScalar; + Tensor<OutputScalar, TensorRank, DataLayout> output; + output = input.template fft<FFTResultType, FFTDirection>(fft); + + for (int i = 0; i < TensorRank; ++i) { + VERIFY_IS_EQUAL(output.dimension(i), input.dimension(i)); + } + + RealScalar energy_original = 0.0; + RealScalar energy_after_fft = 0.0; + + for (int i = 0; i < total_size; ++i) { + energy_original += numext::abs2(input(i)); + } + + for (int i = 0; i < total_size; ++i) { + energy_after_fft += numext::abs2(output(i)); + } + + if(FFTDirection == FFT_FORWARD) { + VERIFY_IS_APPROX(energy_original, energy_after_fft / total_size); + } + else { + VERIFY_IS_APPROX(energy_original, energy_after_fft * total_size); + } +} + +void test_cxx11_tensor_fft() { + test_fft_complex_input_golden(); + test_fft_real_input_golden(); + + test_fft_2D_golden<ColMajor>(); + test_fft_2D_golden<RowMajor>(); + + test_fft_real_input_energy<ColMajor, float, true, Eigen::BothParts, FFT_FORWARD, 1>(); + test_fft_real_input_energy<ColMajor, double, true, Eigen::BothParts, FFT_FORWARD, 1>(); + test_fft_real_input_energy<ColMajor, float, false, Eigen::BothParts, FFT_FORWARD, 1>(); + test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 1>(); + + test_fft_real_input_energy<ColMajor, float, true, Eigen::BothParts, FFT_FORWARD, 2>(); + test_fft_real_input_energy<ColMajor, double, true, Eigen::BothParts, FFT_FORWARD, 2>(); + test_fft_real_input_energy<ColMajor, float, false, Eigen::BothParts, FFT_FORWARD, 2>(); + test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 2>(); + + test_fft_real_input_energy<ColMajor, float, true, Eigen::BothParts, FFT_FORWARD, 3>(); + test_fft_real_input_energy<ColMajor, double, true, Eigen::BothParts, FFT_FORWARD, 3>(); + test_fft_real_input_energy<ColMajor, float, false, Eigen::BothParts, FFT_FORWARD, 3>(); + test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 3>(); + + test_fft_real_input_energy<ColMajor, float, true, Eigen::BothParts, FFT_FORWARD, 4>(); + test_fft_real_input_energy<ColMajor, double, true, 
Eigen::BothParts, FFT_FORWARD, 4>(); + test_fft_real_input_energy<ColMajor, float, false, Eigen::BothParts, FFT_FORWARD, 4>(); + test_fft_real_input_energy<ColMajor, double, false, Eigen::BothParts, FFT_FORWARD, 4>(); + + test_fft_real_input_energy<RowMajor, float, true, Eigen::BothParts, FFT_FORWARD, 1>(); + test_fft_real_input_energy<RowMajor, double, true, Eigen::BothParts, FFT_FORWARD, 1>(); + test_fft_real_input_energy<RowMajor, float, false, Eigen::BothParts, FFT_FORWARD, 1>(); + test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 1>(); + + test_fft_real_input_energy<RowMajor, float, true, Eigen::BothParts, FFT_FORWARD, 2>(); + test_fft_real_input_energy<RowMajor, double, true, Eigen::BothParts, FFT_FORWARD, 2>(); + test_fft_real_input_energy<RowMajor, float, false, Eigen::BothParts, FFT_FORWARD, 2>(); + test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 2>(); + + test_fft_real_input_energy<RowMajor, float, true, Eigen::BothParts, FFT_FORWARD, 3>(); + test_fft_real_input_energy<RowMajor, double, true, Eigen::BothParts, FFT_FORWARD, 3>(); + test_fft_real_input_energy<RowMajor, float, false, Eigen::BothParts, FFT_FORWARD, 3>(); + test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 3>(); + + test_fft_real_input_energy<RowMajor, float, true, Eigen::BothParts, FFT_FORWARD, 4>(); + test_fft_real_input_energy<RowMajor, double, true, Eigen::BothParts, FFT_FORWARD, 4>(); + test_fft_real_input_energy<RowMajor, float, false, Eigen::BothParts, FFT_FORWARD, 4>(); + test_fft_real_input_energy<RowMajor, double, false, Eigen::BothParts, FFT_FORWARD, 4>(); +} diff --git a/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp b/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp new file mode 100644 index 0000000..e6274f8 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_fixed_size.cpp @@ -0,0 +1,261 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::RowMajor; + + +static void test_0d() +{ + TensorFixedSize<float, Sizes<> > scalar1; + TensorFixedSize<float, Sizes<>, RowMajor> scalar2; + VERIFY_IS_EQUAL(scalar1.rank(), 0); + VERIFY_IS_EQUAL(scalar1.size(), 1); + VERIFY_IS_EQUAL(internal::array_prod(scalar1.dimensions()), 1); + + scalar1() = 7.0; + scalar2() = 13.0; + + // Test against shallow copy. 
+ TensorFixedSize<float, Sizes<> > copy = scalar1; + VERIFY_IS_NOT_EQUAL(scalar1.data(), copy.data()); + VERIFY_IS_APPROX(scalar1(), copy()); + copy = scalar1; + VERIFY_IS_NOT_EQUAL(scalar1.data(), copy.data()); + VERIFY_IS_APPROX(scalar1(), copy()); + + TensorFixedSize<float, Sizes<> > scalar3 = scalar1.sqrt(); + TensorFixedSize<float, Sizes<>, RowMajor> scalar4 = scalar2.sqrt(); + VERIFY_IS_EQUAL(scalar3.rank(), 0); + VERIFY_IS_APPROX(scalar3(), sqrtf(7.0)); + VERIFY_IS_APPROX(scalar4(), sqrtf(13.0)); + + scalar3 = scalar1 + scalar2; + VERIFY_IS_APPROX(scalar3(), 7.0f + 13.0f); +} + +static void test_1d() +{ + TensorFixedSize<float, Sizes<6> > vec1; + TensorFixedSize<float, Sizes<6>, RowMajor> vec2; + + VERIFY_IS_EQUAL((vec1.size()), 6); + // VERIFY_IS_EQUAL((vec1.dimensions()[0]), 6); + // VERIFY_IS_EQUAL((vec1.dimension(0)), 6); + + vec1(0) = 4.0; vec2(0) = 0.0; + vec1(1) = 8.0; vec2(1) = 1.0; + vec1(2) = 15.0; vec2(2) = 2.0; + vec1(3) = 16.0; vec2(3) = 3.0; + vec1(4) = 23.0; vec2(4) = 4.0; + vec1(5) = 42.0; vec2(5) = 5.0; + + // Test against shallow copy. + TensorFixedSize<float, Sizes<6> > copy = vec1; + VERIFY_IS_NOT_EQUAL(vec1.data(), copy.data()); + for (int i = 0; i < 6; ++i) { + VERIFY_IS_APPROX(vec1(i), copy(i)); + } + copy = vec1; + VERIFY_IS_NOT_EQUAL(vec1.data(), copy.data()); + for (int i = 0; i < 6; ++i) { + VERIFY_IS_APPROX(vec1(i), copy(i)); + } + + TensorFixedSize<float, Sizes<6> > vec3 = vec1.sqrt(); + TensorFixedSize<float, Sizes<6>, RowMajor> vec4 = vec2.sqrt(); + + VERIFY_IS_EQUAL((vec3.size()), 6); + VERIFY_IS_EQUAL(vec3.rank(), 1); + // VERIFY_IS_EQUAL((vec3.dimensions()[0]), 6); + // VERIFY_IS_EQUAL((vec3.dimension(0)), 6); + + VERIFY_IS_APPROX(vec3(0), sqrtf(4.0)); + VERIFY_IS_APPROX(vec3(1), sqrtf(8.0)); + VERIFY_IS_APPROX(vec3(2), sqrtf(15.0)); + VERIFY_IS_APPROX(vec3(3), sqrtf(16.0)); + VERIFY_IS_APPROX(vec3(4), sqrtf(23.0)); + VERIFY_IS_APPROX(vec3(5), sqrtf(42.0)); + + VERIFY_IS_APPROX(vec4(0), sqrtf(0.0)); + VERIFY_IS_APPROX(vec4(1), sqrtf(1.0)); + VERIFY_IS_APPROX(vec4(2), sqrtf(2.0)); + VERIFY_IS_APPROX(vec4(3), sqrtf(3.0)); + VERIFY_IS_APPROX(vec4(4), sqrtf(4.0)); + VERIFY_IS_APPROX(vec4(5), sqrtf(5.0)); + + vec3 = vec1 + vec2; + VERIFY_IS_APPROX(vec3(0), 4.0f + 0.0f); + VERIFY_IS_APPROX(vec3(1), 8.0f + 1.0f); + VERIFY_IS_APPROX(vec3(2), 15.0f + 2.0f); + VERIFY_IS_APPROX(vec3(3), 16.0f + 3.0f); + VERIFY_IS_APPROX(vec3(4), 23.0f + 4.0f); + VERIFY_IS_APPROX(vec3(5), 42.0f + 5.0f); +} + +static void test_tensor_map() +{ + TensorFixedSize<float, Sizes<6> > vec1; + TensorFixedSize<float, Sizes<6>, RowMajor> vec2; + + vec1(0) = 4.0; vec2(0) = 0.0; + vec1(1) = 8.0; vec2(1) = 1.0; + vec1(2) = 15.0; vec2(2) = 2.0; + vec1(3) = 16.0; vec2(3) = 3.0; + vec1(4) = 23.0; vec2(4) = 4.0; + vec1(5) = 42.0; vec2(5) = 5.0; + + float data3[6]; + TensorMap<TensorFixedSize<float, Sizes<6> > > vec3(data3, 6); + vec3 = vec1.sqrt() + vec2; + + VERIFY_IS_APPROX(vec3(0), sqrtf(4.0)); + VERIFY_IS_APPROX(vec3(1), sqrtf(8.0) + 1.0f); + VERIFY_IS_APPROX(vec3(2), sqrtf(15.0) + 2.0f); + VERIFY_IS_APPROX(vec3(3), sqrtf(16.0) + 3.0f); + VERIFY_IS_APPROX(vec3(4), sqrtf(23.0) + 4.0f); + VERIFY_IS_APPROX(vec3(5), sqrtf(42.0) + 5.0f); +} + +static void test_2d() +{ + float data1[6]; + TensorMap<TensorFixedSize<float, Sizes<2, 3> > > mat1(data1,2,3); + float data2[6]; + TensorMap<TensorFixedSize<float, Sizes<2, 3>, RowMajor> > mat2(data2,2,3); + + VERIFY_IS_EQUAL((mat1.size()), 2*3); + VERIFY_IS_EQUAL(mat1.rank(), 2); + // VERIFY_IS_EQUAL((mat1.dimension(0)), 2); + // 
VERIFY_IS_EQUAL((mat1.dimension(1)), 3); + + mat1(0,0) = 0.0; + mat1(0,1) = 1.0; + mat1(0,2) = 2.0; + mat1(1,0) = 3.0; + mat1(1,1) = 4.0; + mat1(1,2) = 5.0; + + mat2(0,0) = -0.0; + mat2(0,1) = -1.0; + mat2(0,2) = -2.0; + mat2(1,0) = -3.0; + mat2(1,1) = -4.0; + mat2(1,2) = -5.0; + + TensorFixedSize<float, Sizes<2, 3> > mat3; + TensorFixedSize<float, Sizes<2, 3>, RowMajor> mat4; + mat3 = mat1.abs(); + mat4 = mat2.abs(); + + VERIFY_IS_EQUAL((mat3.size()), 2*3); + // VERIFY_IS_EQUAL((mat3.dimension(0)), 2); + // VERIFY_IS_EQUAL((mat3.dimension(1)), 3); + + VERIFY_IS_APPROX(mat3(0,0), 0.0f); + VERIFY_IS_APPROX(mat3(0,1), 1.0f); + VERIFY_IS_APPROX(mat3(0,2), 2.0f); + VERIFY_IS_APPROX(mat3(1,0), 3.0f); + VERIFY_IS_APPROX(mat3(1,1), 4.0f); + VERIFY_IS_APPROX(mat3(1,2), 5.0f); + + VERIFY_IS_APPROX(mat4(0,0), 0.0f); + VERIFY_IS_APPROX(mat4(0,1), 1.0f); + VERIFY_IS_APPROX(mat4(0,2), 2.0f); + VERIFY_IS_APPROX(mat4(1,0), 3.0f); + VERIFY_IS_APPROX(mat4(1,1), 4.0f); + VERIFY_IS_APPROX(mat4(1,2), 5.0f); +} + +static void test_3d() +{ + TensorFixedSize<float, Sizes<2, 3, 7> > mat1; + TensorFixedSize<float, Sizes<2, 3, 7>, RowMajor> mat2; + + VERIFY_IS_EQUAL((mat1.size()), 2*3*7); + VERIFY_IS_EQUAL(mat1.rank(), 3); + // VERIFY_IS_EQUAL((mat1.dimension(0)), 2); + // VERIFY_IS_EQUAL((mat1.dimension(1)), 3); + // VERIFY_IS_EQUAL((mat1.dimension(2)), 7); + + float val = 0.0f; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + mat1(i,j,k) = val; + mat2(i,j,k) = val; + val += 1.0f; + } + } + } + + TensorFixedSize<float, Sizes<2, 3, 7> > mat3; + mat3 = mat1.sqrt(); + TensorFixedSize<float, Sizes<2, 3, 7>, RowMajor> mat4; + mat4 = mat2.sqrt(); + + VERIFY_IS_EQUAL((mat3.size()), 2*3*7); + // VERIFY_IS_EQUAL((mat3.dimension(0)), 2); + // VERIFY_IS_EQUAL((mat3.dimension(1)), 3); + // VERIFY_IS_EQUAL((mat3.dimension(2)), 7); + + + val = 0.0f; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(mat3(i,j,k), sqrtf(val)); + VERIFY_IS_APPROX(mat4(i,j,k), sqrtf(val)); + val += 1.0f; + } + } + } +} + + +static void test_array() +{ + TensorFixedSize<float, Sizes<2, 3, 7> > mat1; + float val = 0.0f; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + mat1(i,j,k) = val; + val += 1.0f; + } + } + } + + TensorFixedSize<float, Sizes<2, 3, 7> > mat3; + mat3 = mat1.pow(3.5f); + + val = 0.0f; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(mat3(i,j,k), powf(val, 3.5f)); + val += 1.0f; + } + } + } +} + +void test_cxx11_tensor_fixed_size() +{ + CALL_SUBTEST(test_0d()); + CALL_SUBTEST(test_1d()); + CALL_SUBTEST(test_tensor_map()); + CALL_SUBTEST(test_2d()); + CALL_SUBTEST(test_3d()); + CALL_SUBTEST(test_array()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_forced_eval.cpp b/eigen/unsupported/test/cxx11_tensor_forced_eval.cpp new file mode 100644 index 0000000..45d7345 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_forced_eval.cpp @@ -0,0 +1,79 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
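+
+// The point of test_simple() below: mat3 appears on both sides of
+// "mat3 = mat3.contract(mat2, dims)", so a lazily evaluated contraction
+// would read coefficients of mat3 that the assignment has already
+// overwritten. The trailing .eval() forces the contraction into a
+// temporary before the assignment takes place:
+//
+//   mat3 = mat3.contract(mat2, dims).eval();  // materialize, then assign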
+ +#include "main.h" + +#include <Eigen/Core> +#include <Eigen/CXX11/Tensor> + +using Eigen::MatrixXf; +using Eigen::Tensor; + +static void test_simple() +{ + MatrixXf m1(3,3); + MatrixXf m2(3,3); + m1.setRandom(); + m2.setRandom(); + + TensorMap<Tensor<float, 2> > mat1(m1.data(), 3,3); + TensorMap<Tensor<float, 2> > mat2(m2.data(), 3,3); + + Tensor<float, 2> mat3(3,3); + mat3 = mat1; + + typedef Tensor<float, 1>::DimensionPair DimPair; + Eigen::array<DimPair, 1> dims; + dims[0] = DimPair(1, 0); + + mat3 = mat3.contract(mat2, dims).eval(); + + VERIFY_IS_APPROX(mat3(0, 0), (m1*m2).eval()(0,0)); + VERIFY_IS_APPROX(mat3(0, 1), (m1*m2).eval()(0,1)); + VERIFY_IS_APPROX(mat3(0, 2), (m1*m2).eval()(0,2)); + VERIFY_IS_APPROX(mat3(1, 0), (m1*m2).eval()(1,0)); + VERIFY_IS_APPROX(mat3(1, 1), (m1*m2).eval()(1,1)); + VERIFY_IS_APPROX(mat3(1, 2), (m1*m2).eval()(1,2)); + VERIFY_IS_APPROX(mat3(2, 0), (m1*m2).eval()(2,0)); + VERIFY_IS_APPROX(mat3(2, 1), (m1*m2).eval()(2,1)); + VERIFY_IS_APPROX(mat3(2, 2), (m1*m2).eval()(2,2)); +} + + +static void test_const() +{ + MatrixXf input(3,3); + input.setRandom(); + MatrixXf output = input; + output.rowwise() -= input.colwise().maxCoeff(); + + Eigen::array<int, 1> depth_dim; + depth_dim[0] = 0; + Tensor<float, 2>::Dimensions dims2d; + dims2d[0] = 1; + dims2d[1] = 3; + Eigen::array<int, 2> bcast; + bcast[0] = 3; + bcast[1] = 1; + const TensorMap<Tensor<const float, 2> > input_tensor(input.data(), 3, 3); + Tensor<float, 2> output_tensor= (input_tensor - input_tensor.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)); + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_APPROX(output(i, j), output_tensor(i, j)); + } + } +} + + +void test_cxx11_tensor_forced_eval() +{ + CALL_SUBTEST(test_simple()); + CALL_SUBTEST(test_const()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp new file mode 100644 index 0000000..aca036c --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp @@ -0,0 +1,76 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
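+
+// Host-side pattern used by the SYCL test below: allocate raw device
+// buffers, wrap them in TensorMaps with the same shape as the host tensors,
+// copy the inputs host-to-device, evaluate the expression with
+// .device(sycl_device), and copy the result back for verification on the
+// host. The .eval() in "(a+b).eval() * b" forces the sum to be materialized
+// on the device before the multiplication.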
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_forced_eval_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; +template <typename DataType, int DataLayout, typename IndexType> +void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) { + + IndexType sizeDim1 = 100; + IndexType sizeDim2 = 20; + IndexType sizeDim3 = 20; + Eigen::array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + Eigen::Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange); + Eigen::Tensor<DataType, 3, DataLayout, IndexType> in2(tensorRange); + Eigen::Tensor<DataType, 3, DataLayout, IndexType> out(tensorRange); + + DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.dimensions().TotalSize()*sizeof(DataType))); + DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(in2.dimensions().TotalSize()*sizeof(DataType))); + DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.dimensions().TotalSize()*sizeof(DataType))); + + in1 = in1.random() + in1.constant(10.0f); + in2 = in2.random() + in2.constant(10.0f); + + // creating TensorMap from tensor + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, tensorRange); + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, tensorRange); + Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange); + sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(),(in1.dimensions().TotalSize())*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in1.dimensions().TotalSize())*sizeof(DataType)); + /// c=(a+b)*b + gpu_out.device(sycl_device) =(gpu_in1 + gpu_in2).eval() * gpu_in2; + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.dimensions().TotalSize())*sizeof(DataType)); + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i, j, k), + (in1(i, j, k) + in2(i, j, k)) * in2(i, j, k)); + } + } + } + printf("(a+b)*b Test Passed\n"); + sycl_device.deallocate(gpu_in1_data); + sycl_device.deallocate(gpu_in2_data); + sycl_device.deallocate(gpu_out_data); + +} + +template <typename DataType, typename Dev_selector> void tensorForced_evalperDevice(Dev_selector s){ + QueueInterface queueInterface(s); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + test_forced_eval_sycl<DataType, RowMajor, int64_t>(sycl_device); + test_forced_eval_sycl<DataType, ColMajor, int64_t>(sycl_device); +} +void test_cxx11_tensor_forced_eval_sycl() { + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(tensorForced_evalperDevice<float>(device)); + } +} diff --git a/eigen/unsupported/test/cxx11_tensor_generator.cpp b/eigen/unsupported/test/cxx11_tensor_generator.cpp new file mode 100644 index 0000000..dcb9287 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_generator.cpp @@ -0,0 +1,91 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
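+
+// A generator is any copyable functor mapping a coordinate array to a value;
+// tensor.generate(gen) uses only the shape of "tensor", never its contents.
+// A minimal sketch beyond the generators tested below (ConstGen is
+// illustrative and not part of this test):
+//
+//   struct ConstGen {
+//     float operator()(const array<Eigen::DenseIndex, 2>&) const { return 1.0f; }
+//   };
+//   Tensor<float, 2> shape_only(3, 3);
+//   Tensor<float, 2> ones = shape_only.generate(ConstGen());  // all 1.0f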
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+
+struct Generator1D {
+  Generator1D() { }
+
+  float operator()(const array<Eigen::DenseIndex, 1>& coordinates) const {
+    return coordinates[0];
+  }
+};
+
+template <int DataLayout>
+static void test_1D()
+{
+  Tensor<float, 1, DataLayout> vec(6);
+  Tensor<float, 1, DataLayout> result = vec.generate(Generator1D());
+
+  for (int i = 0; i < 6; ++i) {
+    VERIFY_IS_EQUAL(result(i), i);
+  }
+}
+
+
+struct Generator2D {
+  Generator2D() { }
+
+  float operator()(const array<Eigen::DenseIndex, 2>& coordinates) const {
+    return 3 * coordinates[0] + 11 * coordinates[1];
+  }
+};
+
+template <int DataLayout>
+static void test_2D()
+{
+  Tensor<float, 2, DataLayout> matrix(5, 7);
+  Tensor<float, 2, DataLayout> result = matrix.generate(Generator2D());
+
+  for (int i = 0; i < 5; ++i) {
+    for (int j = 0; j < 7; ++j) {
+      VERIFY_IS_EQUAL(result(i, j), 3*i + 11*j);
+    }
+  }
+}
+
+
+template <int DataLayout>
+static void test_gaussian()
+{
+  int rows = 32;
+  int cols = 48;
+  array<float, 2> means;
+  means[0] = rows / 2.0f;
+  means[1] = cols / 2.0f;
+  array<float, 2> std_devs;
+  std_devs[0] = 3.14f;
+  std_devs[1] = 2.7f;
+  internal::GaussianGenerator<float, Eigen::DenseIndex, 2> gaussian_gen(means, std_devs);
+
+  Tensor<float, 2, DataLayout> matrix(rows, cols);
+  Tensor<float, 2, DataLayout> result = matrix.generate(gaussian_gen);
+
+  for (int i = 0; i < rows; ++i) {
+    for (int j = 0; j < cols; ++j) {
+      float g_rows = powf(rows/2.0f - i, 2) / (3.14f * 3.14f) * 0.5f;
+      float g_cols = powf(cols/2.0f - j, 2) / (2.7f * 2.7f) * 0.5f;
+      float gaussian = expf(-g_rows - g_cols);
+      VERIFY_IS_EQUAL(result(i, j), gaussian);
+    }
+  }
+}
+
+
+void test_cxx11_tensor_generator()
+{
+  CALL_SUBTEST(test_1D<ColMajor>());
+  CALL_SUBTEST(test_1D<RowMajor>());
+  CALL_SUBTEST(test_2D<ColMajor>());
+  CALL_SUBTEST(test_2D<RowMajor>());
+  CALL_SUBTEST(test_gaussian<ColMajor>());
+  CALL_SUBTEST(test_gaussian<RowMajor>());
+}
diff --git a/eigen/unsupported/test/cxx11_tensor_ifft.cpp b/eigen/unsupported/test/cxx11_tensor_ifft.cpp
new file mode 100644
index 0000000..5fd88fa
--- /dev/null
+++ b/eigen/unsupported/test/cxx11_tensor_ifft.cpp
@@ -0,0 +1,154 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Jianwei Cui <thucjw@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
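+
+// These tests verify the round-trip invariant ifft(fft(x)) == x: with an
+// unnormalized forward transform and a 1/N-normalized reverse transform,
+// the composition is the identity. Only the real part of the round-tripped
+// tensor is compared against the (real) input; the imaginary part is
+// numerical noise and is deliberately ignored.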
+ +#include "main.h" +#include <complex> +#include <cmath> +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template <int DataLayout> +static void test_1D_fft_ifft_invariant(int sequence_length) { + Tensor<double, 1, DataLayout> tensor(sequence_length); + tensor.setRandom(); + + array<int, 1> fft; + fft[0] = 0; + + Tensor<std::complex<double>, 1, DataLayout> tensor_after_fft; + Tensor<std::complex<double>, 1, DataLayout> tensor_after_fft_ifft; + + tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft); + tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft); + + VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), sequence_length); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), sequence_length); + + for (int i = 0; i < sequence_length; ++i) { + VERIFY_IS_APPROX(static_cast<float>(tensor(i)), static_cast<float>(std::real(tensor_after_fft_ifft(i)))); + } +} + +template <int DataLayout> +static void test_2D_fft_ifft_invariant(int dim0, int dim1) { + Tensor<double, 2, DataLayout> tensor(dim0, dim1); + tensor.setRandom(); + + array<int, 2> fft; + fft[0] = 0; + fft[1] = 1; + + Tensor<std::complex<double>, 2, DataLayout> tensor_after_fft; + Tensor<std::complex<double>, 2, DataLayout> tensor_after_fft_ifft; + + tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft); + tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft); + + VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0); + VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1); + + for (int i = 0; i < dim0; ++i) { + for (int j = 0; j < dim1; ++j) { + //std::cout << "[" << i << "][" << j << "]" << " Original data: " << tensor(i,j) << " Transformed data:" << tensor_after_fft_ifft(i,j) << std::endl; + VERIFY_IS_APPROX(static_cast<float>(tensor(i,j)), static_cast<float>(std::real(tensor_after_fft_ifft(i,j)))); + } + } +} + +template <int DataLayout> +static void test_3D_fft_ifft_invariant(int dim0, int dim1, int dim2) { + Tensor<double, 3, DataLayout> tensor(dim0, dim1, dim2); + tensor.setRandom(); + + array<int, 3> fft; + fft[0] = 0; + fft[1] = 1; + fft[2] = 2; + + Tensor<std::complex<double>, 3, DataLayout> tensor_after_fft; + Tensor<std::complex<double>, 3, DataLayout> tensor_after_fft_ifft; + + tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft); + tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft); + + VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0); + VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1); + VERIFY_IS_EQUAL(tensor_after_fft.dimension(2), dim2); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(2), dim2); + + for (int i = 0; i < dim0; ++i) { + for (int j = 0; j < dim1; ++j) { + for (int k = 0; k < dim2; ++k) { + VERIFY_IS_APPROX(static_cast<float>(tensor(i,j,k)), static_cast<float>(std::real(tensor_after_fft_ifft(i,j,k)))); + } + } + } +} + +template <int DataLayout> +static void test_sub_fft_ifft_invariant(int dim0, int dim1, int dim2, int dim3) { + Tensor<double, 4, DataLayout> tensor(dim0, dim1, dim2, dim3); + tensor.setRandom(); + + array<int, 2> fft; + fft[0] = 2; + fft[1] = 0; + + Tensor<std::complex<double>, 4, DataLayout> tensor_after_fft; + 
Tensor<double, 4, DataLayout> tensor_after_fft_ifft; + + tensor_after_fft = tensor.template fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft); + tensor_after_fft_ifft = tensor_after_fft.template fft<Eigen::RealPart, Eigen::FFT_REVERSE>(fft); + + VERIFY_IS_EQUAL(tensor_after_fft.dimension(0), dim0); + VERIFY_IS_EQUAL(tensor_after_fft.dimension(1), dim1); + VERIFY_IS_EQUAL(tensor_after_fft.dimension(2), dim2); + VERIFY_IS_EQUAL(tensor_after_fft.dimension(3), dim3); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(0), dim0); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(1), dim1); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(2), dim2); + VERIFY_IS_EQUAL(tensor_after_fft_ifft.dimension(3), dim3); + + for (int i = 0; i < dim0; ++i) { + for (int j = 0; j < dim1; ++j) { + for (int k = 0; k < dim2; ++k) { + for (int l = 0; l < dim3; ++l) { + VERIFY_IS_APPROX(static_cast<float>(tensor(i,j,k,l)), static_cast<float>(tensor_after_fft_ifft(i,j,k,l))); + } + } + } + } +} + +void test_cxx11_tensor_ifft() { + CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(4)); + CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(16)); + CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(32)); + CALL_SUBTEST(test_1D_fft_ifft_invariant<ColMajor>(1024*1024)); + + CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(4,4)); + CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(8,16)); + CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(16,32)); + CALL_SUBTEST(test_2D_fft_ifft_invariant<ColMajor>(1024,1024)); + + CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(4,4,4)); + CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(8,16,32)); + CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(16,4,8)); + CALL_SUBTEST(test_3D_fft_ifft_invariant<ColMajor>(256,256,256)); + + CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(4,4,4,4)); + CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(8,16,32,64)); + CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(16,4,8,12)); + CALL_SUBTEST(test_sub_fft_ifft_invariant<ColMajor>(64,64,64,64)); +} diff --git a/eigen/unsupported/test/cxx11_tensor_image_patch.cpp b/eigen/unsupported/test/cxx11_tensor_image_patch.cpp new file mode 100644 index 0000000..475c596 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_image_patch.cpp @@ -0,0 +1,757 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
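+
+// Shape convention exercised throughout this file: for a ColMajor input of
+// shape (depth, rows, cols, batch), extract_image_patches(patch_rows,
+// patch_cols) returns (depth, patch_rows, patch_cols, num_patches, batch),
+// and with the default stride-1 SAME padding num_patches == rows * cols.
+// Worked example from test_simple_patch() below: a (2,3,5,7) input with 1x1
+// patches yields (2,1,1,15,7), each patch being exactly one input pixel.
+// RowMajor inputs produce the same dimensions in reverse order.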
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +void test_simple_patch() +{ + Tensor<float, 4> tensor(2,3,5,7); + tensor.setRandom(); + Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout(); + VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3)); + VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2)); + VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1)); + VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0)); + + // Single pixel patch: ColMajor + Tensor<float, 5> single_pixel_patch; + single_pixel_patch = tensor.extract_image_patches(1, 1); + VERIFY_IS_EQUAL(single_pixel_patch.dimension(0), 2); + VERIFY_IS_EQUAL(single_pixel_patch.dimension(1), 1); + VERIFY_IS_EQUAL(single_pixel_patch.dimension(2), 1); + VERIFY_IS_EQUAL(single_pixel_patch.dimension(3), 3*5); + VERIFY_IS_EQUAL(single_pixel_patch.dimension(4), 7); + + // Single pixel patch: RowMajor + Tensor<float, 5, RowMajor> single_pixel_patch_row_major; + single_pixel_patch_row_major = tensor_row_major.extract_image_patches(1, 1); + VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(0), 7); + VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(1), 3*5); + VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(2), 1); + VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(3), 1); + VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(4), 2); + + for (int i = 0; i < tensor.size(); ++i) { + // ColMajor + if (tensor.data()[i] != single_pixel_patch.data()[i]) { + std::cout << "Mismatch detected at index " << i << " : " + << tensor.data()[i] << " vs " << single_pixel_patch.data()[i] + << std::endl; + } + VERIFY_IS_EQUAL(single_pixel_patch.data()[i], tensor.data()[i]); + // RowMajor + if (tensor_row_major.data()[i] != single_pixel_patch_row_major.data()[i]) { + std::cout << "Mismatch detected at index " << i << " : " + << tensor.data()[i] << " vs " + << single_pixel_patch_row_major.data()[i] << std::endl; + } + VERIFY_IS_EQUAL(single_pixel_patch_row_major.data()[i], + tensor_row_major.data()[i]); + VERIFY_IS_EQUAL(tensor.data()[i], tensor_row_major.data()[i]); + VERIFY_IS_EQUAL(single_pixel_patch.data()[i], + single_pixel_patch_row_major.data()[i]); + } + + // Entire image patch: ColMajor + Tensor<float, 5> entire_image_patch; + entire_image_patch = tensor.extract_image_patches(3, 5); + VERIFY_IS_EQUAL(entire_image_patch.dimension(0), 2); + VERIFY_IS_EQUAL(entire_image_patch.dimension(1), 3); + VERIFY_IS_EQUAL(entire_image_patch.dimension(2), 5); + VERIFY_IS_EQUAL(entire_image_patch.dimension(3), 3*5); + VERIFY_IS_EQUAL(entire_image_patch.dimension(4), 7); + + // Entire image patch: RowMajor + Tensor<float, 5, RowMajor> entire_image_patch_row_major; + entire_image_patch_row_major = tensor_row_major.extract_image_patches(3, 5); + VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 7); + VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 3*5); + VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 5); + VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 3); + VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(4), 2); + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 5; ++j) { + int patchId = i+3*j; + for (int r = 0; r < 3; ++r) { + for (int c = 0; c < 5; ++c) { + for (int d = 0; d < 2; ++d) { + for (int b = 0; b < 7; ++b) { + float expected = 0.0f; + float expected_row_major = 0.0f; + if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) { + expected = tensor(d, r-1+i, c-2+j, b); + 
expected_row_major = tensor_row_major(b, c-2+j, r-1+i, d); + } + // ColMajor + if (entire_image_patch(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(entire_image_patch(d, r, c, patchId, b), expected); + // RowMajor + if (entire_image_patch_row_major(b, patchId, c, r, d) != + expected_row_major) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j + << " r=" << r << " c=" << c << " d=" << d << " b=" << b + << std::endl; + } + VERIFY_IS_EQUAL(entire_image_patch_row_major(b, patchId, c, r, d), + expected_row_major); + // Check that ColMajor and RowMajor agree. + VERIFY_IS_EQUAL(expected, expected_row_major); + } + } + } + } + } + } + + // 2D patch: ColMajor + Tensor<float, 5> twod_patch; + twod_patch = tensor.extract_image_patches(2, 2); + VERIFY_IS_EQUAL(twod_patch.dimension(0), 2); + VERIFY_IS_EQUAL(twod_patch.dimension(1), 2); + VERIFY_IS_EQUAL(twod_patch.dimension(2), 2); + VERIFY_IS_EQUAL(twod_patch.dimension(3), 3*5); + VERIFY_IS_EQUAL(twod_patch.dimension(4), 7); + + // 2D patch: RowMajor + Tensor<float, 5, RowMajor> twod_patch_row_major; + twod_patch_row_major = tensor_row_major.extract_image_patches(2, 2); + VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 7); + VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 3*5); + VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2); + VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2); + VERIFY_IS_EQUAL(twod_patch_row_major.dimension(4), 2); + + + // Based on the calculation described in TensorTraits.h, padding happens to be 0. + int row_padding = 0; + int col_padding = 0; + int stride = 1; + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 5; ++j) { + int patchId = i+3*j; + for (int r = 0; r < 2; ++r) { + for (int c = 0; c < 2; ++c) { + for (int d = 0; d < 2; ++d) { + for (int b = 0; b < 7; ++b) { + float expected = 0.0f; + float expected_row_major = 0.0f; + int row_offset = r*stride + i - row_padding; + int col_offset = c*stride + j - col_padding; + // ColMajor + if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor.dimension(1) && col_offset < tensor.dimension(2)) { + expected = tensor(d, row_offset, col_offset, b); + } + if (twod_patch(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(twod_patch(d, r, c, patchId, b), expected); + + // RowMajor + if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(2) && col_offset < tensor_row_major.dimension(1)) { + expected_row_major = tensor_row_major(b, col_offset, row_offset, d); + + } + if (twod_patch_row_major(b, patchId, c, r, d) != expected_row_major) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(twod_patch_row_major(b, patchId, c, r, d), expected_row_major); + // Check that ColMajor and RowMajor agree. + VERIFY_IS_EQUAL(expected, expected_row_major); + } + } + } + } + } + } +} + +// Verifies VALID padding (no padding) with incrementing values. +void test_patch_padding_valid() +{ + int input_depth = 3; + int input_rows = 3; + int input_cols = 3; + int input_batches = 1; + int ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>. + int stride = 2; // Only same stride is supported. 
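+  // With VALID padding only patch positions that fit entirely inside the
+  // image are kept: a 3x3 image, a 2x2 kernel and stride 2 admit exactly
+  // one patch position, (0,0), hence result.dimension(3) == 1 below.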
+  Tensor<float, 4> tensor(input_depth, input_rows, input_cols, input_batches);
+  // Initializes tensor with incrementing numbers.
+  for (int i = 0; i < tensor.size(); ++i) {
+    tensor.data()[i] = i + 1;
+  }
+  // ColMajor
+  Tensor<float, 5> result = tensor.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
+
+  VERIFY_IS_EQUAL(result.dimension(0), input_depth);  // depth
+  VERIFY_IS_EQUAL(result.dimension(1), ksize);  // kernel rows
+  VERIFY_IS_EQUAL(result.dimension(2), ksize);  // kernel cols
+  VERIFY_IS_EQUAL(result.dimension(3), 1);  // number of patches
+  VERIFY_IS_EQUAL(result.dimension(4), input_batches);  // number of batches
+
+  // RowMajor
+  Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout();
+  VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3));
+  VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2));
+  VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1));
+  VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0));
+
+  Tensor<float, 5, RowMajor> result_row_major = tensor_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID);
+  VERIFY_IS_EQUAL(result.dimension(0), result_row_major.dimension(4));
+  VERIFY_IS_EQUAL(result.dimension(1), result_row_major.dimension(3));
+  VERIFY_IS_EQUAL(result.dimension(2), result_row_major.dimension(2));
+  VERIFY_IS_EQUAL(result.dimension(3), result_row_major.dimension(1));
+  VERIFY_IS_EQUAL(result.dimension(4), result_row_major.dimension(0));
+
+  // No padding is carried out.
+  int row_padding = 0;
+  int col_padding = 0;
+
+  // The loop bounds are inclusive so that the single valid patch position
+  // (0,0) is actually visited by the checks below.
+  for (int i = 0; (i+stride+ksize-1) <= input_rows; i += stride) {  // input rows
+    for (int j = 0; (j+stride+ksize-1) <= input_cols; j += stride) {  // input cols
+      int patchId = i+input_rows*j;
+      for (int r = 0; r < ksize; ++r) {  // patch rows
+        for (int c = 0; c < ksize; ++c) {  // patch cols
+          for (int d = 0; d < input_depth; ++d) {  // depth
+            for (int b = 0; b < input_batches; ++b) {  // batch
+              float expected = 0.0f;
+              float expected_row_major = 0.0f;
+              int row_offset = r + i - row_padding;
+              int col_offset = c + j - col_padding;
+              if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) {
+                expected = tensor(d, row_offset, col_offset, b);
+                expected_row_major = tensor_row_major(b, col_offset, row_offset, d);
+              }
+              // ColMajor
+              if (result(d, r, c, patchId, b) != expected) {
+                std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+              }
+              VERIFY_IS_EQUAL(result(d, r, c, patchId, b), expected);
+              // RowMajor
+              if (result_row_major(b, patchId, c, r, d) != expected_row_major) {
+                std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl;
+              }
+              VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major);
+              // Check that ColMajor and RowMajor agree.
+              VERIFY_IS_EQUAL(expected, expected_row_major);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+// Verifies VALID padding (no padding) with the same value.
+void test_patch_padding_valid_same_value()
+{
+  int input_depth = 1;
+  int input_rows = 5;
+  int input_cols = 5;
+  int input_batches = 2;
+  int ksize = 3;  // Corresponds to the Rows and Cols for tensor.extract_image_patches<>.
+  int stride = 2;  // Only same stride is supported.
+ // ColMajor + Tensor<float, 4> tensor(input_depth, input_rows, input_cols, input_batches); + tensor = tensor.constant(11.0f); + Tensor<float, 5> result = tensor.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID); + + VERIFY_IS_EQUAL(result.dimension(0), input_depth); // depth + VERIFY_IS_EQUAL(result.dimension(1), ksize); // kernel rows + VERIFY_IS_EQUAL(result.dimension(2), ksize); // kernel cols + VERIFY_IS_EQUAL(result.dimension(3), 4); // number of patches + VERIFY_IS_EQUAL(result.dimension(4), input_batches); // number of batches + + // RowMajor + Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout(); + VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3)); + VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2)); + VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1)); + VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0)); + + Tensor<float, 5, RowMajor> result_row_major = tensor_row_major.extract_image_patches(ksize, ksize, stride, stride, 1, 1, PADDING_VALID); + VERIFY_IS_EQUAL(result.dimension(0), result_row_major.dimension(4)); + VERIFY_IS_EQUAL(result.dimension(1), result_row_major.dimension(3)); + VERIFY_IS_EQUAL(result.dimension(2), result_row_major.dimension(2)); + VERIFY_IS_EQUAL(result.dimension(3), result_row_major.dimension(1)); + VERIFY_IS_EQUAL(result.dimension(4), result_row_major.dimension(0)); + + // No padding is carried out. + int row_padding = 0; + int col_padding = 0; + + for (int i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows + for (int j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols + int patchId = i+input_rows*j; + for (int r = 0; r < ksize; ++r) { // patch rows + for (int c = 0; c < ksize; ++c) { // patch cols + for (int d = 0; d < input_depth; ++d) { // depth + for (int b = 0; b < input_batches; ++b) { // batch + float expected = 0.0f; + float expected_row_major = 0.0f; + int row_offset = r + i - row_padding; + int col_offset = c + j - col_padding; + if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) { + expected = tensor(d, row_offset, col_offset, b); + expected_row_major = tensor_row_major(b, col_offset, row_offset, d); + } + // ColMajor + if (result(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(result(d, r, c, patchId, b), expected); + // RowMajor + if (result_row_major(b, patchId, c, r, d) != expected_row_major) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major); + // Check that ColMajor and RowMajor agree. + VERIFY_IS_EQUAL(expected, expected_row_major); + } + } + } + } + } + } +} + +// Verifies SAME padding. +void test_patch_padding_same() +{ + int input_depth = 3; + int input_rows = 4; + int input_cols = 2; + int input_batches = 1; + int ksize = 2; // Corresponds to the Rows and Cols for tensor.extract_image_patches<>. + int stride = 2; // Only same stride is supported. + // ColMajor + Tensor<float, 4> tensor(input_depth, input_rows, input_cols, input_batches); + // Initializes tensor with incrementing numbers. 
+ for (int i = 0; i < tensor.size(); ++i) { + tensor.data()[i] = i + 1; + } + Tensor<float, 5> result = tensor.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME); + + VERIFY_IS_EQUAL(result.dimension(0), input_depth); // depth + VERIFY_IS_EQUAL(result.dimension(1), ksize); // kernel rows + VERIFY_IS_EQUAL(result.dimension(2), ksize); // kernel cols + VERIFY_IS_EQUAL(result.dimension(3), 2); // number of patches + VERIFY_IS_EQUAL(result.dimension(4), input_batches); // number of batches + + // RowMajor + Tensor<float, 4, RowMajor> tensor_row_major = tensor.swap_layout(); + VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(3)); + VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(2)); + VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(1)); + VERIFY_IS_EQUAL(tensor.dimension(3), tensor_row_major.dimension(0)); + + Tensor<float, 5, RowMajor> result_row_major = tensor_row_major.extract_image_patches(ksize, ksize, stride, stride, PADDING_SAME); + VERIFY_IS_EQUAL(result.dimension(0), result_row_major.dimension(4)); + VERIFY_IS_EQUAL(result.dimension(1), result_row_major.dimension(3)); + VERIFY_IS_EQUAL(result.dimension(2), result_row_major.dimension(2)); + VERIFY_IS_EQUAL(result.dimension(3), result_row_major.dimension(1)); + VERIFY_IS_EQUAL(result.dimension(4), result_row_major.dimension(0)); + + // Based on the calculation described in TensorTraits.h, padding happens to be + // 0. + int row_padding = 0; + int col_padding = 0; + + for (int i = 0; (i+stride+ksize-1) <= input_rows; i += stride) { // input rows + for (int j = 0; (j+stride+ksize-1) <= input_cols; j += stride) { // input cols + int patchId = i+input_rows*j; + for (int r = 0; r < ksize; ++r) { // patch rows + for (int c = 0; c < ksize; ++c) { // patch cols + for (int d = 0; d < input_depth; ++d) { // depth + for (int b = 0; b < input_batches; ++b) { // batch + float expected = 0.0f; + float expected_row_major = 0.0f; + int row_offset = r*stride + i - row_padding; + int col_offset = c*stride + j - col_padding; + if (row_offset >= 0 && col_offset >= 0 && row_offset < input_rows && col_offset < input_cols) { + expected = tensor(d, row_offset, col_offset, b); + expected_row_major = tensor_row_major(b, col_offset, row_offset, d); + } + // ColMajor + if (result(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(result(d, r, c, patchId, b), expected); + // RowMajor + if (result_row_major(b, patchId, c, r, d) != expected_row_major) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(result_row_major(b, patchId, c, r, d), expected_row_major); + // Check that ColMajor and RowMajor agree. 
+ VERIFY_IS_EQUAL(expected, expected_row_major); + } + } + } + } + } + } +} + +void test_patch_no_extra_dim() +{ + Tensor<float, 3> tensor(2,3,5); + tensor.setRandom(); + Tensor<float, 3, RowMajor> tensor_row_major = tensor.swap_layout(); + VERIFY_IS_EQUAL(tensor.dimension(0), tensor_row_major.dimension(2)); + VERIFY_IS_EQUAL(tensor.dimension(1), tensor_row_major.dimension(1)); + VERIFY_IS_EQUAL(tensor.dimension(2), tensor_row_major.dimension(0)); + + // Single pixel patch: ColMajor + Tensor<float, 4> single_pixel_patch; + single_pixel_patch = tensor.extract_image_patches(1, 1); + VERIFY_IS_EQUAL(single_pixel_patch.dimension(0), 2); + VERIFY_IS_EQUAL(single_pixel_patch.dimension(1), 1); + VERIFY_IS_EQUAL(single_pixel_patch.dimension(2), 1); + VERIFY_IS_EQUAL(single_pixel_patch.dimension(3), 3*5); + + // Single pixel patch: RowMajor + Tensor<float, 4, RowMajor> single_pixel_patch_row_major; + single_pixel_patch_row_major = tensor_row_major.extract_image_patches(1, 1); + VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(0), 3*5); + VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(1), 1); + VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(2), 1); + VERIFY_IS_EQUAL(single_pixel_patch_row_major.dimension(3), 2); + + for (int i = 0; i < tensor.size(); ++i) { + // ColMajor + if (tensor.data()[i] != single_pixel_patch.data()[i]) { + std::cout << "Mismatch detected at index " << i << " : " << tensor.data()[i] << " vs " << single_pixel_patch.data()[i] << std::endl; + } + VERIFY_IS_EQUAL(single_pixel_patch.data()[i], tensor.data()[i]); + // RowMajor + if (tensor_row_major.data()[i] != single_pixel_patch_row_major.data()[i]) { + std::cout << "Mismatch detected at index " << i << " : " + << tensor.data()[i] << " vs " + << single_pixel_patch_row_major.data()[i] << std::endl; + } + VERIFY_IS_EQUAL(single_pixel_patch_row_major.data()[i], + tensor_row_major.data()[i]); + VERIFY_IS_EQUAL(tensor.data()[i], tensor_row_major.data()[i]); + VERIFY_IS_EQUAL(single_pixel_patch.data()[i], + single_pixel_patch_row_major.data()[i]); + } + + // Entire image patch: ColMajor + Tensor<float, 4> entire_image_patch; + entire_image_patch = tensor.extract_image_patches(3, 5); + VERIFY_IS_EQUAL(entire_image_patch.dimension(0), 2); + VERIFY_IS_EQUAL(entire_image_patch.dimension(1), 3); + VERIFY_IS_EQUAL(entire_image_patch.dimension(2), 5); + VERIFY_IS_EQUAL(entire_image_patch.dimension(3), 3*5); + + // Entire image patch: RowMajor + Tensor<float, 4, RowMajor> entire_image_patch_row_major; + entire_image_patch_row_major = tensor_row_major.extract_image_patches(3, 5); + VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(0), 3*5); + VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(1), 5); + VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(2), 3); + VERIFY_IS_EQUAL(entire_image_patch_row_major.dimension(3), 2); + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 5; ++j) { + int patchId = i+3*j; + for (int r = 0; r < 3; ++r) { + for (int c = 0; c < 5; ++c) { + for (int d = 0; d < 2; ++d) { + float expected = 0.0f; + float expected_row_major = 0.0f; + if (r-1+i >= 0 && c-2+j >= 0 && r-1+i < 3 && c-2+j < 5) { + expected = tensor(d, r-1+i, c-2+j); + expected_row_major = tensor_row_major(c-2+j, r-1+i, d); + } + // ColMajor + if (entire_image_patch(d, r, c, patchId) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl; + } + VERIFY_IS_EQUAL(entire_image_patch(d, r, c, patchId), expected); + // RowMajor + if 
(entire_image_patch_row_major(patchId, c, r, d) != + expected_row_major) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl; + } + VERIFY_IS_EQUAL(entire_image_patch_row_major(patchId, c, r, d), + expected_row_major); + // Check that ColMajor and RowMajor agree. + VERIFY_IS_EQUAL(expected, expected_row_major); + } + } + } + } + } + + // 2D patch: ColMajor + Tensor<float, 4> twod_patch; + twod_patch = tensor.extract_image_patches(2, 2); + VERIFY_IS_EQUAL(twod_patch.dimension(0), 2); + VERIFY_IS_EQUAL(twod_patch.dimension(1), 2); + VERIFY_IS_EQUAL(twod_patch.dimension(2), 2); + VERIFY_IS_EQUAL(twod_patch.dimension(3), 3*5); + + // 2D patch: RowMajor + Tensor<float, 4, RowMajor> twod_patch_row_major; + twod_patch_row_major = tensor_row_major.extract_image_patches(2, 2); + VERIFY_IS_EQUAL(twod_patch_row_major.dimension(0), 3*5); + VERIFY_IS_EQUAL(twod_patch_row_major.dimension(1), 2); + VERIFY_IS_EQUAL(twod_patch_row_major.dimension(2), 2); + VERIFY_IS_EQUAL(twod_patch_row_major.dimension(3), 2); + + // Based on the calculation described in TensorTraits.h, padding happens to be 0. + int row_padding = 0; + int col_padding = 0; + int stride = 1; + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 5; ++j) { + int patchId = i+3*j; + for (int r = 0; r < 2; ++r) { + for (int c = 0; c < 2; ++c) { + for (int d = 0; d < 2; ++d) { + float expected = 0.0f; + float expected_row_major = 0.0f; + int row_offset = r*stride + i - row_padding; + int col_offset = c*stride + j - col_padding; + // ColMajor + if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor.dimension(1) && col_offset < tensor.dimension(2)) { + expected = tensor(d, row_offset, col_offset); + } + if (twod_patch(d, r, c, patchId) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl; + } + VERIFY_IS_EQUAL(twod_patch(d, r, c, patchId), expected); + // RowMajor + if (row_offset >= 0 && col_offset >= 0 && row_offset < tensor_row_major.dimension(1) && col_offset < tensor_row_major.dimension(0)) { + expected_row_major = tensor_row_major(col_offset, row_offset, d); + } + if (twod_patch_row_major(patchId, c, r, d) != expected_row_major) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << std::endl; + } + VERIFY_IS_EQUAL(twod_patch_row_major(patchId, c, r, d), expected_row_major); + // Check that ColMajor and RowMajor agree. 
+ VERIFY_IS_EQUAL(expected, expected_row_major); + } + } + } + } + } +} + +void test_imagenet_patches() +{ + // Test the code on typical configurations used by the 'imagenet' benchmarks at + // https://github.com/soumith/convnet-benchmarks + // ColMajor + Tensor<float, 4> l_in(3, 128, 128, 16); + l_in.setRandom(); + Tensor<float, 5> l_out = l_in.extract_image_patches(11, 11); + VERIFY_IS_EQUAL(l_out.dimension(0), 3); + VERIFY_IS_EQUAL(l_out.dimension(1), 11); + VERIFY_IS_EQUAL(l_out.dimension(2), 11); + VERIFY_IS_EQUAL(l_out.dimension(3), 128*128); + VERIFY_IS_EQUAL(l_out.dimension(4), 16); + + // RowMajor + Tensor<float, 5, RowMajor> l_out_row_major = l_in.swap_layout().extract_image_patches(11, 11); + VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 16); + VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 128*128); + VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 11); + VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 11); + VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 3); + + for (int b = 0; b < 16; ++b) { + for (int i = 0; i < 128; ++i) { + for (int j = 0; j < 128; ++j) { + int patchId = i+128*j; + for (int c = 0; c < 11; ++c) { + for (int r = 0; r < 11; ++r) { + for (int d = 0; d < 3; ++d) { + float expected = 0.0f; + if (r-5+i >= 0 && c-5+j >= 0 && r-5+i < 128 && c-5+j < 128) { + expected = l_in(d, r-5+i, c-5+j, b); + } + // ColMajor + if (l_out(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); + // RowMajor + if (l_out_row_major(b, patchId, c, r, d) != + expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j + << " r=" << r << " c=" << c << " d=" << d << " b=" << b + << std::endl; + } + VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), + expected); + } + } + } + } + } + } + + // ColMajor + l_in.resize(16, 64, 64, 32); + l_in.setRandom(); + l_out = l_in.extract_image_patches(9, 9); + VERIFY_IS_EQUAL(l_out.dimension(0), 16); + VERIFY_IS_EQUAL(l_out.dimension(1), 9); + VERIFY_IS_EQUAL(l_out.dimension(2), 9); + VERIFY_IS_EQUAL(l_out.dimension(3), 64*64); + VERIFY_IS_EQUAL(l_out.dimension(4), 32); + + // RowMajor + l_out_row_major = l_in.swap_layout().extract_image_patches(9, 9); + VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32); + VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 64*64); + VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 9); + VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 9); + VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 16); + + for (int b = 0; b < 32; ++b) { + for (int i = 0; i < 64; ++i) { + for (int j = 0; j < 64; ++j) { + int patchId = i+64*j; + for (int c = 0; c < 9; ++c) { + for (int r = 0; r < 9; ++r) { + for (int d = 0; d < 16; ++d) { + float expected = 0.0f; + if (r-4+i >= 0 && c-4+j >= 0 && r-4+i < 64 && c-4+j < 64) { + expected = l_in(d, r-4+i, c-4+j, b); + } + // ColMajor + if (l_out(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); + // RowMajor + if (l_out_row_major(b, patchId, c, r, d) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected); + } + } + } + } + } + } + + // ColMajor + l_in.resize(32, 16, 16, 32); + 
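+ // resize() does not preserve the previous coefficients, hence the fresh setRandom().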
l_in.setRandom(); + l_out = l_in.extract_image_patches(7, 7); + VERIFY_IS_EQUAL(l_out.dimension(0), 32); + VERIFY_IS_EQUAL(l_out.dimension(1), 7); + VERIFY_IS_EQUAL(l_out.dimension(2), 7); + VERIFY_IS_EQUAL(l_out.dimension(3), 16*16); + VERIFY_IS_EQUAL(l_out.dimension(4), 32); + + // RowMajor + l_out_row_major = l_in.swap_layout().extract_image_patches(7, 7); + VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32); + VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 16*16); + VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 7); + VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 7); + VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 32); + + for (int b = 0; b < 32; ++b) { + for (int i = 0; i < 16; ++i) { + for (int j = 0; j < 16; ++j) { + int patchId = i+16*j; + for (int c = 0; c < 7; ++c) { + for (int r = 0; r < 7; ++r) { + for (int d = 0; d < 32; ++d) { + float expected = 0.0f; + if (r-3+i >= 0 && c-3+j >= 0 && r-3+i < 16 && c-3+j < 16) { + expected = l_in(d, r-3+i, c-3+j, b); + } + // ColMajor + if (l_out(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); + // RowMajor + if (l_out_row_major(b, patchId, c, r, d) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected); + } + } + } + } + } + } + + // ColMajor + l_in.resize(64, 13, 13, 32); + l_in.setRandom(); + l_out = l_in.extract_image_patches(3, 3); + VERIFY_IS_EQUAL(l_out.dimension(0), 64); + VERIFY_IS_EQUAL(l_out.dimension(1), 3); + VERIFY_IS_EQUAL(l_out.dimension(2), 3); + VERIFY_IS_EQUAL(l_out.dimension(3), 13*13); + VERIFY_IS_EQUAL(l_out.dimension(4), 32); + + // RowMajor + l_out_row_major = l_in.swap_layout().extract_image_patches(3, 3); + VERIFY_IS_EQUAL(l_out_row_major.dimension(0), 32); + VERIFY_IS_EQUAL(l_out_row_major.dimension(1), 13*13); + VERIFY_IS_EQUAL(l_out_row_major.dimension(2), 3); + VERIFY_IS_EQUAL(l_out_row_major.dimension(3), 3); + VERIFY_IS_EQUAL(l_out_row_major.dimension(4), 64); + + for (int b = 0; b < 32; ++b) { + for (int i = 0; i < 13; ++i) { + for (int j = 0; j < 13; ++j) { + int patchId = i+13*j; + for (int c = 0; c < 3; ++c) { + for (int r = 0; r < 3; ++r) { + for (int d = 0; d < 64; ++d) { + float expected = 0.0f; + if (r-1+i >= 0 && c-1+j >= 0 && r-1+i < 13 && c-1+j < 13) { + expected = l_in(d, r-1+i, c-1+j, b); + } + // ColMajor + if (l_out(d, r, c, patchId, b) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(l_out(d, r, c, patchId, b), expected); + // RowMajor + if (l_out_row_major(b, patchId, c, r, d) != expected) { + std::cout << "Mismatch detected at index i=" << i << " j=" << j << " r=" << r << " c=" << c << " d=" << d << " b=" << b << std::endl; + } + VERIFY_IS_EQUAL(l_out_row_major(b, patchId, c, r, d), expected); + } + } + } + } + } + } +} + +void test_cxx11_tensor_image_patch() +{ + CALL_SUBTEST_1(test_simple_patch()); + CALL_SUBTEST_2(test_patch_no_extra_dim()); + CALL_SUBTEST_3(test_patch_padding_valid()); + CALL_SUBTEST_4(test_patch_padding_valid_same_value()); + CALL_SUBTEST_5(test_patch_padding_same()); + CALL_SUBTEST_6(test_imagenet_patches()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_index_list.cpp 
b/eigen/unsupported/test/cxx11_tensor_index_list.cpp new file mode 100644 index 0000000..4cf5df6 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_index_list.cpp @@ -0,0 +1,386 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +#ifdef EIGEN_HAS_INDEX_LIST + +static void test_static_index_list() +{ + Tensor<float, 4> tensor(2,3,5,7); + tensor.setRandom(); + + constexpr auto reduction_axis = make_index_list(0, 1, 2); + VERIFY_IS_EQUAL(internal::array_get<0>(reduction_axis), 0); + VERIFY_IS_EQUAL(internal::array_get<1>(reduction_axis), 1); + VERIFY_IS_EQUAL(internal::array_get<2>(reduction_axis), 2); + VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[0]), 0); + VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[1]), 1); + VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[2]), 2); + + EIGEN_STATIC_ASSERT((internal::array_get<0>(reduction_axis) == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::array_get<1>(reduction_axis) == 1), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::array_get<2>(reduction_axis) == 2), YOU_MADE_A_PROGRAMMING_MISTAKE); + + Tensor<float, 1> result = tensor.sum(reduction_axis); + for (int i = 0; i < result.size(); ++i) { + float expected = 0.0f; + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 3; ++k) { + for (int l = 0; l < 5; ++l) { + expected += tensor(j,k,l,i); + } + } + } + VERIFY_IS_APPROX(result(i), expected); + } +} + + +static void test_type2index_list() +{ + Tensor<float, 5> tensor(2,3,5,7,11); + tensor.setRandom(); + tensor += tensor.constant(10.0f); + + typedef Eigen::IndexList<Eigen::type2index<0>> Dims0; + typedef Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1>> Dims1; + typedef Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1>, Eigen::type2index<2>> Dims2; + typedef Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1>, Eigen::type2index<2>, Eigen::type2index<3>> Dims3; + typedef Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1>, Eigen::type2index<2>, Eigen::type2index<3>, Eigen::type2index<4>> Dims4; + +#if 0 + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims0>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims1>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims2>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims3>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<Dims4>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); +#endif + + EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims0, 1, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims1, 2, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims2, 3, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims3, 4, ColMajor>::value == true), 
YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims4, 5, ColMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims0, 1, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims1, 2, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims2, 3, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims3, 4, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::are_inner_most_dims<Dims4, 5, RowMajor>::value == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + + const Dims0 reduction_axis0; + Tensor<float, 4> result0 = tensor.sum(reduction_axis0); + for (int m = 0; m < 11; ++m) { + for (int l = 0; l < 7; ++l) { + for (int k = 0; k < 5; ++k) { + for (int j = 0; j < 3; ++j) { + float expected = 0.0f; + for (int i = 0; i < 2; ++i) { + expected += tensor(i,j,k,l,m); + } + VERIFY_IS_APPROX(result0(j,k,l,m), expected); + } + } + } + } + + const Dims1 reduction_axis1; + Tensor<float, 3> result1 = tensor.sum(reduction_axis1); + for (int m = 0; m < 11; ++m) { + for (int l = 0; l < 7; ++l) { + for (int k = 0; k < 5; ++k) { + float expected = 0.0f; + for (int j = 0; j < 3; ++j) { + for (int i = 0; i < 2; ++i) { + expected += tensor(i,j,k,l,m); + } + } + VERIFY_IS_APPROX(result1(k,l,m), expected); + } + } + } + + const Dims2 reduction_axis2; + Tensor<float, 2> result2 = tensor.sum(reduction_axis2); + for (int m = 0; m < 11; ++m) { + for (int l = 0; l < 7; ++l) { + float expected = 0.0f; + for (int k = 0; k < 5; ++k) { + for (int j = 0; j < 3; ++j) { + for (int i = 0; i < 2; ++i) { + expected += tensor(i,j,k,l,m); + } + } + } + VERIFY_IS_APPROX(result2(l,m), expected); + } + } + + const Dims3 reduction_axis3; + Tensor<float, 1> result3 = tensor.sum(reduction_axis3); + for (int m = 0; m < 11; ++m) { + float expected = 0.0f; + for (int l = 0; l < 7; ++l) { + for (int k = 0; k < 5; ++k) { + for (int j = 0; j < 3; ++j) { + for (int i = 0; i < 2; ++i) { + expected += tensor(i,j,k,l,m); + } + } + } + } + VERIFY_IS_APPROX(result3(m), expected); + } + + const Dims4 reduction_axis4; + Tensor<float, 0> result4 = tensor.sum(reduction_axis4); + float expected = 0.0f; + for (int m = 0; m < 11; ++m) { + for (int l = 0; l < 7; ++l) { + for (int k = 0; k < 5; ++k) { + for (int j = 0; j < 3; ++j) { + for (int i = 0; i < 2; ++i) { + expected += tensor(i,j,k,l,m); + } + } + } + } + } + VERIFY_IS_APPROX(result4(), expected); +} + + +static void test_type2indexpair_list() +{ + Tensor<float, 5> tensor(2,3,5,7,11); + tensor.setRandom(); + tensor += tensor.constant(10.0f); + + typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>> Dims0; + typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::type2indexpair<1,11>, Eigen::type2indexpair<2,12>> Dims2_a; + typedef Eigen::IndexPairList<Eigen::type2indexpair<0,10>, Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<2,12>> Dims2_b; + typedef Eigen::IndexPairList<Eigen::IndexPair<DenseIndex>, Eigen::type2indexpair<1,11>, Eigen::IndexPair<DenseIndex>> Dims2_c; + + Dims0 d0; + Dims2_a d2_a; + + Dims2_b d2_b; + d2_b.set(1, Eigen::IndexPair<DenseIndex>(1,11)); + + Dims2_c d2_c; + d2_c.set(0, Eigen::IndexPair<DenseIndex>(Eigen::IndexPair<DenseIndex>(0,10))); + d2_c.set(1, Eigen::IndexPair<DenseIndex>(1,11)); // setting type2indexpair to correct value. 
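+ // Entries 0 and 2 of Dims2_c are runtime pairs and accept any value; the compile-time type2indexpair at entry 1 may only be set to the (1,11) it already encodes.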
+ d2_c.set(2, Eigen::IndexPair<DenseIndex>(2,12)); + + VERIFY_IS_EQUAL(d2_a[0].first, 0); + VERIFY_IS_EQUAL(d2_a[0].second, 10); + VERIFY_IS_EQUAL(d2_a[1].first, 1); + VERIFY_IS_EQUAL(d2_a[1].second, 11); + VERIFY_IS_EQUAL(d2_a[2].first, 2); + VERIFY_IS_EQUAL(d2_a[2].second, 12); + + VERIFY_IS_EQUAL(d2_b[0].first, 0); + VERIFY_IS_EQUAL(d2_b[0].second, 10); + VERIFY_IS_EQUAL(d2_b[1].first, 1); + VERIFY_IS_EQUAL(d2_b[1].second, 11); + VERIFY_IS_EQUAL(d2_b[2].first, 2); + VERIFY_IS_EQUAL(d2_b[2].second, 12); + + VERIFY_IS_EQUAL(d2_c[0].first, 0); + VERIFY_IS_EQUAL(d2_c[0].second, 10); + VERIFY_IS_EQUAL(d2_c[1].first, 1); + VERIFY_IS_EQUAL(d2_c[1].second, 11); + VERIFY_IS_EQUAL(d2_c[2].first, 2); + VERIFY_IS_EQUAL(d2_c[2].second, 12); + + EIGEN_STATIC_ASSERT((d2_a.value_known_statically(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((d2_a.value_known_statically(1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((d2_a.value_known_statically(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((d2_b.value_known_statically(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((d2_b.value_known_statically(1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((d2_b.value_known_statically(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((d2_c.value_known_statically(0) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((d2_c.value_known_statically(1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((d2_c.value_known_statically(2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims0>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims0>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_a>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(1, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_b>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(0, 0) == false), 
YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(0, 1) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(1, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(2, 2) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_first_statically_eq<Dims2_c>(2, 3) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims0>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims0>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(1, 11) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(2, 12) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_a>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(0, 10) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(1, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(2, 12) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_b>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(0, 10) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(0, 11) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(1, 11) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(1, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(2, 12) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((Eigen::internal::index_pair_second_statically_eq<Dims2_c>(2, 13) == false), YOU_MADE_A_PROGRAMMING_MISTAKE); +} + + +static void test_dynamic_index_list() +{ + Tensor<float, 4> tensor(2,3,5,7); + tensor.setRandom(); + + int dim1 = 2; + int dim2 = 1; + int dim3 = 0; + + auto reduction_axis = make_index_list(dim1, dim2, dim3); + + 
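+ // All three entries are runtime ints here, so none of the indices are known statically; the list still behaves like a plain array of reduction axes.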
VERIFY_IS_EQUAL(internal::array_get<0>(reduction_axis), 2); + VERIFY_IS_EQUAL(internal::array_get<1>(reduction_axis), 1); + VERIFY_IS_EQUAL(internal::array_get<2>(reduction_axis), 0); + VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[0]), 2); + VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[1]), 1); + VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[2]), 0); + + Tensor<float, 1> result = tensor.sum(reduction_axis); + for (int i = 0; i < result.size(); ++i) { + float expected = 0.0f; + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 3; ++k) { + for (int l = 0; l < 5; ++l) { + expected += tensor(j,k,l,i); + } + } + } + VERIFY_IS_APPROX(result(i), expected); + } +} + +static void test_mixed_index_list() +{ + Tensor<float, 4> tensor(2,3,5,7); + tensor.setRandom(); + + int dim2 = 1; + int dim4 = 3; + + auto reduction_axis = make_index_list(0, dim2, 2, dim4); + + VERIFY_IS_EQUAL(internal::array_get<0>(reduction_axis), 0); + VERIFY_IS_EQUAL(internal::array_get<1>(reduction_axis), 1); + VERIFY_IS_EQUAL(internal::array_get<2>(reduction_axis), 2); + VERIFY_IS_EQUAL(internal::array_get<3>(reduction_axis), 3); + VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[0]), 0); + VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[1]), 1); + VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[2]), 2); + VERIFY_IS_EQUAL(static_cast<DenseIndex>(reduction_axis[3]), 3); + + typedef IndexList<type2index<0>, int, type2index<2>, int> ReductionIndices; + ReductionIndices reduction_indices; + reduction_indices.set(1, 1); + reduction_indices.set(3, 3); + EIGEN_STATIC_ASSERT((internal::array_get<0>(reduction_indices) == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::array_get<2>(reduction_indices) == 2), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_known_statically<ReductionIndices>(0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_known_statically<ReductionIndices>(2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionIndices>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionIndices>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); +#if 0 + EIGEN_STATIC_ASSERT((internal::all_indices_known_statically<ReductionIndices>() == false), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<ReductionIndices>() == false), YOU_MADE_A_PROGRAMMING_MISTAKE); +#endif + + typedef IndexList<type2index<0>, type2index<1>, type2index<2>, type2index<3>> ReductionList; + ReductionList reduction_list; + EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(0, 0) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(1, 1) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(2, 2) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::index_statically_eq<ReductionList>(3, 3) == true), YOU_MADE_A_PROGRAMMING_MISTAKE); +#if 0 + EIGEN_STATIC_ASSERT((internal::all_indices_known_statically<ReductionList>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::indices_statically_known_to_increase<ReductionList>() == true), YOU_MADE_A_PROGRAMMING_MISTAKE); +#endif + + Tensor<float, 0> result1 = tensor.sum(reduction_axis); + Tensor<float, 0> result2 = tensor.sum(reduction_indices); + Tensor<float, 0> 
result3 = tensor.sum(reduction_list); + + float expected = 0.0f; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + expected += tensor(i,j,k,l); + } + } + } + } + VERIFY_IS_APPROX(result1(), expected); + VERIFY_IS_APPROX(result2(), expected); + VERIFY_IS_APPROX(result3(), expected); +} + + +static void test_dim_check() +{ + Eigen::IndexList<Eigen::type2index<1>, int> dim1; + dim1.set(1, 2); + Eigen::IndexList<Eigen::type2index<1>, int> dim2; + dim2.set(1, 2); + VERIFY(dimensions_match(dim1, dim2)); +} + + +#endif + +void test_cxx11_tensor_index_list() +{ +#ifdef EIGEN_HAS_INDEX_LIST + CALL_SUBTEST(test_static_index_list()); + CALL_SUBTEST(test_type2index_list()); + CALL_SUBTEST(test_type2indexpair_list()); + CALL_SUBTEST(test_dynamic_index_list()); + CALL_SUBTEST(test_mixed_index_list()); + CALL_SUBTEST(test_dim_check()); +#endif +} diff --git a/eigen/unsupported/test/cxx11_tensor_inflation.cpp b/eigen/unsupported/test/cxx11_tensor_inflation.cpp new file mode 100644 index 0000000..4997935 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_inflation.cpp @@ -0,0 +1,81 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Ke Yang <yangke@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template<int DataLayout> +static void test_simple_inflation() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + array<ptrdiff_t, 4> strides; + + strides[0] = 1; + strides[1] = 1; + strides[2] = 1; + strides[3] = 1; + + Tensor<float, 4, DataLayout> no_stride; + no_stride = tensor.inflate(strides); + + VERIFY_IS_EQUAL(no_stride.dimension(0), 2); + VERIFY_IS_EQUAL(no_stride.dimension(1), 3); + VERIFY_IS_EQUAL(no_stride.dimension(2), 5); + VERIFY_IS_EQUAL(no_stride.dimension(3), 7); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l)); + } + } + } + } + + strides[0] = 2; + strides[1] = 4; + strides[2] = 2; + strides[3] = 3; + Tensor<float, 4, DataLayout> inflated; + inflated = tensor.inflate(strides); + + VERIFY_IS_EQUAL(inflated.dimension(0), 3); + VERIFY_IS_EQUAL(inflated.dimension(1), 9); + VERIFY_IS_EQUAL(inflated.dimension(2), 9); + VERIFY_IS_EQUAL(inflated.dimension(3), 19); + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 9; ++k) { + for (int l = 0; l < 19; ++l) { + if (i % 2 == 0 && + j % 4 == 0 && + k % 2 == 0 && + l % 3 == 0) { + VERIFY_IS_EQUAL(inflated(i,j,k,l), + tensor(i/2, j/4, k/2, l/3)); + } else { + VERIFY_IS_EQUAL(0, inflated(i,j,k,l)); + } + } + } + } + } +} + +void test_cxx11_tensor_inflation() +{ + CALL_SUBTEST(test_simple_inflation<ColMajor>()); + CALL_SUBTEST(test_simple_inflation<RowMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_intdiv.cpp b/eigen/unsupported/test/cxx11_tensor_intdiv.cpp new file mode 100644 index 0000000..8e2b70b --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_intdiv.cpp @@ -0,0 +1,147 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2014-2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + + +void test_signed_32bit() +{ + // Divide by one + const Eigen::internal::TensorIntDivisor<int32_t, false> div_by_one(1); + + for (int32_t j = 0; j < 25000; ++j) { + const int32_t fast_div = j / div_by_one; + const int32_t slow_div = j / 1; + VERIFY_IS_EQUAL(fast_div, slow_div); + } + + // Standard divide by 2 or more + for (int32_t i = 2; i < 25000; ++i) { + const Eigen::internal::TensorIntDivisor<int32_t, false> div(i); + + for (int32_t j = 0; j < 25000; ++j) { + const int32_t fast_div = j / div; + const int32_t slow_div = j / i; + VERIFY_IS_EQUAL(fast_div, slow_div); + } + } + + // Optimized divide by 2 or more + for (int32_t i = 2; i < 25000; ++i) { + const Eigen::internal::TensorIntDivisor<int32_t, true> div(i); + + for (int32_t j = 0; j < 25000; ++j) { + const int32_t fast_div = j / div; + const int32_t slow_div = j / i; + VERIFY_IS_EQUAL(fast_div, slow_div); + } + } +} + + +void test_unsigned_32bit() +{ + for (uint32_t i = 1; i < 25000; ++i) { + const Eigen::internal::TensorIntDivisor<uint32_t> div(i); + + for (uint32_t j = 0; j < 25000; ++j) { + const uint32_t fast_div = j / div; + const uint32_t slow_div = j / i; + VERIFY_IS_EQUAL(fast_div, slow_div); + } + } +} + + +void test_signed_64bit() +{ + for (int64_t i = 1; i < 25000; ++i) { + const Eigen::internal::TensorIntDivisor<int64_t> div(i); + + for (int64_t j = 0; j < 25000; ++j) { + const int64_t fast_div = j / div; + const int64_t slow_div = j / i; + VERIFY_IS_EQUAL(fast_div, slow_div); + } + } +} + + +void test_unsigned_64bit() +{ + for (uint64_t i = 1; i < 25000; ++i) { + const Eigen::internal::TensorIntDivisor<uint64_t> div(i); + + for (uint64_t j = 0; j < 25000; ++j) { + const uint64_t fast_div = j / div; + const uint64_t slow_div = j / i; + VERIFY_IS_EQUAL(fast_div, slow_div); + } + } +} + +void test_powers_32bit() { + for (int expon = 1; expon < 31; expon++) { + int32_t div = (1 << expon); + for (int num_expon = 0; num_expon < 32; num_expon++) { + int32_t start_num = (1 << num_expon) - 100; + int32_t end_num = (1 << num_expon) + 100; + if (start_num < 0) + start_num = 0; + for (int32_t num = start_num; num < end_num; num++) { + Eigen::internal::TensorIntDivisor<int32_t> divider = + Eigen::internal::TensorIntDivisor<int32_t>(div); + int32_t result = num/div; + int32_t result_op = divider.divide(num); + VERIFY_IS_EQUAL(result_op, result); + } + } + } +} + +void test_powers_64bit() { + for (int expon = 0; expon < 63; expon++) { + int64_t div = (1ull << expon); + for (int num_expon = 0; num_expon < 63; num_expon++) { + int64_t start_num = (1ull << num_expon) - 10; + int64_t end_num = (1ull << num_expon) + 10; + if (start_num < 0) + start_num = 0; + for (int64_t num = start_num; num < end_num; num++) { + Eigen::internal::TensorIntDivisor<int64_t> divider(div); + int64_t result = num/div; + int64_t result_op = divider.divide(num); + VERIFY_IS_EQUAL(result_op, result); + } + } + } +} + +void test_specific() { + // A particular combination that was previously failing + int64_t div = 209715200; + int64_t num = 3238002688ll; + Eigen::internal::TensorIntDivisor<int64_t> divider(div); + int64_t result = num/div; + int64_t result_op = divider.divide(num); + VERIFY_IS_EQUAL(result, result_op); +} + 
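+// TensorIntDivisor in a nutshell, mirroring the tests above (an illustrative
+// sketch, not additional test code): the constructor precomputes a
+// multiply-and-shift "magic number" for one divisor, so repeated divisions by
+// that divisor avoid a hardware divide each time, e.g.:
+//   const Eigen::internal::TensorIntDivisor<int64_t> divider(209715200);
+//   int64_t q1 = 3238002688ll / divider;        // overloaded operator/
+//   int64_t q2 = divider.divide(3238002688ll);  // equivalent explicit form
+//   // q1 == q2 == 3238002688 / 209715200 == 15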
+void test_cxx11_tensor_intdiv() +{ + CALL_SUBTEST_1(test_signed_32bit()); + CALL_SUBTEST_2(test_unsigned_32bit()); + CALL_SUBTEST_3(test_signed_64bit()); + CALL_SUBTEST_4(test_unsigned_64bit()); + CALL_SUBTEST_5(test_powers_32bit()); + CALL_SUBTEST_6(test_powers_64bit()); + CALL_SUBTEST_7(test_specific()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_io.cpp b/eigen/unsupported/test/cxx11_tensor_io.cpp new file mode 100644 index 0000000..4899605 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_io.cpp @@ -0,0 +1,136 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include <sstream> +#include <string> +#include <Eigen/CXX11/Tensor> + + +template<int DataLayout> +static void test_output_0d() +{ + Tensor<int, 0, DataLayout> tensor; + tensor() = 123; + + std::stringstream os; + os << tensor; + + std::string expected("123"); + VERIFY_IS_EQUAL(std::string(os.str()), expected); +} + + +template<int DataLayout> +static void test_output_1d() +{ + Tensor<int, 1, DataLayout> tensor(5); + for (int i = 0; i < 5; ++i) { + tensor(i) = i; + } + + std::stringstream os; + os << tensor; + + std::string expected("0\n1\n2\n3\n4"); + VERIFY_IS_EQUAL(std::string(os.str()), expected); + + Eigen::Tensor<double,1,DataLayout> empty_tensor(0); + std::stringstream empty_os; + empty_os << empty_tensor; + std::string empty_string; + VERIFY_IS_EQUAL(std::string(empty_os.str()), empty_string); +} + + +template<int DataLayout> +static void test_output_2d() +{ + Tensor<int, 2, DataLayout> tensor(5, 3); + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 3; ++j) { + tensor(i, j) = i*j; + } + } + + std::stringstream os; + os << tensor; + + std::string expected("0 0 0\n0 1 2\n0 2 4\n0 3 6\n0 4 8"); + VERIFY_IS_EQUAL(std::string(os.str()), expected); +} + + +template<int DataLayout> +static void test_output_expr() +{ + Tensor<int, 1, DataLayout> tensor1(5); + Tensor<int, 1, DataLayout> tensor2(5); + for (int i = 0; i < 5; ++i) { + tensor1(i) = i; + tensor2(i) = 7; + } + + std::stringstream os; + os << tensor1 + tensor2; + + std::string expected(" 7\n 8\n 9\n10\n11"); + VERIFY_IS_EQUAL(std::string(os.str()), expected); +} + + +template<int DataLayout> +static void test_output_string() +{ + Tensor<std::string, 2, DataLayout> tensor(5, 3); + tensor.setConstant(std::string("foo")); + + std::cout << tensor << std::endl; + + std::stringstream os; + os << tensor; + + std::string expected("foo foo foo\nfoo foo foo\nfoo foo foo\nfoo foo foo\nfoo foo foo"); + VERIFY_IS_EQUAL(std::string(os.str()), expected); +} + + +template<int DataLayout> +static void test_output_const() +{ + Tensor<int, 1, DataLayout> tensor(5); + for (int i = 0; i < 5; ++i) { + tensor(i) = i; + } + + TensorMap<Tensor<const int, 1, DataLayout> > tensor_map(tensor.data(), 5); + + std::stringstream os; + os << tensor_map; + + std::string expected("0\n1\n2\n3\n4"); + VERIFY_IS_EQUAL(std::string(os.str()), expected); +} + + +void test_cxx11_tensor_io() +{ + CALL_SUBTEST(test_output_0d<ColMajor>()); + CALL_SUBTEST(test_output_0d<RowMajor>()); + CALL_SUBTEST(test_output_1d<ColMajor>()); + CALL_SUBTEST(test_output_1d<RowMajor>()); + CALL_SUBTEST(test_output_2d<ColMajor>()); + CALL_SUBTEST(test_output_2d<RowMajor>()); 
+ CALL_SUBTEST(test_output_expr<ColMajor>()); + CALL_SUBTEST(test_output_expr<RowMajor>()); + CALL_SUBTEST(test_output_string<ColMajor>()); + CALL_SUBTEST(test_output_string<RowMajor>()); + CALL_SUBTEST(test_output_const<ColMajor>()); + CALL_SUBTEST(test_output_const<RowMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_layout_swap.cpp b/eigen/unsupported/test/cxx11_tensor_layout_swap.cpp new file mode 100644 index 0000000..ae297a9 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_layout_swap.cpp @@ -0,0 +1,61 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +static void test_simple_swap() +{ + Tensor<float, 3, ColMajor> tensor(2,3,7); + tensor.setRandom(); + + Tensor<float, 3, RowMajor> tensor2 = tensor.swap_layout(); + VERIFY_IS_EQUAL(tensor.dimension(0), tensor2.dimension(2)); + VERIFY_IS_EQUAL(tensor.dimension(1), tensor2.dimension(1)); + VERIFY_IS_EQUAL(tensor.dimension(2), tensor2.dimension(0)); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(tensor(i,j,k), tensor2(k,j,i)); + } + } + } +} + + +static void test_swap_as_lvalue() +{ + Tensor<float, 3, ColMajor> tensor(2,3,7); + tensor.setRandom(); + + Tensor<float, 3, RowMajor> tensor2(7,3,2); + tensor2.swap_layout() = tensor; + VERIFY_IS_EQUAL(tensor.dimension(0), tensor2.dimension(2)); + VERIFY_IS_EQUAL(tensor.dimension(1), tensor2.dimension(1)); + VERIFY_IS_EQUAL(tensor.dimension(2), tensor2.dimension(0)); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(tensor(i,j,k), tensor2(k,j,i)); + } + } + } +} + + +void test_cxx11_tensor_layout_swap() +{ + CALL_SUBTEST(test_simple_swap()); + CALL_SUBTEST(test_swap_as_lvalue()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_lvalue.cpp b/eigen/unsupported/test/cxx11_tensor_lvalue.cpp new file mode 100644 index 0000000..071f5b4 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_lvalue.cpp @@ -0,0 +1,42 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::RowMajor; + + +static void test_compound_assignment() +{ + Tensor<float, 3> mat1(2,3,7); + Tensor<float, 3> mat2(2,3,7); + Tensor<float, 3> mat3(2,3,7); + + mat1.setRandom(); + mat2.setRandom(); + mat3 = mat1; + mat3 += mat2; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(mat3(i,j,k), mat1(i,j,k) + mat2(i,j,k)); + } + } + } +} + + +void test_cxx11_tensor_lvalue() +{ + CALL_SUBTEST(test_compound_assignment()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_map.cpp b/eigen/unsupported/test/cxx11_tensor_map.cpp new file mode 100644 index 0000000..3db0ee7 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_map.cpp @@ -0,0 +1,277 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::RowMajor; + +static void test_0d() +{ + Tensor<int, 0> scalar1; + Tensor<int, 0, RowMajor> scalar2; + + TensorMap<Tensor<const int, 0> > scalar3(scalar1.data()); + TensorMap<Tensor<const int, 0, RowMajor> > scalar4(scalar2.data()); + + scalar1() = 7; + scalar2() = 13; + + VERIFY_IS_EQUAL(scalar1.rank(), 0); + VERIFY_IS_EQUAL(scalar1.size(), 1); + + VERIFY_IS_EQUAL(scalar3(), 7); + VERIFY_IS_EQUAL(scalar4(), 13); +} + +static void test_1d() +{ + Tensor<int, 1> vec1(6); + Tensor<int, 1, RowMajor> vec2(6); + + TensorMap<Tensor<const int, 1> > vec3(vec1.data(), 6); + TensorMap<Tensor<const int, 1, RowMajor> > vec4(vec2.data(), 6); + + vec1(0) = 4; vec2(0) = 0; + vec1(1) = 8; vec2(1) = 1; + vec1(2) = 15; vec2(2) = 2; + vec1(3) = 16; vec2(3) = 3; + vec1(4) = 23; vec2(4) = 4; + vec1(5) = 42; vec2(5) = 5; + + VERIFY_IS_EQUAL(vec1.rank(), 1); + VERIFY_IS_EQUAL(vec1.size(), 6); + VERIFY_IS_EQUAL(vec1.dimension(0), 6); + + VERIFY_IS_EQUAL(vec3(0), 4); + VERIFY_IS_EQUAL(vec3(1), 8); + VERIFY_IS_EQUAL(vec3(2), 15); + VERIFY_IS_EQUAL(vec3(3), 16); + VERIFY_IS_EQUAL(vec3(4), 23); + VERIFY_IS_EQUAL(vec3(5), 42); + + VERIFY_IS_EQUAL(vec4(0), 0); + VERIFY_IS_EQUAL(vec4(1), 1); + VERIFY_IS_EQUAL(vec4(2), 2); + VERIFY_IS_EQUAL(vec4(3), 3); + VERIFY_IS_EQUAL(vec4(4), 4); + VERIFY_IS_EQUAL(vec4(5), 5); +} + +static void test_2d() +{ + Tensor<int, 2> mat1(2,3); + Tensor<int, 2, RowMajor> mat2(2,3); + + mat1(0,0) = 0; + mat1(0,1) = 1; + mat1(0,2) = 2; + mat1(1,0) = 3; + mat1(1,1) = 4; + mat1(1,2) = 5; + + mat2(0,0) = 0; + mat2(0,1) = 1; + mat2(0,2) = 2; + mat2(1,0) = 3; + mat2(1,1) = 4; + mat2(1,2) = 5; + + TensorMap<Tensor<const int, 2> > mat3(mat1.data(), 2, 3); + TensorMap<Tensor<const int, 2, RowMajor> > mat4(mat2.data(), 2, 3); + + VERIFY_IS_EQUAL(mat3.rank(), 2); + VERIFY_IS_EQUAL(mat3.size(), 6); + VERIFY_IS_EQUAL(mat3.dimension(0), 2); + VERIFY_IS_EQUAL(mat3.dimension(1), 3); + + VERIFY_IS_EQUAL(mat4.rank(), 2); + VERIFY_IS_EQUAL(mat4.size(), 6); + VERIFY_IS_EQUAL(mat4.dimension(0), 2); + VERIFY_IS_EQUAL(mat4.dimension(1), 3); + + VERIFY_IS_EQUAL(mat3(0,0), 0); + VERIFY_IS_EQUAL(mat3(0,1), 1); + VERIFY_IS_EQUAL(mat3(0,2), 2); + VERIFY_IS_EQUAL(mat3(1,0), 3); + VERIFY_IS_EQUAL(mat3(1,1), 4); + VERIFY_IS_EQUAL(mat3(1,2), 5); + + VERIFY_IS_EQUAL(mat4(0,0), 0); 
+ VERIFY_IS_EQUAL(mat4(0,1), 1); + VERIFY_IS_EQUAL(mat4(0,2), 2); + VERIFY_IS_EQUAL(mat4(1,0), 3); + VERIFY_IS_EQUAL(mat4(1,1), 4); + VERIFY_IS_EQUAL(mat4(1,2), 5); +} + +static void test_3d() +{ + Tensor<int, 3> mat1(2,3,7); + Tensor<int, 3, RowMajor> mat2(2,3,7); + + int val = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + mat1(i,j,k) = val; + mat2(i,j,k) = val; + val++; + } + } + } + + TensorMap<Tensor<const int, 3> > mat3(mat1.data(), 2, 3, 7); + TensorMap<Tensor<const int, 3, RowMajor> > mat4(mat2.data(), 2, 3, 7); + + VERIFY_IS_EQUAL(mat3.rank(), 3); + VERIFY_IS_EQUAL(mat3.size(), 2*3*7); + VERIFY_IS_EQUAL(mat3.dimension(0), 2); + VERIFY_IS_EQUAL(mat3.dimension(1), 3); + VERIFY_IS_EQUAL(mat3.dimension(2), 7); + + VERIFY_IS_EQUAL(mat4.rank(), 3); + VERIFY_IS_EQUAL(mat4.size(), 2*3*7); + VERIFY_IS_EQUAL(mat4.dimension(0), 2); + VERIFY_IS_EQUAL(mat4.dimension(1), 3); + VERIFY_IS_EQUAL(mat4.dimension(2), 7); + + val = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(mat3(i,j,k), val); + VERIFY_IS_EQUAL(mat4(i,j,k), val); + val++; + } + } + } +} + + +static void test_from_tensor() +{ + Tensor<int, 3> mat1(2,3,7); + Tensor<int, 3, RowMajor> mat2(2,3,7); + + int val = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + mat1(i,j,k) = val; + mat2(i,j,k) = val; + val++; + } + } + } + + TensorMap<Tensor<int, 3> > mat3(mat1); + TensorMap<Tensor<int, 3, RowMajor> > mat4(mat2); + + VERIFY_IS_EQUAL(mat3.rank(), 3); + VERIFY_IS_EQUAL(mat3.size(), 2*3*7); + VERIFY_IS_EQUAL(mat3.dimension(0), 2); + VERIFY_IS_EQUAL(mat3.dimension(1), 3); + VERIFY_IS_EQUAL(mat3.dimension(2), 7); + + VERIFY_IS_EQUAL(mat4.rank(), 3); + VERIFY_IS_EQUAL(mat4.size(), 2*3*7); + VERIFY_IS_EQUAL(mat4.dimension(0), 2); + VERIFY_IS_EQUAL(mat4.dimension(1), 3); + VERIFY_IS_EQUAL(mat4.dimension(2), 7); + + val = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(mat3(i,j,k), val); + VERIFY_IS_EQUAL(mat4(i,j,k), val); + val++; + } + } + } + + TensorFixedSize<int, Sizes<2,3,7> > mat5; + + val = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + array<ptrdiff_t, 3> coords; + coords[0] = i; + coords[1] = j; + coords[2] = k; + mat5(coords) = val; + val++; + } + } + } + + TensorMap<TensorFixedSize<int, Sizes<2,3,7> > > mat6(mat5); + + VERIFY_IS_EQUAL(mat6.rank(), 3); + VERIFY_IS_EQUAL(mat6.size(), 2*3*7); + VERIFY_IS_EQUAL(mat6.dimension(0), 2); + VERIFY_IS_EQUAL(mat6.dimension(1), 3); + VERIFY_IS_EQUAL(mat6.dimension(2), 7); + + val = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(mat6(i,j,k), val); + val++; + } + } + } +} + + +static int f(const TensorMap<Tensor<int, 3> >& tensor) { + // Size<0> empty; + EIGEN_STATIC_ASSERT((internal::array_size<Sizes<> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT((internal::array_size<DSizes<int, 0> >::value == 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + Tensor<int, 0> result = tensor.sum(); + return result(); +} + +static void test_casting() +{ + Tensor<int, 3> tensor(2,3,7); + + int val = 0; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + tensor(i,j,k) = val; + val++; + } + } + } + + TensorMap<Tensor<int, 3> > map(tensor); + int sum1 = f(map); + int sum2 = f(tensor); + + 
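+ // f() takes a TensorMap argument, so the second call exercises the implicit Tensor-to-TensorMap conversion.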
VERIFY_IS_EQUAL(sum1, sum2); + VERIFY_IS_EQUAL(sum1, 861); +} + +void test_cxx11_tensor_map() +{ + CALL_SUBTEST(test_0d()); + CALL_SUBTEST(test_1d()); + CALL_SUBTEST(test_2d()); + CALL_SUBTEST(test_3d()); + + CALL_SUBTEST(test_from_tensor()); + CALL_SUBTEST(test_casting()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_math.cpp b/eigen/unsupported/test/cxx11_tensor_math.cpp new file mode 100644 index 0000000..61c742a --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_math.cpp @@ -0,0 +1,46 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::RowMajor; + +static void test_tanh() +{ + Tensor<float, 1> vec1(6); + vec1.setRandom(); + + Tensor<float, 1> vec2 = vec1.tanh(); + + for (int i = 0; i < 6; ++i) { + VERIFY_IS_APPROX(vec2(i), tanhf(vec1(i))); + } +} + +static void test_sigmoid() +{ + Tensor<float, 1> vec1(6); + vec1.setRandom(); + + Tensor<float, 1> vec2 = vec1.sigmoid(); + + for (int i = 0; i < 6; ++i) { + VERIFY_IS_APPROX(vec2(i), 1.0f / (1.0f + std::exp(-vec1(i)))); + } +} + + +void test_cxx11_tensor_math() +{ + CALL_SUBTEST(test_tanh()); + CALL_SUBTEST(test_sigmoid()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_mixed_indices.cpp b/eigen/unsupported/test/cxx11_tensor_mixed_indices.cpp new file mode 100644 index 0000000..4fba6fd --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_mixed_indices.cpp @@ -0,0 +1,53 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + + +static void test_simple() +{ + Tensor<float, 1, ColMajor> vec1(6); + Tensor<float, 1, ColMajor, int> vec2(6); + + vec1(0) = 4.0; vec2(0) = 0.0; + vec1(1) = 8.0; vec2(1) = 1.0; + vec1(2) = 15.0; vec2(2) = 2.0; + vec1(3) = 16.0; vec2(3) = 3.0; + vec1(4) = 23.0; vec2(4) = 4.0; + vec1(5) = 42.0; vec2(5) = 5.0; + + float data3[6]; + TensorMap<Tensor<float, 1, ColMajor>> vec3(data3, 6); + vec3 = vec1.sqrt(); + float data4[6]; + TensorMap<Tensor<float, 1, ColMajor, int>> vec4(data4, 6); + vec4 = vec2.square(); + + VERIFY_IS_APPROX(vec3(0), sqrtf(4.0)); + VERIFY_IS_APPROX(vec3(1), sqrtf(8.0)); + VERIFY_IS_APPROX(vec3(2), sqrtf(15.0)); + VERIFY_IS_APPROX(vec3(3), sqrtf(16.0)); + VERIFY_IS_APPROX(vec3(4), sqrtf(23.0)); + VERIFY_IS_APPROX(vec3(5), sqrtf(42.0)); + + VERIFY_IS_APPROX(vec4(0), 0.0f); + VERIFY_IS_APPROX(vec4(1), 1.0f); + VERIFY_IS_APPROX(vec4(2), 2.0f * 2.0f); + VERIFY_IS_APPROX(vec4(3), 3.0f * 3.0f); + VERIFY_IS_APPROX(vec4(4), 4.0f * 4.0f); + VERIFY_IS_APPROX(vec4(5), 5.0f * 5.0f); +} + + +void test_cxx11_tensor_mixed_indices() +{ + CALL_SUBTEST(test_simple()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_morphing.cpp b/eigen/unsupported/test/cxx11_tensor_morphing.cpp new file mode 100644 index 0000000..f7de431 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_morphing.cpp @@ -0,0 +1,485 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template<typename> +static void test_simple_reshape() +{ + Tensor<float, 5> tensor1(2,3,1,7,1); + tensor1.setRandom(); + + Tensor<float, 3> tensor2(2,3,7); + Tensor<float, 2> tensor3(6,7); + Tensor<float, 2> tensor4(2,21); + + Tensor<float, 3>::Dimensions dim1(2,3,7); + tensor2 = tensor1.reshape(dim1); + Tensor<float, 2>::Dimensions dim2(6,7); + tensor3 = tensor1.reshape(dim2); + Tensor<float, 2>::Dimensions dim3(2,21); + tensor4 = tensor1.reshape(dim1).reshape(dim3); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k)); + VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i+2*j,k)); + VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j+3*k)); + } + } + } +} + +template<typename> +static void test_reshape_in_expr() { + MatrixXf m1(2,3*5*7*11); + MatrixXf m2(3*5*7*11,13); + m1.setRandom(); + m2.setRandom(); + MatrixXf m3 = m1 * m2; + + TensorMap<Tensor<float, 5>> tensor1(m1.data(), 2,3,5,7,11); + TensorMap<Tensor<float, 5>> tensor2(m2.data(), 3,5,7,11,13); + Tensor<float, 2>::Dimensions newDims1(2,3*5*7*11); + Tensor<float, 2>::Dimensions newDims2(3*5*7*11,13); + typedef Tensor<float, 1>::DimensionPair DimPair; + array<DimPair, 1> contract_along{{DimPair(1, 0)}}; + Tensor<float, 2> tensor3(2,13); + tensor3 = tensor1.reshape(newDims1).contract(tensor2.reshape(newDims2), contract_along); + + Map<MatrixXf> res(tensor3.data(), 2, 13); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 13; ++j) { + VERIFY_IS_APPROX(res(i,j), m3(i,j)); + } + } +} + +template<typename> +static void test_reshape_as_lvalue() +{ + Tensor<float, 3> tensor(2,3,7); + tensor.setRandom(); + + Tensor<float, 2> tensor2d(6,7); + 
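+ // reshape() also yields a writable view: the assignments below write through it directly into tensor2d and tensor5d.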
Tensor<float, 3>::Dimensions dim(2,3,7); + tensor2d.reshape(dim) = tensor; + + float scratch[2*3*1*7*1]; + TensorMap<Tensor<float, 5>> tensor5d(scratch, 2,3,1,7,1); + tensor5d.reshape(dim).device(Eigen::DefaultDevice()) = tensor; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(tensor2d(i+2*j,k), tensor(i,j,k)); + VERIFY_IS_EQUAL(tensor5d(i,j,0,k,0), tensor(i,j,k)); + } + } + } +} + +template<int DataLayout> +static void test_simple_slice() +{ + Tensor<float, 5, DataLayout> tensor(2,3,5,7,11); + tensor.setRandom(); + + Tensor<float, 5, DataLayout> slice1(1,1,1,1,1); + Eigen::DSizes<ptrdiff_t, 5> indices(1,2,3,4,5); + Eigen::DSizes<ptrdiff_t, 5> sizes(1,1,1,1,1); + slice1 = tensor.slice(indices, sizes); + VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5)); + + Tensor<float, 5, DataLayout> slice2(1,1,2,2,3); + Eigen::DSizes<ptrdiff_t, 5> indices2(1,1,3,4,5); + Eigen::DSizes<ptrdiff_t, 5> sizes2(1,1,2,2,3); + slice2 = tensor.slice(indices2, sizes2); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 3; ++k) { + VERIFY_IS_EQUAL(slice2(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k)); + } + } + } +} + +template<typename=void> +static void test_const_slice() +{ + const float b[1] = {42}; + TensorMap<Tensor<const float, 1> > m(b, 1); + DSizes<DenseIndex, 1> offsets; + offsets[0] = 0; + TensorRef<Tensor<const float, 1> > slice_ref(m.slice(offsets, m.dimensions())); + VERIFY_IS_EQUAL(slice_ref(0), 42); +} + +template<int DataLayout> +static void test_slice_in_expr() { + typedef Matrix<float, Dynamic, Dynamic, DataLayout> Mtx; + Mtx m1(7,7); + Mtx m2(3,3); + m1.setRandom(); + m2.setRandom(); + + Mtx m3 = m1.block(1, 2, 3, 3) * m2.block(0, 2, 3, 1); + + TensorMap<Tensor<float, 2, DataLayout>> tensor1(m1.data(), 7, 7); + TensorMap<Tensor<float, 2, DataLayout>> tensor2(m2.data(), 3, 3); + Tensor<float, 2, DataLayout> tensor3(3,1); + typedef Tensor<float, 1>::DimensionPair DimPair; + array<DimPair, 1> contract_along{{DimPair(1, 0)}}; + + Eigen::DSizes<ptrdiff_t, 2> indices1(1,2); + Eigen::DSizes<ptrdiff_t, 2> sizes1(3,3); + Eigen::DSizes<ptrdiff_t, 2> indices2(0,2); + Eigen::DSizes<ptrdiff_t, 2> sizes2(3,1); + tensor3 = tensor1.slice(indices1, sizes1).contract(tensor2.slice(indices2, sizes2), contract_along); + + Map<Mtx> res(tensor3.data(), 3, 1); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 1; ++j) { + VERIFY_IS_APPROX(res(i,j), m3(i,j)); + } + } + + // Take an arbitrary slice of an arbitrarily sized tensor. 
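+ // The slice below is taken from a lazily evaluated expression (exp() of the reshaped tensor), not from a materialized temporary.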
+ TensorMap<Tensor<const float, 2, DataLayout>> tensor4(m1.data(), 7, 7); + Tensor<float, 1, DataLayout> tensor6 = tensor4.reshape(DSizes<ptrdiff_t, 1>(7*7)).exp().slice(DSizes<ptrdiff_t, 1>(0), DSizes<ptrdiff_t, 1>(35)); + for (int i = 0; i < 35; ++i) { + VERIFY_IS_APPROX(tensor6(i), expf(tensor4.data()[i])); + } +} + +template<int DataLayout> +static void test_slice_as_lvalue() +{ + Tensor<float, 3, DataLayout> tensor1(2,2,7); + tensor1.setRandom(); + Tensor<float, 3, DataLayout> tensor2(2,2,7); + tensor2.setRandom(); + Tensor<float, 3, DataLayout> tensor3(4,3,5); + tensor3.setRandom(); + Tensor<float, 3, DataLayout> tensor4(4,3,2); + tensor4.setRandom(); + Tensor<float, 3, DataLayout> tensor5(10,13,12); + tensor5.setRandom(); + + Tensor<float, 3, DataLayout> result(4,5,7); + Eigen::DSizes<ptrdiff_t, 3> sizes12(2,2,7); + Eigen::DSizes<ptrdiff_t, 3> first_slice(0,0,0); + result.slice(first_slice, sizes12) = tensor1; + Eigen::DSizes<ptrdiff_t, 3> second_slice(2,0,0); + result.slice(second_slice, sizes12).device(Eigen::DefaultDevice()) = tensor2; + + Eigen::DSizes<ptrdiff_t, 3> sizes3(4,3,5); + Eigen::DSizes<ptrdiff_t, 3> third_slice(0,2,0); + result.slice(third_slice, sizes3) = tensor3; + + Eigen::DSizes<ptrdiff_t, 3> sizes4(4,3,2); + Eigen::DSizes<ptrdiff_t, 3> fourth_slice(0,2,5); + result.slice(fourth_slice, sizes4) = tensor4; + + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 7; ++k) { + for (int i = 0; i < 2; ++i) { + VERIFY_IS_EQUAL(result(i,j,k), tensor1(i,j,k)); + VERIFY_IS_EQUAL(result(i+2,j,k), tensor2(i,j,k)); + } + } + } + for (int i = 0; i < 4; ++i) { + for (int j = 2; j < 5; ++j) { + for (int k = 0; k < 5; ++k) { + VERIFY_IS_EQUAL(result(i,j,k), tensor3(i,j-2,k)); + } + for (int k = 5; k < 7; ++k) { + VERIFY_IS_EQUAL(result(i,j,k), tensor4(i,j-2,k-5)); + } + } + } + + Eigen::DSizes<ptrdiff_t, 3> sizes5(4,5,7); + Eigen::DSizes<ptrdiff_t, 3> fifth_slice(0,0,0); + result.slice(fifth_slice, sizes5) = tensor5.slice(fifth_slice, sizes5); + for (int i = 0; i < 4; ++i) { + for (int j = 2; j < 5; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(result(i,j,k), tensor5(i,j,k)); + } + } + } +} + +template<int DataLayout> +static void test_slice_raw_data() +{ + Tensor<float, 4, DataLayout> tensor(3,5,7,11); + tensor.setRandom(); + + Eigen::DSizes<ptrdiff_t, 4> offsets(1,2,3,4); + Eigen::DSizes<ptrdiff_t, 4> extents(1,1,1,1); + typedef TensorEvaluator<decltype(tensor.slice(offsets, extents)), DefaultDevice> SliceEvaluator; + auto slice1 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); + VERIFY_IS_EQUAL(slice1.dimensions().TotalSize(), 1); + VERIFY_IS_EQUAL(slice1.data()[0], tensor(1,2,3,4)); + + if (DataLayout == ColMajor) { + extents = Eigen::DSizes<ptrdiff_t, 4>(2,1,1,1); + auto slice2 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); + VERIFY_IS_EQUAL(slice2.dimensions().TotalSize(), 2); + VERIFY_IS_EQUAL(slice2.data()[0], tensor(1,2,3,4)); + VERIFY_IS_EQUAL(slice2.data()[1], tensor(2,2,3,4)); + } else { + extents = Eigen::DSizes<ptrdiff_t, 4>(1,1,1,2); + auto slice2 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); + VERIFY_IS_EQUAL(slice2.dimensions().TotalSize(), 2); + VERIFY_IS_EQUAL(slice2.data()[0], tensor(1,2,3,4)); + VERIFY_IS_EQUAL(slice2.data()[1], tensor(1,2,3,5)); + } + + extents = Eigen::DSizes<ptrdiff_t, 4>(1,2,1,1); + auto slice3 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice()); + VERIFY_IS_EQUAL(slice3.dimensions().TotalSize(), 2); + VERIFY_IS_EQUAL(slice3.data(), static_cast<float*>(0)); + 
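+ // slice3 spans non-contiguous memory, so its evaluator cannot expose a raw pointer and data() is null; the contiguous slices below do expose one.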
+  if (DataLayout == ColMajor) {
+    offsets = Eigen::DSizes<ptrdiff_t, 4>(0,2,3,4);
+    extents = Eigen::DSizes<ptrdiff_t, 4>(3,2,1,1);
+    auto slice4 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+    VERIFY_IS_EQUAL(slice4.dimensions().TotalSize(), 6);
+    for (int i = 0; i < 3; ++i) {
+      for (int j = 0; j < 2; ++j) {
+        VERIFY_IS_EQUAL(slice4.data()[i+3*j], tensor(i,2+j,3,4));
+      }
+    }
+  } else {
+    offsets = Eigen::DSizes<ptrdiff_t, 4>(1,2,3,0);
+    extents = Eigen::DSizes<ptrdiff_t, 4>(1,1,2,11);
+    auto slice4 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+    VERIFY_IS_EQUAL(slice4.dimensions().TotalSize(), 22);
+    for (int l = 0; l < 11; ++l) {
+      for (int k = 0; k < 2; ++k) {
+        VERIFY_IS_EQUAL(slice4.data()[l+11*k], tensor(1,2,3+k,l));
+      }
+    }
+  }
+
+  if (DataLayout == ColMajor) {
+    offsets = Eigen::DSizes<ptrdiff_t, 4>(0,0,0,4);
+    extents = Eigen::DSizes<ptrdiff_t, 4>(3,5,7,2);
+    auto slice5 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+    VERIFY_IS_EQUAL(slice5.dimensions().TotalSize(), 210);
+    for (int i = 0; i < 3; ++i) {
+      for (int j = 0; j < 5; ++j) {
+        for (int k = 0; k < 7; ++k) {
+          for (int l = 0; l < 2; ++l) {
+            int slice_index = i + 3 * (j + 5 * (k + 7 * l));
+            VERIFY_IS_EQUAL(slice5.data()[slice_index], tensor(i,j,k,l+4));
+          }
+        }
+      }
+    }
+  } else {
+    offsets = Eigen::DSizes<ptrdiff_t, 4>(1,0,0,0);
+    extents = Eigen::DSizes<ptrdiff_t, 4>(2,5,7,11);
+    auto slice5 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+    VERIFY_IS_EQUAL(slice5.dimensions().TotalSize(), 770);
+    for (int l = 0; l < 11; ++l) {
+      for (int k = 0; k < 7; ++k) {
+        for (int j = 0; j < 5; ++j) {
+          for (int i = 0; i < 2; ++i) {
+            int slice_index = l + 11 * (k + 7 * (j + 5 * i));
+            VERIFY_IS_EQUAL(slice5.data()[slice_index], tensor(i+1,j,k,l));
+          }
+        }
+      }
+    }
+  }
+
+  offsets = Eigen::DSizes<ptrdiff_t, 4>(0,0,0,0);
+  extents = Eigen::DSizes<ptrdiff_t, 4>(3,5,7,11);
+  auto slice6 = SliceEvaluator(tensor.slice(offsets, extents), DefaultDevice());
+  VERIFY_IS_EQUAL(slice6.dimensions().TotalSize(), 3*5*7*11);
+  VERIFY_IS_EQUAL(slice6.data(), tensor.data());
+}
+
+
+template<int DataLayout>
+static void test_strided_slice()
+{
+  typedef Tensor<float, 5, DataLayout> Tensor5f;
+  typedef Eigen::DSizes<Eigen::DenseIndex, 5> Index5;
+  typedef Tensor<float, 2, DataLayout> Tensor2f;
+  typedef Eigen::DSizes<Eigen::DenseIndex, 2> Index2;
+  Tensor<float, 5, DataLayout> tensor(2,3,5,7,11);
+  Tensor<float, 2, DataLayout> tensor2(7,11);
+  tensor.setRandom();
+  tensor2.setRandom();
+
+  if (true) {
+    Tensor2f slice(2,3);
+    Index2 strides(-2,-1);
+    Index2 indicesStart(5,7);
+    Index2 indicesStop(0,4);
+    slice = tensor2.stridedSlice(indicesStart, indicesStop, strides);
+    for (int j = 0; j < 2; ++j) {
+      for (int k = 0; k < 3; ++k) {
+        VERIFY_IS_EQUAL(slice(j,k), tensor2(5-2*j,7-k));
+      }
+    }
+  }
+
+  if (true) {
+    Tensor2f slice(0,1);
+    Index2 strides(1,1);
+    Index2 indicesStart(5,4);
+    Index2 indicesStop(5,5);
+    slice = tensor2.stridedSlice(indicesStart, indicesStop, strides);
+  }
+
+  if (true) { // test clamped degenerate intervals
+    Tensor2f slice(7,11);
+    Index2 strides(1,-1);
+    Index2 indicesStart(-3,20);  // should become 0,10
+    Index2 indicesStop(20,-11);  // should become 11, -1
+    slice = tensor2.stridedSlice(indicesStart, indicesStop, strides);
+    for (int j = 0; j < 7; ++j) {
+      for (int k = 0; k < 11; ++k) {
+        VERIFY_IS_EQUAL(slice(j,k), tensor2(j,10-k));
+      }
+    }
+  }
+
+  if (true) {
+    Tensor5f slice1(1,1,1,1,1);
+    Eigen::DSizes<Eigen::DenseIndex, 5> indicesStart(1, 2,
3, 4, 5); + Eigen::DSizes<Eigen::DenseIndex, 5> indicesStop(2, 3, 4, 5, 6); + Eigen::DSizes<Eigen::DenseIndex, 5> strides(1, 1, 1, 1, 1); + slice1 = tensor.stridedSlice(indicesStart, indicesStop, strides); + VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5)); + } + + if(true) { + Tensor5f slice(1,1,2,2,3); + Index5 start(1, 1, 3, 4, 5); + Index5 stop(2, 2, 5, 6, 8); + Index5 strides(1, 1, 1, 1, 1); + slice = tensor.stridedSlice(start, stop, strides); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 3; ++k) { + VERIFY_IS_EQUAL(slice(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k)); + } + } + } + } + + if(true) { + Tensor5f slice(1,1,2,2,3); + Index5 strides3(1, 1, -2, 1, -1); + Index5 indices3Start(1, 1, 4, 4, 7); + Index5 indices3Stop(2, 2, 0, 6, 4); + slice = tensor.stridedSlice(indices3Start, indices3Stop, strides3); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 3; ++k) { + VERIFY_IS_EQUAL(slice(0,0,i,j,k), tensor(1,1,4-2*i,4+j,7-k)); + } + } + } + } + + if(false) { // tests degenerate interval + Tensor5f slice(1,1,2,2,3); + Index5 strides3(1, 1, 2, 1, 1); + Index5 indices3Start(1, 1, 4, 4, 7); + Index5 indices3Stop(2, 2, 0, 6, 4); + slice = tensor.stridedSlice(indices3Start, indices3Stop, strides3); + } +} + +template<int DataLayout> +static void test_strided_slice_write() +{ + typedef Tensor<float, 2, DataLayout> Tensor2f; + typedef Eigen::DSizes<Eigen::DenseIndex, 2> Index2; + + Tensor<float, 2, DataLayout> tensor(7,11),tensor2(7,11); + tensor.setRandom(); + tensor2=tensor; + Tensor2f slice(2,3); + + slice.setRandom(); + + Index2 strides(1,1); + Index2 indicesStart(3,4); + Index2 indicesStop(5,7); + Index2 lengths(2,3); + + tensor.slice(indicesStart,lengths)=slice; + tensor2.stridedSlice(indicesStart,indicesStop,strides)=slice; + + for(int i=0;i<7;i++) for(int j=0;j<11;j++){ + VERIFY_IS_EQUAL(tensor(i,j), tensor2(i,j)); + } +} + + +template<int DataLayout> +static void test_composition() +{ + Eigen::Tensor<float, 2, DataLayout> matrix(7, 11); + matrix.setRandom(); + + const DSizes<ptrdiff_t, 3> newDims(1, 1, 11); + Eigen::Tensor<float, 3, DataLayout> tensor = + matrix.slice(DSizes<ptrdiff_t, 2>(2, 0), DSizes<ptrdiff_t, 2>(1, 11)).reshape(newDims); + + VERIFY_IS_EQUAL(tensor.dimensions().TotalSize(), 11); + VERIFY_IS_EQUAL(tensor.dimension(0), 1); + VERIFY_IS_EQUAL(tensor.dimension(1), 1); + VERIFY_IS_EQUAL(tensor.dimension(2), 11); + for (int i = 0; i < 11; ++i) { + VERIFY_IS_EQUAL(tensor(0,0,i), matrix(2,i)); + } +} + + +void test_cxx11_tensor_morphing() +{ + CALL_SUBTEST_1(test_simple_reshape<void>()); + CALL_SUBTEST_1(test_reshape_in_expr<void>()); + CALL_SUBTEST_1(test_reshape_as_lvalue<void>()); + + CALL_SUBTEST_1(test_simple_slice<ColMajor>()); + CALL_SUBTEST_1(test_simple_slice<RowMajor>()); + CALL_SUBTEST_1(test_const_slice()); + CALL_SUBTEST_2(test_slice_in_expr<ColMajor>()); + CALL_SUBTEST_3(test_slice_in_expr<RowMajor>()); + CALL_SUBTEST_4(test_slice_as_lvalue<ColMajor>()); + CALL_SUBTEST_4(test_slice_as_lvalue<RowMajor>()); + CALL_SUBTEST_5(test_slice_raw_data<ColMajor>()); + CALL_SUBTEST_5(test_slice_raw_data<RowMajor>()); + + CALL_SUBTEST_6(test_strided_slice_write<ColMajor>()); + CALL_SUBTEST_6(test_strided_slice<ColMajor>()); + CALL_SUBTEST_6(test_strided_slice_write<RowMajor>()); + CALL_SUBTEST_6(test_strided_slice<RowMajor>()); + + CALL_SUBTEST_7(test_composition<ColMajor>()); + CALL_SUBTEST_7(test_composition<RowMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp 
b/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp new file mode 100644 index 0000000..9b521bc --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_morphing_sycl.cpp @@ -0,0 +1,248 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_morphing_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; + +template <typename DataType, int DataLayout, typename IndexType> +static void test_simple_reshape(const Eigen::SyclDevice& sycl_device) +{ + typename Tensor<DataType, 5 ,DataLayout, IndexType>::Dimensions dim1(2,3,1,7,1); + typename Tensor<DataType, 3 ,DataLayout, IndexType>::Dimensions dim2(2,3,7); + typename Tensor<DataType, 2 ,DataLayout, IndexType>::Dimensions dim3(6,7); + typename Tensor<DataType, 2 ,DataLayout, IndexType>::Dimensions dim4(2,21); + + Tensor<DataType, 5, DataLayout, IndexType> tensor1(dim1); + Tensor<DataType, 3, DataLayout, IndexType> tensor2(dim2); + Tensor<DataType, 2, DataLayout, IndexType> tensor3(dim3); + Tensor<DataType, 2, DataLayout, IndexType> tensor4(dim4); + + tensor1.setRandom(); + + DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor1.size()*sizeof(DataType))); + DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType))); + DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(tensor3.size()*sizeof(DataType))); + DataType* gpu_data4 = static_cast<DataType*>(sycl_device.allocate(tensor4.size()*sizeof(DataType))); + + TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu1(gpu_data1, dim1); + TensorMap<Tensor<DataType, 3,DataLayout, IndexType>> gpu2(gpu_data2, dim2); + TensorMap<Tensor<DataType, 2,DataLayout, IndexType>> gpu3(gpu_data3, dim3); + TensorMap<Tensor<DataType, 2,DataLayout, IndexType>> gpu4(gpu_data4, dim4); + + sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(),(tensor1.size())*sizeof(DataType)); + + gpu2.device(sycl_device)=gpu1.reshape(dim2); + sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor1.size())*sizeof(DataType)); + + gpu3.device(sycl_device)=gpu1.reshape(dim3); + sycl_device.memcpyDeviceToHost(tensor3.data(), gpu_data3,(tensor3.size())*sizeof(DataType)); + + gpu4.device(sycl_device)=gpu1.reshape(dim2).reshape(dim4); + sycl_device.memcpyDeviceToHost(tensor4.data(), gpu_data4,(tensor4.size())*sizeof(DataType)); + for (IndexType i = 0; i < 2; ++i){ + for (IndexType j = 0; j < 3; ++j){ + for (IndexType k = 0; k < 7; ++k){ + VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k)); ///ColMajor + if (static_cast<int>(DataLayout) == static_cast<int>(ColMajor)) { + VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i+2*j,k)); ///ColMajor + VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j+3*k)); ///ColMajor + } + else{ + //VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k)); /// RowMajor + 
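+          // In RowMajor order the trailing index varies fastest, so the (j,k)
+          // pair of sizes (3,7) flattens to j*7 + k in the 2x21 tensor4: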
VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j*7 +k)); /// RowMajor + VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i*3 +j,k)); /// RowMajor + } + } + } + } + sycl_device.deallocate(gpu_data1); + sycl_device.deallocate(gpu_data2); + sycl_device.deallocate(gpu_data3); + sycl_device.deallocate(gpu_data4); +} + + +template<typename DataType, int DataLayout, typename IndexType> +static void test_reshape_as_lvalue(const Eigen::SyclDevice& sycl_device) +{ + typename Tensor<DataType, 3, DataLayout, IndexType>::Dimensions dim1(2,3,7); + typename Tensor<DataType, 2, DataLayout, IndexType>::Dimensions dim2(6,7); + typename Tensor<DataType, 5, DataLayout, IndexType>::Dimensions dim3(2,3,1,7,1); + Tensor<DataType, 3, DataLayout, IndexType> tensor(dim1); + Tensor<DataType, 2, DataLayout, IndexType> tensor2d(dim2); + Tensor<DataType, 5, DataLayout, IndexType> tensor5d(dim3); + + tensor.setRandom(); + + DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType))); + DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2d.size()*sizeof(DataType))); + DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(tensor5d.size()*sizeof(DataType))); + + TensorMap< Tensor<DataType, 3, DataLayout, IndexType> > gpu1(gpu_data1, dim1); + TensorMap< Tensor<DataType, 2, DataLayout, IndexType> > gpu2(gpu_data2, dim2); + TensorMap< Tensor<DataType, 5, DataLayout, IndexType> > gpu3(gpu_data3, dim3); + + sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); + + gpu2.reshape(dim1).device(sycl_device)=gpu1; + sycl_device.memcpyDeviceToHost(tensor2d.data(), gpu_data2,(tensor2d.size())*sizeof(DataType)); + + gpu3.reshape(dim1).device(sycl_device)=gpu1; + sycl_device.memcpyDeviceToHost(tensor5d.data(), gpu_data3,(tensor5d.size())*sizeof(DataType)); + + + for (IndexType i = 0; i < 2; ++i){ + for (IndexType j = 0; j < 3; ++j){ + for (IndexType k = 0; k < 7; ++k){ + VERIFY_IS_EQUAL(tensor5d(i,j,0,k,0), tensor(i,j,k)); + if (static_cast<int>(DataLayout) == static_cast<int>(ColMajor)) { + VERIFY_IS_EQUAL(tensor2d(i+2*j,k), tensor(i,j,k)); ///ColMajor + } + else{ + VERIFY_IS_EQUAL(tensor2d(i*3 +j,k),tensor(i,j,k)); /// RowMajor + } + } + } + } + sycl_device.deallocate(gpu_data1); + sycl_device.deallocate(gpu_data2); + sycl_device.deallocate(gpu_data3); +} + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_simple_slice(const Eigen::SyclDevice &sycl_device) +{ + IndexType sizeDim1 = 2; + IndexType sizeDim2 = 3; + IndexType sizeDim3 = 5; + IndexType sizeDim4 = 7; + IndexType sizeDim5 = 11; + array<IndexType, 5> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4, sizeDim5}}; + Tensor<DataType, 5,DataLayout, IndexType> tensor(tensorRange); + tensor.setRandom(); + array<IndexType, 5> slice1_range ={{1, 1, 1, 1, 1}}; + Tensor<DataType, 5,DataLayout, IndexType> slice1(slice1_range); + + DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType))); + DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(slice1.size()*sizeof(DataType))); + TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu1(gpu_data1, tensorRange); + TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu2(gpu_data2, slice1_range); + Eigen::DSizes<IndexType, 5> indices(1,2,3,4,5); + Eigen::DSizes<IndexType, 5> sizes(1,1,1,1,1); + sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); + gpu2.device(sycl_device)=gpu1.slice(indices, sizes); + 
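+  // The slice expression runs on the SYCL device; copy the one selected
+  // element back to the host before comparing it against the source tensor.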
sycl_device.memcpyDeviceToHost(slice1.data(), gpu_data2,(slice1.size())*sizeof(DataType)); + VERIFY_IS_EQUAL(slice1(0,0,0,0,0), tensor(1,2,3,4,5)); + + + array<IndexType, 5> slice2_range ={{1,1,2,2,3}}; + Tensor<DataType, 5,DataLayout, IndexType> slice2(slice2_range); + DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(slice2.size()*sizeof(DataType))); + TensorMap<Tensor<DataType, 5,DataLayout, IndexType>> gpu3(gpu_data3, slice2_range); + Eigen::DSizes<IndexType, 5> indices2(1,1,3,4,5); + Eigen::DSizes<IndexType, 5> sizes2(1,1,2,2,3); + gpu3.device(sycl_device)=gpu1.slice(indices2, sizes2); + sycl_device.memcpyDeviceToHost(slice2.data(), gpu_data3,(slice2.size())*sizeof(DataType)); + for (IndexType i = 0; i < 2; ++i) { + for (IndexType j = 0; j < 2; ++j) { + for (IndexType k = 0; k < 3; ++k) { + VERIFY_IS_EQUAL(slice2(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k)); + } + } + } + sycl_device.deallocate(gpu_data1); + sycl_device.deallocate(gpu_data2); + sycl_device.deallocate(gpu_data3); +} + +template<typename DataType, int DataLayout, typename IndexType> +static void test_strided_slice_write_sycl(const Eigen::SyclDevice& sycl_device) +{ + typedef Tensor<DataType, 2, DataLayout, IndexType> Tensor2f; + typedef Eigen::DSizes<IndexType, 2> Index2; + IndexType sizeDim1 = 7L; + IndexType sizeDim2 = 11L; + array<IndexType, 2> tensorRange = {{sizeDim1, sizeDim2}}; + Tensor<DataType, 2, DataLayout, IndexType> tensor(tensorRange),tensor2(tensorRange); + IndexType sliceDim1 = 2; + IndexType sliceDim2 = 3; + array<IndexType, 2> sliceRange = {{sliceDim1, sliceDim2}}; + Tensor2f slice(sliceRange); + Index2 strides(1L,1L); + Index2 indicesStart(3L,4L); + Index2 indicesStop(5L,7L); + Index2 lengths(2L,3L); + + DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType))); + DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType))); + DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(slice.size()*sizeof(DataType))); + TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange); + TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu2(gpu_data2, tensorRange); + TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu3(gpu_data3, sliceRange); + + + tensor.setRandom(); + sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); + gpu2.device(sycl_device)=gpu1; + + slice.setRandom(); + sycl_device.memcpyHostToDevice(gpu_data3, slice.data(),(slice.size())*sizeof(DataType)); + + + gpu1.slice(indicesStart,lengths).device(sycl_device)=gpu3; + gpu2.stridedSlice(indicesStart,indicesStop,strides).device(sycl_device)=gpu3; + sycl_device.memcpyDeviceToHost(tensor.data(), gpu_data1,(tensor.size())*sizeof(DataType)); + sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor2.size())*sizeof(DataType)); + + for(IndexType i=0;i<sizeDim1;i++) + for(IndexType j=0;j<sizeDim2;j++){ + VERIFY_IS_EQUAL(tensor(i,j), tensor2(i,j)); + } + sycl_device.deallocate(gpu_data1); + sycl_device.deallocate(gpu_data2); + sycl_device.deallocate(gpu_data3); +} + +template<typename DataType, typename dev_Selector> void sycl_morphing_test_per_device(dev_Selector s){ + QueueInterface queueInterface(s); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + test_simple_slice<DataType, RowMajor, int64_t>(sycl_device); + test_simple_slice<DataType, ColMajor, int64_t>(sycl_device); + test_simple_reshape<DataType, RowMajor, int64_t>(sycl_device); + test_simple_reshape<DataType, ColMajor, 
int64_t>(sycl_device); + test_reshape_as_lvalue<DataType, RowMajor, int64_t>(sycl_device); + test_reshape_as_lvalue<DataType, ColMajor, int64_t>(sycl_device); + test_strided_slice_write_sycl<DataType, ColMajor, int64_t>(sycl_device); + test_strided_slice_write_sycl<DataType, RowMajor, int64_t>(sycl_device); +} +void test_cxx11_tensor_morphing_sycl() +{ + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(sycl_morphing_test_per_device<float>(device)); + } +} diff --git a/eigen/unsupported/test/cxx11_tensor_notification.cpp b/eigen/unsupported/test/cxx11_tensor_notification.cpp new file mode 100644 index 0000000..183ef02 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_notification.cpp @@ -0,0 +1,72 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Vijay Vasudevan <vrv@google.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_USE_THREADS + +#include <stdlib.h> +#include "main.h" +#include <Eigen/CXX11/Tensor> + + +namespace { + +void WaitAndAdd(Eigen::Notification* n, int* counter) { + n->Wait(); + *counter = *counter + 1; +} + +} // namespace + +static void test_notification_single() +{ + ThreadPool thread_pool(1); + + int counter = 0; + Eigen::Notification n; + std::function<void()> func = std::bind(&WaitAndAdd, &n, &counter); + thread_pool.Schedule(func); + EIGEN_SLEEP(1000); + + // The thread should be waiting for the notification. + VERIFY_IS_EQUAL(counter, 0); + + // Unblock the thread + n.Notify(); + + EIGEN_SLEEP(1000); + + // Verify the counter has been incremented + VERIFY_IS_EQUAL(counter, 1); +} + +// Like test_notification_single() but enqueues multiple threads to +// validate that all threads get notified by Notify(). +static void test_notification_multiple() +{ + ThreadPool thread_pool(1); + + int counter = 0; + Eigen::Notification n; + std::function<void()> func = std::bind(&WaitAndAdd, &n, &counter); + thread_pool.Schedule(func); + thread_pool.Schedule(func); + thread_pool.Schedule(func); + thread_pool.Schedule(func); + EIGEN_SLEEP(1000); + VERIFY_IS_EQUAL(counter, 0); + n.Notify(); + EIGEN_SLEEP(1000); + VERIFY_IS_EQUAL(counter, 4); +} + +void test_cxx11_tensor_notification() +{ + CALL_SUBTEST(test_notification_single()); + CALL_SUBTEST(test_notification_multiple()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_of_complex.cpp b/eigen/unsupported/test/cxx11_tensor_of_complex.cpp new file mode 100644 index 0000000..e9d1b2d --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_of_complex.cpp @@ -0,0 +1,103 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
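+
+// These tests exercise tensors of std::complex scalars: element-wise sums,
+// abs(), conjugate() and contractions. As a rough host-side sketch of the
+// pattern being verified (illustrative only, not part of the test):
+//
+//   Tensor<std::complex<float>, 1> a(3), b(3);
+//   a.setRandom(); b.setRandom();
+//   Tensor<std::complex<float>, 1> c = a + b;  // element-wise complex sum
+//   Tensor<float, 1> m = a.abs();              // |z|, real-valued result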
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::TensorMap; + + + +static void test_additions() +{ + Tensor<std::complex<float>, 1> data1(3); + Tensor<std::complex<float>, 1> data2(3); + for (int i = 0; i < 3; ++i) { + data1(i) = std::complex<float>(i, -i); + data2(i) = std::complex<float>(i, 7 * i); + } + + Tensor<std::complex<float>, 1> sum = data1 + data2; + for (int i = 0; i < 3; ++i) { + VERIFY_IS_EQUAL(sum(i), std::complex<float>(2*i, 6*i)); + } +} + + +static void test_abs() +{ + Tensor<std::complex<float>, 1> data1(3); + Tensor<std::complex<double>, 1> data2(3); + data1.setRandom(); + data2.setRandom(); + + Tensor<float, 1> abs1 = data1.abs(); + Tensor<double, 1> abs2 = data2.abs(); + for (int i = 0; i < 3; ++i) { + VERIFY_IS_APPROX(abs1(i), std::abs(data1(i))); + VERIFY_IS_APPROX(abs2(i), std::abs(data2(i))); + } +} + + +static void test_conjugate() +{ + Tensor<std::complex<float>, 1> data1(3); + Tensor<std::complex<double>, 1> data2(3); + Tensor<int, 1> data3(3); + data1.setRandom(); + data2.setRandom(); + data3.setRandom(); + + Tensor<std::complex<float>, 1> conj1 = data1.conjugate(); + Tensor<std::complex<double>, 1> conj2 = data2.conjugate(); + Tensor<int, 1> conj3 = data3.conjugate(); + for (int i = 0; i < 3; ++i) { + VERIFY_IS_APPROX(conj1(i), std::conj(data1(i))); + VERIFY_IS_APPROX(conj2(i), std::conj(data2(i))); + VERIFY_IS_APPROX(conj3(i), data3(i)); + } +} + +static void test_contractions() +{ + Tensor<std::complex<float>, 4> t_left(30, 50, 8, 31); + Tensor<std::complex<float>, 5> t_right(8, 31, 7, 20, 10); + Tensor<std::complex<float>, 5> t_result(30, 50, 7, 20, 10); + + t_left.setRandom(); + t_right.setRandom(); + + typedef Map<Matrix<std::complex<float>, Dynamic, Dynamic>> MapXcf; + MapXcf m_left(t_left.data(), 1500, 248); + MapXcf m_right(t_right.data(), 248, 1400); + Matrix<std::complex<float>, Dynamic, Dynamic> m_result(1500, 1400); + + // This contraction should be equivalent to a regular matrix multiplication + typedef Tensor<float, 1>::DimensionPair DimPair; + Eigen::array<DimPair, 2> dims; + dims[0] = DimPair(2, 0); + dims[1] = DimPair(3, 1); + t_result = t_left.contract(t_right, dims); + m_result = m_left * m_right; + for (int i = 0; i < t_result.dimensions().TotalSize(); i++) { + VERIFY_IS_APPROX(t_result.data()[i], m_result.data()[i]); + } +} + + +void test_cxx11_tensor_of_complex() +{ + CALL_SUBTEST(test_additions()); + CALL_SUBTEST(test_abs()); + CALL_SUBTEST(test_conjugate()); + CALL_SUBTEST(test_contractions()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_of_const_values.cpp b/eigen/unsupported/test/cxx11_tensor_of_const_values.cpp new file mode 100644 index 0000000..f179a0c --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_of_const_values.cpp @@ -0,0 +1,105 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
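+
+// A TensorMap instantiated with a const scalar type is a read-only view of
+// memory the caller owns. A minimal sketch of the idea (illustrative only):
+//
+//   float raw[6];
+//   TensorMap<Tensor<const float, 2>> view(raw, 2, 3);
+//   Tensor<float, 2> copy = view;  // reading works; writing through view does not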
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::RowMajor; + +static void test_assign() +{ + float data1[6]; + TensorMap<Tensor<const float, 2>> mat1(data1, 2, 3); + float data2[6]; + const TensorMap<Tensor<float, 2>> mat2(data2, 2, 3); + + for (int i = 0; i < 6; ++i) { + data1[i] = i; + data2[i] = -i; + } + + Tensor<float, 2> rslt1; + rslt1 = mat1; + Tensor<float, 2> rslt2; + rslt2 = mat2; + + Tensor<float, 2> rslt3 = mat1; + Tensor<float, 2> rslt4 = mat2; + + Tensor<float, 2> rslt5(mat1); + Tensor<float, 2> rslt6(mat2); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_APPROX(rslt1(i,j), static_cast<float>(i + 2*j)); + VERIFY_IS_APPROX(rslt2(i,j), static_cast<float>(-i - 2*j)); + VERIFY_IS_APPROX(rslt3(i,j), static_cast<float>(i + 2*j)); + VERIFY_IS_APPROX(rslt4(i,j), static_cast<float>(-i - 2*j)); + VERIFY_IS_APPROX(rslt5(i,j), static_cast<float>(i + 2*j)); + VERIFY_IS_APPROX(rslt6(i,j), static_cast<float>(-i - 2*j)); + } + } +} + + +static void test_plus() +{ + float data1[6]; + TensorMap<Tensor<const float, 2>> mat1(data1, 2, 3); + float data2[6]; + TensorMap<Tensor<float, 2>> mat2(data2, 2, 3); + + for (int i = 0; i < 6; ++i) { + data1[i] = i; + data2[i] = -i; + } + + Tensor<float, 2> sum1; + sum1 = mat1 + mat2; + Tensor<float, 2> sum2; + sum2 = mat2 + mat1; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_APPROX(sum1(i,j), 0.0f); + VERIFY_IS_APPROX(sum2(i,j), 0.0f); + } + } +} + + +static void test_plus_equal() +{ + float data1[6]; + TensorMap<Tensor<const float, 2>> mat1(data1, 2, 3); + float data2[6]; + TensorMap<Tensor<float, 2>> mat2(data2, 2, 3); + + for (int i = 0; i < 6; ++i) { + data1[i] = i; + data2[i] = -i; + } + mat2 += mat1; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_APPROX(mat2(i,j), 0.0f); + } + } +} + + +void test_cxx11_tensor_of_const_values() +{ + CALL_SUBTEST(test_assign()); + CALL_SUBTEST(test_plus()); + CALL_SUBTEST(test_plus_equal()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu new file mode 100644 index 0000000..908a5e5 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -0,0 +1,500 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
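+
+// Each test below evaluates the same expression on the GPU twice, once in
+// float and once after casting to Eigen::half, then compares the two results
+// on the host. The recurring pattern, sketched (illustrative only):
+//
+//   gpu_res_float.device(gpu_device) = gpu_float.abs();
+//   gpu_res_half.device(gpu_device) =
+//       gpu_float.cast<Eigen::half>().abs().cast<float>();
+//   // memcpyDeviceToHost both results, then VERIFY_IS_APPROX element-wise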
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_of_float16_cuda +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template<typename> +void test_cuda_numext() { + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + bool* d_res_half = (bool*)gpu_device.allocate(num_elem * sizeof(bool)); + bool* d_res_float = (bool*)gpu_device.allocate(num_elem * sizeof(bool)); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float( + d_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_half( + d_res_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_float( + d_res_float, num_elem); + + gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f); + gpu_res_float.device(gpu_device) = gpu_float.unaryExpr(Eigen::internal::scalar_isnan_op<float>()); + gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().unaryExpr(Eigen::internal::scalar_isnan_op<Eigen::half>()); + + Tensor<bool, 1> half_prec(num_elem); + Tensor<bool, 1> full_prec(num_elem); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(bool)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(bool)); + gpu_device.synchronize(); + + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking numext " << i << std::endl; + VERIFY_IS_EQUAL(full_prec(i), half_prec(i)); + } + + gpu_device.deallocate(d_float); + gpu_device.deallocate(d_res_half); + gpu_device.deallocate(d_res_float); +} + + +#ifdef EIGEN_HAS_CUDA_FP16 + +template<typename> +void test_cuda_conversion() { + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + Eigen::half* d_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float)); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float( + d_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_half( + d_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv( + d_conv, num_elem); + + gpu_float.device(gpu_device) = gpu_float.random(); + gpu_half.device(gpu_device) = gpu_float.cast<Eigen::half>(); + gpu_conv.device(gpu_device) = gpu_half.cast<float>(); + + Tensor<float, 1> initial(num_elem); + Tensor<float, 1> final(num_elem); + gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float)); + + for (int i = 0; i < num_elem; ++i) { + VERIFY_IS_APPROX(initial(i), final(i)); + } + + gpu_device.deallocate(d_float); + gpu_device.deallocate(d_half); + gpu_device.deallocate(d_conv); +} + +template<typename> +void test_cuda_unary() { + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res_float = (float*)gpu_device.allocate(num_elem * 
sizeof(float)); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float( + d_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half( + d_res_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float( + d_res_float, num_elem); + + gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f); + gpu_res_float.device(gpu_device) = gpu_float.abs(); + gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().cast<float>(); + + Tensor<float, 1> half_prec(num_elem); + Tensor<float, 1> full_prec(num_elem); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float)); + gpu_device.synchronize(); + + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking unary " << i << std::endl; + VERIFY_IS_APPROX(full_prec(i), half_prec(i)); + } + + gpu_device.deallocate(d_float); + gpu_device.deallocate(d_res_half); + gpu_device.deallocate(d_res_float); +} + +template<typename> +void test_cuda_elementwise() { + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res_half = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1( + d_float1, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2( + d_float2, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half( + d_res_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float( + d_res_float, num_elem); + + gpu_float1.device(gpu_device) = gpu_float1.random(); + gpu_float2.device(gpu_device) = gpu_float2.random(); + gpu_res_float.device(gpu_device) = (gpu_float1 + gpu_float2) * gpu_float1; + gpu_res_half.device(gpu_device) = ((gpu_float1.cast<Eigen::half>() + gpu_float2.cast<Eigen::half>()) * gpu_float1.cast<Eigen::half>()).cast<float>(); + + Tensor<float, 1> half_prec(num_elem); + Tensor<float, 1> full_prec(num_elem); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float)); + gpu_device.synchronize(); + + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking elemwise " << i << ": full prec = " << full_prec(i) << " vs half prec = " << half_prec(i) << std::endl; + VERIFY_IS_APPROX(static_cast<Eigen::half>(full_prec(i)), static_cast<Eigen::half>(half_prec(i))); + } + + gpu_device.deallocate(d_float1); + gpu_device.deallocate(d_float2); + gpu_device.deallocate(d_res_half); + gpu_device.deallocate(d_res_float); +} + +template<typename> +void test_cuda_trancendental() { + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_float3 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + Eigen::half* d_res1_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res1_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + 
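+  // One pair of result buffers per transcendental under test: the *_float
+  // buffers receive the float computation cast to half at the end, while the
+  // *_half buffers receive the computation performed natively in half.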
Eigen::half* d_res2_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res2_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res3_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res3_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(d_float1, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(d_float2, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float3(d_float3, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_half(d_res1_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_float(d_res1_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_half(d_res2_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_half(d_res3_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res4_half(d_res3_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res4_float(d_res3_float, num_elem); + + gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f); + gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f); + gpu_float3.device(gpu_device) = gpu_float3.random(); + gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::half>(); + gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::half>(); + gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast<Eigen::half>(); + gpu_res4_float.device(gpu_device) = gpu_float3.expm1().cast<Eigen::half>(); + + gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>(); + gpu_res1_half.device(gpu_device) = gpu_res1_half.exp(); + + gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>(); + gpu_res2_half.device(gpu_device) = gpu_res2_half.log(); + + gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>(); + gpu_res3_half.device(gpu_device) = gpu_res3_half.log1p(); + + gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>(); + gpu_res3_half.device(gpu_device) = gpu_res3_half.expm1(); + + Tensor<float, 1> input1(num_elem); + Tensor<Eigen::half, 1> half_prec1(num_elem); + Tensor<Eigen::half, 1> full_prec1(num_elem); + Tensor<float, 1> input2(num_elem); + Tensor<Eigen::half, 1> half_prec2(num_elem); + Tensor<Eigen::half, 1> full_prec2(num_elem); + Tensor<float, 1> input3(num_elem); + Tensor<Eigen::half, 1> half_prec3(num_elem); + Tensor<Eigen::half, 1> full_prec3(num_elem); + gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(input3.data(), d_float3, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, 
num_elem*sizeof(Eigen::half));
+  gpu_device.memcpyDeviceToHost(half_prec3.data(), d_res3_half, num_elem*sizeof(Eigen::half));
+  gpu_device.memcpyDeviceToHost(full_prec3.data(), d_res3_float, num_elem*sizeof(Eigen::half));
+  gpu_device.synchronize();
+
+  for (int i = 0; i < num_elem; ++i) {
+    std::cout << "Checking elemwise exp " << i << " input = " << input1(i) << " full = " << full_prec1(i) << " half = " << half_prec1(i) << std::endl;
+    VERIFY_IS_APPROX(full_prec1(i), half_prec1(i));
+  }
+  for (int i = 0; i < num_elem; ++i) {
+    std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl;
+    if (std::abs(input2(i)-1.f) < 0.05f) // log lacks accuracy near 1
+      VERIFY_IS_APPROX(full_prec2(i)+Eigen::half(0.1f), half_prec2(i)+Eigen::half(0.1f));
+    else
+      VERIFY_IS_APPROX(full_prec2(i), half_prec2(i));
+  }
+  for (int i = 0; i < num_elem; ++i) {
+    // gpu_res4_half/gpu_res4_float alias the res3 buffers above, so by this
+    // point both actually hold the expm1 results rather than log1p.
+    std::cout << "Checking elemwise log1p " << i << " input = " << input3(i) << " full = " << full_prec3(i) << " half = " << half_prec3(i) << std::endl;
+    VERIFY_IS_APPROX(full_prec3(i), half_prec3(i));
+  }
+  gpu_device.deallocate(d_float1);
+  gpu_device.deallocate(d_float2);
+  gpu_device.deallocate(d_float3);
+  gpu_device.deallocate(d_res1_half);
+  gpu_device.deallocate(d_res1_float);
+  gpu_device.deallocate(d_res2_half);
+  gpu_device.deallocate(d_res2_float);
+  gpu_device.deallocate(d_res3_float);
+  gpu_device.deallocate(d_res3_half);
+}
+
+template<typename>
+void test_cuda_contractions() {
+  Eigen::CudaStreamDevice stream;
+  Eigen::GpuDevice gpu_device(&stream);
+  int rows = 23;
+  int cols = 23;
+  int num_elem = rows*cols;
+
+  float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+  float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+  Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+  Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+
+  Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
+      d_float1, rows, cols);
+  Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
+      d_float2, rows, cols);
+  Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_half(
+      d_res_half, rows, cols);
+  Eigen::TensorMap<Eigen::Tensor<Eigen::half, 2>, Eigen::Aligned> gpu_res_float(
+      d_res_float, rows, cols);
+
+  gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
+  gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float2.constant(0.5f);
+
+  typedef Tensor<float, 2>::DimensionPair DimPair;
+  Eigen::array<DimPair, 1> dims(DimPair(1, 0));
+  gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims).cast<Eigen::half>();
+  gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().contract(gpu_float2.cast<Eigen::half>(), dims);
+
+  Tensor<Eigen::half, 2> half_prec(rows, cols);
+  Tensor<Eigen::half, 2> full_prec(rows, cols);
+  gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(Eigen::half));
+  gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(Eigen::half));
+  gpu_device.synchronize();
+
+  for (int i = 0; i < rows; ++i) {
+    for (int j = 0; j < cols; ++j) {
+      std::cout << "Checking contract " << i << " " << j << " " << full_prec(i, j) << " " << half_prec(i, j) << std::endl;
+      if (numext::abs(full_prec(i, j) - half_prec(i, j)) > Eigen::half(1e-2f)) {
+        VERIFY_IS_APPROX(full_prec(i, j), half_prec(i, j));
+      }
+    }
+  }
+
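+  // The explicit 1e-2 guard above accounts for half's ~11-bit mantissa: a
+  // 23-element dot product accumulates enough rounding error that exact
+  // agreement with the float path cannot be expected.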
gpu_device.deallocate(d_float1); + gpu_device.deallocate(d_float2); + gpu_device.deallocate(d_res_half); + gpu_device.deallocate(d_res_float); +} + +template<typename> +void test_cuda_reductions(int size1, int size2, int redux) { + + std::cout << "Reducing " << size1 << " by " << size2 + << " tensor along dim " << redux << std::endl; + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = size1*size2; + int result_size = (redux == 1 ? size1 : size2); + + float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(result_size * sizeof(Eigen::half)); + Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(result_size * sizeof(Eigen::half)); + + Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1( + d_float1, size1, size2); + Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2( + d_float2, size1, size2); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_half( + d_res_half, result_size); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res_float( + d_res_float, result_size); + + gpu_float1.device(gpu_device) = gpu_float1.random() * 2.0f; + gpu_float2.device(gpu_device) = gpu_float2.random() * 2.0f; + + Eigen::array<int, 1> redux_dim = {{redux}}; + gpu_res_float.device(gpu_device) = gpu_float1.sum(redux_dim).cast<Eigen::half>(); + gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(redux_dim); + + Tensor<Eigen::half, 1> half_prec(result_size); + Tensor<Eigen::half, 1> full_prec(result_size); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, result_size*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, result_size*sizeof(Eigen::half)); + gpu_device.synchronize(); + + for (int i = 0; i < result_size; ++i) { + std::cout << "EXPECTED " << full_prec(i) << " GOT " << half_prec(i) << std::endl; + VERIFY_IS_APPROX(full_prec(i), half_prec(i)); + } + + gpu_device.deallocate(d_float1); + gpu_device.deallocate(d_float2); + gpu_device.deallocate(d_res_half); + gpu_device.deallocate(d_res_float); +} + +template<typename> +void test_cuda_reductions() { + test_cuda_reductions<void>(13, 13, 0); + test_cuda_reductions<void>(13, 13, 1); + + test_cuda_reductions<void>(35, 36, 0); + test_cuda_reductions<void>(35, 36, 1); + + test_cuda_reductions<void>(36, 35, 0); + test_cuda_reductions<void>(36, 35, 1); +} + +template<typename> +void test_cuda_full_reductions() { + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int size = 13; + int num_elem = size*size; + + float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + Eigen::half* d_res_half = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half)); + Eigen::half* d_res_float = (Eigen::half*)gpu_device.allocate(1 * sizeof(Eigen::half)); + + Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1( + d_float1, size, size); + Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2( + d_float2, size, size); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_half( + d_res_half); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 0>, Eigen::Aligned> gpu_res_float( + d_res_float); + + gpu_float1.device(gpu_device) = gpu_float1.random(); + gpu_float2.device(gpu_device) = 
gpu_float2.random(); + + gpu_res_float.device(gpu_device) = gpu_float1.sum().cast<Eigen::half>(); + gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().sum(); + + Tensor<Eigen::half, 0> half_prec; + Tensor<Eigen::half, 0> full_prec; + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::half)); + gpu_device.synchronize(); + + VERIFY_IS_APPROX(full_prec(), half_prec()); + + gpu_res_float.device(gpu_device) = gpu_float1.maximum().cast<Eigen::half>(); + gpu_res_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().maximum(); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::half)); + gpu_device.synchronize(); + + VERIFY_IS_APPROX(full_prec(), half_prec()); + + gpu_device.deallocate(d_float1); + gpu_device.deallocate(d_float2); + gpu_device.deallocate(d_res_half); + gpu_device.deallocate(d_res_float); +} + +template<typename> +void test_cuda_forced_evals() { + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res_half1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res_half2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float( + d_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_half1( + d_res_half1, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Unaligned> gpu_res_half2( + d_res_half2, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float( + d_res_float, num_elem); + + Eigen::array<int, 1> no_bcast; + no_bcast[0] = 1; + + gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f); + gpu_res_float.device(gpu_device) = gpu_float.abs(); + gpu_res_half1.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>(); + gpu_res_half2.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().broadcast(no_bcast).eval().cast<float>(); + + Tensor<float, 1> half_prec1(num_elem); + Tensor<float, 1> half_prec2(num_elem); + Tensor<float, 1> full_prec(num_elem); + gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res_half1, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res_half1, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float)); + gpu_device.synchronize(); + + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking forced eval " << i << full_prec(i) << " vs " << half_prec1(i) << " vs " << half_prec2(i) << std::endl; + VERIFY_IS_APPROX(full_prec(i), half_prec1(i)); + VERIFY_IS_APPROX(full_prec(i), half_prec2(i)); + } + + gpu_device.deallocate(d_float); + gpu_device.deallocate(d_res_half1); + gpu_device.deallocate(d_res_half2); + gpu_device.deallocate(d_res_float); +} +#endif + + +void test_cxx11_tensor_of_float16_cuda() +{ + CALL_SUBTEST_1(test_cuda_numext<void>()); + +#ifdef EIGEN_HAS_CUDA_FP16 + CALL_SUBTEST_1(test_cuda_conversion<void>()); + CALL_SUBTEST_1(test_cuda_unary<void>()); + CALL_SUBTEST_1(test_cuda_elementwise<void>()); + CALL_SUBTEST_1(test_cuda_trancendental<void>()); + CALL_SUBTEST_2(test_cuda_contractions<void>()); + 
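+  // Contractions, reductions and forced evaluations get their own subtest
+  // numbers so the split-test machinery can build and run them separately.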
CALL_SUBTEST_3(test_cuda_reductions<void>()); + CALL_SUBTEST_4(test_cuda_full_reductions<void>()); + CALL_SUBTEST_5(test_cuda_forced_evals<void>()); +#else + std::cout << "Half floats are not supported by this version of cuda: skipping the test" << std::endl; +#endif +} diff --git a/eigen/unsupported/test/cxx11_tensor_of_strings.cpp b/eigen/unsupported/test/cxx11_tensor_of_strings.cpp new file mode 100644 index 0000000..4ef9aed --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_of_strings.cpp @@ -0,0 +1,152 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::TensorMap; + +static void test_assign() +{ + std::string data1[6]; + TensorMap<Tensor<std::string, 2>> mat1(data1, 2, 3); + std::string data2[6]; + const TensorMap<Tensor<const std::string, 2>> mat2(data2, 2, 3); + + for (int i = 0; i < 6; ++i) { + std::ostringstream s1; + s1 << "abc" << i*3; + data1[i] = s1.str(); + std::ostringstream s2; + s2 << "def" << i*5; + data2[i] = s2.str(); + } + + Tensor<std::string, 2> rslt1; + rslt1 = mat1; + Tensor<std::string, 2> rslt2; + rslt2 = mat2; + + Tensor<std::string, 2> rslt3 = mat1; + Tensor<std::string, 2> rslt4 = mat2; + + Tensor<std::string, 2> rslt5(mat1); + Tensor<std::string, 2> rslt6(mat2); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL(rslt1(i,j), data1[i+2*j]); + VERIFY_IS_EQUAL(rslt2(i,j), data2[i+2*j]); + VERIFY_IS_EQUAL(rslt3(i,j), data1[i+2*j]); + VERIFY_IS_EQUAL(rslt4(i,j), data2[i+2*j]); + VERIFY_IS_EQUAL(rslt5(i,j), data1[i+2*j]); + VERIFY_IS_EQUAL(rslt6(i,j), data2[i+2*j]); + } + } +} + + +static void test_concat() +{ + Tensor<std::string, 2> t1(2, 3); + Tensor<std::string, 2> t2(2, 3); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + std::ostringstream s1; + s1 << "abc" << i + j*2; + t1(i, j) = s1.str(); + std::ostringstream s2; + s2 << "def" << i*5 + j*32; + t2(i, j) = s2.str(); + } + } + + Tensor<std::string, 2> result = t1.concatenate(t2, 1); + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_EQUAL(result.dimension(1), 6); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL(result(i, j), t1(i, j)); + VERIFY_IS_EQUAL(result(i, j+3), t2(i, j)); + } + } +} + + +static void test_slices() +{ + Tensor<std::string, 2> data(2, 6); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + std::ostringstream s1; + s1 << "abc" << i + j*2; + data(i, j) = s1.str(); + } + } + + const Eigen::DSizes<ptrdiff_t, 2> half_size(2, 3); + const Eigen::DSizes<ptrdiff_t, 2> first_half(0, 0); + const Eigen::DSizes<ptrdiff_t, 2> second_half(0, 3); + + Tensor<std::string, 2> t1 = data.slice(first_half, half_size); + Tensor<std::string, 2> t2 = data.slice(second_half, half_size); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL(data(i, j), t1(i, j)); + VERIFY_IS_EQUAL(data(i, j+3), t2(i, j)); + } + } +} + + +static void test_additions() +{ + Tensor<std::string, 1> data1(3); + Tensor<std::string, 1> data2(3); + for (int i = 0; i < 3; ++i) { + data1(i) = "abc"; + std::ostringstream s1; + s1 << i; + data2(i) = s1.str(); + } + + Tensor<std::string, 1> sum = 
data1 + data2; + for (int i = 0; i < 3; ++i) { + std::ostringstream concat; + concat << "abc" << i; + std::string expected = concat.str(); + VERIFY_IS_EQUAL(sum(i), expected); + } +} + + +static void test_initialization() +{ + Tensor<std::string, 2> a(2, 3); + a.setConstant(std::string("foo")); + for (int i = 0; i < 2*3; ++i) { + VERIFY_IS_EQUAL(a(i), std::string("foo")); + } +} + + +void test_cxx11_tensor_of_strings() +{ + // Beware: none of this is likely to ever work on a GPU. + CALL_SUBTEST(test_assign()); + CALL_SUBTEST(test_concat()); + CALL_SUBTEST(test_slices()); + CALL_SUBTEST(test_additions()); + CALL_SUBTEST(test_initialization()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_padding.cpp b/eigen/unsupported/test/cxx11_tensor_padding.cpp new file mode 100644 index 0000000..ffa1989 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_padding.cpp @@ -0,0 +1,93 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template<int DataLayout> +static void test_simple_padding() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + + array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings; + paddings[0] = std::make_pair(0, 0); + paddings[1] = std::make_pair(2, 1); + paddings[2] = std::make_pair(3, 4); + paddings[3] = std::make_pair(0, 0); + + Tensor<float, 4, DataLayout> padded; + padded = tensor.pad(paddings); + + VERIFY_IS_EQUAL(padded.dimension(0), 2+0); + VERIFY_IS_EQUAL(padded.dimension(1), 3+3); + VERIFY_IS_EQUAL(padded.dimension(2), 5+7); + VERIFY_IS_EQUAL(padded.dimension(3), 7+0); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 6; ++j) { + for (int k = 0; k < 12; ++k) { + for (int l = 0; l < 7; ++l) { + if (j >= 2 && j < 5 && k >= 3 && k < 8) { + VERIFY_IS_EQUAL(padded(i,j,k,l), tensor(i,j-2,k-3,l)); + } else { + VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f); + } + } + } + } + } +} + +template<int DataLayout> +static void test_padded_expr() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + + array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings; + paddings[0] = std::make_pair(0, 0); + paddings[1] = std::make_pair(2, 1); + paddings[2] = std::make_pair(3, 4); + paddings[3] = std::make_pair(0, 0); + + Eigen::DSizes<ptrdiff_t, 2> reshape_dims; + reshape_dims[0] = 12; + reshape_dims[1] = 84; + + Tensor<float, 2, DataLayout> result; + result = tensor.pad(paddings).reshape(reshape_dims); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 6; ++j) { + for (int k = 0; k < 12; ++k) { + for (int l = 0; l < 7; ++l) { + const float result_value = DataLayout == ColMajor ? 
+ result(i+2*j,k+12*l) : result(j+6*i,l+7*k); + if (j >= 2 && j < 5 && k >= 3 && k < 8) { + VERIFY_IS_EQUAL(result_value, tensor(i,j-2,k-3,l)); + } else { + VERIFY_IS_EQUAL(result_value, 0.0f); + } + } + } + } + } +} + +void test_cxx11_tensor_padding() +{ + CALL_SUBTEST(test_simple_padding<ColMajor>()); + CALL_SUBTEST(test_simple_padding<RowMajor>()); + CALL_SUBTEST(test_padded_expr<ColMajor>()); + CALL_SUBTEST(test_padded_expr<RowMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp new file mode 100644 index 0000000..dc748b7 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_padding_sycl.cpp @@ -0,0 +1,157 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_padding_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; + + +template<typename DataType, int DataLayout, typename IndexType> +static void test_simple_padding(const Eigen::SyclDevice& sycl_device) +{ + + IndexType sizeDim1 = 2; + IndexType sizeDim2 = 3; + IndexType sizeDim3 = 5; + IndexType sizeDim4 = 7; + array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; + + Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange); + tensor.setRandom(); + + array<std::pair<IndexType, IndexType>, 4> paddings; + paddings[0] = std::make_pair(0, 0); + paddings[1] = std::make_pair(2, 1); + paddings[2] = std::make_pair(3, 4); + paddings[3] = std::make_pair(0, 0); + + IndexType padedSizeDim1 = 2; + IndexType padedSizeDim2 = 6; + IndexType padedSizeDim3 = 12; + IndexType padedSizeDim4 = 7; + array<IndexType, 4> padedtensorRange = {{padedSizeDim1, padedSizeDim2, padedSizeDim3, padedSizeDim4}}; + + Tensor<DataType, 4, DataLayout, IndexType> padded(padedtensorRange); + + + DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType))); + DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(padded.size()*sizeof(DataType))); + TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange); + TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu2(gpu_data2, padedtensorRange); + + VERIFY_IS_EQUAL(padded.dimension(0), 2+0); + VERIFY_IS_EQUAL(padded.dimension(1), 3+3); + VERIFY_IS_EQUAL(padded.dimension(2), 5+7); + VERIFY_IS_EQUAL(padded.dimension(3), 7+0); + sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); + gpu2.device(sycl_device)=gpu1.pad(paddings); + sycl_device.memcpyDeviceToHost(padded.data(), gpu_data2,(padded.size())*sizeof(DataType)); + for (IndexType i = 0; i < padedSizeDim1; ++i) { + for (IndexType j = 0; j < padedSizeDim2; ++j) { + for (IndexType k = 0; k < padedSizeDim3; ++k) { + for (IndexType l = 0; l < padedSizeDim4; ++l) { + if (j >= 2 && j < 5 && k >= 3 && k < 
8) { + VERIFY_IS_EQUAL(padded(i,j,k,l), tensor(i,j-2,k-3,l)); + } else { + VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f); + } + } + } + } + } + sycl_device.deallocate(gpu_data1); + sycl_device.deallocate(gpu_data2); +} + +template<typename DataType, int DataLayout, typename IndexType> +static void test_padded_expr(const Eigen::SyclDevice& sycl_device) +{ + IndexType sizeDim1 = 2; + IndexType sizeDim2 = 3; + IndexType sizeDim3 = 5; + IndexType sizeDim4 = 7; + array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; + + Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange); + tensor.setRandom(); + + array<std::pair<IndexType, IndexType>, 4> paddings; + paddings[0] = std::make_pair(0, 0); + paddings[1] = std::make_pair(2, 1); + paddings[2] = std::make_pair(3, 4); + paddings[3] = std::make_pair(0, 0); + + Eigen::DSizes<IndexType, 2> reshape_dims; + reshape_dims[0] = 12; + reshape_dims[1] = 84; + + + Tensor<DataType, 2, DataLayout, IndexType> result(reshape_dims); + + DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor.size()*sizeof(DataType))); + DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(result.size()*sizeof(DataType))); + TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu1(gpu_data1, tensorRange); + TensorMap<Tensor<DataType, 2,DataLayout,IndexType>> gpu2(gpu_data2, reshape_dims); + + + sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(),(tensor.size())*sizeof(DataType)); + gpu2.device(sycl_device)=gpu1.pad(paddings).reshape(reshape_dims); + sycl_device.memcpyDeviceToHost(result.data(), gpu_data2,(result.size())*sizeof(DataType)); + + for (IndexType i = 0; i < 2; ++i) { + for (IndexType j = 0; j < 6; ++j) { + for (IndexType k = 0; k < 12; ++k) { + for (IndexType l = 0; l < 7; ++l) { + const float result_value = DataLayout == ColMajor ? + result(i+2*j,k+12*l) : result(j+6*i,l+7*k); + if (j >= 2 && j < 5 && k >= 3 && k < 8) { + VERIFY_IS_EQUAL(result_value, tensor(i,j-2,k-3,l)); + } else { + VERIFY_IS_EQUAL(result_value, 0.0f); + } + } + } + } + } + sycl_device.deallocate(gpu_data1); + sycl_device.deallocate(gpu_data2); +} + +template<typename DataType, typename dev_Selector> void sycl_padding_test_per_device(dev_Selector s){ + QueueInterface queueInterface(s); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + test_simple_padding<DataType, RowMajor, int64_t>(sycl_device); + test_simple_padding<DataType, ColMajor, int64_t>(sycl_device); + test_padded_expr<DataType, RowMajor, int64_t>(sycl_device); + test_padded_expr<DataType, ColMajor, int64_t>(sycl_device); + +} +void test_cxx11_tensor_padding_sycl() +{ + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(sycl_padding_test_per_device<float>(device)); + } +} diff --git a/eigen/unsupported/test/cxx11_tensor_patch.cpp b/eigen/unsupported/test/cxx11_tensor_patch.cpp new file mode 100644 index 0000000..4343597 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_patch.cpp @@ -0,0 +1,172 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
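Before the patch-extraction tests, a minimal host-side sketch of the pad() expression that both padding tests above (CPU and SYCL) drive. This is illustrative only, not part of the commit; it uses the same padding specification as the tests:

  #include <unsupported/Eigen/CXX11/Tensor>
  #include <utility>
  using Eigen::Tensor;

  int main() {
    // Dimension 1 gets 2 leading / 1 trailing zeros,
    // dimension 2 gets 3 leading / 4 trailing zeros.
    Tensor<float, 4> input(2, 3, 5, 7);
    input.setRandom();
    Eigen::array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings;
    paddings[0] = std::make_pair(0, 0);
    paddings[1] = std::make_pair(2, 1);
    paddings[2] = std::make_pair(3, 4);
    paddings[3] = std::make_pair(0, 0);
    // Resulting dimensions: (2, 6, 12, 7); padded coefficients are zero.
    Tensor<float, 4> padded = input.pad(paddings);
    return 0;
  }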
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template<int DataLayout> +static void test_simple_patch() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + array<ptrdiff_t, 4> patch_dims; + + patch_dims[0] = 1; + patch_dims[1] = 1; + patch_dims[2] = 1; + patch_dims[3] = 1; + + Tensor<float, 5, DataLayout> no_patch; + no_patch = tensor.extract_patches(patch_dims); + + if (DataLayout == ColMajor) { + VERIFY_IS_EQUAL(no_patch.dimension(0), 1); + VERIFY_IS_EQUAL(no_patch.dimension(1), 1); + VERIFY_IS_EQUAL(no_patch.dimension(2), 1); + VERIFY_IS_EQUAL(no_patch.dimension(3), 1); + VERIFY_IS_EQUAL(no_patch.dimension(4), tensor.size()); + } else { + VERIFY_IS_EQUAL(no_patch.dimension(0), tensor.size()); + VERIFY_IS_EQUAL(no_patch.dimension(1), 1); + VERIFY_IS_EQUAL(no_patch.dimension(2), 1); + VERIFY_IS_EQUAL(no_patch.dimension(3), 1); + VERIFY_IS_EQUAL(no_patch.dimension(4), 1); + } + + for (int i = 0; i < tensor.size(); ++i) { + VERIFY_IS_EQUAL(tensor.data()[i], no_patch.data()[i]); + } + + patch_dims[0] = 2; + patch_dims[1] = 3; + patch_dims[2] = 5; + patch_dims[3] = 7; + Tensor<float, 5, DataLayout> single_patch; + single_patch = tensor.extract_patches(patch_dims); + + if (DataLayout == ColMajor) { + VERIFY_IS_EQUAL(single_patch.dimension(0), 2); + VERIFY_IS_EQUAL(single_patch.dimension(1), 3); + VERIFY_IS_EQUAL(single_patch.dimension(2), 5); + VERIFY_IS_EQUAL(single_patch.dimension(3), 7); + VERIFY_IS_EQUAL(single_patch.dimension(4), 1); + } else { + VERIFY_IS_EQUAL(single_patch.dimension(0), 1); + VERIFY_IS_EQUAL(single_patch.dimension(1), 2); + VERIFY_IS_EQUAL(single_patch.dimension(2), 3); + VERIFY_IS_EQUAL(single_patch.dimension(3), 5); + VERIFY_IS_EQUAL(single_patch.dimension(4), 7); + } + + for (int i = 0; i < tensor.size(); ++i) { + VERIFY_IS_EQUAL(tensor.data()[i], single_patch.data()[i]); + } + + patch_dims[0] = 1; + patch_dims[1] = 2; + patch_dims[2] = 2; + patch_dims[3] = 1; + Tensor<float, 5, DataLayout> twod_patch; + twod_patch = tensor.extract_patches(patch_dims); + + if (DataLayout == ColMajor) { + VERIFY_IS_EQUAL(twod_patch.dimension(0), 1); + VERIFY_IS_EQUAL(twod_patch.dimension(1), 2); + VERIFY_IS_EQUAL(twod_patch.dimension(2), 2); + VERIFY_IS_EQUAL(twod_patch.dimension(3), 1); + VERIFY_IS_EQUAL(twod_patch.dimension(4), 2*2*4*7); + } else { + VERIFY_IS_EQUAL(twod_patch.dimension(0), 2*2*4*7); + VERIFY_IS_EQUAL(twod_patch.dimension(1), 1); + VERIFY_IS_EQUAL(twod_patch.dimension(2), 2); + VERIFY_IS_EQUAL(twod_patch.dimension(3), 2); + VERIFY_IS_EQUAL(twod_patch.dimension(4), 1); + } + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 4; ++k) { + for (int l = 0; l < 7; ++l) { + int patch_loc; + if (DataLayout == ColMajor) { + patch_loc = i + 2 * (j + 2 * (k + 4 * l)); + } else { + patch_loc = l + 7 * (k + 4 * (j + 2 * i)); + } + for (int x = 0; x < 2; ++x) { + for (int y = 0; y < 2; ++y) { + if (DataLayout == ColMajor) { + VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l), twod_patch(0,x,y,0,patch_loc)); + } else { + VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l), twod_patch(patch_loc,0,x,y,0)); + } + } + } + } + } + } + } + + patch_dims[0] = 1; + patch_dims[1] = 2; + patch_dims[2] = 3; + patch_dims[3] = 5; + Tensor<float, 5, DataLayout> threed_patch; + threed_patch = tensor.extract_patches(patch_dims); + + if (DataLayout == ColMajor) { + VERIFY_IS_EQUAL(threed_patch.dimension(0), 1); + VERIFY_IS_EQUAL(threed_patch.dimension(1), 2); + VERIFY_IS_EQUAL(threed_patch.dimension(2), 3); + 
VERIFY_IS_EQUAL(threed_patch.dimension(3), 5); + VERIFY_IS_EQUAL(threed_patch.dimension(4), 2*2*3*3); + } else { + VERIFY_IS_EQUAL(threed_patch.dimension(0), 2*2*3*3); + VERIFY_IS_EQUAL(threed_patch.dimension(1), 1); + VERIFY_IS_EQUAL(threed_patch.dimension(2), 2); + VERIFY_IS_EQUAL(threed_patch.dimension(3), 3); + VERIFY_IS_EQUAL(threed_patch.dimension(4), 5); + } + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 3; ++k) { + for (int l = 0; l < 3; ++l) { + int patch_loc; + if (DataLayout == ColMajor) { + patch_loc = i + 2 * (j + 2 * (k + 3 * l)); + } else { + patch_loc = l + 3 * (k + 3 * (j + 2 * i)); + } + for (int x = 0; x < 2; ++x) { + for (int y = 0; y < 3; ++y) { + for (int z = 0; z < 5; ++z) { + if (DataLayout == ColMajor) { + VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l+z), threed_patch(0,x,y,z,patch_loc)); + } else { + VERIFY_IS_EQUAL(tensor(i,j+x,k+y,l+z), threed_patch(patch_loc,0,x,y,z)); + } + } + } + } + } + } + } + } +} + +void test_cxx11_tensor_patch() +{ + CALL_SUBTEST(test_simple_patch<ColMajor>()); + CALL_SUBTEST(test_simple_patch<RowMajor>()); + // CALL_SUBTEST(test_expr_shuffling()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_random.cpp b/eigen/unsupported/test/cxx11_tensor_random.cpp new file mode 100644 index 0000000..0f3dc57 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_random.cpp @@ -0,0 +1,78 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +static void test_default() +{ + Tensor<float, 1> vec(6); + vec.setRandom(); + + // Fixme: we should check that the generated numbers follow a uniform + // distribution instead. + for (int i = 1; i < 6; ++i) { + VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1)); + } +} + +static void test_normal() +{ + Tensor<float, 1> vec(6); + vec.setRandom<Eigen::internal::NormalRandomGenerator<float>>(); + + // Fixme: we should check that the generated numbers follow a gaussian + // distribution instead. + for (int i = 1; i < 6; ++i) { + VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1)); + } +} + + +struct MyGenerator { + MyGenerator() { } + MyGenerator(const MyGenerator&) { } + + // Return a random value to be used. "element_location" is the + // location of the entry to set in the tensor, it can typically + // be ignored. + int operator()(Eigen::DenseIndex element_location, Eigen::DenseIndex /*unused*/ = 0) const { + return static_cast<int>(3 * element_location); + } + + // Same as above but generates several numbers at a time. 
+  internal::packet_traits<int>::type packetOp(
+      Eigen::DenseIndex packet_location, Eigen::DenseIndex /*unused*/ = 0) const {
+    const int packetSize = internal::packet_traits<int>::size;
+    EIGEN_ALIGN_MAX int values[packetSize];
+    for (int i = 0; i < packetSize; ++i) {
+      values[i] = static_cast<int>(3 * (packet_location + i));
+    }
+    return internal::pload<typename internal::packet_traits<int>::type>(values);
+  }
+};
+
+
+static void test_custom()
+{
+  Tensor<int, 1> vec(6);
+  vec.setRandom<MyGenerator>();
+
+  for (int i = 0; i < 6; ++i) {
+    VERIFY_IS_EQUAL(vec(i), 3*i);
+  }
+}
+
+void test_cxx11_tensor_random()
+{
+  CALL_SUBTEST(test_default());
+  CALL_SUBTEST(test_normal());
+  CALL_SUBTEST(test_custom());
+}
diff --git a/eigen/unsupported/test/cxx11_tensor_random_cuda.cu b/eigen/unsupported/test/cxx11_tensor_random_cuda.cu
new file mode 100644
index 0000000..b3be199
--- /dev/null
+++ b/eigen/unsupported/test/cxx11_tensor_random_cuda.cu
@@ -0,0 +1,88 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_random_cuda
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <Eigen/CXX11/Tensor>
+
+
+void test_cuda_random_uniform()
+{
+  Tensor<float, 2> out(72,97);
+  out.setZero();
+
+  std::size_t out_bytes = out.size() * sizeof(float);
+
+  float* d_out;
+  cudaMalloc((void**)(&d_out), out_bytes);
+
+  Eigen::CudaStreamDevice stream;
+  Eigen::GpuDevice gpu_device(&stream);
+
+  Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);
+
+  gpu_out.device(gpu_device) = gpu_out.random();
+
+  assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+  assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+  // For now we just check that the code doesn't crash.
+  // TODO: come up with a valid test of randomness
+}
+
+
+void test_cuda_random_normal()
+{
+  Tensor<float, 2> out(72,97);
+  out.setZero();
+
+  std::size_t out_bytes = out.size() * sizeof(float);
+
+  float* d_out;
+  cudaMalloc((void**)(&d_out), out_bytes);
+
+  Eigen::CudaStreamDevice stream;
+  Eigen::GpuDevice gpu_device(&stream);
+
+  Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);
+
+  Eigen::internal::NormalRandomGenerator<float> gen(true);
+  gpu_out.device(gpu_device) = gpu_out.random(gen);
+
+  assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
+  assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+}
+
+static void test_complex()
+{
+  Tensor<std::complex<float>, 1> vec(6);
+  vec.setRandom();
+
+  // Fixme: we should check that the generated numbers follow a uniform
+  // distribution instead.
+ for (int i = 1; i < 6; ++i) { + VERIFY_IS_NOT_EQUAL(vec(i), vec(i-1)); + } +} + + +void test_cxx11_tensor_random_cuda() +{ + CALL_SUBTEST(test_cuda_random_uniform()); + CALL_SUBTEST(test_cuda_random_normal()); + CALL_SUBTEST(test_complex()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_reduction.cpp b/eigen/unsupported/test/cxx11_tensor_reduction.cpp new file mode 100644 index 0000000..1490ec3 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_reduction.cpp @@ -0,0 +1,508 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include <limits> +#include <numeric> +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template <int DataLayout> +static void test_trivial_reductions() { + { + Tensor<float, 0, DataLayout> tensor; + tensor.setRandom(); + array<ptrdiff_t, 0> reduction_axis; + + Tensor<float, 0, DataLayout> result = tensor.sum(reduction_axis); + VERIFY_IS_EQUAL(result(), tensor()); + } + + { + Tensor<float, 1, DataLayout> tensor(7); + tensor.setRandom(); + array<ptrdiff_t, 0> reduction_axis; + + Tensor<float, 1, DataLayout> result = tensor.sum(reduction_axis); + VERIFY_IS_EQUAL(result.dimension(0), 7); + for (int i = 0; i < 7; ++i) { + VERIFY_IS_EQUAL(result(i), tensor(i)); + } + } + + { + Tensor<float, 2, DataLayout> tensor(2, 3); + tensor.setRandom(); + array<ptrdiff_t, 0> reduction_axis; + + Tensor<float, 2, DataLayout> result = tensor.sum(reduction_axis); + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_EQUAL(result.dimension(1), 3); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + VERIFY_IS_EQUAL(result(i, j), tensor(i, j)); + } + } + } +} + +template <int DataLayout> +static void test_simple_reductions() { + Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7); + tensor.setRandom(); + array<ptrdiff_t, 2> reduction_axis2; + reduction_axis2[0] = 1; + reduction_axis2[1] = 3; + + Tensor<float, 2, DataLayout> result = tensor.sum(reduction_axis2); + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_EQUAL(result.dimension(1), 5); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 5; ++j) { + float sum = 0.0f; + for (int k = 0; k < 3; ++k) { + for (int l = 0; l < 7; ++l) { + sum += tensor(i, k, j, l); + } + } + VERIFY_IS_APPROX(result(i, j), sum); + } + } + + { + Tensor<float, 0, DataLayout> sum1 = tensor.sum(); + VERIFY_IS_EQUAL(sum1.rank(), 0); + + array<ptrdiff_t, 4> reduction_axis4; + reduction_axis4[0] = 0; + reduction_axis4[1] = 1; + reduction_axis4[2] = 2; + reduction_axis4[3] = 3; + Tensor<float, 0, DataLayout> sum2 = tensor.sum(reduction_axis4); + VERIFY_IS_EQUAL(sum2.rank(), 0); + + VERIFY_IS_APPROX(sum1(), sum2()); + } + + reduction_axis2[0] = 0; + reduction_axis2[1] = 2; + result = tensor.prod(reduction_axis2); + VERIFY_IS_EQUAL(result.dimension(0), 3); + VERIFY_IS_EQUAL(result.dimension(1), 7); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 7; ++j) { + float prod = 1.0f; + for (int k = 0; k < 2; ++k) { + for (int l = 0; l < 5; ++l) { + prod *= tensor(k, i, l, j); + } + } + VERIFY_IS_APPROX(result(i, j), prod); + } + } + + { + Tensor<float, 0, DataLayout> prod1 = tensor.prod(); + VERIFY_IS_EQUAL(prod1.rank(), 0); + + array<ptrdiff_t, 4> reduction_axis4; + reduction_axis4[0] = 0; + 
reduction_axis4[1] = 1; + reduction_axis4[2] = 2; + reduction_axis4[3] = 3; + Tensor<float, 0, DataLayout> prod2 = tensor.prod(reduction_axis4); + VERIFY_IS_EQUAL(prod2.rank(), 0); + + VERIFY_IS_APPROX(prod1(), prod2()); + } + + reduction_axis2[0] = 0; + reduction_axis2[1] = 2; + result = tensor.maximum(reduction_axis2); + VERIFY_IS_EQUAL(result.dimension(0), 3); + VERIFY_IS_EQUAL(result.dimension(1), 7); + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 7; ++j) { + float max_val = std::numeric_limits<float>::lowest(); + for (int k = 0; k < 2; ++k) { + for (int l = 0; l < 5; ++l) { + max_val = (std::max)(max_val, tensor(k, i, l, j)); + } + } + VERIFY_IS_APPROX(result(i, j), max_val); + } + } + + { + Tensor<float, 0, DataLayout> max1 = tensor.maximum(); + VERIFY_IS_EQUAL(max1.rank(), 0); + + array<ptrdiff_t, 4> reduction_axis4; + reduction_axis4[0] = 0; + reduction_axis4[1] = 1; + reduction_axis4[2] = 2; + reduction_axis4[3] = 3; + Tensor<float, 0, DataLayout> max2 = tensor.maximum(reduction_axis4); + VERIFY_IS_EQUAL(max2.rank(), 0); + + VERIFY_IS_APPROX(max1(), max2()); + } + + reduction_axis2[0] = 0; + reduction_axis2[1] = 1; + result = tensor.minimum(reduction_axis2); + VERIFY_IS_EQUAL(result.dimension(0), 5); + VERIFY_IS_EQUAL(result.dimension(1), 7); + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 7; ++j) { + float min_val = (std::numeric_limits<float>::max)(); + for (int k = 0; k < 2; ++k) { + for (int l = 0; l < 3; ++l) { + min_val = (std::min)(min_val, tensor(k, l, i, j)); + } + } + VERIFY_IS_APPROX(result(i, j), min_val); + } + } + + { + Tensor<float, 0, DataLayout> min1 = tensor.minimum(); + VERIFY_IS_EQUAL(min1.rank(), 0); + + array<ptrdiff_t, 4> reduction_axis4; + reduction_axis4[0] = 0; + reduction_axis4[1] = 1; + reduction_axis4[2] = 2; + reduction_axis4[3] = 3; + Tensor<float, 0, DataLayout> min2 = tensor.minimum(reduction_axis4); + VERIFY_IS_EQUAL(min2.rank(), 0); + + VERIFY_IS_APPROX(min1(), min2()); + } + + reduction_axis2[0] = 0; + reduction_axis2[1] = 1; + result = tensor.mean(reduction_axis2); + VERIFY_IS_EQUAL(result.dimension(0), 5); + VERIFY_IS_EQUAL(result.dimension(1), 7); + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 7; ++j) { + float sum = 0.0f; + int count = 0; + for (int k = 0; k < 2; ++k) { + for (int l = 0; l < 3; ++l) { + sum += tensor(k, l, i, j); + ++count; + } + } + VERIFY_IS_APPROX(result(i, j), sum / count); + } + } + + { + Tensor<float, 0, DataLayout> mean1 = tensor.mean(); + VERIFY_IS_EQUAL(mean1.rank(), 0); + + array<ptrdiff_t, 4> reduction_axis4; + reduction_axis4[0] = 0; + reduction_axis4[1] = 1; + reduction_axis4[2] = 2; + reduction_axis4[3] = 3; + Tensor<float, 0, DataLayout> mean2 = tensor.mean(reduction_axis4); + VERIFY_IS_EQUAL(mean2.rank(), 0); + + VERIFY_IS_APPROX(mean1(), mean2()); + } + + { + Tensor<int, 1> ints(10); + std::iota(ints.data(), ints.data() + ints.dimension(0), 0); + + TensorFixedSize<bool, Sizes<> > all; + all = ints.all(); + VERIFY(!all()); + all = (ints >= ints.constant(0)).all(); + VERIFY(all()); + + TensorFixedSize<bool, Sizes<> > any; + any = (ints > ints.constant(10)).any(); + VERIFY(!any()); + any = (ints < ints.constant(1)).any(); + VERIFY(any()); + } +} + + +template <int DataLayout> +static void test_reductions_in_expr() { + Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7); + tensor.setRandom(); + array<ptrdiff_t, 2> reduction_axis2; + reduction_axis2[0] = 1; + reduction_axis2[1] = 3; + + Tensor<float, 2, DataLayout> result(2, 5); + result = result.constant(1.0f) - tensor.sum(reduction_axis2); + 
VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_EQUAL(result.dimension(1), 5); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 5; ++j) { + float sum = 0.0f; + for (int k = 0; k < 3; ++k) { + for (int l = 0; l < 7; ++l) { + sum += tensor(i, k, j, l); + } + } + VERIFY_IS_APPROX(result(i, j), 1.0f - sum); + } + } +} + + +template <int DataLayout> +static void test_full_reductions() { + Tensor<float, 2, DataLayout> tensor(2, 3); + tensor.setRandom(); + array<ptrdiff_t, 2> reduction_axis; + reduction_axis[0] = 0; + reduction_axis[1] = 1; + + Tensor<float, 0, DataLayout> result = tensor.sum(reduction_axis); + VERIFY_IS_EQUAL(result.rank(), 0); + + float sum = 0.0f; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + sum += tensor(i, j); + } + } + VERIFY_IS_APPROX(result(0), sum); + + result = tensor.square().sum(reduction_axis).sqrt(); + VERIFY_IS_EQUAL(result.rank(), 0); + + sum = 0.0f; + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + sum += tensor(i, j) * tensor(i, j); + } + } + VERIFY_IS_APPROX(result(), sqrtf(sum)); +} + +struct UserReducer { + static const bool PacketAccess = false; + UserReducer(float offset) : offset_(offset) {} + void reduce(const float val, float* accum) { *accum += val * val; } + float initialize() const { return 0; } + float finalize(const float accum) const { return 1.0f / (accum + offset_); } + + private: + const float offset_; +}; + +template <int DataLayout> +static void test_user_defined_reductions() { + Tensor<float, 2, DataLayout> tensor(5, 7); + tensor.setRandom(); + array<ptrdiff_t, 1> reduction_axis; + reduction_axis[0] = 1; + + UserReducer reducer(10.0f); + Tensor<float, 1, DataLayout> result = tensor.reduce(reduction_axis, reducer); + VERIFY_IS_EQUAL(result.dimension(0), 5); + for (int i = 0; i < 5; ++i) { + float expected = 10.0f; + for (int j = 0; j < 7; ++j) { + expected += tensor(i, j) * tensor(i, j); + } + expected = 1.0f / expected; + VERIFY_IS_APPROX(result(i), expected); + } +} + +template <int DataLayout> +static void test_tensor_maps() { + int inputs[2 * 3 * 5 * 7]; + TensorMap<Tensor<int, 4, DataLayout> > tensor_map(inputs, 2, 3, 5, 7); + TensorMap<Tensor<const int, 4, DataLayout> > tensor_map_const(inputs, 2, 3, 5, + 7); + const TensorMap<Tensor<const int, 4, DataLayout> > tensor_map_const_const( + inputs, 2, 3, 5, 7); + + tensor_map.setRandom(); + array<ptrdiff_t, 2> reduction_axis; + reduction_axis[0] = 1; + reduction_axis[1] = 3; + + Tensor<int, 2, DataLayout> result = tensor_map.sum(reduction_axis); + Tensor<int, 2, DataLayout> result2 = tensor_map_const.sum(reduction_axis); + Tensor<int, 2, DataLayout> result3 = + tensor_map_const_const.sum(reduction_axis); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 5; ++j) { + int sum = 0; + for (int k = 0; k < 3; ++k) { + for (int l = 0; l < 7; ++l) { + sum += tensor_map(i, k, j, l); + } + } + VERIFY_IS_EQUAL(result(i, j), sum); + VERIFY_IS_EQUAL(result2(i, j), sum); + VERIFY_IS_EQUAL(result3(i, j), sum); + } + } +} + +template <int DataLayout> +static void test_static_dims() { + Tensor<float, 4, DataLayout> in(72, 53, 97, 113); + Tensor<float, 2, DataLayout> out(72, 97); + in.setRandom(); + +#if !EIGEN_HAS_CONSTEXPR + array<int, 2> reduction_axis; + reduction_axis[0] = 1; + reduction_axis[1] = 3; +#else + Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<3> > reduction_axis; +#endif + + out = in.maximum(reduction_axis); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 97; ++j) { + float expected = -1e10f; + for (int k = 0; k < 53; 
++k) {
+        for (int l = 0; l < 113; ++l) {
+          expected = (std::max)(expected, in(i, k, j, l));
+        }
+      }
+      VERIFY_IS_APPROX(out(i, j), expected);
+    }
+  }
+}
+
+template <int DataLayout>
+static void test_innermost_last_dims() {
+  Tensor<float, 4, DataLayout> in(72, 53, 97, 113);
+  Tensor<float, 2, DataLayout> out(97, 113);
+  in.setRandom();
+
+// Reduce on the innermost dimensions.
+#if !EIGEN_HAS_CONSTEXPR
+  array<int, 2> reduction_axis;
+  reduction_axis[0] = 0;
+  reduction_axis[1] = 1;
+#else
+  // This triggers the use of packets for ColMajor.
+  Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1> > reduction_axis;
+#endif
+
+  out = in.maximum(reduction_axis);
+
+  for (int i = 0; i < 97; ++i) {
+    for (int j = 0; j < 113; ++j) {
+      float expected = -1e10f;
+      for (int k = 0; k < 53; ++k) {
+        for (int l = 0; l < 72; ++l) {
+          expected = (std::max)(expected, in(l, k, i, j));
+        }
+      }
+      VERIFY_IS_APPROX(out(i, j), expected);
+    }
+  }
+}
+
+template <int DataLayout>
+static void test_innermost_first_dims() {
+  Tensor<float, 4, DataLayout> in(72, 53, 97, 113);
+  Tensor<float, 2, DataLayout> out(72, 53);
+  in.setRandom();
+
+// Reduce on the innermost dimensions.
+#if !EIGEN_HAS_CONSTEXPR
+  array<int, 2> reduction_axis;
+  reduction_axis[0] = 2;
+  reduction_axis[1] = 3;
+#else
+  // This triggers the use of packets for RowMajor.
+  Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>> reduction_axis;
+#endif
+
+  out = in.maximum(reduction_axis);
+
+  for (int i = 0; i < 72; ++i) {
+    for (int j = 0; j < 53; ++j) {
+      float expected = -1e10f;
+      for (int k = 0; k < 97; ++k) {
+        for (int l = 0; l < 113; ++l) {
+          expected = (std::max)(expected, in(i, j, k, l));
+        }
+      }
+      VERIFY_IS_APPROX(out(i, j), expected);
+    }
+  }
+}
+
+template <int DataLayout>
+static void test_reduce_middle_dims() {
+  Tensor<float, 4, DataLayout> in(72, 53, 97, 113);
+  Tensor<float, 2, DataLayout> out(72, 113);
+  in.setRandom();
+
+// Reduce on the middle dimensions.
+#if !EIGEN_HAS_CONSTEXPR
+  array<int, 2> reduction_axis;
+  reduction_axis[0] = 1;
+  reduction_axis[1] = 2;
+#else
+  // This triggers the use of packets for RowMajor.
+ Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>> reduction_axis; +#endif + + out = in.maximum(reduction_axis); + + for (int i = 0; i < 72; ++i) { + for (int j = 0; j < 113; ++j) { + float expected = -1e10f; + for (int k = 0; k < 53; ++k) { + for (int l = 0; l < 97; ++l) { + expected = (std::max)(expected, in(i, k, l, j)); + } + } + VERIFY_IS_APPROX(out(i, j), expected); + } + } +} + +void test_cxx11_tensor_reduction() { + CALL_SUBTEST(test_trivial_reductions<ColMajor>()); + CALL_SUBTEST(test_trivial_reductions<RowMajor>()); + CALL_SUBTEST(test_simple_reductions<ColMajor>()); + CALL_SUBTEST(test_simple_reductions<RowMajor>()); + CALL_SUBTEST(test_reductions_in_expr<ColMajor>()); + CALL_SUBTEST(test_reductions_in_expr<RowMajor>()); + CALL_SUBTEST(test_full_reductions<ColMajor>()); + CALL_SUBTEST(test_full_reductions<RowMajor>()); + CALL_SUBTEST(test_user_defined_reductions<ColMajor>()); + CALL_SUBTEST(test_user_defined_reductions<RowMajor>()); + CALL_SUBTEST(test_tensor_maps<ColMajor>()); + CALL_SUBTEST(test_tensor_maps<RowMajor>()); + CALL_SUBTEST(test_static_dims<ColMajor>()); + CALL_SUBTEST(test_static_dims<RowMajor>()); + CALL_SUBTEST(test_innermost_last_dims<ColMajor>()); + CALL_SUBTEST(test_innermost_last_dims<RowMajor>()); + CALL_SUBTEST(test_innermost_first_dims<ColMajor>()); + CALL_SUBTEST(test_innermost_first_dims<RowMajor>()); + CALL_SUBTEST(test_reduce_middle_dims<ColMajor>()); + CALL_SUBTEST(test_reduce_middle_dims<RowMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_reduction_cuda.cu b/eigen/unsupported/test/cxx11_tensor_reduction_cuda.cu new file mode 100644 index 0000000..6858b43 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_reduction_cuda.cu @@ -0,0 +1,157 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
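For orientation, the reduction API exercised by cxx11_tensor_reduction.cpp above takes an explicit list of axes and returns a tensor of correspondingly lower rank; a full reduction yields a rank-0 tensor read back through operator(). A minimal sketch, illustrative only and not part of the commit:

  #include <unsupported/Eigen/CXX11/Tensor>
  using Eigen::Tensor;

  int main() {
    Tensor<float, 4> t(2, 3, 5, 7);
    t.setRandom();
    // Reducing over axes 1 and 3 leaves a (2, 5) tensor.
    Eigen::array<ptrdiff_t, 2> axes;
    axes[0] = 1;
    axes[1] = 3;
    Tensor<float, 2> sums = t.sum(axes);
    // A full reduction produces a rank-0 tensor.
    Tensor<float, 0> total = t.sum();
    float value = total();
    (void)sums; (void)value;
    return 0;
  }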
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + + +template<typename Type, int DataLayout> +static void test_full_reductions() { + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + const int num_rows = internal::random<int>(1024, 5*1024); + const int num_cols = internal::random<int>(1024, 5*1024); + + Tensor<Type, 2, DataLayout> in(num_rows, num_cols); + in.setRandom(); + + Tensor<Type, 0, DataLayout> full_redux; + full_redux = in.sum(); + + std::size_t in_bytes = in.size() * sizeof(Type); + std::size_t out_bytes = full_redux.size() * sizeof(Type); + Type* gpu_in_ptr = static_cast<Type*>(gpu_device.allocate(in_bytes)); + Type* gpu_out_ptr = static_cast<Type*>(gpu_device.allocate(out_bytes)); + gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes); + + TensorMap<Tensor<Type, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols); + TensorMap<Tensor<Type, 0, DataLayout> > out_gpu(gpu_out_ptr); + + out_gpu.device(gpu_device) = in_gpu.sum(); + + Tensor<Type, 0, DataLayout> full_redux_gpu; + gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes); + gpu_device.synchronize(); + + // Check that the CPU and GPU reductions return the same result. + VERIFY_IS_APPROX(full_redux(), full_redux_gpu()); + + gpu_device.deallocate(gpu_in_ptr); + gpu_device.deallocate(gpu_out_ptr); +} + +template<typename Type, int DataLayout> +static void test_first_dim_reductions() { + int dim_x = 33; + int dim_y = 1; + int dim_z = 128; + + Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z); + in.setRandom(); + + Eigen::array<int, 1> red_axis; + red_axis[0] = 0; + Tensor<Type, 2, DataLayout> redux = in.sum(red_axis); + + // Create device + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice dev(&stream); + + // Create data(T) + Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type)); + Type* out_data = (Type*)dev.allocate(dim_z*dim_y*sizeof(Type)); + Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z); + Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_y, dim_z); + + // Perform operation + dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type)); + gpu_out.device(dev) = gpu_in.sum(red_axis); + gpu_out.device(dev) += gpu_in.sum(red_axis); + Tensor<Type, 2, DataLayout> redux_gpu(dim_y, dim_z); + dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type)); + dev.synchronize(); + + // Check that the CPU and GPU reductions return the same result. 
+ for (int i = 0; i < gpu_out.size(); ++i) { + VERIFY_IS_APPROX(2*redux(i), redux_gpu(i)); + } + + dev.deallocate(in_data); + dev.deallocate(out_data); +} + +template<typename Type, int DataLayout> +static void test_last_dim_reductions() { + int dim_x = 128; + int dim_y = 1; + int dim_z = 33; + + Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z); + in.setRandom(); + + Eigen::array<int, 1> red_axis; + red_axis[0] = 2; + Tensor<Type, 2, DataLayout> redux = in.sum(red_axis); + + // Create device + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice dev(&stream); + + // Create data + Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type)); + Type* out_data = (Type*)dev.allocate(dim_x*dim_y*sizeof(Type)); + Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z); + Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_x, dim_y); + + // Perform operation + dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type)); + gpu_out.device(dev) = gpu_in.sum(red_axis); + gpu_out.device(dev) += gpu_in.sum(red_axis); + Tensor<Type, 2, DataLayout> redux_gpu(dim_x, dim_y); + dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type)); + dev.synchronize(); + + // Check that the CPU and GPU reductions return the same result. + for (int i = 0; i < gpu_out.size(); ++i) { + VERIFY_IS_APPROX(2*redux(i), redux_gpu(i)); + } + + dev.deallocate(in_data); + dev.deallocate(out_data); +} + + +void test_cxx11_tensor_reduction_cuda() { + CALL_SUBTEST_1((test_full_reductions<float, ColMajor>())); + CALL_SUBTEST_1((test_full_reductions<double, ColMajor>())); + CALL_SUBTEST_2((test_full_reductions<float, RowMajor>())); + CALL_SUBTEST_2((test_full_reductions<double, RowMajor>())); + + CALL_SUBTEST_3((test_first_dim_reductions<float, ColMajor>())); + CALL_SUBTEST_3((test_first_dim_reductions<double, ColMajor>())); + CALL_SUBTEST_4((test_first_dim_reductions<float, RowMajor>())); +// Outer reductions of doubles aren't supported just yet. +// CALL_SUBTEST_4((test_first_dim_reductions<double, RowMajor>())) + + CALL_SUBTEST_5((test_last_dim_reductions<float, ColMajor>())); +// Outer reductions of doubles aren't supported just yet. +// CALL_SUBTEST_5((test_last_dim_reductions<double, ColMajor>())); + CALL_SUBTEST_6((test_last_dim_reductions<float, RowMajor>())); + CALL_SUBTEST_6((test_last_dim_reductions<double, RowMajor>())); +} diff --git a/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp new file mode 100644 index 0000000..440d48b --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_reduction_sycl.cpp @@ -0,0 +1,181 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
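The CUDA file above follows the same device-offload pattern throughout: allocate device buffers, copy the inputs in, evaluate the tensor expression through .device(), copy the result out, then synchronize. A condensed sketch of that pattern, with hypothetical sizes and a hypothetical helper name, assuming a .cu translation unit built with EIGEN_USE_GPU (the SYCL tests below mirror it with Eigen::SyclDevice):

  #include <unsupported/Eigen/CXX11/Tensor>
  using Eigen::Tensor;

  void offload_square() {  // hypothetical name, for illustration
    Eigen::CudaStreamDevice stream;
    Eigen::GpuDevice dev(&stream);

    Tensor<float, 2> host_in(64, 64), host_out(64, 64);  // hypothetical sizes
    host_in.setRandom();

    std::size_t bytes = host_in.size() * sizeof(float);
    float* d_in  = static_cast<float*>(dev.allocate(bytes));
    float* d_out = static_cast<float*>(dev.allocate(bytes));
    dev.memcpyHostToDevice(d_in, host_in.data(), bytes);

    Eigen::TensorMap<Tensor<float, 2> > in(d_in, 64, 64);
    Eigen::TensorMap<Tensor<float, 2> > out(d_out, 64, 64);
    out.device(dev) = in * in;  // the expression is evaluated on the GPU
    dev.memcpyDeviceToHost(host_out.data(), d_out, bytes);
    dev.synchronize();

    dev.deallocate(d_in);
    dev.deallocate(d_out);
  }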
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_reduction_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_full_reductions_mean_sycl(const Eigen::SyclDevice& sycl_device) { + + const IndexType num_rows = 452; + const IndexType num_cols = 765; + array<IndexType, 2> tensorRange = {{num_rows, num_cols}}; + + Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange); + Tensor<DataType, 0, DataLayout, IndexType> full_redux; + Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu; + + in.setRandom(); + + full_redux = in.mean(); + + DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); + DataType* gpu_out_data =(DataType*)sycl_device.allocate(sizeof(DataType)); + + TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); + TensorMap<Tensor<DataType, 0, DataLayout, IndexType> > out_gpu(gpu_out_data); + + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); + out_gpu.device(sycl_device) = in_gpu.mean(); + sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(DataType)); + // Check that the CPU and GPU reductions return the same result. + VERIFY_IS_APPROX(full_redux_gpu(), full_redux()); + sycl_device.deallocate(gpu_in_data); + sycl_device.deallocate(gpu_out_data); +} + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_full_reductions_min_sycl(const Eigen::SyclDevice& sycl_device) { + + const IndexType num_rows = 876; + const IndexType num_cols = 953; + array<IndexType, 2> tensorRange = {{num_rows, num_cols}}; + + Tensor<DataType, 2, DataLayout, IndexType> in(tensorRange); + Tensor<DataType, 0, DataLayout, IndexType> full_redux; + Tensor<DataType, 0, DataLayout, IndexType> full_redux_gpu; + + in.setRandom(); + + full_redux = in.minimum(); + + DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); + DataType* gpu_out_data =(DataType*)sycl_device.allocate(sizeof(DataType)); + + TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); + TensorMap<Tensor<DataType, 0, DataLayout, IndexType> > out_gpu(gpu_out_data); + + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); + out_gpu.device(sycl_device) = in_gpu.minimum(); + sycl_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_data, sizeof(DataType)); + // Check that the CPU and GPU reductions return the same result. 
+ VERIFY_IS_APPROX(full_redux_gpu(), full_redux()); + sycl_device.deallocate(gpu_in_data); + sycl_device.deallocate(gpu_out_data); +} + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_first_dim_reductions_max_sycl(const Eigen::SyclDevice& sycl_device) { + + IndexType dim_x = 145; + IndexType dim_y = 1; + IndexType dim_z = 67; + + array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}}; + Eigen::array<IndexType, 1> red_axis; + red_axis[0] = 0; + array<IndexType, 2> reduced_tensorRange = {{dim_y, dim_z}}; + + Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange); + Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange); + Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange); + + in.setRandom(); + + redux= in.maximum(red_axis); + + DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); + DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType))); + + TensorMap<Tensor<DataType, 3, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); + TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > out_gpu(gpu_out_data, reduced_tensorRange); + + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); + out_gpu.device(sycl_device) = in_gpu.maximum(red_axis); + sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(DataType)); + + // Check that the CPU and GPU reductions return the same result. + for(IndexType j=0; j<reduced_tensorRange[0]; j++ ) + for(IndexType k=0; k<reduced_tensorRange[1]; k++ ) + VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k)); + + sycl_device.deallocate(gpu_in_data); + sycl_device.deallocate(gpu_out_data); +} + +template <typename DataType, int DataLayout, typename IndexType> +static void test_last_dim_reductions_sum_sycl(const Eigen::SyclDevice &sycl_device) { + + IndexType dim_x = 567; + IndexType dim_y = 1; + IndexType dim_z = 47; + + array<IndexType, 3> tensorRange = {{dim_x, dim_y, dim_z}}; + Eigen::array<IndexType, 1> red_axis; + red_axis[0] = 2; + array<IndexType, 2> reduced_tensorRange = {{dim_x, dim_y}}; + + Tensor<DataType, 3, DataLayout, IndexType> in(tensorRange); + Tensor<DataType, 2, DataLayout, IndexType> redux(reduced_tensorRange); + Tensor<DataType, 2, DataLayout, IndexType> redux_gpu(reduced_tensorRange); + + in.setRandom(); + + redux= in.sum(red_axis); + + DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(DataType))); + DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(redux_gpu.dimensions().TotalSize()*sizeof(DataType))); + + TensorMap<Tensor<DataType, 3, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); + TensorMap<Tensor<DataType, 2, DataLayout, IndexType> > out_gpu(gpu_out_data, reduced_tensorRange); + + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.dimensions().TotalSize())*sizeof(DataType)); + out_gpu.device(sycl_device) = in_gpu.sum(red_axis); + sycl_device.memcpyDeviceToHost(redux_gpu.data(), gpu_out_data, redux_gpu.dimensions().TotalSize()*sizeof(DataType)); + // Check that the CPU and GPU reductions return the same result. 
+ for(IndexType j=0; j<reduced_tensorRange[0]; j++ ) + for(IndexType k=0; k<reduced_tensorRange[1]; k++ ) + VERIFY_IS_APPROX(redux_gpu(j,k), redux(j,k)); + + sycl_device.deallocate(gpu_in_data); + sycl_device.deallocate(gpu_out_data); + +} +template<typename DataType> void sycl_reduction_test_per_device(const cl::sycl::device& d){ + std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl; + QueueInterface queueInterface(d); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + + test_full_reductions_mean_sycl<DataType, RowMajor, int64_t>(sycl_device); + test_full_reductions_min_sycl<DataType, RowMajor, int64_t>(sycl_device); + test_first_dim_reductions_max_sycl<DataType, RowMajor, int64_t>(sycl_device); + test_last_dim_reductions_sum_sycl<DataType, RowMajor, int64_t>(sycl_device); + test_full_reductions_mean_sycl<DataType, ColMajor, int64_t>(sycl_device); + test_full_reductions_min_sycl<DataType, ColMajor, int64_t>(sycl_device); + test_first_dim_reductions_max_sycl<DataType, ColMajor, int64_t>(sycl_device); + test_last_dim_reductions_sum_sycl<DataType, ColMajor, int64_t>(sycl_device); +} +void test_cxx11_tensor_reduction_sycl() { + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(sycl_reduction_test_per_device<float>(device)); + } +} diff --git a/eigen/unsupported/test/cxx11_tensor_ref.cpp b/eigen/unsupported/test/cxx11_tensor_ref.cpp new file mode 100644 index 0000000..c8f105e --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_ref.cpp @@ -0,0 +1,248 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
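The TensorRef tests that follow rely on TensorRef wrapping a tensor expression without materializing it: coefficients are evaluated lazily through operator(), and a ref to an lvalue tensor aliases the original storage. A minimal sketch, illustrative only and not part of the commit:

  #include <unsupported/Eigen/CXX11/Tensor>
  using Eigen::Tensor;
  using Eigen::TensorRef;

  int main() {
    Tensor<float, 1> a(6), b(6);
    a.setRandom();
    b.setRandom();
    // Wraps the sum expression; no 6-element temporary is allocated here.
    TensorRef<Tensor<float, 1> > ref = a + b;
    float first = ref(0);  // evaluated on demand
    (void)first;
    return 0;
  }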
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::RowMajor; + +static void test_simple_lvalue_ref() +{ + Tensor<int, 1> input(6); + input.setRandom(); + + TensorRef<Tensor<int, 1>> ref3(input); + TensorRef<Tensor<int, 1>> ref4 = input; + + VERIFY_IS_EQUAL(ref3.data(), input.data()); + VERIFY_IS_EQUAL(ref4.data(), input.data()); + + for (int i = 0; i < 6; ++i) { + VERIFY_IS_EQUAL(ref3(i), input(i)); + VERIFY_IS_EQUAL(ref4(i), input(i)); + } + + for (int i = 0; i < 6; ++i) { + ref3.coeffRef(i) = i; + } + for (int i = 0; i < 6; ++i) { + VERIFY_IS_EQUAL(input(i), i); + } + for (int i = 0; i < 6; ++i) { + ref4.coeffRef(i) = -i * 2; + } + for (int i = 0; i < 6; ++i) { + VERIFY_IS_EQUAL(input(i), -i*2); + } +} + + +static void test_simple_rvalue_ref() +{ + Tensor<int, 1> input1(6); + input1.setRandom(); + Tensor<int, 1> input2(6); + input2.setRandom(); + + TensorRef<Tensor<int, 1>> ref3(input1 + input2); + TensorRef<Tensor<int, 1>> ref4 = input1 + input2; + + VERIFY_IS_NOT_EQUAL(ref3.data(), input1.data()); + VERIFY_IS_NOT_EQUAL(ref4.data(), input1.data()); + VERIFY_IS_NOT_EQUAL(ref3.data(), input2.data()); + VERIFY_IS_NOT_EQUAL(ref4.data(), input2.data()); + + for (int i = 0; i < 6; ++i) { + VERIFY_IS_EQUAL(ref3(i), input1(i) + input2(i)); + VERIFY_IS_EQUAL(ref4(i), input1(i) + input2(i)); + } +} + + +static void test_multiple_dims() +{ + Tensor<float, 3> input(3,5,7); + input.setRandom(); + + TensorRef<Tensor<float, 3>> ref(input); + VERIFY_IS_EQUAL(ref.data(), input.data()); + VERIFY_IS_EQUAL(ref.dimension(0), 3); + VERIFY_IS_EQUAL(ref.dimension(1), 5); + VERIFY_IS_EQUAL(ref.dimension(2), 7); + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(ref(i,j,k), input(i,j,k)); + } + } + } +} + + +static void test_slice() +{ + Tensor<float, 5> tensor(2,3,5,7,11); + tensor.setRandom(); + + Eigen::DSizes<ptrdiff_t, 5> indices(1,2,3,4,5); + Eigen::DSizes<ptrdiff_t, 5> sizes(1,1,1,1,1); + TensorRef<Tensor<float, 5>> slice = tensor.slice(indices, sizes); + VERIFY_IS_EQUAL(slice(0,0,0,0,0), tensor(1,2,3,4,5)); + + Eigen::DSizes<ptrdiff_t, 5> indices2(1,1,3,4,5); + Eigen::DSizes<ptrdiff_t, 5> sizes2(1,1,2,2,3); + slice = tensor.slice(indices2, sizes2); + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 2; ++j) { + for (int k = 0; k < 3; ++k) { + VERIFY_IS_EQUAL(slice(0,0,i,j,k), tensor(1,1,3+i,4+j,5+k)); + } + } + } + + Eigen::DSizes<ptrdiff_t, 5> indices3(0,0,0,0,0); + Eigen::DSizes<ptrdiff_t, 5> sizes3(2,3,1,1,1); + slice = tensor.slice(indices3, sizes3); + VERIFY_IS_EQUAL(slice.data(), tensor.data()); +} + + +static void test_ref_of_ref() +{ + Tensor<float, 3> input(3,5,7); + input.setRandom(); + + TensorRef<Tensor<float, 3>> ref(input); + TensorRef<Tensor<float, 3>> ref_of_ref(ref); + TensorRef<Tensor<float, 3>> ref_of_ref2; + ref_of_ref2 = ref; + + VERIFY_IS_EQUAL(ref_of_ref.data(), input.data()); + VERIFY_IS_EQUAL(ref_of_ref.dimension(0), 3); + VERIFY_IS_EQUAL(ref_of_ref.dimension(1), 5); + VERIFY_IS_EQUAL(ref_of_ref.dimension(2), 7); + + VERIFY_IS_EQUAL(ref_of_ref2.data(), input.data()); + VERIFY_IS_EQUAL(ref_of_ref2.dimension(0), 3); + VERIFY_IS_EQUAL(ref_of_ref2.dimension(1), 5); + VERIFY_IS_EQUAL(ref_of_ref2.dimension(2), 7); + + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(ref_of_ref(i,j,k), input(i,j,k)); + VERIFY_IS_EQUAL(ref_of_ref2(i,j,k), input(i,j,k)); + } + } + } +} + + +static void test_ref_in_expr() +{ + 
Tensor<float, 3> input(3,5,7); + input.setRandom(); + TensorRef<Tensor<float, 3>> input_ref(input); + + Tensor<float, 3> result(3,5,7); + result.setRandom(); + TensorRef<Tensor<float, 3>> result_ref(result); + + Tensor<float, 3> bias(3,5,7); + bias.setRandom(); + + result_ref = input_ref + bias; + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(result_ref(i,j,k), input(i,j,k) + bias(i,j,k)); + VERIFY_IS_NOT_EQUAL(result(i,j,k), input(i,j,k) + bias(i,j,k)); + } + } + } + + result = result_ref; + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_EQUAL(result(i,j,k), input(i,j,k) + bias(i,j,k)); + } + } + } +} + + +static void test_coeff_ref() +{ + Tensor<float, 5> tensor(2,3,5,7,11); + tensor.setRandom(); + Tensor<float, 5> original = tensor; + + TensorRef<Tensor<float, 4>> slice = tensor.chip(7, 4); + slice.coeffRef(0, 0, 0, 0) = 1.0f; + slice.coeffRef(1, 0, 0, 0) += 2.0f; + + VERIFY_IS_EQUAL(tensor(0,0,0,0,7), 1.0f); + VERIFY_IS_EQUAL(tensor(1,0,0,0,7), original(1,0,0,0,7) + 2.0f); +} + + +static void test_nested_ops_with_ref() +{ + Tensor<float, 4> t(2, 3, 5, 7); + t.setRandom(); + TensorMap<Tensor<const float, 4> > m(t.data(), 2, 3, 5, 7); + array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings; + paddings[0] = std::make_pair(0, 0); + paddings[1] = std::make_pair(2, 1); + paddings[2] = std::make_pair(3, 4); + paddings[3] = std::make_pair(0, 0); + DSizes<Eigen::DenseIndex, 4> shuffle_dims(0, 1, 2, 3); + TensorRef<Tensor<const float, 4> > ref(m.pad(paddings)); + array<std::pair<ptrdiff_t, ptrdiff_t>, 4> trivial; + trivial[0] = std::make_pair(0, 0); + trivial[1] = std::make_pair(0, 0); + trivial[2] = std::make_pair(0, 0); + trivial[3] = std::make_pair(0, 0); + Tensor<float, 4> padded = ref.shuffle(shuffle_dims).pad(trivial); + VERIFY_IS_EQUAL(padded.dimension(0), 2+0); + VERIFY_IS_EQUAL(padded.dimension(1), 3+3); + VERIFY_IS_EQUAL(padded.dimension(2), 5+7); + VERIFY_IS_EQUAL(padded.dimension(3), 7+0); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 6; ++j) { + for (int k = 0; k < 12; ++k) { + for (int l = 0; l < 7; ++l) { + if (j >= 2 && j < 5 && k >= 3 && k < 8) { + VERIFY_IS_EQUAL(padded(i,j,k,l), t(i,j-2,k-3,l)); + } else { + VERIFY_IS_EQUAL(padded(i,j,k,l), 0.0f); + } + } + } + } + } +} + + +void test_cxx11_tensor_ref() +{ + CALL_SUBTEST(test_simple_lvalue_ref()); + CALL_SUBTEST(test_simple_rvalue_ref()); + CALL_SUBTEST(test_multiple_dims()); + CALL_SUBTEST(test_slice()); + CALL_SUBTEST(test_ref_of_ref()); + CALL_SUBTEST(test_ref_in_expr()); + CALL_SUBTEST(test_coeff_ref()); + CALL_SUBTEST(test_nested_ops_with_ref()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_reverse.cpp b/eigen/unsupported/test/cxx11_tensor_reverse.cpp new file mode 100644 index 0000000..b35b8d2 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_reverse.cpp @@ -0,0 +1,190 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Navdeep Jaitly <ndjaitly@google.com and +// Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
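The reverse() operation exercised in the next file flips a tensor along each dimension whose flag is true, and also works as an lvalue (out.reverse(flags) = in), which the tests cover explicitly. A minimal sketch, illustrative only and not part of the commit:

  #include <unsupported/Eigen/CXX11/Tensor>
  using Eigen::Tensor;

  int main() {
    Tensor<float, 4> t(2, 3, 5, 7);
    t.setRandom();
    Eigen::array<bool, 4> flags = {{false, true, true, false}};
    // flipped(i, j, k, l) == t(i, 2 - j, 4 - k, l)
    Tensor<float, 4> flipped = t.reverse(flags);
    (void)flipped;
    return 0;
  }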
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::array; + +template <int DataLayout> +static void test_simple_reverse() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + + array<bool, 4> dim_rev; + dim_rev[0] = false; + dim_rev[1] = true; + dim_rev[2] = true; + dim_rev[3] = false; + + Tensor<float, 4, DataLayout> reversed_tensor; + reversed_tensor = tensor.reverse(dim_rev); + + VERIFY_IS_EQUAL(reversed_tensor.dimension(0), 2); + VERIFY_IS_EQUAL(reversed_tensor.dimension(1), 3); + VERIFY_IS_EQUAL(reversed_tensor.dimension(2), 5); + VERIFY_IS_EQUAL(reversed_tensor.dimension(3), 7); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(i,2-j,4-k,l)); + } + } + } + } + + dim_rev[0] = true; + dim_rev[1] = false; + dim_rev[2] = false; + dim_rev[3] = false; + + reversed_tensor = tensor.reverse(dim_rev); + + VERIFY_IS_EQUAL(reversed_tensor.dimension(0), 2); + VERIFY_IS_EQUAL(reversed_tensor.dimension(1), 3); + VERIFY_IS_EQUAL(reversed_tensor.dimension(2), 5); + VERIFY_IS_EQUAL(reversed_tensor.dimension(3), 7); + + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,l)); + } + } + } + } + + dim_rev[0] = true; + dim_rev[1] = false; + dim_rev[2] = false; + dim_rev[3] = true; + + reversed_tensor = tensor.reverse(dim_rev); + + VERIFY_IS_EQUAL(reversed_tensor.dimension(0), 2); + VERIFY_IS_EQUAL(reversed_tensor.dimension(1), 3); + VERIFY_IS_EQUAL(reversed_tensor.dimension(2), 5); + VERIFY_IS_EQUAL(reversed_tensor.dimension(3), 7); + + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,6-l)); + } + } + } + } +} + + +template <int DataLayout> +static void test_expr_reverse(bool LValue) +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + + array<bool, 4> dim_rev; + dim_rev[0] = false; + dim_rev[1] = true; + dim_rev[2] = false; + dim_rev[3] = true; + + Tensor<float, 4, DataLayout> expected(2, 3, 5, 7); + if (LValue) { + expected.reverse(dim_rev) = tensor; + } else { + expected = tensor.reverse(dim_rev); + } + + Tensor<float, 4, DataLayout> result(2,3,5,7); + + array<ptrdiff_t, 4> src_slice_dim; + src_slice_dim[0] = 2; + src_slice_dim[1] = 3; + src_slice_dim[2] = 1; + src_slice_dim[3] = 7; + array<ptrdiff_t, 4> src_slice_start; + src_slice_start[0] = 0; + src_slice_start[1] = 0; + src_slice_start[2] = 0; + src_slice_start[3] = 0; + array<ptrdiff_t, 4> dst_slice_dim = src_slice_dim; + array<ptrdiff_t, 4> dst_slice_start = src_slice_start; + + for (int i = 0; i < 5; ++i) { + if (LValue) { + result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev) = + tensor.slice(src_slice_start, src_slice_dim); + } else { + result.slice(dst_slice_start, dst_slice_dim) = + tensor.slice(src_slice_start, src_slice_dim).reverse(dim_rev); + } + src_slice_start[2] += 1; + dst_slice_start[2] += 1; + } + + VERIFY_IS_EQUAL(result.dimension(0), 2); + VERIFY_IS_EQUAL(result.dimension(1), 3); + VERIFY_IS_EQUAL(result.dimension(2), 5); + VERIFY_IS_EQUAL(result.dimension(3), 7); + + for (int i = 0; i < expected.dimension(0); ++i) { + for (int j = 0; j < expected.dimension(1); ++j) { + for (int k = 0; k < expected.dimension(2); ++k) { + for (int l = 0; l < 
expected.dimension(3); ++l) {
+          VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
+        }
+      }
+    }
+  }
+
+  dst_slice_start[2] = 0;
+  result.setRandom();
+  for (int i = 0; i < 5; ++i) {
+    if (LValue) {
+      result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev) =
+          tensor.slice(dst_slice_start, dst_slice_dim);
+    } else {
+      result.slice(dst_slice_start, dst_slice_dim) =
+          tensor.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim);
+    }
+    dst_slice_start[2] += 1;
+  }
+
+  for (int i = 0; i < expected.dimension(0); ++i) {
+    for (int j = 0; j < expected.dimension(1); ++j) {
+      for (int k = 0; k < expected.dimension(2); ++k) {
+        for (int l = 0; l < expected.dimension(3); ++l) {
+          VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l));
+        }
+      }
+    }
+  }
+}
+
+
+void test_cxx11_tensor_reverse()
+{
+  CALL_SUBTEST(test_simple_reverse<ColMajor>());
+  CALL_SUBTEST(test_simple_reverse<RowMajor>());
+  CALL_SUBTEST(test_expr_reverse<ColMajor>(true));
+  CALL_SUBTEST(test_expr_reverse<RowMajor>(true));
+  CALL_SUBTEST(test_expr_reverse<ColMajor>(false));
+  CALL_SUBTEST(test_expr_reverse<RowMajor>(false));
+}
diff --git a/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp
new file mode 100644
index 0000000..2f54844
--- /dev/null
+++ b/eigen/unsupported/test/cxx11_tensor_reverse_sycl.cpp
@@ -0,0 +1,221 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015
+// Mehdi Goli    Codeplay Software Ltd.
+// Ralph Potter  Codeplay Software Ltd.
+// Luke Iwanski  Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_reverse_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
+#define EIGEN_USE_SYCL
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+
+template <typename DataType, int DataLayout, typename IndexType>
+static void test_simple_reverse(const Eigen::SyclDevice& sycl_device) {
+
+  IndexType dim1 = 2;
+  IndexType dim2 = 3;
+  IndexType dim3 = 5;
+  IndexType dim4 = 7;
+
+  array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}};
+  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
+  Tensor<DataType, 4, DataLayout, IndexType> reversed_tensor(tensorRange);
+  tensor.setRandom();
+
+  array<bool, 4> dim_rev;
+  dim_rev[0] = false;
+  dim_rev[1] = true;
+  dim_rev[2] = true;
+  dim_rev[3] = false;
+
+  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(tensor.dimensions().TotalSize()*sizeof(DataType)));
+  DataType* gpu_out_data = static_cast<DataType*>(sycl_device.allocate(reversed_tensor.dimensions().TotalSize()*sizeof(DataType)));
+
+  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange);
+  TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu(gpu_out_data, tensorRange);
+
+  sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(),(tensor.dimensions().TotalSize())*sizeof(DataType));
+  out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev);
+  sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType));
+  // Check that the CPU and GPU reverse operations return the same result.
+ for (IndexType i = 0; i < 2; ++i) { + for (IndexType j = 0; j < 3; ++j) { + for (IndexType k = 0; k < 5; ++k) { + for (IndexType l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(i,2-j,4-k,l)); + } + } + } + } + dim_rev[0] = true; + dim_rev[1] = false; + dim_rev[2] = false; + dim_rev[3] = false; + + out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); + sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType)); + + for (IndexType i = 0; i < 2; ++i) { + for (IndexType j = 0; j < 3; ++j) { + for (IndexType k = 0; k < 5; ++k) { + for (IndexType l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,l)); + } + } + } + } + + dim_rev[0] = true; + dim_rev[1] = false; + dim_rev[2] = false; + dim_rev[3] = true; + out_gpu.device(sycl_device) = in_gpu.reverse(dim_rev); + sycl_device.memcpyDeviceToHost(reversed_tensor.data(), gpu_out_data, reversed_tensor.dimensions().TotalSize()*sizeof(DataType)); + + for (IndexType i = 0; i < 2; ++i) { + for (IndexType j = 0; j < 3; ++j) { + for (IndexType k = 0; k < 5; ++k) { + for (IndexType l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), reversed_tensor(1-i,j,k,6-l)); + } + } + } + } + + sycl_device.deallocate(gpu_in_data); + sycl_device.deallocate(gpu_out_data); +} + + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_expr_reverse(const Eigen::SyclDevice& sycl_device, bool LValue) +{ + IndexType dim1 = 2; + IndexType dim2 = 3; + IndexType dim3 = 5; + IndexType dim4 = 7; + + array<IndexType, 4> tensorRange = {{dim1, dim2, dim3, dim4}}; + Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange); + Tensor<DataType, 4, DataLayout, IndexType> expected(tensorRange); + Tensor<DataType, 4, DataLayout, IndexType> result(tensorRange); + tensor.setRandom(); + + array<bool, 4> dim_rev; + dim_rev[0] = false; + dim_rev[1] = true; + dim_rev[2] = false; + dim_rev[3] = true; + + DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(tensor.dimensions().TotalSize()*sizeof(DataType))); + DataType* gpu_out_data_expected =static_cast<DataType*>(sycl_device.allocate(expected.dimensions().TotalSize()*sizeof(DataType))); + DataType* gpu_out_data_result =static_cast<DataType*>(sycl_device.allocate(result.dimensions().TotalSize()*sizeof(DataType))); + + TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > in_gpu(gpu_in_data, tensorRange); + TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_expected(gpu_out_data_expected, tensorRange); + TensorMap<Tensor<DataType, 4, DataLayout, IndexType> > out_gpu_result(gpu_out_data_result, tensorRange); + + + sycl_device.memcpyHostToDevice(gpu_in_data, tensor.data(),(tensor.dimensions().TotalSize())*sizeof(DataType)); + + if (LValue) { + out_gpu_expected.reverse(dim_rev).device(sycl_device) = in_gpu; + } else { + out_gpu_expected.device(sycl_device) = in_gpu.reverse(dim_rev); + } + sycl_device.memcpyDeviceToHost(expected.data(), gpu_out_data_expected, expected.dimensions().TotalSize()*sizeof(DataType)); + + + array<IndexType, 4> src_slice_dim; + src_slice_dim[0] = 2; + src_slice_dim[1] = 3; + src_slice_dim[2] = 1; + src_slice_dim[3] = 7; + array<IndexType, 4> src_slice_start; + src_slice_start[0] = 0; + src_slice_start[1] = 0; + src_slice_start[2] = 0; + src_slice_start[3] = 0; + array<IndexType, 4> dst_slice_dim = src_slice_dim; + array<IndexType, 4> dst_slice_start = src_slice_start; + + for (IndexType i = 0; i < 5; ++i) { + if (LValue) { + 
out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) = + in_gpu.slice(src_slice_start, src_slice_dim); + } else { + out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) = + in_gpu.slice(src_slice_start, src_slice_dim).reverse(dim_rev); + } + src_slice_start[2] += 1; + dst_slice_start[2] += 1; + } + sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result, result.dimensions().TotalSize()*sizeof(DataType)); + + for (IndexType i = 0; i < expected.dimension(0); ++i) { + for (IndexType j = 0; j < expected.dimension(1); ++j) { + for (IndexType k = 0; k < expected.dimension(2); ++k) { + for (IndexType l = 0; l < expected.dimension(3); ++l) { + VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l)); + } + } + } + } + + dst_slice_start[2] = 0; + result.setRandom(); + sycl_device.memcpyHostToDevice(gpu_out_data_result, result.data(),(result.dimensions().TotalSize())*sizeof(DataType)); + for (IndexType i = 0; i < 5; ++i) { + if (LValue) { + out_gpu_result.slice(dst_slice_start, dst_slice_dim).reverse(dim_rev).device(sycl_device) = + in_gpu.slice(dst_slice_start, dst_slice_dim); + } else { + out_gpu_result.slice(dst_slice_start, dst_slice_dim).device(sycl_device) = + in_gpu.reverse(dim_rev).slice(dst_slice_start, dst_slice_dim); + } + dst_slice_start[2] += 1; + } + sycl_device.memcpyDeviceToHost(result.data(), gpu_out_data_result, result.dimensions().TotalSize()*sizeof(DataType)); + + for (IndexType i = 0; i < expected.dimension(0); ++i) { + for (IndexType j = 0; j < expected.dimension(1); ++j) { + for (IndexType k = 0; k < expected.dimension(2); ++k) { + for (IndexType l = 0; l < expected.dimension(3); ++l) { + VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l)); + } + } + } + } +} + + + +template<typename DataType> void sycl_reverse_test_per_device(const cl::sycl::device& d){ + std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl; + QueueInterface queueInterface(d); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + test_simple_reverse<DataType, RowMajor, int64_t>(sycl_device); + test_simple_reverse<DataType, ColMajor, int64_t>(sycl_device); + test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, false); + test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, false); + test_expr_reverse<DataType, RowMajor, int64_t>(sycl_device, true); + test_expr_reverse<DataType, ColMajor, int64_t>(sycl_device, true); +} +void test_cxx11_tensor_reverse_sycl() { + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(sycl_reverse_test_per_device<float>(device)); + } +} diff --git a/eigen/unsupported/test/cxx11_tensor_roundings.cpp b/eigen/unsupported/test/cxx11_tensor_roundings.cpp new file mode 100644 index 0000000..2c26151 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_roundings.cpp @@ -0,0 +1,62 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
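+
+// The tests below apply the elementwise round(), floor() and ceil() tensor
+// operations to random data scaled by 100 and compare every coefficient with
+// the corresponding scalar numext:: routine. A minimal sketch of the pattern
+// (not an additional test):
+//
+//   Tensor<float, 2> t(20, 30);
+//   t = t.random() * 100.f;
+//   Tensor<float, 2> r = t.round();  // r(i,j) == numext::round(t(i,j))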
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + + +static void test_float_rounding() +{ + Tensor<float, 2> ftensor(20,30); + ftensor = ftensor.random() * 100.f; + + Tensor<float, 2> result = ftensor.round(); + + for (int i = 0; i < 20; ++i) { + for (int j = 0; j < 30; ++j) { + VERIFY_IS_EQUAL(result(i,j), numext::round(ftensor(i,j))); + } + } +} + +static void test_float_flooring() +{ + Tensor<float, 2> ftensor(20,30); + ftensor = ftensor.random() * 100.f; + + Tensor<float, 2> result = ftensor.floor(); + + for (int i = 0; i < 20; ++i) { + for (int j = 0; j < 30; ++j) { + VERIFY_IS_EQUAL(result(i,j), numext::floor(ftensor(i,j))); + } + } +} + +static void test_float_ceiling() +{ + Tensor<float, 2> ftensor(20,30); + ftensor = ftensor.random() * 100.f; + + Tensor<float, 2> result = ftensor.ceil(); + + for (int i = 0; i < 20; ++i) { + for (int j = 0; j < 30; ++j) { + VERIFY_IS_EQUAL(result(i,j), numext::ceil(ftensor(i,j))); + } + } +} + +void test_cxx11_tensor_roundings() +{ + CALL_SUBTEST(test_float_rounding()); + CALL_SUBTEST(test_float_ceiling()); + CALL_SUBTEST(test_float_flooring()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_scan.cpp b/eigen/unsupported/test/cxx11_tensor_scan.cpp new file mode 100644 index 0000000..af59aa3 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_scan.cpp @@ -0,0 +1,110 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Igor Babuschkin <igor@babuschk.in> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include <limits> +#include <numeric> +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template <int DataLayout, typename Type=float, bool Exclusive = false> +static void test_1d_scan() +{ + int size = 50; + Tensor<Type, 1, DataLayout> tensor(size); + tensor.setRandom(); + Tensor<Type, 1, DataLayout> result = tensor.cumsum(0, Exclusive); + + VERIFY_IS_EQUAL(tensor.dimension(0), result.dimension(0)); + + float accum = 0; + for (int i = 0; i < size; i++) { + if (Exclusive) { + VERIFY_IS_EQUAL(result(i), accum); + accum += tensor(i); + } else { + accum += tensor(i); + VERIFY_IS_EQUAL(result(i), accum); + } + } + + accum = 1; + result = tensor.cumprod(0, Exclusive); + for (int i = 0; i < size; i++) { + if (Exclusive) { + VERIFY_IS_EQUAL(result(i), accum); + accum *= tensor(i); + } else { + accum *= tensor(i); + VERIFY_IS_EQUAL(result(i), accum); + } + } +} + +template <int DataLayout, typename Type=float> +static void test_4d_scan() +{ + int size = 5; + Tensor<Type, 4, DataLayout> tensor(size, size, size, size); + tensor.setRandom(); + + Tensor<Type, 4, DataLayout> result(size, size, size, size); + + result = tensor.cumsum(0); + float accum = 0; + for (int i = 0; i < size; i++) { + accum += tensor(i, 1, 2, 3); + VERIFY_IS_EQUAL(result(i, 1, 2, 3), accum); + } + result = tensor.cumsum(1); + accum = 0; + for (int i = 0; i < size; i++) { + accum += tensor(1, i, 2, 3); + VERIFY_IS_EQUAL(result(1, i, 2, 3), accum); + } + result = tensor.cumsum(2); + accum = 0; + for (int i = 0; i < size; i++) { + accum += tensor(1, 2, i, 3); + VERIFY_IS_EQUAL(result(1, 2, i, 3), accum); + } + result = tensor.cumsum(3); + accum = 0; + for (int i = 0; i < size; i++) { + accum += tensor(1, 2, 3, i); + VERIFY_IS_EQUAL(result(1, 2, 3, i), accum); + } +} + +template <int DataLayout> +static void test_tensor_maps() { + 
int inputs[20]; + TensorMap<Tensor<int, 1, DataLayout> > tensor_map(inputs, 20); + tensor_map.setRandom(); + + Tensor<int, 1, DataLayout> result = tensor_map.cumsum(0); + + int accum = 0; + for (int i = 0; i < 20; ++i) { + accum += tensor_map(i); + VERIFY_IS_EQUAL(result(i), accum); + } +} + +void test_cxx11_tensor_scan() { + CALL_SUBTEST((test_1d_scan<ColMajor, float, true>())); + CALL_SUBTEST((test_1d_scan<ColMajor, float, false>())); + CALL_SUBTEST((test_1d_scan<RowMajor, float, true>())); + CALL_SUBTEST((test_1d_scan<RowMajor, float, false>())); + CALL_SUBTEST(test_4d_scan<ColMajor>()); + CALL_SUBTEST(test_4d_scan<RowMajor>()); + CALL_SUBTEST(test_tensor_maps<ColMajor>()); + CALL_SUBTEST(test_tensor_maps<RowMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_scan_cuda.cu b/eigen/unsupported/test/cxx11_tensor_scan_cuda.cu new file mode 100644 index 0000000..5f146f3 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_scan_cuda.cu @@ -0,0 +1,79 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_scan_cuda +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; +typedef Tensor<float, 1>::DimensionPair DimPair; + +template<int DataLayout> +void test_cuda_cumsum(int m_size, int k_size, int n_size) +{ + std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl; + Tensor<float, 3, DataLayout> t_input(m_size, k_size, n_size); + Tensor<float, 3, DataLayout> t_result(m_size, k_size, n_size); + Tensor<float, 3, DataLayout> t_result_gpu(m_size, k_size, n_size); + + t_input.setRandom(); + + std::size_t t_input_bytes = t_input.size() * sizeof(float); + std::size_t t_result_bytes = t_result.size() * sizeof(float); + + float* d_t_input; + float* d_t_result; + + cudaMalloc((void**)(&d_t_input), t_input_bytes); + cudaMalloc((void**)(&d_t_result), t_result_bytes); + + cudaMemcpy(d_t_input, t_input.data(), t_input_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > + gpu_t_input(d_t_input, Eigen::array<int, 3>(m_size, k_size, n_size)); + Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > + gpu_t_result(d_t_result, Eigen::array<int, 3>(m_size, k_size, n_size)); + + gpu_t_result.device(gpu_device) = gpu_t_input.cumsum(1); + t_result = t_input.cumsum(1); + + cudaMemcpy(t_result_gpu.data(), d_t_result, t_result_bytes, cudaMemcpyDeviceToHost); + for (DenseIndex i = 0; i < t_result.size(); i++) { + if (fabs(t_result(i) - t_result_gpu(i)) < 1e-4f) { + continue; + } + if (Eigen::internal::isApprox(t_result(i), t_result_gpu(i), 1e-4f)) { + continue; + } + std::cout << "mismatch detected at index " << i << ": " << t_result(i) + << " vs " << t_result_gpu(i) << std::endl; + assert(false); + } + + cudaFree((void*)d_t_input); + cudaFree((void*)d_t_result); +} + + +void test_cxx11_tensor_scan_cuda() +{ + CALL_SUBTEST_1(test_cuda_cumsum<ColMajor>(128, 
128, 128)); + CALL_SUBTEST_2(test_cuda_cumsum<RowMajor>(128, 128, 128)); +} diff --git a/eigen/unsupported/test/cxx11_tensor_shuffling.cpp b/eigen/unsupported/test/cxx11_tensor_shuffling.cpp new file mode 100644 index 0000000..d11444a --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_shuffling.cpp @@ -0,0 +1,228 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::array; + +template <int DataLayout> +static void test_simple_shuffling() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + array<ptrdiff_t, 4> shuffles; + shuffles[0] = 0; + shuffles[1] = 1; + shuffles[2] = 2; + shuffles[3] = 3; + + Tensor<float, 4, DataLayout> no_shuffle; + no_shuffle = tensor.shuffle(shuffles); + + VERIFY_IS_EQUAL(no_shuffle.dimension(0), 2); + VERIFY_IS_EQUAL(no_shuffle.dimension(1), 3); + VERIFY_IS_EQUAL(no_shuffle.dimension(2), 5); + VERIFY_IS_EQUAL(no_shuffle.dimension(3), 7); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), no_shuffle(i,j,k,l)); + } + } + } + } + + shuffles[0] = 2; + shuffles[1] = 3; + shuffles[2] = 1; + shuffles[3] = 0; + Tensor<float, 4, DataLayout> shuffle; + shuffle = tensor.shuffle(shuffles); + + VERIFY_IS_EQUAL(shuffle.dimension(0), 5); + VERIFY_IS_EQUAL(shuffle.dimension(1), 7); + VERIFY_IS_EQUAL(shuffle.dimension(2), 3); + VERIFY_IS_EQUAL(shuffle.dimension(3), 2); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i)); + } + } + } + } +} + + +template <int DataLayout> +static void test_expr_shuffling() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + + array<ptrdiff_t, 4> shuffles; + shuffles[0] = 2; + shuffles[1] = 3; + shuffles[2] = 1; + shuffles[3] = 0; + Tensor<float, 4, DataLayout> expected; + expected = tensor.shuffle(shuffles); + + Tensor<float, 4, DataLayout> result(5,7,3,2); + + array<int, 4> src_slice_dim{{2,3,1,7}}; + array<int, 4> src_slice_start{{0,0,0,0}}; + array<int, 4> dst_slice_dim{{1,7,3,2}}; + array<int, 4> dst_slice_start{{0,0,0,0}}; + + for (int i = 0; i < 5; ++i) { + result.slice(dst_slice_start, dst_slice_dim) = + tensor.slice(src_slice_start, src_slice_dim).shuffle(shuffles); + src_slice_start[2] += 1; + dst_slice_start[0] += 1; + } + + VERIFY_IS_EQUAL(result.dimension(0), 5); + VERIFY_IS_EQUAL(result.dimension(1), 7); + VERIFY_IS_EQUAL(result.dimension(2), 3); + VERIFY_IS_EQUAL(result.dimension(3), 2); + + for (int i = 0; i < expected.dimension(0); ++i) { + for (int j = 0; j < expected.dimension(1); ++j) { + for (int k = 0; k < expected.dimension(2); ++k) { + for (int l = 0; l < expected.dimension(3); ++l) { + VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l)); + } + } + } + } + + dst_slice_start[0] = 0; + result.setRandom(); + for (int i = 0; i < 5; ++i) { + result.slice(dst_slice_start, dst_slice_dim) = + tensor.shuffle(shuffles).slice(dst_slice_start, dst_slice_dim); + dst_slice_start[0] += 1; + } + + for (int i = 0; i < expected.dimension(0); ++i) { 
+ for (int j = 0; j < expected.dimension(1); ++j) { + for (int k = 0; k < expected.dimension(2); ++k) { + for (int l = 0; l < expected.dimension(3); ++l) { + VERIFY_IS_EQUAL(result(i,j,k,l), expected(i,j,k,l)); + } + } + } + } +} + + +template <int DataLayout> +static void test_shuffling_as_value() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + array<ptrdiff_t, 4> shuffles; + shuffles[2] = 0; + shuffles[3] = 1; + shuffles[1] = 2; + shuffles[0] = 3; + Tensor<float, 4, DataLayout> shuffle(5,7,3,2); + shuffle.shuffle(shuffles) = tensor; + + VERIFY_IS_EQUAL(shuffle.dimension(0), 5); + VERIFY_IS_EQUAL(shuffle.dimension(1), 7); + VERIFY_IS_EQUAL(shuffle.dimension(2), 3); + VERIFY_IS_EQUAL(shuffle.dimension(3), 2); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i)); + } + } + } + } + + array<ptrdiff_t, 4> no_shuffle; + no_shuffle[0] = 0; + no_shuffle[1] = 1; + no_shuffle[2] = 2; + no_shuffle[3] = 3; + Tensor<float, 4, DataLayout> shuffle2(5,7,3,2); + shuffle2.shuffle(shuffles) = tensor.shuffle(no_shuffle); + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 7; ++j) { + for (int k = 0; k < 3; ++k) { + for (int l = 0; l < 2; ++l) { + VERIFY_IS_EQUAL(shuffle2(i,j,k,l), shuffle(i,j,k,l)); + } + } + } + } +} + + +template <int DataLayout> +static void test_shuffle_unshuffle() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + + // Choose a random permutation. + array<ptrdiff_t, 4> shuffles; + for (int i = 0; i < 4; ++i) { + shuffles[i] = i; + } + array<ptrdiff_t, 4> shuffles_inverse; + for (int i = 0; i < 4; ++i) { + const ptrdiff_t index = internal::random<ptrdiff_t>(i, 3); + shuffles_inverse[shuffles[index]] = i; + std::swap(shuffles[i], shuffles[index]); + } + + Tensor<float, 4, DataLayout> shuffle; + shuffle = tensor.shuffle(shuffles).shuffle(shuffles_inverse); + + VERIFY_IS_EQUAL(shuffle.dimension(0), 2); + VERIFY_IS_EQUAL(shuffle.dimension(1), 3); + VERIFY_IS_EQUAL(shuffle.dimension(2), 5); + VERIFY_IS_EQUAL(shuffle.dimension(3), 7); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(i,j,k,l)); + } + } + } + } +} + + +void test_cxx11_tensor_shuffling() +{ + CALL_SUBTEST(test_simple_shuffling<ColMajor>()); + CALL_SUBTEST(test_simple_shuffling<RowMajor>()); + CALL_SUBTEST(test_expr_shuffling<ColMajor>()); + CALL_SUBTEST(test_expr_shuffling<RowMajor>()); + CALL_SUBTEST(test_shuffling_as_value<ColMajor>()); + CALL_SUBTEST(test_shuffling_as_value<RowMajor>()); + CALL_SUBTEST(test_shuffle_unshuffle<ColMajor>()); + CALL_SUBTEST(test_shuffle_unshuffle<RowMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp new file mode 100644 index 0000000..c88db7c --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_shuffling_sycl.cpp @@ -0,0 +1,119 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_shuffling_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; + +template <typename DataType, int DataLayout, typename IndexType> +static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device) +{ + IndexType sizeDim1 = 2; + IndexType sizeDim2 = 3; + IndexType sizeDim3 = 5; + IndexType sizeDim4 = 7; + array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}}; + Tensor<DataType, 4, DataLayout,IndexType> tensor(tensorRange); + Tensor<DataType, 4, DataLayout,IndexType> no_shuffle(tensorRange); + tensor.setRandom(); + + const size_t buffSize =tensor.size()*sizeof(DataType); + array<IndexType, 4> shuffles; + shuffles[0] = 0; + shuffles[1] = 1; + shuffles[2] = 2; + shuffles[3] = 3; + DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(buffSize)); + DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(buffSize)); + + + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu1(gpu_data1, tensorRange); + TensorMap<Tensor<DataType, 4, DataLayout,IndexType>> gpu2(gpu_data2, tensorRange); + + sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(), buffSize); + + gpu2.device(sycl_device)=gpu1.shuffle(shuffles); + sycl_device.memcpyDeviceToHost(no_shuffle.data(), gpu_data2, buffSize); + sycl_device.synchronize(); + + VERIFY_IS_EQUAL(no_shuffle.dimension(0), sizeDim1); + VERIFY_IS_EQUAL(no_shuffle.dimension(1), sizeDim2); + VERIFY_IS_EQUAL(no_shuffle.dimension(2), sizeDim3); + VERIFY_IS_EQUAL(no_shuffle.dimension(3), sizeDim4); + + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + for (IndexType l = 0; l < sizeDim4; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), no_shuffle(i,j,k,l)); + } + } + } + } + + shuffles[0] = 2; + shuffles[1] = 3; + shuffles[2] = 1; + shuffles[3] = 0; + array<IndexType, 4> tensorrangeShuffle = {{sizeDim3, sizeDim4, sizeDim2, sizeDim1}}; + Tensor<DataType, 4, DataLayout,IndexType> shuffle(tensorrangeShuffle); + DataType* gpu_data3 = static_cast<DataType*>(sycl_device.allocate(buffSize)); + TensorMap<Tensor<DataType, 4,DataLayout,IndexType>> gpu3(gpu_data3, tensorrangeShuffle); + + gpu3.device(sycl_device)=gpu1.shuffle(shuffles); + sycl_device.memcpyDeviceToHost(shuffle.data(), gpu_data3, buffSize); + sycl_device.synchronize(); + + VERIFY_IS_EQUAL(shuffle.dimension(0), sizeDim3); + VERIFY_IS_EQUAL(shuffle.dimension(1), sizeDim4); + VERIFY_IS_EQUAL(shuffle.dimension(2), sizeDim2); + VERIFY_IS_EQUAL(shuffle.dimension(3), sizeDim1); + + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + for (IndexType l = 0; l < sizeDim4; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i)); + } + } + } + } +} + + +template<typename DataType, typename dev_Selector> void sycl_shuffling_test_per_device(dev_Selector s){ + QueueInterface queueInterface(s); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + test_simple_shuffling_sycl<DataType, RowMajor, int64_t>(sycl_device); + test_simple_shuffling_sycl<DataType, ColMajor, int64_t>(sycl_device); + +} +void 
test_cxx11_tensor_shuffling_sycl() +{ + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(sycl_shuffling_test_per_device<float>(device)); + } +} diff --git a/eigen/unsupported/test/cxx11_tensor_simple.cpp b/eigen/unsupported/test/cxx11_tensor_simple.cpp new file mode 100644 index 0000000..5a0d339 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_simple.cpp @@ -0,0 +1,327 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::RowMajor; + +static void test_0d() +{ + Tensor<int, 0> scalar1; + Tensor<int, 0, RowMajor> scalar2; + Tensor<int, 0> scalar3; + Tensor<int, 0, RowMajor> scalar4; + + scalar3.resize(); + scalar4.resize(); + + scalar1() = 7; + scalar2() = 13; + scalar3.setValues(17); + scalar4.setZero(); + + VERIFY_IS_EQUAL(scalar1.rank(), 0); + VERIFY_IS_EQUAL(scalar1.size(), 1); + + VERIFY_IS_EQUAL(scalar1(), 7); + VERIFY_IS_EQUAL(scalar2(), 13); + VERIFY_IS_EQUAL(scalar3(), 17); + VERIFY_IS_EQUAL(scalar4(), 0); + + Tensor<int, 0> scalar5(scalar1); + + VERIFY_IS_EQUAL(scalar5(), 7); + VERIFY_IS_EQUAL(scalar5.data()[0], 7); +} + +static void test_1d() +{ + Tensor<int, 1> vec1(6); + Tensor<int, 1, RowMajor> vec2(6); + Tensor<int, 1> vec3; + Tensor<int, 1, RowMajor> vec4; + + vec3.resize(6); + vec4.resize(6); + + vec1(0) = 4; vec2(0) = 0; vec3(0) = 5; + vec1(1) = 8; vec2(1) = 1; vec3(1) = 4; + vec1(2) = 15; vec2(2) = 2; vec3(2) = 3; + vec1(3) = 16; vec2(3) = 3; vec3(3) = 2; + vec1(4) = 23; vec2(4) = 4; vec3(4) = 1; + vec1(5) = 42; vec2(5) = 5; vec3(5) = 0; + vec4.setZero(); + + VERIFY_IS_EQUAL((vec1.rank()), 1); + VERIFY_IS_EQUAL((vec1.size()), 6); + VERIFY_IS_EQUAL((vec1.dimensions()[0]), 6); + + VERIFY_IS_EQUAL((vec1[0]), 4); + VERIFY_IS_EQUAL((vec1[1]), 8); + VERIFY_IS_EQUAL((vec1[2]), 15); + VERIFY_IS_EQUAL((vec1[3]), 16); + VERIFY_IS_EQUAL((vec1[4]), 23); + VERIFY_IS_EQUAL((vec1[5]), 42); + + VERIFY_IS_EQUAL((vec2[0]), 0); + VERIFY_IS_EQUAL((vec2[1]), 1); + VERIFY_IS_EQUAL((vec2[2]), 2); + VERIFY_IS_EQUAL((vec2[3]), 3); + VERIFY_IS_EQUAL((vec2[4]), 4); + VERIFY_IS_EQUAL((vec2[5]), 5); + + VERIFY_IS_EQUAL((vec3[0]), 5); + VERIFY_IS_EQUAL((vec3[1]), 4); + VERIFY_IS_EQUAL((vec3[2]), 3); + VERIFY_IS_EQUAL((vec3[3]), 2); + VERIFY_IS_EQUAL((vec3[4]), 1); + VERIFY_IS_EQUAL((vec3[5]), 0); + + VERIFY_IS_EQUAL((vec4[0]), 0); + VERIFY_IS_EQUAL((vec4[1]), 0); + VERIFY_IS_EQUAL((vec4[2]), 0); + VERIFY_IS_EQUAL((vec4[3]), 0); + VERIFY_IS_EQUAL((vec4[4]), 0); + VERIFY_IS_EQUAL((vec4[5]), 0); + + Tensor<int, 1> vec5(vec1); + + VERIFY_IS_EQUAL((vec5(0)), 4); + VERIFY_IS_EQUAL((vec5(1)), 8); + VERIFY_IS_EQUAL((vec5(2)), 15); + VERIFY_IS_EQUAL((vec5(3)), 16); + VERIFY_IS_EQUAL((vec5(4)), 23); + VERIFY_IS_EQUAL((vec5(5)), 42); + + VERIFY_IS_EQUAL((vec5.data()[0]), 4); + VERIFY_IS_EQUAL((vec5.data()[1]), 8); + VERIFY_IS_EQUAL((vec5.data()[2]), 15); + VERIFY_IS_EQUAL((vec5.data()[3]), 16); + VERIFY_IS_EQUAL((vec5.data()[4]), 23); + VERIFY_IS_EQUAL((vec5.data()[5]), 42); +} + +static void test_2d() +{ + Tensor<int, 2> mat1(2,3); + Tensor<int, 2, RowMajor> mat2(2,3); + + mat1(0,0) = 0; + mat1(0,1) = 1; + mat1(0,2) = 2; + mat1(1,0) = 3; + mat1(1,1) = 4; + mat1(1,2) = 5; + + 
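+  // mat2 holds the same logical values in a RowMajor tensor; the data()[]
+  // checks below then verify the physical layouts: the ColMajor mat1 stores
+  // its coefficients as 0,3,1,4,2,5 while the RowMajor mat2 stores them
+  // contiguously as 0,1,2,3,4,5.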
mat2(0,0) = 0; + mat2(0,1) = 1; + mat2(0,2) = 2; + mat2(1,0) = 3; + mat2(1,1) = 4; + mat2(1,2) = 5; + + VERIFY_IS_EQUAL((mat1.rank()), 2); + VERIFY_IS_EQUAL((mat1.size()), 6); + VERIFY_IS_EQUAL((mat1.dimensions()[0]), 2); + VERIFY_IS_EQUAL((mat1.dimensions()[1]), 3); + + VERIFY_IS_EQUAL((mat2.rank()), 2); + VERIFY_IS_EQUAL((mat2.size()), 6); + VERIFY_IS_EQUAL((mat2.dimensions()[0]), 2); + VERIFY_IS_EQUAL((mat2.dimensions()[1]), 3); + + VERIFY_IS_EQUAL((mat1.data()[0]), 0); + VERIFY_IS_EQUAL((mat1.data()[1]), 3); + VERIFY_IS_EQUAL((mat1.data()[2]), 1); + VERIFY_IS_EQUAL((mat1.data()[3]), 4); + VERIFY_IS_EQUAL((mat1.data()[4]), 2); + VERIFY_IS_EQUAL((mat1.data()[5]), 5); + + VERIFY_IS_EQUAL((mat2.data()[0]), 0); + VERIFY_IS_EQUAL((mat2.data()[1]), 1); + VERIFY_IS_EQUAL((mat2.data()[2]), 2); + VERIFY_IS_EQUAL((mat2.data()[3]), 3); + VERIFY_IS_EQUAL((mat2.data()[4]), 4); + VERIFY_IS_EQUAL((mat2.data()[5]), 5); +} + +static void test_3d() +{ + Tensor<int, 3> epsilon(3,3,3); + epsilon.setZero(); + epsilon(0,1,2) = epsilon(2,0,1) = epsilon(1,2,0) = 1; + epsilon(2,1,0) = epsilon(0,2,1) = epsilon(1,0,2) = -1; + + VERIFY_IS_EQUAL((epsilon.size()), 27); + VERIFY_IS_EQUAL((epsilon.dimensions()[0]), 3); + VERIFY_IS_EQUAL((epsilon.dimensions()[1]), 3); + VERIFY_IS_EQUAL((epsilon.dimensions()[2]), 3); + + VERIFY_IS_EQUAL((epsilon(0,0,0)), 0); + VERIFY_IS_EQUAL((epsilon(0,0,1)), 0); + VERIFY_IS_EQUAL((epsilon(0,0,2)), 0); + VERIFY_IS_EQUAL((epsilon(0,1,0)), 0); + VERIFY_IS_EQUAL((epsilon(0,1,1)), 0); + VERIFY_IS_EQUAL((epsilon(0,2,0)), 0); + VERIFY_IS_EQUAL((epsilon(0,2,2)), 0); + VERIFY_IS_EQUAL((epsilon(1,0,0)), 0); + VERIFY_IS_EQUAL((epsilon(1,0,1)), 0); + VERIFY_IS_EQUAL((epsilon(1,1,0)), 0); + VERIFY_IS_EQUAL((epsilon(1,1,1)), 0); + VERIFY_IS_EQUAL((epsilon(1,1,2)), 0); + VERIFY_IS_EQUAL((epsilon(1,2,1)), 0); + VERIFY_IS_EQUAL((epsilon(1,2,2)), 0); + VERIFY_IS_EQUAL((epsilon(2,0,0)), 0); + VERIFY_IS_EQUAL((epsilon(2,0,2)), 0); + VERIFY_IS_EQUAL((epsilon(2,1,1)), 0); + VERIFY_IS_EQUAL((epsilon(2,1,2)), 0); + VERIFY_IS_EQUAL((epsilon(2,2,0)), 0); + VERIFY_IS_EQUAL((epsilon(2,2,1)), 0); + VERIFY_IS_EQUAL((epsilon(2,2,2)), 0); + + VERIFY_IS_EQUAL((epsilon(0,1,2)), 1); + VERIFY_IS_EQUAL((epsilon(2,0,1)), 1); + VERIFY_IS_EQUAL((epsilon(1,2,0)), 1); + VERIFY_IS_EQUAL((epsilon(2,1,0)), -1); + VERIFY_IS_EQUAL((epsilon(0,2,1)), -1); + VERIFY_IS_EQUAL((epsilon(1,0,2)), -1); + + array<Eigen::DenseIndex, 3> dims; + dims[0] = 2; + dims[1] = 3; + dims[2] = 4; + Tensor<int, 3> t1(dims); + Tensor<int, 3, RowMajor> t2(dims); + + VERIFY_IS_EQUAL((t1.size()), 24); + VERIFY_IS_EQUAL((t1.dimensions()[0]), 2); + VERIFY_IS_EQUAL((t1.dimensions()[1]), 3); + VERIFY_IS_EQUAL((t1.dimensions()[2]), 4); + + VERIFY_IS_EQUAL((t2.size()), 24); + VERIFY_IS_EQUAL((t2.dimensions()[0]), 2); + VERIFY_IS_EQUAL((t2.dimensions()[1]), 3); + VERIFY_IS_EQUAL((t2.dimensions()[2]), 4); + + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 3; j++) { + for (int k = 0; k < 4; k++) { + t1(i, j, k) = 100 * i + 10 * j + k; + t2(i, j, k) = 100 * i + 10 * j + k; + } + } + } + + VERIFY_IS_EQUAL((t1.data()[0]), 0); + VERIFY_IS_EQUAL((t1.data()[1]), 100); + VERIFY_IS_EQUAL((t1.data()[2]), 10); + VERIFY_IS_EQUAL((t1.data()[3]), 110); + VERIFY_IS_EQUAL((t1.data()[4]), 20); + VERIFY_IS_EQUAL((t1.data()[5]), 120); + VERIFY_IS_EQUAL((t1.data()[6]), 1); + VERIFY_IS_EQUAL((t1.data()[7]), 101); + VERIFY_IS_EQUAL((t1.data()[8]), 11); + VERIFY_IS_EQUAL((t1.data()[9]), 111); + VERIFY_IS_EQUAL((t1.data()[10]), 21); + VERIFY_IS_EQUAL((t1.data()[11]), 121); + 
VERIFY_IS_EQUAL((t1.data()[12]), 2); + VERIFY_IS_EQUAL((t1.data()[13]), 102); + VERIFY_IS_EQUAL((t1.data()[14]), 12); + VERIFY_IS_EQUAL((t1.data()[15]), 112); + VERIFY_IS_EQUAL((t1.data()[16]), 22); + VERIFY_IS_EQUAL((t1.data()[17]), 122); + VERIFY_IS_EQUAL((t1.data()[18]), 3); + VERIFY_IS_EQUAL((t1.data()[19]), 103); + VERIFY_IS_EQUAL((t1.data()[20]), 13); + VERIFY_IS_EQUAL((t1.data()[21]), 113); + VERIFY_IS_EQUAL((t1.data()[22]), 23); + VERIFY_IS_EQUAL((t1.data()[23]), 123); + + VERIFY_IS_EQUAL((t2.data()[0]), 0); + VERIFY_IS_EQUAL((t2.data()[1]), 1); + VERIFY_IS_EQUAL((t2.data()[2]), 2); + VERIFY_IS_EQUAL((t2.data()[3]), 3); + VERIFY_IS_EQUAL((t2.data()[4]), 10); + VERIFY_IS_EQUAL((t2.data()[5]), 11); + VERIFY_IS_EQUAL((t2.data()[6]), 12); + VERIFY_IS_EQUAL((t2.data()[7]), 13); + VERIFY_IS_EQUAL((t2.data()[8]), 20); + VERIFY_IS_EQUAL((t2.data()[9]), 21); + VERIFY_IS_EQUAL((t2.data()[10]), 22); + VERIFY_IS_EQUAL((t2.data()[11]), 23); + VERIFY_IS_EQUAL((t2.data()[12]), 100); + VERIFY_IS_EQUAL((t2.data()[13]), 101); + VERIFY_IS_EQUAL((t2.data()[14]), 102); + VERIFY_IS_EQUAL((t2.data()[15]), 103); + VERIFY_IS_EQUAL((t2.data()[16]), 110); + VERIFY_IS_EQUAL((t2.data()[17]), 111); + VERIFY_IS_EQUAL((t2.data()[18]), 112); + VERIFY_IS_EQUAL((t2.data()[19]), 113); + VERIFY_IS_EQUAL((t2.data()[20]), 120); + VERIFY_IS_EQUAL((t2.data()[21]), 121); + VERIFY_IS_EQUAL((t2.data()[22]), 122); + VERIFY_IS_EQUAL((t2.data()[23]), 123); +} + +static void test_simple_assign() +{ + Tensor<int, 3> epsilon(3,3,3); + epsilon.setZero(); + epsilon(0,1,2) = epsilon(2,0,1) = epsilon(1,2,0) = 1; + epsilon(2,1,0) = epsilon(0,2,1) = epsilon(1,0,2) = -1; + + Tensor<int, 3> e2(3,3,3); + e2.setZero(); + VERIFY_IS_EQUAL((e2(1,2,0)), 0); + + e2 = epsilon; + VERIFY_IS_EQUAL((e2(1,2,0)), 1); + VERIFY_IS_EQUAL((e2(0,1,2)), 1); + VERIFY_IS_EQUAL((e2(2,0,1)), 1); + VERIFY_IS_EQUAL((e2(2,1,0)), -1); + VERIFY_IS_EQUAL((e2(0,2,1)), -1); + VERIFY_IS_EQUAL((e2(1,0,2)), -1); +} + +static void test_resize() +{ + Tensor<int, 3> epsilon; + epsilon.resize(2,3,7); + VERIFY_IS_EQUAL(epsilon.dimension(0), 2); + VERIFY_IS_EQUAL(epsilon.dimension(1), 3); + VERIFY_IS_EQUAL(epsilon.dimension(2), 7); + VERIFY_IS_EQUAL(epsilon.size(), 2*3*7); + + const int* old_data = epsilon.data(); + epsilon.resize(3,2,7); + VERIFY_IS_EQUAL(epsilon.dimension(0), 3); + VERIFY_IS_EQUAL(epsilon.dimension(1), 2); + VERIFY_IS_EQUAL(epsilon.dimension(2), 7); + VERIFY_IS_EQUAL(epsilon.size(), 2*3*7); + VERIFY_IS_EQUAL(epsilon.data(), old_data); + + epsilon.resize(3,5,7); + VERIFY_IS_EQUAL(epsilon.dimension(0), 3); + VERIFY_IS_EQUAL(epsilon.dimension(1), 5); + VERIFY_IS_EQUAL(epsilon.dimension(2), 7); + VERIFY_IS_EQUAL(epsilon.size(), 3*5*7); +} + +void test_cxx11_tensor_simple() +{ + CALL_SUBTEST(test_0d()); + CALL_SUBTEST(test_1d()); + CALL_SUBTEST(test_2d()); + CALL_SUBTEST(test_3d()); + CALL_SUBTEST(test_simple_assign()); + CALL_SUBTEST(test_resize()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_striding.cpp b/eigen/unsupported/test/cxx11_tensor_striding.cpp new file mode 100644 index 0000000..935b908 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_striding.cpp @@ -0,0 +1,119 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
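+
+// tensor.stride(s) keeps every s-th coefficient along each dimension, so an
+// input dimension d shrinks to ceil(d/s). A minimal sketch of the semantics
+// exercised below (hypothetical values, not part of the test):
+//
+//   Tensor<float, 1> v(7);
+//   array<ptrdiff_t, 1> s{{3}};
+//   // v.stride(s) has dimension ceil(7/3) = 3 and picks v(0), v(3), v(6).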
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +template<int DataLayout> +static void test_simple_striding() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + array<ptrdiff_t, 4> strides; + strides[0] = 1; + strides[1] = 1; + strides[2] = 1; + strides[3] = 1; + + Tensor<float, 4, DataLayout> no_stride; + no_stride = tensor.stride(strides); + + VERIFY_IS_EQUAL(no_stride.dimension(0), 2); + VERIFY_IS_EQUAL(no_stride.dimension(1), 3); + VERIFY_IS_EQUAL(no_stride.dimension(2), 5); + VERIFY_IS_EQUAL(no_stride.dimension(3), 7); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l)); + } + } + } + } + + strides[0] = 2; + strides[1] = 4; + strides[2] = 2; + strides[3] = 3; + Tensor<float, 4, DataLayout> stride; + stride = tensor.stride(strides); + + VERIFY_IS_EQUAL(stride.dimension(0), 1); + VERIFY_IS_EQUAL(stride.dimension(1), 1); + VERIFY_IS_EQUAL(stride.dimension(2), 3); + VERIFY_IS_EQUAL(stride.dimension(3), 3); + + for (int i = 0; i < 1; ++i) { + for (int j = 0; j < 1; ++j) { + for (int k = 0; k < 3; ++k) { + for (int l = 0; l < 3; ++l) { + VERIFY_IS_EQUAL(tensor(2*i,4*j,2*k,3*l), stride(i,j,k,l)); + } + } + } + } +} + + +template<int DataLayout> +static void test_striding_as_lvalue() +{ + Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + array<ptrdiff_t, 4> strides; + strides[0] = 2; + strides[1] = 4; + strides[2] = 2; + strides[3] = 3; + + Tensor<float, 4, DataLayout> result(3, 12, 10, 21); + result.stride(strides) = tensor; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), result(2*i,4*j,2*k,3*l)); + } + } + } + } + + array<ptrdiff_t, 4> no_strides; + no_strides[0] = 1; + no_strides[1] = 1; + no_strides[2] = 1; + no_strides[3] = 1; + Tensor<float, 4, DataLayout> result2(3, 12, 10, 21); + result2.stride(strides) = tensor.stride(no_strides); + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), result2(2*i,4*j,2*k,3*l)); + } + } + } + } +} + + +void test_cxx11_tensor_striding() +{ + CALL_SUBTEST(test_simple_striding<ColMajor>()); + CALL_SUBTEST(test_simple_striding<RowMajor>()); + CALL_SUBTEST(test_striding_as_lvalue<ColMajor>()); + CALL_SUBTEST(test_striding_as_lvalue<RowMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp new file mode 100644 index 0000000..603c374 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_striding_sycl.cpp @@ -0,0 +1,203 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_striding_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + +#include <iostream> +#include <chrono> +#include <ctime> + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; + + +template <typename DataType, int DataLayout, typename IndexType> +static void test_simple_striding(const Eigen::SyclDevice& sycl_device) +{ + + Eigen::array<IndexType, 4> tensor_dims = {{2,3,5,7}}; + Eigen::array<IndexType, 4> stride_dims = {{1,1,3,3}}; + + + Tensor<DataType, 4, DataLayout, IndexType> tensor(tensor_dims); + Tensor<DataType, 4, DataLayout,IndexType> no_stride(tensor_dims); + Tensor<DataType, 4, DataLayout,IndexType> stride(stride_dims); + + + std::size_t tensor_bytes = tensor.size() * sizeof(DataType); + std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType); + std::size_t stride_bytes = stride.size() * sizeof(DataType); + DataType * d_tensor = static_cast<DataType*>(sycl_device.allocate(tensor_bytes)); + DataType * d_no_stride = static_cast<DataType*>(sycl_device.allocate(no_stride_bytes)); + DataType * d_stride = static_cast<DataType*>(sycl_device.allocate(stride_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_tensor(d_tensor, tensor_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_no_stride(d_no_stride, tensor_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_stride(d_stride, stride_dims); + + + tensor.setRandom(); + array<IndexType, 4> strides; + strides[0] = 1; + strides[1] = 1; + strides[2] = 1; + strides[3] = 1; + sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes); + gpu_no_stride.device(sycl_device)=gpu_tensor.stride(strides); + sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes); + + //no_stride = tensor.stride(strides); + + VERIFY_IS_EQUAL(no_stride.dimension(0), 2); + VERIFY_IS_EQUAL(no_stride.dimension(1), 3); + VERIFY_IS_EQUAL(no_stride.dimension(2), 5); + VERIFY_IS_EQUAL(no_stride.dimension(3), 7); + + for (IndexType i = 0; i < 2; ++i) { + for (IndexType j = 0; j < 3; ++j) { + for (IndexType k = 0; k < 5; ++k) { + for (IndexType l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(i,j,k,l)); + } + } + } + } + + strides[0] = 2; + strides[1] = 4; + strides[2] = 2; + strides[3] = 3; +//Tensor<float, 4, DataLayout> stride; +// stride = tensor.stride(strides); + + gpu_stride.device(sycl_device)=gpu_tensor.stride(strides); + sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes); + + VERIFY_IS_EQUAL(stride.dimension(0), 1); + VERIFY_IS_EQUAL(stride.dimension(1), 1); + VERIFY_IS_EQUAL(stride.dimension(2), 3); + VERIFY_IS_EQUAL(stride.dimension(3), 3); + + for (IndexType i = 0; i < 1; ++i) { + for (IndexType j = 0; j < 1; ++j) { + for (IndexType k = 0; k < 3; ++k) { + for (IndexType l = 0; l < 3; ++l) { + VERIFY_IS_EQUAL(tensor(2*i,4*j,2*k,3*l), stride(i,j,k,l)); + } + } + } + } + + sycl_device.deallocate(d_tensor); + sycl_device.deallocate(d_no_stride); + sycl_device.deallocate(d_stride); +} + +template <typename DataType, int DataLayout, typename IndexType> +static void test_striding_as_lvalue(const Eigen::SyclDevice& sycl_device) +{ + + Eigen::array<IndexType, 4> tensor_dims = {{2,3,5,7}}; + Eigen::array<IndexType, 4> stride_dims = {{3,12,10,21}}; + + + Tensor<DataType, 4, 
DataLayout, IndexType> tensor(tensor_dims); + Tensor<DataType, 4, DataLayout,IndexType> no_stride(stride_dims); + Tensor<DataType, 4, DataLayout,IndexType> stride(stride_dims); + + + std::size_t tensor_bytes = tensor.size() * sizeof(DataType); + std::size_t no_stride_bytes = no_stride.size() * sizeof(DataType); + std::size_t stride_bytes = stride.size() * sizeof(DataType); + + DataType * d_tensor = static_cast<DataType*>(sycl_device.allocate(tensor_bytes)); + DataType * d_no_stride = static_cast<DataType*>(sycl_device.allocate(no_stride_bytes)); + DataType * d_stride = static_cast<DataType*>(sycl_device.allocate(stride_bytes)); + + Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_tensor(d_tensor, tensor_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_no_stride(d_no_stride, stride_dims); + Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, IndexType> > gpu_stride(d_stride, stride_dims); + + //Tensor<float, 4, DataLayout> tensor(2,3,5,7); + tensor.setRandom(); + array<IndexType, 4> strides; + strides[0] = 2; + strides[1] = 4; + strides[2] = 2; + strides[3] = 3; + +// Tensor<float, 4, DataLayout> result(3, 12, 10, 21); +// result.stride(strides) = tensor; + sycl_device.memcpyHostToDevice(d_tensor, tensor.data(), tensor_bytes); + gpu_stride.stride(strides).device(sycl_device)=gpu_tensor; + sycl_device.memcpyDeviceToHost(stride.data(), d_stride, stride_bytes); + + for (IndexType i = 0; i < 2; ++i) { + for (IndexType j = 0; j < 3; ++j) { + for (IndexType k = 0; k < 5; ++k) { + for (IndexType l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), stride(2*i,4*j,2*k,3*l)); + } + } + } + } + + array<IndexType, 4> no_strides; + no_strides[0] = 1; + no_strides[1] = 1; + no_strides[2] = 1; + no_strides[3] = 1; +// Tensor<float, 4, DataLayout> result2(3, 12, 10, 21); +// result2.stride(strides) = tensor.stride(no_strides); + + gpu_no_stride.stride(strides).device(sycl_device)=gpu_tensor.stride(no_strides); + sycl_device.memcpyDeviceToHost(no_stride.data(), d_no_stride, no_stride_bytes); + + for (IndexType i = 0; i < 2; ++i) { + for (IndexType j = 0; j < 3; ++j) { + for (IndexType k = 0; k < 5; ++k) { + for (IndexType l = 0; l < 7; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), no_stride(2*i,4*j,2*k,3*l)); + } + } + } + } + sycl_device.deallocate(d_tensor); + sycl_device.deallocate(d_no_stride); + sycl_device.deallocate(d_stride); +} + + +template <typename Dev_selector> void tensorStridingPerDevice(Dev_selector& s){ + QueueInterface queueInterface(s); + auto sycl_device=Eigen::SyclDevice(&queueInterface); + test_simple_striding<float, ColMajor, int64_t>(sycl_device); + test_simple_striding<float, RowMajor, int64_t>(sycl_device); + test_striding_as_lvalue<float, ColMajor, int64_t>(sycl_device); + test_striding_as_lvalue<float, RowMajor, int64_t>(sycl_device); +} + +void test_cxx11_tensor_striding_sycl() { + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(tensorStridingPerDevice(device)); + } +} diff --git a/eigen/unsupported/test/cxx11_tensor_sugar.cpp b/eigen/unsupported/test/cxx11_tensor_sugar.cpp new file mode 100644 index 0000000..2f56eb4 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_sugar.cpp @@ -0,0 +1,81 @@ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; +using Eigen::RowMajor; + +static void test_comparison_sugar() { + // we already trust comparisons between tensors, we're simply checking that + // the sugared versions are doing the same thing + Tensor<int, 3> t(6, 7, 
5); + + t.setRandom(); + // make sure we have at least one value == 0 + t(0,0,0) = 0; + + Tensor<bool,0> b; + +#define TEST_TENSOR_EQUAL(e1, e2) \ + b = ((e1) == (e2)).all(); \ + VERIFY(b()) + +#define TEST_OP(op) TEST_TENSOR_EQUAL(t op 0, t op t.constant(0)) + + TEST_OP(==); + TEST_OP(!=); + TEST_OP(<=); + TEST_OP(>=); + TEST_OP(<); + TEST_OP(>); +#undef TEST_OP +#undef TEST_TENSOR_EQUAL +} + + +static void test_scalar_sugar_add_mul() { + Tensor<float, 3> A(6, 7, 5); + Tensor<float, 3> B(6, 7, 5); + A.setRandom(); + B.setRandom(); + + const float alpha = 0.43f; + const float beta = 0.21f; + const float gamma = 0.14f; + + Tensor<float, 3> R = A.constant(gamma) + A * A.constant(alpha) + B * B.constant(beta); + Tensor<float, 3> S = A * alpha + B * beta + gamma; + Tensor<float, 3> T = gamma + alpha * A + beta * B; + + for (int i = 0; i < 6*7*5; ++i) { + VERIFY_IS_APPROX(R(i), S(i)); + VERIFY_IS_APPROX(R(i), T(i)); + } +} + +static void test_scalar_sugar_sub_div() { + Tensor<float, 3> A(6, 7, 5); + Tensor<float, 3> B(6, 7, 5); + A.setRandom(); + B.setRandom(); + + const float alpha = 0.43f; + const float beta = 0.21f; + const float gamma = 0.14f; + const float delta = 0.32f; + + Tensor<float, 3> R = A.constant(gamma) - A / A.constant(alpha) + - B.constant(beta) / B - A.constant(delta); + Tensor<float, 3> S = gamma - A / alpha - beta / B - delta; + + for (int i = 0; i < 6*7*5; ++i) { + VERIFY_IS_APPROX(R(i), S(i)); + } +} + +void test_cxx11_tensor_sugar() +{ + CALL_SUBTEST(test_comparison_sugar()); + CALL_SUBTEST(test_scalar_sugar_add_mul()); + CALL_SUBTEST(test_scalar_sugar_sub_div()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_sycl.cpp b/eigen/unsupported/test/cxx11_tensor_sycl.cpp new file mode 100644 index 0000000..5cd0f4c --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_sycl.cpp @@ -0,0 +1,276 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: <eigen@codeplay.com> +// Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
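+
+// End-to-end checks for the SYCL backend: raw host/device transfers, queue
+// synchronization, a set of coefficient-wise expressions (constants, scaling,
+// products, sums, select) and a cast between scalar types, each verified
+// against the same computation performed on the host.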
+ + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t +#define EIGEN_USE_SYCL + +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; + +template <typename DataType, int DataLayout, typename IndexType> +void test_sycl_mem_transfers(const Eigen::SyclDevice &sycl_device) { + IndexType sizeDim1 = 100; + IndexType sizeDim2 = 10; + IndexType sizeDim3 = 20; + array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + Tensor<DataType, 3, DataLayout, IndexType> in1(tensorRange); + Tensor<DataType, 3, DataLayout, IndexType> out1(tensorRange); + Tensor<DataType, 3, DataLayout, IndexType> out2(tensorRange); + Tensor<DataType, 3, DataLayout, IndexType> out3(tensorRange); + + in1 = in1.random(); + + DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(in1.size()*sizeof(DataType))); + DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(out1.size()*sizeof(DataType))); + + TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu1(gpu_data1, tensorRange); + TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu2(gpu_data2, tensorRange); + + sycl_device.memcpyHostToDevice(gpu_data1, in1.data(),(in1.size())*sizeof(DataType)); + sycl_device.memcpyHostToDevice(gpu_data2, in1.data(),(in1.size())*sizeof(DataType)); + gpu1.device(sycl_device) = gpu1 * 3.14f; + gpu2.device(sycl_device) = gpu2 * 2.7f; + sycl_device.memcpyDeviceToHost(out1.data(), gpu_data1,(out1.size())*sizeof(DataType)); + sycl_device.memcpyDeviceToHost(out2.data(), gpu_data1,(out2.size())*sizeof(DataType)); + sycl_device.memcpyDeviceToHost(out3.data(), gpu_data2,(out3.size())*sizeof(DataType)); + sycl_device.synchronize(); + + for (IndexType i = 0; i < in1.size(); ++i) { + VERIFY_IS_APPROX(out1(i), in1(i) * 3.14f); + VERIFY_IS_APPROX(out2(i), in1(i) * 3.14f); + VERIFY_IS_APPROX(out3(i), in1(i) * 2.7f); + } + + sycl_device.deallocate(gpu_data1); + sycl_device.deallocate(gpu_data2); +} + +template <typename DataType, int DataLayout, typename IndexType> +void test_sycl_mem_sync(const Eigen::SyclDevice &sycl_device) { + IndexType size = 20; + array<IndexType, 1> tensorRange = {{size}}; + Tensor<DataType, 1, DataLayout, IndexType> in1(tensorRange); + Tensor<DataType, 1, DataLayout, IndexType> in2(tensorRange); + Tensor<DataType, 1, DataLayout, IndexType> out(tensorRange); + + in1 = in1.random(); + in2 = in1; + + DataType* gpu_data = static_cast<DataType*>(sycl_device.allocate(in1.size()*sizeof(DataType))); + + TensorMap<Tensor<DataType, 1, DataLayout, IndexType>> gpu1(gpu_data, tensorRange); + sycl_device.memcpyHostToDevice(gpu_data, in1.data(),(in1.size())*sizeof(DataType)); + sycl_device.synchronize(); + in1.setZero(); + + sycl_device.memcpyDeviceToHost(out.data(), gpu_data, out.size()*sizeof(DataType)); + sycl_device.synchronize(); + + for (IndexType i = 0; i < in1.size(); ++i) { + VERIFY_IS_APPROX(out(i), in2(i)); + } + + sycl_device.deallocate(gpu_data); +} + +template <typename DataType, int DataLayout, typename IndexType> +void test_sycl_computations(const Eigen::SyclDevice &sycl_device) { + + IndexType sizeDim1 = 100; + IndexType sizeDim2 = 10; + IndexType sizeDim3 = 20; + array<IndexType, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + Tensor<DataType, 3,DataLayout, IndexType> in1(tensorRange); + Tensor<DataType, 3,DataLayout, IndexType> in2(tensorRange); + Tensor<DataType, 3,DataLayout, 
IndexType> in3(tensorRange); + Tensor<DataType, 3,DataLayout, IndexType> out(tensorRange); + + in2 = in2.random(); + in3 = in3.random(); + + DataType * gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1.size()*sizeof(DataType))); + DataType * gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(in2.size()*sizeof(DataType))); + DataType * gpu_in3_data = static_cast<DataType*>(sycl_device.allocate(in3.size()*sizeof(DataType))); + DataType * gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out.size()*sizeof(DataType))); + + TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in1(gpu_in1_data, tensorRange); + TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in2(gpu_in2_data, tensorRange); + TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_in3(gpu_in3_data, tensorRange); + TensorMap<Tensor<DataType, 3, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange); + + /// a=1.2f + gpu_in1.device(sycl_device) = gpu_in1.constant(1.2f); + sycl_device.memcpyDeviceToHost(in1.data(), gpu_in1_data ,(in1.size())*sizeof(DataType)); + sycl_device.synchronize(); + + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(in1(i,j,k), 1.2f); + } + } + } + printf("a=1.2f Test passed\n"); + + /// a=b*1.2f + gpu_out.device(sycl_device) = gpu_in1 * 1.2f; + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data ,(out.size())*sizeof(DataType)); + sycl_device.synchronize(); + + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) * 1.2f); + } + } + } + printf("a=b*1.2f Test Passed\n"); + + /// c=a*b + sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(),(in2.size())*sizeof(DataType)); + gpu_out.device(sycl_device) = gpu_in1 * gpu_in2; + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType)); + sycl_device.synchronize(); + + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) * + in2(i,j,k)); + } + } + } + printf("c=a*b Test Passed\n"); + + /// c=a+b + gpu_out.device(sycl_device) = gpu_in1 + gpu_in2; + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType)); + sycl_device.synchronize(); + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) + + in2(i,j,k)); + } + } + } + printf("c=a+b Test Passed\n"); + + /// c=a*a + gpu_out.device(sycl_device) = gpu_in1 * gpu_in1; + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType)); + sycl_device.synchronize(); + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) * + in1(i,j,k)); + } + } + } + printf("c= a*a Test Passed\n"); + + //a*3.14f + b*2.7f + gpu_out.device(sycl_device) = gpu_in1 * gpu_in1.constant(3.14f) + gpu_in2 * gpu_in2.constant(2.7f); + sycl_device.memcpyDeviceToHost(out.data(),gpu_out_data,(out.size())*sizeof(DataType)); + sycl_device.synchronize(); + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) * 3.14f 
+ + in2(i,j,k) * 2.7f); + } + } + } + printf("a*3.14f + b*2.7f Test Passed\n"); + + ///d= (a>0.5? b:c) + sycl_device.memcpyHostToDevice(gpu_in3_data, in3.data(),(in3.size())*sizeof(DataType)); + gpu_out.device(sycl_device) =(gpu_in1 > gpu_in1.constant(0.5f)).select(gpu_in2, gpu_in3); + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data,(out.size())*sizeof(DataType)); + sycl_device.synchronize(); + for (IndexType i = 0; i < sizeDim1; ++i) { + for (IndexType j = 0; j < sizeDim2; ++j) { + for (IndexType k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i, j, k), (in1(i, j, k) > 0.5f) + ? in2(i, j, k) + : in3(i, j, k)); + } + } + } + printf("d= (a>0.5? b:c) Test Passed\n"); + sycl_device.deallocate(gpu_in1_data); + sycl_device.deallocate(gpu_in2_data); + sycl_device.deallocate(gpu_in3_data); + sycl_device.deallocate(gpu_out_data); +} +template<typename Scalar1, typename Scalar2, int DataLayout, typename IndexType> +static void test_sycl_cast(const Eigen::SyclDevice& sycl_device){ + IndexType size = 20; + array<IndexType, 1> tensorRange = {{size}}; + Tensor<Scalar1, 1, DataLayout, IndexType> in(tensorRange); + Tensor<Scalar2, 1, DataLayout, IndexType> out(tensorRange); + Tensor<Scalar2, 1, DataLayout, IndexType> out_host(tensorRange); + + in = in.random(); + + Scalar1* gpu_in_data = static_cast<Scalar1*>(sycl_device.allocate(in.size()*sizeof(Scalar1))); + Scalar2 * gpu_out_data = static_cast<Scalar2*>(sycl_device.allocate(out.size()*sizeof(Scalar2))); + + TensorMap<Tensor<Scalar1, 1, DataLayout, IndexType>> gpu_in(gpu_in_data, tensorRange); + TensorMap<Tensor<Scalar2, 1, DataLayout, IndexType>> gpu_out(gpu_out_data, tensorRange); + sycl_device.memcpyHostToDevice(gpu_in_data, in.data(),(in.size())*sizeof(Scalar1)); + gpu_out.device(sycl_device) = gpu_in. template cast<Scalar2>(); + sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, out.size()*sizeof(Scalar2)); + out_host = in. template cast<Scalar2>(); + for(IndexType i=0; i< size; i++) + { + VERIFY_IS_APPROX(out(i), out_host(i)); + } + printf("cast Test Passed\n"); + sycl_device.deallocate(gpu_in_data); + sycl_device.deallocate(gpu_out_data); +} +template<typename DataType, typename dev_Selector> void sycl_computing_test_per_device(dev_Selector s){ + QueueInterface queueInterface(s); + auto sycl_device = Eigen::SyclDevice(&queueInterface); + test_sycl_mem_transfers<DataType, RowMajor, int64_t>(sycl_device); + test_sycl_computations<DataType, RowMajor, int64_t>(sycl_device); + test_sycl_mem_sync<DataType, RowMajor, int64_t>(sycl_device); + test_sycl_mem_transfers<DataType, ColMajor, int64_t>(sycl_device); + test_sycl_computations<DataType, ColMajor, int64_t>(sycl_device); + test_sycl_mem_sync<DataType, ColMajor, int64_t>(sycl_device); + test_sycl_cast<DataType, int, RowMajor, int64_t>(sycl_device); + test_sycl_cast<DataType, int, ColMajor, int64_t>(sycl_device); +} + +void test_cxx11_tensor_sycl() { + for (const auto& device :Eigen::get_sycl_supported_devices()) { + CALL_SUBTEST(sycl_computing_test_per_device<float>(device)); + } +} diff --git a/eigen/unsupported/test/cxx11_tensor_symmetry.cpp b/eigen/unsupported/test/cxx11_tensor_symmetry.cpp new file mode 100644 index 0000000..d680e9b --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_symmetry.cpp @@ -0,0 +1,818 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2013 Christian Seiler <christian@iwakd.de> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <Eigen/CXX11/Tensor>
+#include <Eigen/CXX11/TensorSymmetry>
+
+#include <map>
+#include <set>
+
+using Eigen::Tensor;
+using Eigen::SGroup;
+using Eigen::DynamicSGroup;
+using Eigen::StaticSGroup;
+using Eigen::Symmetry;
+using Eigen::AntiSymmetry;
+using Eigen::Hermiticity;
+using Eigen::AntiHermiticity;
+
+using Eigen::NegationFlag;
+using Eigen::ConjugationFlag;
+using Eigen::GlobalZeroFlag;
+using Eigen::GlobalRealFlag;
+using Eigen::GlobalImagFlag;
+
+// helper function to determine whether the compiler instantiated a static
+// or a dynamic symmetry group
+template<typename... Sym>
+bool isDynGroup(StaticSGroup<Sym...> const& dummy)
+{
+  (void)dummy;
+  return false;
+}
+
+bool isDynGroup(DynamicSGroup const& dummy)
+{
+  (void)dummy;
+  return true;
+}
+
+// helper class for checking that the symmetry groups are correct
+struct checkIdx {
+  template<typename ArrType>
+  static inline int doCheck_(ArrType e, int flags, int dummy, std::set<uint64_t>& found, std::map<uint64_t, int> const& expected)
+  {
+    // use the decimal representation of the index list as its key
+    uint64_t value = e[0];
+    for (std::size_t i = 1; i < e.size(); i++)
+      value = value * 10 + e[i];
+
+    // we want to make sure that we find each element
+    auto it = expected.find(value);
+    VERIFY((it != expected.end()));
+    VERIFY_IS_EQUAL(it->second, flags);
+
+    // we want to make sure we only have each element once;
+    // set::insert returns true in the second part of the pair
+    // if the element was really inserted and not already there
+    auto p = found.insert(value);
+    VERIFY((p.second));
+
+    return dummy;
+  }
+
+  static inline int run(std::vector<int> e, int flags, int dummy, std::set<uint64_t>& found, std::map<uint64_t, int> const& expected)
+  {
+    return doCheck_(e, flags, dummy, found, expected);
+  }
+
+  template<std::size_t N>
+  static inline int run(std::array<int, N> e, int flags, int dummy, std::set<uint64_t>& found, std::map<uint64_t, int> const& expected)
+  {
+    return doCheck_(e, flags, dummy, found, expected);
+  }
+};
+
+static void test_symgroups_static()
+{
+  std::array<int, 7> identity{{0,1,2,3,4,5,6}};
+
+  // Simple static symmetry group
+  StaticSGroup<
+    AntiSymmetry<0,1>,
+    Hermiticity<0,2>
+  > group;
+
+  std::set<uint64_t> found;
+  std::map<uint64_t, int> expected;
+  expected[ 123456] = 0;
+  expected[1023456] = NegationFlag;
+  expected[2103456] = ConjugationFlag;
+  expected[1203456] = ConjugationFlag | NegationFlag;
+  expected[2013456] = ConjugationFlag | NegationFlag;
+  expected[ 213456] = ConjugationFlag;
+
+  VERIFY_IS_EQUAL(group.size(), 6u);
+  VERIFY_IS_EQUAL(group.globalFlags(), GlobalImagFlag);
+  group.apply<checkIdx, int>(identity, 0, found, expected);
+  VERIFY_IS_EQUAL(found.size(), 6u);
+}
+
+static void test_symgroups_dynamic()
+{
+  std::vector<int> identity;
+  for (int i = 0; i <= 6; i++)
+    identity.push_back(i);
+
+  // Simple dynamic symmetry group
+  DynamicSGroup group;
+  group.add(0,1,NegationFlag);
+  group.add(0,2,ConjugationFlag);
+
+  VERIFY_IS_EQUAL(group.size(), 6u);
+  VERIFY_IS_EQUAL(group.globalFlags(), GlobalImagFlag);
+
+  std::set<uint64_t> found;
+  std::map<uint64_t, int> expected;
+  expected[ 123456] = 0;
+  expected[1023456] = NegationFlag;
+  expected[2103456] = ConjugationFlag;
+  expected[1203456] = ConjugationFlag | NegationFlag;
+  expected[2013456] = ConjugationFlag | NegationFlag;
+  expected[ 213456] = ConjugationFlag;
+
+  VERIFY_IS_EQUAL(group.size(), 6u);
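+  // The generators (0 1) and (0 2) generate all 3! = 6 permutations of the
+  // indices 0,1,2, which is why both the static and the dynamic group contain
+  // exactly the six index patterns listed in 'expected'.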
VERIFY_IS_EQUAL(group.globalFlags(), GlobalImagFlag); + group.apply<checkIdx, int>(identity, 0, found, expected); + VERIFY_IS_EQUAL(found.size(), 6u); +} + +static void test_symgroups_selection() +{ + std::array<int, 7> identity7{{0,1,2,3,4,5,6}}; + std::array<int, 10> identity10{{0,1,2,3,4,5,6,7,8,9}}; + + { + // Do the same test as in test_symgroups_static but + // require selection via SGroup + SGroup< + AntiSymmetry<0,1>, + Hermiticity<0,2> + > group; + + std::set<uint64_t> found; + std::map<uint64_t, int> expected; + expected[ 123456] = 0; + expected[1023456] = NegationFlag; + expected[2103456] = ConjugationFlag; + expected[1203456] = ConjugationFlag | NegationFlag; + expected[2013456] = ConjugationFlag | NegationFlag; + expected[ 213456] = ConjugationFlag; + + VERIFY(!isDynGroup(group)); + VERIFY_IS_EQUAL(group.size(), 6u); + VERIFY_IS_EQUAL(group.globalFlags(), GlobalImagFlag); + group.apply<checkIdx, int>(identity7, 0, found, expected); + VERIFY_IS_EQUAL(found.size(), 6u); + } + + { + // simple factorizing group: 5 generators, 2^5 = 32 elements + // selection should make this dynamic, although static group + // can still be reasonably generated + SGroup< + Symmetry<0,1>, + Symmetry<2,3>, + Symmetry<4,5>, + Symmetry<6,7>, + Symmetry<8,9> + > group; + + std::set<uint64_t> found; + std::map<uint64_t, int> expected; + expected[ 123456789] = 0; expected[ 123456798] = 0; expected[ 123457689] = 0; expected[ 123457698] = 0; + expected[ 123546789] = 0; expected[ 123546798] = 0; expected[ 123547689] = 0; expected[ 123547698] = 0; + expected[ 132456789] = 0; expected[ 132456798] = 0; expected[ 132457689] = 0; expected[ 132457698] = 0; + expected[ 132546789] = 0; expected[ 132546798] = 0; expected[ 132547689] = 0; expected[ 132547698] = 0; + expected[1023456789] = 0; expected[1023456798] = 0; expected[1023457689] = 0; expected[1023457698] = 0; + expected[1023546789] = 0; expected[1023546798] = 0; expected[1023547689] = 0; expected[1023547698] = 0; + expected[1032456789] = 0; expected[1032456798] = 0; expected[1032457689] = 0; expected[1032457698] = 0; + expected[1032546789] = 0; expected[1032546798] = 0; expected[1032547689] = 0; expected[1032547698] = 0; + + VERIFY(isDynGroup(group)); + VERIFY_IS_EQUAL(group.size(), 32u); + VERIFY_IS_EQUAL(group.globalFlags(), 0); + group.apply<checkIdx, int>(identity10, 0, found, expected); + VERIFY_IS_EQUAL(found.size(), 32u); + + // no verify that we could also generate a static group + // with these generators + found.clear(); + StaticSGroup< + Symmetry<0,1>, + Symmetry<2,3>, + Symmetry<4,5>, + Symmetry<6,7>, + Symmetry<8,9> + > group_static; + VERIFY_IS_EQUAL(group_static.size(), 32u); + VERIFY_IS_EQUAL(group_static.globalFlags(), 0); + group_static.apply<checkIdx, int>(identity10, 0, found, expected); + VERIFY_IS_EQUAL(found.size(), 32u); + } + + { + // try to create a HUGE group + SGroup< + Symmetry<0,1>, + Symmetry<1,2>, + Symmetry<2,3>, + Symmetry<3,4>, + Symmetry<4,5>, + Symmetry<5,6> + > group; + + std::set<uint64_t> found; + uint64_t pre_expected[5040] = { + 123456, 1023456, 213456, 2013456, 1203456, 2103456, 132456, 1032456, 312456, 3012456, 1302456, 3102456, + 231456, 2031456, 321456, 3021456, 2301456, 3201456, 1230456, 2130456, 1320456, 3120456, 2310456, 3210456, + 124356, 1024356, 214356, 2014356, 1204356, 2104356, 142356, 1042356, 412356, 4012356, 1402356, 4102356, + 241356, 2041356, 421356, 4021356, 2401356, 4201356, 1240356, 2140356, 1420356, 4120356, 2410356, 4210356, + 134256, 1034256, 314256, 3014256, 1304256, 3104256, 143256, 1043256, 
413256, 4013256, 1403256, 4103256, + 341256, 3041256, 431256, 4031256, 3401256, 4301256, 1340256, 3140256, 1430256, 4130256, 3410256, 4310256, + 234156, 2034156, 324156, 3024156, 2304156, 3204156, 243156, 2043156, 423156, 4023156, 2403156, 4203156, + 342156, 3042156, 432156, 4032156, 3402156, 4302156, 2340156, 3240156, 2430156, 4230156, 3420156, 4320156, + 1234056, 2134056, 1324056, 3124056, 2314056, 3214056, 1243056, 2143056, 1423056, 4123056, 2413056, 4213056, + 1342056, 3142056, 1432056, 4132056, 3412056, 4312056, 2341056, 3241056, 2431056, 4231056, 3421056, 4321056, + 123546, 1023546, 213546, 2013546, 1203546, 2103546, 132546, 1032546, 312546, 3012546, 1302546, 3102546, + 231546, 2031546, 321546, 3021546, 2301546, 3201546, 1230546, 2130546, 1320546, 3120546, 2310546, 3210546, + 125346, 1025346, 215346, 2015346, 1205346, 2105346, 152346, 1052346, 512346, 5012346, 1502346, 5102346, + 251346, 2051346, 521346, 5021346, 2501346, 5201346, 1250346, 2150346, 1520346, 5120346, 2510346, 5210346, + 135246, 1035246, 315246, 3015246, 1305246, 3105246, 153246, 1053246, 513246, 5013246, 1503246, 5103246, + 351246, 3051246, 531246, 5031246, 3501246, 5301246, 1350246, 3150246, 1530246, 5130246, 3510246, 5310246, + 235146, 2035146, 325146, 3025146, 2305146, 3205146, 253146, 2053146, 523146, 5023146, 2503146, 5203146, + 352146, 3052146, 532146, 5032146, 3502146, 5302146, 2350146, 3250146, 2530146, 5230146, 3520146, 5320146, + 1235046, 2135046, 1325046, 3125046, 2315046, 3215046, 1253046, 2153046, 1523046, 5123046, 2513046, 5213046, + 1352046, 3152046, 1532046, 5132046, 3512046, 5312046, 2351046, 3251046, 2531046, 5231046, 3521046, 5321046, + 124536, 1024536, 214536, 2014536, 1204536, 2104536, 142536, 1042536, 412536, 4012536, 1402536, 4102536, + 241536, 2041536, 421536, 4021536, 2401536, 4201536, 1240536, 2140536, 1420536, 4120536, 2410536, 4210536, + 125436, 1025436, 215436, 2015436, 1205436, 2105436, 152436, 1052436, 512436, 5012436, 1502436, 5102436, + 251436, 2051436, 521436, 5021436, 2501436, 5201436, 1250436, 2150436, 1520436, 5120436, 2510436, 5210436, + 145236, 1045236, 415236, 4015236, 1405236, 4105236, 154236, 1054236, 514236, 5014236, 1504236, 5104236, + 451236, 4051236, 541236, 5041236, 4501236, 5401236, 1450236, 4150236, 1540236, 5140236, 4510236, 5410236, + 245136, 2045136, 425136, 4025136, 2405136, 4205136, 254136, 2054136, 524136, 5024136, 2504136, 5204136, + 452136, 4052136, 542136, 5042136, 4502136, 5402136, 2450136, 4250136, 2540136, 5240136, 4520136, 5420136, + 1245036, 2145036, 1425036, 4125036, 2415036, 4215036, 1254036, 2154036, 1524036, 5124036, 2514036, 5214036, + 1452036, 4152036, 1542036, 5142036, 4512036, 5412036, 2451036, 4251036, 2541036, 5241036, 4521036, 5421036, + 134526, 1034526, 314526, 3014526, 1304526, 3104526, 143526, 1043526, 413526, 4013526, 1403526, 4103526, + 341526, 3041526, 431526, 4031526, 3401526, 4301526, 1340526, 3140526, 1430526, 4130526, 3410526, 4310526, + 135426, 1035426, 315426, 3015426, 1305426, 3105426, 153426, 1053426, 513426, 5013426, 1503426, 5103426, + 351426, 3051426, 531426, 5031426, 3501426, 5301426, 1350426, 3150426, 1530426, 5130426, 3510426, 5310426, + 145326, 1045326, 415326, 4015326, 1405326, 4105326, 154326, 1054326, 514326, 5014326, 1504326, 5104326, + 451326, 4051326, 541326, 5041326, 4501326, 5401326, 1450326, 4150326, 1540326, 5140326, 4510326, 5410326, + 345126, 3045126, 435126, 4035126, 3405126, 4305126, 354126, 3054126, 534126, 5034126, 3504126, 5304126, + 453126, 4053126, 543126, 5043126, 4503126, 5403126, 3450126, 4350126, 
3540126, 5340126, 4530126, 5430126, + 1345026, 3145026, 1435026, 4135026, 3415026, 4315026, 1354026, 3154026, 1534026, 5134026, 3514026, 5314026, + 1453026, 4153026, 1543026, 5143026, 4513026, 5413026, 3451026, 4351026, 3541026, 5341026, 4531026, 5431026, + 234516, 2034516, 324516, 3024516, 2304516, 3204516, 243516, 2043516, 423516, 4023516, 2403516, 4203516, + 342516, 3042516, 432516, 4032516, 3402516, 4302516, 2340516, 3240516, 2430516, 4230516, 3420516, 4320516, + 235416, 2035416, 325416, 3025416, 2305416, 3205416, 253416, 2053416, 523416, 5023416, 2503416, 5203416, + 352416, 3052416, 532416, 5032416, 3502416, 5302416, 2350416, 3250416, 2530416, 5230416, 3520416, 5320416, + 245316, 2045316, 425316, 4025316, 2405316, 4205316, 254316, 2054316, 524316, 5024316, 2504316, 5204316, + 452316, 4052316, 542316, 5042316, 4502316, 5402316, 2450316, 4250316, 2540316, 5240316, 4520316, 5420316, + 345216, 3045216, 435216, 4035216, 3405216, 4305216, 354216, 3054216, 534216, 5034216, 3504216, 5304216, + 453216, 4053216, 543216, 5043216, 4503216, 5403216, 3450216, 4350216, 3540216, 5340216, 4530216, 5430216, + 2345016, 3245016, 2435016, 4235016, 3425016, 4325016, 2354016, 3254016, 2534016, 5234016, 3524016, 5324016, + 2453016, 4253016, 2543016, 5243016, 4523016, 5423016, 3452016, 4352016, 3542016, 5342016, 4532016, 5432016, + 1234506, 2134506, 1324506, 3124506, 2314506, 3214506, 1243506, 2143506, 1423506, 4123506, 2413506, 4213506, + 1342506, 3142506, 1432506, 4132506, 3412506, 4312506, 2341506, 3241506, 2431506, 4231506, 3421506, 4321506, + 1235406, 2135406, 1325406, 3125406, 2315406, 3215406, 1253406, 2153406, 1523406, 5123406, 2513406, 5213406, + 1352406, 3152406, 1532406, 5132406, 3512406, 5312406, 2351406, 3251406, 2531406, 5231406, 3521406, 5321406, + 1245306, 2145306, 1425306, 4125306, 2415306, 4215306, 1254306, 2154306, 1524306, 5124306, 2514306, 5214306, + 1452306, 4152306, 1542306, 5142306, 4512306, 5412306, 2451306, 4251306, 2541306, 5241306, 4521306, 5421306, + 1345206, 3145206, 1435206, 4135206, 3415206, 4315206, 1354206, 3154206, 1534206, 5134206, 3514206, 5314206, + 1453206, 4153206, 1543206, 5143206, 4513206, 5413206, 3451206, 4351206, 3541206, 5341206, 4531206, 5431206, + 2345106, 3245106, 2435106, 4235106, 3425106, 4325106, 2354106, 3254106, 2534106, 5234106, 3524106, 5324106, + 2453106, 4253106, 2543106, 5243106, 4523106, 5423106, 3452106, 4352106, 3542106, 5342106, 4532106, 5432106, + 123465, 1023465, 213465, 2013465, 1203465, 2103465, 132465, 1032465, 312465, 3012465, 1302465, 3102465, + 231465, 2031465, 321465, 3021465, 2301465, 3201465, 1230465, 2130465, 1320465, 3120465, 2310465, 3210465, + 124365, 1024365, 214365, 2014365, 1204365, 2104365, 142365, 1042365, 412365, 4012365, 1402365, 4102365, + 241365, 2041365, 421365, 4021365, 2401365, 4201365, 1240365, 2140365, 1420365, 4120365, 2410365, 4210365, + 134265, 1034265, 314265, 3014265, 1304265, 3104265, 143265, 1043265, 413265, 4013265, 1403265, 4103265, + 341265, 3041265, 431265, 4031265, 3401265, 4301265, 1340265, 3140265, 1430265, 4130265, 3410265, 4310265, + 234165, 2034165, 324165, 3024165, 2304165, 3204165, 243165, 2043165, 423165, 4023165, 2403165, 4203165, + 342165, 3042165, 432165, 4032165, 3402165, 4302165, 2340165, 3240165, 2430165, 4230165, 3420165, 4320165, + 1234065, 2134065, 1324065, 3124065, 2314065, 3214065, 1243065, 2143065, 1423065, 4123065, 2413065, 4213065, + 1342065, 3142065, 1432065, 4132065, 3412065, 4312065, 2341065, 3241065, 2431065, 4231065, 3421065, 4321065, + 123645, 1023645, 213645, 2013645, 1203645, 
2103645, 132645, 1032645, 312645, 3012645, 1302645, 3102645, + 231645, 2031645, 321645, 3021645, 2301645, 3201645, 1230645, 2130645, 1320645, 3120645, 2310645, 3210645, + 126345, 1026345, 216345, 2016345, 1206345, 2106345, 162345, 1062345, 612345, 6012345, 1602345, 6102345, + 261345, 2061345, 621345, 6021345, 2601345, 6201345, 1260345, 2160345, 1620345, 6120345, 2610345, 6210345, + 136245, 1036245, 316245, 3016245, 1306245, 3106245, 163245, 1063245, 613245, 6013245, 1603245, 6103245, + 361245, 3061245, 631245, 6031245, 3601245, 6301245, 1360245, 3160245, 1630245, 6130245, 3610245, 6310245, + 236145, 2036145, 326145, 3026145, 2306145, 3206145, 263145, 2063145, 623145, 6023145, 2603145, 6203145, + 362145, 3062145, 632145, 6032145, 3602145, 6302145, 2360145, 3260145, 2630145, 6230145, 3620145, 6320145, + 1236045, 2136045, 1326045, 3126045, 2316045, 3216045, 1263045, 2163045, 1623045, 6123045, 2613045, 6213045, + 1362045, 3162045, 1632045, 6132045, 3612045, 6312045, 2361045, 3261045, 2631045, 6231045, 3621045, 6321045, + 124635, 1024635, 214635, 2014635, 1204635, 2104635, 142635, 1042635, 412635, 4012635, 1402635, 4102635, + 241635, 2041635, 421635, 4021635, 2401635, 4201635, 1240635, 2140635, 1420635, 4120635, 2410635, 4210635, + 126435, 1026435, 216435, 2016435, 1206435, 2106435, 162435, 1062435, 612435, 6012435, 1602435, 6102435, + 261435, 2061435, 621435, 6021435, 2601435, 6201435, 1260435, 2160435, 1620435, 6120435, 2610435, 6210435, + 146235, 1046235, 416235, 4016235, 1406235, 4106235, 164235, 1064235, 614235, 6014235, 1604235, 6104235, + 461235, 4061235, 641235, 6041235, 4601235, 6401235, 1460235, 4160235, 1640235, 6140235, 4610235, 6410235, + 246135, 2046135, 426135, 4026135, 2406135, 4206135, 264135, 2064135, 624135, 6024135, 2604135, 6204135, + 462135, 4062135, 642135, 6042135, 4602135, 6402135, 2460135, 4260135, 2640135, 6240135, 4620135, 6420135, + 1246035, 2146035, 1426035, 4126035, 2416035, 4216035, 1264035, 2164035, 1624035, 6124035, 2614035, 6214035, + 1462035, 4162035, 1642035, 6142035, 4612035, 6412035, 2461035, 4261035, 2641035, 6241035, 4621035, 6421035, + 134625, 1034625, 314625, 3014625, 1304625, 3104625, 143625, 1043625, 413625, 4013625, 1403625, 4103625, + 341625, 3041625, 431625, 4031625, 3401625, 4301625, 1340625, 3140625, 1430625, 4130625, 3410625, 4310625, + 136425, 1036425, 316425, 3016425, 1306425, 3106425, 163425, 1063425, 613425, 6013425, 1603425, 6103425, + 361425, 3061425, 631425, 6031425, 3601425, 6301425, 1360425, 3160425, 1630425, 6130425, 3610425, 6310425, + 146325, 1046325, 416325, 4016325, 1406325, 4106325, 164325, 1064325, 614325, 6014325, 1604325, 6104325, + 461325, 4061325, 641325, 6041325, 4601325, 6401325, 1460325, 4160325, 1640325, 6140325, 4610325, 6410325, + 346125, 3046125, 436125, 4036125, 3406125, 4306125, 364125, 3064125, 634125, 6034125, 3604125, 6304125, + 463125, 4063125, 643125, 6043125, 4603125, 6403125, 3460125, 4360125, 3640125, 6340125, 4630125, 6430125, + 1346025, 3146025, 1436025, 4136025, 3416025, 4316025, 1364025, 3164025, 1634025, 6134025, 3614025, 6314025, + 1463025, 4163025, 1643025, 6143025, 4613025, 6413025, 3461025, 4361025, 3641025, 6341025, 4631025, 6431025, + 234615, 2034615, 324615, 3024615, 2304615, 3204615, 243615, 2043615, 423615, 4023615, 2403615, 4203615, + 342615, 3042615, 432615, 4032615, 3402615, 4302615, 2340615, 3240615, 2430615, 4230615, 3420615, 4320615, + 236415, 2036415, 326415, 3026415, 2306415, 3206415, 263415, 2063415, 623415, 6023415, 2603415, 6203415, + 362415, 3062415, 632415, 6032415, 3602415, 
6302415, 2360415, 3260415, 2630415, 6230415, 3620415, 6320415, + 246315, 2046315, 426315, 4026315, 2406315, 4206315, 264315, 2064315, 624315, 6024315, 2604315, 6204315, + 462315, 4062315, 642315, 6042315, 4602315, 6402315, 2460315, 4260315, 2640315, 6240315, 4620315, 6420315, + 346215, 3046215, 436215, 4036215, 3406215, 4306215, 364215, 3064215, 634215, 6034215, 3604215, 6304215, + 463215, 4063215, 643215, 6043215, 4603215, 6403215, 3460215, 4360215, 3640215, 6340215, 4630215, 6430215, + 2346015, 3246015, 2436015, 4236015, 3426015, 4326015, 2364015, 3264015, 2634015, 6234015, 3624015, 6324015, + 2463015, 4263015, 2643015, 6243015, 4623015, 6423015, 3462015, 4362015, 3642015, 6342015, 4632015, 6432015, + 1234605, 2134605, 1324605, 3124605, 2314605, 3214605, 1243605, 2143605, 1423605, 4123605, 2413605, 4213605, + 1342605, 3142605, 1432605, 4132605, 3412605, 4312605, 2341605, 3241605, 2431605, 4231605, 3421605, 4321605, + 1236405, 2136405, 1326405, 3126405, 2316405, 3216405, 1263405, 2163405, 1623405, 6123405, 2613405, 6213405, + 1362405, 3162405, 1632405, 6132405, 3612405, 6312405, 2361405, 3261405, 2631405, 6231405, 3621405, 6321405, + 1246305, 2146305, 1426305, 4126305, 2416305, 4216305, 1264305, 2164305, 1624305, 6124305, 2614305, 6214305, + 1462305, 4162305, 1642305, 6142305, 4612305, 6412305, 2461305, 4261305, 2641305, 6241305, 4621305, 6421305, + 1346205, 3146205, 1436205, 4136205, 3416205, 4316205, 1364205, 3164205, 1634205, 6134205, 3614205, 6314205, + 1463205, 4163205, 1643205, 6143205, 4613205, 6413205, 3461205, 4361205, 3641205, 6341205, 4631205, 6431205, + 2346105, 3246105, 2436105, 4236105, 3426105, 4326105, 2364105, 3264105, 2634105, 6234105, 3624105, 6324105, + 2463105, 4263105, 2643105, 6243105, 4623105, 6423105, 3462105, 4362105, 3642105, 6342105, 4632105, 6432105, + 123564, 1023564, 213564, 2013564, 1203564, 2103564, 132564, 1032564, 312564, 3012564, 1302564, 3102564, + 231564, 2031564, 321564, 3021564, 2301564, 3201564, 1230564, 2130564, 1320564, 3120564, 2310564, 3210564, + 125364, 1025364, 215364, 2015364, 1205364, 2105364, 152364, 1052364, 512364, 5012364, 1502364, 5102364, + 251364, 2051364, 521364, 5021364, 2501364, 5201364, 1250364, 2150364, 1520364, 5120364, 2510364, 5210364, + 135264, 1035264, 315264, 3015264, 1305264, 3105264, 153264, 1053264, 513264, 5013264, 1503264, 5103264, + 351264, 3051264, 531264, 5031264, 3501264, 5301264, 1350264, 3150264, 1530264, 5130264, 3510264, 5310264, + 235164, 2035164, 325164, 3025164, 2305164, 3205164, 253164, 2053164, 523164, 5023164, 2503164, 5203164, + 352164, 3052164, 532164, 5032164, 3502164, 5302164, 2350164, 3250164, 2530164, 5230164, 3520164, 5320164, + 1235064, 2135064, 1325064, 3125064, 2315064, 3215064, 1253064, 2153064, 1523064, 5123064, 2513064, 5213064, + 1352064, 3152064, 1532064, 5132064, 3512064, 5312064, 2351064, 3251064, 2531064, 5231064, 3521064, 5321064, + 123654, 1023654, 213654, 2013654, 1203654, 2103654, 132654, 1032654, 312654, 3012654, 1302654, 3102654, + 231654, 2031654, 321654, 3021654, 2301654, 3201654, 1230654, 2130654, 1320654, 3120654, 2310654, 3210654, + 126354, 1026354, 216354, 2016354, 1206354, 2106354, 162354, 1062354, 612354, 6012354, 1602354, 6102354, + 261354, 2061354, 621354, 6021354, 2601354, 6201354, 1260354, 2160354, 1620354, 6120354, 2610354, 6210354, + 136254, 1036254, 316254, 3016254, 1306254, 3106254, 163254, 1063254, 613254, 6013254, 1603254, 6103254, + 361254, 3061254, 631254, 6031254, 3601254, 6301254, 1360254, 3160254, 1630254, 6130254, 3610254, 6310254, + 236154, 2036154, 
326154, 3026154, 2306154, 3206154, 263154, 2063154, 623154, 6023154, 2603154, 6203154, + 362154, 3062154, 632154, 6032154, 3602154, 6302154, 2360154, 3260154, 2630154, 6230154, 3620154, 6320154, + 1236054, 2136054, 1326054, 3126054, 2316054, 3216054, 1263054, 2163054, 1623054, 6123054, 2613054, 6213054, + 1362054, 3162054, 1632054, 6132054, 3612054, 6312054, 2361054, 3261054, 2631054, 6231054, 3621054, 6321054, + 125634, 1025634, 215634, 2015634, 1205634, 2105634, 152634, 1052634, 512634, 5012634, 1502634, 5102634, + 251634, 2051634, 521634, 5021634, 2501634, 5201634, 1250634, 2150634, 1520634, 5120634, 2510634, 5210634, + 126534, 1026534, 216534, 2016534, 1206534, 2106534, 162534, 1062534, 612534, 6012534, 1602534, 6102534, + 261534, 2061534, 621534, 6021534, 2601534, 6201534, 1260534, 2160534, 1620534, 6120534, 2610534, 6210534, + 156234, 1056234, 516234, 5016234, 1506234, 5106234, 165234, 1065234, 615234, 6015234, 1605234, 6105234, + 561234, 5061234, 651234, 6051234, 5601234, 6501234, 1560234, 5160234, 1650234, 6150234, 5610234, 6510234, + 256134, 2056134, 526134, 5026134, 2506134, 5206134, 265134, 2065134, 625134, 6025134, 2605134, 6205134, + 562134, 5062134, 652134, 6052134, 5602134, 6502134, 2560134, 5260134, 2650134, 6250134, 5620134, 6520134, + 1256034, 2156034, 1526034, 5126034, 2516034, 5216034, 1265034, 2165034, 1625034, 6125034, 2615034, 6215034, + 1562034, 5162034, 1652034, 6152034, 5612034, 6512034, 2561034, 5261034, 2651034, 6251034, 5621034, 6521034, + 135624, 1035624, 315624, 3015624, 1305624, 3105624, 153624, 1053624, 513624, 5013624, 1503624, 5103624, + 351624, 3051624, 531624, 5031624, 3501624, 5301624, 1350624, 3150624, 1530624, 5130624, 3510624, 5310624, + 136524, 1036524, 316524, 3016524, 1306524, 3106524, 163524, 1063524, 613524, 6013524, 1603524, 6103524, + 361524, 3061524, 631524, 6031524, 3601524, 6301524, 1360524, 3160524, 1630524, 6130524, 3610524, 6310524, + 156324, 1056324, 516324, 5016324, 1506324, 5106324, 165324, 1065324, 615324, 6015324, 1605324, 6105324, + 561324, 5061324, 651324, 6051324, 5601324, 6501324, 1560324, 5160324, 1650324, 6150324, 5610324, 6510324, + 356124, 3056124, 536124, 5036124, 3506124, 5306124, 365124, 3065124, 635124, 6035124, 3605124, 6305124, + 563124, 5063124, 653124, 6053124, 5603124, 6503124, 3560124, 5360124, 3650124, 6350124, 5630124, 6530124, + 1356024, 3156024, 1536024, 5136024, 3516024, 5316024, 1365024, 3165024, 1635024, 6135024, 3615024, 6315024, + 1563024, 5163024, 1653024, 6153024, 5613024, 6513024, 3561024, 5361024, 3651024, 6351024, 5631024, 6531024, + 235614, 2035614, 325614, 3025614, 2305614, 3205614, 253614, 2053614, 523614, 5023614, 2503614, 5203614, + 352614, 3052614, 532614, 5032614, 3502614, 5302614, 2350614, 3250614, 2530614, 5230614, 3520614, 5320614, + 236514, 2036514, 326514, 3026514, 2306514, 3206514, 263514, 2063514, 623514, 6023514, 2603514, 6203514, + 362514, 3062514, 632514, 6032514, 3602514, 6302514, 2360514, 3260514, 2630514, 6230514, 3620514, 6320514, + 256314, 2056314, 526314, 5026314, 2506314, 5206314, 265314, 2065314, 625314, 6025314, 2605314, 6205314, + 562314, 5062314, 652314, 6052314, 5602314, 6502314, 2560314, 5260314, 2650314, 6250314, 5620314, 6520314, + 356214, 3056214, 536214, 5036214, 3506214, 5306214, 365214, 3065214, 635214, 6035214, 3605214, 6305214, + 563214, 5063214, 653214, 6053214, 5603214, 6503214, 3560214, 5360214, 3650214, 6350214, 5630214, 6530214, + 2356014, 3256014, 2536014, 5236014, 3526014, 5326014, 2365014, 3265014, 2635014, 6235014, 3625014, 6325014, + 2563014, 5263014, 
2653014, 6253014, 5623014, 6523014, 3562014, 5362014, 3652014, 6352014, 5632014, 6532014, + 1235604, 2135604, 1325604, 3125604, 2315604, 3215604, 1253604, 2153604, 1523604, 5123604, 2513604, 5213604, + 1352604, 3152604, 1532604, 5132604, 3512604, 5312604, 2351604, 3251604, 2531604, 5231604, 3521604, 5321604, + 1236504, 2136504, 1326504, 3126504, 2316504, 3216504, 1263504, 2163504, 1623504, 6123504, 2613504, 6213504, + 1362504, 3162504, 1632504, 6132504, 3612504, 6312504, 2361504, 3261504, 2631504, 6231504, 3621504, 6321504, + 1256304, 2156304, 1526304, 5126304, 2516304, 5216304, 1265304, 2165304, 1625304, 6125304, 2615304, 6215304, + 1562304, 5162304, 1652304, 6152304, 5612304, 6512304, 2561304, 5261304, 2651304, 6251304, 5621304, 6521304, + 1356204, 3156204, 1536204, 5136204, 3516204, 5316204, 1365204, 3165204, 1635204, 6135204, 3615204, 6315204, + 1563204, 5163204, 1653204, 6153204, 5613204, 6513204, 3561204, 5361204, 3651204, 6351204, 5631204, 6531204, + 2356104, 3256104, 2536104, 5236104, 3526104, 5326104, 2365104, 3265104, 2635104, 6235104, 3625104, 6325104, + 2563104, 5263104, 2653104, 6253104, 5623104, 6523104, 3562104, 5362104, 3652104, 6352104, 5632104, 6532104, + 124563, 1024563, 214563, 2014563, 1204563, 2104563, 142563, 1042563, 412563, 4012563, 1402563, 4102563, + 241563, 2041563, 421563, 4021563, 2401563, 4201563, 1240563, 2140563, 1420563, 4120563, 2410563, 4210563, + 125463, 1025463, 215463, 2015463, 1205463, 2105463, 152463, 1052463, 512463, 5012463, 1502463, 5102463, + 251463, 2051463, 521463, 5021463, 2501463, 5201463, 1250463, 2150463, 1520463, 5120463, 2510463, 5210463, + 145263, 1045263, 415263, 4015263, 1405263, 4105263, 154263, 1054263, 514263, 5014263, 1504263, 5104263, + 451263, 4051263, 541263, 5041263, 4501263, 5401263, 1450263, 4150263, 1540263, 5140263, 4510263, 5410263, + 245163, 2045163, 425163, 4025163, 2405163, 4205163, 254163, 2054163, 524163, 5024163, 2504163, 5204163, + 452163, 4052163, 542163, 5042163, 4502163, 5402163, 2450163, 4250163, 2540163, 5240163, 4520163, 5420163, + 1245063, 2145063, 1425063, 4125063, 2415063, 4215063, 1254063, 2154063, 1524063, 5124063, 2514063, 5214063, + 1452063, 4152063, 1542063, 5142063, 4512063, 5412063, 2451063, 4251063, 2541063, 5241063, 4521063, 5421063, + 124653, 1024653, 214653, 2014653, 1204653, 2104653, 142653, 1042653, 412653, 4012653, 1402653, 4102653, + 241653, 2041653, 421653, 4021653, 2401653, 4201653, 1240653, 2140653, 1420653, 4120653, 2410653, 4210653, + 126453, 1026453, 216453, 2016453, 1206453, 2106453, 162453, 1062453, 612453, 6012453, 1602453, 6102453, + 261453, 2061453, 621453, 6021453, 2601453, 6201453, 1260453, 2160453, 1620453, 6120453, 2610453, 6210453, + 146253, 1046253, 416253, 4016253, 1406253, 4106253, 164253, 1064253, 614253, 6014253, 1604253, 6104253, + 461253, 4061253, 641253, 6041253, 4601253, 6401253, 1460253, 4160253, 1640253, 6140253, 4610253, 6410253, + 246153, 2046153, 426153, 4026153, 2406153, 4206153, 264153, 2064153, 624153, 6024153, 2604153, 6204153, + 462153, 4062153, 642153, 6042153, 4602153, 6402153, 2460153, 4260153, 2640153, 6240153, 4620153, 6420153, + 1246053, 2146053, 1426053, 4126053, 2416053, 4216053, 1264053, 2164053, 1624053, 6124053, 2614053, 6214053, + 1462053, 4162053, 1642053, 6142053, 4612053, 6412053, 2461053, 4261053, 2641053, 6241053, 4621053, 6421053, + 125643, 1025643, 215643, 2015643, 1205643, 2105643, 152643, 1052643, 512643, 5012643, 1502643, 5102643, + 251643, 2051643, 521643, 5021643, 2501643, 5201643, 1250643, 2150643, 1520643, 5120643, 2510643, 
5210643, + 126543, 1026543, 216543, 2016543, 1206543, 2106543, 162543, 1062543, 612543, 6012543, 1602543, 6102543, + 261543, 2061543, 621543, 6021543, 2601543, 6201543, 1260543, 2160543, 1620543, 6120543, 2610543, 6210543, + 156243, 1056243, 516243, 5016243, 1506243, 5106243, 165243, 1065243, 615243, 6015243, 1605243, 6105243, + 561243, 5061243, 651243, 6051243, 5601243, 6501243, 1560243, 5160243, 1650243, 6150243, 5610243, 6510243, + 256143, 2056143, 526143, 5026143, 2506143, 5206143, 265143, 2065143, 625143, 6025143, 2605143, 6205143, + 562143, 5062143, 652143, 6052143, 5602143, 6502143, 2560143, 5260143, 2650143, 6250143, 5620143, 6520143, + 1256043, 2156043, 1526043, 5126043, 2516043, 5216043, 1265043, 2165043, 1625043, 6125043, 2615043, 6215043, + 1562043, 5162043, 1652043, 6152043, 5612043, 6512043, 2561043, 5261043, 2651043, 6251043, 5621043, 6521043, + 145623, 1045623, 415623, 4015623, 1405623, 4105623, 154623, 1054623, 514623, 5014623, 1504623, 5104623, + 451623, 4051623, 541623, 5041623, 4501623, 5401623, 1450623, 4150623, 1540623, 5140623, 4510623, 5410623, + 146523, 1046523, 416523, 4016523, 1406523, 4106523, 164523, 1064523, 614523, 6014523, 1604523, 6104523, + 461523, 4061523, 641523, 6041523, 4601523, 6401523, 1460523, 4160523, 1640523, 6140523, 4610523, 6410523, + 156423, 1056423, 516423, 5016423, 1506423, 5106423, 165423, 1065423, 615423, 6015423, 1605423, 6105423, + 561423, 5061423, 651423, 6051423, 5601423, 6501423, 1560423, 5160423, 1650423, 6150423, 5610423, 6510423, + 456123, 4056123, 546123, 5046123, 4506123, 5406123, 465123, 4065123, 645123, 6045123, 4605123, 6405123, + 564123, 5064123, 654123, 6054123, 5604123, 6504123, 4560123, 5460123, 4650123, 6450123, 5640123, 6540123, + 1456023, 4156023, 1546023, 5146023, 4516023, 5416023, 1465023, 4165023, 1645023, 6145023, 4615023, 6415023, + 1564023, 5164023, 1654023, 6154023, 5614023, 6514023, 4561023, 5461023, 4651023, 6451023, 5641023, 6541023, + 245613, 2045613, 425613, 4025613, 2405613, 4205613, 254613, 2054613, 524613, 5024613, 2504613, 5204613, + 452613, 4052613, 542613, 5042613, 4502613, 5402613, 2450613, 4250613, 2540613, 5240613, 4520613, 5420613, + 246513, 2046513, 426513, 4026513, 2406513, 4206513, 264513, 2064513, 624513, 6024513, 2604513, 6204513, + 462513, 4062513, 642513, 6042513, 4602513, 6402513, 2460513, 4260513, 2640513, 6240513, 4620513, 6420513, + 256413, 2056413, 526413, 5026413, 2506413, 5206413, 265413, 2065413, 625413, 6025413, 2605413, 6205413, + 562413, 5062413, 652413, 6052413, 5602413, 6502413, 2560413, 5260413, 2650413, 6250413, 5620413, 6520413, + 456213, 4056213, 546213, 5046213, 4506213, 5406213, 465213, 4065213, 645213, 6045213, 4605213, 6405213, + 564213, 5064213, 654213, 6054213, 5604213, 6504213, 4560213, 5460213, 4650213, 6450213, 5640213, 6540213, + 2456013, 4256013, 2546013, 5246013, 4526013, 5426013, 2465013, 4265013, 2645013, 6245013, 4625013, 6425013, + 2564013, 5264013, 2654013, 6254013, 5624013, 6524013, 4562013, 5462013, 4652013, 6452013, 5642013, 6542013, + 1245603, 2145603, 1425603, 4125603, 2415603, 4215603, 1254603, 2154603, 1524603, 5124603, 2514603, 5214603, + 1452603, 4152603, 1542603, 5142603, 4512603, 5412603, 2451603, 4251603, 2541603, 5241603, 4521603, 5421603, + 1246503, 2146503, 1426503, 4126503, 2416503, 4216503, 1264503, 2164503, 1624503, 6124503, 2614503, 6214503, + 1462503, 4162503, 1642503, 6142503, 4612503, 6412503, 2461503, 4261503, 2641503, 6241503, 4621503, 6421503, + 1256403, 2156403, 1526403, 5126403, 2516403, 5216403, 1265403, 2165403, 1625403, 6125403, 
2615403, 6215403, + 1562403, 5162403, 1652403, 6152403, 5612403, 6512403, 2561403, 5261403, 2651403, 6251403, 5621403, 6521403, + 1456203, 4156203, 1546203, 5146203, 4516203, 5416203, 1465203, 4165203, 1645203, 6145203, 4615203, 6415203, + 1564203, 5164203, 1654203, 6154203, 5614203, 6514203, 4561203, 5461203, 4651203, 6451203, 5641203, 6541203, + 2456103, 4256103, 2546103, 5246103, 4526103, 5426103, 2465103, 4265103, 2645103, 6245103, 4625103, 6425103, + 2564103, 5264103, 2654103, 6254103, 5624103, 6524103, 4562103, 5462103, 4652103, 6452103, 5642103, 6542103, + 134562, 1034562, 314562, 3014562, 1304562, 3104562, 143562, 1043562, 413562, 4013562, 1403562, 4103562, + 341562, 3041562, 431562, 4031562, 3401562, 4301562, 1340562, 3140562, 1430562, 4130562, 3410562, 4310562, + 135462, 1035462, 315462, 3015462, 1305462, 3105462, 153462, 1053462, 513462, 5013462, 1503462, 5103462, + 351462, 3051462, 531462, 5031462, 3501462, 5301462, 1350462, 3150462, 1530462, 5130462, 3510462, 5310462, + 145362, 1045362, 415362, 4015362, 1405362, 4105362, 154362, 1054362, 514362, 5014362, 1504362, 5104362, + 451362, 4051362, 541362, 5041362, 4501362, 5401362, 1450362, 4150362, 1540362, 5140362, 4510362, 5410362, + 345162, 3045162, 435162, 4035162, 3405162, 4305162, 354162, 3054162, 534162, 5034162, 3504162, 5304162, + 453162, 4053162, 543162, 5043162, 4503162, 5403162, 3450162, 4350162, 3540162, 5340162, 4530162, 5430162, + 1345062, 3145062, 1435062, 4135062, 3415062, 4315062, 1354062, 3154062, 1534062, 5134062, 3514062, 5314062, + 1453062, 4153062, 1543062, 5143062, 4513062, 5413062, 3451062, 4351062, 3541062, 5341062, 4531062, 5431062, + 134652, 1034652, 314652, 3014652, 1304652, 3104652, 143652, 1043652, 413652, 4013652, 1403652, 4103652, + 341652, 3041652, 431652, 4031652, 3401652, 4301652, 1340652, 3140652, 1430652, 4130652, 3410652, 4310652, + 136452, 1036452, 316452, 3016452, 1306452, 3106452, 163452, 1063452, 613452, 6013452, 1603452, 6103452, + 361452, 3061452, 631452, 6031452, 3601452, 6301452, 1360452, 3160452, 1630452, 6130452, 3610452, 6310452, + 146352, 1046352, 416352, 4016352, 1406352, 4106352, 164352, 1064352, 614352, 6014352, 1604352, 6104352, + 461352, 4061352, 641352, 6041352, 4601352, 6401352, 1460352, 4160352, 1640352, 6140352, 4610352, 6410352, + 346152, 3046152, 436152, 4036152, 3406152, 4306152, 364152, 3064152, 634152, 6034152, 3604152, 6304152, + 463152, 4063152, 643152, 6043152, 4603152, 6403152, 3460152, 4360152, 3640152, 6340152, 4630152, 6430152, + 1346052, 3146052, 1436052, 4136052, 3416052, 4316052, 1364052, 3164052, 1634052, 6134052, 3614052, 6314052, + 1463052, 4163052, 1643052, 6143052, 4613052, 6413052, 3461052, 4361052, 3641052, 6341052, 4631052, 6431052, + 135642, 1035642, 315642, 3015642, 1305642, 3105642, 153642, 1053642, 513642, 5013642, 1503642, 5103642, + 351642, 3051642, 531642, 5031642, 3501642, 5301642, 1350642, 3150642, 1530642, 5130642, 3510642, 5310642, + 136542, 1036542, 316542, 3016542, 1306542, 3106542, 163542, 1063542, 613542, 6013542, 1603542, 6103542, + 361542, 3061542, 631542, 6031542, 3601542, 6301542, 1360542, 3160542, 1630542, 6130542, 3610542, 6310542, + 156342, 1056342, 516342, 5016342, 1506342, 5106342, 165342, 1065342, 615342, 6015342, 1605342, 6105342, + 561342, 5061342, 651342, 6051342, 5601342, 6501342, 1560342, 5160342, 1650342, 6150342, 5610342, 6510342, + 356142, 3056142, 536142, 5036142, 3506142, 5306142, 365142, 3065142, 635142, 6035142, 3605142, 6305142, + 563142, 5063142, 653142, 6053142, 5603142, 6503142, 3560142, 5360142, 3650142, 
6350142, 5630142, 6530142, + 1356042, 3156042, 1536042, 5136042, 3516042, 5316042, 1365042, 3165042, 1635042, 6135042, 3615042, 6315042, + 1563042, 5163042, 1653042, 6153042, 5613042, 6513042, 3561042, 5361042, 3651042, 6351042, 5631042, 6531042, + 145632, 1045632, 415632, 4015632, 1405632, 4105632, 154632, 1054632, 514632, 5014632, 1504632, 5104632, + 451632, 4051632, 541632, 5041632, 4501632, 5401632, 1450632, 4150632, 1540632, 5140632, 4510632, 5410632, + 146532, 1046532, 416532, 4016532, 1406532, 4106532, 164532, 1064532, 614532, 6014532, 1604532, 6104532, + 461532, 4061532, 641532, 6041532, 4601532, 6401532, 1460532, 4160532, 1640532, 6140532, 4610532, 6410532, + 156432, 1056432, 516432, 5016432, 1506432, 5106432, 165432, 1065432, 615432, 6015432, 1605432, 6105432, + 561432, 5061432, 651432, 6051432, 5601432, 6501432, 1560432, 5160432, 1650432, 6150432, 5610432, 6510432, + 456132, 4056132, 546132, 5046132, 4506132, 5406132, 465132, 4065132, 645132, 6045132, 4605132, 6405132, + 564132, 5064132, 654132, 6054132, 5604132, 6504132, 4560132, 5460132, 4650132, 6450132, 5640132, 6540132, + 1456032, 4156032, 1546032, 5146032, 4516032, 5416032, 1465032, 4165032, 1645032, 6145032, 4615032, 6415032, + 1564032, 5164032, 1654032, 6154032, 5614032, 6514032, 4561032, 5461032, 4651032, 6451032, 5641032, 6541032, + 345612, 3045612, 435612, 4035612, 3405612, 4305612, 354612, 3054612, 534612, 5034612, 3504612, 5304612, + 453612, 4053612, 543612, 5043612, 4503612, 5403612, 3450612, 4350612, 3540612, 5340612, 4530612, 5430612, + 346512, 3046512, 436512, 4036512, 3406512, 4306512, 364512, 3064512, 634512, 6034512, 3604512, 6304512, + 463512, 4063512, 643512, 6043512, 4603512, 6403512, 3460512, 4360512, 3640512, 6340512, 4630512, 6430512, + 356412, 3056412, 536412, 5036412, 3506412, 5306412, 365412, 3065412, 635412, 6035412, 3605412, 6305412, + 563412, 5063412, 653412, 6053412, 5603412, 6503412, 3560412, 5360412, 3650412, 6350412, 5630412, 6530412, + 456312, 4056312, 546312, 5046312, 4506312, 5406312, 465312, 4065312, 645312, 6045312, 4605312, 6405312, + 564312, 5064312, 654312, 6054312, 5604312, 6504312, 4560312, 5460312, 4650312, 6450312, 5640312, 6540312, + 3456012, 4356012, 3546012, 5346012, 4536012, 5436012, 3465012, 4365012, 3645012, 6345012, 4635012, 6435012, + 3564012, 5364012, 3654012, 6354012, 5634012, 6534012, 4563012, 5463012, 4653012, 6453012, 5643012, 6543012, + 1345602, 3145602, 1435602, 4135602, 3415602, 4315602, 1354602, 3154602, 1534602, 5134602, 3514602, 5314602, + 1453602, 4153602, 1543602, 5143602, 4513602, 5413602, 3451602, 4351602, 3541602, 5341602, 4531602, 5431602, + 1346502, 3146502, 1436502, 4136502, 3416502, 4316502, 1364502, 3164502, 1634502, 6134502, 3614502, 6314502, + 1463502, 4163502, 1643502, 6143502, 4613502, 6413502, 3461502, 4361502, 3641502, 6341502, 4631502, 6431502, + 1356402, 3156402, 1536402, 5136402, 3516402, 5316402, 1365402, 3165402, 1635402, 6135402, 3615402, 6315402, + 1563402, 5163402, 1653402, 6153402, 5613402, 6513402, 3561402, 5361402, 3651402, 6351402, 5631402, 6531402, + 1456302, 4156302, 1546302, 5146302, 4516302, 5416302, 1465302, 4165302, 1645302, 6145302, 4615302, 6415302, + 1564302, 5164302, 1654302, 6154302, 5614302, 6514302, 4561302, 5461302, 4651302, 6451302, 5641302, 6541302, + 3456102, 4356102, 3546102, 5346102, 4536102, 5436102, 3465102, 4365102, 3645102, 6345102, 4635102, 6435102, + 3564102, 5364102, 3654102, 6354102, 5634102, 6534102, 4563102, 5463102, 4653102, 6453102, 5643102, 6543102, + 234561, 2034561, 324561, 3024561, 2304561, 3204561, 
243561, 2043561, 423561, 4023561, 2403561, 4203561, + 342561, 3042561, 432561, 4032561, 3402561, 4302561, 2340561, 3240561, 2430561, 4230561, 3420561, 4320561, + 235461, 2035461, 325461, 3025461, 2305461, 3205461, 253461, 2053461, 523461, 5023461, 2503461, 5203461, + 352461, 3052461, 532461, 5032461, 3502461, 5302461, 2350461, 3250461, 2530461, 5230461, 3520461, 5320461, + 245361, 2045361, 425361, 4025361, 2405361, 4205361, 254361, 2054361, 524361, 5024361, 2504361, 5204361, + 452361, 4052361, 542361, 5042361, 4502361, 5402361, 2450361, 4250361, 2540361, 5240361, 4520361, 5420361, + 345261, 3045261, 435261, 4035261, 3405261, 4305261, 354261, 3054261, 534261, 5034261, 3504261, 5304261, + 453261, 4053261, 543261, 5043261, 4503261, 5403261, 3450261, 4350261, 3540261, 5340261, 4530261, 5430261, + 2345061, 3245061, 2435061, 4235061, 3425061, 4325061, 2354061, 3254061, 2534061, 5234061, 3524061, 5324061, + 2453061, 4253061, 2543061, 5243061, 4523061, 5423061, 3452061, 4352061, 3542061, 5342061, 4532061, 5432061, + 234651, 2034651, 324651, 3024651, 2304651, 3204651, 243651, 2043651, 423651, 4023651, 2403651, 4203651, + 342651, 3042651, 432651, 4032651, 3402651, 4302651, 2340651, 3240651, 2430651, 4230651, 3420651, 4320651, + 236451, 2036451, 326451, 3026451, 2306451, 3206451, 263451, 2063451, 623451, 6023451, 2603451, 6203451, + 362451, 3062451, 632451, 6032451, 3602451, 6302451, 2360451, 3260451, 2630451, 6230451, 3620451, 6320451, + 246351, 2046351, 426351, 4026351, 2406351, 4206351, 264351, 2064351, 624351, 6024351, 2604351, 6204351, + 462351, 4062351, 642351, 6042351, 4602351, 6402351, 2460351, 4260351, 2640351, 6240351, 4620351, 6420351, + 346251, 3046251, 436251, 4036251, 3406251, 4306251, 364251, 3064251, 634251, 6034251, 3604251, 6304251, + 463251, 4063251, 643251, 6043251, 4603251, 6403251, 3460251, 4360251, 3640251, 6340251, 4630251, 6430251, + 2346051, 3246051, 2436051, 4236051, 3426051, 4326051, 2364051, 3264051, 2634051, 6234051, 3624051, 6324051, + 2463051, 4263051, 2643051, 6243051, 4623051, 6423051, 3462051, 4362051, 3642051, 6342051, 4632051, 6432051, + 235641, 2035641, 325641, 3025641, 2305641, 3205641, 253641, 2053641, 523641, 5023641, 2503641, 5203641, + 352641, 3052641, 532641, 5032641, 3502641, 5302641, 2350641, 3250641, 2530641, 5230641, 3520641, 5320641, + 236541, 2036541, 326541, 3026541, 2306541, 3206541, 263541, 2063541, 623541, 6023541, 2603541, 6203541, + 362541, 3062541, 632541, 6032541, 3602541, 6302541, 2360541, 3260541, 2630541, 6230541, 3620541, 6320541, + 256341, 2056341, 526341, 5026341, 2506341, 5206341, 265341, 2065341, 625341, 6025341, 2605341, 6205341, + 562341, 5062341, 652341, 6052341, 5602341, 6502341, 2560341, 5260341, 2650341, 6250341, 5620341, 6520341, + 356241, 3056241, 536241, 5036241, 3506241, 5306241, 365241, 3065241, 635241, 6035241, 3605241, 6305241, + 563241, 5063241, 653241, 6053241, 5603241, 6503241, 3560241, 5360241, 3650241, 6350241, 5630241, 6530241, + 2356041, 3256041, 2536041, 5236041, 3526041, 5326041, 2365041, 3265041, 2635041, 6235041, 3625041, 6325041, + 2563041, 5263041, 2653041, 6253041, 5623041, 6523041, 3562041, 5362041, 3652041, 6352041, 5632041, 6532041, + 245631, 2045631, 425631, 4025631, 2405631, 4205631, 254631, 2054631, 524631, 5024631, 2504631, 5204631, + 452631, 4052631, 542631, 5042631, 4502631, 5402631, 2450631, 4250631, 2540631, 5240631, 4520631, 5420631, + 246531, 2046531, 426531, 4026531, 2406531, 4206531, 264531, 2064531, 624531, 6024531, 2604531, 6204531, + 462531, 4062531, 642531, 6042531, 4602531, 6402531, 
2460531, 4260531, 2640531, 6240531, 4620531, 6420531, + 256431, 2056431, 526431, 5026431, 2506431, 5206431, 265431, 2065431, 625431, 6025431, 2605431, 6205431, + 562431, 5062431, 652431, 6052431, 5602431, 6502431, 2560431, 5260431, 2650431, 6250431, 5620431, 6520431, + 456231, 4056231, 546231, 5046231, 4506231, 5406231, 465231, 4065231, 645231, 6045231, 4605231, 6405231, + 564231, 5064231, 654231, 6054231, 5604231, 6504231, 4560231, 5460231, 4650231, 6450231, 5640231, 6540231, + 2456031, 4256031, 2546031, 5246031, 4526031, 5426031, 2465031, 4265031, 2645031, 6245031, 4625031, 6425031, + 2564031, 5264031, 2654031, 6254031, 5624031, 6524031, 4562031, 5462031, 4652031, 6452031, 5642031, 6542031, + 345621, 3045621, 435621, 4035621, 3405621, 4305621, 354621, 3054621, 534621, 5034621, 3504621, 5304621, + 453621, 4053621, 543621, 5043621, 4503621, 5403621, 3450621, 4350621, 3540621, 5340621, 4530621, 5430621, + 346521, 3046521, 436521, 4036521, 3406521, 4306521, 364521, 3064521, 634521, 6034521, 3604521, 6304521, + 463521, 4063521, 643521, 6043521, 4603521, 6403521, 3460521, 4360521, 3640521, 6340521, 4630521, 6430521, + 356421, 3056421, 536421, 5036421, 3506421, 5306421, 365421, 3065421, 635421, 6035421, 3605421, 6305421, + 563421, 5063421, 653421, 6053421, 5603421, 6503421, 3560421, 5360421, 3650421, 6350421, 5630421, 6530421, + 456321, 4056321, 546321, 5046321, 4506321, 5406321, 465321, 4065321, 645321, 6045321, 4605321, 6405321, + 564321, 5064321, 654321, 6054321, 5604321, 6504321, 4560321, 5460321, 4650321, 6450321, 5640321, 6540321, + 3456021, 4356021, 3546021, 5346021, 4536021, 5436021, 3465021, 4365021, 3645021, 6345021, 4635021, 6435021, + 3564021, 5364021, 3654021, 6354021, 5634021, 6534021, 4563021, 5463021, 4653021, 6453021, 5643021, 6543021, + 2345601, 3245601, 2435601, 4235601, 3425601, 4325601, 2354601, 3254601, 2534601, 5234601, 3524601, 5324601, + 2453601, 4253601, 2543601, 5243601, 4523601, 5423601, 3452601, 4352601, 3542601, 5342601, 4532601, 5432601, + 2346501, 3246501, 2436501, 4236501, 3426501, 4326501, 2364501, 3264501, 2634501, 6234501, 3624501, 6324501, + 2463501, 4263501, 2643501, 6243501, 4623501, 6423501, 3462501, 4362501, 3642501, 6342501, 4632501, 6432501, + 2356401, 3256401, 2536401, 5236401, 3526401, 5326401, 2365401, 3265401, 2635401, 6235401, 3625401, 6325401, + 2563401, 5263401, 2653401, 6253401, 5623401, 6523401, 3562401, 5362401, 3652401, 6352401, 5632401, 6532401, + 2456301, 4256301, 2546301, 5246301, 4526301, 5426301, 2465301, 4265301, 2645301, 6245301, 4625301, 6425301, + 2564301, 5264301, 2654301, 6254301, 5624301, 6524301, 4562301, 5462301, 4652301, 6452301, 5642301, 6542301, + 3456201, 4356201, 3546201, 5346201, 4536201, 5436201, 3465201, 4365201, 3645201, 6345201, 4635201, 6435201, + 3564201, 5364201, 3654201, 6354201, 5634201, 6534201, 4563201, 5463201, 4653201, 6453201, 5643201, 6543201, + 1234560, 2134560, 1324560, 3124560, 2314560, 3214560, 1243560, 2143560, 1423560, 4123560, 2413560, 4213560, + 1342560, 3142560, 1432560, 4132560, 3412560, 4312560, 2341560, 3241560, 2431560, 4231560, 3421560, 4321560, + 1235460, 2135460, 1325460, 3125460, 2315460, 3215460, 1253460, 2153460, 1523460, 5123460, 2513460, 5213460, + 1352460, 3152460, 1532460, 5132460, 3512460, 5312460, 2351460, 3251460, 2531460, 5231460, 3521460, 5321460, + 1245360, 2145360, 1425360, 4125360, 2415360, 4215360, 1254360, 2154360, 1524360, 5124360, 2514360, 5214360, + 1452360, 4152360, 1542360, 5142360, 4512360, 5412360, 2451360, 4251360, 2541360, 5241360, 4521360, 5421360, + 1345260, 
3145260, 1435260, 4135260, 3415260, 4315260, 1354260, 3154260, 1534260, 5134260, 3514260, 5314260, + 1453260, 4153260, 1543260, 5143260, 4513260, 5413260, 3451260, 4351260, 3541260, 5341260, 4531260, 5431260, + 2345160, 3245160, 2435160, 4235160, 3425160, 4325160, 2354160, 3254160, 2534160, 5234160, 3524160, 5324160, + 2453160, 4253160, 2543160, 5243160, 4523160, 5423160, 3452160, 4352160, 3542160, 5342160, 4532160, 5432160, + 1234650, 2134650, 1324650, 3124650, 2314650, 3214650, 1243650, 2143650, 1423650, 4123650, 2413650, 4213650, + 1342650, 3142650, 1432650, 4132650, 3412650, 4312650, 2341650, 3241650, 2431650, 4231650, 3421650, 4321650, + 1236450, 2136450, 1326450, 3126450, 2316450, 3216450, 1263450, 2163450, 1623450, 6123450, 2613450, 6213450, + 1362450, 3162450, 1632450, 6132450, 3612450, 6312450, 2361450, 3261450, 2631450, 6231450, 3621450, 6321450, + 1246350, 2146350, 1426350, 4126350, 2416350, 4216350, 1264350, 2164350, 1624350, 6124350, 2614350, 6214350, + 1462350, 4162350, 1642350, 6142350, 4612350, 6412350, 2461350, 4261350, 2641350, 6241350, 4621350, 6421350, + 1346250, 3146250, 1436250, 4136250, 3416250, 4316250, 1364250, 3164250, 1634250, 6134250, 3614250, 6314250, + 1463250, 4163250, 1643250, 6143250, 4613250, 6413250, 3461250, 4361250, 3641250, 6341250, 4631250, 6431250, + 2346150, 3246150, 2436150, 4236150, 3426150, 4326150, 2364150, 3264150, 2634150, 6234150, 3624150, 6324150, + 2463150, 4263150, 2643150, 6243150, 4623150, 6423150, 3462150, 4362150, 3642150, 6342150, 4632150, 6432150, + 1235640, 2135640, 1325640, 3125640, 2315640, 3215640, 1253640, 2153640, 1523640, 5123640, 2513640, 5213640, + 1352640, 3152640, 1532640, 5132640, 3512640, 5312640, 2351640, 3251640, 2531640, 5231640, 3521640, 5321640, + 1236540, 2136540, 1326540, 3126540, 2316540, 3216540, 1263540, 2163540, 1623540, 6123540, 2613540, 6213540, + 1362540, 3162540, 1632540, 6132540, 3612540, 6312540, 2361540, 3261540, 2631540, 6231540, 3621540, 6321540, + 1256340, 2156340, 1526340, 5126340, 2516340, 5216340, 1265340, 2165340, 1625340, 6125340, 2615340, 6215340, + 1562340, 5162340, 1652340, 6152340, 5612340, 6512340, 2561340, 5261340, 2651340, 6251340, 5621340, 6521340, + 1356240, 3156240, 1536240, 5136240, 3516240, 5316240, 1365240, 3165240, 1635240, 6135240, 3615240, 6315240, + 1563240, 5163240, 1653240, 6153240, 5613240, 6513240, 3561240, 5361240, 3651240, 6351240, 5631240, 6531240, + 2356140, 3256140, 2536140, 5236140, 3526140, 5326140, 2365140, 3265140, 2635140, 6235140, 3625140, 6325140, + 2563140, 5263140, 2653140, 6253140, 5623140, 6523140, 3562140, 5362140, 3652140, 6352140, 5632140, 6532140, + 1245630, 2145630, 1425630, 4125630, 2415630, 4215630, 1254630, 2154630, 1524630, 5124630, 2514630, 5214630, + 1452630, 4152630, 1542630, 5142630, 4512630, 5412630, 2451630, 4251630, 2541630, 5241630, 4521630, 5421630, + 1246530, 2146530, 1426530, 4126530, 2416530, 4216530, 1264530, 2164530, 1624530, 6124530, 2614530, 6214530, + 1462530, 4162530, 1642530, 6142530, 4612530, 6412530, 2461530, 4261530, 2641530, 6241530, 4621530, 6421530, + 1256430, 2156430, 1526430, 5126430, 2516430, 5216430, 1265430, 2165430, 1625430, 6125430, 2615430, 6215430, + 1562430, 5162430, 1652430, 6152430, 5612430, 6512430, 2561430, 5261430, 2651430, 6251430, 5621430, 6521430, + 1456230, 4156230, 1546230, 5146230, 4516230, 5416230, 1465230, 4165230, 1645230, 6145230, 4615230, 6415230, + 1564230, 5164230, 1654230, 6154230, 5614230, 6514230, 4561230, 5461230, 4651230, 6451230, 5641230, 6541230, + 2456130, 4256130, 2546130, 5246130, 
4526130, 5426130, 2465130, 4265130, 2645130, 6245130, 4625130, 6425130, + 2564130, 5264130, 2654130, 6254130, 5624130, 6524130, 4562130, 5462130, 4652130, 6452130, 5642130, 6542130, + 1345620, 3145620, 1435620, 4135620, 3415620, 4315620, 1354620, 3154620, 1534620, 5134620, 3514620, 5314620, + 1453620, 4153620, 1543620, 5143620, 4513620, 5413620, 3451620, 4351620, 3541620, 5341620, 4531620, 5431620, + 1346520, 3146520, 1436520, 4136520, 3416520, 4316520, 1364520, 3164520, 1634520, 6134520, 3614520, 6314520, + 1463520, 4163520, 1643520, 6143520, 4613520, 6413520, 3461520, 4361520, 3641520, 6341520, 4631520, 6431520, + 1356420, 3156420, 1536420, 5136420, 3516420, 5316420, 1365420, 3165420, 1635420, 6135420, 3615420, 6315420, + 1563420, 5163420, 1653420, 6153420, 5613420, 6513420, 3561420, 5361420, 3651420, 6351420, 5631420, 6531420, + 1456320, 4156320, 1546320, 5146320, 4516320, 5416320, 1465320, 4165320, 1645320, 6145320, 4615320, 6415320, + 1564320, 5164320, 1654320, 6154320, 5614320, 6514320, 4561320, 5461320, 4651320, 6451320, 5641320, 6541320, + 3456120, 4356120, 3546120, 5346120, 4536120, 5436120, 3465120, 4365120, 3645120, 6345120, 4635120, 6435120, + 3564120, 5364120, 3654120, 6354120, 5634120, 6534120, 4563120, 5463120, 4653120, 6453120, 5643120, 6543120, + 2345610, 3245610, 2435610, 4235610, 3425610, 4325610, 2354610, 3254610, 2534610, 5234610, 3524610, 5324610, + 2453610, 4253610, 2543610, 5243610, 4523610, 5423610, 3452610, 4352610, 3542610, 5342610, 4532610, 5432610, + 2346510, 3246510, 2436510, 4236510, 3426510, 4326510, 2364510, 3264510, 2634510, 6234510, 3624510, 6324510, + 2463510, 4263510, 2643510, 6243510, 4623510, 6423510, 3462510, 4362510, 3642510, 6342510, 4632510, 6432510, + 2356410, 3256410, 2536410, 5236410, 3526410, 5326410, 2365410, 3265410, 2635410, 6235410, 3625410, 6325410, + 2563410, 5263410, 2653410, 6253410, 5623410, 6523410, 3562410, 5362410, 3652410, 6352410, 5632410, 6532410, + 2456310, 4256310, 2546310, 5246310, 4526310, 5426310, 2465310, 4265310, 2645310, 6245310, 4625310, 6425310, + 2564310, 5264310, 2654310, 6254310, 5624310, 6524310, 4562310, 5462310, 4652310, 6452310, 5642310, 6542310, + 3456210, 4356210, 3546210, 5346210, 4536210, 5436210, 3465210, 4365210, 3645210, 6345210, 4635210, 6435210, + 3564210, 5364210, 3654210, 6354210, 5634210, 6534210, 4563210, 5463210, 4653210, 6453210, 5643210, 6543210 + }; + std::map<uint64_t, int> expected; + for (std::size_t i = 0; i < 5040; i++) + expected[pre_expected[i]] = 0; // flags are 0, everything is symmetric here + + VERIFY(isDynGroup(group)); + VERIFY_IS_EQUAL(group.size(), 5040u); + VERIFY_IS_EQUAL(group.globalFlags(), 0); + group.apply<checkIdx, int>(identity7, 0, found, expected); + VERIFY_IS_EQUAL(found.size(), 5040u); + } +} + +static void test_tensor_epsilon() +{ + SGroup<AntiSymmetry<0,1>, AntiSymmetry<1,2>> sym; + Tensor<int, 3> epsilon(3,3,3); + + epsilon.setZero(); + sym(epsilon, 0, 1, 2) = 1; + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) { + for (int k = 0; k < 3; k++) { + VERIFY_IS_EQUAL((epsilon(i,j,k)), (- (j - i) * (k - j) * (i - k) / 2) ); + } + } + } +} + +static void test_tensor_sym() +{ + SGroup<Symmetry<0,1>, Symmetry<2,3>> sym; + Tensor<int, 4> t(10,10,10,10); + + t.setZero(); + + for (int l = 0; l < 10; l++) { + for (int k = l; k < 10; k++) { + for (int j = 0; j < 10; j++) { + for (int i = j; i < 10; i++) { + sym(t, i, j, k, l) = (i + j) * (k + l); + } + } + } + } + + for (int l = 0; l < 10; l++) { + for (int k = 0; k < 10; k++) { + for (int j = 0; j < 10; j++) { + 
for (int i = 0; i < 10; i++) { + VERIFY_IS_EQUAL((t(i, j, k, l)), ((i + j) * (k + l))); + } + } + } + } + +} + +static void test_tensor_asym() +{ + SGroup<AntiSymmetry<0,1>, AntiSymmetry<2,3>> sym; + Tensor<int, 4> t(10,10,10,10); + + t.setZero(); + + for (int l = 0; l < 10; l++) { + for (int k = l + 1; k < 10; k++) { + for (int j = 0; j < 10; j++) { + for (int i = j + 1; i < 10; i++) { + sym(t, i, j, k, l) = ((i * j) + (k * l)); + } + } + } + } + + for (int l = 0; l < 10; l++) { + for (int k = 0; k < 10; k++) { + for (int j = 0; j < 10; j++) { + for (int i = 0; i < 10; i++) { + if (i < j && k < l) + VERIFY_IS_EQUAL((t(i, j, k, l)), (((i * j) + (k * l)))); + else if (i > j && k > l) + VERIFY_IS_EQUAL((t(i, j, k, l)), (((i * j) + (k * l)))); + else if (i < j && k > l) + VERIFY_IS_EQUAL((t(i, j, k, l)), (- ((i * j) + (k * l)))); + else if (i > j && k < l) + VERIFY_IS_EQUAL((t(i, j, k, l)), (- ((i * j) + (k * l)))); + else + VERIFY_IS_EQUAL((t(i, j, k, l)), 0); + } + } + } + } +} + +static void test_tensor_dynsym() +{ + DynamicSGroup sym; + sym.addSymmetry(0,1); + sym.addSymmetry(2,3); + Tensor<int, 4> t(10,10,10,10); + + t.setZero(); + + for (int l = 0; l < 10; l++) { + for (int k = l; k < 10; k++) { + for (int j = 0; j < 10; j++) { + for (int i = j; i < 10; i++) { + sym(t, i, j, k, l) = (i + j) * (k + l); + } + } + } + } + + for (int l = 0; l < 10; l++) { + for (int k = 0; k < 10; k++) { + for (int j = 0; j < 10; j++) { + for (int i = 0; i < 10; i++) { + VERIFY_IS_EQUAL((t(i, j, k, l)), ((i + j) * (k + l))); + } + } + } + } +} + +static void test_tensor_randacc() +{ + SGroup<Symmetry<0,1>, Symmetry<2,3>> sym; + Tensor<int, 4> t(10,10,10,10); + + t.setZero(); + + // set elements 1 million times, that way we access the + // entire matrix + for (int n = 0; n < 1000000; n++) { + int i = rand() % 10; + int j = rand() % 10; + int k = rand() % 10; + int l = rand() % 10; + // only access those indices in a given order + if (i < j) + std::swap(i, j); + if (k < l) + std::swap(k, l); + sym(t, i, j, k, l) = (i + j) * (k + l); + } + + for (int l = 0; l < 10; l++) { + for (int k = 0; k < 10; k++) { + for (int j = 0; j < 10; j++) { + for (int i = 0; i < 10; i++) { + VERIFY_IS_EQUAL((t(i, j, k, l)), ((i + j) * (k + l))); + } + } + } + } +} + +void test_cxx11_tensor_symmetry() +{ + CALL_SUBTEST(test_symgroups_static()); + CALL_SUBTEST(test_symgroups_dynamic()); + CALL_SUBTEST(test_symgroups_selection()); + CALL_SUBTEST(test_tensor_epsilon()); + CALL_SUBTEST(test_tensor_sym()); + CALL_SUBTEST(test_tensor_asym()); + CALL_SUBTEST(test_tensor_dynsym()); + CALL_SUBTEST(test_tensor_randacc()); +} + +/* + * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; + */ diff --git a/eigen/unsupported/test/cxx11_tensor_thread_pool.cpp b/eigen/unsupported/test/cxx11_tensor_thread_pool.cpp new file mode 100644 index 0000000..2ef665f --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -0,0 +1,373 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
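+
+// EIGEN_USE_THREADS is defined below before any Eigen headers are included:
+// it enables the ThreadPoolDevice backend that all of these tests exercise.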
+ +#define EIGEN_USE_THREADS + + +#include "main.h" +#include <iostream> +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + + +void test_multithread_elementwise() +{ + Tensor<float, 3> in1(2,3,7); + Tensor<float, 3> in2(2,3,7); + Tensor<float, 3> out(2,3,7); + + in1.setRandom(); + in2.setRandom(); + + Eigen::ThreadPool tp(internal::random<int>(3, 11)); + Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11)); + out.device(thread_pool_device) = in1 + in2 * 3.14f; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f); + } + } + } +} + + +void test_multithread_compound_assignment() +{ + Tensor<float, 3> in1(2,3,7); + Tensor<float, 3> in2(2,3,7); + Tensor<float, 3> out(2,3,7); + + in1.setRandom(); + in2.setRandom(); + + Eigen::ThreadPool tp(internal::random<int>(3, 11)); + Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(3, 11)); + out.device(thread_pool_device) = in1; + out.device(thread_pool_device) += in2 * 3.14f; + + for (int i = 0; i < 2; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 7; ++k) { + VERIFY_IS_APPROX(out(i,j,k), in1(i,j,k) + in2(i,j,k) * 3.14f); + } + } + } +} + +template<int DataLayout> +void test_multithread_contraction() +{ + Tensor<float, 4, DataLayout> t_left(30, 50, 37, 31); + Tensor<float, 5, DataLayout> t_right(37, 31, 70, 2, 10); + Tensor<float, 5, DataLayout> t_result(30, 50, 70, 2, 10); + + t_left.setRandom(); + t_right.setRandom(); + + // this contraction should be equivalent to a single matrix multiplication + typedef Tensor<float, 1>::DimensionPair DimPair; + Eigen::array<DimPair, 2> dims({{DimPair(2, 0), DimPair(3, 1)}}); + + typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf; + MapXf m_left(t_left.data(), 1500, 1147); + MapXf m_right(t_right.data(), 1147, 1400); + Matrix<float, Dynamic, Dynamic, DataLayout> m_result(1500, 1400); + + Eigen::ThreadPool tp(4); + Eigen::ThreadPoolDevice thread_pool_device(&tp, 4); + + // compute results by separate methods + t_result.device(thread_pool_device) = t_left.contract(t_right, dims); + m_result = m_left * m_right; + + for (ptrdiff_t i = 0; i < t_result.size(); i++) { + VERIFY(&t_result.data()[i] != &m_result.data()[i]); + if (fabsf(t_result(i) - m_result(i)) < 1e-4f) { + continue; + } + if (Eigen::internal::isApprox(t_result(i), m_result(i), 1e-4f)) { + continue; + } + std::cout << "mismatch detected at index " << i << ": " << t_result(i) + << " vs " << m_result(i) << std::endl; + assert(false); + } +} + +template<int DataLayout> +void test_contraction_corner_cases() +{ + Tensor<float, 2, DataLayout> t_left(32, 500); + Tensor<float, 2, DataLayout> t_right(32, 28*28); + Tensor<float, 2, DataLayout> t_result(500, 28*28); + + t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f; + t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f; + t_result = t_result.constant(NAN); + + // this contraction should be equivalent to a single matrix multiplication + typedef Tensor<float, 1>::DimensionPair DimPair; + Eigen::array<DimPair, 1> dims{{DimPair(0, 0)}}; + + typedef Map<Matrix<float, Dynamic, Dynamic, DataLayout>> MapXf; + MapXf m_left(t_left.data(), 32, 500); + MapXf m_right(t_right.data(), 32, 28*28); + Matrix<float, Dynamic, Dynamic, DataLayout> m_result(500, 28*28); + + Eigen::ThreadPool tp(12); + Eigen::ThreadPoolDevice thread_pool_device(&tp, 12); + + // compute results by separate methods + t_result.device(thread_pool_device) = 
t_left.contract(t_right, dims); + m_result = m_left.transpose() * m_right; + + for (ptrdiff_t i = 0; i < t_result.size(); i++) { + assert(!(numext::isnan)(t_result.data()[i])); + if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) { + std::cout << "mismatch detected at index " << i << " : " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; + assert(false); + } + } + + t_left.resize(32, 1); + t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f; + t_result.resize (1, 28*28); + t_result = t_result.constant(NAN); + t_result.device(thread_pool_device) = t_left.contract(t_right, dims); + new(&m_left) MapXf(t_left.data(), 32, 1); + m_result = m_left.transpose() * m_right; + for (ptrdiff_t i = 0; i < t_result.size(); i++) { + assert(!(numext::isnan)(t_result.data()[i])); + if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) { + std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; + assert(false); + } + } + + t_left.resize(32, 500); + t_right.resize(32, 4); + t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f; + t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f; + t_result.resize (500, 4); + t_result = t_result.constant(NAN); + t_result.device(thread_pool_device) = t_left.contract(t_right, dims); + new(&m_left) MapXf(t_left.data(), 32, 500); + new(&m_right) MapXf(t_right.data(), 32, 4); + m_result = m_left.transpose() * m_right; + for (ptrdiff_t i = 0; i < t_result.size(); i++) { + assert(!(numext::isnan)(t_result.data()[i])); + if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) { + std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; + assert(false); + } + } + + t_left.resize(32, 1); + t_right.resize(32, 4); + t_left = (t_left.constant(-0.5f) + t_left.random()) * 2.0f; + t_right = (t_right.constant(-0.6f) + t_right.random()) * 2.0f; + t_result.resize (1, 4); + t_result = t_result.constant(NAN); + t_result.device(thread_pool_device) = t_left.contract(t_right, dims); + new(&m_left) MapXf(t_left.data(), 32, 1); + new(&m_right) MapXf(t_right.data(), 32, 4); + m_result = m_left.transpose() * m_right; + for (ptrdiff_t i = 0; i < t_result.size(); i++) { + assert(!(numext::isnan)(t_result.data()[i])); + if (fabsf(t_result.data()[i] - m_result.data()[i]) >= 1e-4f) { + std::cout << "mismatch detected: " << t_result.data()[i] << " vs " << m_result.data()[i] << std::endl; + assert(false); + } + } +} + +template<int DataLayout> +void test_multithread_contraction_agrees_with_singlethread() { + int contract_size = internal::random<int>(1, 5000); + + Tensor<float, 3, DataLayout> left(internal::random<int>(1, 80), + contract_size, + internal::random<int>(1, 100)); + + Tensor<float, 4, DataLayout> right(internal::random<int>(1, 25), + internal::random<int>(1, 37), + contract_size, + internal::random<int>(1, 51)); + + left.setRandom(); + right.setRandom(); + + // add constants to shift values away from 0 for more precision + left += left.constant(1.5f); + right += right.constant(1.5f); + + typedef Tensor<float, 1>::DimensionPair DimPair; + Eigen::array<DimPair, 1> dims({{DimPair(1, 2)}}); + + Eigen::ThreadPool tp(internal::random<int>(2, 11)); + Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11)); + + Tensor<float, 5, DataLayout> st_result; + st_result = left.contract(right, dims); + + Tensor<float, 5, DataLayout> tp_result(st_result.dimensions()); + tp_result.device(thread_pool_device) = left.contract(right, dims); + + 
VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions())); + for (ptrdiff_t i = 0; i < st_result.size(); i++) { + // if both of the values are very small, then do nothing (because the test will fail + // due to numerical precision issues when values are small) + if (numext::abs(st_result.data()[i] - tp_result.data()[i]) >= 1e-4f) { + VERIFY_IS_APPROX(st_result.data()[i], tp_result.data()[i]); + } + } +} + + +template<int DataLayout> +void test_full_contraction() { + int contract_size1 = internal::random<int>(1, 500); + int contract_size2 = internal::random<int>(1, 500); + + Tensor<float, 2, DataLayout> left(contract_size1, + contract_size2); + Tensor<float, 2, DataLayout> right(contract_size1, + contract_size2); + left.setRandom(); + right.setRandom(); + + // add constants to shift values away from 0 for more precision + left += left.constant(1.5f); + right += right.constant(1.5f); + + typedef Tensor<float, 2>::DimensionPair DimPair; + Eigen::array<DimPair, 2> dims({{DimPair(0, 0), DimPair(1, 1)}}); + + Eigen::ThreadPool tp(internal::random<int>(2, 11)); + Eigen::ThreadPoolDevice thread_pool_device(&tp, internal::random<int>(2, 11)); + + Tensor<float, 0, DataLayout> st_result; + st_result = left.contract(right, dims); + + Tensor<float, 0, DataLayout> tp_result; + tp_result.device(thread_pool_device) = left.contract(right, dims); + + VERIFY(dimensions_match(st_result.dimensions(), tp_result.dimensions())); + // if both of the values are very small, then do nothing (because the test will fail + // due to numerical precision issues when values are small) + if (numext::abs(st_result() - tp_result()) >= 1e-4f) { + VERIFY_IS_APPROX(st_result(), tp_result()); + } +} + +template<int DataLayout> +void test_multithreaded_reductions() { + const int num_threads = internal::random<int>(3, 11); + ThreadPool thread_pool(num_threads); + Eigen::ThreadPoolDevice thread_pool_device(&thread_pool, num_threads); + + const int num_rows = internal::random<int>(13, 732); + const int num_cols = internal::random<int>(13, 732); + Tensor<float, 2, DataLayout> t1(num_rows, num_cols); + t1.setRandom(); + + Tensor<float, 0, DataLayout> full_redux; + full_redux = t1.sum(); + + Tensor<float, 0, DataLayout> full_redux_tp; + full_redux_tp.device(thread_pool_device) = t1.sum(); + + // Check that the single threaded and the multi threaded reductions return + // the same result. 
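+  // (full_redux is a rank-0 tensor, i.e. a single scalar; invoking it with
+  // no indices, as below, reads that scalar back.)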
+ VERIFY_IS_APPROX(full_redux(), full_redux_tp()); +} + + +void test_memcpy() { + + for (int i = 0; i < 5; ++i) { + const int num_threads = internal::random<int>(3, 11); + Eigen::ThreadPool tp(num_threads); + Eigen::ThreadPoolDevice thread_pool_device(&tp, num_threads); + + const int size = internal::random<int>(13, 7632); + Tensor<float, 1> t1(size); + t1.setRandom(); + std::vector<float> result(size); + thread_pool_device.memcpy(&result[0], t1.data(), size*sizeof(float)); + for (int j = 0; j < size; j++) { + VERIFY_IS_EQUAL(t1(j), result[j]); + } + } +} + + +void test_multithread_random() +{ + Eigen::ThreadPool tp(2); + Eigen::ThreadPoolDevice device(&tp, 2); + Tensor<float, 1> t(1 << 20); + t.device(device) = t.random<Eigen::internal::NormalRandomGenerator<float>>(); +} + +template<int DataLayout> +void test_multithread_shuffle() +{ + Tensor<float, 4, DataLayout> tensor(17,5,7,11); + tensor.setRandom(); + + const int num_threads = internal::random<int>(2, 11); + ThreadPool threads(num_threads); + Eigen::ThreadPoolDevice device(&threads, num_threads); + + Tensor<float, 4, DataLayout> shuffle(7,5,11,17); + array<ptrdiff_t, 4> shuffles = {{2,1,3,0}}; + shuffle.device(device) = tensor.shuffle(shuffles); + + for (int i = 0; i < 17; ++i) { + for (int j = 0; j < 5; ++j) { + for (int k = 0; k < 7; ++k) { + for (int l = 0; l < 11; ++l) { + VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,j,l,i)); + } + } + } + } +} + + +void test_cxx11_tensor_thread_pool() +{ + CALL_SUBTEST_1(test_multithread_elementwise()); + CALL_SUBTEST_1(test_multithread_compound_assignment()); + + CALL_SUBTEST_2(test_multithread_contraction<ColMajor>()); + CALL_SUBTEST_2(test_multithread_contraction<RowMajor>()); + + CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<ColMajor>()); + CALL_SUBTEST_3(test_multithread_contraction_agrees_with_singlethread<RowMajor>()); + + // Exercise various cases that have been problematic in the past. + CALL_SUBTEST_4(test_contraction_corner_cases<ColMajor>()); + CALL_SUBTEST_4(test_contraction_corner_cases<RowMajor>()); + + CALL_SUBTEST_4(test_full_contraction<ColMajor>()); + CALL_SUBTEST_4(test_full_contraction<RowMajor>()); + + CALL_SUBTEST_5(test_multithreaded_reductions<ColMajor>()); + CALL_SUBTEST_5(test_multithreaded_reductions<RowMajor>()); + + CALL_SUBTEST_6(test_memcpy()); + CALL_SUBTEST_6(test_multithread_random()); + CALL_SUBTEST_6(test_multithread_shuffle<ColMajor>()); + CALL_SUBTEST_6(test_multithread_shuffle<RowMajor>()); +} diff --git a/eigen/unsupported/test/cxx11_tensor_uint128.cpp b/eigen/unsupported/test/cxx11_tensor_uint128.cpp new file mode 100644 index 0000000..d2a1e86 --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_uint128.cpp @@ -0,0 +1,160 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + + +#if EIGEN_COMP_MSVC +#define EIGEN_NO_INT128 +#else +typedef __uint128_t uint128_t; +#endif + +// Only run the test on compilers that support 128bit integers natively +#ifndef EIGEN_NO_INT128 + +using Eigen::internal::TensorUInt128; +using Eigen::internal::static_val; + +void VERIFY_EQUAL(TensorUInt128<uint64_t, uint64_t> actual, uint128_t expected) { + bool matchl = actual.lower() == static_cast<uint64_t>(expected); + bool matchh = actual.upper() == static_cast<uint64_t>(expected >> 64); + if (!matchl || !matchh) { + const char* testname = g_test_stack.back().c_str(); + std::cerr << "Test " << testname << " failed in " << __FILE__ + << " (" << __LINE__ << ")" + << std::endl; + abort(); + } +} + + +void test_add() { + uint64_t incr = internal::random<uint64_t>(1, 9999999999); + for (uint64_t i1 = 0; i1 < 100; ++i1) { + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128<uint64_t, uint64_t> i(i1, i2); + uint128_t a = (static_cast<uint128_t>(i1) << 64) + static_cast<uint128_t>(i2); + for (uint64_t j1 = 0; j1 < 100; ++j1) { + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128<uint64_t, uint64_t> j(j1, j2); + uint128_t b = (static_cast<uint128_t>(j1) << 64) + static_cast<uint128_t>(j2); + TensorUInt128<uint64_t, uint64_t> actual = i + j; + uint128_t expected = a + b; + VERIFY_EQUAL(actual, expected); + } + } + } + } +} + +void test_sub() { + uint64_t incr = internal::random<uint64_t>(1, 9999999999); + for (uint64_t i1 = 0; i1 < 100; ++i1) { + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128<uint64_t, uint64_t> i(i1, i2); + uint128_t a = (static_cast<uint128_t>(i1) << 64) + static_cast<uint128_t>(i2); + for (uint64_t j1 = 0; j1 < 100; ++j1) { + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128<uint64_t, uint64_t> j(j1, j2); + uint128_t b = (static_cast<uint128_t>(j1) << 64) + static_cast<uint128_t>(j2); + TensorUInt128<uint64_t, uint64_t> actual = i - j; + uint128_t expected = a - b; + VERIFY_EQUAL(actual, expected); + } + } + } + } +} + +void test_mul() { + uint64_t incr = internal::random<uint64_t>(1, 9999999999); + for (uint64_t i1 = 0; i1 < 100; ++i1) { + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128<uint64_t, uint64_t> i(i1, i2); + uint128_t a = (static_cast<uint128_t>(i1) << 64) + static_cast<uint128_t>(i2); + for (uint64_t j1 = 0; j1 < 100; ++j1) { + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128<uint64_t, uint64_t> j(j1, j2); + uint128_t b = (static_cast<uint128_t>(j1) << 64) + static_cast<uint128_t>(j2); + TensorUInt128<uint64_t, uint64_t> actual = i * j; + uint128_t expected = a * b; + VERIFY_EQUAL(actual, expected); + } + } + } + } +} + +void test_div() { + uint64_t incr = internal::random<uint64_t>(1, 9999999999); + for (uint64_t i1 = 0; i1 < 100; ++i1) { + for (uint64_t i2 = 1; i2 < 100 * incr; i2 += incr) { + TensorUInt128<uint64_t, uint64_t> i(i1, i2); + uint128_t a = (static_cast<uint128_t>(i1) << 64) + static_cast<uint128_t>(i2); + for (uint64_t j1 = 0; j1 < 100; ++j1) { + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128<uint64_t, uint64_t> j(j1, j2); + uint128_t b = (static_cast<uint128_t>(j1) << 64) + static_cast<uint128_t>(j2); + TensorUInt128<uint64_t, uint64_t> actual = i / j; + uint128_t expected = a / b; + VERIFY_EQUAL(actual, expected); + } + } + } + } +} + +void test_misc1() { + uint64_t incr = internal::random<uint64_t>(1, 9999999999); + for (uint64_t i2 = 1; i2 < 100 * 
incr; i2 += incr) { + TensorUInt128<static_val<0>, uint64_t> i(0, i2); + uint128_t a = static_cast<uint128_t>(i2); + for (uint64_t j2 = 1; j2 < 100 * incr; j2 += incr) { + TensorUInt128<static_val<0>, uint64_t> j(0, j2); + uint128_t b = static_cast<uint128_t>(j2); + uint64_t actual = (i * j).upper(); + uint64_t expected = (a * b) >> 64; + VERIFY_IS_EQUAL(actual, expected); + } + } +} + +void test_misc2() { + int64_t incr = internal::random<int64_t>(1, 100); + for (int64_t log_div = 0; log_div < 63; ++log_div) { + for (int64_t divider = 1; divider <= 1000000 * incr; divider += incr) { + uint64_t expected = (static_cast<uint128_t>(1) << (64+log_div)) / static_cast<uint128_t>(divider) - (static_cast<uint128_t>(1) << 64) + 1; + uint64_t shift = 1ULL << log_div; + + TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1)); + uint64_t actual = static_cast<uint64_t>(result); + VERIFY_IS_EQUAL(actual, expected); + } + } +} +#endif + + +void test_cxx11_tensor_uint128() +{ +#ifdef EIGEN_NO_INT128 + // Skip the test on compilers that don't support 128bit integers natively + return; +#else + CALL_SUBTEST_1(test_add()); + CALL_SUBTEST_2(test_sub()); + CALL_SUBTEST_3(test_mul()); + CALL_SUBTEST_4(test_div()); + CALL_SUBTEST_5(test_misc1()); + CALL_SUBTEST_6(test_misc2()); +#endif +} diff --git a/eigen/unsupported/test/cxx11_tensor_volume_patch.cpp b/eigen/unsupported/test/cxx11_tensor_volume_patch.cpp new file mode 100644 index 0000000..ca6840f --- /dev/null +++ b/eigen/unsupported/test/cxx11_tensor_volume_patch.cpp @@ -0,0 +1,112 @@ +#include "main.h" + +#include <Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +static void test_single_voxel_patch() +{ + Tensor<float, 5> tensor(4,2,3,5,7); + tensor.setRandom(); + Tensor<float, 5, RowMajor> tensor_row_major = tensor.swap_layout(); + + Tensor<float, 6> single_voxel_patch; + single_voxel_patch = tensor.extract_volume_patches(1, 1, 1); + VERIFY_IS_EQUAL(single_voxel_patch.dimension(0), 4); + VERIFY_IS_EQUAL(single_voxel_patch.dimension(1), 1); + VERIFY_IS_EQUAL(single_voxel_patch.dimension(2), 1); + VERIFY_IS_EQUAL(single_voxel_patch.dimension(3), 1); + VERIFY_IS_EQUAL(single_voxel_patch.dimension(4), 2 * 3 * 5); + VERIFY_IS_EQUAL(single_voxel_patch.dimension(5), 7); + + Tensor<float, 6, RowMajor> single_voxel_patch_row_major; + single_voxel_patch_row_major = tensor_row_major.extract_volume_patches(1, 1, 1); + VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(0), 7); + VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(1), 2 * 3 * 5); + VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(2), 1); + VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(3), 1); + VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(4), 1); + VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(5), 4); + + for (int i = 0; i < tensor.size(); ++i) { + VERIFY_IS_EQUAL(tensor.data()[i], single_voxel_patch.data()[i]); + VERIFY_IS_EQUAL(tensor_row_major.data()[i], single_voxel_patch_row_major.data()[i]); + VERIFY_IS_EQUAL(tensor.data()[i], tensor_row_major.data()[i]); + } +} + + +static void test_entire_volume_patch() +{ + const int depth = 4; + const int patch_z = 2; + const int patch_y = 3; + const int patch_x = 5; + const int batch = 7; + + Tensor<float, 5> tensor(depth, patch_z, patch_y, patch_x, batch); + tensor.setRandom(); + Tensor<float, 5, RowMajor> tensor_row_major = 
tensor.swap_layout(); + + Tensor<float, 6> entire_volume_patch; + entire_volume_patch = tensor.extract_volume_patches(patch_z, patch_y, patch_x); + VERIFY_IS_EQUAL(entire_volume_patch.dimension(0), depth); + VERIFY_IS_EQUAL(entire_volume_patch.dimension(1), patch_z); + VERIFY_IS_EQUAL(entire_volume_patch.dimension(2), patch_y); + VERIFY_IS_EQUAL(entire_volume_patch.dimension(3), patch_x); + VERIFY_IS_EQUAL(entire_volume_patch.dimension(4), patch_z * patch_y * patch_x); + VERIFY_IS_EQUAL(entire_volume_patch.dimension(5), batch); + + Tensor<float, 6, RowMajor> entire_volume_patch_row_major; + entire_volume_patch_row_major = tensor_row_major.extract_volume_patches(patch_z, patch_y, patch_x); + VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(0), batch); + VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(1), patch_z * patch_y * patch_x); + VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(2), patch_x); + VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(3), patch_y); + VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(4), patch_z); + VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(5), depth); + + const int dz = patch_z - 1; + const int dy = patch_y - 1; + const int dx = patch_x - 1; + + const int forward_pad_z = dz - dz / 2; + const int forward_pad_y = dy - dy / 2; + const int forward_pad_x = dx - dx / 2; + + for (int pz = 0; pz < patch_z; pz++) { + for (int py = 0; py < patch_y; py++) { + for (int px = 0; px < patch_x; px++) { + const int patchId = pz + patch_z * (py + px * patch_y); + for (int z = 0; z < patch_z; z++) { + for (int y = 0; y < patch_y; y++) { + for (int x = 0; x < patch_x; x++) { + for (int b = 0; b < batch; b++) { + for (int d = 0; d < depth; d++) { + float expected = 0.0f; + float expected_row_major = 0.0f; + const int eff_z = z - forward_pad_z + pz; + const int eff_y = y - forward_pad_y + py; + const int eff_x = x - forward_pad_x + px; + if (eff_z >= 0 && eff_y >= 0 && eff_x >= 0 && + eff_z < patch_z && eff_y < patch_y && eff_x < patch_x) { + expected = tensor(d, eff_z, eff_y, eff_x, b); + expected_row_major = tensor_row_major(b, eff_x, eff_y, eff_z, d); + } + VERIFY_IS_EQUAL(entire_volume_patch(d, z, y, x, patchId, b), expected); + VERIFY_IS_EQUAL(entire_volume_patch_row_major(b, patchId, x, y, z, d), expected_row_major); + } + } + } + } + } + } + } + } +} + +void test_cxx11_tensor_volume_patch() +{ + CALL_SUBTEST(test_single_voxel_patch()); + CALL_SUBTEST(test_entire_volume_patch()); +} diff --git a/eigen/unsupported/test/forward_adolc.cpp b/eigen/unsupported/test/forward_adolc.cpp index d4baafe..866db8e 100644 --- a/eigen/unsupported/test/forward_adolc.cpp +++ b/eigen/unsupported/test/forward_adolc.cpp @@ -13,8 +13,6 @@ #define NUMBER_DIRECTIONS 16 #include <unsupported/Eigen/AdolcForward> -int adtl::ADOLC_numDir; - template<typename Vector> EIGEN_DONT_INLINE typename Vector::Scalar foo(const Vector& p) { @@ -123,7 +121,7 @@ template<typename Func> void adolc_forward_jacobian(const Func& f) void test_forward_adolc() { - adtl::ADOLC_numDir = NUMBER_DIRECTIONS; + adtl::setNumDir(NUMBER_DIRECTIONS); for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST(( adolc_forward_jacobian(TestFunc1<double,2,2>()) )); diff --git a/eigen/unsupported/test/jacobisvd.cpp b/eigen/unsupported/test/jacobisvd.cpp deleted file mode 100644 index b4e884e..0000000 --- a/eigen/unsupported/test/jacobisvd.cpp +++ /dev/null @@ -1,198 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr> -// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#include "svd_common.h" - -template<typename MatrixType, int QRPreconditioner> -void jacobisvd_check_full(const MatrixType& m, const JacobiSVD<MatrixType, QRPreconditioner>& svd) -{ - svd_check_full<MatrixType, JacobiSVD<MatrixType, QRPreconditioner > >(m, svd); -} - -template<typename MatrixType, int QRPreconditioner> -void jacobisvd_compare_to_full(const MatrixType& m, - unsigned int computationOptions, - const JacobiSVD<MatrixType, QRPreconditioner>& referenceSvd) -{ - svd_compare_to_full<MatrixType, JacobiSVD<MatrixType, QRPreconditioner> >(m, computationOptions, referenceSvd); -} - - -template<typename MatrixType, int QRPreconditioner> -void jacobisvd_solve(const MatrixType& m, unsigned int computationOptions) -{ - svd_solve< MatrixType, JacobiSVD< MatrixType, QRPreconditioner > >(m, computationOptions); -} - - - -template<typename MatrixType, int QRPreconditioner> -void jacobisvd_test_all_computation_options(const MatrixType& m) -{ - - if (QRPreconditioner == NoQRPreconditioner && m.rows() != m.cols()) - return; - - JacobiSVD< MatrixType, QRPreconditioner > fullSvd(m, ComputeFullU|ComputeFullV); - svd_test_computation_options_1< MatrixType, JacobiSVD< MatrixType, QRPreconditioner > >(m, fullSvd); - - if(QRPreconditioner == FullPivHouseholderQRPreconditioner) - return; - svd_test_computation_options_2< MatrixType, JacobiSVD< MatrixType, QRPreconditioner > >(m, fullSvd); - -} - -template<typename MatrixType> -void jacobisvd(const MatrixType& a = MatrixType(), bool pickrandom = true) -{ - MatrixType m = pickrandom ? 
MatrixType::Random(a.rows(), a.cols()) : a; - - jacobisvd_test_all_computation_options<MatrixType, FullPivHouseholderQRPreconditioner>(m); - jacobisvd_test_all_computation_options<MatrixType, ColPivHouseholderQRPreconditioner>(m); - jacobisvd_test_all_computation_options<MatrixType, HouseholderQRPreconditioner>(m); - jacobisvd_test_all_computation_options<MatrixType, NoQRPreconditioner>(m); -} - - -template<typename MatrixType> -void jacobisvd_verify_assert(const MatrixType& m) -{ - - svd_verify_assert<MatrixType, JacobiSVD< MatrixType > >(m); - - typedef typename MatrixType::Index Index; - Index rows = m.rows(); - Index cols = m.cols(); - - enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime - }; - - MatrixType a = MatrixType::Zero(rows, cols); - a.setZero(); - - if (ColsAtCompileTime == Dynamic) - { - JacobiSVD<MatrixType, FullPivHouseholderQRPreconditioner> svd_fullqr; - VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeFullU|ComputeThinV)) - VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeThinV)) - VERIFY_RAISES_ASSERT(svd_fullqr.compute(a, ComputeThinU|ComputeFullV)) - } -} - -template<typename MatrixType> -void jacobisvd_method() -{ - enum { Size = MatrixType::RowsAtCompileTime }; - typedef typename MatrixType::RealScalar RealScalar; - typedef Matrix<RealScalar, Size, 1> RealVecType; - MatrixType m = MatrixType::Identity(); - VERIFY_IS_APPROX(m.jacobiSvd().singularValues(), RealVecType::Ones()); - VERIFY_RAISES_ASSERT(m.jacobiSvd().matrixU()); - VERIFY_RAISES_ASSERT(m.jacobiSvd().matrixV()); - VERIFY_IS_APPROX(m.jacobiSvd(ComputeFullU|ComputeFullV).solve(m), m); -} - - - -template<typename MatrixType> -void jacobisvd_inf_nan() -{ - svd_inf_nan<MatrixType, JacobiSVD< MatrixType > >(); -} - - -// Regression test for bug 286: JacobiSVD loops indefinitely with some -// matrices containing denormal numbers. 
-void jacobisvd_bug286() -{ -#if defined __INTEL_COMPILER -// shut up warning #239: floating point underflow -#pragma warning push -#pragma warning disable 239 -#endif - Matrix2d M; - M << -7.90884e-313, -4.94e-324, - 0, 5.60844e-313; -#if defined __INTEL_COMPILER -#pragma warning pop -#endif - JacobiSVD<Matrix2d> svd; - svd.compute(M); // just check we don't loop indefinitely -} - - -void jacobisvd_preallocate() -{ - svd_preallocate< JacobiSVD <MatrixXf> >(); -} - -void test_jacobisvd() -{ - CALL_SUBTEST_11(( jacobisvd<Matrix<double,Dynamic,Dynamic> > - (Matrix<double,Dynamic,Dynamic>(16, 6)) )); - - CALL_SUBTEST_3(( jacobisvd_verify_assert(Matrix3f()) )); - CALL_SUBTEST_4(( jacobisvd_verify_assert(Matrix4d()) )); - CALL_SUBTEST_7(( jacobisvd_verify_assert(MatrixXf(10,12)) )); - CALL_SUBTEST_8(( jacobisvd_verify_assert(MatrixXcd(7,5)) )); - - for(int i = 0; i < g_repeat; i++) { - Matrix2cd m; - m << 0, 1, - 0, 1; - CALL_SUBTEST_1(( jacobisvd(m, false) )); - m << 1, 0, - 1, 0; - CALL_SUBTEST_1(( jacobisvd(m, false) )); - - Matrix2d n; - n << 0, 0, - 0, 0; - CALL_SUBTEST_2(( jacobisvd(n, false) )); - n << 0, 0, - 0, 1; - CALL_SUBTEST_2(( jacobisvd(n, false) )); - - CALL_SUBTEST_3(( jacobisvd<Matrix3f>() )); - CALL_SUBTEST_4(( jacobisvd<Matrix4d>() )); - CALL_SUBTEST_5(( jacobisvd<Matrix<float,3,5> >() )); - CALL_SUBTEST_6(( jacobisvd<Matrix<double,Dynamic,2> >(Matrix<double,Dynamic,2>(10,2)) )); - - int r = internal::random<int>(1, 30), - c = internal::random<int>(1, 30); - CALL_SUBTEST_7(( jacobisvd<MatrixXf>(MatrixXf(r,c)) )); - CALL_SUBTEST_8(( jacobisvd<MatrixXcd>(MatrixXcd(r,c)) )); - (void) r; - (void) c; - - // Test on inf/nan matrix - CALL_SUBTEST_7( jacobisvd_inf_nan<MatrixXf>() ); - } - - CALL_SUBTEST_7(( jacobisvd<MatrixXf>(MatrixXf(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/2))) )); - CALL_SUBTEST_8(( jacobisvd<MatrixXcd>(MatrixXcd(internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3), internal::random<int>(EIGEN_TEST_MAX_SIZE/4, EIGEN_TEST_MAX_SIZE/3))) )); - - - // test matrixbase method - CALL_SUBTEST_1(( jacobisvd_method<Matrix2cd>() )); - CALL_SUBTEST_3(( jacobisvd_method<Matrix3f>() )); - - - // Test problem size constructors - CALL_SUBTEST_7( JacobiSVD<MatrixXf>(10,10) ); - - // Check that preallocation avoids subsequent mallocs - CALL_SUBTEST_9( jacobisvd_preallocate() ); - - // Regression check for bug 286 - CALL_SUBTEST_2( jacobisvd_bug286() ); -} diff --git a/eigen/unsupported/test/kronecker_product.cpp b/eigen/unsupported/test/kronecker_product.cpp index 8ddc6ec..e770049 100644 --- a/eigen/unsupported/test/kronecker_product.cpp +++ b/eigen/unsupported/test/kronecker_product.cpp @@ -9,12 +9,12 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
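+// Identity relied on by the checks below: for A (m x n) and B (p x q), the
+// Kronecker product A (x) B is the (m*p) x (n*q) matrix with
+//   (A (x) B)(i*p + k, j*q + l) == A(i, j) * B(k, l),
+// i.e. every coefficient of A scales a complete copy of B.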
+#ifdef EIGEN_TEST_PART_1 #include "sparse.h" #include <Eigen/SparseExtra> #include <Eigen/KroneckerProduct> - template<typename MatrixType> void check_dimension(const MatrixType& ab, const int rows, const int cols) { @@ -107,31 +107,34 @@ void test_kronecker_product() SparseMatrix<double,RowMajor> SM_row_a(SM_a), SM_row_b(SM_b); - // test kroneckerProduct(DM_block,DM,DM_fixedSize) + // test DM_fixedSize = kroneckerProduct(DM_block,DM) Matrix<double, 6, 6> DM_fix_ab = kroneckerProduct(DM_a.topLeftCorner<2,3>(),DM_b); CALL_SUBTEST(check_kronecker_product(DM_fix_ab)); + CALL_SUBTEST(check_kronecker_product(kroneckerProduct(DM_a.topLeftCorner<2,3>(),DM_b))); for(int i=0;i<DM_fix_ab.rows();++i) for(int j=0;j<DM_fix_ab.cols();++j) VERIFY_IS_APPROX(kroneckerProduct(DM_a,DM_b).coeff(i,j), DM_fix_ab(i,j)); - // test kroneckerProduct(DM,DM,DM_block) + // test DM_block = kroneckerProduct(DM,DM) MatrixXd DM_block_ab(10,15); DM_block_ab.block<6,6>(2,5) = kroneckerProduct(DM_a,DM_b); CALL_SUBTEST(check_kronecker_product(DM_block_ab.block<6,6>(2,5))); - // test kroneckerProduct(DM,DM,DM) + // test DM = kroneckerProduct(DM,DM) MatrixXd DM_ab = kroneckerProduct(DM_a,DM_b); CALL_SUBTEST(check_kronecker_product(DM_ab)); + CALL_SUBTEST(check_kronecker_product(kroneckerProduct(DM_a,DM_b))); - // test kroneckerProduct(SM,DM,SM) + // test SM = kroneckerProduct(SM,DM) SparseMatrix<double> SM_ab = kroneckerProduct(SM_a,DM_b); CALL_SUBTEST(check_kronecker_product(SM_ab)); SparseMatrix<double,RowMajor> SM_ab2 = kroneckerProduct(SM_a,DM_b); CALL_SUBTEST(check_kronecker_product(SM_ab2)); + CALL_SUBTEST(check_kronecker_product(kroneckerProduct(SM_a,DM_b))); - // test kroneckerProduct(DM,SM,SM) + // test SM = kroneckerProduct(DM,SM) SM_ab.setZero(); SM_ab.insert(0,0)=37.0; SM_ab = kroneckerProduct(DM_a,SM_b); @@ -140,8 +143,9 @@ void test_kronecker_product() SM_ab2.insert(0,0)=37.0; SM_ab2 = kroneckerProduct(DM_a,SM_b); CALL_SUBTEST(check_kronecker_product(SM_ab2)); + CALL_SUBTEST(check_kronecker_product(kroneckerProduct(DM_a,SM_b))); - // test kroneckerProduct(SM,SM,SM) + // test SM = kroneckerProduct(SM,SM) SM_ab.resize(2,33); SM_ab.insert(0,0)=37.0; SM_ab = kroneckerProduct(SM_a,SM_b); @@ -150,8 +154,9 @@ void test_kronecker_product() SM_ab2.insert(0,0)=37.0; SM_ab2 = kroneckerProduct(SM_a,SM_b); CALL_SUBTEST(check_kronecker_product(SM_ab2)); + CALL_SUBTEST(check_kronecker_product(kroneckerProduct(SM_a,SM_b))); - // test kroneckerProduct(SM,SM,SM) with sparse pattern + // test SM = kroneckerProduct(SM,SM) with sparse pattern SM_a.resize(4,5); SM_b.resize(3,2); SM_a.resizeNonZeros(0); @@ -169,7 +174,7 @@ void test_kronecker_product() SM_ab = kroneckerProduct(SM_a,SM_b); CALL_SUBTEST(check_sparse_kronecker_product(SM_ab)); - // test dimension of result of kroneckerProduct(DM,DM,DM) + // test dimension of result of DM = kroneckerProduct(DM,DM) MatrixXd DM_a2(2,1); MatrixXd DM_b2(5,4); MatrixXd DM_ab2 = kroneckerProduct(DM_a2,DM_b2); @@ -178,4 +183,70 @@ void test_kronecker_product() DM_b2.resize(4,8); DM_ab2 = kroneckerProduct(DM_a2,DM_b2); CALL_SUBTEST(check_dimension(DM_ab2,10*4,9*8)); + + for(int i = 0; i < g_repeat; i++) + { + double density = Eigen::internal::random<double>(0.01,0.5); + int ra = Eigen::internal::random<int>(1,50); + int ca = Eigen::internal::random<int>(1,50); + int rb = Eigen::internal::random<int>(1,50); + int cb = Eigen::internal::random<int>(1,50); + SparseMatrix<float,ColMajor> sA(ra,ca), sB(rb,cb), sC; + SparseMatrix<float,RowMajor> sC2; + MatrixXf dA(ra,ca), dB(rb,cb), dC; + 
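+      // initSparse (from the harness's sparse.h included above) fills the
+      // dense matrix and its sparse counterpart with the same random entries
+      // at the requested density, so each sparse kroneckerProduct result
+      // below can be compared against the dense one via VERIFY_IS_APPROX.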
initSparse(density, dA, sA); + initSparse(density, dB, sB); + + sC = kroneckerProduct(sA,sB); + dC = kroneckerProduct(dA,dB); + VERIFY_IS_APPROX(MatrixXf(sC),dC); + + sC = kroneckerProduct(sA.transpose(),sB); + dC = kroneckerProduct(dA.transpose(),dB); + VERIFY_IS_APPROX(MatrixXf(sC),dC); + + sC = kroneckerProduct(sA.transpose(),sB.transpose()); + dC = kroneckerProduct(dA.transpose(),dB.transpose()); + VERIFY_IS_APPROX(MatrixXf(sC),dC); + + sC = kroneckerProduct(sA,sB.transpose()); + dC = kroneckerProduct(dA,dB.transpose()); + VERIFY_IS_APPROX(MatrixXf(sC),dC); + + sC2 = kroneckerProduct(sA,sB); + dC = kroneckerProduct(dA,dB); + VERIFY_IS_APPROX(MatrixXf(sC2),dC); + + sC2 = kroneckerProduct(dA,sB); + dC = kroneckerProduct(dA,dB); + VERIFY_IS_APPROX(MatrixXf(sC2),dC); + + sC2 = kroneckerProduct(sA,dB); + dC = kroneckerProduct(dA,dB); + VERIFY_IS_APPROX(MatrixXf(sC2),dC); + + sC2 = kroneckerProduct(2*sA,sB); + dC = kroneckerProduct(2*dA,dB); + VERIFY_IS_APPROX(MatrixXf(sC2),dC); + } +} + +#endif + +#ifdef EIGEN_TEST_PART_2 + +// simply check that for a dense Kronecker product, the sparse module is not needed + +#include "main.h" +#include <Eigen/KroneckerProduct> + +void test_kronecker_product() +{ + MatrixXd a(2,2), b(3,3), c; + a.setRandom(); + b.setRandom(); + c = kroneckerProduct(a,b); + VERIFY_IS_APPROX(c.block(3,3,3,3), a(1,1)*b); } + +#endif diff --git a/eigen/unsupported/test/levenberg_marquardt.cpp b/eigen/unsupported/test/levenberg_marquardt.cpp index 0446472..64f168c 100644 --- a/eigen/unsupported/test/levenberg_marquardt.cpp +++ b/eigen/unsupported/test/levenberg_marquardt.cpp @@ -9,6 +9,9 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// FIXME: These tests all check for hard-coded values. Ideally, parameters and start estimates should be randomized.
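+// All functors below follow the same pattern: they derive from
+// DenseFunctor<double>, pass (number of parameters, number of residuals) to
+// the base constructor (e.g. lmder_functor uses 3 and 15), and implement
+// operator()(x, fvec) for the residual vector plus, where analytic
+// derivatives are used, df(x, fjac) for its Jacobian; LevenbergMarquardt
+// then minimizes the squared norm of fvec.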
+ + #include <stdio.h> #include "main.h" @@ -20,6 +23,9 @@ using std::sqrt; +// tolerance for checking number of iterations; note that in the checks below +// N * LM_EVAL_COUNT_TOL expands to (N*4)/3, evaluated left to right +#define LM_EVAL_COUNT_TOL 4/3 + struct lmder_functor : DenseFunctor<double> { lmder_functor(void): DenseFunctor<double>(3,15) {} @@ -275,7 +281,7 @@ const double chwirut2_functor::m_y[54] = { 92.9000E0 ,57.1000E0 ,31.0500E0 ,11.5 void testNistChwirut2(void) { const int n=3; - int info; + LevenbergMarquardtSpace::Status info; VectorXd x(n); @@ -610,7 +616,7 @@ const double lanczos1_functor::y[24] = { 2.513400000000E+00 ,2.044333373291E+00 void testNistLanczos1(void) { const int n=6; - int info; + LevenbergMarquardtSpace::Status info; VectorXd x(n); @@ -624,11 +630,11 @@ void testNistLanczos1(void) info = lm.minimize(x); // check return value - VERIFY_IS_EQUAL(info, 2); + VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall); VERIFY_IS_EQUAL(lm.nfev(), 79); VERIFY_IS_EQUAL(lm.njev(), 72); // check norm^2 -// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.430899764097e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats + VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25); // check x VERIFY_IS_APPROX(x[0], 9.5100000027E-02); VERIFY_IS_APPROX(x[1], 1.0000000001E+00); @@ -645,11 +651,11 @@ void testNistLanczos1(void) info = lm.minimize(x); // check return value - VERIFY_IS_EQUAL(info, 2); + VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall); VERIFY_IS_EQUAL(lm.nfev(), 9); VERIFY_IS_EQUAL(lm.njev(), 8); // check norm^2 -// VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.428595533845e-25); // should be 1.4307867721E-25, but nist results are on 128-bit floats + VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25); // check x VERIFY_IS_APPROX(x[0], 9.5100000027E-02); VERIFY_IS_APPROX(x[1], 1.0000000001E+00); @@ -696,7 +702,7 @@ const double rat42_functor::y[9] = { 8.930E0 ,10.800E0 ,18.590E0 ,22.330E0 ,39.3 void testNistRat42(void) { const int n=3; - int info; + LevenbergMarquardtSpace::Status info; VectorXd x(n); @@ -710,7 +716,7 @@ void testNistRat42(void) info = lm.minimize(x); // check return value - VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall); VERIFY_IS_EQUAL(lm.nfev(), 10); VERIFY_IS_EQUAL(lm.njev(), 8); // check norm^2 @@ -728,7 +734,7 @@ void testNistRat42(void) info = lm.minimize(x); // check return value - VERIFY_IS_EQUAL(info, 1); + VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall); VERIFY_IS_EQUAL(lm.nfev(), 6); VERIFY_IS_EQUAL(lm.njev(), 5); // check norm^2 @@ -774,7 +780,7 @@ const double MGH10_functor::y[16] = { 3.478000E+04, 2.861000E+04, 2.365000E+04, void testNistMGH10(void) { const int n=3; - int info; + LevenbergMarquardtSpace::Status info; VectorXd x(n); @@ -786,17 +792,26 @@ void testNistMGH10(void) MGH10_functor functor; LevenbergMarquardt<MGH10_functor> lm(functor); info = lm.minimize(x); + ++g_test_level; + VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall); + --g_test_level; + // was: VERIFY_IS_EQUAL(info, 1); - // check return value - VERIFY_IS_EQUAL(info, 1); - VERIFY_IS_EQUAL(lm.nfev(), 284 ); - VERIFY_IS_EQUAL(lm.njev(), 249 ); // check norm^2 VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01); // check x VERIFY_IS_APPROX(x[0], 5.6096364710E-03); VERIFY_IS_APPROX(x[1], 6.1813463463E+03); VERIFY_IS_APPROX(x[2], 3.4522363462E+02); + + // check return value + + ++g_test_level; + VERIFY_IS_EQUAL(lm.nfev(), 284 ); + VERIFY_IS_EQUAL(lm.njev(), 249 ); + --g_test_level; + VERIFY(lm.nfev() <
284 * LM_EVAL_COUNT_TOL); + VERIFY(lm.njev() < 249 * LM_EVAL_COUNT_TOL); /* * Second try @@ -804,17 +819,25 @@ void testNistMGH10(void) x<< 0.02, 4000., 250.; // do the computation info = lm.minimize(x); + ++g_test_level; + VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall); + // was: VERIFY_IS_EQUAL(info, 1); + --g_test_level; - // check return value - VERIFY_IS_EQUAL(info, 1); - VERIFY_IS_EQUAL(lm.nfev(), 126); - VERIFY_IS_EQUAL(lm.njev(), 116); // check norm^2 VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01); // check x VERIFY_IS_APPROX(x[0], 5.6096364710E-03); VERIFY_IS_APPROX(x[1], 6.1813463463E+03); VERIFY_IS_APPROX(x[2], 3.4522363462E+02); + + // check return value + ++g_test_level; + VERIFY_IS_EQUAL(lm.nfev(), 126); + VERIFY_IS_EQUAL(lm.njev(), 116); + --g_test_level; + VERIFY(lm.nfev() < 126 * LM_EVAL_COUNT_TOL); + VERIFY(lm.njev() < 116 * LM_EVAL_COUNT_TOL); } @@ -866,15 +889,16 @@ void testNistBoxBOD(void) lm.setFactor(10); info = lm.minimize(x); - // check return value - VERIFY_IS_EQUAL(info, 1); - VERIFY_IS_EQUAL(lm.nfev(), 31); - VERIFY_IS_EQUAL(lm.njev(), 25); // check norm^2 VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03); // check x VERIFY_IS_APPROX(x[0], 2.1380940889E+02); VERIFY_IS_APPROX(x[1], 5.4723748542E-01); + + // check return value + VERIFY_IS_EQUAL(info, 1); + VERIFY(lm.nfev() < 31); // 31 + VERIFY(lm.njev() < 25); // 25 /* * Second try @@ -888,8 +912,12 @@ void testNistBoxBOD(void) // check return value VERIFY_IS_EQUAL(info, 1); - VERIFY_IS_EQUAL(lm.nfev(), 15 ); - VERIFY_IS_EQUAL(lm.njev(), 14 ); + ++g_test_level; + VERIFY_IS_EQUAL(lm.nfev(), 16 ); + VERIFY_IS_EQUAL(lm.njev(), 15 ); + --g_test_level; + VERIFY(lm.nfev() < 16 * LM_EVAL_COUNT_TOL); + VERIFY(lm.njev() < 15 * LM_EVAL_COUNT_TOL); // check norm^2 VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03); // check x @@ -948,10 +976,6 @@ void testNistMGH17(void) lm.setMaxfev(1000); info = lm.minimize(x); - // check return value -// VERIFY_IS_EQUAL(info, 2); //FIXME Use (lm.info() == Success) -// VERIFY_IS_EQUAL(lm.nfev(), 602 ); - VERIFY_IS_EQUAL(lm.njev(), 545 ); // check norm^2 VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.4648946975E-05); // check x @@ -960,6 +984,11 @@ void testNistMGH17(void) VERIFY_IS_APPROX(x[2], -1.4646871366E+00); VERIFY_IS_APPROX(x[3], 1.2867534640E-02); VERIFY_IS_APPROX(x[4], 2.2122699662E-02); + + // check return value +// VERIFY_IS_EQUAL(info, 2); //FIXME Use (lm.info() == Success) + VERIFY(lm.nfev() < 700 ); // 602 + VERIFY(lm.njev() < 600 ); // 545 /* * Second try @@ -1035,10 +1064,6 @@ void testNistMGH09(void) lm.setMaxfev(1000); info = lm.minimize(x); - // check return value - VERIFY_IS_EQUAL(info, 1); - VERIFY_IS_EQUAL(lm.nfev(), 490 ); - VERIFY_IS_EQUAL(lm.njev(), 376 ); // check norm^2 VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 3.0750560385E-04); // check x @@ -1046,6 +1071,10 @@ void testNistMGH09(void) VERIFY_IS_APPROX(x[1], 0.19126423573); // should be 1.9128232873E-01 VERIFY_IS_APPROX(x[2], 0.12305309914); // should be 1.2305650693E-01 VERIFY_IS_APPROX(x[3], 0.13605395375); // should be 1.3606233068E-01 + // check return value + VERIFY_IS_EQUAL(info, 1); + VERIFY(lm.nfev() < 510 ); // 490 + VERIFY(lm.njev() < 400 ); // 376 /* * Second try diff --git a/eigen/unsupported/test/matrix_function.cpp b/eigen/unsupported/test/matrix_function.cpp index 3c76cfb..7c9b68a 100644 --- a/eigen/unsupported/test/matrix_function.cpp +++ b/eigen/unsupported/test/matrix_function.cpp @@ -102,7 +102,7 @@ void testMatrixExponential(const 
MatrixType& A) typedef typename NumTraits<Scalar>::Real RealScalar; typedef std::complex<RealScalar> ComplexScalar; - VERIFY_IS_APPROX(A.exp(), A.matrixFunction(StdStemFunctions<ComplexScalar>::exp)); + VERIFY_IS_APPROX(A.exp(), A.matrixFunction(internal::stem_function_exp<ComplexScalar>)); } template<typename MatrixType> @@ -113,8 +113,8 @@ void testMatrixLogarithm(const MatrixType& A) MatrixType scaledA; RealScalar maxImagPartOfSpectrum = A.eigenvalues().imag().cwiseAbs().maxCoeff(); - if (maxImagPartOfSpectrum >= 0.9 * M_PI) - scaledA = A * 0.9 * M_PI / maxImagPartOfSpectrum; + if (maxImagPartOfSpectrum >= RealScalar(0.9L * EIGEN_PI)) + scaledA = A * RealScalar(0.9L * EIGEN_PI) / maxImagPartOfSpectrum; else scaledA = A; diff --git a/eigen/unsupported/test/matrix_functions.h b/eigen/unsupported/test/matrix_functions.h index 5817cae..4e26364 100644 --- a/eigen/unsupported/test/matrix_functions.h +++ b/eigen/unsupported/test/matrix_functions.h @@ -10,27 +10,47 @@ #include "main.h" #include <unsupported/Eigen/MatrixFunctions> +// For complex matrices, any matrix is fine. +template<typename MatrixType, int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex> +struct processTriangularMatrix +{ + static void run(MatrixType&, MatrixType&, const MatrixType&) + { } +}; + +// For real matrices, make sure none of the eigenvalues are negative. +template<typename MatrixType> +struct processTriangularMatrix<MatrixType,0> +{ + static void run(MatrixType& m, MatrixType& T, const MatrixType& U) + { + const Index size = m.cols(); + + for (Index i=0; i < size; ++i) { + if (i == size - 1 || T.coeff(i+1,i) == 0) + T.coeffRef(i,i) = std::abs(T.coeff(i,i)); + else + ++i; + } + m = U * T * U.transpose(); + } +}; + template <typename MatrixType, int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex> struct generateTestMatrix; -// for real matrices, make sure none of the eigenvalues are negative template <typename MatrixType> struct generateTestMatrix<MatrixType,0> { static void run(MatrixType& result, typename MatrixType::Index size) { - MatrixType mat = MatrixType::Random(size, size); - EigenSolver<MatrixType> es(mat); - typename EigenSolver<MatrixType>::EigenvalueType eivals = es.eigenvalues(); - for (typename MatrixType::Index i = 0; i < size; ++i) { - if (eivals(i).imag() == 0 && eivals(i).real() < 0) - eivals(i) = -eivals(i); - } - result = (es.eigenvectors() * eivals.asDiagonal() * es.eigenvectors().inverse()).real(); + result = MatrixType::Random(size, size); + RealSchur<MatrixType> schur(result); + MatrixType T = schur.matrixT(); + processTriangularMatrix<MatrixType>::run(result, T, schur.matrixU()); } }; -// for complex matrices, any matrix is fine template <typename MatrixType> struct generateTestMatrix<MatrixType,1> { @@ -41,7 +61,7 @@ struct generateTestMatrix<MatrixType,1> }; template <typename Derived, typename OtherDerived> -double relerr(const MatrixBase<Derived>& A, const MatrixBase<OtherDerived>& B) +typename Derived::RealScalar relerr(const MatrixBase<Derived>& A, const MatrixBase<OtherDerived>& B) { return std::sqrt((A - B).cwiseAbs2().sum() / (std::min)(A.cwiseAbs2().sum(), B.cwiseAbs2().sum())); } diff --git a/eigen/unsupported/test/matrix_power.cpp b/eigen/unsupported/test/matrix_power.cpp index b9d513b..7ccfacf 100644 --- a/eigen/unsupported/test/matrix_power.cpp +++ b/eigen/unsupported/test/matrix_power.cpp @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2012 Chen-Pang He <jdh8@ms63.hinet.net> +// Copyright (C) 2012, 2013 Chen-Pang He <jdh8@ms63.hinet.net> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -9,35 +9,8 @@ #include "matrix_functions.h" -template <typename MatrixType, int IsComplex = NumTraits<typename MatrixType::Scalar>::IsComplex> -struct generateTriangularMatrix; - -// for real matrices, make sure none of the eigenvalues are negative -template <typename MatrixType> -struct generateTriangularMatrix<MatrixType,0> -{ - static void run(MatrixType& result, typename MatrixType::Index size) - { - result.resize(size, size); - result.template triangularView<Upper>() = MatrixType::Random(size, size); - for (typename MatrixType::Index i = 0; i < size; ++i) - result.coeffRef(i,i) = std::abs(result.coeff(i,i)); - } -}; - -// for complex matrices, any matrix is fine -template <typename MatrixType> -struct generateTriangularMatrix<MatrixType,1> -{ - static void run(MatrixType& result, typename MatrixType::Index size) - { - result.resize(size, size); - result.template triangularView<Upper>() = MatrixType::Random(size, size); - } -}; - template<typename T> -void test2dRotation(double tol) +void test2dRotation(const T& tol) { Matrix<T,2,2> A, B, C; T angle, c, s; @@ -46,19 +19,19 @@ void test2dRotation(double tol) MatrixPower<Matrix<T,2,2> > Apow(A); for (int i=0; i<=20; ++i) { - angle = pow(10, (i-10) / 5.); + angle = std::pow(T(10), (i-10) / T(5.)); c = std::cos(angle); s = std::sin(angle); B << c, s, -s, c; - C = Apow(std::ldexp(angle,1) / M_PI); + C = Apow(std::ldexp(angle,1) / T(EIGEN_PI)); std::cout << "test2dRotation: i = " << i << " error powerm = " << relerr(C,B) << '\n'; - VERIFY(C.isApprox(B, static_cast<T>(tol))); + VERIFY(C.isApprox(B, tol)); } } template<typename T> -void test2dHyperbolicRotation(double tol) +void test2dHyperbolicRotation(const T& tol) { Matrix<std::complex<T>,2,2> A, B, C; T angle, ch = std::cosh((T)1); @@ -75,12 +48,26 @@ void test2dHyperbolicRotation(double tol) C = Apow(angle); std::cout << "test2dHyperbolicRotation: i = " << i << " error powerm = " << relerr(C,B) << '\n'; - VERIFY(C.isApprox(B, static_cast<T>(tol))); + VERIFY(C.isApprox(B, tol)); + } +} + +template<typename T> +void test3dRotation(const T& tol) +{ + Matrix<T,3,1> v; + T angle; + + for (int i=0; i<=20; ++i) { + v = Matrix<T,3,1>::Random(); + v.normalize(); + angle = std::pow(T(10), (i-10) / T(5.)); + VERIFY(AngleAxis<T>(angle, v).matrix().isApprox(AngleAxis<T>(1,v).matrix().pow(angle), tol)); } } template<typename MatrixType> -void testExponentLaws(const MatrixType& m, double tol) +void testGeneral(const MatrixType& m, const typename MatrixType::RealScalar& tol) { typedef typename MatrixType::RealScalar RealScalar; MatrixType m1, m2, m3, m4, m5; @@ -97,37 +84,121 @@ void testExponentLaws(const MatrixType& m, double tol) m4 = mpow(x+y); m5.noalias() = m2 * m3; - VERIFY(m4.isApprox(m5, static_cast<RealScalar>(tol))); + VERIFY(m4.isApprox(m5, tol)); m4 = mpow(x*y); m5 = m2.pow(y); - VERIFY(m4.isApprox(m5, static_cast<RealScalar>(tol))); + VERIFY(m4.isApprox(m5, tol)); m4 = (std::abs(x) * m1).pow(y); m5 = std::pow(std::abs(x), y) * m3; - VERIFY(m4.isApprox(m5, static_cast<RealScalar>(tol))); + VERIFY(m4.isApprox(m5, tol)); + } +} + +template<typename MatrixType> +void testSingular(const MatrixType& m_const, const typename MatrixType::RealScalar& tol) +{ + // we need to pass by reference in order to prevent errors with + // MSVC for 
aligned data types ... + MatrixType& m = const_cast<MatrixType&>(m_const); + + const int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex; + typedef typename internal::conditional<IsComplex, TriangularView<MatrixType,Upper>, const MatrixType&>::type TriangularType; + typename internal::conditional< IsComplex, ComplexSchur<MatrixType>, RealSchur<MatrixType> >::type schur; + MatrixType T; + + for (int i=0; i < g_repeat; ++i) { + m.setRandom(); + m.col(0).fill(0); + + schur.compute(m); + T = schur.matrixT(); + const MatrixType& U = schur.matrixU(); + processTriangularMatrix<MatrixType>::run(m, T, U); + MatrixPower<MatrixType> mpow(m); + + T = T.sqrt(); + VERIFY(mpow(0.5L).isApprox(U * (TriangularType(T) * U.adjoint()), tol)); + + T = T.sqrt(); + VERIFY(mpow(0.25L).isApprox(U * (TriangularType(T) * U.adjoint()), tol)); + + T = T.sqrt(); + VERIFY(mpow(0.125L).isApprox(U * (TriangularType(T) * U.adjoint()), tol)); + } +} + +template<typename MatrixType> +void testLogThenExp(const MatrixType& m_const, const typename MatrixType::RealScalar& tol) +{ + // we need to pass by reference in order to prevent errors with + // MSVC for aligned data types ... + MatrixType& m = const_cast<MatrixType&>(m_const); + + typedef typename MatrixType::Scalar Scalar; + Scalar x; + + for (int i=0; i < g_repeat; ++i) { + generateTestMatrix<MatrixType>::run(m, m.rows()); + x = internal::random<Scalar>(); + VERIFY(m.pow(x).isApprox((x * m.log()).exp(), tol)); } } typedef Matrix<double,3,3,RowMajor> Matrix3dRowMajor; +typedef Matrix<long double,3,3> Matrix3e; typedef Matrix<long double,Dynamic,Dynamic> MatrixXe; void test_matrix_power() { CALL_SUBTEST_2(test2dRotation<double>(1e-13)); CALL_SUBTEST_1(test2dRotation<float>(2e-5)); // was 1e-5, relaxed for clang 2.8 / linux / x86-64 - CALL_SUBTEST_9(test2dRotation<long double>(1e-13)); + CALL_SUBTEST_9(test2dRotation<long double>(1e-13L)); CALL_SUBTEST_2(test2dHyperbolicRotation<double>(1e-14)); CALL_SUBTEST_1(test2dHyperbolicRotation<float>(1e-5)); - CALL_SUBTEST_9(test2dHyperbolicRotation<long double>(1e-14)); - - CALL_SUBTEST_2(testExponentLaws(Matrix2d(), 1e-13)); - CALL_SUBTEST_7(testExponentLaws(Matrix3dRowMajor(), 1e-13)); - CALL_SUBTEST_3(testExponentLaws(Matrix4cd(), 1e-13)); - CALL_SUBTEST_4(testExponentLaws(MatrixXd(8,8), 2e-12)); - CALL_SUBTEST_1(testExponentLaws(Matrix2f(), 1e-4)); - CALL_SUBTEST_5(testExponentLaws(Matrix3cf(), 1e-4)); - CALL_SUBTEST_8(testExponentLaws(Matrix4f(), 1e-4)); - CALL_SUBTEST_6(testExponentLaws(MatrixXf(2,2), 1e-3)); // see bug 614 - CALL_SUBTEST_9(testExponentLaws(MatrixXe(7,7), 1e-13)); + CALL_SUBTEST_9(test2dHyperbolicRotation<long double>(1e-14L)); + + CALL_SUBTEST_10(test3dRotation<double>(1e-13)); + CALL_SUBTEST_11(test3dRotation<float>(1e-5)); + CALL_SUBTEST_12(test3dRotation<long double>(1e-13L)); + + CALL_SUBTEST_2(testGeneral(Matrix2d(), 1e-13)); + CALL_SUBTEST_7(testGeneral(Matrix3dRowMajor(), 1e-13)); + CALL_SUBTEST_3(testGeneral(Matrix4cd(), 1e-13)); + CALL_SUBTEST_4(testGeneral(MatrixXd(8,8), 2e-12)); + CALL_SUBTEST_1(testGeneral(Matrix2f(), 1e-4)); + CALL_SUBTEST_5(testGeneral(Matrix3cf(), 1e-4)); + CALL_SUBTEST_8(testGeneral(Matrix4f(), 1e-4)); + CALL_SUBTEST_6(testGeneral(MatrixXf(2,2), 1e-3)); // see bug 614 + CALL_SUBTEST_9(testGeneral(MatrixXe(7,7), 1e-13L)); + CALL_SUBTEST_10(testGeneral(Matrix3d(), 1e-13)); + CALL_SUBTEST_11(testGeneral(Matrix3f(), 1e-4)); + CALL_SUBTEST_12(testGeneral(Matrix3e(), 1e-13L)); + + CALL_SUBTEST_2(testSingular(Matrix2d(), 1e-13)); + 
CALL_SUBTEST_7(testSingular(Matrix3dRowMajor(), 1e-13)); + CALL_SUBTEST_3(testSingular(Matrix4cd(), 1e-13)); + CALL_SUBTEST_4(testSingular(MatrixXd(8,8), 2e-12)); + CALL_SUBTEST_1(testSingular(Matrix2f(), 1e-4)); + CALL_SUBTEST_5(testSingular(Matrix3cf(), 1e-4)); + CALL_SUBTEST_8(testSingular(Matrix4f(), 1e-4)); + CALL_SUBTEST_6(testSingular(MatrixXf(2,2), 1e-3)); + CALL_SUBTEST_9(testSingular(MatrixXe(7,7), 1e-13L)); + CALL_SUBTEST_10(testSingular(Matrix3d(), 1e-13)); + CALL_SUBTEST_11(testSingular(Matrix3f(), 1e-4)); + CALL_SUBTEST_12(testSingular(Matrix3e(), 1e-13L)); + + CALL_SUBTEST_2(testLogThenExp(Matrix2d(), 1e-13)); + CALL_SUBTEST_7(testLogThenExp(Matrix3dRowMajor(), 1e-13)); + CALL_SUBTEST_3(testLogThenExp(Matrix4cd(), 1e-13)); + CALL_SUBTEST_4(testLogThenExp(MatrixXd(8,8), 2e-12)); + CALL_SUBTEST_1(testLogThenExp(Matrix2f(), 1e-4)); + CALL_SUBTEST_5(testLogThenExp(Matrix3cf(), 1e-4)); + CALL_SUBTEST_8(testLogThenExp(Matrix4f(), 1e-4)); + CALL_SUBTEST_6(testLogThenExp(MatrixXf(2,2), 1e-3)); + CALL_SUBTEST_9(testLogThenExp(MatrixXe(7,7), 1e-13L)); + CALL_SUBTEST_10(testLogThenExp(Matrix3d(), 1e-13)); + CALL_SUBTEST_11(testLogThenExp(Matrix3f(), 1e-4)); + CALL_SUBTEST_12(testLogThenExp(Matrix3e(), 1e-13L)); } diff --git a/eigen/unsupported/test/minres.cpp b/eigen/unsupported/test/minres.cpp index 509ebe0..8b300b7 100644 --- a/eigen/unsupported/test/minres.cpp +++ b/eigen/unsupported/test/minres.cpp @@ -1,8 +1,8 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr> // Copyright (C) 2012 Giacomo Po <gpo@ucla.edu> +// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -14,21 +14,14 @@ template<typename T> void test_minres_T() { - MINRES<SparseMatrix<T>, Lower|Upper, DiagonalPreconditioner<T> > minres_colmajor_diag; + // Identity preconditioner MINRES<SparseMatrix<T>, Lower, IdentityPreconditioner > minres_colmajor_lower_I; MINRES<SparseMatrix<T>, Upper, IdentityPreconditioner > minres_colmajor_upper_I; -// MINRES<SparseMatrix<T>, Lower, IncompleteLUT<T> > minres_colmajor_ilut; //minres<SparseMatrix<T>, SSORPreconditioner<T> > minres_colmajor_ssor; - - -// CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_diag) ); - // CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_ilut) ); - //CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_ssor) ); // Diagonal preconditioner MINRES<SparseMatrix<T>, Lower, DiagonalPreconditioner<T> > minres_colmajor_lower_diag; MINRES<SparseMatrix<T>, Upper, DiagonalPreconditioner<T> > minres_colmajor_upper_diag; - MINRES<SparseMatrix<T>, Upper|Lower, DiagonalPreconditioner<T> > minres_colmajor_uplo_diag; + MINRES<SparseMatrix<T>, Lower|Upper, DiagonalPreconditioner<T> > minres_colmajor_uplo_diag; // call tests for SPD matrix CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_lower_I) ); @@ -36,14 +29,16 @@ template<typename T> void test_minres_T() CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_upper_I) ); CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_lower_diag) ); CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_upper_diag) ); -// CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_uplo_diag) ); + CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_uplo_diag) ); // TO DO: symmetric semi-definite matrix // TO DO: symmetric indefinite matrix + } void test_minres() { CALL_SUBTEST_1(test_minres_T<double>()); -// CALL_SUBTEST_2(test_minres_T<std::complex<double> >()); + } diff --git a/eigen/unsupported/test/mpreal/mpreal.h b/eigen/unsupported/test/mpreal/mpreal.h index f83e52d..8404f1f 100644 --- a/eigen/unsupported/test/mpreal/mpreal.h +++ b/eigen/unsupported/test/mpreal/mpreal.h @@ -1,3074 +1,3104 @@ -/* - MPFR C++: Multi-precision floating point number class for C++. - Based on MPFR library: http://mpfr.org - - Project homepage: http://www.holoborodko.com/pavel/mpfr - Contact e-mail: pavel@holoborodko.com - - Copyright (c) 2008-2014 Pavel Holoborodko - - Contributors: - Dmitriy Gubanov, Konstantin Holoborodko, Brian Gladman, - Helmut Jarausch, Fokko Beekhof, Ulrich Mutze, Heinz van Saanen, - Pere Constans, Peter van Hoof, Gael Guennebaud, Tsai Chia Cheng, - Alexei Zubanov, Jauhien Piatlicki, Victor Berger, John Westwood, - Petr Aleksandrov, Orion Poplawski, Charles Karney. - - Licensing: - (A) MPFR C++ is under GNU General Public License ("GPL"). - - (B) Non-free licenses may also be purchased from the author, for users who - do not want their programs protected by the GPL. - - The non-free licenses are for users that wish to use MPFR C++ in - their products but are unwilling to release their software - under the GPL (which would require them to release source code - and allow free redistribution). - - Such users can purchase an unlimited-use license from the author. - Contact us for more details.
- - GNU General Public License ("GPL") copyright permissions statement: - ************************************************************************** - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. -*/ - -#ifndef __MPREAL_H__ -#define __MPREAL_H__ - -#include <string> -#include <iostream> -#include <sstream> -#include <stdexcept> -#include <cfloat> -#include <cmath> -#include <cstring> -#include <limits> - -// Options -// FIXME HAVE_INT64_SUPPORT leads to clashes with long int and int64_t on some systems. -//#define MPREAL_HAVE_INT64_SUPPORT // Enable int64_t support if possible. Available only for MSVC 2010 & GCC. -#define MPREAL_HAVE_MSVC_DEBUGVIEW // Enable Debugger Visualizer for "Debug" builds in MSVC. -#define MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS // Enable extended std::numeric_limits<mpfr::mpreal> specialization. - // Meaning that "digits", "round_style" and similar members are defined as functions, not constants. - // See std::numeric_limits<mpfr::mpreal> at the end of the file for more information. - -// Library version -#define MPREAL_VERSION_MAJOR 3 -#define MPREAL_VERSION_MINOR 5 -#define MPREAL_VERSION_PATCHLEVEL 9 -#define MPREAL_VERSION_STRING "3.5.9" - -// Detect compiler using signatures from http://predef.sourceforge.net/ -#if defined(__GNUC__) && defined(__INTEL_COMPILER) - #define IsInf(x) isinf(x) // Intel ICC compiler on Linux - -#elif defined(_MSC_VER) // Microsoft Visual C++ - #define IsInf(x) (!_finite(x)) - -#else - #define IsInf(x) std::isinf(x) // GNU C/C++ (and/or other compilers), just hope for C99 conformance -#endif - -// A Clang feature extension to determine compiler features. -#ifndef __has_feature - #define __has_feature(x) 0 -#endif - -// Detect support for r-value references (move semantic). Borrowed from Eigen. -#if (__has_feature(cxx_rvalue_references) || \ - defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L || \ - (defined(_MSC_VER) && _MSC_VER >= 1600)) - - #define MPREAL_HAVE_MOVE_SUPPORT - - // Use fields in mpfr_t structure to check if it was initialized / set dummy initialization - #define mpfr_is_initialized(x) (0 != (x)->_mpfr_d) - #define mpfr_set_uninitialized(x) ((x)->_mpfr_d = 0 ) -#endif - -// Detect support for explicit converters. -#if (__has_feature(cxx_explicit_conversions) || \ - defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L || \ - (defined(_MSC_VER) && _MSC_VER >= 1800)) - - #define MPREAL_HAVE_EXPLICIT_CONVERTERS -#endif - -// Detect available 64-bit capabilities -#if defined(MPREAL_HAVE_INT64_SUPPORT) - - #define MPFR_USE_INTMAX_T // Should be defined before mpfr.h - - #if defined(_MSC_VER) // MSVC + Windows - #if (_MSC_VER >= 1600) - #include <stdint.h> // <stdint.h> is available only in msvc2010! 
- - #else // MPFR relies on intmax_t which is available only in msvc2010 - #undef MPREAL_HAVE_INT64_SUPPORT // Besides, MPFR & MPIR have to be compiled with msvc2010 - #undef MPFR_USE_INTMAX_T // Since we cannot detect this, disable x64 by default - // Someone should change this manually if needed. - #endif - - #elif defined (__GNUC__) && defined(__linux__) - #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) || defined (__PPC64__) - #undef MPREAL_HAVE_INT64_SUPPORT // Remove all shaman dances for x64 builds since - #undef MPFR_USE_INTMAX_T // GCC already supports x64 as of "long int" is 64-bit integer, nothing left to do - #else - #include <stdint.h> // use int64_t, uint64_t otherwise - #endif - - #else - #include <stdint.h> // rely on int64_t, uint64_t in all other cases, Mac OSX, etc. - #endif - -#endif - -#if defined(MPREAL_HAVE_MSVC_DEBUGVIEW) && defined(_MSC_VER) && defined(_DEBUG) - #define MPREAL_MSVC_DEBUGVIEW_CODE DebugView = toString(); - #define MPREAL_MSVC_DEBUGVIEW_DATA std::string DebugView; -#else - #define MPREAL_MSVC_DEBUGVIEW_CODE - #define MPREAL_MSVC_DEBUGVIEW_DATA -#endif - -#include <mpfr.h> - -#if (MPFR_VERSION < MPFR_VERSION_NUM(3,0,0)) - #include <cstdlib> // Needed for random() -#endif - -// Less important options -#define MPREAL_DOUBLE_BITS_OVERFLOW -1 // Triggers overflow exception during conversion to double if mpreal - // cannot fit in MPREAL_DOUBLE_BITS_OVERFLOW bits - // = -1 disables overflow checks (default) -#if defined(__GNUC__) - #define MPREAL_PERMISSIVE_EXPR __extension__ -#else - #define MPREAL_PERMISSIVE_EXPR -#endif - -namespace mpfr { - -class mpreal { -private: - mpfr_t mp; - -public: - - // Get default rounding mode & precision - inline static mp_rnd_t get_default_rnd() { return (mp_rnd_t)(mpfr_get_default_rounding_mode()); } - inline static mp_prec_t get_default_prec() { return mpfr_get_default_prec(); } - - // Constructors && type conversions - mpreal(); - mpreal(const mpreal& u); - mpreal(const mpf_t u); - mpreal(const mpz_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const mpq_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const double u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const long double u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const unsigned long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const unsigned int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - mpreal(const int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd()); - - // Construct mpreal from mpfr_t structure. - // shared = true allows to avoid deep copy, so that mpreal and 'u' share the same data & pointers. 
- mpreal(const mpfr_t u, bool shared = false);
-
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
- mpreal(const uint64_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
- mpreal(const int64_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
-#endif
-
- mpreal(const char* s, mp_prec_t prec = mpreal::get_default_prec(), int base = 10, mp_rnd_t mode = mpreal::get_default_rnd());
- mpreal(const std::string& s, mp_prec_t prec = mpreal::get_default_prec(), int base = 10, mp_rnd_t mode = mpreal::get_default_rnd());
-
- ~mpreal();
-
-#ifdef MPREAL_HAVE_MOVE_SUPPORT
- mpreal& operator=(mpreal&& v);
- mpreal(mpreal&& u);
-#endif
-
- // Operations
- // =
- // +, -, *, /, ++, --, <<, >>
- // *=, +=, -=, /=,
- // <, >, ==, <=, >=
-
- // =
- mpreal& operator=(const mpreal& v);
- mpreal& operator=(const mpf_t v);
- mpreal& operator=(const mpz_t v);
- mpreal& operator=(const mpq_t v);
- mpreal& operator=(const long double v);
- mpreal& operator=(const double v);
- mpreal& operator=(const unsigned long int v);
- mpreal& operator=(const unsigned int v);
- mpreal& operator=(const long int v);
- mpreal& operator=(const int v);
- mpreal& operator=(const char* s);
- mpreal& operator=(const std::string& s);
-
- // +
- mpreal& operator+=(const mpreal& v);
- mpreal& operator+=(const mpf_t v);
- mpreal& operator+=(const mpz_t v);
- mpreal& operator+=(const mpq_t v);
- mpreal& operator+=(const long double u);
- mpreal& operator+=(const double u);
- mpreal& operator+=(const unsigned long int u);
- mpreal& operator+=(const unsigned int u);
- mpreal& operator+=(const long int u);
- mpreal& operator+=(const int u);
-
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
- mpreal& operator+=(const int64_t u);
- mpreal& operator+=(const uint64_t u);
- mpreal& operator-=(const int64_t u);
- mpreal& operator-=(const uint64_t u);
- mpreal& operator*=(const int64_t u);
- mpreal& operator*=(const uint64_t u);
- mpreal& operator/=(const int64_t u);
- mpreal& operator/=(const uint64_t u);
-#endif
-
- const mpreal operator+() const;
- mpreal& operator++ ();
- const mpreal operator++ (int);
-
- // -
- mpreal& operator-=(const mpreal& v);
- mpreal& operator-=(const mpz_t v);
- mpreal& operator-=(const mpq_t v);
- mpreal& operator-=(const long double u);
- mpreal& operator-=(const double u);
- mpreal& operator-=(const unsigned long int u);
- mpreal& operator-=(const unsigned int u);
- mpreal& operator-=(const long int u);
- mpreal& operator-=(const int u);
- const mpreal operator-() const;
- friend const mpreal operator-(const unsigned long int b, const mpreal& a);
- friend const mpreal operator-(const unsigned int b, const mpreal& a);
- friend const mpreal operator-(const long int b, const mpreal& a);
- friend const mpreal operator-(const int b, const mpreal& a);
- friend const mpreal operator-(const double b, const mpreal& a);
- mpreal& operator-- ();
- const mpreal operator-- (int);
-
- // *
- mpreal& operator*=(const mpreal& v);
- mpreal& operator*=(const mpz_t v);
- mpreal& operator*=(const mpq_t v);
- mpreal& operator*=(const long double v);
- mpreal& operator*=(const double v);
- mpreal& operator*=(const unsigned long int v);
- mpreal& operator*=(const unsigned int v);
- mpreal& operator*=(const long int v);
- mpreal& operator*=(const int v);
-
- // /
- mpreal& operator/=(const mpreal& v);
- mpreal& operator/=(const mpz_t v);
- mpreal& operator/=(const mpq_t v);
- mpreal& operator/=(const long double v);
- mpreal& operator/=(const double v);
- mpreal& operator/=(const unsigned long int v);
- mpreal& operator/=(const unsigned int v);
- mpreal& operator/=(const long int v);
- mpreal& operator/=(const int v);
- friend const mpreal operator/(const unsigned long int b, const mpreal& a);
- friend const mpreal operator/(const unsigned int b, const mpreal& a);
- friend const mpreal operator/(const long int b, const mpreal& a);
- friend const mpreal operator/(const int b, const mpreal& a);
- friend const mpreal operator/(const double b, const mpreal& a);
-
- //<<= Fast Multiplication by 2^u
- mpreal& operator<<=(const unsigned long int u);
- mpreal& operator<<=(const unsigned int u);
- mpreal& operator<<=(const long int u);
- mpreal& operator<<=(const int u);
-
- //>>= Fast Division by 2^u
- mpreal& operator>>=(const unsigned long int u);
- mpreal& operator>>=(const unsigned int u);
- mpreal& operator>>=(const long int u);
- mpreal& operator>>=(const int u);
-
- // Boolean Operators
- friend bool operator > (const mpreal& a, const mpreal& b);
- friend bool operator >= (const mpreal& a, const mpreal& b);
- friend bool operator < (const mpreal& a, const mpreal& b);
- friend bool operator <= (const mpreal& a, const mpreal& b);
- friend bool operator == (const mpreal& a, const mpreal& b);
- friend bool operator != (const mpreal& a, const mpreal& b);
-
- // Optimized specializations for boolean operators
- friend bool operator == (const mpreal& a, const unsigned long int b);
- friend bool operator == (const mpreal& a, const unsigned int b);
- friend bool operator == (const mpreal& a, const long int b);
- friend bool operator == (const mpreal& a, const int b);
- friend bool operator == (const mpreal& a, const long double b);
- friend bool operator == (const mpreal& a, const double b);
-
- // Type Conversion operators
- bool toBool (mp_rnd_t mode = GMP_RNDZ) const;
- long toLong (mp_rnd_t mode = GMP_RNDZ) const;
- unsigned long toULong (mp_rnd_t mode = GMP_RNDZ) const;
- float toFloat (mp_rnd_t mode = GMP_RNDN) const;
- double toDouble (mp_rnd_t mode = GMP_RNDN) const;
- long double toLDouble (mp_rnd_t mode = GMP_RNDN) const;
-
-#if defined (MPREAL_HAVE_EXPLICIT_CONVERTERS)
- explicit operator bool () const { return toBool(); }
- explicit operator int () const { return toLong(); }
- explicit operator long () const { return toLong(); }
- explicit operator long long () const { return toLong(); }
- explicit operator unsigned () const { return toULong(); }
- explicit operator unsigned long () const { return toULong(); }
- explicit operator unsigned long long () const { return toULong(); }
- explicit operator float () const { return toFloat(); }
- explicit operator double () const { return toDouble(); }
- explicit operator long double () const { return toLDouble(); }
-#endif
-
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
- int64_t toInt64 (mp_rnd_t mode = GMP_RNDZ) const;
- uint64_t toUInt64 (mp_rnd_t mode = GMP_RNDZ) const;
-
- #if defined (MPREAL_HAVE_EXPLICIT_CONVERTERS)
- explicit operator int64_t () const { return toInt64(); }
- explicit operator uint64_t () const { return toUInt64(); }
- #endif
-#endif
-
- // Get raw pointers so that mpreal can be directly used in raw mpfr_* functions
- ::mpfr_ptr mpfr_ptr();
- ::mpfr_srcptr mpfr_ptr() const;
- ::mpfr_srcptr mpfr_srcptr() const;
-
- // Convert mpreal to string with n significant digits in base b
- // n = -1 -> convert with the maximum available digits
- std::string toString(int n = -1, int b = 10, mp_rnd_t mode = mpreal::get_default_rnd()) const;
-
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
- std::string toString(const std::string& format) const;
-#endif
-
- std::ostream& output(std::ostream& os) const;
-
- // Math Functions
- friend const mpreal sqr (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal sqrt(const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal sqrt(const unsigned long int v, mp_rnd_t rnd_mode);
- friend const mpreal cbrt(const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal root(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode);
- friend const mpreal pow (const mpreal& a, const mpreal& b, mp_rnd_t rnd_mode);
- friend const mpreal pow (const mpreal& a, const mpz_t b, mp_rnd_t rnd_mode);
- friend const mpreal pow (const mpreal& a, const unsigned long int b, mp_rnd_t rnd_mode);
- friend const mpreal pow (const mpreal& a, const long int b, mp_rnd_t rnd_mode);
- friend const mpreal pow (const unsigned long int a, const mpreal& b, mp_rnd_t rnd_mode);
- friend const mpreal pow (const unsigned long int a, const unsigned long int b, mp_rnd_t rnd_mode);
- friend const mpreal fabs(const mpreal& v, mp_rnd_t rnd_mode);
-
- friend const mpreal abs(const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal dim(const mpreal& a, const mpreal& b, mp_rnd_t rnd_mode);
- friend inline const mpreal mul_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode);
- friend inline const mpreal mul_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode);
- friend inline const mpreal div_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode);
- friend inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode);
- friend int cmpabs(const mpreal& a,const mpreal& b);
-
- friend const mpreal log (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal log2 (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal log10(const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal exp (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal exp2 (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal exp10(const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal log1p(const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal expm1(const mpreal& v, mp_rnd_t rnd_mode);
-
- friend const mpreal cos(const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal sin(const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal tan(const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal sec(const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal csc(const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal cot(const mpreal& v, mp_rnd_t rnd_mode);
- friend int sin_cos(mpreal& s, mpreal& c, const mpreal& v, mp_rnd_t rnd_mode);
-
- friend const mpreal acos (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal asin (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal atan (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal atan2 (const mpreal& y, const mpreal& x, mp_rnd_t rnd_mode);
- friend const mpreal acot (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal asec (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal acsc (const mpreal& v, mp_rnd_t rnd_mode);
-
- friend const mpreal cosh (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal sinh (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal tanh (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal sech (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal csch (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal coth (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal acosh (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal asinh (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal atanh (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal acoth (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal asech (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal acsch (const mpreal& v, mp_rnd_t rnd_mode);
-
- friend const mpreal hypot (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
-
- friend const mpreal fac_ui (unsigned long int v, mp_prec_t prec, mp_rnd_t rnd_mode);
- friend const mpreal eint (const mpreal& v, mp_rnd_t rnd_mode);
-
- friend const mpreal gamma (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal lngamma (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal lgamma (const mpreal& v, int *signp, mp_rnd_t rnd_mode);
- friend const mpreal zeta (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal erf (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal erfc (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal besselj0 (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal besselj1 (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal besseljn (long n, const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal bessely0 (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal bessely1 (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal besselyn (long n, const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal fma (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode);
- friend const mpreal fms (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode);
- friend const mpreal agm (const mpreal& v1, const mpreal& v2, mp_rnd_t rnd_mode);
- friend const mpreal sum (const mpreal tab[], unsigned long int n, mp_rnd_t rnd_mode);
- friend int sgn(const mpreal& v); // returns -1 or +1
-
-// MPFR 2.4.0 Specifics
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
- friend int sinh_cosh (mpreal& s, mpreal& c, const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal li2 (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal fmod (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
- friend const mpreal rec_sqrt (const mpreal& v, mp_rnd_t rnd_mode);
-
- // MATLAB's semantic equivalents
- friend const mpreal rem (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode); // Remainder after division
- friend const mpreal mod (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode); // Modulus after division
-#endif
-
-// MPFR 3.0.0 Specifics
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
- friend const mpreal digamma (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal ai (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal urandom (gmp_randstate_t& state, mp_rnd_t rnd_mode); // use gmp_randinit_default() to init state, gmp_randclear() to clear
- friend const mpreal grandom (gmp_randstate_t& state, mp_rnd_t rnd_mode); // use gmp_randinit_default() to init state, gmp_randclear() to clear
- friend const mpreal grandom (unsigned int seed);
-#endif
-
- // Uniformly distributed random number generation in [0,1] using
- // Mersenne-Twister algorithm by default.
- // Use the parameter to set the seed, e.g.: random((unsigned)time(NULL))
- // Check urandom() for more precise control.
- friend const mpreal random(unsigned int seed);
-
- // Exponent and mantissa manipulation
- friend const mpreal frexp(const mpreal& v, mp_exp_t* exp);
- friend const mpreal ldexp(const mpreal& v, mp_exp_t exp);
-
- // Splits mpreal value into fractional and integer parts.
- // Returns fractional part and stores integer part in n.
- friend const mpreal modf(const mpreal& v, mpreal& n);
-
- // Constants
- // don't forget to call mpfr_free_cache() for every thread where you are using const-functions
- friend const mpreal const_log2 (mp_prec_t prec, mp_rnd_t rnd_mode);
- friend const mpreal const_pi (mp_prec_t prec, mp_rnd_t rnd_mode);
- friend const mpreal const_euler (mp_prec_t prec, mp_rnd_t rnd_mode);
- friend const mpreal const_catalan (mp_prec_t prec, mp_rnd_t rnd_mode);
-
- // returns +inf if sign >= 0, otherwise -inf
- friend const mpreal const_infinity(int sign, mp_prec_t prec);
-
- // Output / Input
- friend std::ostream& operator<<(std::ostream& os, const mpreal& v);
- friend std::istream& operator>>(std::istream& is, mpreal& v);
-
- // Integer Related Functions
- friend const mpreal rint (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal ceil (const mpreal& v);
- friend const mpreal floor(const mpreal& v);
- friend const mpreal round(const mpreal& v);
- friend const mpreal trunc(const mpreal& v);
- friend const mpreal rint_ceil (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal rint_floor (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal rint_round (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal rint_trunc (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal frac (const mpreal& v, mp_rnd_t rnd_mode);
- friend const mpreal remainder ( const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
- friend const mpreal remquo (long* q, const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
-
- // Miscellaneous Functions
- friend const mpreal nexttoward (const mpreal& x, const mpreal& y);
- friend const mpreal nextabove (const mpreal& x);
- friend const mpreal nextbelow (const mpreal& x);
-
- // use gmp_randinit_default() to init state, gmp_randclear() to clear
- friend const mpreal urandomb (gmp_randstate_t& state);
-
-// MPFR < 2.4.2 Specifics
-#if (MPFR_VERSION <= MPFR_VERSION_NUM(2,4,2))
- friend const mpreal random2 (mp_size_t size, mp_exp_t exp);
-#endif
-
- // Instance Checkers
- friend bool isnan (const mpreal& v);
- friend bool isinf (const mpreal& v);
- friend bool isfinite (const mpreal& v);
-
- friend bool isnum (const mpreal& v);
- friend bool iszero (const mpreal& v);
- friend bool isint (const mpreal& v);
-
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
- friend bool isregular(const mpreal& v);
-#endif
-
- // Set/Get instance properties
- inline mp_prec_t get_prec() const;
- inline void set_prec(mp_prec_t prec, mp_rnd_t rnd_mode = get_default_rnd()); // Change precision with rounding mode
-
- // Aliases for get_prec(), set_prec() - needed for compatibility with std::complex<mpreal> interface
- inline mpreal& setPrecision(int Precision, mp_rnd_t RoundingMode = get_default_rnd());
- inline int getPrecision() const;
-
- // Set mpreal to +/- inf, NaN, +/-0
- mpreal& setInf (int Sign = +1);
- mpreal& setNan ();
- mpreal& setZero (int Sign = +1);
- mpreal& setSign (int Sign, mp_rnd_t RoundingMode = get_default_rnd());
-
- //Exponent
- mp_exp_t get_exp();
- int set_exp(mp_exp_t e);
- int check_range (int t, mp_rnd_t rnd_mode = get_default_rnd());
- int subnormalize (int t,mp_rnd_t rnd_mode = get_default_rnd());
-
- // Inexact conversion from float
- inline bool fits_in_bits(double x, int n);
-
- // Set/Get global properties
- static void set_default_prec(mp_prec_t prec);
- static void set_default_rnd(mp_rnd_t rnd_mode);
-
- static mp_exp_t get_emin (void);
- static mp_exp_t get_emax (void);
- static mp_exp_t get_emin_min (void);
- static mp_exp_t get_emin_max (void);
- static mp_exp_t get_emax_min (void);
- static mp_exp_t get_emax_max (void);
- static int set_emin (mp_exp_t exp);
- static int set_emax (mp_exp_t exp);
-
- // Efficient swapping of two mpreal values - needed for std algorithms
- friend void swap(mpreal& x, mpreal& y);
-
- friend const mpreal fmax(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
- friend const mpreal fmin(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
-
-private:
- // Human friendly Debug Preview in Visual Studio.
- // Put one of these lines:
- //
- // mpfr::mpreal=<DebugView> ; Show value only
- // mpfr::mpreal=<DebugView>, <mp[0]._mpfr_prec,u>bits ; Show value & precision
- //
- // at the beginning of
- // [Visual Studio Installation Folder]\Common7\Packages\Debugger\autoexp.dat
- MPREAL_MSVC_DEBUGVIEW_DATA
-
- // "Smart" resources deallocation. Checks if instance initialized before deletion.
- void clear(::mpfr_ptr);
-};
-
-//////////////////////////////////////////////////////////////////////////
-// Exceptions
-class conversion_overflow : public std::exception {
-public:
- std::string why() { return "inexact conversion from floating point"; }
-};
-
-//////////////////////////////////////////////////////////////////////////
-// Constructors & converters
-// Default constructor: creates mp number and initializes it to 0.
-inline mpreal::mpreal()
-{
- mpfr_init2 (mpfr_ptr(), mpreal::get_default_prec());
- mpfr_set_ui(mpfr_ptr(), 0, mpreal::get_default_rnd());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mpreal::mpreal(const mpreal& u)
-{
- mpfr_init2(mpfr_ptr(),mpfr_get_prec(u.mpfr_srcptr()));
- mpfr_set (mpfr_ptr(),u.mpfr_srcptr(),mpreal::get_default_rnd());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-#ifdef MPREAL_HAVE_MOVE_SUPPORT
-inline mpreal::mpreal(mpreal&& other)
-{
- mpfr_set_uninitialized(mpfr_ptr()); // make sure "other" holds no pointer to actual data
- mpfr_swap(mpfr_ptr(), other.mpfr_ptr());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mpreal& mpreal::operator=(mpreal&& other)
-{
- mpfr_swap(mpfr_ptr(), other.mpfr_ptr());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-#endif
-
-inline mpreal::mpreal(const mpfr_t u, bool shared)
-{
- if(shared)
- {
- std::memcpy(mpfr_ptr(), u, sizeof(mpfr_t));
- }
- else
- {
- mpfr_init2(mpfr_ptr(), mpfr_get_prec(u));
- mpfr_set (mpfr_ptr(), u, mpreal::get_default_rnd());
- }
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mpreal::mpreal(const mpf_t u)
-{
- mpfr_init2(mpfr_ptr(),(mp_prec_t) mpf_get_prec(u)); // (gmp: mp_bitcnt_t) unsigned long -> long (mpfr: mp_prec_t)
- mpfr_set_f(mpfr_ptr(),u,mpreal::get_default_rnd());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mpreal::mpreal(const mpz_t u, mp_prec_t prec, mp_rnd_t mode)
-{
- mpfr_init2(mpfr_ptr(), prec);
- mpfr_set_z(mpfr_ptr(), u, mode);
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mpreal::mpreal(const mpq_t u, mp_prec_t prec, mp_rnd_t mode)
-{
- mpfr_init2(mpfr_ptr(), prec);
- mpfr_set_q(mpfr_ptr(), u, mode);
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mpreal::mpreal(const double u, mp_prec_t prec, mp_rnd_t mode)
-{
- mpfr_init2(mpfr_ptr(), prec);
-
-#if (MPREAL_DOUBLE_BITS_OVERFLOW > -1)
- if(fits_in_bits(u, MPREAL_DOUBLE_BITS_OVERFLOW))
- {
- mpfr_set_d(mpfr_ptr(), u, mode);
- }else
- throw conversion_overflow();
-#else
- mpfr_set_d(mpfr_ptr(), u, mode);
-#endif
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mpreal::mpreal(const long double u, mp_prec_t prec, mp_rnd_t mode)
-{
- mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_ld(mpfr_ptr(), u, mode);
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mpreal::mpreal(const unsigned long int u, mp_prec_t prec, mp_rnd_t mode)
-{
- mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_ui(mpfr_ptr(), u, mode);
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mpreal::mpreal(const unsigned int u, mp_prec_t prec, mp_rnd_t mode)
-{
- mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_ui(mpfr_ptr(), u, mode);
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mpreal::mpreal(const long int u, mp_prec_t prec, mp_rnd_t mode)
-{
- mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_si(mpfr_ptr(), u, mode);
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mpreal::mpreal(const int u, mp_prec_t prec, mp_rnd_t mode)
-{
- mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_si(mpfr_ptr(), u, mode);
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
-inline mpreal::mpreal(const uint64_t u, mp_prec_t prec, mp_rnd_t mode)
-{
- mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_uj(mpfr_ptr(), u, mode);
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mpreal::mpreal(const int64_t u, mp_prec_t prec, mp_rnd_t mode)
-{
- mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_sj(mpfr_ptr(), u, mode);
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-#endif
-
-inline mpreal::mpreal(const char* s, mp_prec_t prec, int base, mp_rnd_t mode)
-{
- mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_str(mpfr_ptr(), s, base, mode);
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mpreal::mpreal(const std::string& s, mp_prec_t prec, int base, mp_rnd_t mode)
-{
- mpfr_init2 (mpfr_ptr(), prec);
- mpfr_set_str(mpfr_ptr(), s.c_str(), base, mode);
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline void mpreal::clear(::mpfr_ptr x)
-{
-#ifdef MPREAL_HAVE_MOVE_SUPPORT
- if(mpfr_is_initialized(x))
-#endif
- mpfr_clear(x);
-}
-
-inline mpreal::~mpreal()
-{
- clear(mpfr_ptr());
-}
-
-// internal namespace needed for template magic
-namespace internal{
-
- // Use SFINAE to restrict instantiation of the arithmetic operations to numeric types only
- // This is needed for smooth integration with libraries based on expression templates, like Eigen.
- // TODO: Do the same for boolean operators.
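[In effect, not part of the diff: the templated operators below participate in overload resolution only for types that receive a result_type specialization, so they cannot hijack the operators of expression-template libraries such as Eigen. A sketch; 'Foo' is a hypothetical non-numeric type.]

    mpfr::mpreal a = 1;
    mpreal b = a + 2;  // OK: internal::result_type<int>::type is mpreal
    struct Foo {};
    Foo f;
    // a + f;          // ill-formed: no result_type<Foo>, so this operator+ drops out via SFINAE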
- template <typename ArgumentType> struct result_type {};
-
- template <> struct result_type<mpreal> {typedef mpreal type;};
- template <> struct result_type<mpz_t> {typedef mpreal type;};
- template <> struct result_type<mpq_t> {typedef mpreal type;};
- template <> struct result_type<long double> {typedef mpreal type;};
- template <> struct result_type<double> {typedef mpreal type;};
- template <> struct result_type<unsigned long int> {typedef mpreal type;};
- template <> struct result_type<unsigned int> {typedef mpreal type;};
- template <> struct result_type<long int> {typedef mpreal type;};
- template <> struct result_type<int> {typedef mpreal type;};
-
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
- template <> struct result_type<int64_t > {typedef mpreal type;};
- template <> struct result_type<uint64_t > {typedef mpreal type;};
-#endif
-}
-
-// + Addition
-template <typename Rhs>
-inline const typename internal::result_type<Rhs>::type
- operator+(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) += rhs; }
-
-template <typename Lhs>
-inline const typename internal::result_type<Lhs>::type
- operator+(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) += lhs; }
-
-// - Subtraction
-template <typename Rhs>
-inline const typename internal::result_type<Rhs>::type
- operator-(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) -= rhs; }
-
-template <typename Lhs>
-inline const typename internal::result_type<Lhs>::type
- operator-(const Lhs& lhs, const mpreal& rhs){ return mpreal(lhs) -= rhs; }
-
-// * Multiplication
-template <typename Rhs>
-inline const typename internal::result_type<Rhs>::type
- operator*(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) *= rhs; }
-
-template <typename Lhs>
-inline const typename internal::result_type<Lhs>::type
- operator*(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) *= lhs; }
-
-// / Division
-template <typename Rhs>
-inline const typename internal::result_type<Rhs>::type
- operator/(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) /= rhs; }
-
-template <typename Lhs>
-inline const typename internal::result_type<Lhs>::type
- operator/(const Lhs& lhs, const mpreal& rhs){ return mpreal(lhs) /= rhs; }
-
-//////////////////////////////////////////////////////////////////////////
-// sqrt
-const mpreal sqrt(const unsigned int v, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal sqrt(const long int v, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal sqrt(const int v, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal sqrt(const long double v, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal sqrt(const double v, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-
-// abs
-inline const mpreal abs(const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd());
-
-//////////////////////////////////////////////////////////////////////////
-// pow
-const mpreal pow(const mpreal& a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const mpreal& a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const mpreal& a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const mpreal& a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-
-const mpreal pow(const unsigned int a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const long int a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const int a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const long double a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const double a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-
-const mpreal pow(const unsigned long int a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const unsigned long int a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const unsigned long int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const unsigned long int a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const unsigned long int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-
-const mpreal pow(const unsigned int a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const unsigned int a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const unsigned int a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const unsigned int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const unsigned int a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const unsigned int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-
-const mpreal pow(const long int a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const long int a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const long int a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const long int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const long int a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const long int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-
-const mpreal pow(const int a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const int a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const int a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const int a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-
-const mpreal pow(const long double a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const long double a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const long double a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const long double a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const long double a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-
-const mpreal pow(const double a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const double a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const double a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const double a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-const mpreal pow(const double a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-
-inline const mpreal mul_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-inline const mpreal mul_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-inline const mpreal div_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
-
-//////////////////////////////////////////////////////////////////////////
-// Estimate machine epsilon for the given precision
-// Returns smallest eps such that 1.0 + eps != 1.0
-inline mpreal machine_epsilon(mp_prec_t prec = mpreal::get_default_prec());
-
-// Returns smallest eps such that x + eps != x (relative machine epsilon)
-inline mpreal machine_epsilon(const mpreal& x);
-
-// Gives max & min values for the required precision,
-// minval is 'safe' meaning 1 / minval does not overflow
-// maxval is 'safe' meaning 1 / maxval does not underflow
-inline mpreal minval(mp_prec_t prec = mpreal::get_default_prec());
-inline mpreal maxval(mp_prec_t prec = mpreal::get_default_prec());
-
-// 'Dirty' equality check 1: |a-b| < min{|a|,|b|} * eps
-inline bool isEqualFuzzy(const mpreal& a, const mpreal& b, const mpreal& eps);
-
-// 'Dirty' equality check 2: |a-b| < min{|a|,|b|} * eps( min{|a|,|b|} )
-inline bool isEqualFuzzy(const mpreal& a, const mpreal& b);
-
-// 'Bitwise' equality check
-// maxUlps - a and b can be apart by maxUlps binary numbers.
-inline bool isEqualUlps(const mpreal& a, const mpreal& b, int maxUlps);
-
-//////////////////////////////////////////////////////////////////////////
-// Convert precision in 'bits' to decimal digits and vice versa.
-// bits = ceil(digits*log[2](10))
-// digits = floor(bits*log[10](2))
-
-inline mp_prec_t digits2bits(int d);
-inline int bits2digits(mp_prec_t b);
-
-//////////////////////////////////////////////////////////////////////////
-// min, max
-const mpreal (max)(const mpreal& x, const mpreal& y);
-const mpreal (min)(const mpreal& x, const mpreal& y);
-
-//////////////////////////////////////////////////////////////////////////
-// Implementation
-//////////////////////////////////////////////////////////////////////////
-
-//////////////////////////////////////////////////////////////////////////
-// Operators - Assignment
-inline mpreal& mpreal::operator=(const mpreal& v)
-{
- if (this != &v)
- {
- mp_prec_t tp = mpfr_get_prec( mpfr_srcptr());
- mp_prec_t vp = mpfr_get_prec(v.mpfr_srcptr());
-
- if(tp != vp){
- clear(mpfr_ptr());
- mpfr_init2(mpfr_ptr(), vp);
- }
-
- mpfr_set(mpfr_ptr(), v.mpfr_srcptr(), mpreal::get_default_rnd());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- }
- return *this;
-}
-
-inline mpreal& mpreal::operator=(const mpf_t v)
-{
- mpfr_set_f(mpfr_ptr(), v, mpreal::get_default_rnd());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator=(const mpz_t v)
-{
- mpfr_set_z(mpfr_ptr(), v, mpreal::get_default_rnd());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator=(const mpq_t v)
-{
- mpfr_set_q(mpfr_ptr(), v, mpreal::get_default_rnd());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator=(const long double v)
-{
- mpfr_set_ld(mpfr_ptr(), v, mpreal::get_default_rnd());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator=(const double v)
-{
-#if (MPREAL_DOUBLE_BITS_OVERFLOW > -1)
- if(fits_in_bits(v, MPREAL_DOUBLE_BITS_OVERFLOW))
- {
- mpfr_set_d(mpfr_ptr(),v,mpreal::get_default_rnd());
- }else
- throw conversion_overflow();
-#else
- mpfr_set_d(mpfr_ptr(),v,mpreal::get_default_rnd());
-#endif
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator=(const unsigned long int v)
-{
- mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator=(const unsigned int v)
-{
- mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator=(const long int v)
-{
- mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator=(const int v)
-{
- mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd());
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator=(const char* s)
-{
- // Use other converters for more precise control on base & precision & rounding:
- //
- // mpreal(const char* s, mp_prec_t prec, int base, mp_rnd_t mode)
- // mpreal(const std::string& s,mp_prec_t prec, int base, mp_rnd_t mode)
- //
- // Here we assume base = 10 and we use the precision of the target variable.
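[An illustration of the constructors referenced in the comment above, not part of the diff; they give the explicit control that this operator deliberately skips. Values are illustrative.]

    mpfr::mpreal x("1.00110011", 64, 2);      // base-2 literal, 64-bit precision
    mpfr::mpreal y("3.14159265358979", 128);  // base 10, 128-bit precision
    y = "2.71828182845905";                   // this operator=: base 10, keeps y's precision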
-
- mpfr_t t;
-
- mpfr_init2(t, mpfr_get_prec(mpfr_srcptr()));
-
- if(0 == mpfr_set_str(t, s, 10, mpreal::get_default_rnd()))
- {
- mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- }
-
- clear(t);
- return *this;
-}
-
-inline mpreal& mpreal::operator=(const std::string& s)
-{
- // Use other converters for more precise control on base & precision & rounding:
- //
- // mpreal(const char* s, mp_prec_t prec, int base, mp_rnd_t mode)
- // mpreal(const std::string& s,mp_prec_t prec, int base, mp_rnd_t mode)
- //
- // Here we assume base = 10 and we use the precision of the target variable.
-
- mpfr_t t;
-
- mpfr_init2(t, mpfr_get_prec(mpfr_srcptr()));
-
- if(0 == mpfr_set_str(t, s.c_str(), 10, mpreal::get_default_rnd()))
- {
- mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- }
-
- clear(t);
- return *this;
-}
-
-
-//////////////////////////////////////////////////////////////////////////
-// + Addition
-inline mpreal& mpreal::operator+=(const mpreal& v)
-{
- mpfr_add(mpfr_ptr(), mpfr_srcptr(), v.mpfr_srcptr(), mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator+=(const mpf_t u)
-{
- *this += mpreal(u);
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator+=(const mpz_t u)
-{
- mpfr_add_z(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator+=(const mpq_t u)
-{
- mpfr_add_q(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator+= (const long double u)
-{
- *this += mpreal(u);
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator+= (const double u)
-{
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
- mpfr_add_d(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
-#else
- *this += mpreal(u);
-#endif
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator+=(const unsigned long int u)
-{
- mpfr_add_ui(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator+=(const unsigned int u)
-{
- mpfr_add_ui(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator+=(const long int u)
-{
- mpfr_add_si(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator+=(const int u)
-{
- mpfr_add_si(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
-inline mpreal& mpreal::operator+=(const int64_t u){ *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator+=(const uint64_t u){ *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator-=(const int64_t u){ *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator-=(const uint64_t u){ *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator*=(const int64_t u){ *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator*=(const uint64_t u){ *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator/=(const int64_t u){ *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-inline mpreal& mpreal::operator/=(const uint64_t u){ *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
-#endif
-
-inline const mpreal mpreal::operator+()const { return mpreal(*this); }
-
-inline const mpreal operator+(const mpreal& a, const mpreal& b)
-{
- mpreal c(0, (std::max)(mpfr_get_prec(a.mpfr_ptr()), mpfr_get_prec(b.mpfr_ptr())));
- mpfr_add(c.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), mpreal::get_default_rnd());
- return c;
-}
-
-inline mpreal& mpreal::operator++()
-{
- return *this += 1;
-}
-
-inline const mpreal mpreal::operator++ (int)
-{
- mpreal x(*this);
- *this += 1;
- return x;
-}
-
-inline mpreal& mpreal::operator--()
-{
- return *this -= 1;
-}
-
-inline const mpreal mpreal::operator-- (int)
-{
- mpreal x(*this);
- *this -= 1;
- return x;
-}
-
-//////////////////////////////////////////////////////////////////////////
-// - Subtraction
-inline mpreal& mpreal::operator-=(const mpreal& v)
-{
- mpfr_sub(mpfr_ptr(),mpfr_srcptr(),v.mpfr_srcptr(),mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator-=(const mpz_t v)
-{
- mpfr_sub_z(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator-=(const mpq_t v)
-{
- mpfr_sub_q(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator-=(const long double v)
-{
- *this -= mpreal(v);
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator-=(const double v)
-{
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
- mpfr_sub_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
-#else
- *this -= mpreal(v);
-#endif
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator-=(const unsigned long int v)
-{
- mpfr_sub_ui(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator-=(const unsigned int v)
-{
- mpfr_sub_ui(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator-=(const long int v)
-{
- mpfr_sub_si(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator-=(const int v)
-{
- mpfr_sub_si(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline const mpreal mpreal::operator-()const
-{
- mpreal u(*this);
- mpfr_neg(u.mpfr_ptr(),u.mpfr_srcptr(),mpreal::get_default_rnd());
- return u;
-}
-
-inline const mpreal operator-(const mpreal& a, const mpreal& b)
-{
- mpreal c(0, (std::max)(mpfr_get_prec(a.mpfr_ptr()), mpfr_get_prec(b.mpfr_ptr())));
- mpfr_sub(c.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), mpreal::get_default_rnd());
- return c;
-}
-
-inline const mpreal operator-(const double b, const mpreal& a)
-{
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
- mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
- mpfr_d_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
- return x;
-#else
- mpreal x(b, mpfr_get_prec(a.mpfr_ptr()));
- x -= a;
- return x;
-#endif
-}
-
-inline const mpreal operator-(const unsigned long int b, const mpreal& a)
-{
- mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
- mpfr_ui_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
- return x;
-}
-
-inline const mpreal operator-(const unsigned int b, const mpreal& a)
-{
- mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
- mpfr_ui_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
- return x;
-}
-
-inline const mpreal operator-(const long int b, const mpreal& a)
-{
- mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
- mpfr_si_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
- return x;
-}
-
-inline const mpreal operator-(const int b, const mpreal& a)
-{
- mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
- mpfr_si_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
- return x;
-}
-
-//////////////////////////////////////////////////////////////////////////
-// * Multiplication
-inline mpreal& mpreal::operator*= (const mpreal& v)
-{
- mpfr_mul(mpfr_ptr(),mpfr_srcptr(),v.mpfr_srcptr(),mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator*=(const mpz_t v)
-{
- mpfr_mul_z(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator*=(const mpq_t v)
-{
- mpfr_mul_q(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator*=(const long double v)
-{
- *this *= mpreal(v);
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator*=(const double v)
-{
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
- mpfr_mul_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
-#else
- *this *= mpreal(v);
-#endif
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator*=(const unsigned long int v)
-{
- mpfr_mul_ui(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator*=(const unsigned int v)
-{
- mpfr_mul_ui(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator*=(const long int v)
-{
- mpfr_mul_si(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator*=(const int v)
-{
- mpfr_mul_si(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline const mpreal operator*(const mpreal& a, const mpreal& b)
-{
- mpreal c(0, (std::max)(mpfr_get_prec(a.mpfr_ptr()), mpfr_get_prec(b.mpfr_ptr())));
- mpfr_mul(c.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), mpreal::get_default_rnd());
- return c;
-}
-
-//////////////////////////////////////////////////////////////////////////
-// / Division
-inline mpreal& mpreal::operator/=(const mpreal& v)
-{
- mpfr_div(mpfr_ptr(),mpfr_srcptr(),v.mpfr_srcptr(),mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator/=(const mpz_t v)
-{
- mpfr_div_z(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator/=(const mpq_t v)
-{
- mpfr_div_q(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator/=(const long double v)
-{
- *this /= mpreal(v);
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator/=(const double v)
-{
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
- mpfr_div_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
-#else
- *this /= mpreal(v);
-#endif
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator/=(const unsigned long int v)
-{
- mpfr_div_ui(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator/=(const unsigned int v)
-{
- mpfr_div_ui(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator/=(const long int v)
-{
- mpfr_div_si(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator/=(const int v)
-{
- mpfr_div_si(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline const mpreal operator/(const mpreal& a, const mpreal& b)
-{
- mpreal c(0, (std::max)(mpfr_get_prec(a.mpfr_srcptr()), mpfr_get_prec(b.mpfr_srcptr())));
- mpfr_div(c.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), mpreal::get_default_rnd());
- return c;
-}
-
-inline const mpreal operator/(const unsigned long int b, const mpreal& a)
-{
- mpreal x(0, mpfr_get_prec(a.mpfr_srcptr()));
- mpfr_ui_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
- return x;
-}
-
-inline const mpreal operator/(const unsigned int b, const mpreal& a)
-{
- mpreal x(0, mpfr_get_prec(a.mpfr_srcptr()));
- mpfr_ui_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
- return x;
-}
-
-inline const mpreal operator/(const long int b, const mpreal& a)
-{
- mpreal x(0, mpfr_get_prec(a.mpfr_srcptr()));
- mpfr_si_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
- return x;
-}
-
-inline const mpreal operator/(const int b, const mpreal& a)
-{
- mpreal x(0, mpfr_get_prec(a.mpfr_srcptr()));
- mpfr_si_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
- return x;
-}
-
-inline const mpreal operator/(const double b, const mpreal& a)
-{
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
- mpreal x(0, mpfr_get_prec(a.mpfr_srcptr()));
- mpfr_d_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
- return x;
-#else
- mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
- x /= a;
- return x;
-#endif
-}
-
-//////////////////////////////////////////////////////////////////////////
-// Shifts operators - Multiplication/Division by power of 2
-inline mpreal& mpreal::operator<<=(const unsigned long int u)
-{
- mpfr_mul_2ui(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator<<=(const unsigned int u)
-{
- mpfr_mul_2ui(mpfr_ptr(),mpfr_srcptr(),static_cast<unsigned long int>(u),mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator<<=(const long int u)
-{
- mpfr_mul_2si(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator<<=(const int u)
-{
- mpfr_mul_2si(mpfr_ptr(),mpfr_srcptr(),static_cast<long int>(u),mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator>>=(const unsigned long int u)
-{
- mpfr_div_2ui(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator>>=(const unsigned int u)
-{
- mpfr_div_2ui(mpfr_ptr(),mpfr_srcptr(),static_cast<unsigned long int>(u),mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator>>=(const long int u)
-{
- mpfr_div_2si(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::operator>>=(const int u)
-{
- mpfr_div_2si(mpfr_ptr(),mpfr_srcptr(),static_cast<long int>(u),mpreal::get_default_rnd());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline const mpreal operator<<(const mpreal& v, const unsigned long int k)
-{
- return mul_2ui(v,k);
-}
-
-inline const mpreal operator<<(const mpreal& v, const unsigned int k)
-{
- return mul_2ui(v,static_cast<unsigned long int>(k));
-}
-
-inline const mpreal operator<<(const mpreal& v, const long int k)
-{
- return mul_2si(v,k);
-}
-
-inline const mpreal operator<<(const mpreal& v, const int k)
-{
- return mul_2si(v,static_cast<long int>(k));
-}
-
-inline const mpreal operator>>(const mpreal& v, const unsigned long int k)
-{
- return div_2ui(v,k);
-}
-
-inline const mpreal operator>>(const mpreal& v, const long int k)
-{
- return div_2si(v,k);
-}
-
-inline const mpreal operator>>(const mpreal& v, const unsigned int k)
-{
- return div_2ui(v,static_cast<unsigned long int>(k));
-}
-
-inline const mpreal operator>>(const mpreal& v, const int k)
-{
- return div_2si(v,static_cast<long int>(k));
-}
-
-// mul_2ui
-inline const mpreal mul_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_mul_2ui(x.mpfr_ptr(),v.mpfr_srcptr(),k,rnd_mode);
- return x;
-}
-
-// mul_2si
-inline const mpreal mul_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_mul_2si(x.mpfr_ptr(),v.mpfr_srcptr(),k,rnd_mode);
- return x;
-}
-
-inline const mpreal div_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_div_2ui(x.mpfr_ptr(),v.mpfr_srcptr(),k,rnd_mode);
- return x;
-}
-
-inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode)
-{
- mpreal x(v);
- mpfr_div_2si(x.mpfr_ptr(),v.mpfr_srcptr(),k,rnd_mode);
- return x;
-}
-
-//////////////////////////////////////////////////////////////////////////
-// Boolean operators
-inline bool operator > (const mpreal& a, const mpreal& b){ return (mpfr_greater_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); }
-inline bool operator >= (const mpreal& a, const mpreal& b){ return (mpfr_greaterequal_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); }
-inline bool operator < (const mpreal& a, const mpreal& b){ return (mpfr_less_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); }
-inline bool operator <= (const mpreal& a, const mpreal& b){ return (mpfr_lessequal_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); }
-inline bool operator == (const mpreal& a, const mpreal& b){ return (mpfr_equal_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); }
-inline bool operator != (const mpreal& a, const mpreal& b){ return (mpfr_lessgreater_p (a.mpfr_srcptr(),b.mpfr_srcptr()) !=0 ); }
-
-inline bool operator == (const mpreal& a, const unsigned long int b ){ return (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); }
-inline bool operator == (const mpreal& a, const unsigned int b ){ return (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); }
-inline bool operator == (const mpreal& a, const long int b ){ return (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); }
-inline bool operator == (const mpreal& a, const int b ){ return (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); }
-inline bool operator == (const mpreal& a, const long double b ){ return (mpfr_cmp_ld(a.mpfr_srcptr(),b) == 0 ); }
-inline bool operator == (const mpreal& a, const double b ){ return (mpfr_cmp_d (a.mpfr_srcptr(),b) == 0 ); }
-
-
-inline bool isnan (const mpreal& op){ return (mpfr_nan_p (op.mpfr_srcptr()) != 0 ); }
-inline bool isinf (const mpreal& op){ return (mpfr_inf_p (op.mpfr_srcptr()) != 0 ); }
-inline bool isfinite (const mpreal& op){ return (mpfr_number_p (op.mpfr_srcptr()) != 0 ); }
-inline bool iszero (const mpreal& op){ return (mpfr_zero_p (op.mpfr_srcptr()) != 0 ); }
-inline bool isint (const mpreal& op){ return (mpfr_integer_p(op.mpfr_srcptr()) != 0 ); }
-
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
-inline bool isregular(const mpreal& op){ return (mpfr_regular_p(op.mpfr_srcptr()));}
-#endif
-
-//////////////////////////////////////////////////////////////////////////
-// Type Converters
-inline bool mpreal::toBool (mp_rnd_t /*mode*/) const { return mpfr_zero_p (mpfr_srcptr()) == 0; }
-inline long mpreal::toLong (mp_rnd_t mode) const { return mpfr_get_si (mpfr_srcptr(), mode); }
-inline unsigned long mpreal::toULong (mp_rnd_t mode) const { return mpfr_get_ui (mpfr_srcptr(), mode); }
-inline float mpreal::toFloat (mp_rnd_t mode) const { return mpfr_get_flt(mpfr_srcptr(), mode); }
-inline double mpreal::toDouble (mp_rnd_t mode) const { return mpfr_get_d (mpfr_srcptr(), mode); }
-inline long double mpreal::toLDouble(mp_rnd_t mode) const { return mpfr_get_ld (mpfr_srcptr(), mode); }
-
-#if defined (MPREAL_HAVE_INT64_SUPPORT)
-inline int64_t mpreal::toInt64 (mp_rnd_t mode) const{ return mpfr_get_sj(mpfr_srcptr(), mode); }
-inline uint64_t mpreal::toUInt64(mp_rnd_t mode) const{ return mpfr_get_uj(mpfr_srcptr(), mode); }
-#endif
-
-inline ::mpfr_ptr mpreal::mpfr_ptr() { return mp; }
-inline ::mpfr_srcptr mpreal::mpfr_ptr() const { return mp; }
-inline ::mpfr_srcptr mpreal::mpfr_srcptr() const { return mp; }
-
-template <class T>
-inline std::string toString(T t, std::ios_base & (*f)(std::ios_base&))
-{
- std::ostringstream oss;
- oss << f << t;
- return oss.str();
-}
-
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
-
-inline std::string mpreal::toString(const std::string& format) const
-{
- char *s = NULL;
- std::string out;
-
- if( !format.empty() )
- {
- if(!(mpfr_asprintf(&s, format.c_str(), mpfr_srcptr()) < 0))
- {
- out = std::string(s);
-
- mpfr_free_str(s);
- }
- }
-
- return out;
-}
-
-#endif
-
-inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const
-{
- // TODO: Add extended format specification (f, e, rounding mode) as it is done in the output operator
- (void)b;
- (void)mode;
-
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
-
- std::ostringstream format;
-
- int digits = (n >= 0) ? n : bits2digits(mpfr_get_prec(mpfr_srcptr()));
-
- format << "%." << digits << "RNg";
-
- return toString(format.str());
-
-#else
-
- char *s, *ns = NULL;
- size_t slen, nslen;
- mp_exp_t exp;
- std::string out;
-
- if(mpfr_inf_p(mp))
- {
- if(mpfr_sgn(mp)>0) return "+Inf";
- else return "-Inf";
- }
-
- if(mpfr_zero_p(mp)) return "0";
- if(mpfr_nan_p(mp)) return "NaN";
-
- s = mpfr_get_str(NULL, &exp, b, 0, mp, mode);
- ns = mpfr_get_str(NULL, &exp, b, (std::max)(0,n), mp, mode);
-
- if(s!=NULL && ns!=NULL)
- {
- slen = strlen(s);
- nslen = strlen(ns);
- if(nslen<=slen)
- {
- mpfr_free_str(s);
- s = ns;
- slen = nslen;
- }
- else {
- mpfr_free_str(ns);
- }
-
- // Make human eye-friendly formatting if possible
- if (exp>0 && static_cast<size_t>(exp)<slen)
- {
- if(s[0]=='-')
- {
- // Remove zeros starting from right end
- char* ptr = s+slen-1;
- while (*ptr=='0' && ptr>s+exp) ptr--;
-
- if(ptr==s+exp) out = std::string(s,exp+1);
- else out = std::string(s,exp+1)+'.'+std::string(s+exp+1,ptr-(s+exp+1)+1);
-
- //out = string(s,exp+1)+'.'+string(s+exp+1);
- }
- else
- {
- // Remove zeros starting from right end
- char* ptr = s+slen-1;
- while (*ptr=='0' && ptr>s+exp-1) ptr--;
-
- if(ptr==s+exp-1) out = std::string(s,exp);
- else out = std::string(s,exp)+'.'+std::string(s+exp,ptr-(s+exp)+1);
-
- //out = string(s,exp)+'.'+string(s+exp);
- }
-
- }else{ // exp<0 || exp>slen
- if(s[0]=='-')
- {
- // Remove zeros starting from right end
- char* ptr = s+slen-1;
- while (*ptr=='0' && ptr>s+1) ptr--;
-
- if(ptr==s+1) out = std::string(s,2);
- else out = std::string(s,2)+'.'+std::string(s+2,ptr-(s+2)+1);
-
- //out = string(s,2)+'.'+string(s+2);
- }
- else
- {
- // Remove zeros starting from right end
- char* ptr = s+slen-1;
- while (*ptr=='0' && ptr>s) ptr--;
-
- if(ptr==s) out = std::string(s,1);
- else out = std::string(s,1)+'.'+std::string(s+1,ptr-(s+1)+1);
-
- //out = string(s,1)+'.'+string(s+1);
- }
-
- // Make final string
- if(--exp)
- {
- if(exp>0) out += "e+"+mpfr::toString<mp_exp_t>(exp,std::dec);
- else out += "e"+mpfr::toString<mp_exp_t>(exp,std::dec);
- }
- }
-
- mpfr_free_str(s);
- return out;
- }else{
- return "conversion error!";
- }
-#endif
-}
-
-
-//////////////////////////////////////////////////////////////////////////
-// I/O
-inline std::ostream& mpreal::output(std::ostream& os) const
-{
- std::ostringstream format;
- const std::ios::fmtflags flags = os.flags();
-
- format << ((flags & std::ios::showpos) ? "%+" : "%");
- if (os.precision() >= 0)
- format << '.' << os.precision() << "R*"
- << ((flags & std::ios::floatfield) == std::ios::fixed ? 'f' :
- (flags & std::ios::floatfield) == std::ios::scientific ? 'e' :
- 'g');
- else
- format << "R*e";
-
- char *s = NULL;
- if(!(mpfr_asprintf(&s, format.str().c_str(),
- mpfr::mpreal::get_default_rnd(),
- mpfr_srcptr())
- < 0))
- {
- os << std::string(s);
- mpfr_free_str(s);
- }
- return os;
-}
-
-inline std::ostream& operator<<(std::ostream& os, const mpreal& v)
-{
- return v.output(os);
-}
-
-inline std::istream& operator>>(std::istream &is, mpreal& v)
-{
- // TODO: use cout::hexfloat and other flags to set up the base
- std::string tmp;
- is >> tmp;
- mpfr_set_str(v.mpfr_ptr(), tmp.c_str(), 10, mpreal::get_default_rnd());
- return is;
-}
-
-//////////////////////////////////////////////////////////////////////////
-// Bits - decimal digits relation
-// bits = ceil(digits*log[2](10))
-// digits = floor(bits*log[10](2))
-
-inline mp_prec_t digits2bits(int d)
-{
- const double LOG2_10 = 3.3219280948873624;
-
- return mp_prec_t(std::ceil( d * LOG2_10 ));
-}
-
-inline int bits2digits(mp_prec_t b)
-{
- const double LOG10_2 = 0.30102999566398119;
-
- return int(std::floor( b * LOG10_2 ));
-}
-
-//////////////////////////////////////////////////////////////////////////
-// Set/Get number properties
-inline int sgn(const mpreal& op)
-{
- int r = mpfr_signbit(op.mpfr_srcptr());
- return (r > 0? -1 : 1);
-}
-
-inline mpreal& mpreal::setSign(int sign, mp_rnd_t RoundingMode)
-{
- mpfr_setsign(mpfr_ptr(), mpfr_srcptr(), (sign < 0 ? 1 : 0), RoundingMode);
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline int mpreal::getPrecision() const
-{
- return int(mpfr_get_prec(mpfr_srcptr()));
-}
-
-inline mpreal& mpreal::setPrecision(int Precision, mp_rnd_t RoundingMode)
-{
- mpfr_prec_round(mpfr_ptr(), Precision, RoundingMode);
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::setInf(int sign)
-{
- mpfr_set_inf(mpfr_ptr(), sign);
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::setNan()
-{
- mpfr_set_nan(mpfr_ptr());
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mpreal& mpreal::setZero(int sign)
-{
-
-#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
- mpfr_set_zero(mpfr_ptr(), sign);
-#else
- mpfr_set_si(mpfr_ptr(), 0, (mpfr_get_default_rounding_mode)());
- setSign(sign);
-#endif
-
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return *this;
-}
-
-inline mp_prec_t mpreal::get_prec() const
-{
- return mpfr_get_prec(mpfr_srcptr());
-}
-
-inline void mpreal::set_prec(mp_prec_t prec, mp_rnd_t rnd_mode)
-{
- mpfr_prec_round(mpfr_ptr(),prec,rnd_mode);
- MPREAL_MSVC_DEBUGVIEW_CODE;
-}
-
-inline mp_exp_t mpreal::get_exp ()
-{
- return mpfr_get_exp(mpfr_srcptr());
-}
-
-inline int mpreal::set_exp (mp_exp_t e)
-{
- int x = mpfr_set_exp(mpfr_ptr(), e);
- MPREAL_MSVC_DEBUGVIEW_CODE;
- return x;
-}
-
-inline const mpreal frexp(const mpreal& v, mp_exp_t* exp)
-{
- mpreal x(v);
- *exp = x.get_exp();
- x.set_exp(0);
- return x;
-}
-
-inline const mpreal ldexp(const mpreal& v, mp_exp_t exp)
-{
- mpreal x(v);
-
- // rounding is not important since we are just increasing the exponent
- mpfr_mul_2si(x.mpfr_ptr(), x.mpfr_srcptr(), exp, mpreal::get_default_rnd());
- return x;
-}
-
-inline mpreal machine_epsilon(mp_prec_t prec)
-{
- /* the smallest eps such that 1 + eps != 1 */
- return machine_epsilon(mpreal(1, prec));
-}
-
-inline mpreal machine_epsilon(const mpreal& x)
-{
- /* the smallest eps such that x + eps != x */
- if( x < 0)
- {
- return nextabove(-x) + x;
- }else{
- return nextabove( x) - x;
- }
-}
-
-// minval is 'safe' meaning 1 / minval does not overflow
-inline mpreal minval(mp_prec_t prec)
-{
- /* min = 1/2 * 2^emin = 2^(emin - 1)
*/ - return mpreal(1, prec) << mpreal::get_emin()-1; -} - -// maxval is 'safe' meaning 1 / maxval does not underflow -inline mpreal maxval(mp_prec_t prec) -{ - /* max = (1 - eps) * 2^emax, eps is machine epsilon */ - return (mpreal(1, prec) - machine_epsilon(prec)) << mpreal::get_emax(); -} - -inline bool isEqualUlps(const mpreal& a, const mpreal& b, int maxUlps) -{ - return abs(a - b) <= machine_epsilon((max)(abs(a), abs(b))) * maxUlps; -} - -inline bool isEqualFuzzy(const mpreal& a, const mpreal& b, const mpreal& eps) -{ - return abs(a - b) <= eps; -} - -inline bool isEqualFuzzy(const mpreal& a, const mpreal& b) -{ - return isEqualFuzzy(a, b, machine_epsilon((max)(1, (min)(abs(a), abs(b))))); -} - -inline const mpreal modf(const mpreal& v, mpreal& n) -{ - mpreal f(v); - - // rounding is not important since we are using the same number - mpfr_frac (f.mpfr_ptr(),f.mpfr_srcptr(),mpreal::get_default_rnd()); - mpfr_trunc(n.mpfr_ptr(),v.mpfr_srcptr()); - return f; -} - -inline int mpreal::check_range (int t, mp_rnd_t rnd_mode) -{ - return mpfr_check_range(mpfr_ptr(),t,rnd_mode); -} - -inline int mpreal::subnormalize (int t,mp_rnd_t rnd_mode) -{ - int r = mpfr_subnormalize(mpfr_ptr(),t,rnd_mode); - MPREAL_MSVC_DEBUGVIEW_CODE; - return r; -} - -inline mp_exp_t mpreal::get_emin (void) -{ - return mpfr_get_emin(); -} - -inline int mpreal::set_emin (mp_exp_t exp) -{ - return mpfr_set_emin(exp); -} - -inline mp_exp_t mpreal::get_emax (void) -{ - return mpfr_get_emax(); -} - -inline int mpreal::set_emax (mp_exp_t exp) -{ - return mpfr_set_emax(exp); -} - -inline mp_exp_t mpreal::get_emin_min (void) -{ - return mpfr_get_emin_min(); -} - -inline mp_exp_t mpreal::get_emin_max (void) -{ - return mpfr_get_emin_max(); -} - -inline mp_exp_t mpreal::get_emax_min (void) -{ - return mpfr_get_emax_min(); -} - -inline mp_exp_t mpreal::get_emax_max (void) -{ - return mpfr_get_emax_max(); -} - -////////////////////////////////////////////////////////////////////////// -// Mathematical Functions -////////////////////////////////////////////////////////////////////////// -#define MPREAL_UNARY_MATH_FUNCTION_BODY(f) \ - mpreal y(0, mpfr_get_prec(x.mpfr_srcptr())); \ - mpfr_##f(y.mpfr_ptr(), x.mpfr_srcptr(), r); \ - return y; - -inline const mpreal sqr (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) -{ MPREAL_UNARY_MATH_FUNCTION_BODY(sqr ); } - -inline const mpreal sqrt (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) -{ MPREAL_UNARY_MATH_FUNCTION_BODY(sqrt); } - -inline const mpreal sqrt(const unsigned long int x, mp_rnd_t r) -{ - mpreal y; - mpfr_sqrt_ui(y.mpfr_ptr(), x, r); - return y; -} - -inline const mpreal sqrt(const unsigned int v, mp_rnd_t rnd_mode) -{ - return sqrt(static_cast<unsigned long int>(v),rnd_mode); -} - -inline const mpreal sqrt(const long int v, mp_rnd_t rnd_mode) -{ - if (v>=0) return sqrt(static_cast<unsigned long int>(v),rnd_mode); - else return mpreal().setNan(); // NaN -} - -inline const mpreal sqrt(const int v, mp_rnd_t rnd_mode) -{ - if (v>=0) return sqrt(static_cast<unsigned long int>(v),rnd_mode); - else return mpreal().setNan(); // NaN -} - -inline const mpreal root(const mpreal& x, unsigned long int k, mp_rnd_t r = mpreal::get_default_rnd()) -{ - mpreal y(0, mpfr_get_prec(x.mpfr_srcptr())); - mpfr_root(y.mpfr_ptr(), x.mpfr_srcptr(), k, r); - return y; -} - -inline const mpreal dim(const mpreal& a, const mpreal& b, mp_rnd_t r = mpreal::get_default_rnd()) -{ - mpreal y(0, mpfr_get_prec(a.mpfr_srcptr())); - mpfr_dim(y.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), r); - 
return y; -} - -inline int cmpabs(const mpreal& a,const mpreal& b) -{ - return mpfr_cmpabs(a.mpfr_ptr(), b.mpfr_srcptr()); -} - -inline int sin_cos(mpreal& s, mpreal& c, const mpreal& v, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - return mpfr_sin_cos(s.mpfr_ptr(), c.mpfr_ptr(), v.mpfr_srcptr(), rnd_mode); -} - -inline const mpreal sqrt (const long double v, mp_rnd_t rnd_mode) { return sqrt(mpreal(v),rnd_mode); } -inline const mpreal sqrt (const double v, mp_rnd_t rnd_mode) { return sqrt(mpreal(v),rnd_mode); } - -inline const mpreal cbrt (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(cbrt ); } -inline const mpreal fabs (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(abs ); } -inline const mpreal abs (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(abs ); } -inline const mpreal log (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(log ); } -inline const mpreal log2 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(log2 ); } -inline const mpreal log10 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(log10); } -inline const mpreal exp (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(exp ); } -inline const mpreal exp2 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(exp2 ); } -inline const mpreal exp10 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(exp10); } -inline const mpreal cos (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(cos ); } -inline const mpreal sin (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(sin ); } -inline const mpreal tan (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(tan ); } -inline const mpreal sec (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(sec ); } -inline const mpreal csc (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(csc ); } -inline const mpreal cot (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(cot ); } -inline const mpreal acos (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(acos ); } -inline const mpreal asin (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(asin ); } -inline const mpreal atan (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(atan ); } - -inline const mpreal acot (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return atan (1/v, r); } -inline const mpreal asec (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return acos (1/v, r); } -inline const mpreal acsc (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return asin (1/v, r); } -inline const mpreal acoth (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return atanh(1/v, r); } -inline const mpreal asech (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return acosh(1/v, r); } -inline const mpreal acsch (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return asinh(1/v, r); } - -inline const mpreal cosh (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { 
MPREAL_UNARY_MATH_FUNCTION_BODY(cosh ); } -inline const mpreal sinh (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(sinh ); } -inline const mpreal tanh (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(tanh ); } -inline const mpreal sech (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(sech ); } -inline const mpreal csch (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(csch ); } -inline const mpreal coth (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(coth ); } -inline const mpreal acosh (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(acosh); } -inline const mpreal asinh (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(asinh); } -inline const mpreal atanh (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(atanh); } - -inline const mpreal log1p (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(log1p ); } -inline const mpreal expm1 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(expm1 ); } -inline const mpreal eint (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(eint ); } -inline const mpreal gamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(gamma ); } -inline const mpreal lngamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(lngamma); } -inline const mpreal zeta (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(zeta ); } -inline const mpreal erf (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(erf ); } -inline const mpreal erfc (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(erfc ); } -inline const mpreal besselj0(const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(j0 ); } -inline const mpreal besselj1(const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(j1 ); } -inline const mpreal bessely0(const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(y0 ); } -inline const mpreal bessely1(const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(y1 ); } - -inline const mpreal atan2 (const mpreal& y, const mpreal& x, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal a(0,(std::max)(y.getPrecision(), x.getPrecision())); - mpfr_atan2(a.mpfr_ptr(), y.mpfr_srcptr(), x.mpfr_srcptr(), rnd_mode); - return a; -} - -inline const mpreal hypot (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal a(0,(std::max)(y.getPrecision(), x.getPrecision())); - mpfr_hypot(a.mpfr_ptr(), x.mpfr_srcptr(), y.mpfr_srcptr(), rnd_mode); - return a; -} - -inline const mpreal remainder (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal a(0,(std::max)(y.getPrecision(), x.getPrecision())); - mpfr_remainder(a.mpfr_ptr(), x.mpfr_srcptr(), y.mpfr_srcptr(), rnd_mode); - return a; -} - -inline const mpreal remquo (long* q, const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal 
a(0,(std::max)(y.getPrecision(), x.getPrecision())); - mpfr_remquo(a.mpfr_ptr(),q, x.mpfr_srcptr(), y.mpfr_srcptr(), rnd_mode); - return a; -} - -inline const mpreal fac_ui (unsigned long int v, mp_prec_t prec = mpreal::get_default_prec(), - mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal x(0, prec); - mpfr_fac_ui(x.mpfr_ptr(),v,rnd_mode); - return x; -} - - -inline const mpreal lgamma (const mpreal& v, int *signp = 0, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal x(v); - int tsignp; - - if(signp) mpfr_lgamma(x.mpfr_ptr(), signp,v.mpfr_srcptr(),rnd_mode); - else mpfr_lgamma(x.mpfr_ptr(),&tsignp,v.mpfr_srcptr(),rnd_mode); - - return x; -} - - -inline const mpreal besseljn (long n, const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) -{ - mpreal y(0, x.getPrecision()); - mpfr_jn(y.mpfr_ptr(), n, x.mpfr_srcptr(), r); - return y; -} - -inline const mpreal besselyn (long n, const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) -{ - mpreal y(0, x.getPrecision()); - mpfr_yn(y.mpfr_ptr(), n, x.mpfr_srcptr(), r); - return y; -} - -inline const mpreal fma (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal a; - mp_prec_t p1, p2, p3; - - p1 = v1.get_prec(); - p2 = v2.get_prec(); - p3 = v3.get_prec(); - - a.set_prec(p3>p2?(p3>p1?p3:p1):(p2>p1?p2:p1)); - - mpfr_fma(a.mp,v1.mp,v2.mp,v3.mp,rnd_mode); - return a; -} - -inline const mpreal fms (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal a; - mp_prec_t p1, p2, p3; - - p1 = v1.get_prec(); - p2 = v2.get_prec(); - p3 = v3.get_prec(); - - a.set_prec(p3>p2?(p3>p1?p3:p1):(p2>p1?p2:p1)); - - mpfr_fms(a.mp,v1.mp,v2.mp,v3.mp,rnd_mode); - return a; -} - -inline const mpreal agm (const mpreal& v1, const mpreal& v2, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal a; - mp_prec_t p1, p2; - - p1 = v1.get_prec(); - p2 = v2.get_prec(); - - a.set_prec(p1>p2?p1:p2); - - mpfr_agm(a.mp, v1.mp, v2.mp, rnd_mode); - - return a; -} - -inline const mpreal sum (const mpreal tab[], unsigned long int n, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal x; - mpfr_ptr* t; - unsigned long int i; - - t = new mpfr_ptr[n]; - for (i=0;i<n;i++) t[i] = (mpfr_ptr)tab[i].mp; - mpfr_sum(x.mp,t,n,rnd_mode); - delete[] t; - return x; -} - -////////////////////////////////////////////////////////////////////////// -// MPFR 2.4.0 Specifics -#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0)) - -inline int sinh_cosh(mpreal& s, mpreal& c, const mpreal& v, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - return mpfr_sinh_cosh(s.mp,c.mp,v.mp,rnd_mode); -} - -inline const mpreal li2 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) -{ - MPREAL_UNARY_MATH_FUNCTION_BODY(li2); -} - -inline const mpreal rem (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - /* R = rem(X,Y) if Y != 0, returns X - n * Y where n = trunc(X/Y). */ - return fmod(x, y, rnd_mode); -} - -inline const mpreal mod (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - (void)rnd_mode; - - /* - - m = mod(x,y) if y != 0, returns x - n*y where n = floor(x/y) - - The following are true by convention: - - mod(x,0) is x - - mod(x,x) is 0 - - mod(x,y) for x != y and y != 0 has the same sign as y. 
- - */ - - if(iszero(y)) return x; - if(x == y) return 0; - - mpreal m = x - floor(x / y) * y; - - m.setSign(sgn(y)); // make sure result has the same sign as Y - - return m; -} - -inline const mpreal fmod (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal a; - mp_prec_t yp, xp; - - yp = y.get_prec(); - xp = x.get_prec(); - - a.set_prec(yp>xp?yp:xp); - - mpfr_fmod(a.mp, x.mp, y.mp, rnd_mode); - - return a; -} - -inline const mpreal rec_sqrt(const mpreal& v, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal x(v); - mpfr_rec_sqrt(x.mp,v.mp,rnd_mode); - return x; -} -#endif // MPFR 2.4.0 Specifics - -////////////////////////////////////////////////////////////////////////// -// MPFR 3.0.0 Specifics -#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) -inline const mpreal digamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(digamma); } -inline const mpreal ai (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(ai); } -#endif // MPFR 3.0.0 Specifics - -////////////////////////////////////////////////////////////////////////// -// Constants -inline const mpreal const_log2 (mp_prec_t p = mpreal::get_default_prec(), mp_rnd_t r = mpreal::get_default_rnd()) -{ - mpreal x(0, p); - mpfr_const_log2(x.mpfr_ptr(), r); - return x; -} - -inline const mpreal const_pi (mp_prec_t p = mpreal::get_default_prec(), mp_rnd_t r = mpreal::get_default_rnd()) -{ - mpreal x(0, p); - mpfr_const_pi(x.mpfr_ptr(), r); - return x; -} - -inline const mpreal const_euler (mp_prec_t p = mpreal::get_default_prec(), mp_rnd_t r = mpreal::get_default_rnd()) -{ - mpreal x(0, p); - mpfr_const_euler(x.mpfr_ptr(), r); - return x; -} - -inline const mpreal const_catalan (mp_prec_t p = mpreal::get_default_prec(), mp_rnd_t r = mpreal::get_default_rnd()) -{ - mpreal x(0, p); - mpfr_const_catalan(x.mpfr_ptr(), r); - return x; -} - -inline const mpreal const_infinity (int sign = 1, mp_prec_t p = mpreal::get_default_prec()) -{ - mpreal x(0, p); - mpfr_set_inf(x.mpfr_ptr(), sign); - return x; -} - -////////////////////////////////////////////////////////////////////////// -// Integer Related Functions -inline const mpreal ceil(const mpreal& v) -{ - mpreal x(v); - mpfr_ceil(x.mp,v.mp); - return x; -} - -inline const mpreal floor(const mpreal& v) -{ - mpreal x(v); - mpfr_floor(x.mp,v.mp); - return x; -} - -inline const mpreal round(const mpreal& v) -{ - mpreal x(v); - mpfr_round(x.mp,v.mp); - return x; -} - -inline const mpreal trunc(const mpreal& v) -{ - mpreal x(v); - mpfr_trunc(x.mp,v.mp); - return x; -} - -inline const mpreal rint (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint ); } -inline const mpreal rint_ceil (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint_ceil ); } -inline const mpreal rint_floor (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint_floor); } -inline const mpreal rint_round (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint_round); } -inline const mpreal rint_trunc (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint_trunc); } -inline const mpreal frac (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(frac ); } - -////////////////////////////////////////////////////////////////////////// -// Miscellaneous Functions 
-inline void swap (mpreal& a, mpreal& b) { mpfr_swap(a.mp,b.mp); } -inline const mpreal (max)(const mpreal& x, const mpreal& y){ return (x>y?x:y); } -inline const mpreal (min)(const mpreal& x, const mpreal& y){ return (x<y?x:y); } - -inline const mpreal fmax(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal a; - mpfr_max(a.mp,x.mp,y.mp,rnd_mode); - return a; -} - -inline const mpreal fmin(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal a; - mpfr_min(a.mp,x.mp,y.mp,rnd_mode); - return a; -} - -inline const mpreal nexttoward (const mpreal& x, const mpreal& y) -{ - mpreal a(x); - mpfr_nexttoward(a.mp,y.mp); - return a; -} - -inline const mpreal nextabove (const mpreal& x) -{ - mpreal a(x); - mpfr_nextabove(a.mp); - return a; -} - -inline const mpreal nextbelow (const mpreal& x) -{ - mpreal a(x); - mpfr_nextbelow(a.mp); - return a; -} - -inline const mpreal urandomb (gmp_randstate_t& state) -{ - mpreal x; - mpfr_urandomb(x.mp,state); - return x; -} - -#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0)) -// use gmp_randinit_default() to init state, gmp_randclear() to clear -inline const mpreal urandom (gmp_randstate_t& state, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal x; - mpfr_urandom(x.mp,state,rnd_mode); - return x; -} - -inline const mpreal grandom (gmp_randstate_t& state, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal x; - mpfr_grandom(x.mp, NULL, state, rnd_mode); - return x; -} - -#endif - -#if (MPFR_VERSION <= MPFR_VERSION_NUM(2,4,2)) -inline const mpreal random2 (mp_size_t size, mp_exp_t exp) -{ - mpreal x; - mpfr_random2(x.mp,size,exp); - return x; -} -#endif - -// Uniformly distributed random number generation -// a = random(seed); <- initialization & first random number generation -// a = random(); <- next random numbers generation -// seed != 0 -inline const mpreal random(unsigned int seed = 0) -{ - -#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) - static gmp_randstate_t state; - static bool isFirstTime = true; - - if(isFirstTime) - { - gmp_randinit_default(state); - gmp_randseed_ui(state,0); - isFirstTime = false; - } - - if(seed != 0) gmp_randseed_ui(state,seed); - - return mpfr::urandom(state); -#else - if(seed != 0) std::srand(seed); - return mpfr::mpreal(std::rand()/(double)RAND_MAX); -#endif - -} - -#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0)) -inline const mpreal grandom(unsigned int seed = 0) -{ - static gmp_randstate_t state; - static bool isFirstTime = true; - - if(isFirstTime) - { - gmp_randinit_default(state); - gmp_randseed_ui(state,0); - isFirstTime = false; - } - - if(seed != 0) gmp_randseed_ui(state,seed); - - return mpfr::grandom(state); -} -#endif - -////////////////////////////////////////////////////////////////////////// -// Set/Get global properties -inline void mpreal::set_default_prec(mp_prec_t prec) -{ - mpfr_set_default_prec(prec); -} - -inline void mpreal::set_default_rnd(mp_rnd_t rnd_mode) -{ - mpfr_set_default_rounding_mode(rnd_mode); -} - -inline bool mpreal::fits_in_bits(double x, int n) -{ - int i; - double t; - return IsInf(x) || (std::modf ( std::ldexp ( std::frexp ( x, &i ), n ), &t ) == 0.0); -} - -inline const mpreal pow(const mpreal& a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal x(a); - mpfr_pow(x.mp,x.mp,b.mp,rnd_mode); - return x; -} - -inline const mpreal pow(const mpreal& a, const mpz_t b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal x(a); - mpfr_pow_z(x.mp,x.mp,b,rnd_mode); - return x; 
-} - -inline const mpreal pow(const mpreal& a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal x(a); - mpfr_pow_ui(x.mp,x.mp,b,rnd_mode); - return x; -} - -inline const mpreal pow(const mpreal& a, const unsigned int b, mp_rnd_t rnd_mode) -{ - return pow(a,static_cast<unsigned long int>(b),rnd_mode); -} - -inline const mpreal pow(const mpreal& a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal x(a); - mpfr_pow_si(x.mp,x.mp,b,rnd_mode); - return x; -} - -inline const mpreal pow(const mpreal& a, const int b, mp_rnd_t rnd_mode) -{ - return pow(a,static_cast<long int>(b),rnd_mode); -} - -inline const mpreal pow(const mpreal& a, const long double b, mp_rnd_t rnd_mode) -{ - return pow(a,mpreal(b),rnd_mode); -} - -inline const mpreal pow(const mpreal& a, const double b, mp_rnd_t rnd_mode) -{ - return pow(a,mpreal(b),rnd_mode); -} - -inline const mpreal pow(const unsigned long int a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd()) -{ - mpreal x(a); - mpfr_ui_pow(x.mp,a,b.mp,rnd_mode); - return x; -} - -inline const mpreal pow(const unsigned int a, const mpreal& b, mp_rnd_t rnd_mode) -{ - return pow(static_cast<unsigned long int>(a),b,rnd_mode); -} - -inline const mpreal pow(const long int a, const mpreal& b, mp_rnd_t rnd_mode) -{ - if (a>=0) return pow(static_cast<unsigned long int>(a),b,rnd_mode); - else return pow(mpreal(a),b,rnd_mode); -} - -inline const mpreal pow(const int a, const mpreal& b, mp_rnd_t rnd_mode) -{ - if (a>=0) return pow(static_cast<unsigned long int>(a),b,rnd_mode); - else return pow(mpreal(a),b,rnd_mode); -} - -inline const mpreal pow(const long double a, const mpreal& b, mp_rnd_t rnd_mode) -{ - return pow(mpreal(a),b,rnd_mode); -} - -inline const mpreal pow(const double a, const mpreal& b, mp_rnd_t rnd_mode) -{ - return pow(mpreal(a),b,rnd_mode); -} - -// pow unsigned long int -inline const mpreal pow(const unsigned long int a, const unsigned long int b, mp_rnd_t rnd_mode) -{ - mpreal x(a); - mpfr_ui_pow_ui(x.mp,a,b,rnd_mode); - return x; -} - -inline const mpreal pow(const unsigned long int a, const unsigned int b, mp_rnd_t rnd_mode) -{ - return pow(a,static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui -} - -inline const mpreal pow(const unsigned long int a, const long int b, mp_rnd_t rnd_mode) -{ - if(b>0) return pow(a,static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui - else return pow(a,mpreal(b),rnd_mode); //mpfr_ui_pow -} - -inline const mpreal pow(const unsigned long int a, const int b, mp_rnd_t rnd_mode) -{ - if(b>0) return pow(a,static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui - else return pow(a,mpreal(b),rnd_mode); //mpfr_ui_pow -} - -inline const mpreal pow(const unsigned long int a, const long double b, mp_rnd_t rnd_mode) -{ - return pow(a,mpreal(b),rnd_mode); //mpfr_ui_pow -} - -inline const mpreal pow(const unsigned long int a, const double b, mp_rnd_t rnd_mode) -{ - return pow(a,mpreal(b),rnd_mode); //mpfr_ui_pow -} - -// pow unsigned int -inline const mpreal pow(const unsigned int a, const unsigned long int b, mp_rnd_t rnd_mode) -{ - return pow(static_cast<unsigned long int>(a),b,rnd_mode); //mpfr_ui_pow_ui -} - -inline const mpreal pow(const unsigned int a, const unsigned int b, mp_rnd_t rnd_mode) -{ - return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui -} - -inline const mpreal pow(const unsigned int a, const long int b, mp_rnd_t rnd_mode) -{ - if(b>0) return pow(static_cast<unsigned 
long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui - else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow -} - -inline const mpreal pow(const unsigned int a, const int b, mp_rnd_t rnd_mode) -{ - if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui - else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow -} - -inline const mpreal pow(const unsigned int a, const long double b, mp_rnd_t rnd_mode) -{ - return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow -} - -inline const mpreal pow(const unsigned int a, const double b, mp_rnd_t rnd_mode) -{ - return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow -} - -// pow long int -inline const mpreal pow(const long int a, const unsigned long int b, mp_rnd_t rnd_mode) -{ - if (a>0) return pow(static_cast<unsigned long int>(a),b,rnd_mode); //mpfr_ui_pow_ui - else return pow(mpreal(a),b,rnd_mode); //mpfr_pow_ui -} - -inline const mpreal pow(const long int a, const unsigned int b, mp_rnd_t rnd_mode) -{ - if (a>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui - else return pow(mpreal(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_pow_ui -} - -inline const mpreal pow(const long int a, const long int b, mp_rnd_t rnd_mode) -{ - if (a>0) - { - if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui - else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow - }else{ - return pow(mpreal(a),b,rnd_mode); // mpfr_pow_si - } -} - -inline const mpreal pow(const long int a, const int b, mp_rnd_t rnd_mode) -{ - if (a>0) - { - if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui - else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow - }else{ - return pow(mpreal(a),static_cast<long int>(b),rnd_mode); // mpfr_pow_si - } -} - -inline const mpreal pow(const long int a, const long double b, mp_rnd_t rnd_mode) -{ - if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow - else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow -} - -inline const mpreal pow(const long int a, const double b, mp_rnd_t rnd_mode) -{ - if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow - else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow -} - -// pow int -inline const mpreal pow(const int a, const unsigned long int b, mp_rnd_t rnd_mode) -{ - if (a>0) return pow(static_cast<unsigned long int>(a),b,rnd_mode); //mpfr_ui_pow_ui - else return pow(mpreal(a),b,rnd_mode); //mpfr_pow_ui -} - -inline const mpreal pow(const int a, const unsigned int b, mp_rnd_t rnd_mode) -{ - if (a>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui - else return pow(mpreal(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_pow_ui -} - -inline const mpreal pow(const int a, const long int b, mp_rnd_t rnd_mode) -{ - if (a>0) - { - if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui - else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow - }else{ - return pow(mpreal(a),b,rnd_mode); // mpfr_pow_si - } -} - -inline const mpreal pow(const int a, const int b, 
mp_rnd_t rnd_mode) -{ - if (a>0) - { - if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui - else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow - }else{ - return pow(mpreal(a),static_cast<long int>(b),rnd_mode); // mpfr_pow_si - } -} - -inline const mpreal pow(const int a, const long double b, mp_rnd_t rnd_mode) -{ - if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow - else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow -} - -inline const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode) -{ - if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow - else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow -} - -// pow long double -inline const mpreal pow(const long double a, const long double b, mp_rnd_t rnd_mode) -{ - return pow(mpreal(a),mpreal(b),rnd_mode); -} - -inline const mpreal pow(const long double a, const unsigned long int b, mp_rnd_t rnd_mode) -{ - return pow(mpreal(a),b,rnd_mode); //mpfr_pow_ui -} - -inline const mpreal pow(const long double a, const unsigned int b, mp_rnd_t rnd_mode) -{ - return pow(mpreal(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_pow_ui -} - -inline const mpreal pow(const long double a, const long int b, mp_rnd_t rnd_mode) -{ - return pow(mpreal(a),b,rnd_mode); // mpfr_pow_si -} - -inline const mpreal pow(const long double a, const int b, mp_rnd_t rnd_mode) -{ - return pow(mpreal(a),static_cast<long int>(b),rnd_mode); // mpfr_pow_si -} - -inline const mpreal pow(const double a, const double b, mp_rnd_t rnd_mode) -{ - return pow(mpreal(a),mpreal(b),rnd_mode); -} - -inline const mpreal pow(const double a, const unsigned long int b, mp_rnd_t rnd_mode) -{ - return pow(mpreal(a),b,rnd_mode); // mpfr_pow_ui -} - -inline const mpreal pow(const double a, const unsigned int b, mp_rnd_t rnd_mode) -{ - return pow(mpreal(a),static_cast<unsigned long int>(b),rnd_mode); // mpfr_pow_ui -} - -inline const mpreal pow(const double a, const long int b, mp_rnd_t rnd_mode) -{ - return pow(mpreal(a),b,rnd_mode); // mpfr_pow_si -} - -inline const mpreal pow(const double a, const int b, mp_rnd_t rnd_mode) -{ - return pow(mpreal(a),static_cast<long int>(b),rnd_mode); // mpfr_pow_si -} -} // End of mpfr namespace - -// Explicit specialization of std::swap for mpreal numbers -// Thus standard algorithms will use efficient version of swap (due to Koenig lookup) -// Non-throwing swap C++ idiom: http://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Non-throwing_swap -namespace std -{ - // we are allowed to extend namespace std with specializations only - template <> - inline void swap(mpfr::mpreal& x, mpfr::mpreal& y) - { - return mpfr::swap(x, y); - } - - template<> - class numeric_limits<mpfr::mpreal> - { - public: - static const bool is_specialized = true; - static const bool is_signed = true; - static const bool is_integer = false; - static const bool is_exact = false; - static const int radix = 2; - - static const bool has_infinity = true; - static const bool has_quiet_NaN = true; - static const bool has_signaling_NaN = true; - - static const bool is_iec559 = true; // = IEEE 754 - static const bool is_bounded = true; - static const bool is_modulo = false; - static const bool traps = true; - static const bool tinyness_before = true; - - static const float_denorm_style has_denorm = denorm_absent; - - inline static mpfr::mpreal (min) (mp_prec_t precision = mpfr::mpreal::get_default_prec()) { 
return mpfr::minval(precision); } - inline static mpfr::mpreal (max) (mp_prec_t precision = mpfr::mpreal::get_default_prec()) { return mpfr::maxval(precision); } - inline static mpfr::mpreal lowest (mp_prec_t precision = mpfr::mpreal::get_default_prec()) { return -mpfr::maxval(precision); } - - // Returns smallest eps such that 1 + eps != 1 (classic machine epsilon) - inline static mpfr::mpreal epsilon(mp_prec_t precision = mpfr::mpreal::get_default_prec()) { return mpfr::machine_epsilon(precision); } - - // Returns smallest eps such that x + eps != x (relative machine epsilon) - inline static mpfr::mpreal epsilon(const mpfr::mpreal& x) { return mpfr::machine_epsilon(x); } - - inline static mpfr::mpreal round_error(mp_prec_t precision = mpfr::mpreal::get_default_prec()) - { - mp_rnd_t r = mpfr::mpreal::get_default_rnd(); - - if(r == GMP_RNDN) return mpfr::mpreal(0.5, precision); - else return mpfr::mpreal(1.0, precision); - } - - inline static const mpfr::mpreal infinity() { return mpfr::const_infinity(); } - inline static const mpfr::mpreal quiet_NaN() { return mpfr::mpreal().setNan(); } - inline static const mpfr::mpreal signaling_NaN() { return mpfr::mpreal().setNan(); } - inline static const mpfr::mpreal denorm_min() { return (min)(); } - - // Please note, exponent range is not fixed in MPFR - static const int min_exponent = MPFR_EMIN_DEFAULT; - static const int max_exponent = MPFR_EMAX_DEFAULT; - MPREAL_PERMISSIVE_EXPR static const int min_exponent10 = (int) (MPFR_EMIN_DEFAULT * 0.3010299956639811); - MPREAL_PERMISSIVE_EXPR static const int max_exponent10 = (int) (MPFR_EMAX_DEFAULT * 0.3010299956639811); - -#ifdef MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS - - // Following members should be constant according to standard, but they can be variable in MPFR - // So we define them as functions here. - // - // This is preferable way for std::numeric_limits<mpfr::mpreal> specialization. - // But it is incompatible with standard std::numeric_limits and might not work with other libraries, e.g. boost. - // See below for compatible implementation. - inline static float_round_style round_style() - { - mp_rnd_t r = mpfr::mpreal::get_default_rnd(); - - switch (r) - { - case GMP_RNDN: return round_to_nearest; - case GMP_RNDZ: return round_toward_zero; - case GMP_RNDU: return round_toward_infinity; - case GMP_RNDD: return round_toward_neg_infinity; - default: return round_indeterminate; - } - } - - inline static int digits() { return int(mpfr::mpreal::get_default_prec()); } - inline static int digits(const mpfr::mpreal& x) { return x.getPrecision(); } - - inline static int digits10(mp_prec_t precision = mpfr::mpreal::get_default_prec()) - { - return mpfr::bits2digits(precision); - } - - inline static int digits10(const mpfr::mpreal& x) - { - return mpfr::bits2digits(x.getPrecision()); - } - - inline static int max_digits10(mp_prec_t precision = mpfr::mpreal::get_default_prec()) - { - return digits10(precision); - } -#else - // Digits and round_style are NOT constants when it comes to mpreal. - // If possible, please use functions digits() and round_style() defined above. - // - // These (default) values are preserved for compatibility with existing libraries, e.g. boost. - // Change them accordingly to your application. - // - // For example, if you use 256 bits of precision uniformly in your program, then: - // digits = 256 - // digits10 = 77 - // max_digits10 = 78 - // - // Approximate formula for decimal digits is: digits10 = floor(log10(2) * digits). See bits2digits() for more details. 
- - static const std::float_round_style round_style = round_to_nearest; - static const int digits = 53; - static const int digits10 = 15; - static const int max_digits10 = 16; -#endif - }; - -} - -#endif /* __MPREAL_H__ */ +/*
+ MPFR C++: Multi-precision floating point number class for C++.
+ Based on MPFR library: http://mpfr.org
+
+ Project homepage: http://www.holoborodko.com/pavel/mpfr
+ Contact e-mail: pavel@holoborodko.com
+
+ Copyright (c) 2008-2015 Pavel Holoborodko
+
+ Contributors:
+ Dmitriy Gubanov, Konstantin Holoborodko, Brian Gladman,
+ Helmut Jarausch, Fokko Beekhof, Ulrich Mutze, Heinz van Saanen,
+ Pere Constans, Peter van Hoof, Gael Guennebaud, Tsai Chia Cheng,
+ Alexei Zubanov, Jauhien Piatlicki, Victor Berger, John Westwood,
+ Petr Aleksandrov, Orion Poplawski, Charles Karney, Arash Partow,
+ Rodney James, Jorge Leitao.
+
+ Licensing:
+ (A) MPFR C++ is under GNU General Public License ("GPL").
+
+ (B) Non-free licenses may also be purchased from the author, for users who
+ do not want their programs protected by the GPL.
+
+ The non-free licenses are for users that wish to use MPFR C++ in
+ their products but are unwilling to release their software
+ under the GPL (which would require them to release source code
+ and allow free redistribution).
+
+ Such users can purchase an unlimited-use license from the author.
+ Contact us for more details.
+
+ GNU General Public License ("GPL") copyright permissions statement:
+ **************************************************************************
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __MPREAL_H__
+#define __MPREAL_H__
+
+#include <string>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <cfloat>
+#include <cmath>
+#include <cstring>
+#include <limits>
+#include <complex>
+#include <algorithm>
+
+// Options
+#define MPREAL_HAVE_MSVC_DEBUGVIEW // Enable Debugger Visualizer for "Debug" builds in MSVC.
+#define MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS // Enable extended std::numeric_limits<mpfr::mpreal> specialization.
+ // Meaning that "digits", "round_style" and similar members are defined as functions, not constants.
+ // See std::numeric_limits<mpfr::mpreal> at the end of the file for more information.
+
+// Library version
+#define MPREAL_VERSION_MAJOR 3
+#define MPREAL_VERSION_MINOR 6
+#define MPREAL_VERSION_PATCHLEVEL 2
+#define MPREAL_VERSION_STRING "3.6.2"
+
+// Detect compiler using signatures from http://predef.sourceforge.net/
+#if defined(__GNUC__)
+ #define IsInf(x) (isinf)(x) // GNU C++/Intel ICC compiler on Linux
+#elif defined(_MSC_VER) // Microsoft Visual C++
+ #define IsInf(x) (!_finite(x))
+#else
+ #define IsInf(x) (std::isinf)(x) // GNU C/C++ (and/or other compilers), just hope for C99 conformance
+#endif
+
+// A Clang feature extension to determine compiler features.
+#ifndef __has_feature
+ #define __has_feature(x) 0
+#endif
+
+// Detect support for r-value references (move semantics). Borrowed from Eigen.
+#if (__has_feature(cxx_rvalue_references) || \
+ defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L || \
+ (defined(_MSC_VER) && _MSC_VER >= 1600))
+
+ #define MPREAL_HAVE_MOVE_SUPPORT
+
+ // Use fields in mpfr_t structure to check if it was initialized / set dummy initialization
+ #define mpfr_is_initialized(x) (0 != (x)->_mpfr_d)
+ #define mpfr_set_uninitialized(x) ((x)->_mpfr_d = 0 )
+#endif
+
+// Detect support for explicit converters.
+#if (__has_feature(cxx_explicit_conversions) || \
+ (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GNUC_MINOR__ >= 5) || __cplusplus >= 201103L || \
+ (defined(_MSC_VER) && _MSC_VER >= 1800))
+
+ #define MPREAL_HAVE_EXPLICIT_CONVERTERS
+#endif
+
+#define MPFR_USE_INTMAX_T // Enable 64-bit integer types - should be defined before mpfr.h
+
+#if defined(MPREAL_HAVE_MSVC_DEBUGVIEW) && defined(_MSC_VER) && defined(_DEBUG)
+ #define MPREAL_MSVC_DEBUGVIEW_CODE DebugView = toString();
+ #define MPREAL_MSVC_DEBUGVIEW_DATA std::string DebugView;
+#else
+ #define MPREAL_MSVC_DEBUGVIEW_CODE
+ #define MPREAL_MSVC_DEBUGVIEW_DATA
+#endif
+
+#include <mpfr.h>
+
+#if (MPFR_VERSION < MPFR_VERSION_NUM(3,0,0))
+ #include <cstdlib> // Needed for random()
+#endif
+
+// Less important options
+#define MPREAL_DOUBLE_BITS_OVERFLOW -1 // Triggers overflow exception during conversion to double if mpreal
+ // cannot fit in MPREAL_DOUBLE_BITS_OVERFLOW bits
+ // = -1 disables overflow checks (default)
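
// Usage sketch (editorial; not part of the commit): the option above is a
// compile-time knob, so changing it means editing the define in this header.
// Per the documented behavior, a non-negative value makes conversion to
// double throw when the mpreal does not fit in that many bits:
//
//   #define MPREAL_DOUBLE_BITS_OVERFLOW 53   // edited in mpreal.h (assumption)
//   double d = x.toDouble();                 // would throw instead of rounding silently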
+
+// Fast replacement for mpfr_set_zero(x, +1):
+// (a) uses low-level data members, might not be compatible with new versions of MPFR
+// (b) sign is not set, add (x)->_mpfr_sign = 1;
+#define mpfr_set_zero_fast(x) ((x)->_mpfr_exp = __MPFR_EXP_ZERO)
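
// Usage sketch (editorial; not part of the commit): applying caveat (b) above.
// The macro only touches the exponent field, so the sign must be set by hand:
//
//   mpfr_t t;
//   mpfr_init2(t, 128);        // 128-bit precision
//   mpfr_set_zero_fast(t);     // marks the value as zero via _mpfr_exp
//   t->_mpfr_sign = 1;         // make it +0, as the note above prescribes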
+
+#if defined(__GNUC__)
+ #define MPREAL_PERMISSIVE_EXPR __extension__
+#else
+ #define MPREAL_PERMISSIVE_EXPR
+#endif
+
+namespace mpfr {
+
+class mpreal {
+private:
+ mpfr_t mp;
+
+public:
+
+ // Get default rounding mode & precision
+ inline static mp_rnd_t get_default_rnd() { return (mp_rnd_t)(mpfr_get_default_rounding_mode()); }
+ inline static mp_prec_t get_default_prec() { return mpfr_get_default_prec(); }
+
+ // Constructors && type conversions
+ mpreal();
+ mpreal(const mpreal& u);
+ mpreal(const mpf_t u);
+ mpreal(const mpz_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const mpq_t u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const double u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const long double u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const unsigned long long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const long long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const unsigned long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const unsigned int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const long int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const int u, mp_prec_t prec = mpreal::get_default_prec(), mp_rnd_t mode = mpreal::get_default_rnd());
+
+ // Construct mpreal from mpfr_t structure.
+ // shared = true allows avoiding a deep copy, so that the mpreal and 'u' share the same data & pointers.
+ mpreal(const mpfr_t u, bool shared = false);
+
+ mpreal(const char* s, mp_prec_t prec = mpreal::get_default_prec(), int base = 10, mp_rnd_t mode = mpreal::get_default_rnd());
+ mpreal(const std::string& s, mp_prec_t prec = mpreal::get_default_prec(), int base = 10, mp_rnd_t mode = mpreal::get_default_rnd());
+
+ ~mpreal();
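
// Usage sketch (editorial; not part of the commit): a few of the constructors
// declared above. Precision is in bits; string constructors take an optional base.
//
//   mpfr::mpreal a;                          // default precision and rounding
//   mpfr::mpreal b(3.25, 256);               // from double, 256-bit precision
//   mpfr::mpreal c("1.1000100111", 64, 2);   // from string in base 2, 64 bits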
+
+#ifdef MPREAL_HAVE_MOVE_SUPPORT
+ mpreal& operator=(mpreal&& v);
+ mpreal(mpreal&& u);
+#endif
+
+ // Operations
+ // =
+ // +, -, *, /, ++, --, <<, >>
+ // *=, +=, -=, /=,
+ // <, >, ==, <=, >=
+
+ // =
+ mpreal& operator=(const mpreal& v);
+ mpreal& operator=(const mpf_t v);
+ mpreal& operator=(const mpz_t v);
+ mpreal& operator=(const mpq_t v);
+ mpreal& operator=(const long double v);
+ mpreal& operator=(const double v);
+ mpreal& operator=(const unsigned long int v);
+ mpreal& operator=(const unsigned long long int v);
+ mpreal& operator=(const long long int v);
+ mpreal& operator=(const unsigned int v);
+ mpreal& operator=(const long int v);
+ mpreal& operator=(const int v);
+ mpreal& operator=(const char* s);
+ mpreal& operator=(const std::string& s);
+ template <typename real_t> mpreal& operator= (const std::complex<real_t>& z);
+
+ // +
+ mpreal& operator+=(const mpreal& v);
+ mpreal& operator+=(const mpf_t v);
+ mpreal& operator+=(const mpz_t v);
+ mpreal& operator+=(const mpq_t v);
+ mpreal& operator+=(const long double u);
+ mpreal& operator+=(const double u);
+ mpreal& operator+=(const unsigned long int u);
+ mpreal& operator+=(const unsigned int u);
+ mpreal& operator+=(const long int u);
+ mpreal& operator+=(const int u);
+
+ mpreal& operator+=(const long long int u);
+ mpreal& operator+=(const unsigned long long int u);
+ mpreal& operator-=(const long long int u);
+ mpreal& operator-=(const unsigned long long int u);
+ mpreal& operator*=(const long long int u);
+ mpreal& operator*=(const unsigned long long int u);
+ mpreal& operator/=(const long long int u);
+ mpreal& operator/=(const unsigned long long int u);
+
+ const mpreal operator+() const;
+ mpreal& operator++ ();
+ const mpreal operator++ (int);
+
+ // -
+ mpreal& operator-=(const mpreal& v);
+ mpreal& operator-=(const mpz_t v);
+ mpreal& operator-=(const mpq_t v);
+ mpreal& operator-=(const long double u);
+ mpreal& operator-=(const double u);
+ mpreal& operator-=(const unsigned long int u);
+ mpreal& operator-=(const unsigned int u);
+ mpreal& operator-=(const long int u);
+ mpreal& operator-=(const int u);
+ const mpreal operator-() const;
+ friend const mpreal operator-(const unsigned long int b, const mpreal& a);
+ friend const mpreal operator-(const unsigned int b, const mpreal& a);
+ friend const mpreal operator-(const long int b, const mpreal& a);
+ friend const mpreal operator-(const int b, const mpreal& a);
+ friend const mpreal operator-(const double b, const mpreal& a);
+ mpreal& operator-- ();
+ const mpreal operator-- (int);
+
+ // *
+ mpreal& operator*=(const mpreal& v);
+ mpreal& operator*=(const mpz_t v);
+ mpreal& operator*=(const mpq_t v);
+ mpreal& operator*=(const long double v);
+ mpreal& operator*=(const double v);
+ mpreal& operator*=(const unsigned long int v);
+ mpreal& operator*=(const unsigned int v);
+ mpreal& operator*=(const long int v);
+ mpreal& operator*=(const int v);
+
+ // /
+ mpreal& operator/=(const mpreal& v);
+ mpreal& operator/=(const mpz_t v);
+ mpreal& operator/=(const mpq_t v);
+ mpreal& operator/=(const long double v);
+ mpreal& operator/=(const double v);
+ mpreal& operator/=(const unsigned long int v);
+ mpreal& operator/=(const unsigned int v);
+ mpreal& operator/=(const long int v);
+ mpreal& operator/=(const int v);
+ friend const mpreal operator/(const unsigned long int b, const mpreal& a);
+ friend const mpreal operator/(const unsigned int b, const mpreal& a);
+ friend const mpreal operator/(const long int b, const mpreal& a);
+ friend const mpreal operator/(const int b, const mpreal& a);
+ friend const mpreal operator/(const double b, const mpreal& a);
+
+ //<<= Fast Multiplication by 2^u
+ mpreal& operator<<=(const unsigned long int u);
+ mpreal& operator<<=(const unsigned int u);
+ mpreal& operator<<=(const long int u);
+ mpreal& operator<<=(const int u);
+
+ //>>= Fast Division by 2^u
+ mpreal& operator>>=(const unsigned long int u);
+ mpreal& operator>>=(const unsigned int u);
+ mpreal& operator>>=(const long int u);
+ mpreal& operator>>=(const int u);
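
// Usage sketch (editorial; not part of the commit): the shift operators above
// scale by powers of two through exponent arithmetic, so the result is exact:
//
//   mpfr::mpreal x(3);
//   x <<= 4;   // x == 48  (3 * 2^4)
//   x >>= 2;   // x == 12  (48 / 2^2)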
+
+ // Type Conversion operators
+ bool toBool ( ) const;
+ long toLong (mp_rnd_t mode = GMP_RNDZ) const;
+ unsigned long toULong (mp_rnd_t mode = GMP_RNDZ) const;
+ long long toLLong (mp_rnd_t mode = GMP_RNDZ) const;
+ unsigned long long toULLong (mp_rnd_t mode = GMP_RNDZ) const;
+ float toFloat (mp_rnd_t mode = GMP_RNDN) const;
+ double toDouble (mp_rnd_t mode = GMP_RNDN) const;
+ long double toLDouble (mp_rnd_t mode = GMP_RNDN) const;
+
+#if defined (MPREAL_HAVE_EXPLICIT_CONVERTERS)
+ explicit operator bool () const { return toBool(); }
+ explicit operator int () const { return int(toLong()); }
+ explicit operator long () const { return toLong(); }
+ explicit operator long long () const { return toLLong(); }
+ explicit operator unsigned () const { return unsigned(toULong()); }
+ explicit operator unsigned long () const { return toULong(); }
+ explicit operator unsigned long long () const { return toULLong(); }
+ explicit operator float () const { return toFloat(); }
+ explicit operator double () const { return toDouble(); }
+ explicit operator long double () const { return toLDouble(); }
+#endif
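
// Usage sketch (editorial; not part of the commit): with explicit converters
// available, narrowing to a builtin type requires a visible cast, which
// prevents silent precision loss:
//
//   mpfr::mpreal x("2.5");
//   double d = static_cast<double>(x);   // operator double(), GMP_RNDN default
//   long   n = static_cast<long>(x);     // operator long(), truncates (GMP_RNDZ)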
+
+ // Get raw pointers so that mpreal can be directly used in raw mpfr_* functions
+ ::mpfr_ptr mpfr_ptr();
+ ::mpfr_srcptr mpfr_ptr() const;
+ ::mpfr_srcptr mpfr_srcptr() const;
+
+ // Convert mpreal to string with n significant digits in base b
+ // n = -1 -> convert with the maximum available digits
+ std::string toString(int n = -1, int b = 10, mp_rnd_t mode = mpreal::get_default_rnd()) const;
+
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
+ std::string toString(const std::string& format) const;
+#endif
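
// Usage sketch (editorial; not part of the commit): both toString() overloads
// declared above, the second assuming MPFR >= 2.4.0:
//
//   mpfr::mpreal pi = mpfr::const_pi(256);    // pi to 256 bits
//   std::string s1 = pi.toString(30);         // 30 significant digits, base 10
//   std::string s2 = pi.toString("%.40RNf");  // MPFR printf-style format string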
+
+ std::ostream& output(std::ostream& os) const;
+
+ // Math Functions
+ friend const mpreal sqr (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal sqrt(const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal sqrt(const unsigned long int v, mp_rnd_t rnd_mode);
+ friend const mpreal cbrt(const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal root(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode);
+ friend const mpreal pow (const mpreal& a, const mpreal& b, mp_rnd_t rnd_mode);
+ friend const mpreal pow (const mpreal& a, const mpz_t b, mp_rnd_t rnd_mode);
+ friend const mpreal pow (const mpreal& a, const unsigned long int b, mp_rnd_t rnd_mode);
+ friend const mpreal pow (const mpreal& a, const long int b, mp_rnd_t rnd_mode);
+ friend const mpreal pow (const unsigned long int a, const mpreal& b, mp_rnd_t rnd_mode);
+ friend const mpreal pow (const unsigned long int a, const unsigned long int b, mp_rnd_t rnd_mode);
+ friend const mpreal fabs(const mpreal& v, mp_rnd_t rnd_mode);
+
+ friend const mpreal abs(const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal dim(const mpreal& a, const mpreal& b, mp_rnd_t rnd_mode);
+ friend inline const mpreal mul_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode);
+ friend inline const mpreal mul_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode);
+ friend inline const mpreal div_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode);
+ friend inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode);
+ friend int cmpabs(const mpreal& a,const mpreal& b);
+
+ friend const mpreal log (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal log2 (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal logb (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal log10(const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal exp (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal exp2 (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal exp10(const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal log1p(const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal expm1(const mpreal& v, mp_rnd_t rnd_mode);
+
+ friend const mpreal cos(const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal sin(const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal tan(const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal sec(const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal csc(const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal cot(const mpreal& v, mp_rnd_t rnd_mode);
+ friend int sin_cos(mpreal& s, mpreal& c, const mpreal& v, mp_rnd_t rnd_mode);
+
+ friend const mpreal acos (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal asin (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal atan (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal atan2 (const mpreal& y, const mpreal& x, mp_rnd_t rnd_mode);
+ friend const mpreal acot (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal asec (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal acsc (const mpreal& v, mp_rnd_t rnd_mode);
+
+ friend const mpreal cosh (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal sinh (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal tanh (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal sech (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal csch (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal coth (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal acosh (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal asinh (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal atanh (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal acoth (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal asech (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal acsch (const mpreal& v, mp_rnd_t rnd_mode);
+
+ friend const mpreal hypot (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
+
+ friend const mpreal fac_ui (unsigned long int v, mp_prec_t prec, mp_rnd_t rnd_mode);
+ friend const mpreal eint (const mpreal& v, mp_rnd_t rnd_mode);
+
+ friend const mpreal gamma (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal tgamma (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal lngamma (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal lgamma (const mpreal& v, int *signp, mp_rnd_t rnd_mode);
+ friend const mpreal zeta (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal erf (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal erfc (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal besselj0 (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal besselj1 (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal besseljn (long n, const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal bessely0 (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal bessely1 (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal besselyn (long n, const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal fma (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode);
+ friend const mpreal fms (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode);
+ friend const mpreal agm (const mpreal& v1, const mpreal& v2, mp_rnd_t rnd_mode);
+ friend const mpreal sum (const mpreal tab[], const unsigned long int n, int& status, mp_rnd_t rnd_mode);
+ friend int sgn(const mpreal& v); // returns -1 or +1
+
+// MPFR 2.4.0 Specifics
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
+ friend int sinh_cosh (mpreal& s, mpreal& c, const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal li2 (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal fmod (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
+ friend const mpreal rec_sqrt (const mpreal& v, mp_rnd_t rnd_mode);
+
+ // MATLAB's semantic equivalents
+ friend const mpreal rem (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode); // Remainder after division
+ friend const mpreal mod (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode); // Modulus after division
+#endif
+
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
+ friend const mpreal digamma (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal ai (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal urandom (gmp_randstate_t& state, mp_rnd_t rnd_mode); // use gmp_randinit_default() to init state, gmp_randclear() to clear
+#endif
+
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0))
+ friend const mpreal grandom (gmp_randstate_t& state, mp_rnd_t rnd_mode); // use gmp_randinit_default() to init state, gmp_randclear() to clear
+ friend const mpreal grandom (unsigned int seed);
+#endif
+
+ // Uniformly distributed random number generation in [0,1], using the
+ // Mersenne Twister algorithm by default.
+ // Use the parameter to set the seed, e.g.: random((unsigned)time(NULL)).
+ // See urandom() for more precise control.
+ friend const mpreal random(unsigned int seed);
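+ //
+ // Illustrative usage sketch (not part of the original interface):
+ //
+ //   mpreal::set_default_prec(256);
+ //   mpreal r = mpfr::random((unsigned)time(NULL)); // r in [0, 1]
+ //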
+
+ // Splits mpreal value into fractional and integer parts.
+ // Returns fractional part and stores integer part in n.
+ friend const mpreal modf(const mpreal& v, mpreal& n);
+
+ // Constants
+ // remember to call mpfr_free_cache() in every thread that uses these constant functions
+ friend const mpreal const_log2 (mp_prec_t prec, mp_rnd_t rnd_mode);
+ friend const mpreal const_pi (mp_prec_t prec, mp_rnd_t rnd_mode);
+ friend const mpreal const_euler (mp_prec_t prec, mp_rnd_t rnd_mode);
+ friend const mpreal const_catalan (mp_prec_t prec, mp_rnd_t rnd_mode);
+
+ // returns +inf if sign >= 0, otherwise -inf
+ friend const mpreal const_infinity(int sign, mp_prec_t prec);
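+ //
+ // Illustrative usage sketch (assumption): compute a constant, then release
+ // the per-thread cache once it is no longer needed.
+ //
+ //   mpreal pi = mpfr::const_pi(digits2bits(100)); // pi to ~100 decimal digits
+ //   // ... use pi ...
+ //   mpfr_free_cache();
+ //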
+
+ // Output / Input
+ friend std::ostream& operator<<(std::ostream& os, const mpreal& v);
+ friend std::istream& operator>>(std::istream& is, mpreal& v);
+
+ // Integer Related Functions
+ friend const mpreal rint (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal ceil (const mpreal& v);
+ friend const mpreal floor(const mpreal& v);
+ friend const mpreal round(const mpreal& v);
+ friend const mpreal trunc(const mpreal& v);
+ friend const mpreal rint_ceil (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal rint_floor (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal rint_round (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal rint_trunc (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal frac (const mpreal& v, mp_rnd_t rnd_mode);
+ friend const mpreal remainder ( const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
+ friend const mpreal remquo (long* q, const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
+
+ // Miscellaneous Functions
+ friend const mpreal nexttoward (const mpreal& x, const mpreal& y);
+ friend const mpreal nextabove (const mpreal& x);
+ friend const mpreal nextbelow (const mpreal& x);
+
+ // use gmp_randinit_default() to init state, gmp_randclear() to clear
+ friend const mpreal urandomb (gmp_randstate_t& state);
+
+// MPFR < 2.4.2 Specifics
+#if (MPFR_VERSION <= MPFR_VERSION_NUM(2,4,2))
+ friend const mpreal random2 (mp_size_t size, mp_exp_t exp);
+#endif
+
+ // Instance Checkers
+ friend bool (isnan) (const mpreal& v);
+ friend bool (isinf) (const mpreal& v);
+ friend bool (isfinite) (const mpreal& v);
+
+ friend bool isnum (const mpreal& v);
+ friend bool iszero (const mpreal& v);
+ friend bool isint (const mpreal& v);
+
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
+ friend bool isregular(const mpreal& v);
+#endif
+
+ // Set/Get instance properties
+ inline mp_prec_t get_prec() const;
+ inline void set_prec(mp_prec_t prec, mp_rnd_t rnd_mode = get_default_rnd()); // Change precision with rounding mode
+
+ // Aliases for get_prec(), set_prec() - needed for compatibility with std::complex<mpreal> interface
+ inline mpreal& setPrecision(int Precision, mp_rnd_t RoundingMode = get_default_rnd());
+ inline int getPrecision() const;
+
+ // Set mpreal to +/- inf, NaN, +/-0
+ mpreal& setInf (int Sign = +1);
+ mpreal& setNan ();
+ mpreal& setZero (int Sign = +1);
+ mpreal& setSign (int Sign, mp_rnd_t RoundingMode = get_default_rnd());
+
+ // Exponent
+ mp_exp_t get_exp();
+ int set_exp(mp_exp_t e);
+ int check_range (int t, mp_rnd_t rnd_mode = get_default_rnd());
+ int subnormalize (int t, mp_rnd_t rnd_mode = get_default_rnd());
+
+ // Checks whether a double fits into n bits without rounding (guards against inexact conversion)
+ inline bool fits_in_bits(double x, int n);
+
+ // Set/Get global properties
+ static void set_default_prec(mp_prec_t prec);
+ static void set_default_rnd(mp_rnd_t rnd_mode);
+
+ static mp_exp_t get_emin (void);
+ static mp_exp_t get_emax (void);
+ static mp_exp_t get_emin_min (void);
+ static mp_exp_t get_emin_max (void);
+ static mp_exp_t get_emax_min (void);
+ static mp_exp_t get_emax_max (void);
+ static int set_emin (mp_exp_t exp);
+ static int set_emax (mp_exp_t exp);
+
+ // Efficient swapping of two mpreal values - needed for std algorithms
+ friend void swap(mpreal& x, mpreal& y);
+
+ friend const mpreal fmax(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
+ friend const mpreal fmin(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode);
+
+private:
+ // Human-friendly debug preview in Visual Studio.
+ // Put one of these lines:
+ //
+ // mpfr::mpreal=<DebugView> ; Show value only
+ // mpfr::mpreal=<DebugView>, <mp[0]._mpfr_prec,u>bits ; Show value & precision
+ //
+ // at the beginning of
+ // [Visual Studio Installation Folder]\Common7\Packages\Debugger\autoexp.dat
+ MPREAL_MSVC_DEBUGVIEW_DATA
+
+ // "Smart" resources deallocation. Checks if instance initialized before deletion.
+ void clear(::mpfr_ptr);
+};
+
+//////////////////////////////////////////////////////////////////////////
+// Exceptions
+class conversion_overflow : public std::exception {
+public:
+ std::string why() { return "inexact conversion from floating point"; }
+};
+
+//////////////////////////////////////////////////////////////////////////
+// Constructors & converters
+// Default constructor: creates an mp number and initializes it to 0.
+inline mpreal::mpreal()
+{
+ mpfr_init2(mpfr_ptr(), mpreal::get_default_prec());
+ mpfr_set_zero_fast(mpfr_ptr());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const mpreal& u)
+{
+ mpfr_init2(mpfr_ptr(),mpfr_get_prec(u.mpfr_srcptr()));
+ mpfr_set (mpfr_ptr(),u.mpfr_srcptr(),mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+#ifdef MPREAL_HAVE_MOVE_SUPPORT
+inline mpreal::mpreal(mpreal&& other)
+{
+ mpfr_set_uninitialized(mpfr_ptr()); // after the swap below, "other" holds no pointer to actual data
+ mpfr_swap(mpfr_ptr(), other.mpfr_ptr());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal& mpreal::operator=(mpreal&& other)
+{
+ mpfr_swap(mpfr_ptr(), other.mpfr_ptr());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+#endif
+
+inline mpreal::mpreal(const mpfr_t u, bool shared)
+{
+ if(shared)
+ {
+ std::memcpy(mpfr_ptr(), u, sizeof(mpfr_t));
+ }
+ else
+ {
+ mpfr_init2(mpfr_ptr(), mpfr_get_prec(u));
+ mpfr_set (mpfr_ptr(), u, mpreal::get_default_rnd());
+ }
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const mpf_t u)
+{
+ mpfr_init2(mpfr_ptr(),(mp_prec_t) mpf_get_prec(u)); // (gmp: mp_bitcnt_t) unsigned long -> long (mpfr: mp_prec_t)
+ mpfr_set_f(mpfr_ptr(),u,mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const mpz_t u, mp_prec_t prec, mp_rnd_t mode)
+{
+ mpfr_init2(mpfr_ptr(), prec);
+ mpfr_set_z(mpfr_ptr(), u, mode);
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const mpq_t u, mp_prec_t prec, mp_rnd_t mode)
+{
+ mpfr_init2(mpfr_ptr(), prec);
+ mpfr_set_q(mpfr_ptr(), u, mode);
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const double u, mp_prec_t prec, mp_rnd_t mode)
+{
+ mpfr_init2(mpfr_ptr(), prec);
+
+#if (MPREAL_DOUBLE_BITS_OVERFLOW > -1)
+ if(fits_in_bits(u, MPREAL_DOUBLE_BITS_OVERFLOW))
+ {
+ mpfr_set_d(mpfr_ptr(), u, mode);
+ } else
+ throw conversion_overflow();
+#else
+ mpfr_set_d(mpfr_ptr(), u, mode);
+#endif
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const long double u, mp_prec_t prec, mp_rnd_t mode)
+{
+ mpfr_init2 (mpfr_ptr(), prec);
+ mpfr_set_ld(mpfr_ptr(), u, mode);
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const unsigned long long int u, mp_prec_t prec, mp_rnd_t mode)
+{
+ mpfr_init2 (mpfr_ptr(), prec);
+ mpfr_set_uj(mpfr_ptr(), u, mode);
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const long long int u, mp_prec_t prec, mp_rnd_t mode)
+{
+ mpfr_init2 (mpfr_ptr(), prec);
+ mpfr_set_sj(mpfr_ptr(), u, mode);
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const unsigned long int u, mp_prec_t prec, mp_rnd_t mode)
+{
+ mpfr_init2 (mpfr_ptr(), prec);
+ mpfr_set_ui(mpfr_ptr(), u, mode);
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const unsigned int u, mp_prec_t prec, mp_rnd_t mode)
+{
+ mpfr_init2 (mpfr_ptr(), prec);
+ mpfr_set_ui(mpfr_ptr(), u, mode);
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const long int u, mp_prec_t prec, mp_rnd_t mode)
+{
+ mpfr_init2 (mpfr_ptr(), prec);
+ mpfr_set_si(mpfr_ptr(), u, mode);
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const int u, mp_prec_t prec, mp_rnd_t mode)
+{
+ mpfr_init2 (mpfr_ptr(), prec);
+ mpfr_set_si(mpfr_ptr(), u, mode);
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const char* s, mp_prec_t prec, int base, mp_rnd_t mode)
+{
+ mpfr_init2 (mpfr_ptr(), prec);
+ mpfr_set_str(mpfr_ptr(), s, base, mode);
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mpreal::mpreal(const std::string& s, mp_prec_t prec, int base, mp_rnd_t mode)
+{
+ mpfr_init2 (mpfr_ptr(), prec);
+ mpfr_set_str(mpfr_ptr(), s.c_str(), base, mode);
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline void mpreal::clear(::mpfr_ptr x)
+{
+#ifdef MPREAL_HAVE_MOVE_SUPPORT
+ if(mpfr_is_initialized(x))
+#endif
+ mpfr_clear(x);
+}
+
+inline mpreal::~mpreal()
+{
+ clear(mpfr_ptr());
+}
+
+// internal namespace needed for template magic
+namespace internal{
+
+ // Use SFINAE to restrict instantiation of the arithmetic operators to numeric types only.
+ // This is needed for smooth integration with libraries based on expression templates, like Eigen.
+ // TODO: Do the same for boolean operators.
+ template <typename ArgumentType> struct result_type {};
+
+ template <> struct result_type<mpreal> {typedef mpreal type;};
+ template <> struct result_type<mpz_t> {typedef mpreal type;};
+ template <> struct result_type<mpq_t> {typedef mpreal type;};
+ template <> struct result_type<long double> {typedef mpreal type;};
+ template <> struct result_type<double> {typedef mpreal type;};
+ template <> struct result_type<unsigned long int> {typedef mpreal type;};
+ template <> struct result_type<unsigned int> {typedef mpreal type;};
+ template <> struct result_type<long int> {typedef mpreal type;};
+ template <> struct result_type<int> {typedef mpreal type;};
+ template <> struct result_type<long long> {typedef mpreal type;};
+ template <> struct result_type<unsigned long long> {typedef mpreal type;};
+}
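+
+// For example (illustrative): given mpreal a, the expression a + 2 instantiates
+// operator+ below with Rhs = int because result_type<int> is specialized, while
+// for an unrelated class type the template is silently removed from overload
+// resolution instead of competing with that type's own operators.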
+
+// + Addition
+template <typename Rhs>
+inline const typename internal::result_type<Rhs>::type
+ operator+(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) += rhs; }
+
+template <typename Lhs>
+inline const typename internal::result_type<Lhs>::type
+ operator+(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) += lhs; }
+
+// - Subtraction
+template <typename Rhs>
+inline const typename internal::result_type<Rhs>::type
+ operator-(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) -= rhs; }
+
+template <typename Lhs>
+inline const typename internal::result_type<Lhs>::type
+ operator-(const Lhs& lhs, const mpreal& rhs){ return mpreal(lhs) -= rhs; }
+
+// * Multiplication
+template <typename Rhs>
+inline const typename internal::result_type<Rhs>::type
+ operator*(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) *= rhs; }
+
+template <typename Lhs>
+inline const typename internal::result_type<Lhs>::type
+ operator*(const Lhs& lhs, const mpreal& rhs){ return mpreal(rhs) *= lhs; }
+
+// / Division
+template <typename Rhs>
+inline const typename internal::result_type<Rhs>::type
+ operator/(const mpreal& lhs, const Rhs& rhs){ return mpreal(lhs) /= rhs; }
+
+template <typename Lhs>
+inline const typename internal::result_type<Lhs>::type
+ operator/(const Lhs& lhs, const mpreal& rhs){ return mpreal(lhs) /= rhs; }
+
+//////////////////////////////////////////////////////////////////////////
+// sqrt
+const mpreal sqrt(const unsigned int v, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal sqrt(const long int v, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal sqrt(const int v, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal sqrt(const long double v, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal sqrt(const double v, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+
+// abs
+inline const mpreal abs(const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd());
+
+//////////////////////////////////////////////////////////////////////////
+// pow
+const mpreal pow(const mpreal& a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const mpreal& a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const mpreal& a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const mpreal& a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+
+const mpreal pow(const unsigned int a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const long int a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const int a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const long double a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const double a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+
+const mpreal pow(const unsigned long int a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const unsigned long int a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const unsigned long int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const unsigned long int a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const unsigned long int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+
+const mpreal pow(const unsigned int a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const unsigned int a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const unsigned int a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const unsigned int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const unsigned int a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const unsigned int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+
+const mpreal pow(const long int a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const long int a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const long int a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const long int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const long int a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const long int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+
+const mpreal pow(const int a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const int a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const int a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const int a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const int a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+
+const mpreal pow(const long double a, const long double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const long double a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const long double a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const long double a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const long double a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+
+const mpreal pow(const double a, const double b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const double a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const double a, const unsigned int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const double a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+const mpreal pow(const double a, const int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+
+inline const mpreal mul_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+inline const mpreal mul_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+inline const mpreal div_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode = mpreal::get_default_rnd());
+
+//////////////////////////////////////////////////////////////////////////
+// Estimate machine epsilon for the given precision
+// Returns smallest eps such that 1.0 + eps != 1.0
+inline mpreal machine_epsilon(mp_prec_t prec = mpreal::get_default_prec());
+
+// Returns smallest eps such that x + eps != x (relative machine epsilon)
+inline mpreal machine_epsilon(const mpreal& x);
+
+// Gives max & min values for the required precision,
+// minval is 'safe' meaning 1 / minval does not overflow
+// maxval is 'safe' meaning 1 / maxval does not underflow
+inline mpreal minval(mp_prec_t prec = mpreal::get_default_prec());
+inline mpreal maxval(mp_prec_t prec = mpreal::get_default_prec());
+
+// 'Dirty' equality check 1: |a-b| < min{|a|,|b|} * eps
+inline bool isEqualFuzzy(const mpreal& a, const mpreal& b, const mpreal& eps);
+
+// 'Dirty' equality check 2: |a-b| < min{|a|,|b|} * eps( min{|a|,|b|} )
+inline bool isEqualFuzzy(const mpreal& a, const mpreal& b);
+
+// 'Bitwise' equality check.
+// maxUlps - a and b may differ by at most maxUlps units in the last place (ULPs).
+inline bool isEqualUlps(const mpreal& a, const mpreal& b, int maxUlps);
+
+//////////////////////////////////////////////////////////////////////////
+// Convert precision in 'bits' to decimal digits and vice versa.
+// bits = ceil(digits*log[2](10))
+// digits = floor(bits*log[10](2))
+
+inline mp_prec_t digits2bits(int d);
+inline int bits2digits(mp_prec_t b);
+
+//////////////////////////////////////////////////////////////////////////
+// min, max
+const mpreal (max)(const mpreal& x, const mpreal& y);
+const mpreal (min)(const mpreal& x, const mpreal& y);
+
+//////////////////////////////////////////////////////////////////////////
+// Implementation
+//////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////
+// Operators - Assignment
+inline mpreal& mpreal::operator=(const mpreal& v)
+{
+ if (this != &v)
+ {
+ mp_prec_t tp = mpfr_get_prec( mpfr_srcptr());
+ mp_prec_t vp = mpfr_get_prec(v.mpfr_srcptr());
+
+ if(tp != vp){
+ clear(mpfr_ptr());
+ mpfr_init2(mpfr_ptr(), vp);
+ }
+
+ mpfr_set(mpfr_ptr(), v.mpfr_srcptr(), mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ }
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const mpf_t v)
+{
+ mpfr_set_f(mpfr_ptr(), v, mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const mpz_t v)
+{
+ mpfr_set_z(mpfr_ptr(), v, mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const mpq_t v)
+{
+ mpfr_set_q(mpfr_ptr(), v, mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const long double v)
+{
+ mpfr_set_ld(mpfr_ptr(), v, mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const double v)
+{
+#if (MPREAL_DOUBLE_BITS_OVERFLOW > -1)
+ if(fits_in_bits(v, MPREAL_DOUBLE_BITS_OVERFLOW))
+ {
+ mpfr_set_d(mpfr_ptr(),v,mpreal::get_default_rnd());
+ } else
+ throw conversion_overflow();
+#else
+ mpfr_set_d(mpfr_ptr(),v,mpreal::get_default_rnd());
+#endif
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const unsigned long int v)
+{
+ mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const unsigned int v)
+{
+ mpfr_set_ui(mpfr_ptr(), v, mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const unsigned long long int v)
+{
+ mpfr_set_uj(mpfr_ptr(), v, mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const long long int v)
+{
+ mpfr_set_sj(mpfr_ptr(), v, mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const long int v)
+{
+ mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const int v)
+{
+ mpfr_set_si(mpfr_ptr(), v, mpreal::get_default_rnd());
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator=(const char* s)
+{
+ // Use the other converters for finer control over base, precision and rounding:
+ //
+ // mpreal(const char* s, mp_prec_t prec, int base, mp_rnd_t mode)
+ // mpreal(const std::string& s, mp_prec_t prec, int base, mp_rnd_t mode)
+ //
+ // Here we assume base = 10 and use the precision of the target variable.
+
+ mpfr_t t;
+
+ mpfr_init2(t, mpfr_get_prec(mpfr_srcptr()));
+
+ if(0 == mpfr_set_str(t, s, 10, mpreal::get_default_rnd()))
+ {
+ mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ }
+
+ clear(t);
+ return *this;
+}
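+
+// Illustrative sketch (hypothetical helper, not part of the original API):
+// assignment from a string keeps the precision of the target, while the
+// string constructor can set a new one.
+inline void example_string_assignment()
+{
+    mpreal x(0, 256);                 // 256-bit target
+    x = "3.14159265358979323846";     // parsed in base 10 at 256 bits
+    mpreal y("1.5", 64);              // constructed with 64-bit precision
+    (void)x; (void)y;
+}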
+
+inline mpreal& mpreal::operator=(const std::string& s)
+{
+ // Use the other converters for finer control over base, precision and rounding:
+ //
+ // mpreal(const char* s, mp_prec_t prec, int base, mp_rnd_t mode)
+ // mpreal(const std::string& s, mp_prec_t prec, int base, mp_rnd_t mode)
+ //
+ // Here we assume base = 10 and use the precision of the target variable.
+
+ mpfr_t t;
+
+ mpfr_init2(t, mpfr_get_prec(mpfr_srcptr()));
+
+ if(0 == mpfr_set_str(t, s.c_str(), 10, mpreal::get_default_rnd()))
+ {
+ mpfr_set(mpfr_ptr(), t, mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ }
+
+ clear(t);
+ return *this;
+}
+
+template <typename real_t>
+inline mpreal& mpreal::operator= (const std::complex<real_t>& z)
+{
+ return *this = z.real();
+}
+
+//////////////////////////////////////////////////////////////////////////
+// + Addition
+inline mpreal& mpreal::operator+=(const mpreal& v)
+{
+ mpfr_add(mpfr_ptr(), mpfr_srcptr(), v.mpfr_srcptr(), mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator+=(const mpf_t u)
+{
+ *this += mpreal(u);
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator+=(const mpz_t u)
+{
+ mpfr_add_z(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator+=(const mpq_t u)
+{
+ mpfr_add_q(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator+= (const long double u)
+{
+ *this += mpreal(u);
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator+= (const double u)
+{
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
+ mpfr_add_d(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
+#else
+ *this += mpreal(u);
+#endif
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator+=(const unsigned long int u)
+{
+ mpfr_add_ui(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator+=(const unsigned int u)
+{
+ mpfr_add_ui(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator+=(const long int u)
+{
+ mpfr_add_si(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator+=(const int u)
+{
+ mpfr_add_si(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator+=(const long long int u) { *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator+=(const unsigned long long int u){ *this += mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator-=(const long long int u) { *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator-=(const unsigned long long int u){ *this -= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator*=(const long long int u) { *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator*=(const unsigned long long int u){ *this *= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator/=(const long long int u) { *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+inline mpreal& mpreal::operator/=(const unsigned long long int u){ *this /= mpreal(u); MPREAL_MSVC_DEBUGVIEW_CODE; return *this; }
+
+inline const mpreal mpreal::operator+()const { return mpreal(*this); }
+
+inline const mpreal operator+(const mpreal& a, const mpreal& b)
+{
+ mpreal c(0, (std::max)(mpfr_get_prec(a.mpfr_ptr()), mpfr_get_prec(b.mpfr_ptr())));
+ mpfr_add(c.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), mpreal::get_default_rnd());
+ return c;
+}
+
+inline mpreal& mpreal::operator++()
+{
+ return *this += 1;
+}
+
+inline const mpreal mpreal::operator++ (int)
+{
+ mpreal x(*this);
+ *this += 1;
+ return x;
+}
+
+inline mpreal& mpreal::operator--()
+{
+ return *this -= 1;
+}
+
+inline const mpreal mpreal::operator-- (int)
+{
+ mpreal x(*this);
+ *this -= 1;
+ return x;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// - Subtraction
+inline mpreal& mpreal::operator-=(const mpreal& v)
+{
+ mpfr_sub(mpfr_ptr(),mpfr_srcptr(),v.mpfr_srcptr(),mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator-=(const mpz_t v)
+{
+ mpfr_sub_z(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator-=(const mpq_t v)
+{
+ mpfr_sub_q(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator-=(const long double v)
+{
+ *this -= mpreal(v);
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator-=(const double v)
+{
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
+ mpfr_sub_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+#else
+ *this -= mpreal(v);
+#endif
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator-=(const unsigned long int v)
+{
+ mpfr_sub_ui(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator-=(const unsigned int v)
+{
+ mpfr_sub_ui(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator-=(const long int v)
+{
+ mpfr_sub_si(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator-=(const int v)
+{
+ mpfr_sub_si(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline const mpreal mpreal::operator-()const
+{
+ mpreal u(*this);
+ mpfr_neg(u.mpfr_ptr(),u.mpfr_srcptr(),mpreal::get_default_rnd());
+ return u;
+}
+
+inline const mpreal operator-(const mpreal& a, const mpreal& b)
+{
+ mpreal c(0, (std::max)(mpfr_get_prec(a.mpfr_ptr()), mpfr_get_prec(b.mpfr_ptr())));
+ mpfr_sub(c.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), mpreal::get_default_rnd());
+ return c;
+}
+
+inline const mpreal operator-(const double b, const mpreal& a)
+{
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_d_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
+ return x;
+#else
+ mpreal x(b, mpfr_get_prec(a.mpfr_ptr()));
+ x -= a;
+ return x;
+#endif
+}
+
+inline const mpreal operator-(const unsigned long int b, const mpreal& a)
+{
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_ui_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
+ return x;
+}
+
+inline const mpreal operator-(const unsigned int b, const mpreal& a)
+{
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_ui_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
+ return x;
+}
+
+inline const mpreal operator-(const long int b, const mpreal& a)
+{
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_si_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
+ return x;
+}
+
+inline const mpreal operator-(const int b, const mpreal& a)
+{
+ mpreal x(0, mpfr_get_prec(a.mpfr_ptr()));
+ mpfr_si_sub(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
+ return x;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// * Multiplication
+inline mpreal& mpreal::operator*= (const mpreal& v)
+{
+ mpfr_mul(mpfr_ptr(),mpfr_srcptr(),v.mpfr_srcptr(),mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator*=(const mpz_t v)
+{
+ mpfr_mul_z(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator*=(const mpq_t v)
+{
+ mpfr_mul_q(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator*=(const long double v)
+{
+ *this *= mpreal(v);
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator*=(const double v)
+{
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
+ mpfr_mul_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+#else
+ *this *= mpreal(v);
+#endif
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator*=(const unsigned long int v)
+{
+ mpfr_mul_ui(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator*=(const unsigned int v)
+{
+ mpfr_mul_ui(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator*=(const long int v)
+{
+ mpfr_mul_si(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator*=(const int v)
+{
+ mpfr_mul_si(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline const mpreal operator*(const mpreal& a, const mpreal& b)
+{
+ mpreal c(0, (std::max)(mpfr_get_prec(a.mpfr_ptr()), mpfr_get_prec(b.mpfr_ptr())));
+ mpfr_mul(c.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), mpreal::get_default_rnd());
+ return c;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// / Division
+inline mpreal& mpreal::operator/=(const mpreal& v)
+{
+ mpfr_div(mpfr_ptr(),mpfr_srcptr(),v.mpfr_srcptr(),mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator/=(const mpz_t v)
+{
+ mpfr_div_z(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator/=(const mpq_t v)
+{
+ mpfr_div_q(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator/=(const long double v)
+{
+ *this /= mpreal(v);
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator/=(const double v)
+{
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
+ mpfr_div_d(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+#else
+ *this /= mpreal(v);
+#endif
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator/=(const unsigned long int v)
+{
+ mpfr_div_ui(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator/=(const unsigned int v)
+{
+ mpfr_div_ui(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator/=(const long int v)
+{
+ mpfr_div_si(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator/=(const int v)
+{
+ mpfr_div_si(mpfr_ptr(),mpfr_srcptr(),v,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline const mpreal operator/(const mpreal& a, const mpreal& b)
+{
+ mpreal c(0, (std::max)(mpfr_get_prec(a.mpfr_srcptr()), mpfr_get_prec(b.mpfr_srcptr())));
+ mpfr_div(c.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), mpreal::get_default_rnd());
+ return c;
+}
+
+inline const mpreal operator/(const unsigned long int b, const mpreal& a)
+{
+ mpreal x(0, mpfr_get_prec(a.mpfr_srcptr()));
+ mpfr_ui_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
+ return x;
+}
+
+inline const mpreal operator/(const unsigned int b, const mpreal& a)
+{
+ mpreal x(0, mpfr_get_prec(a.mpfr_srcptr()));
+ mpfr_ui_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
+ return x;
+}
+
+inline const mpreal operator/(const long int b, const mpreal& a)
+{
+ mpreal x(0, mpfr_get_prec(a.mpfr_srcptr()));
+ mpfr_si_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
+ return x;
+}
+
+inline const mpreal operator/(const int b, const mpreal& a)
+{
+ mpreal x(0, mpfr_get_prec(a.mpfr_srcptr()));
+ mpfr_si_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
+ return x;
+}
+
+inline const mpreal operator/(const double b, const mpreal& a)
+{
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
+ mpreal x(0, mpfr_get_prec(a.mpfr_srcptr()));
+ mpfr_d_div(x.mpfr_ptr(), b, a.mpfr_srcptr(), mpreal::get_default_rnd());
+ return x;
+#else
+ mpreal x(b, mpfr_get_prec(a.mpfr_ptr()));
+ x /= a;
+ return x;
+#endif
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Shifts operators - Multiplication/Division by power of 2
+inline mpreal& mpreal::operator<<=(const unsigned long int u)
+{
+ mpfr_mul_2ui(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator<<=(const unsigned int u)
+{
+ mpfr_mul_2ui(mpfr_ptr(),mpfr_srcptr(),static_cast<unsigned long int>(u),mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator<<=(const long int u)
+{
+ mpfr_mul_2si(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator<<=(const int u)
+{
+ mpfr_mul_2si(mpfr_ptr(),mpfr_srcptr(),static_cast<long int>(u),mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator>>=(const unsigned long int u)
+{
+ mpfr_div_2ui(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator>>=(const unsigned int u)
+{
+ mpfr_div_2ui(mpfr_ptr(),mpfr_srcptr(),static_cast<unsigned long int>(u),mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator>>=(const long int u)
+{
+ mpfr_div_2si(mpfr_ptr(),mpfr_srcptr(),u,mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::operator>>=(const int u)
+{
+ mpfr_div_2si(mpfr_ptr(),mpfr_srcptr(),static_cast<long int>(u),mpreal::get_default_rnd());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline const mpreal operator<<(const mpreal& v, const unsigned long int k)
+{
+ return mul_2ui(v,k);
+}
+
+inline const mpreal operator<<(const mpreal& v, const unsigned int k)
+{
+ return mul_2ui(v,static_cast<unsigned long int>(k));
+}
+
+inline const mpreal operator<<(const mpreal& v, const long int k)
+{
+ return mul_2si(v,k);
+}
+
+inline const mpreal operator<<(const mpreal& v, const int k)
+{
+ return mul_2si(v,static_cast<long int>(k));
+}
+
+inline const mpreal operator>>(const mpreal& v, const unsigned long int k)
+{
+ return div_2ui(v,k);
+}
+
+inline const mpreal operator>>(const mpreal& v, const long int k)
+{
+ return div_2si(v,k);
+}
+
+inline const mpreal operator>>(const mpreal& v, const unsigned int k)
+{
+ return div_2ui(v,static_cast<unsigned long int>(k));
+}
+
+inline const mpreal operator>>(const mpreal& v, const int k)
+{
+ return div_2si(v,static_cast<long int>(k));
+}
+
+// mul_2ui
+inline const mpreal mul_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode)
+{
+ mpreal x(v);
+ mpfr_mul_2ui(x.mpfr_ptr(),v.mpfr_srcptr(),k,rnd_mode);
+ return x;
+}
+
+// mul_2si
+inline const mpreal mul_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode)
+{
+ mpreal x(v);
+ mpfr_mul_2si(x.mpfr_ptr(),v.mpfr_srcptr(),k,rnd_mode);
+ return x;
+}
+
+inline const mpreal div_2ui(const mpreal& v, unsigned long int k, mp_rnd_t rnd_mode)
+{
+ mpreal x(v);
+ mpfr_div_2ui(x.mpfr_ptr(),v.mpfr_srcptr(),k,rnd_mode);
+ return x;
+}
+
+inline const mpreal div_2si(const mpreal& v, long int k, mp_rnd_t rnd_mode)
+{
+ mpreal x(v);
+ mpfr_div_2si(x.mpfr_ptr(),v.mpfr_srcptr(),k,rnd_mode);
+ return x;
+}
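+
+// Worked example (illustrative) for the shift operators above, which scale
+// by exact powers of two:
+//
+//   mpreal x = 3;  x <<= 2;   // x == 12  (3 * 2^2)
+//   mpreal y = x >> 1;        // y == 6   (12 / 2)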
+
+//////////////////////////////////////////////////////////////////////////
+// Relational operators
+
+// WARNING:
+//
+// Please note that the following checks for double NaN are guaranteed to work only in IEEE math mode:
+//
+// isnan(b) = (b != b)
+// isnan(b) = !(b == b)   (used in the code below)
+//
+// Be cautious with compiler options that break strict IEEE compliance (e.g. -ffast-math in GCC).
+// Use std::isnan instead (C++11).
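+//
+// For example (illustrative): after mpreal n; n.setNan(); the predicates
+// below yield (n == n) == false and (n != n) == true, matching IEEE-754.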
+
+inline bool operator > (const mpreal& a, const mpreal& b ){ return (mpfr_greater_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); }
+inline bool operator > (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) > 0 ); }
+inline bool operator > (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) > 0 ); }
+inline bool operator > (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) > 0 ); }
+inline bool operator > (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) > 0 ); }
+inline bool operator > (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) > 0 ); }
+inline bool operator > (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) > 0 ); }
+
+inline bool operator >= (const mpreal& a, const mpreal& b ){ return (mpfr_greaterequal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); }
+inline bool operator >= (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) >= 0 ); }
+inline bool operator >= (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) >= 0 ); }
+inline bool operator >= (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) >= 0 ); }
+inline bool operator >= (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) >= 0 ); }
+inline bool operator >= (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) >= 0 ); }
+inline bool operator >= (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) >= 0 ); }
+
+inline bool operator < (const mpreal& a, const mpreal& b ){ return (mpfr_less_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); }
+inline bool operator < (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) < 0 ); }
+inline bool operator < (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) < 0 ); }
+inline bool operator < (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) < 0 ); }
+inline bool operator < (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) < 0 ); }
+inline bool operator < (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) < 0 ); }
+inline bool operator < (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) < 0 ); }
+
+inline bool operator <= (const mpreal& a, const mpreal& b ){ return (mpfr_lessequal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); }
+inline bool operator <= (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) <= 0 ); }
+inline bool operator <= (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) <= 0 ); }
+inline bool operator <= (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) <= 0 ); }
+inline bool operator <= (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) <= 0 ); }
+inline bool operator <= (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) <= 0 ); }
+inline bool operator <= (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) <= 0 ); }
+
+inline bool operator == (const mpreal& a, const mpreal& b ){ return (mpfr_equal_p(a.mpfr_srcptr(),b.mpfr_srcptr()) != 0 ); }
+inline bool operator == (const mpreal& a, const unsigned long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); }
+inline bool operator == (const mpreal& a, const unsigned int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_ui(a.mpfr_srcptr(),b) == 0 ); }
+inline bool operator == (const mpreal& a, const long int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); }
+inline bool operator == (const mpreal& a, const int b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (mpfr_cmp_si(a.mpfr_srcptr(),b) == 0 ); }
+inline bool operator == (const mpreal& a, const long double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_ld(a.mpfr_srcptr(),b) == 0 ); }
+inline bool operator == (const mpreal& a, const double b ){ return !isnan EIGEN_NOT_A_MACRO (a) && (b == b) && (mpfr_cmp_d (a.mpfr_srcptr(),b) == 0 ); }
+
+inline bool operator != (const mpreal& a, const mpreal& b ){ return !(a == b); }
+inline bool operator != (const mpreal& a, const unsigned long int b ){ return !(a == b); }
+inline bool operator != (const mpreal& a, const unsigned int b ){ return !(a == b); }
+inline bool operator != (const mpreal& a, const long int b ){ return !(a == b); }
+inline bool operator != (const mpreal& a, const int b ){ return !(a == b); }
+inline bool operator != (const mpreal& a, const long double b ){ return !(a == b); }
+inline bool operator != (const mpreal& a, const double b ){ return !(a == b); }
+
+inline bool (isnan) (const mpreal& op){ return (mpfr_nan_p (op.mpfr_srcptr()) != 0 ); }
+inline bool (isinf) (const mpreal& op){ return (mpfr_inf_p (op.mpfr_srcptr()) != 0 ); }
+inline bool (isfinite) (const mpreal& op){ return (mpfr_number_p (op.mpfr_srcptr()) != 0 ); }
+inline bool iszero (const mpreal& op){ return (mpfr_zero_p (op.mpfr_srcptr()) != 0 ); }
+inline bool isint (const mpreal& op){ return (mpfr_integer_p(op.mpfr_srcptr()) != 0 ); }
+
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
+inline bool isregular(const mpreal& op){ return (mpfr_regular_p(op.mpfr_srcptr()));}
+#endif
+
+//////////////////////////////////////////////////////////////////////////
+// Type Converters
+inline bool mpreal::toBool ( ) const { return mpfr_zero_p (mpfr_srcptr()) == 0; }
+inline long mpreal::toLong (mp_rnd_t mode) const { return mpfr_get_si (mpfr_srcptr(), mode); }
+inline unsigned long mpreal::toULong (mp_rnd_t mode) const { return mpfr_get_ui (mpfr_srcptr(), mode); }
+inline float mpreal::toFloat (mp_rnd_t mode) const { return mpfr_get_flt(mpfr_srcptr(), mode); }
+inline double mpreal::toDouble (mp_rnd_t mode) const { return mpfr_get_d (mpfr_srcptr(), mode); }
+inline long double mpreal::toLDouble(mp_rnd_t mode) const { return mpfr_get_ld (mpfr_srcptr(), mode); }
+inline long long mpreal::toLLong (mp_rnd_t mode) const { return mpfr_get_sj (mpfr_srcptr(), mode); }
+inline unsigned long long mpreal::toULLong (mp_rnd_t mode) const { return mpfr_get_uj (mpfr_srcptr(), mode); }
+
+inline ::mpfr_ptr mpreal::mpfr_ptr() { return mp; }
+inline ::mpfr_srcptr mpreal::mpfr_ptr() const { return mp; }
+inline ::mpfr_srcptr mpreal::mpfr_srcptr() const { return mp; }
+
+template <class T>
+inline std::string toString(T t, std::ios_base & (*f)(std::ios_base&))
+{
+ std::ostringstream oss;
+ oss << f << t;
+ return oss.str();
+}
+
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
+
+inline std::string mpreal::toString(const std::string& format) const
+{
+ char *s = NULL;
+ std::string out;
+
+ if( !format.empty() )
+ {
+ if(!(mpfr_asprintf(&s, format.c_str(), mpfr_srcptr()) < 0))
+ {
+ out = std::string(s);
+
+ mpfr_free_str(s);
+ }
+ }
+
+ return out;
+}
+
+#endif
+
+inline std::string mpreal::toString(int n, int b, mp_rnd_t mode) const
+{
+ // TODO: Add extended format specification (f, e, rounding mode) as is done in the output operator
+ (void)b;
+ (void)mode;
+
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
+
+ std::ostringstream format;
+
+ int digits = (n >= 0) ? n : 1 + bits2digits(mpfr_get_prec(mpfr_srcptr()));
+
+ format << "%." << digits << "RNg";
+
+ return toString(format.str());
+
+#else
+
+ char *s, *ns = NULL;
+ size_t slen, nslen;
+ mp_exp_t exp;
+ std::string out;
+
+ if(mpfr_inf_p(mp))
+ {
+ if(mpfr_sgn(mp)>0) return "+Inf";
+ else return "-Inf";
+ }
+
+ if(mpfr_zero_p(mp)) return "0";
+ if(mpfr_nan_p(mp)) return "NaN";
+
+ s = mpfr_get_str(NULL, &exp, b, 0, mp, mode);
+ ns = mpfr_get_str(NULL, &exp, b, (std::max)(0,n), mp, mode);
+
+ if(s!=NULL && ns!=NULL)
+ {
+ slen = strlen(s);
+ nslen = strlen(ns);
+ if(nslen<=slen)
+ {
+ mpfr_free_str(s);
+ s = ns;
+ slen = nslen;
+ }
+ else {
+ mpfr_free_str(ns);
+ }
+
+ // Use human-friendly formatting where possible
+ if (exp>0 && static_cast<size_t>(exp)<slen)
+ {
+ if(s[0]=='-')
+ {
+ // Remove zeros starting from right end
+ char* ptr = s+slen-1;
+ while (*ptr=='0' && ptr>s+exp) ptr--;
+
+ if(ptr==s+exp) out = std::string(s,exp+1);
+ else out = std::string(s,exp+1)+'.'+std::string(s+exp+1,ptr-(s+exp+1)+1);
+
+ //out = string(s,exp+1)+'.'+string(s+exp+1);
+ }
+ else
+ {
+ // Remove zeros starting from right end
+ char* ptr = s+slen-1;
+ while (*ptr=='0' && ptr>s+exp-1) ptr--;
+
+ if(ptr==s+exp-1) out = std::string(s,exp);
+ else out = std::string(s,exp)+'.'+std::string(s+exp,ptr-(s+exp)+1);
+
+ //out = string(s,exp)+'.'+string(s+exp);
+ }
+
+ }else{ // exp<=0 || exp>=slen
+ if(s[0]=='-')
+ {
+ // Remove zeros starting from right end
+ char* ptr = s+slen-1;
+ while (*ptr=='0' && ptr>s+1) ptr--;
+
+ if(ptr==s+1) out = std::string(s,2);
+ else out = std::string(s,2)+'.'+std::string(s+2,ptr-(s+2)+1);
+
+ //out = string(s,2)+'.'+string(s+2);
+ }
+ else
+ {
+ // Remove zeros starting from right end
+ char* ptr = s+slen-1;
+ while (*ptr=='0' && ptr>s) ptr--;
+
+ if(ptr==s) out = std::string(s,1);
+ else out = std::string(s,1)+'.'+std::string(s+1,ptr-(s+1)+1);
+
+ //out = string(s,1)+'.'+string(s+1);
+ }
+
+ // Make final string
+ if(--exp)
+ {
+ if(exp>0) out += "e+"+mpfr::toString<mp_exp_t>(exp,std::dec);
+ else out += "e"+mpfr::toString<mp_exp_t>(exp,std::dec);
+ }
+ }
+
+ mpfr_free_str(s);
+ return out;
+ }else{
+ return "conversion error!";
+ }
+#endif
+}
+
+
+//////////////////////////////////////////////////////////////////////////
+// I/O
+inline std::ostream& mpreal::output(std::ostream& os) const
+{
+ std::ostringstream format;
+ const std::ios::fmtflags flags = os.flags();
+
+ format << ((flags & std::ios::showpos) ? "%+" : "%");
+ if (os.precision() >= 0)
+ format << '.' << os.precision() << "R*"
+ << ((flags & std::ios::floatfield) == std::ios::fixed ? 'f' :
+ (flags & std::ios::floatfield) == std::ios::scientific ? 'e' :
+ 'g');
+ else
+ format << "R*e";
+
+ char *s = NULL;
+ if(!(mpfr_asprintf(&s, format.str().c_str(),
+ mpfr::mpreal::get_default_rnd(),
+ mpfr_srcptr())
+ < 0))
+ {
+ os << std::string(s);
+ mpfr_free_str(s);
+ }
+ return os;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const mpreal& v)
+{
+ return v.output(os);
+}
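+
+// Illustrative sketch (hypothetical helper, not part of the original API):
+// the inserter above maps the usual stream flags onto an mpfr_asprintf format.
+inline void example_stream_output(std::ostream& os, const mpreal& x)
+{
+    os.precision(20);
+    os << x << '\n';                                // formatted via "%.20R*g"
+    os.setf(std::ios::fixed, std::ios::floatfield);
+    os << x << '\n';                                // formatted via "%.20R*f"
+}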
+
+inline std::istream& operator>>(std::istream &is, mpreal& v)
+{
+ // TODO: use std::hexfloat and other stream flags to set up the base
+ std::string tmp;
+ is >> tmp;
+ mpfr_set_str(v.mpfr_ptr(), tmp.c_str(), 10, mpreal::get_default_rnd());
+ return is;
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Bits - decimal digits relation
+// bits = ceil(digits*log[2](10))
+// digits = floor(bits*log[10](2))
+
+inline mp_prec_t digits2bits(int d)
+{
+ const double LOG2_10 = 3.3219280948873624;
+
+ return mp_prec_t(std::ceil( d * LOG2_10 ));
+}
+
+inline int bits2digits(mp_prec_t b)
+{
+ const double LOG10_2 = 0.30102999566398119;
+
+ return int(std::floor( b * LOG10_2 ));
+}
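+
+// Worked example (illustrative): an IEEE double carries 53 mantissa bits,
+// so bits2digits(53) = floor(53 * 0.30103) = 15 full decimal digits;
+// conversely, digits2bits(50) = ceil(50 * 3.32193) = 167 bits.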
+
+//////////////////////////////////////////////////////////////////////////
+// Set/Get number properties
+inline int sgn(const mpreal& op)
+{
+ return mpfr_sgn(op.mpfr_srcptr());
+}
+
+inline mpreal& mpreal::setSign(int sign, mp_rnd_t RoundingMode)
+{
+ mpfr_setsign(mpfr_ptr(), mpfr_srcptr(), (sign < 0 ? 1 : 0), RoundingMode);
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline int mpreal::getPrecision() const
+{
+ return int(mpfr_get_prec(mpfr_srcptr()));
+}
+
+inline mpreal& mpreal::setPrecision(int Precision, mp_rnd_t RoundingMode)
+{
+ mpfr_prec_round(mpfr_ptr(), Precision, RoundingMode);
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::setInf(int sign)
+{
+ mpfr_set_inf(mpfr_ptr(), sign);
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::setNan()
+{
+ mpfr_set_nan(mpfr_ptr());
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mpreal& mpreal::setZero(int sign)
+{
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
+ mpfr_set_zero(mpfr_ptr(), sign);
+#else
+ mpfr_set_si(mpfr_ptr(), 0, (mpfr_get_default_rounding_mode)());
+ setSign(sign);
+#endif
+
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return *this;
+}
+
+inline mp_prec_t mpreal::get_prec() const
+{
+ return mpfr_get_prec(mpfr_srcptr());
+}
+
+inline void mpreal::set_prec(mp_prec_t prec, mp_rnd_t rnd_mode)
+{
+ mpfr_prec_round(mpfr_ptr(),prec,rnd_mode);
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+}
+
+inline mp_exp_t mpreal::get_exp ()
+{
+ return mpfr_get_exp(mpfr_srcptr());
+}
+
+inline int mpreal::set_exp (mp_exp_t e)
+{
+ int x = mpfr_set_exp(mpfr_ptr(), e);
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return x;
+}
+
+inline const mpreal frexp(const mpreal& x, mp_exp_t* exp, mp_rnd_t mode = mpreal::get_default_rnd())
+{
+ mpreal y(x);
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0))
+ mpfr_frexp(exp,y.mpfr_ptr(),x.mpfr_srcptr(),mode);
+#else
+ *exp = mpfr_get_exp(y.mpfr_srcptr());
+ mpfr_set_exp(y.mpfr_ptr(),0);
+#endif
+ return y;
+}
+
+inline const mpreal ldexp(const mpreal& v, mp_exp_t exp)
+{
+ mpreal x(v);
+
+ // rounding is irrelevant: scaling by a power of two only adjusts the exponent (an exact operation, barring overflow/underflow)
+ mpfr_mul_2si(x.mpfr_ptr(), x.mpfr_srcptr(), exp, mpreal::get_default_rnd());
+ return x;
+}
+
+inline const mpreal scalbn(const mpreal& v, mp_exp_t exp)
+{
+ return ldexp(v, exp);
+}
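+
+// Worked example (illustrative): ldexp(mpreal(3), 4) == 48, since 3 * 2^4 is
+// exact; and frexp/ldexp round-trip, i.e. x == ldexp(frexp(x, &e), e).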
+
+inline mpreal machine_epsilon(mp_prec_t prec)
+{
+ /* the smallest eps such that 1 + eps != 1 */
+ return machine_epsilon(mpreal(1, prec));
+}
+
+inline mpreal machine_epsilon(const mpreal& x)
+{
+ /* the smallest eps such that x + eps != x */
+ if( x < 0)
+ {
+ return nextabove(-x) + x;
+ }else{
+ return nextabove( x) - x;
+ }
+}
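+
+// Illustrative sketch (hypothetical helper, not part of the original API):
+// machine_epsilon() scales with the working precision, unlike the fixed
+// DBL_EPSILON. For instance, example_epsilon_at(50) is roughly 1e-50, while
+// at 16 digits it is near DBL_EPSILON.
+inline mpreal example_epsilon_at(int decimal_digits)
+{
+    return machine_epsilon(mpreal(1, digits2bits(decimal_digits)));
+}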
+
+// minval is 'safe' meaning 1 / minval does not overflow
+inline mpreal minval(mp_prec_t prec)
+{
+ /* min = 1/2 * 2^emin = 2^(emin - 1) */
+ return mpreal(1, prec) << mpreal::get_emin()-1;
+}
+
+// maxval is 'safe' meaning 1 / maxval does not underflow
+inline mpreal maxval(mp_prec_t prec)
+{
+ /* max = (1 - eps) * 2^emax, eps is machine epsilon */
+ return (mpreal(1, prec) - machine_epsilon(prec)) << mpreal::get_emax();
+}
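+
+// Illustrative sketch (hypothetical helper, not part of the original API):
+// guard a reciprocal using the 'safe' bounds above.
+inline bool example_safe_to_invert(const mpreal& x)
+{
+    const mpreal ax = abs(x);
+    return ax >= minval(x.get_prec()) && ax <= maxval(x.get_prec());
+}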
+
+inline bool isEqualUlps(const mpreal& a, const mpreal& b, int maxUlps)
+{
+ return abs(a - b) <= machine_epsilon((max)(abs(a), abs(b))) * maxUlps;
+}
+
+inline bool isEqualFuzzy(const mpreal& a, const mpreal& b, const mpreal& eps)
+{
+ return abs(a - b) <= eps;
+}
+
+inline bool isEqualFuzzy(const mpreal& a, const mpreal& b)
+{
+ return isEqualFuzzy(a, b, machine_epsilon((max)(1, (min)(abs(a), abs(b)))));
+}
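+
+// Illustrative sketch (hypothetical helper, not part of the original API):
+// the fuzzy check tolerates the independent rounding of the two computations
+// below, whereas exact comparison with == may fail.
+inline bool example_fuzzy_equal()
+{
+    mpreal a = mpreal(1) / 3;
+    mpreal b = 1 - mpreal(2) / 3;   // mathematically equal to a
+    return isEqualFuzzy(a, b);      // expected: true
+}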
+
+//////////////////////////////////////////////////////////////////////////
+// C++11 sign functions.
+inline mpreal copysign(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal rop(0, mpfr_get_prec(x.mpfr_ptr()));
+ mpfr_setsign(rop.mpfr_ptr(), x.mpfr_srcptr(), mpfr_signbit(y.mpfr_srcptr()), rnd_mode);
+ return rop;
+}
+
+inline bool signbit(const mpreal& x)
+{
+ return mpfr_signbit(x.mpfr_srcptr());
+}
+
+inline const mpreal modf(const mpreal& v, mpreal& n)
+{
+ mpreal f(v);
+
+ // rounding is irrelevant: f has the same precision as v, so taking the fractional part is exact
+ mpfr_frac (f.mpfr_ptr(),f.mpfr_srcptr(),mpreal::get_default_rnd());
+ mpfr_trunc(n.mpfr_ptr(),v.mpfr_srcptr());
+ return f;
+}
+
+inline int mpreal::check_range (int t, mp_rnd_t rnd_mode)
+{
+ return mpfr_check_range(mpfr_ptr(),t,rnd_mode);
+}
+
+inline int mpreal::subnormalize (int t,mp_rnd_t rnd_mode)
+{
+ int r = mpfr_subnormalize(mpfr_ptr(),t,rnd_mode);
+ MPREAL_MSVC_DEBUGVIEW_CODE;
+ return r;
+}
+
+inline mp_exp_t mpreal::get_emin (void)
+{
+ return mpfr_get_emin();
+}
+
+inline int mpreal::set_emin (mp_exp_t exp)
+{
+ return mpfr_set_emin(exp);
+}
+
+inline mp_exp_t mpreal::get_emax (void)
+{
+ return mpfr_get_emax();
+}
+
+inline int mpreal::set_emax (mp_exp_t exp)
+{
+ return mpfr_set_emax(exp);
+}
+
+inline mp_exp_t mpreal::get_emin_min (void)
+{
+ return mpfr_get_emin_min();
+}
+
+inline mp_exp_t mpreal::get_emin_max (void)
+{
+ return mpfr_get_emin_max();
+}
+
+inline mp_exp_t mpreal::get_emax_min (void)
+{
+ return mpfr_get_emax_min();
+}
+
+inline mp_exp_t mpreal::get_emax_max (void)
+{
+ return mpfr_get_emax_max();
+}
+
+//////////////////////////////////////////////////////////////////////////
+// Mathematical Functions
+//////////////////////////////////////////////////////////////////////////
+#define MPREAL_UNARY_MATH_FUNCTION_BODY(f) \
+ mpreal y(0, mpfr_get_prec(x.mpfr_srcptr())); \
+ mpfr_##f(y.mpfr_ptr(), x.mpfr_srcptr(), r); \
+ return y;
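+// For reference, MPREAL_UNARY_MATH_FUNCTION_BODY(sqr) expands to a body that
+// allocates the result at the argument's precision and forwards to the MPFR
+// primitive:
+//   mpreal y(0, mpfr_get_prec(x.mpfr_srcptr()));
+//   mpfr_sqr(y.mpfr_ptr(), x.mpfr_srcptr(), r);
+//   return y;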
+
+inline const mpreal sqr (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd())
+{ MPREAL_UNARY_MATH_FUNCTION_BODY(sqr ); }
+
+inline const mpreal sqrt (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd())
+{ MPREAL_UNARY_MATH_FUNCTION_BODY(sqrt); }
+
+inline const mpreal sqrt(const unsigned long int x, mp_rnd_t r)
+{
+ mpreal y;
+ mpfr_sqrt_ui(y.mpfr_ptr(), x, r);
+ return y;
+}
+
+inline const mpreal sqrt(const unsigned int v, mp_rnd_t rnd_mode)
+{
+ return sqrt(static_cast<unsigned long int>(v),rnd_mode);
+}
+
+inline const mpreal sqrt(const long int v, mp_rnd_t rnd_mode)
+{
+ if (v>=0) return sqrt(static_cast<unsigned long int>(v),rnd_mode);
+ else return mpreal().setNan(); // NaN
+}
+
+inline const mpreal sqrt(const int v, mp_rnd_t rnd_mode)
+{
+ if (v>=0) return sqrt(static_cast<unsigned long int>(v),rnd_mode);
+ else return mpreal().setNan(); // NaN
+}
+
+inline const mpreal root(const mpreal& x, unsigned long int k, mp_rnd_t r = mpreal::get_default_rnd())
+{
+ mpreal y(0, mpfr_get_prec(x.mpfr_srcptr()));
+ mpfr_root(y.mpfr_ptr(), x.mpfr_srcptr(), k, r);
+ return y;
+}
+
+inline const mpreal dim(const mpreal& a, const mpreal& b, mp_rnd_t r = mpreal::get_default_rnd())
+{
+ mpreal y(0, mpfr_get_prec(a.mpfr_srcptr()));
+ mpfr_dim(y.mpfr_ptr(), a.mpfr_srcptr(), b.mpfr_srcptr(), r);
+ return y;
+}
+
+inline int cmpabs(const mpreal& a,const mpreal& b)
+{
+ return mpfr_cmpabs(a.mpfr_ptr(), b.mpfr_srcptr());
+}
+
+inline int sin_cos(mpreal& s, mpreal& c, const mpreal& v, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ return mpfr_sin_cos(s.mpfr_ptr(), c.mpfr_ptr(), v.mpfr_srcptr(), rnd_mode);
+}
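+
+// Usage sketch (illustrative): one MPFR call yields both values, which is
+// typically cheaper than separate sin() and cos() of the same argument:
+//   mpreal s, c;
+//   sin_cos(s, c, const_pi() / 6);  // s ~= 0.5, c ~= sqrt(3)/2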
+
+inline const mpreal sqrt (const long double v, mp_rnd_t rnd_mode) { return sqrt(mpreal(v),rnd_mode); }
+inline const mpreal sqrt (const double v, mp_rnd_t rnd_mode) { return sqrt(mpreal(v),rnd_mode); }
+
+inline const mpreal cbrt (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(cbrt ); }
+inline const mpreal fabs (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(abs ); }
+inline const mpreal abs (const mpreal& x, mp_rnd_t r) { MPREAL_UNARY_MATH_FUNCTION_BODY(abs ); }
+inline const mpreal log (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(log ); }
+inline const mpreal log2 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(log2 ); }
+inline const mpreal log10 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(log10); }
+inline const mpreal exp (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(exp ); }
+inline const mpreal exp2 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(exp2 ); }
+inline const mpreal exp10 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(exp10); }
+inline const mpreal cos (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(cos ); }
+inline const mpreal sin (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(sin ); }
+inline const mpreal tan (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(tan ); }
+inline const mpreal sec (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(sec ); }
+inline const mpreal csc (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(csc ); }
+inline const mpreal cot (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(cot ); }
+inline const mpreal acos (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(acos ); }
+inline const mpreal asin (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(asin ); }
+inline const mpreal atan (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(atan ); }
+
+inline const mpreal logb (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { return log2 (abs(x),r); }
+
+inline const mpreal acot (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return atan (1/v, r); }
+inline const mpreal asec (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return acos (1/v, r); }
+inline const mpreal acsc (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return asin (1/v, r); }
+inline const mpreal acoth (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return atanh(1/v, r); }
+inline const mpreal asech (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return acosh(1/v, r); }
+inline const mpreal acsch (const mpreal& v, mp_rnd_t r = mpreal::get_default_rnd()) { return asinh(1/v, r); }
+
+inline const mpreal cosh (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(cosh ); }
+inline const mpreal sinh (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(sinh ); }
+inline const mpreal tanh (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(tanh ); }
+inline const mpreal sech (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(sech ); }
+inline const mpreal csch (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(csch ); }
+inline const mpreal coth (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(coth ); }
+inline const mpreal acosh (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(acosh); }
+inline const mpreal asinh (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(asinh); }
+inline const mpreal atanh (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(atanh); }
+
+inline const mpreal log1p (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(log1p ); }
+inline const mpreal expm1 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(expm1 ); }
+inline const mpreal eint (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(eint ); }
+inline const mpreal gamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(gamma ); }
+inline const mpreal tgamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(gamma ); }
+inline const mpreal lngamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(lngamma); }
+inline const mpreal zeta (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(zeta ); }
+inline const mpreal erf (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(erf ); }
+inline const mpreal erfc (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(erfc ); }
+inline const mpreal besselj0(const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(j0 ); }
+inline const mpreal besselj1(const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(j1 ); }
+inline const mpreal bessely0(const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(y0 ); }
+inline const mpreal bessely1(const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(y1 ); }
+
+inline const mpreal atan2 (const mpreal& y, const mpreal& x, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal a(0,(std::max)(y.getPrecision(), x.getPrecision()));
+ mpfr_atan2(a.mpfr_ptr(), y.mpfr_srcptr(), x.mpfr_srcptr(), rnd_mode);
+ return a;
+}
+
+inline const mpreal hypot (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal a(0,(std::max)(y.getPrecision(), x.getPrecision()));
+ mpfr_hypot(a.mpfr_ptr(), x.mpfr_srcptr(), y.mpfr_srcptr(), rnd_mode);
+ return a;
+}
+
+inline const mpreal remainder (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal a(0,(std::max)(y.getPrecision(), x.getPrecision()));
+ mpfr_remainder(a.mpfr_ptr(), x.mpfr_srcptr(), y.mpfr_srcptr(), rnd_mode);
+ return a;
+}
+
+inline const mpreal remquo (long* q, const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal a(0,(std::max)(y.getPrecision(), x.getPrecision()));
+ mpfr_remquo(a.mpfr_ptr(),q, x.mpfr_srcptr(), y.mpfr_srcptr(), rnd_mode);
+ return a;
+}
+
+inline const mpreal fac_ui (unsigned long int v, mp_prec_t prec = mpreal::get_default_prec(),
+ mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal x(0, prec);
+ mpfr_fac_ui(x.mpfr_ptr(),v,rnd_mode);
+ return x;
+}
+
+
+inline const mpreal lgamma (const mpreal& v, int *signp = 0, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal x(v);
+ int tsignp;
+
+ if(signp) mpfr_lgamma(x.mpfr_ptr(), signp,v.mpfr_srcptr(),rnd_mode);
+ else mpfr_lgamma(x.mpfr_ptr(),&tsignp,v.mpfr_srcptr(),rnd_mode);
+
+ return x;
+}
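+
+// Usage sketch (illustrative): lgamma computes log|Gamma(v)|; the sign of
+// Gamma(v) is reported through signp when it is provided:
+//   int sign;
+//   mpreal l = lgamma(mpreal(-2.5), &sign);  // Gamma(-2.5) < 0, so sign == -1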
+
+
+inline const mpreal besseljn (long n, const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd())
+{
+ mpreal y(0, x.getPrecision());
+ mpfr_jn(y.mpfr_ptr(), n, x.mpfr_srcptr(), r);
+ return y;
+}
+
+inline const mpreal besselyn (long n, const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd())
+{
+ mpreal y(0, x.getPrecision());
+ mpfr_yn(y.mpfr_ptr(), n, x.mpfr_srcptr(), r);
+ return y;
+}
+
+inline const mpreal fma (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal a;
+ mp_prec_t p1, p2, p3;
+
+ p1 = v1.get_prec();
+ p2 = v2.get_prec();
+ p3 = v3.get_prec();
+
+ a.set_prec(p3>p2?(p3>p1?p3:p1):(p2>p1?p2:p1));
+
+ mpfr_fma(a.mp,v1.mp,v2.mp,v3.mp,rnd_mode);
+ return a;
+}
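+
+// Note: the result precision is max(prec(v1), prec(v2), prec(v3)), and
+// mpfr_fma applies a single rounding to v1*v2 + v3, e.g. (illustrative)
+//   mpreal r = fma(a, b, c);  // a*b + c, rounded once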
+
+inline const mpreal fms (const mpreal& v1, const mpreal& v2, const mpreal& v3, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal a;
+ mp_prec_t p1, p2, p3;
+
+ p1 = v1.get_prec();
+ p2 = v2.get_prec();
+ p3 = v3.get_prec();
+
+ a.set_prec(p3>p2?(p3>p1?p3:p1):(p2>p1?p2:p1));
+
+ mpfr_fms(a.mp,v1.mp,v2.mp,v3.mp,rnd_mode);
+ return a;
+}
+
+inline const mpreal agm (const mpreal& v1, const mpreal& v2, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal a;
+ mp_prec_t p1, p2;
+
+ p1 = v1.get_prec();
+ p2 = v2.get_prec();
+
+ a.set_prec(p1>p2?p1:p2);
+
+ mpfr_agm(a.mp, v1.mp, v2.mp, rnd_mode);
+
+ return a;
+}
+
+inline const mpreal sum (const mpreal tab[], const unsigned long int n, int& status, mp_rnd_t mode = mpreal::get_default_rnd())
+{
+ mpfr_srcptr *p = new mpfr_srcptr[n];
+
+ for (unsigned long int i = 0; i < n; i++)
+ p[i] = tab[i].mpfr_srcptr();
+
+ mpreal x;
+ status = mpfr_sum(x.mpfr_ptr(), (mpfr_ptr*)p, n, mode);
+
+ delete [] p;
+ return x;
+}
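+
+// Usage sketch (illustrative): mpfr_sum rounds the exact sum of the whole
+// array once, so cancellation between huge terms is handled correctly:
+//   mpreal tab[3] = { mpreal(1) << 100, mpreal(1), -(mpreal(1) << 100) };
+//   int status;
+//   mpreal s = sum(tab, 3, status);  // s == 1 exactly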
+
+//////////////////////////////////////////////////////////////////////////
+// MPFR 2.4.0 Specifics
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(2,4,0))
+
+inline int sinh_cosh(mpreal& s, mpreal& c, const mpreal& v, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ return mpfr_sinh_cosh(s.mp,c.mp,v.mp,rnd_mode);
+}
+
+inline const mpreal li2 (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd())
+{
+ MPREAL_UNARY_MATH_FUNCTION_BODY(li2);
+}
+
+inline const mpreal rem (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ /* rem(x, y), for y != 0, returns x - n*y where n = trunc(x/y). */
+ return fmod(x, y, rnd_mode);
+}
+
+inline const mpreal mod (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ (void)rnd_mode;
+
+ /*
+
+ m = mod(x,y) if y != 0, returns x - n*y where n = floor(x/y)
+
+ The following are true by convention:
+ - mod(x,0) is x
+ - mod(x,x) is 0
+ - mod(x,y) for x != y and y != 0 has the same sign as y.
+
+ */
+
+ if(iszero(y)) return x;
+ if(x == y) return 0;
+
+ mpreal m = x - floor(x / y) * y;
+
+ m.setSign(sgn(y)); // make sure result has the same sign as Y
+
+ return m;
+}
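+
+// Examples (illustrative) of the sign convention above:
+//   mod(mpreal( 5), mpreal(-3));  // == -1 (sign follows y)
+//   mod(mpreal(-5), mpreal( 3));  // ==  1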
+
+inline const mpreal fmod (const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal a;
+ mp_prec_t yp, xp;
+
+ yp = y.get_prec();
+ xp = x.get_prec();
+
+ a.set_prec(yp>xp?yp:xp);
+
+ mpfr_fmod(a.mp, x.mp, y.mp, rnd_mode);
+
+ return a;
+}
+
+inline const mpreal rec_sqrt(const mpreal& v, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal x(v);
+ mpfr_rec_sqrt(x.mp,v.mp,rnd_mode);
+ return x;
+}
+#endif // MPFR 2.4.0 Specifics
+
+//////////////////////////////////////////////////////////////////////////
+// MPFR 3.0.0 Specifics
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
+inline const mpreal digamma (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(digamma); }
+inline const mpreal ai (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(ai); }
+#endif // MPFR 3.0.0 Specifics
+
+//////////////////////////////////////////////////////////////////////////
+// Constants
+inline const mpreal const_log2 (mp_prec_t p = mpreal::get_default_prec(), mp_rnd_t r = mpreal::get_default_rnd())
+{
+ mpreal x(0, p);
+ mpfr_const_log2(x.mpfr_ptr(), r);
+ return x;
+}
+
+inline const mpreal const_pi (mp_prec_t p = mpreal::get_default_prec(), mp_rnd_t r = mpreal::get_default_rnd())
+{
+ mpreal x(0, p);
+ mpfr_const_pi(x.mpfr_ptr(), r);
+ return x;
+}
+
+inline const mpreal const_euler (mp_prec_t p = mpreal::get_default_prec(), mp_rnd_t r = mpreal::get_default_rnd())
+{
+ mpreal x(0, p);
+ mpfr_const_euler(x.mpfr_ptr(), r);
+ return x;
+}
+
+inline const mpreal const_catalan (mp_prec_t p = mpreal::get_default_prec(), mp_rnd_t r = mpreal::get_default_rnd())
+{
+ mpreal x(0, p);
+ mpfr_const_catalan(x.mpfr_ptr(), r);
+ return x;
+}
+
+inline const mpreal const_infinity (int sign = 1, mp_prec_t p = mpreal::get_default_prec())
+{
+ mpreal x(0, p);
+ mpfr_set_inf(x.mpfr_ptr(), sign);
+ return x;
+}
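+
+// Usage sketch (illustrative): constants are computed on demand, correctly
+// rounded to the requested precision:
+//   mpreal pi256 = const_pi(256);  // pi with a 256-bit significand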
+
+//////////////////////////////////////////////////////////////////////////
+// Integer Related Functions
+inline const mpreal ceil(const mpreal& v)
+{
+ mpreal x(v);
+ mpfr_ceil(x.mp,v.mp);
+ return x;
+}
+
+inline const mpreal floor(const mpreal& v)
+{
+ mpreal x(v);
+ mpfr_floor(x.mp,v.mp);
+ return x;
+}
+
+inline const mpreal round(const mpreal& v)
+{
+ mpreal x(v);
+ mpfr_round(x.mp,v.mp);
+ return x;
+}
+
+inline const mpreal trunc(const mpreal& v)
+{
+ mpreal x(v);
+ mpfr_trunc(x.mp,v.mp);
+ return x;
+}
+
+inline const mpreal rint (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint ); }
+inline const mpreal rint_ceil (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint_ceil ); }
+inline const mpreal rint_floor (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint_floor); }
+inline const mpreal rint_round (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint_round); }
+inline const mpreal rint_trunc (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(rint_trunc); }
+inline const mpreal frac (const mpreal& x, mp_rnd_t r = mpreal::get_default_rnd()) { MPREAL_UNARY_MATH_FUNCTION_BODY(frac ); }
+
+//////////////////////////////////////////////////////////////////////////
+// Miscellaneous Functions
+inline void swap (mpreal& a, mpreal& b) { mpfr_swap(a.mp,b.mp); }
+inline const mpreal (max)(const mpreal& x, const mpreal& y){ return (x>y?x:y); }
+inline const mpreal (min)(const mpreal& x, const mpreal& y){ return (x<y?x:y); }
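+// Note: the parentheses around (max) and (min) above suppress expansion of
+// the min/max macros that <windows.h> may define.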
+
+inline const mpreal fmax(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal a;
+ mpfr_max(a.mp,x.mp,y.mp,rnd_mode);
+ return a;
+}
+
+inline const mpreal fmin(const mpreal& x, const mpreal& y, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal a;
+ mpfr_min(a.mp,x.mp,y.mp,rnd_mode);
+ return a;
+}
+
+inline const mpreal nexttoward (const mpreal& x, const mpreal& y)
+{
+ mpreal a(x);
+ mpfr_nexttoward(a.mp,y.mp);
+ return a;
+}
+
+inline const mpreal nextabove (const mpreal& x)
+{
+ mpreal a(x);
+ mpfr_nextabove(a.mp);
+ return a;
+}
+
+inline const mpreal nextbelow (const mpreal& x)
+{
+ mpreal a(x);
+ mpfr_nextbelow(a.mp);
+ return a;
+}
+
+inline const mpreal urandomb (gmp_randstate_t& state)
+{
+ mpreal x;
+ mpfr_urandomb(x.mpfr_ptr(),state);
+ return x;
+}
+
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
+inline const mpreal urandom (gmp_randstate_t& state, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal x;
+ mpfr_urandom(x.mpfr_ptr(), state, rnd_mode);
+ return x;
+}
+#endif
+
+#if (MPFR_VERSION <= MPFR_VERSION_NUM(2,4,2))
+inline const mpreal random2 (mp_size_t size, mp_exp_t exp)
+{
+ mpreal x;
+ mpfr_random2(x.mpfr_ptr(),size,exp);
+ return x;
+}
+#endif
+
+// Uniformly distributed random number generation
+// a = random(seed); <- seeds the generator (seed must be nonzero) and returns the first random number
+// a = random();     <- returns subsequent random numbers
+inline const mpreal random(unsigned int seed = 0)
+{
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,0,0))
+ static gmp_randstate_t state;
+ static bool initialize = true;
+
+ if(initialize)
+ {
+ gmp_randinit_default(state);
+ gmp_randseed_ui(state,0);
+ initialize = false;
+ }
+
+ if(seed != 0) gmp_randseed_ui(state,seed);
+
+ return mpfr::urandom(state);
+#else
+ if(seed != 0) std::srand(seed);
+ return mpfr::mpreal(std::rand()/(double)RAND_MAX);
+#endif
+
+}
+
+#if (MPFR_VERSION >= MPFR_VERSION_NUM(3,1,0))
+
+inline const mpreal grandom (gmp_randstate_t& state, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal x;
+ mpfr_grandom(x.mpfr_ptr(), NULL, state, rnd_mode);
+ return x;
+}
+
+inline const mpreal grandom(unsigned int seed = 0)
+{
+ static gmp_randstate_t state;
+ static bool initialize = true;
+
+ if(initialize)
+ {
+ gmp_randinit_default(state);
+ gmp_randseed_ui(state,0);
+ initialize = false;
+ }
+
+ if(seed != 0) gmp_randseed_ui(state,seed);
+
+ return mpfr::grandom(state);
+}
+#endif
+
+//////////////////////////////////////////////////////////////////////////
+// Set/Get global properties
+inline void mpreal::set_default_prec(mp_prec_t prec)
+{
+ mpfr_set_default_prec(prec);
+}
+
+inline void mpreal::set_default_rnd(mp_rnd_t rnd_mode)
+{
+ mpfr_set_default_rounding_mode(rnd_mode);
+}
+
+inline bool mpreal::fits_in_bits(double x, int n)
+{
+ int i;
+ double t;
+ return IsInf(x) || (std::modf ( std::ldexp ( std::frexp ( x, &i ), n ), &t ) == 0.0);
+}
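+
+// Illustrative examples: fits_in_bits(x, n) is true iff the double x is
+// exactly representable with an n-bit significand, e.g.
+//   fits_in_bits(0.5, 1);   // true:  0.5 = 1 * 2^-1
+//   fits_in_bits(0.3, 24);  // false: the double nearest 0.3 needs all 53 significand bits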
+
+inline const mpreal pow(const mpreal& a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal x(a);
+ mpfr_pow(x.mp,x.mp,b.mp,rnd_mode);
+ return x;
+}
+
+inline const mpreal pow(const mpreal& a, const mpz_t b, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal x(a);
+ mpfr_pow_z(x.mp,x.mp,b,rnd_mode);
+ return x;
+}
+
+inline const mpreal pow(const mpreal& a, const unsigned long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal x(a);
+ mpfr_pow_ui(x.mp,x.mp,b,rnd_mode);
+ return x;
+}
+
+inline const mpreal pow(const mpreal& a, const unsigned int b, mp_rnd_t rnd_mode)
+{
+ return pow(a,static_cast<unsigned long int>(b),rnd_mode);
+}
+
+inline const mpreal pow(const mpreal& a, const long int b, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal x(a);
+ mpfr_pow_si(x.mp,x.mp,b,rnd_mode);
+ return x;
+}
+
+inline const mpreal pow(const mpreal& a, const int b, mp_rnd_t rnd_mode)
+{
+ return pow(a,static_cast<long int>(b),rnd_mode);
+}
+
+inline const mpreal pow(const mpreal& a, const long double b, mp_rnd_t rnd_mode)
+{
+ return pow(a,mpreal(b),rnd_mode);
+}
+
+inline const mpreal pow(const mpreal& a, const double b, mp_rnd_t rnd_mode)
+{
+ return pow(a,mpreal(b),rnd_mode);
+}
+
+inline const mpreal pow(const unsigned long int a, const mpreal& b, mp_rnd_t rnd_mode = mpreal::get_default_rnd())
+{
+ mpreal x(a);
+ mpfr_ui_pow(x.mp,a,b.mp,rnd_mode);
+ return x;
+}
+
+inline const mpreal pow(const unsigned int a, const mpreal& b, mp_rnd_t rnd_mode)
+{
+ return pow(static_cast<unsigned long int>(a),b,rnd_mode);
+}
+
+inline const mpreal pow(const long int a, const mpreal& b, mp_rnd_t rnd_mode)
+{
+ if (a>=0) return pow(static_cast<unsigned long int>(a),b,rnd_mode);
+ else return pow(mpreal(a),b,rnd_mode);
+}
+
+inline const mpreal pow(const int a, const mpreal& b, mp_rnd_t rnd_mode)
+{
+ if (a>=0) return pow(static_cast<unsigned long int>(a),b,rnd_mode);
+ else return pow(mpreal(a),b,rnd_mode);
+}
+
+inline const mpreal pow(const long double a, const mpreal& b, mp_rnd_t rnd_mode)
+{
+ return pow(mpreal(a),b,rnd_mode);
+}
+
+inline const mpreal pow(const double a, const mpreal& b, mp_rnd_t rnd_mode)
+{
+ return pow(mpreal(a),b,rnd_mode);
+}
+
+// pow unsigned long int
+inline const mpreal pow(const unsigned long int a, const unsigned long int b, mp_rnd_t rnd_mode)
+{
+ mpreal x(a);
+ mpfr_ui_pow_ui(x.mp,a,b,rnd_mode);
+ return x;
+}
+
+inline const mpreal pow(const unsigned long int a, const unsigned int b, mp_rnd_t rnd_mode)
+{
+ return pow(a,static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+}
+
+inline const mpreal pow(const unsigned long int a, const long int b, mp_rnd_t rnd_mode)
+{
+ if(b>0) return pow(a,static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+ else return pow(a,mpreal(b),rnd_mode); //mpfr_ui_pow
+}
+
+inline const mpreal pow(const unsigned long int a, const int b, mp_rnd_t rnd_mode)
+{
+ if(b>0) return pow(a,static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+ else return pow(a,mpreal(b),rnd_mode); //mpfr_ui_pow
+}
+
+inline const mpreal pow(const unsigned long int a, const long double b, mp_rnd_t rnd_mode)
+{
+ return pow(a,mpreal(b),rnd_mode); //mpfr_ui_pow
+}
+
+inline const mpreal pow(const unsigned long int a, const double b, mp_rnd_t rnd_mode)
+{
+ return pow(a,mpreal(b),rnd_mode); //mpfr_ui_pow
+}
+
+// pow unsigned int
+inline const mpreal pow(const unsigned int a, const unsigned long int b, mp_rnd_t rnd_mode)
+{
+ return pow(static_cast<unsigned long int>(a),b,rnd_mode); //mpfr_ui_pow_ui
+}
+
+inline const mpreal pow(const unsigned int a, const unsigned int b, mp_rnd_t rnd_mode)
+{
+ return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+}
+
+inline const mpreal pow(const unsigned int a, const long int b, mp_rnd_t rnd_mode)
+{
+ if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+ else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+}
+
+inline const mpreal pow(const unsigned int a, const int b, mp_rnd_t rnd_mode)
+{
+ if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+ else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+}
+
+inline const mpreal pow(const unsigned int a, const long double b, mp_rnd_t rnd_mode)
+{
+ return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+}
+
+inline const mpreal pow(const unsigned int a, const double b, mp_rnd_t rnd_mode)
+{
+ return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+}
+
+// pow long int
+inline const mpreal pow(const long int a, const unsigned long int b, mp_rnd_t rnd_mode)
+{
+ if (a>0) return pow(static_cast<unsigned long int>(a),b,rnd_mode); //mpfr_ui_pow_ui
+ else return pow(mpreal(a),b,rnd_mode); //mpfr_pow_ui
+}
+
+inline const mpreal pow(const long int a, const unsigned int b, mp_rnd_t rnd_mode)
+{
+ if (a>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+ else return pow(mpreal(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_pow_ui
+}
+
+inline const mpreal pow(const long int a, const long int b, mp_rnd_t rnd_mode)
+{
+ if (a>0)
+ {
+ if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+ else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+ }else{
+ return pow(mpreal(a),b,rnd_mode); // mpfr_pow_si
+ }
+}
+
+inline const mpreal pow(const long int a, const int b, mp_rnd_t rnd_mode)
+{
+ if (a>0)
+ {
+ if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+ else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+ }else{
+ return pow(mpreal(a),static_cast<long int>(b),rnd_mode); // mpfr_pow_si
+ }
+}
+
+inline const mpreal pow(const long int a, const long double b, mp_rnd_t rnd_mode)
+{
+ if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+ else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow
+}
+
+inline const mpreal pow(const long int a, const double b, mp_rnd_t rnd_mode)
+{
+ if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+ else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow
+}
+
+// pow int
+inline const mpreal pow(const int a, const unsigned long int b, mp_rnd_t rnd_mode)
+{
+ if (a>0) return pow(static_cast<unsigned long int>(a),b,rnd_mode); //mpfr_ui_pow_ui
+ else return pow(mpreal(a),b,rnd_mode); //mpfr_pow_ui
+}
+
+inline const mpreal pow(const int a, const unsigned int b, mp_rnd_t rnd_mode)
+{
+ if (a>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+ else return pow(mpreal(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_pow_ui
+}
+
+inline const mpreal pow(const int a, const long int b, mp_rnd_t rnd_mode)
+{
+ if (a>0)
+ {
+ if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+ else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+ }else{
+ return pow(mpreal(a),b,rnd_mode); // mpfr_pow_si
+ }
+}
+
+inline const mpreal pow(const int a, const int b, mp_rnd_t rnd_mode)
+{
+ if (a>0)
+ {
+ if(b>0) return pow(static_cast<unsigned long int>(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_ui_pow_ui
+ else return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+ }else{
+ return pow(mpreal(a),static_cast<long int>(b),rnd_mode); // mpfr_pow_si
+ }
+}
+
+inline const mpreal pow(const int a, const long double b, mp_rnd_t rnd_mode)
+{
+ if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+ else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow
+}
+
+inline const mpreal pow(const int a, const double b, mp_rnd_t rnd_mode)
+{
+ if (a>=0) return pow(static_cast<unsigned long int>(a),mpreal(b),rnd_mode); //mpfr_ui_pow
+ else return pow(mpreal(a),mpreal(b),rnd_mode); //mpfr_pow
+}
+
+// pow long double
+inline const mpreal pow(const long double a, const long double b, mp_rnd_t rnd_mode)
+{
+ return pow(mpreal(a),mpreal(b),rnd_mode);
+}
+
+inline const mpreal pow(const long double a, const unsigned long int b, mp_rnd_t rnd_mode)
+{
+ return pow(mpreal(a),b,rnd_mode); //mpfr_pow_ui
+}
+
+inline const mpreal pow(const long double a, const unsigned int b, mp_rnd_t rnd_mode)
+{
+ return pow(mpreal(a),static_cast<unsigned long int>(b),rnd_mode); //mpfr_pow_ui
+}
+
+inline const mpreal pow(const long double a, const long int b, mp_rnd_t rnd_mode)
+{
+ return pow(mpreal(a),b,rnd_mode); // mpfr_pow_si
+}
+
+inline const mpreal pow(const long double a, const int b, mp_rnd_t rnd_mode)
+{
+ return pow(mpreal(a),static_cast<long int>(b),rnd_mode); // mpfr_pow_si
+}
+
+inline const mpreal pow(const double a, const double b, mp_rnd_t rnd_mode)
+{
+ return pow(mpreal(a),mpreal(b),rnd_mode);
+}
+
+inline const mpreal pow(const double a, const unsigned long int b, mp_rnd_t rnd_mode)
+{
+ return pow(mpreal(a),b,rnd_mode); // mpfr_pow_ui
+}
+
+inline const mpreal pow(const double a, const unsigned int b, mp_rnd_t rnd_mode)
+{
+ return pow(mpreal(a),static_cast<unsigned long int>(b),rnd_mode); // mpfr_pow_ui
+}
+
+inline const mpreal pow(const double a, const long int b, mp_rnd_t rnd_mode)
+{
+ return pow(mpreal(a),b,rnd_mode); // mpfr_pow_si
+}
+
+inline const mpreal pow(const double a, const int b, mp_rnd_t rnd_mode)
+{
+ return pow(mpreal(a),static_cast<long int>(b),rnd_mode); // mpfr_pow_si
+}
+} // End of mpfr namespace
+
+// Explicit specialization of std::swap for mpreal numbers, so that standard
+// algorithms use the efficient, non-throwing mpfr_swap-based version
+// Non-throwing swap C++ idiom: http://en.wikibooks.org/wiki/More_C%2B%2B_Idioms/Non-throwing_swap
+namespace std
+{
+ // we are only allowed to extend namespace std with template specializations
+ template <>
+ inline void swap(mpfr::mpreal& x, mpfr::mpreal& y)
+ {
+ return mpfr::swap(x, y);
+ }
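+
+ // Illustrative effect: std::swap(a, b) on two mpreals now exchanges the
+ // underlying mpfr_t data in O(1) instead of copying limbs.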
+
+ template<>
+ class numeric_limits<mpfr::mpreal>
+ {
+ public:
+ static const bool is_specialized = true;
+ static const bool is_signed = true;
+ static const bool is_integer = false;
+ static const bool is_exact = false;
+ static const int radix = 2;
+
+ static const bool has_infinity = true;
+ static const bool has_quiet_NaN = true;
+ static const bool has_signaling_NaN = true;
+
+ static const bool is_iec559 = true; // = IEEE 754
+ static const bool is_bounded = true;
+ static const bool is_modulo = false;
+ static const bool traps = true;
+ static const bool tinyness_before = true;
+
+ static const float_denorm_style has_denorm = denorm_absent;
+
+ inline static mpfr::mpreal (min) (mp_prec_t precision = mpfr::mpreal::get_default_prec()) { return mpfr::minval(precision); }
+ inline static mpfr::mpreal (max) (mp_prec_t precision = mpfr::mpreal::get_default_prec()) { return mpfr::maxval(precision); }
+ inline static mpfr::mpreal lowest (mp_prec_t precision = mpfr::mpreal::get_default_prec()) { return -mpfr::maxval(precision); }
+
+ // Returns smallest eps such that 1 + eps != 1 (classic machine epsilon)
+ inline static mpfr::mpreal epsilon(mp_prec_t precision = mpfr::mpreal::get_default_prec()) { return mpfr::machine_epsilon(precision); }
+
+ // Returns smallest eps such that x + eps != x (relative machine epsilon)
+ inline static mpfr::mpreal epsilon(const mpfr::mpreal& x) { return mpfr::machine_epsilon(x); }
+
+ inline static mpfr::mpreal round_error(mp_prec_t precision = mpfr::mpreal::get_default_prec())
+ {
+ mp_rnd_t r = mpfr::mpreal::get_default_rnd();
+
+ if(r == GMP_RNDN) return mpfr::mpreal(0.5, precision);
+ else return mpfr::mpreal(1.0, precision);
+ }
+
+ inline static const mpfr::mpreal infinity() { return mpfr::const_infinity(); }
+ inline static const mpfr::mpreal quiet_NaN() { return mpfr::mpreal().setNan(); }
+ inline static const mpfr::mpreal signaling_NaN() { return mpfr::mpreal().setNan(); }
+ inline static const mpfr::mpreal denorm_min() { return (min)(); }
+
+ // Please note: the exponent range is not fixed in MPFR
+ static const int min_exponent = MPFR_EMIN_DEFAULT;
+ static const int max_exponent = MPFR_EMAX_DEFAULT;
+ MPREAL_PERMISSIVE_EXPR static const int min_exponent10 = (int) (MPFR_EMIN_DEFAULT * 0.3010299956639811);
+ MPREAL_PERMISSIVE_EXPR static const int max_exponent10 = (int) (MPFR_EMAX_DEFAULT * 0.3010299956639811);
+
+#ifdef MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS
+
+ // The following members should be constants according to the standard, but they
+ // can be variable in MPFR, so we define them as functions here.
+ //
+ // This is the preferable way to specialize std::numeric_limits<mpfr::mpreal>,
+ // but it is incompatible with the standard std::numeric_limits interface and might
+ // not work with other libraries, e.g. Boost. See below for a compatible implementation.
+ inline static float_round_style round_style()
+ {
+ mp_rnd_t r = mpfr::mpreal::get_default_rnd();
+
+ switch (r)
+ {
+ case GMP_RNDN: return round_to_nearest;
+ case GMP_RNDZ: return round_toward_zero;
+ case GMP_RNDU: return round_toward_infinity;
+ case GMP_RNDD: return round_toward_neg_infinity;
+ default: return round_indeterminate;
+ }
+ }
+
+ inline static int digits() { return int(mpfr::mpreal::get_default_prec()); }
+ inline static int digits(const mpfr::mpreal& x) { return x.getPrecision(); }
+
+ inline static int digits10(mp_prec_t precision = mpfr::mpreal::get_default_prec())
+ {
+ return mpfr::bits2digits(precision);
+ }
+
+ inline static int digits10(const mpfr::mpreal& x)
+ {
+ return mpfr::bits2digits(x.getPrecision());
+ }
+
+ inline static int max_digits10(mp_prec_t precision = mpfr::mpreal::get_default_prec())
+ {
+ return digits10(precision);
+ }
+#else
+ // Digits and round_style are NOT constants when it comes to mpreal.
+ // If possible, please use the functions digits() and round_style() defined above.
+ //
+ // These (default) values are preserved for compatibility with existing libraries, e.g. Boost.
+ // Change them according to your application's needs.
+ //
+ // For example, if you use 256 bits of precision uniformly in your program, then:
+ // digits = 256
+ // digits10 = 77
+ // max_digits10 = 78
+ //
+ // Approximate formula for decimal digits is: digits10 = floor(log10(2) * digits). See bits2digits() for more details.
+
+ static const std::float_round_style round_style = round_to_nearest;
+ static const int digits = 53;
+ static const int digits10 = 15;
+ static const int max_digits10 = 16;
+#endif
+ };
+
+}
+
+#endif /* __MPREAL_H__ */
diff --git a/eigen/unsupported/test/mpreal_support.cpp b/eigen/unsupported/test/mpreal_support.cpp index bc00382..685e7ea 100644 --- a/eigen/unsupported/test/mpreal_support.cpp +++ b/eigen/unsupported/test/mpreal_support.cpp @@ -12,11 +12,13 @@ void test_mpreal_support() // set precision to 256 bits (double has only 53 bits) mpreal::set_default_prec(256); typedef Matrix<mpreal,Eigen::Dynamic,Eigen::Dynamic> MatrixXmp; + typedef Matrix<std::complex<mpreal>,Eigen::Dynamic,Eigen::Dynamic> MatrixXcmp; std::cerr << "epsilon = " << NumTraits<mpreal>::epsilon() << "\n"; std::cerr << "dummy_precision = " << NumTraits<mpreal>::dummy_precision() << "\n"; std::cerr << "highest = " << NumTraits<mpreal>::highest() << "\n"; std::cerr << "lowest = " << NumTraits<mpreal>::lowest() << "\n"; + std::cerr << "digits10 = " << NumTraits<mpreal>::digits10() << "\n"; for(int i = 0; i < g_repeat; i++) { int s = Eigen::internal::random<int>(1,100); @@ -24,6 +26,10 @@ void test_mpreal_support() MatrixXmp B = MatrixXmp::Random(s,s); MatrixXmp S = A.adjoint() * A; MatrixXmp X; + MatrixXcmp Ac = MatrixXcmp::Random(s,s); + MatrixXcmp Bc = MatrixXcmp::Random(s,s); + MatrixXcmp Sc = Ac.adjoint() * Ac; + MatrixXcmp Xc; // Basic stuffs VERIFY_IS_APPROX(A.real(), A); @@ -32,12 +38,14 @@ void test_mpreal_support() VERIFY_IS_APPROX(A.array().abs2().sqrt(), A.array().abs()); VERIFY_IS_APPROX(A.array().sin(), sin(A.array())); VERIFY_IS_APPROX(A.array().cos(), cos(A.array())); - // Cholesky X = S.selfadjointView<Lower>().llt().solve(B); VERIFY_IS_APPROX((S.selfadjointView<Lower>()*X).eval(),B); + Xc = Sc.selfadjointView<Lower>().llt().solve(Bc); + VERIFY_IS_APPROX((Sc.selfadjointView<Lower>()*Xc).eval(),Bc); + // partial LU X = A.lu().solve(B); VERIFY_IS_APPROX((A*X).eval(),B); diff --git a/eigen/unsupported/test/polynomialsolver.cpp b/eigen/unsupported/test/polynomialsolver.cpp index de79f15..7ad4aa6 100644 --- a/eigen/unsupported/test/polynomialsolver.cpp +++ b/eigen/unsupported/test/polynomialsolver.cpp @@ -32,12 +32,16 @@ bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve ) { typedef typename POLYNOMIAL::Index Index; typedef typename POLYNOMIAL::Scalar Scalar; + typedef typename POLYNOMIAL::RealScalar RealScalar; typedef typename SOLVER::RootsType RootsType; - typedef Matrix<Scalar,Deg,1> EvalRootsType; + typedef Matrix<RealScalar,Deg,1> EvalRootsType; const Index deg = pols.size()-1; + // Test template constructor from coefficient vector + SOLVER solve_constr (pols); + psolve.compute( pols ); const RootsType& roots( psolve.roots() ); EvalRootsType evr( deg ); @@ -54,7 +58,7 @@ bool aux_evalSolver( const POLYNOMIAL& pols, SOLVER& psolve ) cerr << endl; } - std::vector<Scalar> rootModuli( roots.size() ); + std::vector<RealScalar> rootModuli( roots.size() ); Map< EvalRootsType > aux( &rootModuli[0], roots.size() ); aux = roots.array().abs(); std::sort( rootModuli.begin(), rootModuli.end() ); @@ -80,7 +84,7 @@ void evalSolver( const POLYNOMIAL& pols ) { typedef typename POLYNOMIAL::Scalar Scalar; - typedef PolynomialSolver<Scalar, Deg > PolynomialSolverType; + typedef PolynomialSolver<Scalar, Deg > PolynomialSolverType; PolynomialSolverType psolve; aux_evalSolver<Deg, POLYNOMIAL, PolynomialSolverType>( pols, psolve ); @@ -94,6 +98,7 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const { using std::sqrt; typedef typename POLYNOMIAL::Scalar Scalar; + typedef typename POLYNOMIAL::RealScalar RealScalar; typedef PolynomialSolver<Scalar, Deg > PolynomialSolverType; @@ -104,21 +109,19 @@ void 
evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const // 1) the roots found are correct // 2) the roots have distinct moduli - typedef typename REAL_ROOTS::Scalar Real; - //Test realRoots - std::vector< Real > calc_realRoots; - psolve.realRoots( calc_realRoots ); - VERIFY( calc_realRoots.size() == (size_t)real_roots.size() ); + std::vector< RealScalar > calc_realRoots; + psolve.realRoots( calc_realRoots, test_precision<RealScalar>()); + VERIFY_IS_EQUAL( calc_realRoots.size() , (size_t)real_roots.size() ); - const Scalar psPrec = sqrt( test_precision<Scalar>() ); + const RealScalar psPrec = sqrt( test_precision<RealScalar>() ); for( size_t i=0; i<calc_realRoots.size(); ++i ) { bool found = false; for( size_t j=0; j<calc_realRoots.size()&& !found; ++j ) { - if( internal::isApprox( calc_realRoots[i], real_roots[j] ), psPrec ){ + if( internal::isApprox( calc_realRoots[i], real_roots[j], psPrec ) ){ found = true; } } VERIFY( found ); @@ -134,7 +137,7 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const bool hasRealRoot; //Test absGreatestRealRoot - Real r = psolve.absGreatestRealRoot( hasRealRoot ); + RealScalar r = psolve.absGreatestRealRoot( hasRealRoot ); VERIFY( hasRealRoot == (real_roots.size() > 0 ) ); if( hasRealRoot ){ VERIFY( internal::isApprox( real_roots.array().abs().maxCoeff(), abs(r), psPrec ) ); } @@ -163,9 +166,11 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const template<typename _Scalar, int _Deg> void polynomialsolver(int deg) { - typedef internal::increment_if_fixed_size<_Deg> Dim; + typedef typename NumTraits<_Scalar>::Real RealScalar; + typedef internal::increment_if_fixed_size<_Deg> Dim; typedef Matrix<_Scalar,Dim::ret,1> PolynomialType; typedef Matrix<_Scalar,_Deg,1> EvalRootsType; + typedef Matrix<RealScalar,_Deg,1> RealRootsType; cout << "Standard cases" << endl; PolynomialType pols = PolynomialType::Random(deg+1); @@ -178,15 +183,11 @@ void polynomialsolver(int deg) evalSolver<_Deg,PolynomialType>( pols ); cout << "Test sugar" << endl; - EvalRootsType realRoots = EvalRootsType::Random(deg); + RealRootsType realRoots = RealRootsType::Random(deg); roots_to_monicPolynomial( realRoots, pols ); evalSolverSugarFunction<_Deg>( pols, - realRoots.template cast < - std::complex< - typename NumTraits<_Scalar>::Real - > - >(), + realRoots.template cast <std::complex<RealScalar> >().eval(), realRoots ); } @@ -209,5 +210,7 @@ void test_polynomialsolver() CALL_SUBTEST_10((polynomialsolver<double,Dynamic>( internal::random<int>(9,13) )) ); + CALL_SUBTEST_11((polynomialsolver<float,Dynamic>(1)) ); + CALL_SUBTEST_12((polynomialsolver<std::complex<double>,Dynamic>(internal::random<int>(2,13))) ); } } diff --git a/eigen/unsupported/test/sparse_extra.cpp b/eigen/unsupported/test/sparse_extra.cpp index 1ee791b..4f6723d 100644 --- a/eigen/unsupported/test/sparse_extra.cpp +++ b/eigen/unsupported/test/sparse_extra.cpp @@ -49,7 +49,6 @@ bool test_random_setter(DynamicSparseMatrix<T>& sm, const DenseType& ref, const template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& ref) { - typedef typename SparseMatrixType::Index Index; const Index rows = ref.rows(); const Index cols = ref.cols(); typedef typename SparseMatrixType::Scalar Scalar; @@ -130,6 +129,19 @@ template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& re } +template<typename SparseMatrixType> +void check_marketio() +{ + typedef Matrix<typename SparseMatrixType::Scalar, Dynamic, Dynamic> DenseMatrix; + Index 
rows = internal::random<Index>(1,100); + Index cols = internal::random<Index>(1,100); + SparseMatrixType m1, m2; + m1 = DenseMatrix::Random(rows, cols).sparseView(); + saveMarket(m1, "sparse_extra.mtx"); + loadMarket(m2, "sparse_extra.mtx"); + VERIFY_IS_EQUAL(DenseMatrix(m1),DenseMatrix(m2)); +} + void test_sparse_extra() { for(int i = 0; i < g_repeat; i++) { @@ -144,5 +156,15 @@ void test_sparse_extra() CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, ColMajor> >()) ); CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, RowMajor> >()) ); + + CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,long int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,long int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,long int> >()) ); + CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,long int> >()) ); + TEST_SET_BUT_UNUSED_VARIABLE(s); } } diff --git a/eigen/unsupported/test/special_functions.cpp b/eigen/unsupported/test/special_functions.cpp new file mode 100644 index 0000000..057fb3e --- /dev/null +++ b/eigen/unsupported/test/special_functions.cpp @@ -0,0 +1,345 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" +#include "../Eigen/SpecialFunctions" + +template<typename X, typename Y> +void verify_component_wise(const X& x, const Y& y) +{ + for(Index i=0; i<x.size(); ++i) + { + if((numext::isfinite)(y(i))) + VERIFY_IS_APPROX( x(i), y(i) ); + else if((numext::isnan)(y(i))) + VERIFY((numext::isnan)(x(i))); + else + VERIFY_IS_EQUAL( x(i), y(i) ); + } +} + +template<typename ArrayType> void array_special_functions() +{ + using std::abs; + using std::sqrt; + typedef typename ArrayType::Scalar Scalar; + typedef typename NumTraits<Scalar>::Real RealScalar; + + Scalar plusinf = std::numeric_limits<Scalar>::infinity(); + Scalar nan = std::numeric_limits<Scalar>::quiet_NaN(); + + Index rows = internal::random<Index>(1,30); + Index cols = 1; + + // API + { + ArrayType m1 = ArrayType::Random(rows,cols); +#if EIGEN_HAS_C99_MATH + VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1)); + VERIFY_IS_APPROX(m1.digamma(), digamma(m1)); + VERIFY_IS_APPROX(m1.erf(), erf(m1)); + VERIFY_IS_APPROX(m1.erfc(), erfc(m1)); +#endif // EIGEN_HAS_C99_MATH + } + + +#if EIGEN_HAS_C99_MATH + // check special functions (comparing against numpy implementation) + if (!NumTraits<Scalar>::IsComplex) + { + + { + ArrayType m1 = ArrayType::Random(rows,cols); + ArrayType m2 = ArrayType::Random(rows,cols); + + // Test various propreties of igamma & igammac. These are normalized + // gamma integrals where + // igammac(a, x) = Gamma(a, x) / Gamma(a) + // igamma(a, x) = gamma(a, x) / Gamma(a) + // where Gamma and gamma are considered the standard unnormalized + // upper and lower incomplete gamma functions, respectively. 
+ ArrayType a = m1.abs() + 2; + ArrayType x = m2.abs() + 2; + ArrayType zero = ArrayType::Zero(rows, cols); + ArrayType one = ArrayType::Constant(rows, cols, Scalar(1.0)); + ArrayType a_m1 = a - one; + ArrayType Gamma_a_x = Eigen::igammac(a, x) * a.lgamma().exp(); + ArrayType Gamma_a_m1_x = Eigen::igammac(a_m1, x) * a_m1.lgamma().exp(); + ArrayType gamma_a_x = Eigen::igamma(a, x) * a.lgamma().exp(); + ArrayType gamma_a_m1_x = Eigen::igamma(a_m1, x) * a_m1.lgamma().exp(); + + // Gamma(a, 0) == Gamma(a) + VERIFY_IS_APPROX(Eigen::igammac(a, zero), one); + + // Gamma(a, x) + gamma(a, x) == Gamma(a) + VERIFY_IS_APPROX(Gamma_a_x + gamma_a_x, a.lgamma().exp()); + + // Gamma(a, x) == (a - 1) * Gamma(a-1, x) + x^(a-1) * exp(-x) + VERIFY_IS_APPROX(Gamma_a_x, (a - 1) * Gamma_a_m1_x + x.pow(a-1) * (-x).exp()); + + // gamma(a, x) == (a - 1) * gamma(a-1, x) - x^(a-1) * exp(-x) + VERIFY_IS_APPROX(gamma_a_x, (a - 1) * gamma_a_m1_x - x.pow(a-1) * (-x).exp()); + } + + { + // Check exact values of igamma and igammac against a third party calculation. + Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)}; + + // location i*6+j corresponds to a_s[i], x_s[j]. + Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan}, + {0.0, 0.6321205588285578, 0.7768698398515702, + 0.9816843611112658, 9.999500016666262e-05, 1.0}, + {0.0, 0.4275932955291202, 0.608374823728911, + 0.9539882943107686, 7.522076445089201e-07, 1.0}, + {0.0, 0.01898815687615381, 0.06564245437845008, + 0.5665298796332909, 4.166333347221828e-18, 1.0}, + {0.0, 0.9999780593618628, 0.9999899967080838, + 0.9999996219837988, 0.9991370418689945, 1.0}, + {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}}; + Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan}, + {1.0, 0.36787944117144233, 0.22313016014842982, + 0.018315638888734182, 0.9999000049998333, 0.0}, + {1.0, 0.5724067044708798, 0.3916251762710878, + 0.04601170568923136, 0.9999992477923555, 0.0}, + {1.0, 0.9810118431238462, 0.9343575456215499, + 0.4334701203667089, 1.0, 0.0}, + {1.0, 2.1940638138146658e-05, 1.0003291916285e-05, + 3.7801620118431334e-07, 0.0008629581310054535, + 0.0}, + {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}}; + for (int i = 0; i < 6; ++i) { + for (int j = 0; j < 6; ++j) { + if ((std::isnan)(igamma_s[i][j])) { + VERIFY((std::isnan)(numext::igamma(a_s[i], x_s[j]))); + } else { + VERIFY_IS_APPROX(numext::igamma(a_s[i], x_s[j]), igamma_s[i][j]); + } + + if ((std::isnan)(igammac_s[i][j])) { + VERIFY((std::isnan)(numext::igammac(a_s[i], x_s[j]))); + } else { + VERIFY_IS_APPROX(numext::igammac(a_s[i], x_s[j]), igammac_s[i][j]); + } + } + } + } + } +#endif // EIGEN_HAS_C99_MATH + + // Check the zeta function against scipy.special.zeta + { + ArrayType x(7), q(7), res(7), ref(7); + x << 1.5, 4, 10.5, 10000.5, 3, 1, 0.9; + q << 2, 1.5, 3, 1.0001, -2.5, 1.2345, 1.2345; + ref << 1.61237534869, 0.234848505667, 1.03086757337e-5, 0.367879440865, 0.054102025820864097, plusinf, nan; + CALL_SUBTEST( verify_component_wise(ref, ref); ); + CALL_SUBTEST( res = x.zeta(q); verify_component_wise(res, ref); ); + CALL_SUBTEST( res = zeta(x,q); verify_component_wise(res, ref); ); + } + + // digamma + { + ArrayType x(7), res(7), ref(7); + x << 1, 1.5, 4, -10.5, 10000.5, 0, -1; + ref << -0.5772156649015329, 0.03648997397857645, 1.2561176684318, 2.398239129535781, 9.210340372392849, plusinf, plusinf; + CALL_SUBTEST( verify_component_wise(ref, ref); ); + + CALL_SUBTEST( res = 
x.digamma(); verify_component_wise(res, ref); ); + CALL_SUBTEST( res = digamma(x); verify_component_wise(res, ref); ); + } + + +#if EIGEN_HAS_C99_MATH + { + ArrayType n(11), x(11), res(11), ref(11); + n << 1, 1, 1, 1.5, 17, 31, 28, 8, 42, 147, 170; + x << 2, 3, 25.5, 1.5, 4.7, 11.8, 17.7, 30.2, 15.8, 54.1, 64; + ref << 0.644934066848, 0.394934066848, 0.0399946696496, nan, 293.334565435, 0.445487887616, -2.47810300902e-07, -8.29668781082e-09, -0.434562276666, 0.567742190178, -0.0108615497927; + CALL_SUBTEST( verify_component_wise(ref, ref); ); + + if(sizeof(RealScalar)>=8) { // double + // Reason for commented line: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232 + // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res, ref); ); + CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res, ref); ); + } + else { + // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res.head(8), ref.head(8)); ); + CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res.head(8), ref.head(8)); ); + } + } +#endif + +#if EIGEN_HAS_C99_MATH + { + // Inputs and ground truth generated with scipy via: + // a = np.logspace(-3, 3, 5) - 1e-3 + // b = np.logspace(-3, 3, 5) - 1e-3 + // x = np.linspace(-0.1, 1.1, 5) + // (full_a, full_b, full_x) = np.vectorize(lambda a, b, x: (a, b, x))(*np.ix_(a, b, x)) + // full_a = full_a.flatten().tolist() # same for full_b, full_x + // v = scipy.special.betainc(full_a, full_b, full_x).flatten().tolist() + // + // Note in Eigen, we call betainc with arguments in the order (x, a, b). + ArrayType a(125); + ArrayType b(125); + ArrayType x(125); + ArrayType v(125); + ArrayType res(125); + + a << 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, + 999.999, 999.999, 999.999; + + b << 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999, + 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999, + 999.999, 999.999, 999.999, 999.999, 0.0, 
0.0, 0.0, 0.0, 0.0, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, + 0.999, 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, + 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, + 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, + 31.62177660168379, 31.62177660168379, 31.62177660168379, + 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999, + 999.999, 999.999; + + x << -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, + 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, + -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, + 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, + 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, + 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, + 0.8, 1.1; + + v << nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, + nan, nan, nan, 0.47972119876364683, 0.5, 0.5202788012363533, nan, nan, + 0.9518683957740043, 0.9789663010413743, 0.9931729188073435, nan, nan, + 0.999995949033062, 0.9999999999993698, 0.9999999999999999, nan, nan, + 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, nan, + nan, nan, nan, nan, nan, 0.006827081192655869, 0.0210336989586256, + 0.04813160422599567, nan, nan, 0.20014344256217678, 0.5000000000000001, + 0.7998565574378232, nan, nan, 0.9991401428435834, 0.999999999698403, + 0.9999999999999999, nan, nan, 0.9999999999999999, 0.9999999999999999, + 0.9999999999999999, nan, nan, nan, nan, nan, nan, nan, + 1.0646600232370887e-25, 6.301722877826246e-13, 4.050966937974938e-06, + nan, nan, 7.864342668429763e-23, 3.015969667594166e-10, + 0.0008598571564165444, nan, nan, 6.031987710123844e-08, + 0.5000000000000007, 0.9999999396801229, nan, nan, 0.9999999999999999, + 0.9999999999999999, 0.9999999999999999, nan, nan, nan, nan, nan, nan, + nan, 0.0, 7.029920380986636e-306, 2.2450728208591345e-101, nan, nan, + 0.0, 9.275871147869727e-302, 1.2232913026152827e-97, nan, nan, 0.0, + 3.0891393081932924e-252, 2.9303043666183996e-60, nan, nan, + 2.248913486879199e-196, 0.5000000000004947, 0.9999999999999999, nan; + + CALL_SUBTEST(res = betainc(a, b, x); + verify_component_wise(res, v);); + } + + // Test various properties of betainc + { + ArrayType m1 = ArrayType::Random(32); + ArrayType m2 = ArrayType::Random(32); + ArrayType m3 = 
ArrayType::Random(32); + ArrayType one = ArrayType::Constant(32, Scalar(1.0)); + const Scalar eps = std::numeric_limits<Scalar>::epsilon(); + ArrayType a = (m1 * 4.0).exp(); + ArrayType b = (m2 * 4.0).exp(); + ArrayType x = m3.abs(); + + // betainc(a, 1, x) == x**a + CALL_SUBTEST( + ArrayType test = betainc(a, one, x); + ArrayType expected = x.pow(a); + verify_component_wise(test, expected);); + + // betainc(1, b, x) == 1 - (1 - x)**b + CALL_SUBTEST( + ArrayType test = betainc(one, b, x); + ArrayType expected = one - (one - x).pow(b); + verify_component_wise(test, expected);); + + // betainc(a, b, x) == 1 - betainc(b, a, 1-x) + CALL_SUBTEST( + ArrayType test = betainc(a, b, x) + betainc(b, a, one - x); + ArrayType expected = one; + verify_component_wise(test, expected);); + + // betainc(a+1, b, x) = betainc(a, b, x) - x**a * (1 - x)**b / (a * beta(a, b)) + CALL_SUBTEST( + ArrayType num = x.pow(a) * (one - x).pow(b); + ArrayType denom = a * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp(); + // Add eps to rhs and lhs so that component-wise test doesn't result in + // nans when both outputs are zeros. + ArrayType expected = betainc(a, b, x) - num / denom + eps; + ArrayType test = betainc(a + one, b, x) + eps; + if (sizeof(Scalar) >= 8) { // double + verify_component_wise(test, expected); + } else { + // Reason for limited test: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232 + verify_component_wise(test.head(8), expected.head(8)); + }); + + // betainc(a, b+1, x) = betainc(a, b, x) + x**a * (1 - x)**b / (b * beta(a, b)) + CALL_SUBTEST( + // Add eps to rhs and lhs so that component-wise test doesn't result in + // nans when both outputs are zeros. + ArrayType num = x.pow(a) * (one - x).pow(b); + ArrayType denom = b * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp(); + ArrayType expected = betainc(a, b, x) + num / denom + eps; + ArrayType test = betainc(a, b + one, x) + eps; + verify_component_wise(test, expected);); + } +#endif +} + +void test_special_functions() +{ + CALL_SUBTEST_1(array_special_functions<ArrayXf>()); + CALL_SUBTEST_2(array_special_functions<ArrayXd>()); +} diff --git a/eigen/unsupported/test/splines.cpp b/eigen/unsupported/test/splines.cpp index a7eb3e0..3be0204 100644 --- a/eigen/unsupported/test/splines.cpp +++ b/eigen/unsupported/test/splines.cpp @@ -13,23 +13,23 @@ namespace Eigen { -// lets do some explicit instantiations and thus -// force the compilation of all spline functions... -template class Spline<double, 2, Dynamic>; -template class Spline<double, 3, Dynamic>; + // lets do some explicit instantiations and thus + // force the compilation of all spline functions... 
diff --git a/eigen/unsupported/test/splines.cpp b/eigen/unsupported/test/splines.cpp
index a7eb3e0..3be0204 100644
--- a/eigen/unsupported/test/splines.cpp
+++ b/eigen/unsupported/test/splines.cpp
@@ -13,23 +13,23 @@
 namespace Eigen {
 
-// lets do some explicit instantiations and thus
-// force the compilation of all spline functions...
-template class Spline<double, 2, Dynamic>;
-template class Spline<double, 3, Dynamic>;
+  // let's do some explicit instantiations and thus
+  // force the compilation of all spline functions...
+  template class Spline<double, 2, Dynamic>;
+  template class Spline<double, 3, Dynamic>;
 
-template class Spline<double, 2, 2>;
-template class Spline<double, 2, 3>;
-template class Spline<double, 2, 4>;
-template class Spline<double, 2, 5>;
+  template class Spline<double, 2, 2>;
+  template class Spline<double, 2, 3>;
+  template class Spline<double, 2, 4>;
+  template class Spline<double, 2, 5>;
 
-template class Spline<float, 2, Dynamic>;
-template class Spline<float, 3, Dynamic>;
+  template class Spline<float, 2, Dynamic>;
+  template class Spline<float, 3, Dynamic>;
 
-template class Spline<float, 3, 2>;
-template class Spline<float, 3, 3>;
-template class Spline<float, 3, 4>;
-template class Spline<float, 3, 5>;
+  template class Spline<float, 3, 2>;
+  template class Spline<float, 3, 3>;
+  template class Spline<float, 3, 4>;
+  template class Spline<float, 3, 5>;
 
 }
 
@@ -234,11 +234,48 @@ void check_global_interpolation2d()
   }
 }
 
+void check_global_interpolation_with_derivatives2d()
+{
+  typedef Spline2d::PointType PointType;
+  typedef Spline2d::KnotVectorType KnotVectorType;
+
+  const Eigen::DenseIndex numPoints = 100;
+  const unsigned int dimension = 2;
+  const unsigned int degree = 3;
+
+  ArrayXXd points = ArrayXXd::Random(dimension, numPoints);
+
+  KnotVectorType knots;
+  Eigen::ChordLengths(points, knots);
+
+  ArrayXXd derivatives = ArrayXXd::Random(dimension, numPoints);
+  VectorXd derivativeIndices(numPoints);
+
+  for (Eigen::DenseIndex i = 0; i < numPoints; ++i)
+    derivativeIndices(i) = static_cast<double>(i);
+
+  const Spline2d spline = SplineFitting<Spline2d>::InterpolateWithDerivatives(
+    points, derivatives, derivativeIndices, degree);
+
+  for (Eigen::DenseIndex i = 0; i < points.cols(); ++i)
+  {
+    PointType point = spline(knots(i));
+    PointType referencePoint = points.col(i);
+    VERIFY_IS_APPROX(point, referencePoint);
+    PointType derivative = spline.derivatives(knots(i), 1).col(1);
+    PointType referenceDerivative = derivatives.col(i);
+    VERIFY_IS_APPROX(derivative, referenceDerivative);
+  }
+}
 
 void test_splines()
 {
-  CALL_SUBTEST( eval_spline3d() );
-  CALL_SUBTEST( eval_spline3d_onbrks() );
-  CALL_SUBTEST( eval_closed_spline2d() );
-  CALL_SUBTEST( check_global_interpolation2d() );
+  for (int i = 0; i < g_repeat; ++i)
+  {
+    CALL_SUBTEST( eval_spline3d() );
+    CALL_SUBTEST( eval_spline3d_onbrks() );
+    CALL_SUBTEST( eval_closed_spline2d() );
+    CALL_SUBTEST( check_global_interpolation2d() );
+    CALL_SUBTEST( check_global_interpolation_with_derivatives2d() );
+  }
 }
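InterpolateWithDerivatives, as used in the new test above, fits a spline that matches both positions and prescribed first derivatives at the given parameter sites. A minimal usage sketch (not part of the patch; sizes and data are illustrative), assuming Eigen's unsupported Splines module:

// Standalone sketch, not part of this patch: fit a 2-D cubic spline that
// interpolates random points and prescribed first derivatives at every site.
#include <unsupported/Eigen/Splines>
#include <iostream>

int main()
{
  using namespace Eigen;

  const DenseIndex n = 10;
  ArrayXXd points = ArrayXXd::Random(2, n);       // sample positions
  ArrayXXd derivatives = ArrayXXd::Random(2, n);  // prescribed tangents

  // Constrain a derivative at every point: indices 0..n-1.
  VectorXd derivativeIndices(n);
  for (DenseIndex i = 0; i < n; ++i)
    derivativeIndices(i) = static_cast<double>(i);

  const Spline2d spline = SplineFitting<Spline2d>::InterpolateWithDerivatives(
      points, derivatives, derivativeIndices, /*degree=*/3);

  // Chord-length parameterization recovers the knot at which each point lives,
  // mirroring the check in the test; evaluate value and first derivative there.
  Spline2d::KnotVectorType knots;
  ChordLengths(points, knots);
  std::cout << "value at first site:\n" << spline(knots(0)) << "\n";
  std::cout << "derivative at first site:\n"
            << spline.derivatives(knots(0), 1).col(1) << "\n";
}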
diff --git a/eigen/unsupported/test/svd_common.h b/eigen/unsupported/test/svd_common.h
deleted file mode 100644
index b40c23a..0000000
--- a/eigen/unsupported/test/svd_common.h
+++ /dev/null
@@ -1,261 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
-// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
-//
-// Copyright (C) 2013 Gauthier Brun <brun.gauthier@gmail.com>
-// Copyright (C) 2013 Nicolas Carre <nicolas.carre@ensimag.fr>
-// Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr>
-// Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-// discard stack allocation as that too bypasses malloc
-#define EIGEN_STACK_ALLOCATION_LIMIT 0
-#define EIGEN_RUNTIME_NO_MALLOC
-
-#include "main.h"
-#include <unsupported/Eigen/SVD>
-#include <Eigen/LU>
-
-
-// check that "svd" is a valid decomposition of "m"
-template<typename MatrixType, typename SVD>
-void svd_check_full(const MatrixType& m, const SVD& svd)
-{
-  typedef typename MatrixType::Index Index;
-  Index rows = m.rows();
-  Index cols = m.cols();
-  enum {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime
-  };
-
-  typedef typename MatrixType::Scalar Scalar;
-  typedef Matrix<Scalar, RowsAtCompileTime, RowsAtCompileTime> MatrixUType;
-  typedef Matrix<Scalar, ColsAtCompileTime, ColsAtCompileTime> MatrixVType;
-
-
-  MatrixType sigma = MatrixType::Zero(rows, cols);
-  sigma.diagonal() = svd.singularValues().template cast<Scalar>();
-  MatrixUType u = svd.matrixU();
-  MatrixVType v = svd.matrixV();
-  VERIFY_IS_APPROX(m, u * sigma * v.adjoint());
-  VERIFY_IS_UNITARY(u);
-  VERIFY_IS_UNITARY(v);
-} // end svd_check_full
-
-
-
-// Compare to a reference value
-template<typename MatrixType, typename SVD>
-void svd_compare_to_full(const MatrixType& m,
-                         unsigned int computationOptions,
-                         const SVD& referenceSvd)
-{
-  typedef typename MatrixType::Index Index;
-  Index rows = m.rows();
-  Index cols = m.cols();
-  Index diagSize = (std::min)(rows, cols);
-
-  SVD svd(m, computationOptions);
-
-  VERIFY_IS_APPROX(svd.singularValues(), referenceSvd.singularValues());
-  if(computationOptions & ComputeFullU)
-    VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU());
-  if(computationOptions & ComputeThinU)
-    VERIFY_IS_APPROX(svd.matrixU(), referenceSvd.matrixU().leftCols(diagSize));
-  if(computationOptions & ComputeFullV)
-    VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV());
-  if(computationOptions & ComputeThinV)
-    VERIFY_IS_APPROX(svd.matrixV(), referenceSvd.matrixV().leftCols(diagSize));
-} // end svd_compare_to_full
-
-
-
-template<typename MatrixType, typename SVD>
-void svd_solve(const MatrixType& m, unsigned int computationOptions)
-{
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::Index Index;
-  Index rows = m.rows();
-  Index cols = m.cols();
-
-  enum {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime
-  };
-
-  typedef Matrix<Scalar, RowsAtCompileTime, Dynamic> RhsType;
-  typedef Matrix<Scalar, ColsAtCompileTime, Dynamic> SolutionType;
-
-  RhsType rhs = RhsType::Random(rows, internal::random<Index>(1, cols));
-  SVD svd(m, computationOptions);
-  SolutionType x = svd.solve(rhs);
-  // evaluate the normal equations, which also hold for least-squares solutions
-  VERIFY_IS_APPROX(m.adjoint()*m*x, m.adjoint()*rhs);
-} // end svd_solve
-
-
-// test computation options
-// 2 functions because JacobiSVD can return before the second function
-template<typename MatrixType, typename SVD>
-void svd_test_computation_options_1(const MatrixType& m, const SVD& fullSvd)
-{
-  svd_check_full< MatrixType, SVD >(m, fullSvd);
-  svd_solve< MatrixType, SVD >(m, ComputeFullU | ComputeFullV);
-}
-
-
-template<typename MatrixType, typename SVD>
-void svd_test_computation_options_2(const MatrixType& m, const SVD& fullSvd)
-{
-  svd_compare_to_full< MatrixType, SVD >(m, ComputeFullU, fullSvd);
-  svd_compare_to_full< MatrixType, SVD >(m, ComputeFullV, fullSvd);
-  svd_compare_to_full< MatrixType, SVD >(m, 0, fullSvd);
-
-  if (MatrixType::ColsAtCompileTime == Dynamic) {
-    // thin U/V are only available with a dynamic number of columns
-
-    svd_compare_to_full< MatrixType, SVD >(m, ComputeFullU|ComputeThinV, fullSvd);
-    svd_compare_to_full< MatrixType, SVD >(m, ComputeThinV, fullSvd);
-    svd_compare_to_full< MatrixType, SVD >(m, ComputeThinU|ComputeFullV, fullSvd);
-    svd_compare_to_full< MatrixType, SVD >(m, ComputeThinU, fullSvd);
-    svd_compare_to_full< MatrixType, SVD >(m, ComputeThinU|ComputeThinV, fullSvd);
-    svd_solve<MatrixType, SVD>(m, ComputeFullU | ComputeThinV);
-    svd_solve<MatrixType, SVD>(m, ComputeThinU | ComputeFullV);
-    svd_solve<MatrixType, SVD>(m, ComputeThinU | ComputeThinV);
-
-    typedef typename MatrixType::Index Index;
-    Index diagSize = (std::min)(m.rows(), m.cols());
-    SVD svd(m, ComputeThinU | ComputeThinV);
-    VERIFY_IS_APPROX(m, svd.matrixU().leftCols(diagSize) * svd.singularValues().asDiagonal() * svd.matrixV().leftCols(diagSize).adjoint());
-  }
-}
-
-template<typename MatrixType, typename SVD>
-void svd_verify_assert(const MatrixType& m)
-{
-  typedef typename MatrixType::Scalar Scalar;
-  typedef typename MatrixType::Index Index;
-  Index rows = m.rows();
-  Index cols = m.cols();
-
-  enum {
-    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-    ColsAtCompileTime = MatrixType::ColsAtCompileTime
-  };
-
-  typedef Matrix<Scalar, RowsAtCompileTime, 1> RhsType;
-  RhsType rhs(rows);
-  SVD svd;
-  VERIFY_RAISES_ASSERT(svd.matrixU())
-  VERIFY_RAISES_ASSERT(svd.singularValues())
-  VERIFY_RAISES_ASSERT(svd.matrixV())
-  VERIFY_RAISES_ASSERT(svd.solve(rhs))
-  MatrixType a = MatrixType::Zero(rows, cols);
-  a.setZero();
-  svd.compute(a, 0);
-  VERIFY_RAISES_ASSERT(svd.matrixU())
-  VERIFY_RAISES_ASSERT(svd.matrixV())
-  svd.singularValues();
-  VERIFY_RAISES_ASSERT(svd.solve(rhs))
-
-  if (ColsAtCompileTime == Dynamic)
-  {
-    svd.compute(a, ComputeThinU);
-    svd.matrixU();
-    VERIFY_RAISES_ASSERT(svd.matrixV())
-    VERIFY_RAISES_ASSERT(svd.solve(rhs))
-    svd.compute(a, ComputeThinV);
-    svd.matrixV();
-    VERIFY_RAISES_ASSERT(svd.matrixU())
-    VERIFY_RAISES_ASSERT(svd.solve(rhs))
-  }
-  else
-  {
-    VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinU))
-    VERIFY_RAISES_ASSERT(svd.compute(a, ComputeThinV))
-  }
-}
-
-// work around a spurious MSVC error when constructing, at compile time, an
-// expression that involves a division by zero, even though the numeric type
-// has floating point semantics
-template<typename Scalar>
-EIGEN_DONT_INLINE Scalar zero() { return Scalar(0); }
-
-// work around aggressive optimization in ICC
-template<typename T> EIGEN_DONT_INLINE T sub(T a, T b) { return a - b; }
-
-
-template<typename MatrixType, typename SVD>
-void svd_inf_nan()
-{
-  // all this function does is verify that we don't iterate infinitely on nan/inf values
-
-  SVD svd;
-  typedef typename MatrixType::Scalar Scalar;
-  Scalar some_inf = Scalar(1) / zero<Scalar>();
-  VERIFY(sub(some_inf, some_inf) != sub(some_inf, some_inf));
-  svd.compute(MatrixType::Constant(10,10,some_inf), ComputeFullU | ComputeFullV);
-
-  Scalar some_nan = zero<Scalar>() / zero<Scalar>();
-  VERIFY(some_nan != some_nan);
-  svd.compute(MatrixType::Constant(10,10,some_nan), ComputeFullU | ComputeFullV);
-
-  MatrixType m = MatrixType::Zero(10,10);
-  m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_inf;
-  svd.compute(m, ComputeFullU | ComputeFullV);
-
-  m = MatrixType::Zero(10,10);
-  m(internal::random<int>(0,9), internal::random<int>(0,9)) = some_nan;
-  svd.compute(m, ComputeFullU | ComputeFullV);
-}
-
-
-template<typename SVD>
-void svd_preallocate()
-{
-  Vector3f v(3.f, 2.f, 1.f);
-  MatrixXf m = v.asDiagonal();
-
-  internal::set_is_malloc_allowed(false);
-  VERIFY_RAISES_ASSERT(VectorXf v(10);)
-  SVD svd;
-  internal::set_is_malloc_allowed(true);
-  svd.compute(m);
-  VERIFY_IS_APPROX(svd.singularValues(), v);
-
-  SVD svd2(3,3);
-  internal::set_is_malloc_allowed(false);
-  svd2.compute(m);
-  internal::set_is_malloc_allowed(true);
-  VERIFY_IS_APPROX(svd2.singularValues(), v);
-  VERIFY_RAISES_ASSERT(svd2.matrixU());
-  VERIFY_RAISES_ASSERT(svd2.matrixV());
-  svd2.compute(m, ComputeFullU | ComputeFullV);
-  VERIFY_IS_APPROX(svd2.matrixU(), Matrix3f::Identity());
-  VERIFY_IS_APPROX(svd2.matrixV(), Matrix3f::Identity());
-  internal::set_is_malloc_allowed(false);
-  svd2.compute(m);
-  internal::set_is_malloc_allowed(true);
-
-  // exercise the decomposition preallocated with computation options
-  SVD svd3(3,3,ComputeFullU|ComputeFullV);
-  internal::set_is_malloc_allowed(false);
-  svd3.compute(m);
-  internal::set_is_malloc_allowed(true);
-  VERIFY_IS_APPROX(svd3.singularValues(), v);
-  VERIFY_IS_APPROX(svd3.matrixU(), Matrix3f::Identity());
-  VERIFY_IS_APPROX(svd3.matrixV(), Matrix3f::Identity());
-  internal::set_is_malloc_allowed(false);
-  svd3.compute(m, ComputeFullU|ComputeFullV);
-  internal::set_is_malloc_allowed(true);
-}
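The deleted svd_solve() helper above validated least-squares solutions through the normal equations rather than by residual size: for any minimizer x of |m*x - rhs|, the identity m^H m x = m^H rhs holds even when rhs is inconsistent. A standalone sketch of the same check against the core JacobiSVD (not part of the patch; dimensions and data are illustrative):

// Standalone sketch, not part of this patch: verify a least-squares solution
// from JacobiSVD via the normal equations, as the deleted svd_solve() did.
#include <Eigen/SVD>
#include <iostream>

int main()
{
  Eigen::MatrixXd m   = Eigen::MatrixXd::Random(6, 4);  // over-determined system
  Eigen::VectorXd rhs = Eigen::VectorXd::Random(6);

  Eigen::JacobiSVD<Eigen::MatrixXd> svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV);
  Eigen::VectorXd x = svd.solve(rhs);

  // Residual of the normal equations; should be ~0 up to roundoff.
  std::cout << (m.adjoint() * m * x - m.adjoint() * rhs).norm() << "\n";
}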