143 files changed, 1570 insertions, 863 deletions
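Context for the hunks that follow: this appears to be the upstream Eigen 3.3.3 -> 3.3.4 import (the .hgtags hunk below adds the 3.3.4 tag 5a0156e40feb on top of 3.3.3). Assuming a Mercurial clone of upstream Eigen carrying both release tags, a comparable diff could be regenerated with:

    hg diff -r 3.3.3 -r 3.3.4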
diff --git a/eigen/.hgignore b/eigen/.hgignore index 769a47f..fada7ec 100644 --- a/eigen/.hgignore +++ b/eigen/.hgignore @@ -13,7 +13,7 @@ core core.* *.bak *~ -build* +*build* *.moc.* *.moc ui_* diff --git a/eigen/.hgtags b/eigen/.hgtags index 32ec946..2a2a704 100644 --- a/eigen/.hgtags +++ b/eigen/.hgtags @@ -31,3 +31,4 @@ bef509908b9da05d0d07ffc0da105e2c8c6d3996 3.3-rc1 f562a193118d4f40514e2f4a0ace6e974926ef06 3.3.1 da9b4e14c2550e0d11078a3c39e6d56eba9905df 3.3.2 67e894c6cd8f5f1f604b27d37ed47fdf012674ff 3.3.3 +5a0156e40feb7c4136680b493c6e433d91a6f355 3.3.4 diff --git a/eigen/CMakeLists.txt b/eigen/CMakeLists.txt index f584002..48a574f 100644 --- a/eigen/CMakeLists.txt +++ b/eigen/CMakeLists.txt @@ -41,10 +41,13 @@ string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen_minor_ set(EIGEN_MINOR_VERSION "${CMAKE_MATCH_1}") set(EIGEN_VERSION_NUMBER ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION}) -# if the mercurial program is absent, this will leave the EIGEN_HG_CHANGESET string empty, -# but won't stop CMake. -execute_process(COMMAND hg tip -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_HGTIP_OUTPUT) -execute_process(COMMAND hg branch -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_BRANCH_OUTPUT) +# if we are not in a mercurial clone +if(IS_DIRECTORY ${CMAKE_SOURCE_DIR}/.hg) + # if the mercurial program is absent or this will leave the EIGEN_HG_CHANGESET string empty, + # but won't stop CMake. + execute_process(COMMAND hg tip -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_HGTIP_OUTPUT) + execute_process(COMMAND hg branch -R ${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE EIGEN_BRANCH_OUTPUT) +endif() # if this is the default (aka development) branch, extract the mercurial changeset number from the hg tip output... if(EIGEN_BRANCH_OUTPUT MATCHES "default") @@ -416,16 +419,15 @@ add_subdirectory(Eigen) add_subdirectory(doc EXCLUDE_FROM_ALL) -include(EigenConfigureTesting) - -# fixme, not sure this line is still needed: -enable_testing() # must be called from the root CMakeLists, see man page +option(BUILD_TESTING "Enable creation of Eigen tests." 
ON) +if(BUILD_TESTING) + include(EigenConfigureTesting) - -if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) - add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest -else() - add_subdirectory(test EXCLUDE_FROM_ALL) + if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) + add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest + else() + add_subdirectory(test EXCLUDE_FROM_ALL) + endif() endif() if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) @@ -461,7 +463,9 @@ endif(NOT WIN32) configure_file(scripts/cdashtesting.cmake.in cdashtesting.cmake @ONLY) -ei_testing_print_summary() +if(BUILD_TESTING) + ei_testing_print_summary() +endif() message(STATUS "") message(STATUS "Configured Eigen ${EIGEN_VERSION_NUMBER}") diff --git a/eigen/CTestConfig.cmake b/eigen/CTestConfig.cmake index 755b473..0039bf8 100644 --- a/eigen/CTestConfig.cmake +++ b/eigen/CTestConfig.cmake @@ -4,10 +4,10 @@ ## # The following are required to uses Dart and the Cdash dashboard ## ENABLE_TESTING() ## INCLUDE(CTest) -set(CTEST_PROJECT_NAME "Eigen3.3") +set(CTEST_PROJECT_NAME "Eigen 3.3") set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC") set(CTEST_DROP_METHOD "http") set(CTEST_DROP_SITE "manao.inria.fr") -set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.3") +set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen+3.3") set(CTEST_DROP_SITE_CDASH TRUE) diff --git a/eigen/Eigen/Cholesky b/eigen/Eigen/Cholesky index 369d1f5..1332b54 100644 --- a/eigen/Eigen/Cholesky +++ b/eigen/Eigen/Cholesky @@ -9,6 +9,7 @@ #define EIGEN_CHOLESKY_MODULE_H #include "Core" +#include "Jacobi" #include "src/Core/util/DisableStupidWarnings.h" @@ -31,7 +32,11 @@ #include "src/Cholesky/LLT.h" #include "src/Cholesky/LDLT.h" #ifdef EIGEN_USE_LAPACKE +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else #include "src/misc/lapacke.h" +#endif #include "src/Cholesky/LLT_LAPACKE.h" #endif diff --git a/eigen/Eigen/Core b/eigen/Eigen/Core index 0f7fa63..4d4901e 100644 --- a/eigen/Eigen/Core +++ b/eigen/Eigen/Core @@ -14,6 +14,22 @@ // first thing Eigen does: stop the compiler from committing suicide #include "src/Core/util/DisableStupidWarnings.h" +#if defined(__CUDACC__) && !defined(EIGEN_NO_CUDA) + #define EIGEN_CUDACC __CUDACC__ +#endif + +#if defined(__CUDA_ARCH__) && !defined(EIGEN_NO_CUDA) + #define EIGEN_CUDA_ARCH __CUDA_ARCH__ +#endif + +#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9) +#define EIGEN_CUDACC_VER ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100)) +#elif defined(__CUDACC_VER__) +#define EIGEN_CUDACC_VER __CUDACC_VER__ +#else +#define EIGEN_CUDACC_VER 0 +#endif + // Handle NVCC/CUDA/SYCL #if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__) // Do not try asserts on CUDA and SYCL! 
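The three macros added at the top of Eigen/Core decouple Eigen from nvcc's version reporting: nvcc 9 dropped __CUDACC_VER__ in favor of __CUDACC_VER_MAJOR__/__CUDACC_VER_MINOR__, so EIGEN_CUDACC_VER folds either scheme into a single comparable number (nvcc 9.1 gives 9*10000 + 1*100 = 90100, and 0 means the CUDA compiler is absent). A minimal sketch of the intended use, mirroring the fp16 guard this diff rewrites further down; the MY_APP_* name is hypothetical:

    #include <Eigen/Core>

    // Hypothetical feature gate: enable an fp16 code path only for CUDA 7.5+,
    // the same threshold the Eigen/Core hunk below rewrites from
    // "defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500" to
    // "EIGEN_CUDACC_VER >= 70500". Safe even in non-CUDA builds, since the
    // macro is always defined (falling back to 0).
    #if EIGEN_CUDACC_VER >= 70500
    #define MY_APP_HAS_CUDA_FP16 1
    #else
    #define MY_APP_HAS_CUDA_FP16 0
    #endif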
@@ -155,6 +171,9 @@ #ifdef __AVX512DQ__ #define EIGEN_VECTORIZE_AVX512DQ #endif + #ifdef __AVX512ER__ + #define EIGEN_VECTORIZE_AVX512ER + #endif #endif // include files @@ -229,7 +248,7 @@ #if defined __CUDACC__ #define EIGEN_VECTORIZE_CUDA #include <vector_types.h> - #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 + #if EIGEN_CUDACC_VER >= 70500 #define EIGEN_HAS_CUDA_FP16 #endif #endif @@ -352,6 +371,7 @@ using std::ptrdiff_t; #include "src/Core/MathFunctions.h" #include "src/Core/GenericPacketMath.h" #include "src/Core/MathFunctionsImpl.h" +#include "src/Core/arch/Default/ConjHelper.h" #if defined EIGEN_VECTORIZE_AVX512 #include "src/Core/arch/SSE/PacketMath.h" @@ -367,6 +387,7 @@ using std::ptrdiff_t; #include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX/Complex.h" #include "src/Core/arch/AVX/TypeCasting.h" + #include "src/Core/arch/SSE/TypeCasting.h" #elif defined EIGEN_VECTORIZE_SSE #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/MathFunctions.h" diff --git a/eigen/Eigen/Eigenvalues b/eigen/Eigen/Eigenvalues index 009e529..f3f661b 100644 --- a/eigen/Eigen/Eigenvalues +++ b/eigen/Eigen/Eigenvalues @@ -45,7 +45,11 @@ #include "src/Eigenvalues/GeneralizedEigenSolver.h" #include "src/Eigenvalues/MatrixBaseEigenvalues.h" #ifdef EIGEN_USE_LAPACKE +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else #include "src/misc/lapacke.h" +#endif #include "src/Eigenvalues/RealSchur_LAPACKE.h" #include "src/Eigenvalues/ComplexSchur_LAPACKE.h" #include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h" diff --git a/eigen/Eigen/LU b/eigen/Eigen/LU index 6f6c556..6418a86 100644 --- a/eigen/Eigen/LU +++ b/eigen/Eigen/LU @@ -28,7 +28,11 @@ #include "src/LU/FullPivLU.h" #include "src/LU/PartialPivLU.h" #ifdef EIGEN_USE_LAPACKE +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else #include "src/misc/lapacke.h" +#endif #include "src/LU/PartialPivLU_LAPACKE.h" #endif #include "src/LU/Determinant.h" diff --git a/eigen/Eigen/QR b/eigen/Eigen/QR index 80838e3..c7e9144 100644 --- a/eigen/Eigen/QR +++ b/eigen/Eigen/QR @@ -36,7 +36,11 @@ #include "src/QR/ColPivHouseholderQR.h" #include "src/QR/CompleteOrthogonalDecomposition.h" #ifdef EIGEN_USE_LAPACKE +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else #include "src/misc/lapacke.h" +#endif #include "src/QR/HouseholderQR_LAPACKE.h" #include "src/QR/ColPivHouseholderQR_LAPACKE.h" #endif diff --git a/eigen/Eigen/QtAlignedMalloc b/eigen/Eigen/QtAlignedMalloc index c6571f1..4f07df0 100644 --- a/eigen/Eigen/QtAlignedMalloc +++ b/eigen/Eigen/QtAlignedMalloc @@ -27,7 +27,7 @@ void qFree(void *ptr) void *qRealloc(void *ptr, std::size_t size) { void* newPtr = Eigen::internal::aligned_malloc(size); - memcpy(newPtr, ptr, size); + std::memcpy(newPtr, ptr, size); Eigen::internal::aligned_free(ptr); return newPtr; } diff --git a/eigen/Eigen/SVD b/eigen/Eigen/SVD index 86143c2..5d0e75f 100644 --- a/eigen/Eigen/SVD +++ b/eigen/Eigen/SVD @@ -37,7 +37,11 @@ #include "src/SVD/JacobiSVD.h" #include "src/SVD/BDCSVD.h" #if defined(EIGEN_USE_LAPACKE) && !defined(EIGEN_USE_LAPACKE_STRICT) +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else #include "src/misc/lapacke.h" +#endif #include "src/SVD/JacobiSVD_LAPACKE.h" #endif diff --git a/eigen/Eigen/src/Cholesky/LDLT.h b/eigen/Eigen/src/Cholesky/LDLT.h index fcee7b2..0313a54 100644 --- a/eigen/Eigen/src/Cholesky/LDLT.h +++ b/eigen/Eigen/src/Cholesky/LDLT.h @@ -248,7 +248,7 @@ template<typename _MatrixType, int _UpLo> class LDLT /** \brief Reports whether previous computation was 
successful. * * \returns \c Success if computation was succesful, - * \c NumericalIssue if the matrix.appears to be negative. + * \c NumericalIssue if the factorization failed because of a zero pivot. */ ComputationInfo info() const { @@ -376,6 +376,8 @@ template<> struct ldlt_inplace<Lower> if((rs>0) && pivot_is_valid) A21 /= realAkk; + else if(rs>0) + ret = ret && (A21.array()==Scalar(0)).all(); if(found_zero_pivot && pivot_is_valid) ret = false; // factorization failed else if(!pivot_is_valid) found_zero_pivot = true; @@ -568,13 +570,14 @@ void LDLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) cons // more precisely, use pseudo-inverse of D (see bug 241) using std::abs; const typename Diagonal<const MatrixType>::RealReturnType vecD(vectorD()); - // In some previous versions, tolerance was set to the max of 1/highest and the maximal diagonal entry * epsilon - // as motivated by LAPACK's xGELSS: + // In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min()) + // and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS: // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits<RealScalar>::epsilon(),RealScalar(1) / NumTraits<RealScalar>::highest()); // However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the highest // diagonal element is not well justified and leads to numerical issues in some cases. // Moreover, Lapack's xSYTRS routines use 0 for the tolerance. - RealScalar tolerance = RealScalar(1) / NumTraits<RealScalar>::highest(); + // Using numeric_limits::min() gives us more robustness to denormals. + RealScalar tolerance = (std::numeric_limits<RealScalar>::min)(); for (Index i = 0; i < vecD.size(); ++i) { diff --git a/eigen/Eigen/src/Cholesky/LLT.h b/eigen/Eigen/src/Cholesky/LLT.h index 87ca8d4..e1624d2 100644 --- a/eigen/Eigen/src/Cholesky/LLT.h +++ b/eigen/Eigen/src/Cholesky/LLT.h @@ -24,7 +24,7 @@ template<typename MatrixType, int UpLo> struct LLT_Traits; * * \tparam _MatrixType the type of the matrix of which we are computing the LL^T Cholesky decomposition * \tparam _UpLo the triangular part that will be used for the decompositon: Lower (default) or Upper. - * The other triangular part won't be read. + * The other triangular part won't be read. * * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite * matrix A such that A = LL^* = U^*U, where L is lower triangular. @@ -41,14 +41,18 @@ template<typename MatrixType, int UpLo> struct LLT_Traits; * Example: \include LLT_example.cpp * Output: \verbinclude LLT_example.out * + * \b Performance: for best performance, it is recommended to use a column-major storage format + * with the Lower triangular part (the default), or, equivalently, a row-major storage format + * with the Upper triangular part. Otherwise, you might get a 20% slowdown for the full factorization + * step, and rank-updates can be up to 3 times slower. + * * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. * + * Note that during the decomposition, only the lower (or upper, as defined by _UpLo) triangular part of A is considered. + * Therefore, the strict lower part does not have to store correct values. + * * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT */ - /* HEY THIS DOX IS DISABLED BECAUSE THERE's A BUG EITHER HERE OR IN LDLT ABOUT THAT (OR BOTH) - * Note that during the decomposition, only the upper triangular part of A is considered. 
Therefore, - * the strict lower part does not have to store correct values. - */ template<typename _MatrixType, int _UpLo> class LLT { public: @@ -146,7 +150,7 @@ template<typename _MatrixType, int _UpLo> class LLT } template<typename Derived> - void solveInPlace(MatrixBase<Derived> &bAndX) const; + void solveInPlace(const MatrixBase<Derived> &bAndX) const; template<typename InputType> LLT& compute(const EigenBase<InputType>& matrix); @@ -177,7 +181,7 @@ template<typename _MatrixType, int _UpLo> class LLT /** \brief Reports whether previous computation was successful. * * \returns \c Success if computation was succesful, - * \c NumericalIssue if the matrix.appears to be negative. + * \c NumericalIssue if the matrix.appears not to be positive definite. */ ComputationInfo info() const { @@ -425,7 +429,8 @@ LLT<MatrixType,_UpLo>& LLT<MatrixType,_UpLo>::compute(const EigenBase<InputType> eigen_assert(a.rows()==a.cols()); const Index size = a.rows(); m_matrix.resize(size, size); - m_matrix = a.derived(); + if (!internal::is_same_dense(m_matrix, a.derived())) + m_matrix = a.derived(); // Compute matrix L1 norm = max abs column sum. m_l1_norm = RealScalar(0); @@ -485,11 +490,14 @@ void LLT<_MatrixType,_UpLo>::_solve_impl(const RhsType &rhs, DstType &dst) const * * This version avoids a copy when the right hand side matrix b is not needed anymore. * + * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here. + * This function will const_cast it, so constness isn't honored here. + * * \sa LLT::solve(), MatrixBase::llt() */ template<typename MatrixType, int _UpLo> template<typename Derived> -void LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const +void LLT<MatrixType,_UpLo>::solveInPlace(const MatrixBase<Derived> &bAndX) const { eigen_assert(m_isInitialized && "LLT is not initialized."); eigen_assert(m_matrix.rows()==bAndX.rows()); diff --git a/eigen/Eigen/src/Core/AssignEvaluator.h b/eigen/Eigen/src/Core/AssignEvaluator.h index b0ec7b7..dbe435d 100644 --- a/eigen/Eigen/src/Core/AssignEvaluator.h +++ b/eigen/Eigen/src/Core/AssignEvaluator.h @@ -39,7 +39,7 @@ public: enum { DstAlignment = DstEvaluator::Alignment, SrcAlignment = SrcEvaluator::Alignment, - DstHasDirectAccess = DstFlags & DirectAccessBit, + DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit, JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) }; @@ -83,7 +83,7 @@ private: && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0 && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)), MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), - MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess + MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess) && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. 
*/ diff --git a/eigen/Eigen/src/Core/Assign_MKL.h b/eigen/Eigen/src/Core/Assign_MKL.h index 6c2ab92..6866095 100644 --- a/eigen/Eigen/src/Core/Assign_MKL.h +++ b/eigen/Eigen/src/Core/Assign_MKL.h @@ -84,7 +84,8 @@ class vml_assign_traits struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \ Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \ typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \ - static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \ + resize_if_allowed(dst, src, func); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \ VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ @@ -144,7 +145,8 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \ typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \ const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \ - static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \ + resize_if_allowed(dst, src, func); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \ if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \ diff --git a/eigen/Eigen/src/Core/CoreEvaluators.h b/eigen/Eigen/src/Core/CoreEvaluators.h index f7c1eff..910889e 100644 --- a/eigen/Eigen/src/Core/CoreEvaluators.h +++ b/eigen/Eigen/src/Core/CoreEvaluators.h @@ -977,7 +977,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > OuterStrideAtCompileTime = HasSameStorageOrderAsArgType ? int(outer_stride_at_compile_time<ArgType>::ret) : int(inner_stride_at_compile_time<ArgType>::ret), - MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, + MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsRowMajorBit = XprType::Flags&RowMajorBit, @@ -987,7 +987,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> > Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, PacketAlignment = unpacket_traits<PacketScalar>::alignment, - Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) + && (OuterStrideAtCompileTime!=0) + && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? 
int(PacketAlignment) : 0, Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0) }; typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type; @@ -1018,14 +1020,16 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) : m_argImpl(block.nestedExpression()), m_startRow(block.startRow()), - m_startCol(block.startCol()) + m_startCol(block.startCol()), + m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0) { } typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; enum { - RowsAtCompileTime = XprType::RowsAtCompileTime + RowsAtCompileTime = XprType::RowsAtCompileTime, + ForwardLinearAccess = InnerPanel && bool(evaluator<ArgType>::Flags&LinearAccessBit) }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1037,7 +1041,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + if (ForwardLinearAccess) + return m_argImpl.coeff(m_linear_offset.value() + index); + else + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1049,7 +1056,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + if (ForwardLinearAccess) + return m_argImpl.coeffRef(m_linear_offset.value() + index); + else + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } template<int LoadMode, typename PacketType> @@ -1063,8 +1073,11 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + if (ForwardLinearAccess) + return m_argImpl.template packet<LoadMode,PacketType>(m_linear_offset.value() + index); + else + return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); } template<int StoreMode, typename PacketType> @@ -1078,15 +1091,19 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0, - x); + if (ForwardLinearAccess) + return m_argImpl.template writePacket<StoreMode,PacketType>(m_linear_offset.value() + index, x); + else + return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0, + x); } protected: evaluator<ArgType> m_argImpl; const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow; const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol; + const variable_if_dynamic<Index, InnerPanel ? 
Dynamic : 0> m_linear_offset; }; // TODO: This evaluator does not actually use the child evaluator; diff --git a/eigen/Eigen/src/Core/Diagonal.h b/eigen/Eigen/src/Core/Diagonal.h index 49e7112..afcaf35 100644 --- a/eigen/Eigen/src/Core/Diagonal.h +++ b/eigen/Eigen/src/Core/Diagonal.h @@ -70,7 +70,10 @@ template<typename MatrixType, int _DiagIndex> class Diagonal EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) EIGEN_DEVICE_FUNC - explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} + explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) + { + eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() ); + } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) diff --git a/eigen/Eigen/src/Core/Dot.h b/eigen/Eigen/src/Core/Dot.h index 06ef18b..1fe7a84 100644 --- a/eigen/Eigen/src/Core/Dot.h +++ b/eigen/Eigen/src/Core/Dot.h @@ -31,7 +31,8 @@ struct dot_nocheck typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod; typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC - static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) + EIGEN_STRONG_INLINE + static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) { return a.template binaryExpr<conj_prod>(b).sum(); } @@ -43,7 +44,8 @@ struct dot_nocheck<T, U, true> typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod; typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC - static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) + EIGEN_STRONG_INLINE + static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b) { return a.transpose().template binaryExpr<conj_prod>(b).sum(); } @@ -65,6 +67,7 @@ struct dot_nocheck<T, U, true> template<typename Derived> template<typename OtherDerived> EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const { @@ -102,7 +105,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala * \sa lpNorm(), dot(), squaredNorm() */ template<typename Derived> -inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const +EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const { return numext::sqrt(squaredNorm()); } @@ -117,7 +120,7 @@ inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real Matr * \sa norm(), normalize() */ template<typename Derived> -inline const typename MatrixBase<Derived>::PlainObject +EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject MatrixBase<Derived>::normalized() const { typedef typename internal::nested_eval<Derived,2>::type _Nested; @@ -139,7 +142,7 @@ MatrixBase<Derived>::normalized() const * \sa norm(), normalized() */ template<typename Derived> -inline void MatrixBase<Derived>::normalize() +EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize() { RealScalar z = squaredNorm(); // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU @@ -160,7 +163,7 @@ inline void MatrixBase<Derived>::normalize() * \sa stableNorm(), stableNormalize(), normalized() */ template<typename Derived> -inline const typename 
MatrixBase<Derived>::PlainObject +EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject MatrixBase<Derived>::stableNormalized() const { typedef typename internal::nested_eval<Derived,3>::type _Nested; @@ -185,7 +188,7 @@ MatrixBase<Derived>::stableNormalized() const * \sa stableNorm(), stableNormalized(), normalize() */ template<typename Derived> -inline void MatrixBase<Derived>::stableNormalize() +EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize() { RealScalar w = cwiseAbs().maxCoeff(); RealScalar z = (derived()/w).squaredNorm(); diff --git a/eigen/Eigen/src/Core/GeneralProduct.h b/eigen/Eigen/src/Core/GeneralProduct.h index 0f16cd8..6f0cc80 100644 --- a/eigen/Eigen/src/Core/GeneralProduct.h +++ b/eigen/Eigen/src/Core/GeneralProduct.h @@ -24,12 +24,17 @@ template<int Rows, int Cols, int Depth> struct product_type_selector; template<int Size, int MaxSize> struct product_size_category { - enum { is_large = MaxSize == Dynamic || - Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || - (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), - value = is_large ? Large - : Size == 1 ? 1 - : Small + enum { + #ifndef EIGEN_CUDA_ARCH + is_large = MaxSize == Dynamic || + Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || + (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), + #else + is_large = 0, + #endif + value = is_large ? Large + : Size == 1 ? 1 + : Small }; }; @@ -379,8 +384,6 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false> * * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*() */ -#ifndef __CUDACC__ - template<typename Derived> template<typename OtherDerived> inline const Product<Derived, OtherDerived> @@ -412,8 +415,6 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const return Product<Derived, OtherDerived>(derived(), other.derived()); } -#endif // __CUDACC__ - /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. * * The returned product will behave like any other expressions: the coefficients of the product will be diff --git a/eigen/Eigen/src/Core/Map.h b/eigen/Eigen/src/Core/Map.h index 06d1967..548bf9a 100644 --- a/eigen/Eigen/src/Core/Map.h +++ b/eigen/Eigen/src/Core/Map.h @@ -20,11 +20,17 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> > { typedef traits<PlainObjectType> TraitsBase; enum { + PlainObjectTypeInnerSize = ((traits<PlainObjectType>::Flags&RowMajorBit)==RowMajorBit) + ? PlainObjectType::ColsAtCompileTime + : PlainObjectType::RowsAtCompileTime, + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 ? int(PlainObjectType::InnerStrideAtCompileTime) : int(StrideType::InnerStrideAtCompileTime), OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 - ? int(PlainObjectType::OuterStrideAtCompileTime) + ? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic + ? Dynamic + : int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize)) : int(StrideType::OuterStrideAtCompileTime), Alignment = int(MapOptions)&int(AlignedMask), Flags0 = TraitsBase::Flags & (~NestByRefBit), @@ -107,10 +113,11 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma EIGEN_DEVICE_FUNC inline Index outerStride() const { - return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() - : IsVectorAtCompileTime ? this->size() - : int(Flags)&RowMajorBit ? this->cols() - : this->rows(); + return int(StrideType::OuterStrideAtCompileTime) != 0 ? 
m_stride.outer() + : int(internal::traits<Map>::OuterStrideAtCompileTime) != Dynamic ? Index(internal::traits<Map>::OuterStrideAtCompileTime) + : IsVectorAtCompileTime ? (this->size() * innerStride()) + : (int(Flags)&RowMajorBit) ? (this->cols() * innerStride()) + : (this->rows() * innerStride()); } /** Constructor in the fixed-size case. diff --git a/eigen/Eigen/src/Core/MathFunctions.h b/eigen/Eigen/src/Core/MathFunctions.h index a648aa0..6eb974d 100644 --- a/eigen/Eigen/src/Core/MathFunctions.h +++ b/eigen/Eigen/src/Core/MathFunctions.h @@ -348,31 +348,7 @@ struct norm1_retval * Implementation of hypot * ****************************************************************************/ -template<typename Scalar> -struct hypot_impl -{ - typedef typename NumTraits<Scalar>::Real RealScalar; - static inline RealScalar run(const Scalar& x, const Scalar& y) - { - EIGEN_USING_STD_MATH(abs); - EIGEN_USING_STD_MATH(sqrt); - RealScalar _x = abs(x); - RealScalar _y = abs(y); - Scalar p, qp; - if(_x>_y) - { - p = _x; - qp = _y / p; - } - else - { - p = _y; - qp = _x / p; - } - if(p==RealScalar(0)) return RealScalar(0); - return p * sqrt(RealScalar(1) + qp*qp); - } -}; +template<typename Scalar> struct hypot_impl; template<typename Scalar> struct hypot_retval @@ -495,7 +471,7 @@ namespace std_fallback { typedef typename NumTraits<Scalar>::Real RealScalar; EIGEN_USING_STD_MATH(log); Scalar x1p = RealScalar(1) + x; - return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); + return numext::equal_strict(x1p, Scalar(1)) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); } } diff --git a/eigen/Eigen/src/Core/MathFunctionsImpl.h b/eigen/Eigen/src/Core/MathFunctionsImpl.h index 3c9ef22..9c1ceb0 100644 --- a/eigen/Eigen/src/Core/MathFunctionsImpl.h +++ b/eigen/Eigen/src/Core/MathFunctionsImpl.h @@ -71,6 +71,29 @@ T generic_fast_tanh_float(const T& a_x) return pdiv(p, q); } +template<typename RealScalar> +EIGEN_STRONG_INLINE +RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) +{ + EIGEN_USING_STD_MATH(sqrt); + RealScalar p, qp; + p = numext::maxi(x,y); + if(p==RealScalar(0)) return RealScalar(0); + qp = numext::mini(y,x) / p; + return p * sqrt(RealScalar(1) + qp*qp); +} + +template<typename Scalar> +struct hypot_impl +{ + typedef typename NumTraits<Scalar>::Real RealScalar; + static inline RealScalar run(const Scalar& x, const Scalar& y) + { + EIGEN_USING_STD_MATH(abs); + return positive_real_hypot<RealScalar>(abs(x), abs(y)); + } +}; + } // end namespace internal } // end namespace Eigen diff --git a/eigen/Eigen/src/Core/MatrixBase.h b/eigen/Eigen/src/Core/MatrixBase.h index ce41218..05db488 100644 --- a/eigen/Eigen/src/Core/MatrixBase.h +++ b/eigen/Eigen/src/Core/MatrixBase.h @@ -160,20 +160,11 @@ template<typename Derived> class MatrixBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const MatrixBase<OtherDerived>& other); -#ifdef __CUDACC__ template<typename OtherDerived> EIGEN_DEVICE_FUNC - const Product<Derived,OtherDerived,LazyProduct> - operator*(const MatrixBase<OtherDerived> &other) const - { return this->lazyProduct(other); } -#else - - template<typename OtherDerived> const Product<Derived,OtherDerived> operator*(const MatrixBase<OtherDerived> &other) const; -#endif - template<typename OtherDerived> EIGEN_DEVICE_FUNC const Product<Derived,OtherDerived,LazyProduct> diff --git a/eigen/Eigen/src/Core/PlainObjectBase.h b/eigen/Eigen/src/Core/PlainObjectBase.h index 77f4f60..1dc7e22 100644 --- a/eigen/Eigen/src/Core/PlainObjectBase.h +++ 
b/eigen/Eigen/src/Core/PlainObjectBase.h @@ -577,6 +577,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned * \a data pointers. * + * Here is an example using strides: + * \include Matrix_Map_stride.cpp + * Output: \verbinclude Matrix_Map_stride.out + * * \see class Map */ //@{ diff --git a/eigen/Eigen/src/Core/Product.h b/eigen/Eigen/src/Core/Product.h index ae0c94b..676c480 100644 --- a/eigen/Eigen/src/Core/Product.h +++ b/eigen/Eigen/src/Core/Product.h @@ -97,8 +97,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option, && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); } - EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } - EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; } EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; } @@ -127,7 +127,7 @@ public: using Base::derived; typedef typename Base::Scalar Scalar; - operator const Scalar() const + EIGEN_STRONG_INLINE operator const Scalar() const { return internal::evaluator<ProductXpr>(derived()).coeff(0,0); } @@ -162,7 +162,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense> public: - EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const { EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); @@ -170,7 +170,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense> return internal::evaluator<Derived>(derived()).coeff(row,col); } - EIGEN_DEVICE_FUNC Scalar coeff(Index i) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const { EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); diff --git a/eigen/Eigen/src/Core/ProductEvaluators.h b/eigen/Eigen/src/Core/ProductEvaluators.h index c42725d..9b99bd7 100644 --- a/eigen/Eigen/src/Core/ProductEvaluators.h +++ b/eigen/Eigen/src/Core/ProductEvaluators.h @@ -32,7 +32,7 @@ struct evaluator<Product<Lhs, Rhs, Options> > typedef Product<Lhs, Rhs, Options> XprType; typedef product_evaluator<XprType> Base; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {} }; // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" @@ -55,7 +55,7 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>, const Product<Lhs, Rhs, DefaultProduct> > XprType; typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {} }; @@ -68,7 +68,7 @@ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> > typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType; typedef evaluator<Diagonal<const Product<Lhs, Rhs, 
LazyProduct>, DiagIndex> > Base; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>( Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), xpr.index() )) @@ -246,19 +246,19 @@ template<typename Lhs, typename Rhs> struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct> { template<typename Dst> - static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); } template<typename Dst> - static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); } template<typename Dst> - static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } }; @@ -312,25 +312,25 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct> }; template<typename Dst> - static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>()); } template<typename Dst> - static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>()); } template<typename Dst> - static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>()); } template<typename Dst> - static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>()); } @@ -785,7 +785,11 @@ public: _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? 
PacketAccessBit : 0), - Alignment = evaluator<MatrixType>::Alignment + Alignment = evaluator<MatrixType>::Alignment, + + AsScalarProduct = (DiagonalType::SizeAtCompileTime==1) + || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft) + || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight) }; diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) @@ -797,7 +801,10 @@ public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const { - return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); + if(AsScalarProduct) + return m_diagImpl.coeff(0) * m_matImpl.coeff(idx); + else + return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); } protected: diff --git a/eigen/Eigen/src/Core/Redux.h b/eigen/Eigen/src/Core/Redux.h index b6e8f88..760e9f8 100644 --- a/eigen/Eigen/src/Core/Redux.h +++ b/eigen/Eigen/src/Core/Redux.h @@ -407,7 +407,7 @@ protected: */ template<typename Derived> template<typename Func> -typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::redux(const Func& func) const { eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); diff --git a/eigen/Eigen/src/Core/Ref.h b/eigen/Eigen/src/Core/Ref.h index bdf24f5..9c6e3c5 100644 --- a/eigen/Eigen/src/Core/Ref.h +++ b/eigen/Eigen/src/Core/Ref.h @@ -95,6 +95,8 @@ protected: template<typename Expression> EIGEN_DEVICE_FUNC void construct(Expression& expr) { + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression); + if(PlainObjectType::RowsAtCompileTime==1) { eigen_assert(expr.rows()==1 || expr.cols()==1); diff --git a/eigen/Eigen/src/Core/SelfAdjointView.h b/eigen/Eigen/src/Core/SelfAdjointView.h index 504c98f..b2e51f3 100644 --- a/eigen/Eigen/src/Core/SelfAdjointView.h +++ b/eigen/Eigen/src/Core/SelfAdjointView.h @@ -71,7 +71,9 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView EIGEN_DEVICE_FUNC explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix) - {} + { + EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY); + } EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } @@ -189,7 +191,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2); } - typedef SelfAdjointView<const MatrixConjugateReturnType,Mode> ConjugateReturnType; + typedef SelfAdjointView<const MatrixConjugateReturnType,UpLo> ConjugateReturnType; /** \sa MatrixBase::conjugate() const */ EIGEN_DEVICE_FUNC inline const ConjugateReturnType conjugate() const diff --git a/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h b/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h index 50099df..7c89c2e 100644 --- a/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -17,7 +17,6 @@ namespace Eigen { template<typename Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>()); return derived(); } @@ -25,7 +24,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(co template<typename Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& 
ArrayBase<Derived>::operator+=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>()); return derived(); } @@ -33,7 +31,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(co template<typename Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>()); return derived(); } @@ -41,7 +38,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(co template<typename Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>()); return derived(); } diff --git a/eigen/Eigen/src/Core/StableNorm.h b/eigen/Eigen/src/Core/StableNorm.h index be04ed4..88c8d98 100644 --- a/eigen/Eigen/src/Core/StableNorm.h +++ b/eigen/Eigen/src/Core/StableNorm.h @@ -165,7 +165,7 @@ MatrixBase<Derived>::stableNorm() const typedef typename internal::nested_eval<Derived,2>::type DerivedCopy; typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean; - DerivedCopy copy(derived()); + const DerivedCopy copy(derived()); enum { CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit) diff --git a/eigen/Eigen/src/Core/Transpositions.h b/eigen/Eigen/src/Core/Transpositions.h index 19c17bb..86da5af 100644 --- a/eigen/Eigen/src/Core/Transpositions.h +++ b/eigen/Eigen/src/Core/Transpositions.h @@ -384,7 +384,7 @@ class Transpose<TranspositionsBase<TranspositionsDerived> > const Product<OtherDerived, Transpose, AliasFreeProduct> operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt) { - return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt.derived()); + return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt); } /** \returns the \a matrix with the inverse transpositions applied to the rows. 
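Two hunks above turn silent template misuse into compile-time errors: Ref's construct() now static-asserts that the referenced expression has a compatible compile-time size, and the SelfAdjointView constructor rejects any mode other than Lower or Upper. A hedged sketch of what the SelfAdjointView assertion catches; the function and variable names are illustrative:

    #include <Eigen/Core>

    void selfadjoint_modes() {
      Eigen::Matrix3d m = Eigen::Matrix3d::Identity();
      m.selfadjointView<Eigen::Lower>();     // OK: Lower (or Upper) is required
      // m.selfadjointView<Eigen::UnitLower>();
      // ^ previously accepted by the constructor; with this change it fails to
      //   compile with SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY.
    }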
diff --git a/eigen/Eigen/src/Core/arch/AVX/Complex.h b/eigen/Eigen/src/Core/arch/AVX/Complex.h index 99439c8..7fa6196 100644 --- a/eigen/Eigen/src/Core/arch/AVX/Complex.h +++ b/eigen/Eigen/src/Core/arch/AVX/Complex.h @@ -204,23 +204,7 @@ template<> struct conj_helper<Packet4cf, Packet4cf, true,true> } }; -template<> struct conj_helper<Packet8f, Packet4cf, false,false> -{ - EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const - { return Packet4cf(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper<Packet4cf, Packet8f, false,false> -{ - EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const - { return Packet4cf(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f) template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b) { @@ -400,23 +384,7 @@ template<> struct conj_helper<Packet2cd, Packet2cd, true,true> } }; -template<> struct conj_helper<Packet4d, Packet2cd, false,false> -{ - EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const - { return Packet2cd(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper<Packet2cd, Packet4d, false,false> -{ - EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const - { return Packet2cd(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d) template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b) { diff --git a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h index 195d40f..61c3dfc 100644 --- a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h @@ -308,9 +308,9 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a) } #ifndef EIGEN_VECTORIZE_AVX512 -template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } #endif template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) { @@ -333,9 +333,12 @@ template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a) { __m256d tmp = _mm256_shuffle_pd(a,a,5); return _mm256_permute2f128_pd(tmp, tmp, 1); - + #if 0 + // This version is unlikely to be faster as _mm256_shuffle_ps and 
_mm256_permute_pd + // exhibit the same latency/throughput, but it is here for future reference/benchmarking... __m256d swap_halves = _mm256_permute2f128_pd(a,a,1); return _mm256_permute_pd(swap_halves,5); + #endif } // pabs should be ok diff --git a/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h b/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h index 399be0e..9c1717f 100644 --- a/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +++ b/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h @@ -88,9 +88,9 @@ plog<Packet16f>(const Packet16f& _x) { // x = x + x - 1.0; // } else { x = x - 1.0; } __mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ); - Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps()); + Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x); x = psub(x, p16f_1); - e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps())); + e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1)); x = padd(x, tmp); Packet16f x2 = pmul(x, x); @@ -119,8 +119,9 @@ plog<Packet16f>(const Packet16f& _x) { x = padd(x, y2); // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF. - return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf, - _mm512_mask_blend_ps(invalid_mask, p16f_nan, x)); + return _mm512_mask_blend_ps(iszero_mask, + _mm512_mask_blend_ps(invalid_mask, x, p16f_nan), + p16f_minus_inf); } #endif @@ -266,8 +267,7 @@ psqrt<Packet16f>(const Packet16f& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ); - Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x), - _mm512_setzero_ps()); + Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x)); // Do a single step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five)); @@ -289,8 +289,7 @@ psqrt<Packet8d>(const Packet8d& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ); - Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x), - _mm512_setzero_pd()); + Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x)); // Do a first step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); @@ -333,20 +332,18 @@ prsqrt<Packet16f>(const Packet16f& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ); - Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), - _mm512_rsqrt14_ps(_x)); + Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps()); // Fill in NaNs and Infs for the negative/zero entries. __mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ); Packet16f infs_and_nans = _mm512_mask_blend_ps( - neg_mask, p16f_nan, - _mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps())); + neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan); // Do a single step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five)); // Insert NaNs and Infs in all the right places. 
- return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x); + return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans); } template <> @@ -363,14 +360,12 @@ prsqrt<Packet8d>(const Packet8d& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ); - Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), - _mm512_rsqrt14_pd(_x)); + Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd()); // Fill in NaNs and Infs for the negative/zero entries. __mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ); Packet8d infs_and_nans = _mm512_mask_blend_pd( - neg_mask, p8d_nan, - _mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd())); + neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan); // Do a first step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); @@ -379,9 +374,9 @@ prsqrt<Packet8d>(const Packet8d& _x) { x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); // Insert NaNs and Infs in all the right places. - return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x); + return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans); } -#else +#elif defined(EIGEN_VECTORIZE_AVX512ER) template <> EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) { return _mm512_rsqrt28_ps(x); diff --git a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h index f6500a1..8970524 100644 --- a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -618,9 +618,9 @@ EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) { pstore(to, pa); } -template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template <> EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) { diff --git a/eigen/Eigen/src/Core/arch/AltiVec/Complex.h b/eigen/Eigen/src/Core/arch/AltiVec/Complex.h index 67db2f8..3e66573 100644 --- a/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/eigen/Eigen/src/Core/arch/AltiVec/Complex.h @@ -224,23 +224,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true> } }; -template<> struct conj_helper<Packet4f, Packet2cf, false,false> -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const - { return Packet2cf(internal::pmul<Packet4f>(x, y.v)); } -}; - -template<> struct conj_helper<Packet2cf, Packet4f, false,false> -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const - { return padd(c, 
pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const - { return Packet2cf(internal::pmul<Packet4f>(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { @@ -416,23 +400,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true> return pconj(internal::pmul(a, b)); } }; -template<> struct conj_helper<Packet2d, Packet1cd, false,false> -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const - { return Packet1cd(internal::pmul<Packet2d>(x, y.v)); } -}; -template<> struct conj_helper<Packet1cd, Packet2d, false,false> -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const - { return Packet1cd(internal::pmul<Packet2d>(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { diff --git a/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h b/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h index b3f1ea1..08a27d1 100644 --- a/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -103,7 +103,7 @@ static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4u static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; #else -static Packet16uc p16uc_FORWARD = p16uc_REVERSE32; +static Packet16uc p16uc_FORWARD = p16uc_REVERSE32; static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; @@ -388,10 +388,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); } template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; } -template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) +{ + #ifdef __VSX__ + Packet4f ret; + __asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; + #else + return vec_min(a, b); + #endif +} template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } -template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) 
{ return vec_max(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) +{ + #ifdef __VSX__ + Packet4f ret; + __asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; + #else + return vec_max(a, b); + #endif +} template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } @@ -764,7 +782,7 @@ typedef __vector __bool long Packet2bl; static Packet2l p2l_ONE = { 1, 1 }; static Packet2l p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO); -static Packet2d p2d_ONE = { 1.0, 1.0 }; +static Packet2d p2d_ONE = { 1.0, 1.0 }; static Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO); static Packet2d p2d_MZERO = { -0.0, -0.0 }; @@ -910,9 +928,19 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const // for some weird reasons, it has to be overloaded for packets of integers template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); } -template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); } +template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) +{ + Packet2d ret; + __asm__ ("xvcmpgedp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; +} -template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); } +template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) +{ + Packet2d ret; + __asm__ ("xvcmpgtdp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; +} template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); } @@ -969,7 +997,7 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs) Packet2d v[2], sum; v[0] = vecs[0] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[0]), reinterpret_cast<Packet4f>(vecs[0]), 8)); v[1] = vecs[1] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[1]), reinterpret_cast<Packet4f>(vecs[1]), 8)); - + #ifdef _BIG_ENDIAN sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[0]), reinterpret_cast<Packet4f>(v[1]), 8)); #else @@ -1022,7 +1050,7 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) { template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) { Packet2l select = { ifPacket.select[0], ifPacket.select[1] }; - Packet2bl mask = vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE)); + Packet2bl mask = reinterpret_cast<Packet2bl>( vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE)) ); return vec_sel(elsePacket, thenPacket, mask); } #endif // __VSX__ diff --git a/eigen/Eigen/src/Core/arch/CUDA/Half.h b/eigen/Eigen/src/Core/arch/CUDA/Half.h index 294c517..02ac0c2 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/Half.h +++ b/eigen/Eigen/src/Core/arch/CUDA/Half.h @@ -147,55 +147,55 @@ namespace half_impl { // versions to get the ALU speed increased), but you do save the // conversion steps back and forth.
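For context on the hunk that follows: adding EIGEN_STRONG_INLINE to these __device__ operators presumably serves two purposes, forcing the tiny arithmetic wrappers to inline and giving these header-defined free functions inline linkage, so that including Half.h from several translation units cannot produce duplicate symbols. A minimal stand-alone sketch of that second point, using a toy type and placeholder arithmetic rather than Eigen's real code:

// sketch.h, illustrative only, not part of Eigen
struct toy_half { unsigned short raw; };

// Without 'inline', every translation unit including this header would emit
// its own out-of-line definition of operator+ and linking would fail with
// duplicate-symbol errors; 'inline' (or a forceinline macro) avoids that.
inline toy_half operator+(const toy_half& a, const toy_half& b) {
  toy_half r;
  r.raw = static_cast<unsigned short>(a.raw + b.raw); // placeholder, not real fp16 math
  return r;
}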
-__device__ half operator + (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) { return __hadd(a, b); } -__device__ half operator * (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) { return __hmul(a, b); } -__device__ half operator - (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) { return __hsub(a, b); } -__device__ half operator / (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) { float num = __half2float(a); float denom = __half2float(b); return __float2half(num / denom); } -__device__ half operator - (const half& a) { +EIGEN_STRONG_INLINE __device__ half operator - (const half& a) { return __hneg(a); } -__device__ half& operator += (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) { a = a + b; return a; } -__device__ half& operator *= (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) { a = a * b; return a; } -__device__ half& operator -= (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) { a = a - b; return a; } -__device__ half& operator /= (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) { a = a / b; return a; } -__device__ bool operator == (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) { return __heq(a, b); } -__device__ bool operator != (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) { return __hne(a, b); } -__device__ bool operator < (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) { return __hlt(a, b); } -__device__ bool operator <= (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) { return __hle(a, b); } -__device__ bool operator > (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) { return __hgt(a, b); } -__device__ bool operator >= (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) { return __hge(a, b); } @@ -238,10 +238,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) return a; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) { - return float(a) == float(b); + return numext::equal_strict(float(a),float(b)); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) { - return float(a) != float(b); + return numext::not_equal_strict(float(a), float(b)); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) { return float(a) < float(b); @@ -386,11 +386,15 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) { return result; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { - return half(::expf(float(a))); +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 + return half(hexp(a)); +#else + return half(::expf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && 
__CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 - return Eigen::half(::hlog(a)); +#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 + return half(::hlog(a)); #else return half(::logf(float(a))); #endif @@ -402,7 +406,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { - return half(::sqrtf(float(a))); +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 + return half(hsqrt(a)); +#else + return half(::sqrtf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) { return half(::powf(float(a), float(b))); @@ -420,10 +428,18 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { return half(::tanhf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300 + return half(hfloor(a)); +#else return half(::floorf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300 + return half(hceil(a)); +#else return half(::ceilf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { @@ -493,8 +509,8 @@ struct numeric_limits<Eigen::half> { static const bool is_bounded = false; static const bool is_modulo = false; static const int digits = 11; - static const int digits10 = 2; - //static const int max_digits10 = ; + static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html + static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html static const int radix = 2; static const int min_exponent = -13; static const int min_exponent10 = -4; @@ -557,7 +573,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) { return Eigen::half(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return Eigen::half(::hlog(a)); #else return Eigen::half(::logf(float(a))); diff --git a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index ae54225..943e0b0 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -275,7 +275,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) { return __floats2half2_rn(r1, r2); } -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 template<> __device__ EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) { diff --git a/eigen/Eigen/src/Core/arch/NEON/Complex.h b/eigen/Eigen/src/Core/arch/NEON/Complex.h index 57e9b43..306a309 100644 --- a/eigen/Eigen/src/Core/arch/NEON/Complex.h +++ b/eigen/Eigen/src/Core/arch/NEON/Complex.h @@ -67,7 +67,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; template<> EIGEN_STRONG_INLINE 
Packet2cf pset1<Packet2cf>(const std::complex<float>& from) { float32x2_t r64; - r64 = vld1_f32((float *)&from); + r64 = vld1_f32((const float *)&from); return Packet2cf(vcombine_f32(r64, r64)); } @@ -142,7 +142,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3)); } -template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); } +template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((const float *)addr); } template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) { @@ -265,6 +265,8 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true> } }; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) + template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { // TODO optimize it for NEON @@ -275,7 +277,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con s = vmulq_f32(b.v, b.v); rev_s = vrev64q_f32(s); - return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s))); + return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s))); } EIGEN_DEVICE_FUNC inline void @@ -381,7 +383,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex< template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } -template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); } +template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((const double *)addr); } template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride) { @@ -456,6 +458,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true> } }; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) + template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { // TODO optimize it for NEON diff --git a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h index 836fbc0..3d5ed0d 100644 --- a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h @@ -36,12 +36,43 @@ namespace internal { #endif #endif +#if EIGEN_COMP_MSVC + +// In MSVC's arm_neon.h header file, all NEON vector types +// are aliases to the same underlying type __n128. +// We thus have to wrap them to make them different C++ types. 
+// (See also bug 1428) + +template<typename T,int unique_id> +struct eigen_packet_wrapper +{ + operator T&() { return m_val; } + operator const T&() const { return m_val; } + eigen_packet_wrapper() {} + eigen_packet_wrapper(const T &v) : m_val(v) {} + eigen_packet_wrapper& operator=(const T &v) { + m_val = v; + return *this; + } + + T m_val; +}; +typedef eigen_packet_wrapper<float32x2_t,0> Packet2f; +typedef eigen_packet_wrapper<float32x4_t,1> Packet4f; +typedef eigen_packet_wrapper<int32x4_t ,2> Packet4i; +typedef eigen_packet_wrapper<int32x2_t ,3> Packet2i; +typedef eigen_packet_wrapper<uint32x4_t ,4> Packet4ui; + +#else + typedef float32x2_t Packet2f; typedef float32x4_t Packet4f; typedef int32x4_t Packet4i; typedef int32x2_t Packet2i; typedef uint32x4_t Packet4ui; +#endif // EIGEN_COMP_MSVC + #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ const Packet4f p4f_##NAME = pset1<Packet4f>(X) diff --git a/eigen/Eigen/src/Core/arch/SSE/Complex.h b/eigen/Eigen/src/Core/arch/SSE/Complex.h index 5607fe0..d075043 100644 --- a/eigen/Eigen/src/Core/arch/SSE/Complex.h +++ b/eigen/Eigen/src/Core/arch/SSE/Complex.h @@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3))); } -template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) { @@ -229,23 +229,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true> } }; -template<> struct conj_helper<Packet4f, Packet2cf, false,false> -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const - { return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); } -}; - -template<> struct conj_helper<Packet2cf, Packet4f, false,false> -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const - { return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { @@ -340,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex< template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); } -template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a) { @@ -430,23 +414,7 @@ 
template<> struct conj_helper<Packet1cd, Packet1cd, true,true> } }; -template<> struct conj_helper<Packet2d, Packet1cd, false,false> -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const - { return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); } -}; - -template<> struct conj_helper<Packet1cd, Packet2d, false,false> -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const - { return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { diff --git a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h index 3832de1..5e652cc 100644 --- a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h @@ -409,10 +409,16 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double& pstore(to, Packet2d(vec2d_swizzle1(pa,0,0))); } +#if EIGEN_COMP_PGI +typedef const void * SsePrefetchPtrType; +#else +typedef const char * SsePrefetchPtrType; +#endif + #ifndef EIGEN_VECTORIZE_AVX -template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } #endif #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64 @@ -876,4 +882,14 @@ template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, co } // end namespace Eigen +#if EIGEN_COMP_PGI +// PGI++ does not define the following intrinsics in C++ mode. 
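// A hedged usage sketch, assuming an SSE2-capable compiler: each of these
// casts is a pure bit reinterpretation, e.g.
//   __m128d d = _mm_set1_pd(1.0);
//   __m128  f = _mm_castpd_ps(d);    // same 128 bits, now typed as 4 floats
//   __m128i i = _mm_castps_si128(f); // and again as packed integers
// They change only the static type; no instruction needs to be emitted.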
+static inline __m128 _mm_castpd_ps (__m128d x) { return reinterpret_cast<__m128&>(x); } +static inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); } +static inline __m128d _mm_castps_pd (__m128 x) { return reinterpret_cast<__m128d&>(x); } +static inline __m128i _mm_castps_si128(__m128 x) { return reinterpret_cast<__m128i&>(x); } +static inline __m128 _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x); } +static inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); } +#endif + #endif // EIGEN_PACKET_MATH_SSE_H diff --git a/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h b/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h index c848932..c6ca8c7 100644 --- a/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +++ b/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h @@ -14,6 +14,7 @@ namespace Eigen { namespace internal { +#ifndef EIGEN_VECTORIZE_AVX template <> struct type_casting_traits<float, int> { enum { @@ -23,11 +24,6 @@ struct type_casting_traits<float, int> { }; }; -template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) { - return _mm_cvttps_epi32(a); -} - - template <> struct type_casting_traits<int, float> { enum { @@ -37,11 +33,6 @@ struct type_casting_traits<int, float> { }; }; -template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) { - return _mm_cvtepi32_ps(a); -} - - template <> struct type_casting_traits<double, float> { enum { @@ -51,10 +42,6 @@ struct type_casting_traits<double, float> { }; }; -template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) { - return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6)); -} - template <> struct type_casting_traits<float, double> { enum { @@ -63,6 +50,19 @@ struct type_casting_traits<float, double> { TgtCoeffRatio = 2 }; }; +#endif + +template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) { + return _mm_cvttps_epi32(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) { + return _mm_cvtepi32_ps(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) { + return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6)); +} template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) { // Simply discard the second half of the input diff --git a/eigen/Eigen/src/Core/arch/ZVector/Complex.h b/eigen/Eigen/src/Core/arch/ZVector/Complex.h index d39d2d1..1bfb733 100644 --- a/eigen/Eigen/src/Core/arch/ZVector/Complex.h +++ b/eigen/Eigen/src/Core/arch/ZVector/Complex.h @@ -336,6 +336,9 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true> } }; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) + template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { // TODO optimize it for AltiVec diff --git a/eigen/Eigen/src/Core/functors/BinaryFunctors.h b/eigen/Eigen/src/Core/functors/BinaryFunctors.h index 96747ba..3eae6b8 100644 --- a/eigen/Eigen/src/Core/functors/BinaryFunctors.h +++ b/eigen/Eigen/src/Core/functors/BinaryFunctors.h @@ -255,7 +255,7 @@ struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,Rh /** \internal - * \brief Template functor to compute the hypot of two scalars + * \brief Template functor to compute the hypot of two \b positive \b and \b real scalars * * \sa 
MatrixBase::stableNorm(), class Redux */ @@ -263,22 +263,15 @@ template<typename Scalar> struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar> { EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) -// typedef typename NumTraits<Scalar>::Real result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar &y) const { - EIGEN_USING_STD_MATH(sqrt) - Scalar p, qp; - if(_x>_y) - { - p = _x; - qp = _y / p; - } - else - { - p = _y; - qp = _x / p; - } - return p * sqrt(Scalar(1) + qp*qp); + // This functor is used only by hypotNorm, for which it is faster to first apply abs + // on all coefficients prior to the reduction through hypot. + // This way we avoid calling abs on entries that are already positive and real, and it also + // handles complex numbers seamlessly. Otherwise we would have to handle both reals and complexes + // through the same functor... + return internal::positive_real_hypot(x,y); } }; template<typename Scalar> diff --git a/eigen/Eigen/src/Core/functors/StlFunctors.h b/eigen/Eigen/src/Core/functors/StlFunctors.h index 6df3fa5..9c1d758 100644 --- a/eigen/Eigen/src/Core/functors/StlFunctors.h +++ b/eigen/Eigen/src/Core/functors/StlFunctors.h @@ -83,13 +83,17 @@ struct functor_traits<std::binder1st<T> > { enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; }; #endif +#if (__cplusplus < 201703L) && (EIGEN_COMP_MSVC < 1910) +// std::unary_negate is deprecated since c++17 and will be removed in c++20 template<typename T> struct functor_traits<std::unary_negate<T> > { enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; }; +// std::binary_negate is deprecated since c++17 and will be removed in c++20 template<typename T> struct functor_traits<std::binary_negate<T> > { enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; }; +#endif #ifdef EIGEN_STDEXT_SUPPORT diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 41e18ff..9176a13 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -88,7 +88,7 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \ char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ?
'T':'N'); \ EIGTYPE beta(1); \ - BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \ + BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \ } \ }; @@ -125,9 +125,13 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C } \ }; - +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk) +EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk) +#else EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_) EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk_) +#endif // TODO handle complex cases // EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_) diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h b/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h index 7a3bdbf..b0f6b0d 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h @@ -46,7 +46,7 @@ namespace internal { // gemm specialization -#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASPREFIX) \ +#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASFUNC) \ template< \ typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ @@ -100,13 +100,20 @@ static void run(Index rows, Index cols, Index depth, \ ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ } else b = _rhs; \ \ - BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ }}; -GEMM_SPECIALIZATION(double, d, double, d) -GEMM_SPECIALIZATION(float, f, float, s) -GEMM_SPECIALIZATION(dcomplex, cd, double, z) -GEMM_SPECIALIZATION(scomplex, cf, float, c) +#ifdef EIGEN_USE_MKL +GEMM_SPECIALIZATION(double, d, double, dgemm) +GEMM_SPECIALIZATION(float, f, float, sgemm) +GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm) +GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, cgemm) +#else +GEMM_SPECIALIZATION(double, d, double, dgemm_) +GEMM_SPECIALIZATION(float, f, float, sgemm_) +GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_) +GEMM_SPECIALIZATION(scomplex, cf, float, cgemm_) +#endif } // end namespace internal diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h index 3c1a7fc..a597c1f 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h @@ -183,8 +183,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C alignmentPattern = AllAligned; } - const Index offset1 = (FirstAligned && alignmentStep==1)?3:1; - const Index offset3 = (FirstAligned && alignmentStep==1)?1:3; + const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1; + const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3; Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce) @@ -457,8 +457,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R alignmentPattern = AllAligned; } - const Index offset1 = (FirstAligned && alignmentStep==1)?3:1; - const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
+ const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1; + const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3; Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; for (Index i=skipRows; i<rowBound; i+=rowsAtOnce) diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h b/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h index e3a5d58..6e36c2b 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h @@ -85,7 +85,7 @@ EIGEN_BLAS_GEMV_SPECIALIZE(float) EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex) EIGEN_BLAS_GEMV_SPECIALIZE(scomplex) -#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASPREFIX) \ +#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \ template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \ struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \ { \ @@ -113,14 +113,21 @@ static void run( \ x_ptr=x_tmp.data(); \ incx=1; \ } else x_ptr=rhs; \ - BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \ + BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \ }\ }; -EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, d) -EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, s) -EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z) -EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, c) +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv) +EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv) +EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv) +EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv) +#else +EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_) +EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_) +EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_) +EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_) +#endif } // end namespace internal diff --git a/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h b/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h index a45238d..9a53185 100644 --- a/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +++ b/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h @@ -40,7 +40,7 @@ namespace internal { /* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */ -#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template <typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ @@ -81,13 +81,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ } else b = _rhs; \ \ - BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -#define EIGEN_BLAS_HEMM_L(EIGTYPE,
BLASTYPE, EIGPREFIX, BLASFUNC) \ template <typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ @@ -144,20 +144,26 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ } \ \ - BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -EIGEN_BLAS_SYMM_L(double, double, d, d) -EIGEN_BLAS_SYMM_L(float, float, f, s) -EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z) -EIGEN_BLAS_HEMM_L(scomplex, float, cf, c) - +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_SYMM_L(double, double, d, dsymm) +EIGEN_BLAS_SYMM_L(float, float, f, ssymm) +EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm) +EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm) +#else +EIGEN_BLAS_SYMM_L(double, double, d, dsymm_) +EIGEN_BLAS_SYMM_L(float, float, f, ssymm_) +EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_) +EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_) +#endif /* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */ -#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template <typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ @@ -197,13 +203,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ } else b = _lhs; \ \ - BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template <typename Index, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ @@ -259,15 +265,21 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \ } \ \ - BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ } \ }; -EIGEN_BLAS_SYMM_R(double, double, d, d) -EIGEN_BLAS_SYMM_R(float, float, f, s) -EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z) -EIGEN_BLAS_HEMM_R(scomplex, float, cf, c) - +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_SYMM_R(double, double, d, dsymm) +EIGEN_BLAS_SYMM_R(float, float, f, ssymm) +EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm) +EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm) +#else +EIGEN_BLAS_SYMM_R(double, double, d, dsymm_) +EIGEN_BLAS_SYMM_R(float, float, f, ssymm_) +EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_) +EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_) +#endif } // end namespace internal } // 
end namespace Eigen diff --git a/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h b/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h index 38f23ac..1238345 100644 --- a/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +++ b/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h @@ -95,14 +95,21 @@ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \ x_tmp=map_x.conjugate(); \ x_ptr=x_tmp.data(); \ } else x_ptr=_rhs; \ - BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \ + BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \ }\ }; +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv) +EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv) +EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv) +EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv) +#else EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_) EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_) EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_) EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_) +#endif } // end namespace internal diff --git a/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h b/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h index 6ec5a8a..f784507 100644 --- a/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -137,7 +137,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true, ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); - Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert())); + // To work around an "error: member reference base type 'Matrix<...> + // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is + // not a structure or union" compilation error in nvcc (tested V8.0.61), + // create a dummy internal::constructor_without_unaligned_array_assert + // object to pass to the Matrix constructor. 
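// (Hedged aside: this is the usual dodge for declaration/expression
// ambiguities of the 'T obj((U()));' kind: naming the argument first, as in
// 'U u; T obj(u);', leaves nothing for the front end to misparse; per the
// note above, nvcc V8.0.61 rejects the parenthesized-temporary form even
// with the extra parentheses.)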
+ internal::constructor_without_unaligned_array_assert a; + Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); @@ -284,7 +290,8 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false, ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA()); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB()); - Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert())); + internal::constructor_without_unaligned_array_assert a; + Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); @@ -393,7 +400,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false> { template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha) { - typedef typename Dest::Scalar Scalar; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar Scalar; typedef internal::blas_traits<Lhs> LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; @@ -405,8 +414,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false> typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs); typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) - * RhsBlasTraits::extractScalarFactor(a_rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs); + Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha; typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? 
RowMajor : ColMajor,Scalar,Scalar, Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; @@ -431,6 +441,21 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false> &dst.coeffRef(0,0), dst.outerStride(), // result info actualAlpha, blocking ); + + // Apply correction if the diagonal is unit and a scalar factor was nested: + if ((Mode&UnitDiag)==UnitDiag) + { + if (LhsIsTriangular && lhs_alpha!=LhsScalar(1)) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize); + } + else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1)) + { + Index diagSize = (std::min)(rhs.rows(),rhs.cols()); + dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize); + } + } } }; diff --git a/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h b/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h index aecded6..a25197a 100644 --- a/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +++ b/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h @@ -75,7 +75,7 @@ EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true) EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false) // implements col-major += alpha * op(triangular) * op(general) -#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template <typename Index, int Mode, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ @@ -172,7 +172,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ } \ /*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \ /* call ?trmm*/ \ - BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \ + BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \ \ /* Add op(a_triangular)*b into res*/ \ Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ @@ -180,13 +180,20 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \ } \ }; -EIGEN_BLAS_TRMM_L(double, double, d, d) -EIGEN_BLAS_TRMM_L(dcomplex, double, cd, z) -EIGEN_BLAS_TRMM_L(float, float, f, s) -EIGEN_BLAS_TRMM_L(scomplex, float, cf, c) +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_TRMM_L(double, double, d, dtrmm) +EIGEN_BLAS_TRMM_L(dcomplex, MKL_Complex16, cd, ztrmm) +EIGEN_BLAS_TRMM_L(float, float, f, strmm) +EIGEN_BLAS_TRMM_L(scomplex, MKL_Complex8, cf, ctrmm) +#else +EIGEN_BLAS_TRMM_L(double, double, d, dtrmm_) +EIGEN_BLAS_TRMM_L(dcomplex, double, cd, ztrmm_) +EIGEN_BLAS_TRMM_L(float, float, f, strmm_) +EIGEN_BLAS_TRMM_L(scomplex, float, cf, ctrmm_) +#endif // implements col-major += alpha * op(general) * op(triangular) -#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template <typename Index, int Mode, \ int LhsStorageOrder, bool ConjugateLhs, \ int RhsStorageOrder, bool ConjugateRhs> \ @@ -282,7 +289,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ } \ /*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! 
\n";*/ \ /* call ?trmm*/ \ - BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \ + BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \ \ /* Add op(a_triangular)*b into res*/ \ Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ @@ -290,11 +297,17 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \ } \ }; -EIGEN_BLAS_TRMM_R(double, double, d, d) -EIGEN_BLAS_TRMM_R(dcomplex, double, cd, z) -EIGEN_BLAS_TRMM_R(float, float, f, s) -EIGEN_BLAS_TRMM_R(scomplex, float, cf, c) - +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_TRMM_R(double, double, d, dtrmm) +EIGEN_BLAS_TRMM_R(dcomplex, MKL_Complex16, cd, ztrmm) +EIGEN_BLAS_TRMM_R(float, float, f, strmm) +EIGEN_BLAS_TRMM_R(scomplex, MKL_Complex8, cf, ctrmm) +#else +EIGEN_BLAS_TRMM_R(double, double, d, dtrmm_) +EIGEN_BLAS_TRMM_R(dcomplex, double, cd, ztrmm_) +EIGEN_BLAS_TRMM_R(float, float, f, strmm_) +EIGEN_BLAS_TRMM_R(scomplex, float, cf, ctrmm_) +#endif } // end namespace internal } // end namespace Eigen diff --git a/eigen/Eigen/src/Core/products/TriangularMatrixVector.h b/eigen/Eigen/src/Core/products/TriangularMatrixVector.h index 4b292e7..76bfa15 100644 --- a/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/eigen/Eigen/src/Core/products/TriangularMatrixVector.h @@ -221,8 +221,9 @@ template<int Mode> struct trmv_selector<Mode,ColMajor> typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) - * RhsBlasTraits::extractScalarFactor(rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs); + ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha; enum { // FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1 @@ -274,6 +275,12 @@ template<int Mode> struct trmv_selector<Mode,ColMajor> else dest = MappedDest(actualDestPtr, dest.size()); } + + if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) ) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize); + } } }; @@ -295,8 +302,9 @@ template<int Mode> struct trmv_selector<Mode,RowMajor> typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs); typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) - * RhsBlasTraits::extractScalarFactor(rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs); + ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha; enum { DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 @@ -326,6 +334,12 @@ template<int Mode> struct trmv_selector<Mode,RowMajor> actualRhsPtr,1, dest.data(),dest.innerStride(), actualAlpha); + + if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) ) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize); + } } }; diff --git a/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h 
b/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h index 07bf26c..3d47a2b 100644 --- a/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +++ b/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h @@ -71,7 +71,7 @@ EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex) EIGEN_BLAS_TRMV_SPECIALIZE(scomplex) // implements col-major: res += alpha * op(triangular) * vector -#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \ template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \ enum { \ @@ -121,10 +121,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE, diag = IsUnitDiag ? 'U' : 'N'; \ \ /* call ?TRMV*/ \ - BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \ + BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \ \ /* Add op(a_tr)rhs into res*/ \ - BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \ + BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \ /* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \ if (size<(std::max)(rows,cols)) { \ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ @@ -142,18 +142,25 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE, m = convert_index<BlasIndex>(size); \ n = convert_index<BlasIndex>(cols-size); \ } \ - BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ + BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \ } \ } \ }; -EIGEN_BLAS_TRMV_CM(double, double, d, d) -EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z) -EIGEN_BLAS_TRMV_CM(float, float, f, s) -EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c) +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_TRMV_CM(double, double, d, d,) +EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,) +EIGEN_BLAS_TRMV_CM(float, float, f, s,) +EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8, cf, c,) +#else +EIGEN_BLAS_TRMV_CM(double, double, d, d, _) +EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _) +EIGEN_BLAS_TRMV_CM(float, float, f, s, _) +EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c, _) +#endif // implements row-major: res += alpha * op(triangular) * vector -#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \ template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \ enum { \ @@ -203,10 +210,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE, diag = IsUnitDiag ? 
'U' : 'N'; \ \ /* call ?TRMV*/ \ - BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \ + BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \ \ /* Add op(a_tr)rhs into res*/ \ - BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \ + BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \ /* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \ if (size<(std::max)(rows,cols)) { \ if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \ @@ -224,15 +231,22 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE, m = convert_index<BlasIndex>(size); \ n = convert_index<BlasIndex>(cols-size); \ } \ - BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ + BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \ } \ } \ }; -EIGEN_BLAS_TRMV_RM(double, double, d, d) -EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z) -EIGEN_BLAS_TRMV_RM(float, float, f, s) -EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c) +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_TRMV_RM(double, double, d, d,) +EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,) +EIGEN_BLAS_TRMV_RM(float, float, f, s,) +EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8, cf, c,) +#else +EIGEN_BLAS_TRMV_RM(double, double, d, d,_) +EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_) +EIGEN_BLAS_TRMV_RM(float, float, f, s,_) +EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c,_) +#endif } // end namespace internal diff --git a/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h b/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h index 88c0fb7..f077511 100644 --- a/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +++ b/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h @@ -38,7 +38,7 @@ namespace Eigen { namespace internal { // implements LeftSide op(triangular)^-1 * general -#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \ +#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \ template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \ { \ @@ -80,18 +80,24 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage } \ if (IsUnitDiag) diag='U'; \ /* call ?trsm*/ \ - BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \ + BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \ } \ }; -EIGEN_BLAS_TRSM_L(double, double, d) -EIGEN_BLAS_TRSM_L(dcomplex, double, z) -EIGEN_BLAS_TRSM_L(float, float, s) -EIGEN_BLAS_TRSM_L(scomplex, float, c) - +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_TRSM_L(double, double, dtrsm) +EIGEN_BLAS_TRSM_L(dcomplex, MKL_Complex16, ztrsm) +EIGEN_BLAS_TRSM_L(float, float, strsm) +EIGEN_BLAS_TRSM_L(scomplex, MKL_Complex8, ctrsm) +#else +EIGEN_BLAS_TRSM_L(double, double, dtrsm_) +EIGEN_BLAS_TRSM_L(dcomplex, double, ztrsm_) +EIGEN_BLAS_TRSM_L(float, float, strsm_) +EIGEN_BLAS_TRSM_L(scomplex, float, ctrsm_)
+#endif // implements RightSide general * op(triangular)^-1 -#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASPREFIX) \ +#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \ template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \ { \ @@ -133,16 +139,22 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag } \ if (IsUnitDiag) diag='U'; \ /* call ?trsm*/ \ - BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \ + BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \ /*std::cout << "TRSM_R specialization!\n";*/ \ } \ }; -EIGEN_BLAS_TRSM_R(double, double, d) -EIGEN_BLAS_TRSM_R(dcomplex, double, z) -EIGEN_BLAS_TRSM_R(float, float, s) -EIGEN_BLAS_TRSM_R(scomplex, float, c) - +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_TRSM_R(double, double, dtrsm) +EIGEN_BLAS_TRSM_R(dcomplex, MKL_Complex16, ztrsm) +EIGEN_BLAS_TRSM_R(float, float, strsm) +EIGEN_BLAS_TRSM_R(scomplex, MKL_Complex8, ctrsm) +#else +EIGEN_BLAS_TRSM_R(double, double, dtrsm_) +EIGEN_BLAS_TRSM_R(dcomplex, double, ztrsm_) +EIGEN_BLAS_TRSM_R(float, float, strsm_) +EIGEN_BLAS_TRSM_R(scomplex, float, ctrsm_) +#endif } // end namespace internal diff --git a/eigen/Eigen/src/Core/util/MKL_support.h b/eigen/Eigen/src/Core/util/MKL_support.h index 26b5966..b7d6ecc 100644 --- a/eigen/Eigen/src/Core/util/MKL_support.h +++ b/eigen/Eigen/src/Core/util/MKL_support.h @@ -49,10 +49,11 @@ #define EIGEN_USE_LAPACKE #endif -#if defined(EIGEN_USE_MKL_VML) +#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL) #define EIGEN_USE_MKL #endif + #if defined EIGEN_USE_MKL # include <mkl.h> /*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/ @@ -108,6 +109,10 @@ #endif #endif +#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL) +#include "../../misc/blas.h" +#endif + namespace Eigen { typedef std::complex<double> dcomplex; @@ -121,8 +126,5 @@ typedef int BlasIndex; } // end namespace Eigen -#if defined(EIGEN_USE_BLAS) -#include "../../misc/blas.h" -#endif #endif // EIGEN_MKL_SUPPORT_H diff --git a/eigen/Eigen/src/Core/util/Macros.h b/eigen/Eigen/src/Core/util/Macros.h index 38d6ddb..02d21d2 100644 --- a/eigen/Eigen/src/Core/util/Macros.h +++ b/eigen/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 3 -#define EIGEN_MINOR_VERSION 4 +#define EIGEN_MINOR_VERSION 5 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -399,7 +399,7 @@ // Does the compiler support variadic templates?
#ifndef EIGEN_HAS_VARIADIC_TEMPLATES #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ - && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) + && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) ) // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // this prevents nvcc from crashing when compiling Eigen on Tegra X1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 @@ -413,7 +413,7 @@ #ifdef __CUDACC__ // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above -#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) +#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500)) #define EIGEN_HAS_CONSTEXPR 1 #endif #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ @@ -487,11 +487,13 @@ // EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC, // but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline // but GCC is still doing fine with just inline. +#ifndef EIGEN_STRONG_INLINE #if EIGEN_COMP_MSVC || EIGEN_COMP_ICC #define EIGEN_STRONG_INLINE __forceinline #else #define EIGEN_STRONG_INLINE inline #endif +#endif // EIGEN_ALWAYS_INLINE is the strongest, it has the effect of making the function inline and adding every possible // attribute to maximize inlining. This should only be used when really necessary: in particular, @@ -812,7 +814,8 @@ namespace Eigen { // just an empty macro ! #define EIGEN_EMPTY -#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) +#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0) + // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ using Base::operator =; #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) @@ -986,7 +989,13 @@ namespace Eigen { # define EIGEN_NOEXCEPT # define EIGEN_NOEXCEPT_IF(x) # define EIGEN_NO_THROW throw() -# define EIGEN_EXCEPTION_SPEC(X) throw(X) +# if EIGEN_COMP_MSVC + // MSVC does not support exception specifications (warning C4290), + // and they are deprecated in c++11 anyway.
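// Background (illustrative, not part of this changeset): a dynamic exception
// specification such as
//
//   void f() throw(std::bad_alloc);   // deprecated in C++11, removed in C++17
//
// is parsed but never enforced by MSVC, which emits warning C4290 instead;
// the empty throw() defined below keeps the declaration warning-free there.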
+# define EIGEN_EXCEPTION_SPEC(X) throw() +# else +# define EIGEN_EXCEPTION_SPEC(X) throw(X) +# endif #endif #endif // EIGEN_MACROS_H diff --git a/eigen/Eigen/src/Core/util/Memory.h b/eigen/Eigen/src/Core/util/Memory.h index c634d7e..66cdbd8 100644 --- a/eigen/Eigen/src/Core/util/Memory.h +++ b/eigen/Eigen/src/Core/util/Memory.h @@ -70,7 +70,7 @@ inline void throw_std_bad_alloc() throw std::bad_alloc(); #else std::size_t huge = static_cast<std::size_t>(-1); - new int[huge]; + ::operator new(huge); #endif } @@ -493,7 +493,7 @@ template<typename T> struct smart_copy_helper<T,true> { IntPtr size = IntPtr(end)-IntPtr(start); if(size==0) return; eigen_internal_assert(start!=0 && end!=0 && target!=0); - memcpy(target, start, size); + std::memcpy(target, start, size); } }; @@ -696,7 +696,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) /** \class aligned_allocator * \ingroup Core_Module * -* \brief STL compatible allocator to use with with 16 byte aligned types +* \brief STL compatible allocator to use with types requiring a non-standard alignment. +* +* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd. +* By default, it will thus provide at least 16 bytes alignment and more in the following cases: +* - 32 bytes alignment if AVX is enabled. +* - 64 bytes alignment if AVX512 is enabled. +* +* This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented +* \link TopicPreprocessorDirectivesPerformance there \endlink. * * Example: * \code diff --git a/eigen/Eigen/src/Core/util/Meta.h b/eigen/Eigen/src/Core/util/Meta.h index 7f63707..1d73f05 100644 --- a/eigen/Eigen/src/Core/util/Meta.h +++ b/eigen/Eigen/src/Core/util/Meta.h @@ -485,6 +485,26 @@ T div_ceil(const T &a, const T &b) return (a+b-1) / b; } +// The aim of the following functions is to bypass -Wfloat-equal warnings +// when we really want a strict equality comparison on floating points.
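// A minimal illustration of the idea (hypothetical caller, not part of this
// changeset): std::equal_to performs exactly the same strict comparison as ==,
// but the comparison is instantiated inside a standard-library header, so
// GCC/Clang do not flag the call site:
//
//   #include <functional>
//   float a = 0.25f, b = 0.25f;
//   bool w = (a == b);                        // warns under -Wfloat-equal
//   bool s = std::equal_to<float>()(a, b);    // same result, no warning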
+template<typename X, typename Y> EIGEN_STRONG_INLINE +bool equal_strict(const X& x,const Y& y) { return x == y; } + +template<> EIGEN_STRONG_INLINE +bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); } + +template<> EIGEN_STRONG_INLINE +bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); } + +template<typename X, typename Y> EIGEN_STRONG_INLINE +bool not_equal_strict(const X& x,const Y& y) { return x != y; } + +template<> EIGEN_STRONG_INLINE +bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); } + +template<> EIGEN_STRONG_INLINE +bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); } + } // end namespace numext } // end namespace Eigen diff --git a/eigen/Eigen/src/Core/util/StaticAssert.h b/eigen/Eigen/src/Core/util/StaticAssert.h index 983361a..500e477 100644 --- a/eigen/Eigen/src/Core/util/StaticAssert.h +++ b/eigen/Eigen/src/Core/util/StaticAssert.h @@ -24,6 +24,7 @@ * */ +#ifndef EIGEN_STATIC_ASSERT #ifndef EIGEN_NO_STATIC_ASSERT #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)) @@ -44,64 +45,65 @@ struct static_assertion<true> { enum { - YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX, - YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES, - YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES, - THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE, - THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE, - THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE, - OUT_OF_RANGE_ACCESS, - YOU_MADE_A_PROGRAMMING_MISTAKE, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT, - EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE, - YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR, - YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR, - UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC, - THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES, - FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED, - NUMERIC_TYPE_MUST_BE_REAL, - COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED, - WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED, - THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE, - INVALID_MATRIX_PRODUCT, - INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS, - INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION, - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY, - THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES, - THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES, - INVALID_MATRIX_TEMPLATE_PARAMETERS, - INVALID_MATRIXBASE_TEMPLATE_PARAMETERS, - BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER, - THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX, - THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES, - YOU_ALREADY_SPECIFIED_THIS_STRIDE, - INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION, - THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD, - PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1, - THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS, - YOU_CANNOT_MIX_ARRAYS_AND_MATRICES, - YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION, - THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY, - 
YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT, - THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS, - THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL, - THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES, - YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED, - YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED, - THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE, - THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, - OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, - IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, - STORAGE_LAYOUT_DOES_NOT_MATCH, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, - THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, - MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, - THIS_TYPE_IS_NOT_SUPPORTED, - STORAGE_KIND_MUST_MATCH, - STORAGE_INDEX_MUST_MATCH, - CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY + YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1, + YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1, + YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1, + THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1, + THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1, + THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1, + OUT_OF_RANGE_ACCESS=1, + YOU_MADE_A_PROGRAMMING_MISTAKE=1, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1, + EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1, + YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1, + YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1, + UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1, + THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1, + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1, + NUMERIC_TYPE_MUST_BE_REAL=1, + COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1, + WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1, + THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1, + INVALID_MATRIX_PRODUCT=1, + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1, + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1, + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1, + THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1, + INVALID_MATRIX_TEMPLATE_PARAMETERS=1, + INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1, + BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1, + THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1, + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1, + YOU_ALREADY_SPECIFIED_THIS_STRIDE=1, + INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1, + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1, + THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1, + YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1, + YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1, + THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1, + YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1, + THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1, + THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1, + THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1, + 
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1, + YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1, + THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1, + THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1, + OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1, + IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1, + STORAGE_LAYOUT_DOES_NOT_MATCH=1, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1, + MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1, + THIS_TYPE_IS_NOT_SUPPORTED=1, + STORAGE_KIND_MUST_MATCH=1, + STORAGE_INDEX_MUST_MATCH=1, + CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1, + SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1 }; }; @@ -131,7 +133,7 @@ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG); #endif // EIGEN_NO_STATIC_ASSERT - +#endif // EIGEN_STATIC_ASSERT // static assertion failing if the type \a TYPE is not a vector type #define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \ diff --git a/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h b/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h index 36a91df..87d789b 100644 --- a/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +++ b/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h @@ -311,7 +311,6 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp // Aliases: Map<VectorType> v(reinterpret_cast<Scalar*>(m_tmp.data()), size); ComplexVectorType &cv = m_tmp; - const MatrixType &mZ = m_realQZ.matrixZ(); const MatrixType &mS = m_realQZ.matrixS(); const MatrixType &mT = m_realQZ.matrixT(); @@ -351,7 +350,7 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp } } } - m_eivec.col(i).real().noalias() = mZ.transpose() * v; + m_eivec.col(i).real().noalias() = m_realQZ.matrixZ().transpose() * v; m_eivec.col(i).real().normalize(); m_eivec.col(i).imag().setConstant(0); } @@ -400,7 +399,7 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp / (alpha*mT.coeffRef(j,j) - static_cast<Scalar>(beta*mS.coeffRef(j,j))); } } - m_eivec.col(i+1).noalias() = (mZ.transpose() * cv); + m_eivec.col(i+1).noalias() = (m_realQZ.matrixZ().transpose() * cv); m_eivec.col(i+1).normalize(); m_eivec.col(i) = m_eivec.col(i+1).conjugate(); } diff --git a/eigen/Eigen/src/Eigenvalues/RealSchur.h b/eigen/Eigen/src/Eigenvalues/RealSchur.h index f5c8604..17ea903 100644 --- a/eigen/Eigen/src/Eigenvalues/RealSchur.h +++ b/eigen/Eigen/src/Eigenvalues/RealSchur.h @@ -303,7 +303,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa Scalar exshift(0); // sum of exceptional shifts Scalar norm = computeNormOfT(); - if(norm!=0) + if(norm!=Scalar(0)) { while (iu >= 0) { @@ -327,7 +327,7 @@ RealSchur<MatrixType>& RealSchur<MatrixType>::computeFromHessenberg(const HessMa else // No convergence yet { // The firstHouseholderVector vector has to be initialized to something to get rid of a silly GCC warning (-O1 -Wall -DNDEBUG ) - Vector3s firstHouseholderVector(0,0,0), shiftInfo; + Vector3s firstHouseholderVector = Vector3s::Zero(), shiftInfo; computeShift(iu, iter, exshift, shiftInfo); iter = iter + 1; totalIter = totalIter + 1; diff --git a/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h b/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h index 3891cf8..b0c947d 100644 --- a/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +++ 
b/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h @@ -37,7 +37,7 @@ namespace Eigen { /** \internal Specialization for the data types supported by LAPACKe */ -#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW, LAPACKE_COLROW ) \ +#define EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, EIGCOLROW ) \ template<> template<typename InputType> inline \ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >& \ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const EigenBase<InputType>& matrix, int options) \ @@ -47,7 +47,7 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c && (options&EigVecMask)!=EigVecMask \ && "invalid option parameter"); \ bool computeEigenvectors = (options&ComputeEigenvectors)==ComputeEigenvectors; \ - lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, matrix_order, info; \ + lapack_int n = internal::convert_index<lapack_int>(matrix.cols()), lda, info; \ m_eivalues.resize(n,1); \ m_subdiag.resize(n-1); \ m_eivec = matrix; \ @@ -63,27 +63,24 @@ SelfAdjointEigenSolver<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(c } \ \ lda = internal::convert_index<lapack_int>(m_eivec.outerStride()); \ - matrix_order=LAPACKE_COLROW; \ char jobz, uplo='L'/*, range='A'*/; \ jobz = computeEigenvectors ? 'V' : 'N'; \ \ - info = LAPACKE_##LAPACKE_NAME( matrix_order, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \ + info = LAPACKE_##LAPACKE_NAME( LAPACK_COL_MAJOR, jobz, uplo, n, (LAPACKE_TYPE*)m_eivec.data(), lda, (LAPACKE_RTYPE*)m_eivalues.data() ); \ m_info = (info==0) ? Success : NoConvergence; \ m_isInitialized = true; \ m_eigenvectorsOk = computeEigenvectors; \ return *this; \ } +#define EIGEN_LAPACKE_EIG_SELFADJ(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME ) \ + EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, ColMajor ) \ + EIGEN_LAPACKE_EIG_SELFADJ_2(EIGTYPE, LAPACKE_TYPE, LAPACKE_RTYPE, LAPACKE_NAME, RowMajor ) -EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, ColMajor, LAPACK_COL_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, ColMajor, LAPACK_COL_MAJOR) - -EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev, RowMajor, LAPACK_ROW_MAJOR) -EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev, RowMajor, LAPACK_ROW_MAJOR) +EIGEN_LAPACKE_EIG_SELFADJ(double, double, double, dsyev) +EIGEN_LAPACKE_EIG_SELFADJ(float, float, float, ssyev) +EIGEN_LAPACKE_EIG_SELFADJ(dcomplex, lapack_complex_double, double, zheev) +EIGEN_LAPACKE_EIG_SELFADJ(scomplex, lapack_complex_float, float, cheev) } // end namespace Eigen diff --git a/eigen/Eigen/src/Geometry/AngleAxis.h b/eigen/Eigen/src/Geometry/AngleAxis.h index 0af3c1b..83ee1be 100644 --- a/eigen/Eigen/src/Geometry/AngleAxis.h +++ b/eigen/Eigen/src/Geometry/AngleAxis.h @@ -178,7 +178,7 @@ EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const Quaterni if (n != Scalar(0)) { m_angle = Scalar(2)*atan2(n, abs(q.w())); - if(q.w() < 0) + if(q.w() 
< Scalar(0)) n = -n; m_axis = q.vec() / n; } diff --git a/eigen/Eigen/src/Geometry/Quaternion.h b/eigen/Eigen/src/Geometry/Quaternion.h index 3e5a9ba..c3fd8c3 100644 --- a/eigen/Eigen/src/Geometry/Quaternion.h +++ b/eigen/Eigen/src/Geometry/Quaternion.h @@ -43,6 +43,11 @@ class QuaternionBase : public RotationBase<Derived, 3> typedef typename internal::traits<Derived>::Scalar Scalar; typedef typename NumTraits<Scalar>::Real RealScalar; typedef typename internal::traits<Derived>::Coefficients Coefficients; + typedef typename Coefficients::CoeffReturnType CoeffReturnType; + typedef typename internal::conditional<bool(internal::traits<Derived>::Flags&LvalueBit), + Scalar&, CoeffReturnType>::type NonConstCoeffReturnType; + + enum { Flags = Eigen::internal::traits<Derived>::Flags }; @@ -58,22 +63,22 @@ class QuaternionBase : public RotationBase<Derived, 3> /** \returns the \c x coefficient */ - EIGEN_DEVICE_FUNC inline Scalar x() const { return this->derived().coeffs().coeff(0); } + EIGEN_DEVICE_FUNC inline CoeffReturnType x() const { return this->derived().coeffs().coeff(0); } /** \returns the \c y coefficient */ - EIGEN_DEVICE_FUNC inline Scalar y() const { return this->derived().coeffs().coeff(1); } + EIGEN_DEVICE_FUNC inline CoeffReturnType y() const { return this->derived().coeffs().coeff(1); } /** \returns the \c z coefficient */ - EIGEN_DEVICE_FUNC inline Scalar z() const { return this->derived().coeffs().coeff(2); } + EIGEN_DEVICE_FUNC inline CoeffReturnType z() const { return this->derived().coeffs().coeff(2); } /** \returns the \c w coefficient */ - EIGEN_DEVICE_FUNC inline Scalar w() const { return this->derived().coeffs().coeff(3); } - - /** \returns a reference to the \c x coefficient */ - EIGEN_DEVICE_FUNC inline Scalar& x() { return this->derived().coeffs().coeffRef(0); } - /** \returns a reference to the \c y coefficient */ - EIGEN_DEVICE_FUNC inline Scalar& y() { return this->derived().coeffs().coeffRef(1); } - /** \returns a reference to the \c z coefficient */ - EIGEN_DEVICE_FUNC inline Scalar& z() { return this->derived().coeffs().coeffRef(2); } - /** \returns a reference to the \c w coefficient */ - EIGEN_DEVICE_FUNC inline Scalar& w() { return this->derived().coeffs().coeffRef(3); } + EIGEN_DEVICE_FUNC inline CoeffReturnType w() const { return this->derived().coeffs().coeff(3); } + + /** \returns a reference to the \c x coefficient (if Derived is a non-const lvalue) */ + EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType x() { return this->derived().coeffs().x(); } + /** \returns a reference to the \c y coefficient (if Derived is a non-const lvalue) */ + EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType y() { return this->derived().coeffs().y(); } + /** \returns a reference to the \c z coefficient (if Derived is a non-const lvalue) */ + EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType z() { return this->derived().coeffs().z(); } + /** \returns a reference to the \c w coefficient (if Derived is a non-const lvalue) */ + EIGEN_DEVICE_FUNC inline NonConstCoeffReturnType w() { return this->derived().coeffs().w(); } /** \returns a read-only vector expression of the imaginary part (x,y,z) */ EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); } diff --git a/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index facdaf8..f66c846 100644 --- a/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ 
b/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -168,7 +168,7 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar> { for(Index j=0; j<mat.outerSize(); ++j) { - RealScalar sum = mat.innerVector(j).squaredNorm(); + RealScalar sum = mat.col(j).squaredNorm(); if(sum>RealScalar(0)) m_invdiag(j) = RealScalar(1)/sum; else diff --git a/eigen/Eigen/src/Jacobi/Jacobi.h b/eigen/Eigen/src/Jacobi/Jacobi.h index c30326e..437e666 100644 --- a/eigen/Eigen/src/Jacobi/Jacobi.h +++ b/eigen/Eigen/src/Jacobi/Jacobi.h @@ -298,61 +298,119 @@ inline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiR } namespace internal { -template<typename VectorX, typename VectorY, typename OtherScalar> -void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j) + +template<typename Scalar, typename OtherScalar, + int SizeAtCompileTime, int MinAlignment, bool Vectorizable> +struct apply_rotation_in_the_plane_selector { - typedef typename VectorX::Scalar Scalar; - enum { - PacketSize = packet_traits<Scalar>::size, - OtherPacketSize = packet_traits<OtherScalar>::size - }; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename packet_traits<OtherScalar>::type OtherPacket; - eigen_assert(xpr_x.size() == xpr_y.size()); - Index size = xpr_x.size(); - Index incrx = xpr_x.derived().innerStride(); - Index incry = xpr_y.derived().innerStride(); + static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s) + { + for(Index i=0; i<size; ++i) + { + Scalar xi = *x; + Scalar yi = *y; + *x = c * xi + numext::conj(s) * yi; + *y = -s * xi + numext::conj(c) * yi; + x += incrx; + y += incry; + } + } +}; - Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0); - Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0); - - OtherScalar c = j.c(); - OtherScalar s = j.s(); - if (c==OtherScalar(1) && s==OtherScalar(0)) - return; +template<typename Scalar, typename OtherScalar, + int SizeAtCompileTime, int MinAlignment> +struct apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,true /* vectorizable */> +{ + static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s) + { + enum { + PacketSize = packet_traits<Scalar>::size, + OtherPacketSize = packet_traits<OtherScalar>::size + }; + typedef typename packet_traits<Scalar>::type Packet; + typedef typename packet_traits<OtherScalar>::type OtherPacket; + + /*** dynamic-size vectorized paths ***/ + if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1)) + { + // both vectors are sequentially stored in memory => vectorization + enum { Peeling = 2 }; - /*** dynamic-size vectorized paths ***/ + Index alignedStart = internal::first_default_aligned(y, size); + Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; - if(VectorX::SizeAtCompileTime == Dynamic && - (VectorX::Flags & VectorY::Flags & PacketAccessBit) && - (PacketSize == OtherPacketSize) && - ((incrx==1 && incry==1) || PacketSize == 1)) - { - // both vectors are sequentially stored in memory => vectorization - enum { Peeling = 2 }; + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; - Index alignedStart = 
internal::first_default_aligned(y, size); - Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; + for(Index i=0; i<alignedStart; ++i) + { + Scalar xi = x[i]; + Scalar yi = y[i]; + x[i] = c * xi + numext::conj(s) * yi; + y[i] = -s * xi + numext::conj(c) * yi; + } - const OtherPacket pc = pset1<OtherPacket>(c); - const OtherPacket ps = pset1<OtherPacket>(s); - conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj; - conj_helper<OtherPacket,Packet,false,false> pm; + Scalar* EIGEN_RESTRICT px = x + alignedStart; + Scalar* EIGEN_RESTRICT py = y + alignedStart; - for(Index i=0; i<alignedStart; ++i) - { - Scalar xi = x[i]; - Scalar yi = y[i]; - x[i] = c * xi + numext::conj(s) * yi; - y[i] = -s * xi + numext::conj(c) * yi; - } + if(internal::first_default_aligned(x, size)==alignedStart) + { + for(Index i=alignedStart; i<alignedEnd; i+=PacketSize) + { + Packet xi = pload<Packet>(px); + Packet yi = pload<Packet>(py); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + px += PacketSize; + py += PacketSize; + } + } + else + { + Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize); + for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize) + { + Packet xi = ploadu<Packet>(px); + Packet xi1 = ploadu<Packet>(px+PacketSize); + Packet yi = pload <Packet>(py); + Packet yi1 = pload <Packet>(py+PacketSize); + pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); + pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); + px += Peeling*PacketSize; + py += Peeling*PacketSize; + } + if(alignedEnd!=peelingEnd) + { + Packet xi = ploadu<Packet>(x+peelingEnd); + Packet yi = pload <Packet>(y+peelingEnd); + pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + } + } - Scalar* EIGEN_RESTRICT px = x + alignedStart; - Scalar* EIGEN_RESTRICT py = y + alignedStart; + for(Index i=alignedEnd; i<size; ++i) + { + Scalar xi = x[i]; + Scalar yi = y[i]; + x[i] = c * xi + numext::conj(s) * yi; + y[i] = -s * xi + numext::conj(c) * yi; + } + } - if(internal::first_default_aligned(x, size)==alignedStart) + /*** fixed-size vectorized path ***/ + else if(SizeAtCompileTime != Dynamic && MinAlignment>0) // FIXME should be compared to the required alignment { - for(Index i=alignedStart; i<alignedEnd; i+=PacketSize) + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; + Scalar* EIGEN_RESTRICT px = x; + Scalar* EIGEN_RESTRICT py = y; + for(Index i=0; i<size; i+=PacketSize) { Packet xi = pload<Packet>(px); Packet yi = pload<Packet>(py); @@ -362,76 +420,40 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x py += PacketSize; } } - else - { - Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize); - for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize) - { - Packet xi = ploadu<Packet>(px); - Packet xi1 = ploadu<Packet>(px+PacketSize); - Packet yi = pload <Packet>(py); - Packet yi1 = pload <Packet>(py+PacketSize); - pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); - pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); - pstore (py, 
psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); - pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); - px += Peeling*PacketSize; - py += Peeling*PacketSize; - } - if(alignedEnd!=peelingEnd) - { - Packet xi = ploadu<Packet>(x+peelingEnd); - Packet yi = pload <Packet>(y+peelingEnd); - pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); - pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); - } - } - for(Index i=alignedEnd; i<size; ++i) + /*** non-vectorized path ***/ + else { - Scalar xi = x[i]; - Scalar yi = y[i]; - x[i] = c * xi + numext::conj(s) * yi; - y[i] = -s * xi + numext::conj(c) * yi; + apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,false>::run(x,incrx,y,incry,size,c,s); } } +}; - /*** fixed-size vectorized path ***/ - else if(VectorX::SizeAtCompileTime != Dynamic && - (VectorX::Flags & VectorY::Flags & PacketAccessBit) && - (PacketSize == OtherPacketSize) && - (EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment - { - const OtherPacket pc = pset1<OtherPacket>(c); - const OtherPacket ps = pset1<OtherPacket>(s); - conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj; - conj_helper<OtherPacket,Packet,false,false> pm; - Scalar* EIGEN_RESTRICT px = x; - Scalar* EIGEN_RESTRICT py = y; - for(Index i=0; i<size; i+=PacketSize) - { - Packet xi = pload<Packet>(px); - Packet yi = pload<Packet>(py); - pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); - px += PacketSize; - py += PacketSize; - } - } +template<typename VectorX, typename VectorY, typename OtherScalar> +void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j) +{ + typedef typename VectorX::Scalar Scalar; + const bool Vectorizable = (VectorX::Flags & VectorY::Flags & PacketAccessBit) + && (int(packet_traits<Scalar>::size) == int(packet_traits<OtherScalar>::size)); - /*** non-vectorized path ***/ - else - { - for(Index i=0; i<size; ++i) - { - Scalar xi = *x; - Scalar yi = *y; - *x = c * xi + numext::conj(s) * yi; - *y = -s * xi + numext::conj(c) * yi; - x += incrx; - y += incry; - } - } + eigen_assert(xpr_x.size() == xpr_y.size()); + Index size = xpr_x.size(); + Index incrx = xpr_x.derived().innerStride(); + Index incry = xpr_y.derived().innerStride(); + + Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0); + Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0); + + OtherScalar c = j.c(); + OtherScalar s = j.s(); + if (c==OtherScalar(1) && s==OtherScalar(0)) + return; + + apply_rotation_in_the_plane_selector< + Scalar,OtherScalar, + VectorX::SizeAtCompileTime, + EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment), + Vectorizable>::run(x,incrx,y,incry,size,c,s); } } // end namespace internal diff --git a/eigen/Eigen/src/LU/InverseImpl.h b/eigen/Eigen/src/LU/InverseImpl.h index 018f99b..f49f233 100644 --- a/eigen/Eigen/src/LU/InverseImpl.h +++ b/eigen/Eigen/src/LU/InverseImpl.h @@ -404,7 +404,7 @@ inline void MatrixBase<Derived>::computeInverseWithCheck( const RealScalar& absDeterminantThreshold ) const { - RealScalar determinant; + Scalar determinant; // i'd love to put some static assertions there, but SFINAE means that they have no effect... 
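// Note (illustrative reasoning, not part of this changeset): the temporary
// must be Scalar rather than RealScalar because the determinant of a complex
// matrix is itself complex; for Matrix2cd, Scalar is std::complex<double>
// while RealScalar is double. A hypothetical caller exercising that path:
//
//   Eigen::Matrix2cd m = Eigen::Matrix2cd::Identity(), inv;
//   std::complex<double> det; bool ok;
//   m.computeInverseAndDetWithCheck(inv, det, ok);   // det must be complex here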
eigen_assert(rows() == cols()); computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold); diff --git a/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h b/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h index d2ebfd7..160d8a5 100644 --- a/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +++ b/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h @@ -64,28 +64,28 @@ namespace internal typedef typename _MatrixType::StorageIndex StorageIndex; }; - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm) + inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, float *vals, int *perm, int * invp, float *x, int nbrhs, int *iparm, double *dparm) { if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (nbrhs == 0) {x = NULL; nbrhs=1;} s_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); } - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm) + inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, double *vals, int *perm, int * invp, double *x, int nbrhs, int *iparm, double *dparm) { if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (nbrhs == 0) {x = NULL; nbrhs=1;} d_pastix(pastix_data, pastix_comm, n, ptr, idx, vals, perm, invp, x, nbrhs, iparm, dparm); } - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm) + inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<float> *vals, int *perm, int * invp, std::complex<float> *x, int nbrhs, int *iparm, double *dparm) { if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (nbrhs == 0) {x = NULL; nbrhs=1;} c_pastix(pastix_data, pastix_comm, n, ptr, idx, reinterpret_cast<PASTIX_COMPLEX*>(vals), perm, invp, reinterpret_cast<PASTIX_COMPLEX*>(x), nbrhs, iparm, dparm); } - void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm) + inline void eigen_pastix(pastix_data_t **pastix_data, int pastix_comm, int n, int *ptr, int *idx, std::complex<double> *vals, int *perm, int * invp, std::complex<double> *x, int nbrhs, int *iparm, double *dparm) { if (n == 0) { ptr = NULL; idx = NULL; vals = NULL; } if (nbrhs == 0) {x = NULL; nbrhs=1;} diff --git a/eigen/Eigen/src/SVD/BDCSVD.h b/eigen/Eigen/src/SVD/BDCSVD.h index d7a4271..1134d66 100644 --- a/eigen/Eigen/src/SVD/BDCSVD.h +++ b/eigen/Eigen/src/SVD/BDCSVD.h @@ -11,7 +11,7 @@ // Copyright (C) 2013 Jean Ceccato <jean.ceccato@ensimag.fr> // Copyright (C) 2013 Pierre Zoppitelli <pierre.zoppitelli@ensimag.fr> // Copyright (C) 2013 Jitse Niesen <jitse@maths.leeds.ac.uk> -// Copyright (C) 2014-2016 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2014-2017 Gael Guennebaud <gael.guennebaud@inria.fr> // // Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed @@ -696,7 +696,9 @@ typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar for(Index i=0; i<m; ++i) { Index j = perm(i); - res += numext::abs2(col0(j)) / ((diagShifted(j) - mu) * (diag(j) + shift + mu)); + // The following expression could be rewritten to involve only a single division, + // but this would make the expression more sensitive to overflow. + res += (col0(j) / (diagShifted(j) - mu)) * (col0(j) / (diag(j) + shift + mu)); } return res; @@ -708,9 +710,12 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d { using std::abs; using std::swap; + using std::sqrt; Index n = col0.size(); Index actual_n = n; + // Note that here actual_n is computed based on col0(i)==0 instead of diag(i)==0 as above + // because 1) we have diag(i)==0 => col0(i)==0 and 2) if col0(i)==0, then diag(i) is already a singular value. while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n; for (Index k = 0; k < n; ++k) @@ -732,7 +737,9 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d right = (diag(actual_n-1) + col0.matrix().norm()); else { - // Skip deflated singular values + // Skip deflated singular values, + // recall that at this stage we assume that z[j]!=0 and all entries for which z[j]==0 have been put aside. + // This should be equivalent to using perm[] Index l = k+1; while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); } right = diag(l); @@ -818,15 +825,23 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d RealScalar leftShifted, rightShifted; if (shift == left) { - leftShifted = (std::numeric_limits<RealScalar>::min)(); + // to avoid overflow, we must have mu > max(real_min, |z(k)|/sqrt(real_max)), + // the factor 2 is to be more conservative + leftShifted = numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), Literal(2) * abs(col0(k)) / sqrt((std::numeric_limits<RealScalar>::max)()) ); + + // check that we did it right: + eigen_internal_assert( (numext::isfinite)( (col0(k)/leftShifted)*(col0(k)/(diag(k)+shift+leftShifted)) ) ); // I don't understand why the case k==0 would be special there: - // if (k == 0) rightShifted = right - left; else - rightShifted = (k==actual_n-1) ? right : ((right - left) * RealScalar(0.6)); // theoretically we can take 0.5, but let's be safe + // if (k == 0) rightShifted = right - left; else + rightShifted = (k==actual_n-1) ? 
right : ((right - left) * RealScalar(0.51)); // theoretically we can take 0.5, but let's be safe } else { - leftShifted = -(right - left) * RealScalar(0.6); - rightShifted = -(std::numeric_limits<RealScalar>::min)(); + leftShifted = -(right - left) * RealScalar(0.51); + if(k+1<n) + rightShifted = -numext::maxi<RealScalar>( (std::numeric_limits<RealScalar>::min)(), abs(col0(k+1)) / sqrt((std::numeric_limits<RealScalar>::max)()) ); + else + rightShifted = -(std::numeric_limits<RealScalar>::min)(); } RealScalar fLeft = secularEq(leftShifted, col0, diag, perm, diagShifted, shift); @@ -980,7 +995,7 @@ void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index Index start = firstCol + shift; RealScalar c = m_computed(start, start); RealScalar s = m_computed(start+i, start); - RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); + RealScalar r = numext::hypot(c,s); if (r == Literal(0)) { m_computed(start+i, start+i) = Literal(0); diff --git a/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h b/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h index 5027215..ff0516f 100644 --- a/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +++ b/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h @@ -61,9 +61,10 @@ JacobiSVD<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW, Dynamic, Dynamic>, ColPiv u = (LAPACKE_TYPE*)m_matrixU.data(); \ } else { ldu=1; u=&dummy; }\ MatrixType localV; \ - ldvt = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \ + lapack_int vt_rows = (m_computeFullV) ? internal::convert_index<lapack_int>(m_cols) : (m_computeThinV) ? internal::convert_index<lapack_int>(m_diagSize) : 1; \ if (computeV()) { \ - localV.resize(ldvt, m_cols); \ + localV.resize(vt_rows, m_cols); \ + ldvt = internal::convert_index<lapack_int>(localV.outerStride()); \ vt = (LAPACKE_TYPE*)localV.data(); \ } else { ldvt=1; vt=&dummy; }\ Matrix<LAPACKE_RTYPE, Dynamic, Dynamic> superb; superb.resize(m_diagSize, 1); \ diff --git a/eigen/Eigen/src/SparseCore/AmbiVector.h b/eigen/Eigen/src/SparseCore/AmbiVector.h index 8a5cc91..e0295f2 100644 --- a/eigen/Eigen/src/SparseCore/AmbiVector.h +++ b/eigen/Eigen/src/SparseCore/AmbiVector.h @@ -94,7 +94,7 @@ class AmbiVector Index allocSize = m_allocatedElements * sizeof(ListEl); allocSize = (allocSize + sizeof(Scalar) - 1)/sizeof(Scalar); Scalar* newBuffer = new Scalar[allocSize]; - memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl)); + std::memcpy(newBuffer, m_buffer, copyElements * sizeof(ListEl)); delete[] m_buffer; m_buffer = newBuffer; } diff --git a/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index 492eb0a..9db119b 100644 --- a/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -17,7 +17,9 @@ namespace internal { template<typename Lhs, typename Rhs, typename ResultType> static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false) { - typedef typename remove_all<Lhs>::type::Scalar Scalar; + typedef typename remove_all<Lhs>::type::Scalar LhsScalar; + typedef typename remove_all<Rhs>::type::Scalar RhsScalar; + typedef typename remove_all<ResultType>::type::Scalar ResScalar; // make sure to call innerSize/outerSize since we fake the storage order. 
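// Aside (hypothetical use case, not part of this changeset): distinguishing
// LhsScalar/RhsScalar/ResScalar lets the kernel accumulate a mixed-scalar
// product in the result's scalar type, e.g. real times complex:
//
//   Eigen::SparseMatrix<double> A(3,3);
//   Eigen::SparseMatrix<std::complex<double> > B(3,3);
//   Eigen::SparseMatrix<std::complex<double> > C = A * B;
//   // the 'values' scratch buffer allocated below must then hold complex entries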
Index rows = lhs.innerSize(); @@ -25,7 +27,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r eigen_assert(lhs.outerSize() == rhs.innerSize()); ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0); - ei_declare_aligned_stack_constructed_variable(Scalar, values, rows, 0); + ei_declare_aligned_stack_constructed_variable(ResScalar, values, rows, 0); ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0); std::memset(mask,0,sizeof(bool)*rows); @@ -51,12 +53,12 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r Index nnz = 0; for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) { - Scalar y = rhsIt.value(); + RhsScalar y = rhsIt.value(); Index k = rhsIt.index(); for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) { Index i = lhsIt.index(); - Scalar x = lhsIt.value(); + LhsScalar x = lhsIt.value(); if(!mask[i]) { mask[i] = true; @@ -166,11 +168,12 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { - typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix; - RowMajorMatrix rhsRow = rhs; - RowMajorMatrix resRow(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl<RowMajorMatrix,Lhs,RowMajorMatrix>(rhsRow, lhs, resRow); - res = resRow; + typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRhs; + typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes; + RowMajorRhs rhsRow = rhs; + RowMajorRes resRow(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl<RowMajorRhs,Lhs,RowMajorRes>(rhsRow, lhs, resRow); + res = resRow; } }; @@ -179,10 +182,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { - typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorMatrix; - RowMajorMatrix lhsRow = lhs; - RowMajorMatrix resRow(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorMatrix,RowMajorMatrix>(rhs, lhsRow, resRow); + typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorLhs; + typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::StorageIndex> RowMajorRes; + RowMajorLhs lhsRow = lhs; + RowMajorRes resRow(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorLhs,RowMajorRes>(rhs, lhsRow, resRow); res = resRow; } }; @@ -219,10 +223,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { - typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix; - ColMajorMatrix lhsCol = lhs; - ColMajorMatrix resCol(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl<ColMajorMatrix,Rhs,ColMajorMatrix>(lhsCol, rhs, resCol); + typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs; + typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes; + ColMajorLhs lhsCol = lhs; + ColMajorRes resCol(lhs.rows(), rhs.cols()); + 
internal::conservative_sparse_sparse_product_impl<ColMajorLhs,Rhs,ColMajorRes>(lhsCol, rhs, resCol); res = resCol; } }; @@ -232,10 +237,11 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { - typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix; - ColMajorMatrix rhsCol = rhs; - ColMajorMatrix resCol(lhs.rows(), rhs.cols()); - internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorMatrix,ColMajorMatrix>(lhs, rhsCol, resCol); + typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs; + typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRes; + ColMajorRhs rhsCol = rhs; + ColMajorRes resCol(lhs.rows(), rhs.cols()); + internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorRhs,ColMajorRes>(lhs, rhsCol, resCol); res = resCol; } }; @@ -263,7 +269,8 @@ namespace internal { template<typename Lhs, typename Rhs, typename ResultType> static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res) { - typedef typename remove_all<Lhs>::type::Scalar Scalar; + typedef typename remove_all<Lhs>::type::Scalar LhsScalar; + typedef typename remove_all<Rhs>::type::Scalar RhsScalar; Index cols = rhs.outerSize(); eigen_assert(lhs.outerSize() == rhs.innerSize()); @@ -274,12 +281,12 @@ static void sparse_sparse_to_dense_product_impl(const Lhs& lhs, const Rhs& rhs, { for (typename evaluator<Rhs>::InnerIterator rhsIt(rhsEval, j); rhsIt; ++rhsIt) { - Scalar y = rhsIt.value(); + RhsScalar y = rhsIt.value(); Index k = rhsIt.index(); for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, k); lhsIt; ++lhsIt) { Index i = lhsIt.index(); - Scalar x = lhsIt.value(); + LhsScalar x = lhsIt.value(); res.coeffRef(i,j) += x * y; } } @@ -310,9 +317,9 @@ struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMa { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { - typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix; - ColMajorMatrix lhsCol(lhs); - internal::sparse_sparse_to_dense_product_impl<ColMajorMatrix,Rhs,ResultType>(lhsCol, rhs, res); + typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorLhs; + ColMajorLhs lhsCol(lhs); + internal::sparse_sparse_to_dense_product_impl<ColMajorLhs,Rhs,ResultType>(lhsCol, rhs, res); } }; @@ -321,9 +328,9 @@ struct sparse_sparse_to_dense_product_selector<Lhs,Rhs,ResultType,ColMajor,RowMa { static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res) { - typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorMatrix; - ColMajorMatrix rhsCol(rhs); - internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorMatrix,ResultType>(lhs, rhsCol, res); + typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename ResultType::StorageIndex> ColMajorRhs; + ColMajorRhs rhsCol(rhs); + internal::sparse_sparse_to_dense_product_impl<Lhs,ColMajorRhs,ResultType>(lhs, rhsCol, res); } }; diff --git a/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h b/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h index 5ab64f1..65611b3 100644 --- a/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -311,7 +311,7 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons 
while (i && i.index()<j) ++i; if(i && i.index()==j) { - res(j,k) += alpha * i.value() * rhs(j,k); + res.coeffRef(j,k) += alpha * i.value() * rhs.coeff(j,k); ++i; } } @@ -324,14 +324,14 @@ inline void sparse_selfadjoint_time_dense_product(const SparseLhsType& lhs, cons { LhsScalar lhs_ij = i.value(); if(!LhsIsRowMajor) lhs_ij = numext::conj(lhs_ij); - res_j += lhs_ij * rhs(i.index(),k); + res_j += lhs_ij * rhs.coeff(i.index(),k); res(i.index(),k) += numext::conj(lhs_ij) * rhs_j; } - res(j,k) += alpha * res_j; + res.coeffRef(j,k) += alpha * res_j; // handle diagonal coeff if (ProcessFirstHalf && i && (i.index()==j)) - res(j,k) += alpha * i.value() * rhs(j,k); + res.coeffRef(j,k) += alpha * i.value() * rhs.coeff(j,k); } } } diff --git a/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h b/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h index 21c4190..88820a4 100644 --- a/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +++ b/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h @@ -21,7 +21,8 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r { // return sparse_sparse_product_with_pruning_impl2(lhs,rhs,res); - typedef typename remove_all<Lhs>::type::Scalar Scalar; + typedef typename remove_all<Rhs>::type::Scalar RhsScalar; + typedef typename remove_all<ResultType>::type::Scalar ResScalar; typedef typename remove_all<Lhs>::type::StorageIndex StorageIndex; // make sure to call innerSize/outerSize since we fake the storage order. @@ -31,7 +32,7 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r eigen_assert(lhs.outerSize() == rhs.innerSize()); // allocate a temporary buffer - AmbiVector<Scalar,StorageIndex> tempVector(rows); + AmbiVector<ResScalar,StorageIndex> tempVector(rows); // mimics a resizeByInnerOuter: if(ResultType::IsRowMajor) @@ -63,14 +64,14 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r { // FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index()) tempVector.restart(); - Scalar x = rhsIt.value(); + RhsScalar x = rhsIt.value(); for (typename evaluator<Lhs>::InnerIterator lhsIt(lhsEval, rhsIt.index()); lhsIt; ++lhsIt) { tempVector.coeffRef(lhsIt.index()) += lhsIt.value() * x; } } res.startVec(j); - for (typename AmbiVector<Scalar,StorageIndex>::Iterator it(tempVector,tolerance); it; ++it) + for (typename AmbiVector<ResScalar,StorageIndex>::Iterator it(tempVector,tolerance); it; ++it) res.insertBackByOuterInner(j,it.index()) = it.value(); } res.finalize(); @@ -85,7 +86,6 @@ struct sparse_sparse_product_with_pruning_selector; template<typename Lhs, typename Rhs, typename ResultType> struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor> { - typedef typename traits<typename remove_all<Lhs>::type>::Scalar Scalar; typedef typename ResultType::RealScalar RealScalar; static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) @@ -129,8 +129,8 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,R typedef typename ResultType::RealScalar RealScalar; static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) { - typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs; - typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs; + typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename 
Lhs::StorageIndex> ColMajorMatrixLhs; + typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs; ColMajorMatrixLhs colLhs(lhs); ColMajorMatrixRhs colRhs(rhs); internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,ColMajorMatrixRhs,ResultType>(colLhs, colRhs, res, tolerance); @@ -149,7 +149,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,R typedef typename ResultType::RealScalar RealScalar; static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) { - typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixLhs; + typedef SparseMatrix<typename Lhs::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixLhs; RowMajorMatrixLhs rowLhs(lhs); sparse_sparse_product_with_pruning_selector<RowMajorMatrixLhs,Rhs,ResultType,RowMajor,RowMajor>(rowLhs,rhs,res,tolerance); } @@ -161,7 +161,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,C typedef typename ResultType::RealScalar RealScalar; static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) { - typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixRhs; + typedef SparseMatrix<typename Rhs::Scalar,RowMajor,typename Lhs::StorageIndex> RowMajorMatrixRhs; RowMajorMatrixRhs rowRhs(rhs); sparse_sparse_product_with_pruning_selector<Lhs,RowMajorMatrixRhs,ResultType,RowMajor,RowMajor,RowMajor>(lhs,rowRhs,res,tolerance); } @@ -173,7 +173,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,R typedef typename ResultType::RealScalar RealScalar; static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) { - typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs; + typedef SparseMatrix<typename Rhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixRhs; ColMajorMatrixRhs colRhs(rhs); internal::sparse_sparse_product_with_pruning_impl<Lhs,ColMajorMatrixRhs,ResultType>(lhs, colRhs, res, tolerance); } @@ -185,7 +185,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,C typedef typename ResultType::RealScalar RealScalar; static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance) { - typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs; + typedef SparseMatrix<typename Lhs::Scalar,ColMajor,typename Lhs::StorageIndex> ColMajorMatrixLhs; ColMajorMatrixLhs colLhs(lhs); internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,Rhs,ResultType>(colLhs, rhs, res, tolerance); } diff --git a/eigen/Eigen/src/SparseQR/SparseQR.h b/eigen/Eigen/src/SparseQR/SparseQR.h index 2d4498b..7409fca 100644 --- a/eigen/Eigen/src/SparseQR/SparseQR.h +++ b/eigen/Eigen/src/SparseQR/SparseQR.h @@ -52,7 +52,7 @@ namespace internal { * rank-revealing permutations. Use colsPermutation() to get it. * * Q is the orthogonal matrix represented as products of Householder reflectors. - * Use matrixQ() to get an expression and matrixQ().transpose() to get the transpose. + * Use matrixQ() to get an expression and matrixQ().adjoint() to get the adjoint. * You can then apply it to a vector. * * R is the sparse triangular or trapezoidal matrix. The latter occurs when A is rank-deficient.
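A sketch of the usage these SparseQR changes point at (hypothetical matrix A, assumed filled and in compressed mode beforehand; for complex scalars it is the adjoint, not the transpose, that inverts Q):

  Eigen::SparseMatrix<std::complex<double> > A(8, 4);   // assume filled + makeCompressed()
  Eigen::VectorXcd b = Eigen::VectorXcd::Random(8);
  Eigen::SparseQR<Eigen::SparseMatrix<std::complex<double> >, Eigen::COLAMDOrdering<int> > qr(A);
  Eigen::VectorXcd y = qr.matrixQ().adjoint() * b;      // computes Q^* b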
@@ -65,6 +65,7 @@ namespace internal { * \implsparsesolverconcept * * \warning The input sparse matrix A must be in compressed mode (see SparseMatrix::makeCompressed()). + * \warning For complex matrices matrixQ().transpose() will actually return the adjoint matrix. * */ template<typename _MatrixType, typename _OrderingType> @@ -196,9 +197,9 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> > Index rank = this->rank(); - // Compute Q^T * b; + // Compute Q^* * b; typename Dest::PlainObject y, b; - y = this->matrixQ().transpose() * B; + y = this->matrixQ().adjoint() * B; b = y; // Solve with the triangular matrix R @@ -604,7 +605,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived // Get the references SparseQR_QProduct(const SparseQRType& qr, const Derived& other, bool transpose) : m_qr(qr),m_other(other),m_transpose(transpose) {} - inline Index rows() const { return m_transpose ? m_qr.rows() : m_qr.cols(); } + inline Index rows() const { return m_qr.matrixQ().rows(); } inline Index cols() const { return m_other.cols(); } // Assign to a vector @@ -632,7 +633,10 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived } else { - eigen_assert(m_qr.m_Q.rows() == m_other.rows() && "Non conforming object sizes"); + eigen_assert(m_qr.matrixQ().cols() == m_other.rows() && "Non conforming object sizes"); + + res.conservativeResize(rows(), cols()); + // Compute res = Q * other column by column for(Index j = 0; j < res.cols(); j++) { @@ -641,7 +645,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived Scalar tau = Scalar(0); tau = m_qr.m_Q.col(k).dot(res.col(j)); if(tau==Scalar(0)) continue; - tau = tau * m_qr.m_hcoeffs(k); + tau = tau * numext::conj(m_qr.m_hcoeffs(k)); res.col(j) -= tau * m_qr.m_Q.col(k); } } @@ -650,7 +654,7 @@ struct SparseQR_QProduct : ReturnByValue<SparseQR_QProduct<SparseQRType, Derived const SparseQRType& m_qr; const Derived& m_other; - bool m_transpose; + bool m_transpose; // TODO this actually means adjoint }; template<typename SparseQRType> @@ -668,13 +672,14 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp { return SparseQR_QProduct<SparseQRType,Derived>(m_qr,other.derived(),false); } + // To use for operations with the adjoint of Q SparseQRMatrixQTransposeReturnType<SparseQRType> adjoint() const { return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr); } inline Index rows() const { return m_qr.rows(); } - inline Index cols() const { return (std::min)(m_qr.rows(),m_qr.cols()); } - // To use for operations with the transpose of Q + inline Index cols() const { return m_qr.rows(); } + // To use for operations with the transpose of Q FIXME this is the same as adjoint at the moment SparseQRMatrixQTransposeReturnType<SparseQRType> transpose() const { return SparseQRMatrixQTransposeReturnType<SparseQRType>(m_qr); @@ -682,6 +687,7 @@ struct SparseQRMatrixQReturnType : public EigenBase<SparseQRMatrixQReturnType<Sp const SparseQRType& m_qr; }; +// TODO this actually represents the adjoint of Q template<typename SparseQRType> struct SparseQRMatrixQTransposeReturnType { @@ -712,7 +718,7 @@ struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal: typedef typename DstXprType::StorageIndex StorageIndex; static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/) { - typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows()); + typename 
DstXprType::PlainObject idMat(src.rows(), src.cols()); idMat.setIdentity(); // Sort the sparse householder reflectors if needed const_cast<SparseQRType *>(&src.m_qr)->_sort_matrix_Q(); diff --git a/eigen/bench/spbench/CMakeLists.txt b/eigen/bench/spbench/CMakeLists.txt index 9327356..029ba6d 100644 --- a/eigen/bench/spbench/CMakeLists.txt +++ b/eigen/bench/spbench/CMakeLists.txt @@ -60,7 +60,7 @@ if(PASTIX_FOUND AND PASTIX_pastix_nompi.h_INCLUDE_DIRS AND BLAS_FOUND) endif(SCOTCH_FOUND) set(SPARSE_LIBS ${SPARSE_LIBS} ${PASTIX_LIBRARIES_DEP} ${ORDERING_LIBRARIES}) set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES_DEP}) -endif(PASTIX_FOUND AND BLAS_FOUND) +endif() if(METIS_FOUND) include_directories(${METIS_INCLUDE_DIRS}) diff --git a/eigen/blas/CMakeLists.txt b/eigen/blas/CMakeLists.txt index d0efb41..9887d58 100644 --- a/eigen/blas/CMakeLists.txt +++ b/eigen/blas/CMakeLists.txt @@ -45,10 +45,12 @@ install(TARGETS eigen_blas eigen_blas_static if(EIGEN_Fortran_COMPILER_WORKS) -if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) - add_subdirectory(testing) # can't do EXCLUDE_FROM_ALL here, breaks CTest -else() - add_subdirectory(testing EXCLUDE_FROM_ALL) +if(BUILD_TESTING) + if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) + add_subdirectory(testing) # can't do EXCLUDE_FROM_ALL here, breaks CTest + else() + add_subdirectory(testing EXCLUDE_FROM_ALL) + endif() endif() endif() diff --git a/eigen/cmake/FindBLAS.cmake b/eigen/cmake/FindBLAS.cmake index 9f74b07..e3395bc 100644 --- a/eigen/cmake/FindBLAS.cmake +++ b/eigen/cmake/FindBLAS.cmake @@ -152,7 +152,7 @@ set(_blas_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) # Check the language being used get_property( _LANGUAGES_ GLOBAL PROPERTY ENABLED_LANGUAGES ) -if( _LANGUAGES_ MATCHES Fortran ) +if( _LANGUAGES_ MATCHES Fortran AND CMAKE_Fortran_COMPILER) set( _CHECK_FORTRAN TRUE ) elseif( (_LANGUAGES_ MATCHES C) OR (_LANGUAGES_ MATCHES CXX) ) set( _CHECK_FORTRAN FALSE ) diff --git a/eigen/cmake/language_support.cmake b/eigen/cmake/language_support.cmake index 2f14f30..ddba509 100644 --- a/eigen/cmake/language_support.cmake +++ b/eigen/cmake/language_support.cmake @@ -26,7 +26,7 @@ function(workaround_9220 language language_works) cmake_minimum_required(VERSION 2.8.0) set (CMAKE_Fortran_FLAGS \"${CMAKE_Fortran_FLAGS}\") set (CMAKE_EXE_LINKER_FLAGS \"${CMAKE_EXE_LINKER_FLAGS}\") - enable_language(${language} OPTIONAL) + enable_language(${language}) ") file(REMOVE_RECURSE ${CMAKE_BINARY_DIR}/language_tests/${language}) file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/language_tests/${language}) diff --git a/eigen/doc/LeastSquares.dox b/eigen/doc/LeastSquares.dox index e2191a2..24dfe4b 100644 --- a/eigen/doc/LeastSquares.dox +++ b/eigen/doc/LeastSquares.dox @@ -16,7 +16,7 @@ equations is the fastest but least accurate, and the QR decomposition is in betw \section LeastSquaresSVD Using the SVD decomposition -The \link JacobiSVD::solve() solve() \endlink method in the JacobiSVD class can be directly used to +The \link BDCSVD::solve() solve() \endlink method in the BDCSVD class can be directly used to solve linear least squares systems.
It is not enough to compute only the singular values (the default for this class); you also need the singular vectors, but the thin SVD decomposition suffices for computing least squares solutions: diff --git a/eigen/doc/Pitfalls.dox b/eigen/doc/Pitfalls.dox index cf42eff..3f39505 100644 --- a/eigen/doc/Pitfalls.dox +++ b/eigen/doc/Pitfalls.dox @@ -2,10 +2,16 @@ namespace Eigen { /** \page TopicPitfalls Common pitfalls + \section TopicPitfalls_template_keyword Compilation error with template methods See this \link TopicTemplateKeyword page \endlink. +\section TopicPitfalls_aliasing Aliasing + +Don't miss this \link TopicAliasing page \endlink on aliasing, +especially if you get wrong results in statements where the destination appears on the right hand side of the expression. + \section TopicPitfalls_auto_keyword C++11 and the auto keyword In short: do not use the auto keyword with Eigen's expressions, unless you are 100% sure about what you are doing. In particular, do not use the auto keyword as a replacement for a Matrix<> type. Here is an example: diff --git a/eigen/doc/PreprocessorDirectives.dox b/eigen/doc/PreprocessorDirectives.dox index f01b39a..40cceb9 100644 --- a/eigen/doc/PreprocessorDirectives.dox +++ b/eigen/doc/PreprocessorDirectives.dox @@ -120,6 +120,10 @@ run time. However, these assertions do cost time and can thus be turned off. - \b \c EIGEN_STACK_ALLOCATION_LIMIT - defines the maximum bytes for a buffer to be allocated on the stack. For internal temporary buffers, dynamic memory allocation is employed as a fall back. For fixed-size matrices or arrays, exceeding this threshold raises a compile time assertion. Use 0 to set no limit. Default is 128 KB. + - \b \c EIGEN_STRONG_INLINE - This macro is used to qualify critical functions and methods that we expect the compiler to inline. + By default it is defined to \c __forceinline for MSVC and ICC, and to \c inline for other compilers. A typical usage is to + define it to \c inline for MSVC users wanting faster compilation times, at the risk of performance degradation in some rare + cases where the MSVC inliner fails to do a good job. - \c EIGEN_DONT_ALIGN - Deprecated, it is a synonym for \c EIGEN_MAX_ALIGN_BYTES=0. It disables alignment completely. %Eigen will not try to align its objects and does not expect that any objects passed to it are aligned. This will turn off vectorization if \b EIGEN_UNALIGNED_VECTORIZE=1. Not defined by default. diff --git a/eigen/doc/TopicLinearAlgebraDecompositions.dox b/eigen/doc/TopicLinearAlgebraDecompositions.dox index 4914706..d9db677 100644 --- a/eigen/doc/TopicLinearAlgebraDecompositions.dox +++ b/eigen/doc/TopicLinearAlgebraDecompositions.dox @@ -4,7 +4,7 @@ namespace Eigen { This page presents a catalogue of the dense matrix decompositions offered by Eigen. For an introduction on linear solvers and decompositions, check this \link TutorialLinearAlgebra page \endlink. -To get an overview of the true relative speed of the different decomposition, check this \link DenseDecompositionBenchmark benchmark \endlink. +To get an overview of the true relative speed of the different decompositions, check this \link DenseDecompositionBenchmark benchmark \endlink.
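To make the aliasing pitfall from doc/Pitfalls.dox above concrete, here is a minimal sketch (illustrative only, not part of the changeset): the destination m appears on the right-hand side, so an explicit temporary is required:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd m(2,2);
  m << 1, 2,
       3, 4;
  // m = m.transpose();        // wrong: m is overwritten while it is still being read
  m = m.transpose().eval();    // correct: evaluate into a temporary first
  // m.transposeInPlace();     // equivalent dedicated member
  std::cout << m << std::endl;
  return 0;
}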
\section TopicLinAlgBigTable Catalogue of decompositions offered by Eigen @@ -114,6 +114,18 @@ To get an overview of the true relative speed of the different decomposition, ch <tr><th class="inter" colspan="9">\n Singular values and eigenvalues decompositions</th></tr> <tr> + <td>BDCSVD (divide \& conquer)</td> + <td>-</td> + <td>One of the fastest SVD algorithms</td> + <td>Excellent</td> + <td>Yes</td> + <td>Singular values/vectors, least squares</td> + <td>Yes (and does least squares)</td> + <td>Excellent</td> + <td>Blocked bidiagonalization</td> + </tr> + + <tr> <td>JacobiSVD (two-sided)</td> <td>-</td> <td>Slow (but fast for small matrices)</td> diff --git a/eigen/doc/TutorialLinearAlgebra.dox b/eigen/doc/TutorialLinearAlgebra.dox index cb92cee..a727241 100644 --- a/eigen/doc/TutorialLinearAlgebra.dox +++ b/eigen/doc/TutorialLinearAlgebra.dox @@ -73,7 +73,7 @@ depending on your matrix and the trade-off you want to make: <td>ColPivHouseholderQR</td> <td>colPivHouseholderQr()</td> <td>None</td> - <td>++</td> + <td>+</td> <td>-</td> <td>+++</td> </tr> @@ -86,6 +86,14 @@ depending on your matrix and the trade-off you want to make: <td>+++</td> </tr> <tr class="alt"> + <td>CompleteOrthogonalDecomposition</td> + <td>completeOrthogonalDecomposition()</td> + <td>None</td> + <td>+</td> + <td>-</td> + <td>+++</td> + </tr> + <tr class="alt"> <td>LLT</td> <td>llt()</td> <td>Positive definite</td> @@ -102,14 +110,23 @@ depending on your matrix and the trade-off you want to make: <td>++</td> </tr> <tr class="alt"> + <td>BDCSVD</td> + <td>bdcSvd()</td> + <td>None</td> + <td>-</td> + <td>-</td> + <td>+++</td> + </tr> + <tr class="alt"> <td>JacobiSVD</td> <td>jacobiSvd()</td> <td>None</td> - <td>- -</td> + <td>-</td> <td>- - -</td> <td>+++</td> </tr> </table> +To get an overview of the true relative speed of the different decompositions, check this \link DenseDecompositionBenchmark benchmark \endlink. All of these decompositions offer a solve() method that works as in the above example. @@ -183,8 +200,11 @@ Here is an example: \section TutorialLinAlgLeastsquares Least squares solving -The most accurate method to do least squares solving is with a SVD decomposition. Eigen provides one -as the JacobiSVD class, and its solve() is doing least-squares solving. +The most accurate method to do least squares solving is with an SVD decomposition. +Eigen provides two implementations. +The recommended one is the BDCSVD class, which scales well for large problems +and automatically falls back to the JacobiSVD class for smaller problems. +For both classes, the solve() method does least-squares solving. Here is an example: <table class="example"> diff --git a/eigen/doc/UsingIntelMKL.dox b/eigen/doc/UsingIntelMKL.dox index a1a3a18..6de14af 100644 --- a/eigen/doc/UsingIntelMKL.dox +++ b/eigen/doc/UsingIntelMKL.dox @@ -63,6 +63,8 @@ In addition you can choose which parts will be substituted by defining one or mu <tr><td>\c EIGEN_USE_MKL_ALL </td><td>Defines \c EIGEN_USE_BLAS, \c EIGEN_USE_LAPACKE, and \c EIGEN_USE_MKL_VML </td></tr> </table> +The options can be combined with \b MKL_DIRECT_CALL to enable the MKL direct call feature. This may help to increase the performance of some MKL BLAS (?GEMM, ?GEMV, ?TRSM, ?AXPY and ?DOT) and LAPACK (LU, Cholesky and QR) routines for very small matrices. To make it work properly, the macro \c EIGEN_USE_MKL must also be defined if none of the other \c EIGEN_USE_MKL_* macros is defined.
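A concrete configuration illustrating the MKL_DIRECT_CALL paragraph above (a hedged sketch: the macros are the documented ones, but the compile and link flags for MKL are assumed to be set up separately):

#define EIGEN_USE_MKL      // required here because no EIGEN_USE_MKL_* macro is defined
#define EIGEN_USE_LAPACKE  // route supported LAPACK decompositions to LAPACKE/MKL
#define MKL_DIRECT_CALL    // enable MKL's direct-call fast path for small matrices
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4,4);
  Eigen::VectorXd b = Eigen::VectorXd::Random(4);
  // A small LU solve, the kind of routine MKL_DIRECT_CALL aims to speed up.
  Eigen::VectorXd x = A.partialPivLu().solve(b);
  return x.allFinite() ? 0 : 1;
}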
+ Note that the BLAS and LAPACKE backends can be enabled for any F77 compatible BLAS and LAPACK libraries. See this \link TopicUsingBlasLapack page \endlink for the details. Finally, the PARDISO sparse solver shipped with Intel MKL can be used through the \ref PardisoLU, \ref PardisoLLT and \ref PardisoLDLT classes of the \ref PardisoSupport_Module. diff --git a/eigen/doc/eigen_navtree_hacks.js b/eigen/doc/eigen_navtree_hacks.js index bd7e02b..39c59f7 100644 --- a/eigen/doc/eigen_navtree_hacks.js +++ b/eigen/doc/eigen_navtree_hacks.js @@ -65,6 +65,10 @@ function getNode(o, po) function resizeHeight() { var toc = $("#nav-toc"); + var header = $("#header"); + var content = $("#doc-content"); + var navtree = $("#nav-path"); + var sidenav = $("#side-nav"); var tocHeight = toc.height(); // <- we added this line var headerHeight = header.height(); var footerHeight = footer.height(); diff --git a/eigen/doc/eigendoxy.css b/eigen/doc/eigendoxy.css index 6274e6c..9a036d2 100644 --- a/eigen/doc/eigendoxy.css +++ b/eigen/doc/eigendoxy.css @@ -214,3 +214,8 @@ h3.version { td.width20em p.endtd { width: 20em; } + +/* needed for huge screens */ +.ui-resizable-e { + background-repeat: repeat-y; +}
\ No newline at end of file diff --git a/eigen/doc/eigendoxy_footer.html.in b/eigen/doc/eigendoxy_footer.html.in index 878244a..9ac0596 100644 --- a/eigen/doc/eigendoxy_footer.html.in +++ b/eigen/doc/eigendoxy_footer.html.in @@ -5,14 +5,14 @@ $navpath <li class="footer">$generatedby <a href="http://www.doxygen.org/index.html"> - <img class="footer" src="$relpath$doxygen.png" alt="doxygen"/></a> $doxygenversion </li> + <img class="footer" src="$relpath^doxygen.png" alt="doxygen"/></a> $doxygenversion </li> </ul> </div> <!--END GENERATE_TREEVIEW--> <!--BEGIN !GENERATE_TREEVIEW--> <hr class="footer"/><address class="footer"><small> $generatedby  <a href="http://www.doxygen.org/index.html"> -<img class="footer" src="$relpath$doxygen.png" alt="doxygen"/> +<img class="footer" src="$relpath^doxygen.png" alt="doxygen"/> </a> $doxygenversion </small></address> <!--END !GENERATE_TREEVIEW--> diff --git a/eigen/doc/eigendoxy_header.html.in b/eigen/doc/eigendoxy_header.html.in index 0f3859f..bb149f8 100644 --- a/eigen/doc/eigendoxy_header.html.in +++ b/eigen/doc/eigendoxy_header.html.in @@ -4,25 +4,23 @@ <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/> <meta http-equiv="X-UA-Compatible" content="IE=9"/> <meta name="generator" content="Doxygen $doxygenversion"/> +<meta name="viewport" content="width=device-width, initial-scale=1"/> <!--BEGIN PROJECT_NAME--><title>$projectname: $title</title><!--END PROJECT_NAME--> <!--BEGIN !PROJECT_NAME--><title>$title</title><!--END !PROJECT_NAME--> -<link href="$relpath$tabs.css" rel="stylesheet" type="text/css"/> -<script type="text/javascript" src="$relpath$jquery.js"></script> -<script type="text/javascript" src="$relpath$dynsections.js"></script> +<link href="$relpath^tabs.css" rel="stylesheet" type="text/css"/> +<script type="text/javascript" src="$relpath^jquery.js"></script> +<script type="text/javascript" src="$relpath^dynsections.js"></script> $treeview $search $mathjax -<link href="$relpath$$stylesheet" rel="stylesheet" type="text/css" /> +<link href="$relpath^$stylesheet" rel="stylesheet" type="text/css" /> <link href="$relpath$eigendoxy.css" rel="stylesheet" type="text/css"> <!-- $extrastylesheet --> <script type="text/javascript" src="$relpath$eigen_navtree_hacks.js"></script> -<!-- <script type="text/javascript"> --> -<!-- </script> --> </head> <body> <div id="top"><!-- do not remove this div, it is closed by doxygen! 
--> -<!-- <a name="top"></a> --> <!--BEGIN TITLEAREA--> <div id="titlearea"> @@ -30,10 +28,10 @@ $mathjax <tbody> <tr style="height: 56px;"> <!--BEGIN PROJECT_LOGO--> - <td id="projectlogo"><img alt="Logo" src="$relpath$$projectlogo"/></td> + <td id="projectlogo"><img alt="Logo" src="$relpath^$projectlogo"/></td> <!--END PROJECT_LOGO--> <!--BEGIN PROJECT_NAME--> - <td style="padding-left: 0.5em;"> + <td id="projectalign" style="padding-left: 0.5em;"> <div id="projectname"><a href="http://eigen.tuxfamily.org">$projectname</a> <!--BEGIN PROJECT_NUMBER--> <span id="projectnumber">$projectnumber</span><!--END PROJECT_NUMBER--> </div> @@ -42,7 +40,7 @@ $mathjax <!--END PROJECT_NAME--> <!--BEGIN !PROJECT_NAME--> <!--BEGIN PROJECT_BRIEF--> - <td style="padding-left: 0.5em;"> + <td id="projectalign" style="padding-left: 0.5em;"> <div id="projectbrief">$projectbrief</div> </td> <!--END PROJECT_BRIEF--> diff --git a/eigen/doc/examples/Cwise_lgamma.cpp b/eigen/doc/examples/Cwise_lgamma.cpp index f1c4f50..6bfaccb 100644 --- a/eigen/doc/examples/Cwise_lgamma.cpp +++ b/eigen/doc/examples/Cwise_lgamma.cpp @@ -6,4 +6,4 @@ int main() { Array4d v(0.5,10,0,-1); std::cout << v.lgamma() << std::endl; -}
\ No newline at end of file +} diff --git a/eigen/doc/examples/TutorialLinAlgSVDSolve.cpp b/eigen/doc/examples/TutorialLinAlgSVDSolve.cpp index 9fbc031..f109f04 100644 --- a/eigen/doc/examples/TutorialLinAlgSVDSolve.cpp +++ b/eigen/doc/examples/TutorialLinAlgSVDSolve.cpp @@ -11,5 +11,5 @@ int main() VectorXf b = VectorXf::Random(3); cout << "Here is the right hand side b:\n" << b << endl; cout << "The least-squares solution is:\n" - << A.jacobiSvd(ComputeThinU | ComputeThinV).solve(b) << endl; + << A.bdcSvd(ComputeThinU | ComputeThinV).solve(b) << endl; } diff --git a/eigen/doc/examples/Tutorial_simple_example_dynamic_size.cpp b/eigen/doc/examples/Tutorial_simple_example_dynamic_size.cpp index 0f0280e..defcb1e 100644 --- a/eigen/doc/examples/Tutorial_simple_example_dynamic_size.cpp +++ b/eigen/doc/examples/Tutorial_simple_example_dynamic_size.cpp @@ -10,7 +10,7 @@ int main() MatrixXi m(size,size+1); // a (size)x(size+1)-matrix of int's for (int j=0; j<m.cols(); ++j) // loop over columns for (int i=0; i<m.rows(); ++i) // loop over rows - m(i,j) = i+j*m.rows(); // to access matrix coefficients, + m(i,j) = i+j*size; // to access matrix coefficients, // use operator()(int,int) std::cout << m << "\n\n"; } diff --git a/eigen/doc/examples/matrixfree_cg.cpp b/eigen/doc/examples/matrixfree_cg.cpp index 6a205ae..7469938 100644 --- a/eigen/doc/examples/matrixfree_cg.cpp +++ b/eigen/doc/examples/matrixfree_cg.cpp @@ -67,6 +67,7 @@ namespace internal { // This method should implement "dst += alpha * lhs * rhs" inplace, // however, for iterative solvers, alpha is always equal to 1, so let's not bother about it. assert(alpha==Scalar(1) && "scaling is not implemented"); + EIGEN_ONLY_USED_FOR_DEBUG(alpha); // Here we could simply call dst.noalias() += lhs.my_matrix() * rhs, // but let's do something fancier (and less efficient): diff --git a/eigen/doc/special_examples/Tutorial_sparse_example.cpp b/eigen/doc/special_examples/Tutorial_sparse_example.cpp index 830e196..c5767a8 100644 --- a/eigen/doc/special_examples/Tutorial_sparse_example.cpp +++ b/eigen/doc/special_examples/Tutorial_sparse_example.cpp @@ -1,5 +1,6 @@ #include <Eigen/Sparse> #include <vector> +#include <iostream> typedef Eigen::SparseMatrix<double> SpMat; // declares a column-major sparse matrix type of double typedef Eigen::Triplet<double> T; @@ -9,7 +10,10 @@ void saveAsBitmap(const Eigen::VectorXd& x, int n, const char* filename); int main(int argc, char** argv) { - assert(argc==2); + if(argc!=2) { + std::cerr << "Error: expected one and only one argument.\n"; + return -1; + } int n = 300; // size of the image int m = n*n; // number of unknowns (=number of pixels) diff --git a/eigen/lapack/CMakeLists.txt b/eigen/lapack/CMakeLists.txt index 9883d4c..6df1fa9 100644 --- a/eigen/lapack/CMakeLists.txt +++ b/eigen/lapack/CMakeLists.txt @@ -49,7 +49,7 @@ if(EIGEN_ENABLE_LAPACK_TESTS) INACTIVITY_TIMEOUT 15 TIMEOUT 240 STATUS download_status - EXPECTED_MD5 5758ce55afcf79da98de8b9de1615ad5 + EXPECTED_MD5 ab5742640617e3221a873aba44bbdc93 SHOW_PROGRESS) message(STATUS ${download_status}) diff --git a/eigen/test/bdcsvd.cpp b/eigen/test/bdcsvd.cpp index f9f687a..6c7b096 100644 --- a/eigen/test/bdcsvd.cpp +++ b/eigen/test/bdcsvd.cpp @@ -104,7 +104,8 @@ void test_bdcsvd() CALL_SUBTEST_7( BDCSVD<MatrixXf>(10,10) ); // Check that preallocation avoids subsequent mallocs - CALL_SUBTEST_9( svd_preallocate<void>() ); + // Disabled because not supported by BDCSVD + // CALL_SUBTEST_9( svd_preallocate<void>() ); CALL_SUBTEST_2( svd_underoverflow<void>() ); } diff
--git a/eigen/test/block.cpp b/eigen/test/block.cpp index 39565af..f64bdae 100644 --- a/eigen/test/block.cpp +++ b/eigen/test/block.cpp @@ -37,7 +37,7 @@ template<typename MatrixType> void block(const MatrixType& m) typedef typename MatrixType::RealScalar RealScalar; typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType; typedef Matrix<Scalar, 1, MatrixType::ColsAtCompileTime> RowVectorType; - typedef Matrix<Scalar, Dynamic, Dynamic> DynamicMatrixType; + typedef Matrix<Scalar, Dynamic, Dynamic, MatrixType::IsRowMajor?RowMajor:ColMajor> DynamicMatrixType; typedef Matrix<Scalar, Dynamic, 1> DynamicVectorType; Index rows = m.rows(); @@ -131,7 +131,7 @@ template<typename MatrixType> void block(const MatrixType& m) VERIFY(numext::real(ones.col(c1).dot(ones.col(c2))) == RealScalar(rows)); VERIFY(numext::real(ones.row(r1).dot(ones.row(r2))) == RealScalar(cols)); - // chekc that linear acccessors works on blocks + // check that linear accessors work on blocks m1 = m1_copy; if((MatrixType::Flags&RowMajorBit)==0) VERIFY_IS_EQUAL(m1.leftCols(c1).coeff(r1+c1*rows), m1(r1,c1)); @@ -155,6 +155,13 @@ template<typename MatrixType> void block(const MatrixType& m) VERIFY_IS_APPROX( ((m1+m2).block(r1,c1,r2-r1+1,c2-c1+1).transpose().col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); VERIFY_IS_APPROX( ((m1+m2).transpose().block(c1,r1,c2-c1+1,r2-r1+1).col(0)) , ((m1+m2).row(r1).segment(c1,c2-c1+1)).transpose() ); + VERIFY_IS_APPROX( (m1*1).topRows(r1), m1.topRows(r1) ); + VERIFY_IS_APPROX( (m1*1).leftCols(c1), m1.leftCols(c1) ); + VERIFY_IS_APPROX( (m1*1).transpose().topRows(c1), m1.transpose().topRows(c1) ); + VERIFY_IS_APPROX( (m1*1).transpose().leftCols(r1), m1.transpose().leftCols(r1) ); + VERIFY_IS_APPROX( (m1*1).transpose().middleRows(c1,c2-c1+1), m1.transpose().middleRows(c1,c2-c1+1) ); + VERIFY_IS_APPROX( (m1*1).transpose().middleCols(r1,r2-r1+1), m1.transpose().middleCols(r1,r2-r1+1) ); + // evaluation into plain matrices from expressions with direct access (stress MapBase) DynamicMatrixType dm; DynamicVectorType dv; diff --git a/eigen/test/cholesky.cpp b/eigen/test/cholesky.cpp index 8ad5ac6..b4b6bda 100644 --- a/eigen/test/cholesky.cpp +++ b/eigen/test/cholesky.cpp @@ -373,6 +373,7 @@ template<typename MatrixType> void cholesky_definiteness(const MatrixType& m) VERIFY(ldlt.info()==Success); VERIFY(!ldlt.isNegative()); VERIFY(!ldlt.isPositive()); + VERIFY_IS_APPROX(mat,ldlt.reconstructedMatrix()); } { mat << 1, 2, 2, 1; @@ -380,6 +381,7 @@ template<typename MatrixType> void cholesky_definiteness(const MatrixType& m) VERIFY(ldlt.info()==Success); VERIFY(!ldlt.isNegative()); VERIFY(!ldlt.isPositive()); + VERIFY_IS_APPROX(mat,ldlt.reconstructedMatrix()); } { mat << 0, 0, 0, 0; @@ -387,6 +389,7 @@ template<typename MatrixType> void cholesky_definiteness(const MatrixType& m) VERIFY(ldlt.info()==Success); VERIFY(ldlt.isNegative()); VERIFY(ldlt.isPositive()); + VERIFY_IS_APPROX(mat,ldlt.reconstructedMatrix()); } { mat << 0, 0, 0, 1; @@ -394,6 +397,7 @@ template<typename MatrixType> void cholesky_definiteness(const MatrixType& m) VERIFY(ldlt.info()==Success); VERIFY(!ldlt.isNegative()); VERIFY(ldlt.isPositive()); + VERIFY_IS_APPROX(mat,ldlt.reconstructedMatrix()); } { mat << -1, 0, 0, 0; @@ -401,6 +405,7 @@ template<typename MatrixType> void cholesky_definiteness(const MatrixType& m) VERIFY(ldlt.info()==Success); VERIFY(ldlt.isNegative()); VERIFY(!ldlt.isPositive()); + VERIFY_IS_APPROX(mat,ldlt.reconstructedMatrix()); } } @@ -452,6 +457,18 @@ void cholesky_faillure_cases()
VERIFY(ldlt.info()==NumericalIssue); VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); } + + // bug 1479 + { + mat.resize(4,4); + mat << 1, 2, 0, 1, + 2, 4, 0, 2, + 0, 0, 0, 1, + 1, 2, 1, 1; + ldlt.compute(mat); + VERIFY(ldlt.info()==NumericalIssue); + VERIFY_IS_NOT_APPROX(mat,ldlt.reconstructedMatrix()); + } } template<typename MatrixType> void cholesky_verify_assert() diff --git a/eigen/test/cuda_basic.cu b/eigen/test/cuda_basic.cu index cb2e416..ce66c2c 100644 --- a/eigen/test/cuda_basic.cu +++ b/eigen/test/cuda_basic.cu @@ -20,9 +20,6 @@ #include <math_constants.h> #include <cuda.h> -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 -#include <cuda_fp16.h> -#endif #include "main.h" #include "cuda_common.h" diff --git a/eigen/test/diagonal.cpp b/eigen/test/diagonal.cpp index c1546e9..0b5ae82 100644 --- a/eigen/test/diagonal.cpp +++ b/eigen/test/diagonal.cpp @@ -66,6 +66,9 @@ template<typename MatrixType> void diagonal(const MatrixType& m) m2.diagonal(N2).coeffRef(0) = Scalar(2)*s1; VERIFY_IS_APPROX(m2.diagonal(N2).coeff(0), Scalar(2)*s1); } + + VERIFY( m1.diagonal( cols).size()==0 ); + VERIFY( m1.diagonal(-rows).size()==0 ); } template<typename MatrixType> void diagonal_assert(const MatrixType& m) { @@ -81,6 +84,9 @@ template<typename MatrixType> void diagonal_assert(const MatrixType& m) { VERIFY_RAISES_ASSERT( m1.array() *= m1.diagonal().array() ); VERIFY_RAISES_ASSERT( m1.array() /= m1.diagonal().array() ); } + + VERIFY_RAISES_ASSERT( m1.diagonal(cols+1) ); + VERIFY_RAISES_ASSERT( m1.diagonal(-(rows+1)) ); } void test_diagonal() @@ -95,7 +101,6 @@ void test_diagonal() CALL_SUBTEST_2( diagonal(MatrixXcd(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_1( diagonal(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_1( diagonal(Matrix<float,Dynamic,4>(3, 4)) ); + CALL_SUBTEST_1( diagonal_assert(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); } - - CALL_SUBTEST_1( diagonal_assert(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); } diff --git a/eigen/test/diagonalmatrices.cpp b/eigen/test/diagonalmatrices.cpp index cd6dc8c..a4ff102 100644 --- a/eigen/test/diagonalmatrices.cpp +++ b/eigen/test/diagonalmatrices.cpp @@ -30,6 +30,7 @@ template<typename MatrixType> void diagonalmatrices(const MatrixType& m) v2 = VectorType::Random(rows); RowVectorType rv1 = RowVectorType::Random(cols), rv2 = RowVectorType::Random(cols); + LeftDiagonalMatrix ldm1(v1), ldm2(v2); RightDiagonalMatrix rdm1(rv1), rdm2(rv2); @@ -99,6 +100,38 @@ template<typename MatrixType> void diagonalmatrices(const MatrixType& m) VERIFY_IS_APPROX( (sq_m1 += (s1*v1).asDiagonal()), sq_m2 += (s1*v1).asDiagonal().toDenseMatrix() ); VERIFY_IS_APPROX( (sq_m1 -= (s1*v1).asDiagonal()), sq_m2 -= (s1*v1).asDiagonal().toDenseMatrix() ); VERIFY_IS_APPROX( (sq_m1 = (s1*v1).asDiagonal()), (s1*v1).asDiagonal().toDenseMatrix() ); + + sq_m1.setRandom(); + sq_m2 = v1.asDiagonal(); + sq_m2 = sq_m1 * sq_m2; + VERIFY_IS_APPROX( (sq_m1*v1.asDiagonal()).col(i), sq_m2.col(i) ); + VERIFY_IS_APPROX( (sq_m1*v1.asDiagonal()).row(i), sq_m2.row(i) ); +} + +template<typename MatrixType> void as_scalar_product(const MatrixType& m) +{ + typedef typename MatrixType::Scalar Scalar; + typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType; + typedef Matrix<Scalar, Dynamic, Dynamic> DynMatrixType; + typedef 
Matrix<Scalar, Dynamic, 1> DynVectorType; + typedef Matrix<Scalar, 1, Dynamic> DynRowVectorType; + + Index rows = m.rows(); + Index depth = internal::random<Index>(1,EIGEN_TEST_MAX_SIZE); + + VectorType v1 = VectorType::Random(rows); + DynVectorType dv1 = DynVectorType::Random(depth); + DynRowVectorType drv1 = DynRowVectorType::Random(depth); + DynMatrixType dm1 = dv1; + DynMatrixType drm1 = drv1; + + Scalar s = v1(0); + + VERIFY_IS_APPROX( v1.asDiagonal() * drv1, s*drv1 ); + VERIFY_IS_APPROX( dv1 * v1.asDiagonal(), dv1*s ); + + VERIFY_IS_APPROX( v1.asDiagonal() * drm1, s*drm1 ); + VERIFY_IS_APPROX( dm1 * v1.asDiagonal(), dm1*s ); } template<int> @@ -116,14 +149,19 @@ void test_diagonalmatrices() { for(int i = 0; i < g_repeat; i++) { CALL_SUBTEST_1( diagonalmatrices(Matrix<float, 1, 1>()) ); + CALL_SUBTEST_1( as_scalar_product(Matrix<float, 1, 1>()) ); + CALL_SUBTEST_2( diagonalmatrices(Matrix3f()) ); CALL_SUBTEST_3( diagonalmatrices(Matrix<double,3,3,RowMajor>()) ); CALL_SUBTEST_4( diagonalmatrices(Matrix4d()) ); CALL_SUBTEST_5( diagonalmatrices(Matrix<float,4,4,RowMajor>()) ); CALL_SUBTEST_6( diagonalmatrices(MatrixXcf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_6( as_scalar_product(MatrixXcf(1,1)) ); CALL_SUBTEST_7( diagonalmatrices(MatrixXi(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_8( diagonalmatrices(Matrix<double,Dynamic,Dynamic,RowMajor>(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); CALL_SUBTEST_9( diagonalmatrices(MatrixXf(internal::random<int>(1,EIGEN_TEST_MAX_SIZE), internal::random<int>(1,EIGEN_TEST_MAX_SIZE))) ); + CALL_SUBTEST_9( diagonalmatrices(MatrixXf(1,1)) ); + CALL_SUBTEST_9( as_scalar_product(MatrixXf(1,1)) ); } CALL_SUBTEST_10( bug987<0>() ); } diff --git a/eigen/test/eigensolver_generalized_real.cpp b/eigen/test/eigensolver_generalized_real.cpp index 9c0838b..f7861d3 100644 --- a/eigen/test/eigensolver_generalized_real.cpp +++ b/eigen/test/eigensolver_generalized_real.cpp @@ -77,6 +77,13 @@ template<typename MatrixType> void generalized_eigensolver_real(const MatrixType GeneralizedEigenSolver<MatrixType> eig2(a.adjoint() * a,b.adjoint() * b); eig2.compute(a.adjoint() * a,b.adjoint() * b); } + + // check without eigenvectors + { + GeneralizedEigenSolver<MatrixType> eig1(spdA, spdB, true); + GeneralizedEigenSolver<MatrixType> eig2(spdA, spdB, false); + VERIFY_IS_APPROX(eig1.eigenvalues(), eig2.eigenvalues()); + } } void test_eigensolver_generalized_real() diff --git a/eigen/test/geo_quaternion.cpp b/eigen/test/geo_quaternion.cpp index 96889e7..8ee8fdb 100644 --- a/eigen/test/geo_quaternion.cpp +++ b/eigen/test/geo_quaternion.cpp @@ -231,6 +231,19 @@ template<typename Scalar> void mapQuaternion(void){ VERIFY_IS_APPROX(mq3*mq2, q3*q2); VERIFY_IS_APPROX(mcq1*mq2, q1*q2); VERIFY_IS_APPROX(mcq3*mq2, q3*q2); + + // Bug 1461, compilation issue with Map<const Quat>::w(), and other reference/constness checks: + VERIFY_IS_APPROX(mcq3.coeffs().x() + mcq3.coeffs().y() + mcq3.coeffs().z() + mcq3.coeffs().w(), mcq3.coeffs().sum()); + VERIFY_IS_APPROX(mcq3.x() + mcq3.y() + mcq3.z() + mcq3.w(), mcq3.coeffs().sum()); + mq3.w() = 1; + const Quaternionx& cq3(q3); + VERIFY( &cq3.x() == &q3.x() ); + const MQuaternionUA& cmq3(mq3); + VERIFY( &cmq3.x() == &mq3.x() ); + // FIXME the following should be ok. 
The problem is that currently the LValueBit flag + // is used to determine whether we can return a coeff by reference or not, which is not enough for Map<const ...>. + //const MCQuaternionUA& cmcq3(mcq3); + //VERIFY( &cmcq3.x() == &mcq3.x() ); } template<typename Scalar> void quaternionAlignment(void){ diff --git a/eigen/test/integer_types.cpp b/eigen/test/integer_types.cpp index a21f73a..2512631 100644 --- a/eigen/test/integer_types.cpp +++ b/eigen/test/integer_types.cpp @@ -162,8 +162,8 @@ void test_integer_types() VERIFY_IS_EQUAL(internal::scalar_div_cost<int>::value, 8); VERIFY_IS_EQUAL(internal::scalar_div_cost<unsigned int>::value, 8); if(sizeof(long)>sizeof(int)) { - VERIFY(internal::scalar_div_cost<long>::value > internal::scalar_div_cost<int>::value); - VERIFY(internal::scalar_div_cost<unsigned long>::value > internal::scalar_div_cost<int>::value); + VERIFY(int(internal::scalar_div_cost<long>::value) > int(internal::scalar_div_cost<int>::value)); + VERIFY(int(internal::scalar_div_cost<unsigned long>::value) > int(internal::scalar_div_cost<int>::value)); } #endif } diff --git a/eigen/test/inverse.cpp b/eigen/test/inverse.cpp index 5c6777a..97fe6ff 100644 --- a/eigen/test/inverse.cpp +++ b/eigen/test/inverse.cpp @@ -47,7 +47,7 @@ template<typename MatrixType> void inverse(const MatrixType& m) //computeInverseAndDetWithCheck tests //First: an invertible matrix bool invertible; - RealScalar det; + Scalar det; m2.setZero(); m1.computeInverseAndDetWithCheck(m2, det, invertible); @@ -113,5 +113,7 @@ void test_inverse() CALL_SUBTEST_7( inverse(Matrix4d()) ); CALL_SUBTEST_7( inverse(Matrix<double,4,4,DontAlign>()) ); + + CALL_SUBTEST_8( inverse(Matrix4cd()) ); } } diff --git a/eigen/test/main.h b/eigen/test/main.h index bd53251..8c868ee 100644 --- a/eigen/test/main.h +++ b/eigen/test/main.h @@ -50,6 +50,19 @@ #endif #endif +// Same for cuda_fp16.h +#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9) +#define EIGEN_TEST_CUDACC_VER ((__CUDACC_VER_MAJOR__ * 10000) + (__CUDACC_VER_MINOR__ * 100)) +#elif defined(__CUDACC_VER__) +#define EIGEN_TEST_CUDACC_VER __CUDACC_VER__ +#else +#define EIGEN_TEST_CUDACC_VER 0 +#endif + +#if EIGEN_TEST_CUDACC_VER >= 70500 +#include <cuda_fp16.h> +#endif + // To test that all calls from Eigen code to std::min() and std::max() are // protected by parentheses against macro expansion, the min()/max() macros // are defined here and any not-parenthesized min/max call will cause a @@ -162,6 +175,12 @@ namespace Eigen eigen_assert_exception(void) {} ~eigen_assert_exception() { Eigen::no_more_assert = false; } }; + + struct eigen_static_assert_exception + { + eigen_static_assert_exception(void) {} + ~eigen_static_assert_exception() { Eigen::no_more_assert = false; } + }; } // If EIGEN_DEBUG_ASSERTS is defined and if no assertion is triggered while // one should have been, then the list of executed assertions is printed out.
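The inverse.cpp fix above (declaring the determinant as Scalar instead of RealScalar) matters as soon as complex matrices are exercised; a minimal sketch of the corrected usage, mirroring the new Matrix4cd subtest (illustrative only, not part of the changeset):

#include <Eigen/Dense>
#include <complex>

void inverse_with_det_sketch()
{
  Eigen::Matrix4cd m = Eigen::Matrix4cd::Random();
  Eigen::Matrix4cd inv;
  std::complex<double> det;  // Scalar: a complex determinant does not fit in a RealScalar
  bool invertible;
  m.computeInverseAndDetWithCheck(inv, det, invertible);
}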
@@ -225,6 +244,7 @@ namespace Eigen else \ EIGEN_THROW_X(Eigen::eigen_assert_exception()); \ } + #ifdef EIGEN_EXCEPTIONS #define VERIFY_RAISES_ASSERT(a) { \ Eigen::no_more_assert = false; \ @@ -236,13 +256,39 @@ namespace Eigen catch (Eigen::eigen_assert_exception&) { VERIFY(true); } \ Eigen::report_on_cerr_on_assert_failure = true; \ } - #endif //EIGEN_EXCEPTIONS + #endif // EIGEN_EXCEPTIONS #endif // EIGEN_DEBUG_ASSERTS + #if defined(TEST_CHECK_STATIC_ASSERTIONS) && defined(EIGEN_EXCEPTIONS) + #define EIGEN_STATIC_ASSERT(a,MSG) \ + if( (!Eigen::internal::copy_bool(a)) && (!no_more_assert) )\ + { \ + Eigen::no_more_assert = true; \ + if(report_on_cerr_on_assert_failure) \ + eigen_plain_assert((a) && #MSG); \ + else \ + EIGEN_THROW_X(Eigen::eigen_static_assert_exception()); \ + } + #define VERIFY_RAISES_STATIC_ASSERT(a) { \ + Eigen::no_more_assert = false; \ + Eigen::report_on_cerr_on_assert_failure = false; \ + try { \ + a; \ + VERIFY(Eigen::should_raise_an_assert && # a); \ + } \ + catch (Eigen::eigen_static_assert_exception&) { VERIFY(true); } \ + Eigen::report_on_cerr_on_assert_failure = true; \ + } + #endif // TEST_CHECK_STATIC_ASSERTIONS + #ifndef VERIFY_RAISES_ASSERT #define VERIFY_RAISES_ASSERT(a) \ std::cout << "Can't VERIFY_RAISES_ASSERT( " #a " ) with exceptions disabled\n"; #endif +#ifndef VERIFY_RAISES_STATIC_ASSERT + #define VERIFY_RAISES_STATIC_ASSERT(a) \ + std::cout << "Can't VERIFY_RAISES_STATIC_ASSERT( " #a " ) with exceptions disabled\n"; +#endif #if !defined(__CUDACC__) #define EIGEN_USE_CUSTOM_ASSERT @@ -251,10 +297,10 @@ namespace Eigen #else // EIGEN_NO_ASSERTION_CHECKING #define VERIFY_RAISES_ASSERT(a) {} + #define VERIFY_RAISES_STATIC_ASSERT(a) {} #endif // EIGEN_NO_ASSERTION_CHECKING - #define EIGEN_INTERNAL_DEBUGGING #include <Eigen/QR> // required for createRandomPIMatrixOfRank @@ -313,7 +359,7 @@ template<> inline long double test_precision<std::complex<long double> >() { ret inline bool test_isApprox(const short& a, const short& b) { return internal::isApprox(a, b, test_precision<short>()); } inline bool test_isApprox(const unsigned short& a, const unsigned short& b) -{ return internal::isApprox(a, b, test_precision<unsigned long>()); } +{ return internal::isApprox(a, b, test_precision<unsigned short>()); } inline bool test_isApprox(const unsigned int& a, const unsigned int& b) { return internal::isApprox(a, b, test_precision<unsigned int>()); } inline bool test_isApprox(const long& a, const long& b) diff --git a/eigen/test/mapped_matrix.cpp b/eigen/test/mapped_matrix.cpp index 6a84c58..ef350b2 100644 --- a/eigen/test/mapped_matrix.cpp +++ b/eigen/test/mapped_matrix.cpp @@ -64,8 +64,9 @@ template<typename MatrixType> void map_class_matrix(const MatrixType& m) for(int i = 0; i < size; i++) array2[i] = Scalar(1); // array3unaligned -> unaligned pointer to heap Scalar* array3 = new Scalar[size+1]; - for(int i = 0; i < size+1; i++) array3[i] = Scalar(1); - Scalar* array3unaligned = internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES == 0 ? array3+1 : array3; + Index sizep1 = size + 1; // <- without this temporary MSVC 2013 generates bad code + for(Index i = 0; i < sizep1; i++) array3[i] = Scalar(1); + Scalar* array3unaligned = (internal::UIntPtr(array3)%EIGEN_MAX_ALIGN_BYTES) == 0 ?
array3+1 : array3; Scalar array4[256]; if(size<=256) for(int i = 0; i < size; i++) array4[i] = Scalar(1); diff --git a/eigen/test/mapstride.cpp b/eigen/test/mapstride.cpp index 4858f8f..de77dc5 100644 --- a/eigen/test/mapstride.cpp +++ b/eigen/test/mapstride.cpp @@ -58,7 +58,7 @@ template<int Alignment,typename MatrixType> void map_class_matrix(const MatrixTy MatrixType m = MatrixType::Random(rows,cols); Scalar s1 = internal::random<Scalar>(); - Index arraysize = 2*(rows+4)*(cols+4); + Index arraysize = 4*(rows+4)*(cols+4); Scalar* a_array1 = internal::aligned_new<Scalar>(arraysize+1); Scalar* array1 = a_array1; @@ -143,9 +143,62 @@ template<int Alignment,typename MatrixType> void map_class_matrix(const MatrixTy VERIFY_IS_APPROX(map,s1*m); } + // test inner stride and no outer stride + for(int k=0; k<2; ++k) + { + if(k==1 && (m.innerSize()*2)*m.outerSize() > maxsize2) + break; + Scalar* array = (k==0 ? array1 : array2); + + Map<MatrixType, Alignment, InnerStride<Dynamic> > map(array, rows, cols, InnerStride<Dynamic>(2)); + map = m; + VERIFY(map.outerStride() == map.innerSize()*2); + for(int i = 0; i < m.outerSize(); ++i) + for(int j = 0; j < m.innerSize(); ++j) + { + VERIFY(array[map.innerSize()*i*2+j*2] == m.coeffByOuterInner(i,j)); + VERIFY(map.coeffByOuterInner(i,j) == m.coeffByOuterInner(i,j)); + } + VERIFY_IS_APPROX(s1*map,s1*m); + map *= s1; + VERIFY_IS_APPROX(map,s1*m); + } + internal::aligned_delete(a_array1, arraysize+1); } +// Additional tests for inner-stride but no outer-stride +template<int> +void bug1453() +{ + const int data[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + typedef Matrix<int,Dynamic,Dynamic,RowMajor> RowMatrixXi; + typedef Matrix<int,2,3,ColMajor> ColMatrix23i; + typedef Matrix<int,3,2,ColMajor> ColMatrix32i; + typedef Matrix<int,2,3,RowMajor> RowMatrix23i; + typedef Matrix<int,3,2,RowMajor> RowMatrix32i; + + VERIFY_IS_APPROX(MatrixXi::Map(data, 2, 3, InnerStride<2>()), MatrixXi::Map(data, 2, 3, Stride<4,2>())); + VERIFY_IS_APPROX(MatrixXi::Map(data, 2, 3, InnerStride<>(2)), MatrixXi::Map(data, 2, 3, Stride<4,2>())); + VERIFY_IS_APPROX(MatrixXi::Map(data, 3, 2, InnerStride<2>()), MatrixXi::Map(data, 3, 2, Stride<6,2>())); + VERIFY_IS_APPROX(MatrixXi::Map(data, 3, 2, InnerStride<>(2)), MatrixXi::Map(data, 3, 2, Stride<6,2>())); + + VERIFY_IS_APPROX(RowMatrixXi::Map(data, 2, 3, InnerStride<2>()), RowMatrixXi::Map(data, 2, 3, Stride<6,2>())); + VERIFY_IS_APPROX(RowMatrixXi::Map(data, 2, 3, InnerStride<>(2)), RowMatrixXi::Map(data, 2, 3, Stride<6,2>())); + VERIFY_IS_APPROX(RowMatrixXi::Map(data, 3, 2, InnerStride<2>()), RowMatrixXi::Map(data, 3, 2, Stride<4,2>())); + VERIFY_IS_APPROX(RowMatrixXi::Map(data, 3, 2, InnerStride<>(2)), RowMatrixXi::Map(data, 3, 2, Stride<4,2>())); + + VERIFY_IS_APPROX(ColMatrix23i::Map(data, InnerStride<2>()), MatrixXi::Map(data, 2, 3, Stride<4,2>())); + VERIFY_IS_APPROX(ColMatrix23i::Map(data, InnerStride<>(2)), MatrixXi::Map(data, 2, 3, Stride<4,2>())); + VERIFY_IS_APPROX(ColMatrix32i::Map(data, InnerStride<2>()), MatrixXi::Map(data, 3, 2, Stride<6,2>())); + VERIFY_IS_APPROX(ColMatrix32i::Map(data, InnerStride<>(2)), MatrixXi::Map(data, 3, 2, Stride<6,2>())); + + VERIFY_IS_APPROX(RowMatrix23i::Map(data, InnerStride<2>()), RowMatrixXi::Map(data, 2, 3, Stride<6,2>())); + VERIFY_IS_APPROX(RowMatrix23i::Map(data, InnerStride<>(2)), RowMatrixXi::Map(data, 2, 3, Stride<6,2>())); + VERIFY_IS_APPROX(RowMatrix32i::Map(data, InnerStride<2>()), RowMatrixXi::Map(data, 3, 2, 
Stride<4,2>())); + VERIFY_IS_APPROX(RowMatrix32i::Map(data, InnerStride<>(2)), RowMatrixXi::Map(data, 3, 2, Stride<4,2>())); +} + void test_mapstride() { for(int i = 0; i < g_repeat; i++) { @@ -175,6 +228,8 @@ void test_mapstride() CALL_SUBTEST_5( map_class_matrix<Unaligned>(MatrixXi(internal::random<int>(1,maxn),internal::random<int>(1,maxn))) ); CALL_SUBTEST_6( map_class_matrix<Aligned>(MatrixXcd(internal::random<int>(1,maxn),internal::random<int>(1,maxn))) ); CALL_SUBTEST_6( map_class_matrix<Unaligned>(MatrixXcd(internal::random<int>(1,maxn),internal::random<int>(1,maxn))) ); + + CALL_SUBTEST_5( bug1453<0>() ); TEST_SET_BUT_UNUSED_VARIABLE(maxn); } diff --git a/eigen/test/permutationmatrices.cpp b/eigen/test/permutationmatrices.cpp index db12665..b2229cf 100644 --- a/eigen/test/permutationmatrices.cpp +++ b/eigen/test/permutationmatrices.cpp @@ -19,9 +19,11 @@ template<typename MatrixType> void permutationmatrices(const MatrixType& m) enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime, Options = MatrixType::Options }; typedef PermutationMatrix<Rows> LeftPermutationType; + typedef Transpositions<Rows> LeftTranspositionsType; typedef Matrix<int, Rows, 1> LeftPermutationVectorType; typedef Map<LeftPermutationType> MapLeftPerm; typedef PermutationMatrix<Cols> RightPermutationType; + typedef Transpositions<Cols> RightTranspositionsType; typedef Matrix<int, Cols, 1> RightPermutationVectorType; typedef Map<RightPermutationType> MapRightPerm; @@ -35,6 +37,8 @@ template<typename MatrixType> void permutationmatrices(const MatrixType& m) RightPermutationVectorType rv; randomPermutationVector(rv, cols); RightPermutationType rp(rv); + LeftTranspositionsType lt(lv); + RightTranspositionsType rt(rv); MatrixType m_permuted = MatrixType::Random(rows,cols); VERIFY_EVALUATION_COUNT(m_permuted = lp * m_original * rp, 1); // 1 temp for sub expression "lp * m_original" @@ -115,6 +119,14 @@ template<typename MatrixType> void permutationmatrices(const MatrixType& m) Matrix<Scalar, Cols, Cols> B = rp.transpose(); VERIFY_IS_APPROX(A, B.transpose()); } + + m_permuted = m_original; + lp = lt; + rp = rt; + VERIFY_EVALUATION_COUNT(m_permuted = lt * m_permuted * rt, 1); + VERIFY_IS_APPROX(m_permuted, lp*m_original*rp.transpose()); + + VERIFY_IS_APPROX(lt.inverse()*m_permuted*rt.inverse(), m_original); } template<typename T> diff --git a/eigen/test/product_trmm.cpp b/eigen/test/product_trmm.cpp index 12e5544..e08d9f3 100644 --- a/eigen/test/product_trmm.cpp +++ b/eigen/test/product_trmm.cpp @@ -29,7 +29,7 @@ void trmm(int rows=get_random_size<Scalar>(), typedef Matrix<Scalar,Dynamic,OtherCols,OtherCols==1?ColMajor:ResOrder> ResXS; typedef Matrix<Scalar,OtherCols,Dynamic,OtherCols==1?RowMajor:ResOrder> ResSX; - TriMatrix mat(rows,cols), tri(rows,cols), triTr(cols,rows); + TriMatrix mat(rows,cols), tri(rows,cols), triTr(cols,rows), s1tri(rows,cols), s1triTr(cols,rows); OnTheRight ge_right(cols,otherCols); OnTheLeft ge_left(otherCols,rows); @@ -42,6 +42,8 @@ void trmm(int rows=get_random_size<Scalar>(), mat.setRandom(); tri = mat.template triangularView<Mode>(); triTr = mat.transpose().template triangularView<Mode>(); + s1tri = (s1*mat).template triangularView<Mode>(); + s1triTr = (s1*mat).transpose().template triangularView<Mode>(); ge_right.setRandom(); ge_left.setRandom(); @@ -51,19 +53,29 @@ void trmm(int rows=get_random_size<Scalar>(), VERIFY_IS_APPROX( ge_xs.noalias() = mat.template triangularView<Mode>() * ge_right, tri * ge_right); VERIFY_IS_APPROX( ge_sx.noalias() = ge_left * 
mat.template triangularView<Mode>(), ge_left * tri); - VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView<Mode>() * (s2*ge_left.transpose()), s1*triTr.conjugate() * (s2*ge_left.transpose())); - VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.transpose() * mat.adjoint().template triangularView<Mode>(), ge_right.transpose() * triTr.conjugate()); + if((Mode&UnitDiag)==0) + VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView<Mode>() * (s2*ge_left.transpose()), s1*triTr.conjugate() * (s2*ge_left.transpose())); - VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.adjoint()).template triangularView<Mode>() * (s2*ge_left.adjoint()), s1*triTr.conjugate() * (s2*ge_left.adjoint())); - VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.adjoint() * mat.adjoint().template triangularView<Mode>(), ge_right.adjoint() * triTr.conjugate()); + VERIFY_IS_APPROX( ge_xs.noalias() = (s1*mat.transpose()).template triangularView<Mode>() * (s2*ge_left.transpose()), s1triTr * (s2*ge_left.transpose())); + VERIFY_IS_APPROX( ge_sx.noalias() = (s2*ge_left) * (s1*mat).template triangularView<Mode>(), (s2*ge_left)*s1tri); + VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.transpose() * mat.adjoint().template triangularView<Mode>(), ge_right.transpose() * triTr.conjugate()); + VERIFY_IS_APPROX( ge_sx.noalias() = ge_right.adjoint() * mat.adjoint().template triangularView<Mode>(), ge_right.adjoint() * triTr.conjugate()); + + ge_xs_save = ge_xs; + if((Mode&UnitDiag)==0) + VERIFY_IS_APPROX( (ge_xs_save + s1*triTr.conjugate() * (s2*ge_left.adjoint())).eval(), ge_xs.noalias() += (s1*mat.adjoint()).template triangularView<Mode>() * (s2*ge_left.adjoint()) ); ge_xs_save = ge_xs; - VERIFY_IS_APPROX( (ge_xs_save + s1*triTr.conjugate() * (s2*ge_left.adjoint())).eval(), ge_xs.noalias() += (s1*mat.adjoint()).template triangularView<Mode>() * (s2*ge_left.adjoint()) ); + VERIFY_IS_APPROX( (ge_xs_save + s1triTr * (s2*ge_left.adjoint())).eval(), ge_xs.noalias() += (s1*mat.transpose()).template triangularView<Mode>() * (s2*ge_left.adjoint()) ); ge_sx.setRandom(); ge_sx_save = ge_sx; - VERIFY_IS_APPROX( ge_sx_save - (ge_right.adjoint() * (-s1 * triTr).conjugate()).eval(), ge_sx.noalias() -= (ge_right.adjoint() * (-s1 * mat).adjoint().template triangularView<Mode>()).eval()); + if((Mode&UnitDiag)==0) + VERIFY_IS_APPROX( ge_sx_save - (ge_right.adjoint() * (-s1 * triTr).conjugate()).eval(), ge_sx.noalias() -= (ge_right.adjoint() * (-s1 * mat).adjoint().template triangularView<Mode>()).eval()); - VERIFY_IS_APPROX( ge_xs = (s1*mat).adjoint().template triangularView<Mode>() * ge_left.adjoint(), numext::conj(s1) * triTr.conjugate() * ge_left.adjoint()); + if((Mode&UnitDiag)==0) + VERIFY_IS_APPROX( ge_xs = (s1*mat).adjoint().template triangularView<Mode>() * ge_left.adjoint(), numext::conj(s1) * triTr.conjugate() * ge_left.adjoint()); + VERIFY_IS_APPROX( ge_xs = (s1*mat).transpose().template triangularView<Mode>() * ge_left.adjoint(), s1triTr * ge_left.adjoint()); + // TODO check with sub-matrix expressions ? } diff --git a/eigen/test/redux.cpp b/eigen/test/redux.cpp index 989e105..2bade37 100644 --- a/eigen/test/redux.cpp +++ b/eigen/test/redux.cpp @@ -9,6 +9,8 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
#define TEST_ENABLE_TEMPORARY_TRACKING +#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8 +// ^^ see bug 1449 #include "main.h" diff --git a/eigen/test/ref.cpp b/eigen/test/ref.cpp index 769db04..9dd2c04 100644 --- a/eigen/test/ref.cpp +++ b/eigen/test/ref.cpp @@ -13,7 +13,7 @@ #endif #define TEST_ENABLE_TEMPORARY_TRACKING - +#define TEST_CHECK_STATIC_ASSERTIONS #include "main.h" // test Ref.h @@ -255,6 +255,17 @@ void test_ref_overloads() test_ref_ambiguous(A, B); } +void test_ref_fixed_size_assert() +{ + Vector4f v4; + VectorXf vx(10); + VERIFY_RAISES_STATIC_ASSERT( Ref<Vector3f> y = v4; (void)y; ); + VERIFY_RAISES_STATIC_ASSERT( Ref<Vector3f> y = vx.head<4>(); (void)y; ); + VERIFY_RAISES_STATIC_ASSERT( Ref<const Vector3f> y = v4; (void)y; ); + VERIFY_RAISES_STATIC_ASSERT( Ref<const Vector3f> y = vx.head<4>(); (void)y; ); + VERIFY_RAISES_STATIC_ASSERT( Ref<const Vector3f> y = 2*v4; (void)y; ); +} + void test_ref() { for(int i = 0; i < g_repeat; i++) { @@ -277,4 +288,5 @@ void test_ref() } CALL_SUBTEST_7( test_ref_overloads() ); + CALL_SUBTEST_7( test_ref_fixed_size_assert() ); } diff --git a/eigen/test/selfadjoint.cpp b/eigen/test/selfadjoint.cpp index 92401e5..aaa4888 100644 --- a/eigen/test/selfadjoint.cpp +++ b/eigen/test/selfadjoint.cpp @@ -7,6 +7,7 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#define TEST_CHECK_STATIC_ASSERTIONS #include "main.h" // This file tests the basic selfadjointView API, @@ -45,6 +46,9 @@ template<typename MatrixType> void selfadjoint(const MatrixType& m) m4 = m2; m4 -= m1.template selfadjointView<Lower>(); VERIFY_IS_APPROX(m4, m2-m3); + + VERIFY_RAISES_STATIC_ASSERT(m2.template selfadjointView<StrictlyUpper>()); + VERIFY_RAISES_STATIC_ASSERT(m2.template selfadjointView<UnitLower>()); } void bug_159() diff --git a/eigen/test/sparse_basic.cpp b/eigen/test/sparse_basic.cpp index 3849850..f84b6e3 100644 --- a/eigen/test/sparse_basic.cpp +++ b/eigen/test/sparse_basic.cpp @@ -228,8 +228,8 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re VERIFY_RAISES_ASSERT( m1 -= m1.innerVector(0) ); VERIFY_RAISES_ASSERT( refM1 -= m1.innerVector(0) ); VERIFY_RAISES_ASSERT( refM1 += m1.innerVector(0) ); - m1 = m4; refM1 = refM4; } + m1 = m4; refM1 = refM4; // test aliasing VERIFY_IS_APPROX((m1 = -m1), (refM1 = -refM1)); diff --git a/eigen/test/sparse_product.cpp b/eigen/test/sparse_product.cpp index 1975867..7f77bb7 100644 --- a/eigen/test/sparse_product.cpp +++ b/eigen/test/sparse_product.cpp @@ -7,6 +7,12 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#if defined(_MSC_VER) && (_MSC_VER==1800) +// This unit test takes forever to compile in Release mode with MSVC 2013, +// multiple hours. So let's switch off optimization for this one. 
+#pragma optimize("",off) +#endif + static long int nb_temporaries; inline void on_temporary_creation() { @@ -371,6 +377,88 @@ void bug_942() VERIFY_IS_APPROX( ( d.asDiagonal()*cmA ).eval().coeff(0,0), res ); } +template<typename Real> +void test_mixing_types() +{ + typedef std::complex<Real> Cplx; + typedef SparseMatrix<Real> SpMatReal; + typedef SparseMatrix<Cplx> SpMatCplx; + typedef SparseMatrix<Cplx,RowMajor> SpRowMatCplx; + typedef Matrix<Real,Dynamic,Dynamic> DenseMatReal; + typedef Matrix<Cplx,Dynamic,Dynamic> DenseMatCplx; + + Index n = internal::random<Index>(1,100); + double density = (std::max)(8./(n*n), 0.2); + + SpMatReal sR1(n,n); + SpMatCplx sC1(n,n), sC2(n,n), sC3(n,n); + SpRowMatCplx sCR(n,n); + DenseMatReal dR1(n,n); + DenseMatCplx dC1(n,n), dC2(n,n), dC3(n,n); + + initSparse<Real>(density, dR1, sR1); + initSparse<Cplx>(density, dC1, sC1); + initSparse<Cplx>(density, dC2, sC2); + + VERIFY_IS_APPROX( sC2 = (sR1 * sC1), dC3 = dR1.template cast<Cplx>() * dC1 ); + VERIFY_IS_APPROX( sC2 = (sC1 * sR1), dC3 = dC1 * dR1.template cast<Cplx>() ); + VERIFY_IS_APPROX( sC2 = (sR1.transpose() * sC1), dC3 = dR1.template cast<Cplx>().transpose() * dC1 ); + VERIFY_IS_APPROX( sC2 = (sC1.transpose() * sR1), dC3 = dC1.transpose() * dR1.template cast<Cplx>() ); + VERIFY_IS_APPROX( sC2 = (sR1 * sC1.transpose()), dC3 = dR1.template cast<Cplx>() * dC1.transpose() ); + VERIFY_IS_APPROX( sC2 = (sC1 * sR1.transpose()), dC3 = dC1 * dR1.template cast<Cplx>().transpose() ); + VERIFY_IS_APPROX( sC2 = (sR1.transpose() * sC1.transpose()), dC3 = dR1.template cast<Cplx>().transpose() * dC1.transpose() ); + VERIFY_IS_APPROX( sC2 = (sC1.transpose() * sR1.transpose()), dC3 = dC1.transpose() * dR1.template cast<Cplx>().transpose() ); + + VERIFY_IS_APPROX( sCR = (sR1 * sC1), dC3 = dR1.template cast<Cplx>() * dC1 ); + VERIFY_IS_APPROX( sCR = (sC1 * sR1), dC3 = dC1 * dR1.template cast<Cplx>() ); + VERIFY_IS_APPROX( sCR = (sR1.transpose() * sC1), dC3 = dR1.template cast<Cplx>().transpose() * dC1 ); + VERIFY_IS_APPROX( sCR = (sC1.transpose() * sR1), dC3 = dC1.transpose() * dR1.template cast<Cplx>() ); + VERIFY_IS_APPROX( sCR = (sR1 * sC1.transpose()), dC3 = dR1.template cast<Cplx>() * dC1.transpose() ); + VERIFY_IS_APPROX( sCR = (sC1 * sR1.transpose()), dC3 = dC1 * dR1.template cast<Cplx>().transpose() ); + VERIFY_IS_APPROX( sCR = (sR1.transpose() * sC1.transpose()), dC3 = dR1.template cast<Cplx>().transpose() * dC1.transpose() ); + VERIFY_IS_APPROX( sCR = (sC1.transpose() * sR1.transpose()), dC3 = dC1.transpose() * dR1.template cast<Cplx>().transpose() ); + + + VERIFY_IS_APPROX( sC2 = (sR1 * sC1).pruned(), dC3 = dR1.template cast<Cplx>() * dC1 ); + VERIFY_IS_APPROX( sC2 = (sC1 * sR1).pruned(), dC3 = dC1 * dR1.template cast<Cplx>() ); + VERIFY_IS_APPROX( sC2 = (sR1.transpose() * sC1).pruned(), dC3 = dR1.template cast<Cplx>().transpose() * dC1 ); + VERIFY_IS_APPROX( sC2 = (sC1.transpose() * sR1).pruned(), dC3 = dC1.transpose() * dR1.template cast<Cplx>() ); + VERIFY_IS_APPROX( sC2 = (sR1 * sC1.transpose()).pruned(), dC3 = dR1.template cast<Cplx>() * dC1.transpose() ); + VERIFY_IS_APPROX( sC2 = (sC1 * sR1.transpose()).pruned(), dC3 = dC1 * dR1.template cast<Cplx>().transpose() ); + VERIFY_IS_APPROX( sC2 = (sR1.transpose() * sC1.transpose()).pruned(), dC3 = dR1.template cast<Cplx>().transpose() * dC1.transpose() ); + VERIFY_IS_APPROX( sC2 = (sC1.transpose() * sR1.transpose()).pruned(), dC3 = dC1.transpose() * dR1.template cast<Cplx>().transpose() ); + + VERIFY_IS_APPROX( sCR = (sR1 * sC1).pruned(), dC3 = dR1.template 
cast<Cplx>() * dC1 ); + VERIFY_IS_APPROX( sCR = (sC1 * sR1).pruned(), dC3 = dC1 * dR1.template cast<Cplx>() ); + VERIFY_IS_APPROX( sCR = (sR1.transpose() * sC1).pruned(), dC3 = dR1.template cast<Cplx>().transpose() * dC1 ); + VERIFY_IS_APPROX( sCR = (sC1.transpose() * sR1).pruned(), dC3 = dC1.transpose() * dR1.template cast<Cplx>() ); + VERIFY_IS_APPROX( sCR = (sR1 * sC1.transpose()).pruned(), dC3 = dR1.template cast<Cplx>() * dC1.transpose() ); + VERIFY_IS_APPROX( sCR = (sC1 * sR1.transpose()).pruned(), dC3 = dC1 * dR1.template cast<Cplx>().transpose() ); + VERIFY_IS_APPROX( sCR = (sR1.transpose() * sC1.transpose()).pruned(), dC3 = dR1.template cast<Cplx>().transpose() * dC1.transpose() ); + VERIFY_IS_APPROX( sCR = (sC1.transpose() * sR1.transpose()).pruned(), dC3 = dC1.transpose() * dR1.template cast<Cplx>().transpose() ); + + + VERIFY_IS_APPROX( dC2 = (sR1 * sC1), dC3 = dR1.template cast<Cplx>() * dC1 ); + VERIFY_IS_APPROX( dC2 = (sC1 * sR1), dC3 = dC1 * dR1.template cast<Cplx>() ); + VERIFY_IS_APPROX( dC2 = (sR1.transpose() * sC1), dC3 = dR1.template cast<Cplx>().transpose() * dC1 ); + VERIFY_IS_APPROX( dC2 = (sC1.transpose() * sR1), dC3 = dC1.transpose() * dR1.template cast<Cplx>() ); + VERIFY_IS_APPROX( dC2 = (sR1 * sC1.transpose()), dC3 = dR1.template cast<Cplx>() * dC1.transpose() ); + VERIFY_IS_APPROX( dC2 = (sC1 * sR1.transpose()), dC3 = dC1 * dR1.template cast<Cplx>().transpose() ); + VERIFY_IS_APPROX( dC2 = (sR1.transpose() * sC1.transpose()), dC3 = dR1.template cast<Cplx>().transpose() * dC1.transpose() ); + VERIFY_IS_APPROX( dC2 = (sC1.transpose() * sR1.transpose()), dC3 = dC1.transpose() * dR1.template cast<Cplx>().transpose() ); + + + VERIFY_IS_APPROX( dC2 = dR1 * sC1, dC3 = dR1.template cast<Cplx>() * sC1 ); + VERIFY_IS_APPROX( dC2 = sR1 * dC1, dC3 = sR1.template cast<Cplx>() * dC1 ); + VERIFY_IS_APPROX( dC2 = dC1 * sR1, dC3 = dC1 * sR1.template cast<Cplx>() ); + VERIFY_IS_APPROX( dC2 = sC1 * dR1, dC3 = sC1 * dR1.template cast<Cplx>() ); + + VERIFY_IS_APPROX( dC2 = dR1.row(0) * sC1, dC3 = dR1.template cast<Cplx>().row(0) * sC1 ); + VERIFY_IS_APPROX( dC2 = sR1 * dC1.col(0), dC3 = sR1.template cast<Cplx>() * dC1.col(0) ); + VERIFY_IS_APPROX( dC2 = dC1.row(0) * sR1, dC3 = dC1.row(0) * sR1.template cast<Cplx>() ); + VERIFY_IS_APPROX( dC2 = sC1 * dR1.col(0), dC3 = sC1 * dR1.template cast<Cplx>().col(0) ); +} + void test_sparse_product() { for(int i = 0; i < g_repeat; i++) { @@ -381,5 +469,7 @@ void test_sparse_product() CALL_SUBTEST_2( (sparse_product<SparseMatrix<std::complex<double>, RowMajor > >()) ); CALL_SUBTEST_3( (sparse_product<SparseMatrix<float,ColMajor,long int> >()) ); CALL_SUBTEST_4( (sparse_product_regression_test<SparseMatrix<double,RowMajor>, Matrix<double, Dynamic, Dynamic, RowMajor> >()) ); + + CALL_SUBTEST_5( (test_mixing_types<float>()) ); } } diff --git a/eigen/test/sparseqr.cpp b/eigen/test/sparseqr.cpp index e8605fd..f0e721f 100644 --- a/eigen/test/sparseqr.cpp +++ b/eigen/test/sparseqr.cpp @@ -54,6 +54,28 @@ template<typename Scalar> void test_sparseqr_scalar() b = dA * DenseVector::Random(A.cols()); solver.compute(A); + + // Q should be MxM + VERIFY_IS_EQUAL(solver.matrixQ().rows(), A.rows()); + VERIFY_IS_EQUAL(solver.matrixQ().cols(), A.rows()); + + // R should be MxN + VERIFY_IS_EQUAL(solver.matrixR().rows(), A.rows()); + VERIFY_IS_EQUAL(solver.matrixR().cols(), A.cols()); + + // Q and R can be multiplied + DenseMat recoveredA = solver.matrixQ() + * DenseMat(solver.matrixR().template triangularView<Upper>()) + * solver.colsPermutation().transpose(); + 
VERIFY_IS_EQUAL(recoveredA.rows(), A.rows()); + VERIFY_IS_EQUAL(recoveredA.cols(), A.cols()); + + // and in the full rank case the original matrix is recovered + if (solver.rank() == A.cols()) + { + VERIFY_IS_APPROX(A, recoveredA); + } + if(internal::random<float>(0,1)>0.5f) solver.factorize(A); // this checks that calling analyzePattern is not needed if the pattern does not change. if (solver.info() != Success) diff --git a/eigen/test/stable_norm.cpp b/eigen/test/stable_norm.cpp index c3eb5ff..3c02474 100644 --- a/eigen/test/stable_norm.cpp +++ b/eigen/test/stable_norm.cpp @@ -65,6 +65,8 @@ template<typename MatrixType> void stable_norm(const MatrixType& m) factor = internal::random<Scalar>(); Scalar small = factor * ((std::numeric_limits<RealScalar>::min)() * RealScalar(1e4)); + Scalar one(1); + MatrixType vzero = MatrixType::Zero(rows, cols), vrand = MatrixType::Random(rows, cols), vbig(rows, cols), @@ -78,6 +80,14 @@ template<typename MatrixType> void stable_norm(const MatrixType& m) VERIFY_IS_APPROX(vrand.blueNorm(), vrand.norm()); VERIFY_IS_APPROX(vrand.hypotNorm(), vrand.norm()); + // test with expressions as input + VERIFY_IS_APPROX((one*vrand).stableNorm(), vrand.norm()); + VERIFY_IS_APPROX((one*vrand).blueNorm(), vrand.norm()); + VERIFY_IS_APPROX((one*vrand).hypotNorm(), vrand.norm()); + VERIFY_IS_APPROX((one*vrand+one*vrand-one*vrand).stableNorm(), vrand.norm()); + VERIFY_IS_APPROX((one*vrand+one*vrand-one*vrand).blueNorm(), vrand.norm()); + VERIFY_IS_APPROX((one*vrand+one*vrand-one*vrand).hypotNorm(), vrand.norm()); + RealScalar size = static_cast<RealScalar>(m.size()); // test numext::isfinite diff --git a/eigen/test/vectorization_logic.cpp b/eigen/test/vectorization_logic.cpp index 83c1439..37e7495 100644 --- a/eigen/test/vectorization_logic.cpp +++ b/eigen/test/vectorization_logic.cpp @@ -207,6 +207,12 @@ struct vectorization_logic VERIFY(test_redux(Vector1(), LinearVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_redux(Vector1().array()*Vector1().array(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux((Vector1().array()*Vector1().array()).col(0), + LinearVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_redux(Matrix<Scalar,PacketSize,3>(), LinearVectorizedTraversal,CompleteUnrolling)); diff --git a/eigen/unsupported/CMakeLists.txt b/eigen/unsupported/CMakeLists.txt index 4fef40a..9a56661 100644 --- a/eigen/unsupported/CMakeLists.txt +++ b/eigen/unsupported/CMakeLists.txt @@ -1,7 +1,9 @@ add_subdirectory(Eigen) add_subdirectory(doc EXCLUDE_FROM_ALL) -if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) - add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest -else() - add_subdirectory(test EXCLUDE_FROM_ALL) +if(BUILD_TESTING) + if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) + add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest + else() + add_subdirectory(test EXCLUDE_FROM_ALL) + endif() endif() diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index 069680a..17f0466 100644 --- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -196,9 +196,11 @@ struct ThreadPoolDevice { // of blocks to be evenly divisible across threads.
diff --git a/eigen/test/stable_norm.cpp b/eigen/test/stable_norm.cpp
index c3eb5ff..3c02474 100644
--- a/eigen/test/stable_norm.cpp
+++ b/eigen/test/stable_norm.cpp
@@ -65,6 +65,8 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
   factor = internal::random<Scalar>();
   Scalar small = factor * ((std::numeric_limits<RealScalar>::min)() * RealScalar(1e4));
+  Scalar one(1);
+
   MatrixType vzero = MatrixType::Zero(rows, cols),
              vrand = MatrixType::Random(rows, cols),
              vbig(rows, cols),
@@ -78,6 +80,14 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
   VERIFY_IS_APPROX(vrand.blueNorm(), vrand.norm());
   VERIFY_IS_APPROX(vrand.hypotNorm(), vrand.norm());
+  // test with expressions as input
+  VERIFY_IS_APPROX((one*vrand).stableNorm(), vrand.norm());
+  VERIFY_IS_APPROX((one*vrand).blueNorm(), vrand.norm());
+  VERIFY_IS_APPROX((one*vrand).hypotNorm(), vrand.norm());
+  VERIFY_IS_APPROX((one*vrand+one*vrand-one*vrand).stableNorm(), vrand.norm());
+  VERIFY_IS_APPROX((one*vrand+one*vrand-one*vrand).blueNorm(), vrand.norm());
+  VERIFY_IS_APPROX((one*vrand+one*vrand-one*vrand).hypotNorm(), vrand.norm());
+
   RealScalar size = static_cast<RealScalar>(m.size());
   // test numext::isfinite
diff --git a/eigen/test/vectorization_logic.cpp b/eigen/test/vectorization_logic.cpp
index 83c1439..37e7495 100644
--- a/eigen/test/vectorization_logic.cpp
+++ b/eigen/test/vectorization_logic.cpp
@@ -207,6 +207,12 @@ struct vectorization_logic
     VERIFY(test_redux(Vector1(),
                       LinearVectorizedTraversal,CompleteUnrolling));
+    VERIFY(test_redux(Vector1().array()*Vector1().array(),
+                      LinearVectorizedTraversal,CompleteUnrolling));
+
+    VERIFY(test_redux((Vector1().array()*Vector1().array()).col(0),
+                      LinearVectorizedTraversal,CompleteUnrolling));
+
     VERIFY(test_redux(Matrix<Scalar,PacketSize,3>(),
                       LinearVectorizedTraversal,CompleteUnrolling));
diff --git a/eigen/unsupported/CMakeLists.txt b/eigen/unsupported/CMakeLists.txt
index 4fef40a..9a56661 100644
--- a/eigen/unsupported/CMakeLists.txt
+++ b/eigen/unsupported/CMakeLists.txt
@@ -1,7 +1,9 @@
 add_subdirectory(Eigen)
 add_subdirectory(doc EXCLUDE_FROM_ALL)
-if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
-  add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
-else()
-  add_subdirectory(test EXCLUDE_FROM_ALL)
+if(BUILD_TESTING)
+  if(EIGEN_LEAVE_TEST_IN_ALL_TARGET)
+    add_subdirectory(test) # can't do EXCLUDE_FROM_ALL here, breaks CTest
+  else()
+    add_subdirectory(test EXCLUDE_FROM_ALL)
+  endif()
 endif()
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 069680a..17f0466 100644
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -196,9 +196,11 @@ struct ThreadPoolDevice {
     // of blocks to be evenly dividable across threads.
     double block_size_f = 1.0 / CostModel::taskSize(1, cost);
-    Index block_size = numext::mini(n, numext::maxi<Index>(1, block_size_f));
-    const Index max_block_size =
-        numext::mini(n, numext::maxi<Index>(1, 2 * block_size_f));
+    const Index max_oversharding_factor = 4;
+    Index block_size = numext::mini(
+        n, numext::maxi<Index>(divup<Index>(n, max_oversharding_factor * numThreads()),
+                               block_size_f));
+    const Index max_block_size = numext::mini(n, 2 * block_size);
     if (block_align) {
       Index new_block_size = block_align(block_size);
       eigen_assert(new_block_size >= block_size);
@@ -212,7 +214,8 @@ struct ThreadPoolDevice {
         (divup<int>(block_count, numThreads()) * numThreads());
     // Now try to increase block size up to max_block_size as long as it
     // doesn't decrease parallel efficiency.
-    for (Index prev_block_count = block_count; prev_block_count > 1;) {
+    for (Index prev_block_count = block_count;
+         max_efficiency < 1.0 && prev_block_count > 1;) {
      // This is the next block size that divides size into a smaller number
      // of blocks than the current block_size.
      Index coarser_block_size = divup(n, prev_block_count - 1);
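[Editor's note: a standalone sketch (not the Eigen source) of the block-sizing change above: the block size is now bounded below so that at most max_oversharding_factor blocks are created per thread, and max_block_size is tied to the chosen block size rather than to the cost model alone.]

```cpp
#include <algorithm>
#include <cstdio>

typedef long Index;

// Same rounding-up division the Eigen code calls divup().
static Index divup(Index x, Index y) { return (x + y - 1) / y; }

static Index pick_block_size(Index n, double block_size_f, Index num_threads)
{
  const Index max_oversharding_factor = 4;  // at most 4 blocks per thread
  Index lower_bound = divup(n, max_oversharding_factor * num_threads);
  return std::min(n, std::max<Index>(lower_bound, (Index)block_size_f));
}

int main()
{
  // n = 1000 work items, a cost model suggesting 1-element blocks, 8 threads:
  // the oversharding cap lifts the block size from 1 to ceil(1000/32) = 32.
  std::printf("%ld\n", pick_block_size(1000, 1.0, 8));
}
```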
diff --git a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
index 2854a4a..e6a666f 100644
--- a/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
+++ b/eigen/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
@@ -31,12 +31,12 @@ namespace Eigen {
   *
   * \sa Tensor
   */
-template<typename T, typename Dimensions, int Options_> class TensorStorage;
+template<typename T, typename Dimensions, int Options> class TensorStorage;
 
 // Pure fixed-size storage
-template<typename T, int Options_, typename FixedDimensions>
-class TensorStorage<T, FixedDimensions, Options_>
+template<typename T, typename FixedDimensions, int Options_>
+class TensorStorage
 {
  private:
   static const std::size_t Size = FixedDimensions::total_size;
@@ -66,7 +66,7 @@ class TensorStorage<T, FixedDimensions, Options_>
 
 // pure dynamic
-template<typename T, int Options_, typename IndexType, int NumIndices_>
+template<typename T, typename IndexType, int NumIndices_, int Options_>
 class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_>
 {
  public:
diff --git a/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h b/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
index bae04fc..eff2dc8 100644
--- a/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
+++ b/eigen/unsupported/Eigen/src/IterativeSolvers/DGMRES.h
@@ -39,7 +39,6 @@ template <typename VectorType, typename IndexType>
 void sortWithPermutation (VectorType& vec, IndexType& perm, typename IndexType::Scalar& ncut)
 {
   eigen_assert(vec.size() == perm.size());
-  typedef typename IndexType::Scalar Index;
   bool flag;
   for (Index k = 0; k < ncut; k++)
   {
@@ -112,7 +111,6 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
     using Base::_solve_impl;
     typedef _MatrixType MatrixType;
     typedef typename MatrixType::Scalar Scalar;
-    typedef typename MatrixType::Index Index;
     typedef typename MatrixType::StorageIndex StorageIndex;
     typedef typename MatrixType::RealScalar RealScalar;
     typedef _Preconditioner Preconditioner;
@@ -146,7 +144,7 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
     void _solve_with_guess_impl(const Rhs& b, Dest& x) const
     {
       bool failed = false;
-      for(int j=0; j<b.cols(); ++j)
+      for(Index j=0; j<b.cols(); ++j)
       {
         m_iterations = Base::maxIterations();
         m_error = Base::m_tolerance;
@@ -170,17 +168,17 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
     /**
      * Get the restart value
      */
-    int restart() { return m_restart; }
+    Index restart() { return m_restart; }
 
     /**
      * Set the restart value (default is 30)
      */
-    void set_restart(const int restart) { m_restart=restart; }
+    Index set_restart(const Index restart) { m_restart=restart; }
 
    /**
     * Set the number of eigenvalues to deflate at each restart
     */
-    void setEigenv(const int neig)
+    void setEigenv(const Index neig)
    {
      m_neig = neig;
      if (neig+1 > m_maxNeig) m_maxNeig = neig+1; // To allow for complex conjugates
@@ -189,12 +187,12 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
    /**
     * Get the size of the deflation subspace size
     */
-    int deflSize() {return m_r; }
+    Index deflSize() {return m_r; }
 
    /**
     * Set the maximum size of the deflation subspace
     */
-    void setMaxEigenv(const int maxNeig) { m_maxNeig = maxNeig; }
+    void setMaxEigenv(const Index maxNeig) { m_maxNeig = maxNeig; }
 
   protected:
     // DGMRES algorithm
@@ -202,12 +200,12 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
     void dgmres(const MatrixType& mat,const Rhs& rhs, Dest& x, const Preconditioner& precond) const;
     // Perform one cycle of GMRES
     template<typename Dest>
-    int dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, int& nbIts) const;
+    Index dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, Index& nbIts) const;
     // Compute data to use for deflation
-    int dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const;
+    Index dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const;
     // Apply deflation to a vector
     template<typename RhsType, typename DestType>
-    int dgmresApplyDeflation(const RhsType& In, DestType& Out) const;
+    Index dgmresApplyDeflation(const RhsType& In, DestType& Out) const;
     ComplexVector schurValues(const ComplexSchur<DenseMatrix>& schurofH) const;
     ComplexVector schurValues(const RealSchur<DenseMatrix>& schurofH) const;
     // Init data for deflation
@@ -221,8 +219,8 @@ class DGMRES : public IterativeSolverBase<DGMRES<_MatrixType,_Preconditioner> >
     mutable DenseMatrix m_T; /* T=U^T*M^{-1}*A*U */
     mutable PartialPivLU<DenseMatrix> m_luT; // LU factorization of m_T
     mutable StorageIndex m_neig; //Number of eigenvalues to extract at each restart
-    mutable int m_r; // Current number of deflated eigenvalues, size of m_U
-    mutable int m_maxNeig; // Maximum number of eigenvalues to deflate
+    mutable Index m_r; // Current number of deflated eigenvalues, size of m_U
+    mutable Index m_maxNeig; // Maximum number of eigenvalues to deflate
     mutable RealScalar m_lambdaN; //Modulus of the largest eigenvalue of A
     mutable bool m_isDeflAllocated;
     mutable bool m_isDeflInitialized;
@@ -244,9 +242,9 @@ void DGMRES<_MatrixType, _Preconditioner>::dgmres(const MatrixType& mat,const Rh
                               const Preconditioner& precond) const
 {
   //Initialization
-  int n = mat.rows();
+  Index n = mat.rows();
   DenseVector r0(n);
-  int nbIts = 0;
+  Index nbIts = 0;
   m_H.resize(m_restart+1, m_restart);
   m_Hes.resize(m_restart, m_restart);
   m_V.resize(n,m_restart+1);
@@ -284,7 +282,7 @@ void DGMRES<_MatrixType, _Preconditioner>::dgmres(const MatrixType& mat,const Rh
  */
 template< typename _MatrixType, typename _Preconditioner>
 template<typename Dest>
-int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, int& nbIts) const
+Index DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, const Preconditioner& precond, Dest& x, DenseVector& r0, RealScalar& beta, const RealScalar& normRhs, Index& nbIts) const
 {
   //Initialization
   DenseVector g(m_restart+1); // Right hand side of the least square problem
@@ -293,8 +291,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con
   m_V.col(0) = r0/beta;
   m_info = NoConvergence;
   std::vector<JacobiRotation<Scalar> >gr(m_restart); // Givens rotations
-  int it = 0; // Number of inner iterations
-  int n = mat.rows();
+  Index it = 0; // Number of inner iterations
+  Index n = mat.rows();
   DenseVector tv1(n), tv2(n);  //Temporary vectors
   while (m_info == NoConvergence && it < m_restart && nbIts < m_iterations)
   {
@@ -312,7 +310,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con
     // Orthogonalize it with the previous basis in the basis using modified Gram-Schmidt
     Scalar coef;
-    for (int i = 0; i <= it; ++i)
+    for (Index i = 0; i <= it; ++i)
     {
       coef = tv1.dot(m_V.col(i));
       tv1 = tv1 - coef * m_V.col(i);
@@ -328,7 +326,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresCycle(const MatrixType& mat, con
     // FIXME Check for happy breakdown
     // Update Hessenberg matrix with Givens rotations
-    for (int i = 1; i <= it; ++i)
+    for (Index i = 1; i <= it; ++i)
     {
       m_H.col(it).applyOnTheLeft(i-1,i,gr[i-1].adjoint());
     }
@@ -418,7 +416,7 @@ inline typename DGMRES<_MatrixType, _Preconditioner>::ComplexVector DGMRES<_Matr
 }
 
 template< typename _MatrixType, typename _Preconditioner>
-int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const
+Index DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const MatrixType& mat, const Preconditioner& precond, const Index& it, StorageIndex& neig) const
 {
   // First, find the Schur form of the Hessenberg matrix H
   typename internal::conditional<NumTraits<Scalar>::IsComplex, ComplexSchur<DenseMatrix>, RealSchur<DenseMatrix> >::type schurofH;
@@ -433,8 +431,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
   // Reorder the absolute values of Schur values
   DenseRealVector modulEig(it);
-  for (int j=0; j<it; ++j) modulEig(j) = std::abs(eig(j));
-  perm.setLinSpaced(it,0,it-1);
+  for (Index j=0; j<it; ++j) modulEig(j) = std::abs(eig(j));
+  perm.setLinSpaced(it,0,internal::convert_index<StorageIndex>(it-1));
   internal::sortWithPermutation(modulEig, perm, neig);
 
   if (!m_lambdaN)
@@ -442,7 +440,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
     m_lambdaN = (std::max)(modulEig.maxCoeff(), m_lambdaN);
   }
   //Count the real number of extracted eigenvalues (with complex conjugates)
-  int nbrEig = 0;
+  Index nbrEig = 0;
   while (nbrEig < neig)
   {
     if(eig(perm(it-nbrEig-1)).imag() == RealScalar(0)) nbrEig++;
@@ -451,7 +449,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
   // Extract the  Schur vectors corresponding to the smallest Ritz values
   DenseMatrix Sr(it, nbrEig);
   Sr.setZero();
-  for (int j = 0; j < nbrEig; j++)
+  for (Index j = 0; j < nbrEig; j++)
   {
     Sr.col(j) = schurofH.matrixU().col(perm(it-j-1));
   }
@@ -462,8 +460,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
   if (m_r)
   {
     // Orthogonalize X against m_U using modified Gram-Schmidt
-    for (int j = 0; j < nbrEig; j++)
-      for (int k =0; k < m_r; k++)
-        X.col(j) = X.col(j) - (m_U.col(k).dot(X.col(j)))*m_U.col(k);
+    for (Index j = 0; j < nbrEig; j++)
+      for (Index k =0; k < m_r; k++)
+        X.col(j) = X.col(j) - (m_U.col(k).dot(X.col(j)))*m_U.col(k);
   }
@@ -473,7 +471,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
    dgmresInitDeflation(m);
   DenseMatrix MX(m, nbrEig);
   DenseVector tv1(m);
-  for (int j = 0; j < nbrEig; j++)
+  for (Index j = 0; j < nbrEig; j++)
   {
     tv1 = mat * X.col(j);
     MX.col(j) = precond.solve(tv1);
@@ -488,8 +486,8 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
   }
 
   // Save X into m_U and m_MX in m_MU
-  for (int j = 0; j < nbrEig; j++) m_U.col(m_r+j) = X.col(j);
-  for (int j = 0; j < nbrEig; j++) m_MU.col(m_r+j) = MX.col(j);
+  for (Index j = 0; j < nbrEig; j++) m_U.col(m_r+j) = X.col(j);
+  for (Index j = 0; j < nbrEig; j++) m_MU.col(m_r+j) = MX.col(j);
   // Increase the size of the invariant subspace
   m_r += nbrEig;
@@ -502,7 +500,7 @@ int DGMRES<_MatrixType, _Preconditioner>::dgmresComputeDeflationData(const Matri
 }
 template<typename _MatrixType, typename _Preconditioner>
 template<typename RhsType, typename DestType>
-int DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x, DestType &y) const
+Index DGMRES<_MatrixType, _Preconditioner>::dgmresApplyDeflation(const RhsType &x, DestType &y) const
 {
   DenseVector x1 = m_U.leftCols(m_r).transpose() * x;
   y = x + m_U.leftCols(m_r) * ( m_lambdaN * m_luT.solve(x1) - x1);
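[Editor's note: illustration only. With the int-to-Index migration above, DGMRES's accessors now traffic in Eigen::Index. A minimal usage sketch on a made-up diagonal system (any solvable sparse system works):]

```cpp
#include <iostream>
#include <Eigen/Sparse>
#include <unsupported/Eigen/IterativeSolvers>

int main()
{
  typedef Eigen::SparseMatrix<double> SpMat;
  SpMat A(2,2);
  A.insert(0,0) = 4.0;
  A.insert(1,1) = 3.0;
  A.makeCompressed();
  Eigen::VectorXd b(2); b << 1.0, 2.0;

  Eigen::DGMRES<SpMat> solver(A);
  solver.set_restart(30);  // takes Eigen::Index after this patch
  solver.setEigenv(1);     // eigenvalues deflated per restart, also Index now
  Eigen::VectorXd x = solver.solve(b);
  std::cout << x.transpose() << "\n"; // 0.25 0.666667
}
```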
diff --git a/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h b/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
index bb6d9e1..85ab3d9 100644
--- a/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
+++ b/eigen/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
@@ -326,6 +326,7 @@ struct matrix_exp_computeUV<MatrixType, long double>
     } else if (l1norm < 1.125358383453143065081397882891878e+000L) {
       matrix_exp_pade13(arg, U, V);
     } else {
+      const long double maxnorm = 2.884233277829519311757165057717815L;
       frexp(l1norm / maxnorm, &squarings);
       if (squarings < 0) squarings = 0;
       MatrixType A = arg.unaryExpr(MatrixExponentialScalingOp<long double>(squarings));
@@ -342,6 +343,27 @@ struct matrix_exp_computeUV<MatrixType, long double>
   }
 };
 
+template<typename T> struct is_exp_known_type : false_type {};
+template<> struct is_exp_known_type<float> : true_type {};
+template<> struct is_exp_known_type<double> : true_type {};
+#if LDBL_MANT_DIG <= 112
+template<> struct is_exp_known_type<long double> : true_type {};
+#endif
+
+template <typename ArgType, typename ResultType>
+void matrix_exp_compute(const ArgType& arg, ResultType &result, true_type) // natively supported scalar type
+{
+  typedef typename ArgType::PlainObject MatrixType;
+  MatrixType U, V;
+  int squarings;
+  matrix_exp_computeUV<MatrixType>::run(arg, U, V, squarings); // Pade approximant is (U+V) / (-U+V)
+  MatrixType numer = U + V;
+  MatrixType denom = -U + V;
+  result = denom.partialPivLu().solve(numer);
+  for (int i=0; i<squarings; i++)
+    result *= result;   // undo scaling by repeated squaring
+}
+
 
 /* Computes the matrix exponential
  *
@@ -349,26 +371,13 @@ struct matrix_exp_computeUV<MatrixType, long double>
  * \param result variable in which result will be stored
  */
 template <typename ArgType, typename ResultType>
-void matrix_exp_compute(const ArgType& arg, ResultType &result)
+void matrix_exp_compute(const ArgType& arg, ResultType &result, false_type) // default
 {
   typedef typename ArgType::PlainObject MatrixType;
-#if LDBL_MANT_DIG > 112 // rarely happens
   typedef typename traits<MatrixType>::Scalar Scalar;
   typedef typename NumTraits<Scalar>::Real RealScalar;
   typedef typename std::complex<RealScalar> ComplexScalar;
-  if (sizeof(RealScalar) > 14) {
-    result = arg.matrixFunction(internal::stem_function_exp<ComplexScalar>);
-    return;
-  }
-#endif
-  MatrixType U, V;
-  int squarings;
-  matrix_exp_computeUV<MatrixType>::run(arg, U, V, squarings); // Pade approximant is (U+V) / (-U+V)
-  MatrixType numer = U + V;
-  MatrixType denom = -U + V;
-  result = denom.partialPivLu().solve(numer);
-  for (int i=0; i<squarings; i++)
-    result *= result; // undo scaling by repeated squaring
+  result = arg.matrixFunction(internal::stem_function_exp<ComplexScalar>);
 }
 
 } // end namespace Eigen::internal
@@ -402,7 +411,7 @@ template<typename Derived> struct MatrixExponentialReturnValue
     inline void evalTo(ResultType& result) const
     {
       const typename internal::nested_eval<Derived, 10>::type tmp(m_src);
-      internal::matrix_exp_compute(tmp, result);
+      internal::matrix_exp_compute(tmp, result, internal::is_exp_known_type<typename Derived::Scalar>());
     }
 
     Index rows() const { return m_src.rows(); }
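[Editor's note: illustration only. The refactoring above replaces the preprocessor special-case with tag dispatch: float, double, and (where it is at most quad precision) long double take the Pade-plus-scaling-and-squaring path, while any other scalar falls back to matrixFunction(). User-facing usage is unchanged:]

```cpp
#include <iostream>
#include <unsupported/Eigen/MatrixFunctions>

int main()
{
  // exp() of a diagonal matrix is exp() of its diagonal entries, which makes
  // the Pade + repeated-squaring result easy to eyeball: diag(e, e^2).
  Eigen::Matrix2d A;
  A << 1.0, 0.0,
       0.0, 2.0;
  std::cout << A.exp() << "\n"; // ~2.71828 0 / 0 ~7.38906
}
```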
diff --git a/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h b/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h
index cdc14f8..41e4af4 100644
--- a/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h
+++ b/eigen/unsupported/Eigen/src/SparseExtra/MarketIO.h
@@ -109,6 +109,7 @@ namespace internal
 inline bool getMarketHeader(const std::string& filename, int& sym, bool& iscomplex, bool& isvector)
 {
   sym = 0;
+  iscomplex = false;
   isvector = false;
   std::ifstream in(filename.c_str(),std::ios::in);
   if(!in)
diff --git a/eigen/unsupported/test/CMakeLists.txt b/eigen/unsupported/test/CMakeLists.txt
index b5fa1c8..80cccd8 100644
--- a/eigen/unsupported/test/CMakeLists.txt
+++ b/eigen/unsupported/test/CMakeLists.txt
@@ -30,11 +30,16 @@ else(GOOGLEHASH_FOUND)
   ei_add_property(EIGEN_MISSING_BACKENDS "GoogleHash, ")
 endif(GOOGLEHASH_FOUND)
 
+
 find_package(Adolc)
 if(ADOLC_FOUND)
   include_directories(${ADOLC_INCLUDES})
   ei_add_property(EIGEN_TESTED_BACKENDS "Adolc, ")
-  ei_add_test(forward_adolc "" ${ADOLC_LIBRARIES})
+  if(EIGEN_TEST_CXX11)
+    ei_add_test(forward_adolc "" ${ADOLC_LIBRARIES})
+  else()
+    message(STATUS "Adolc found, but tests require C++11 mode")
+  endif()
 else(ADOLC_FOUND)
   ei_add_property(EIGEN_MISSING_BACKENDS "Adolc, ")
 endif(ADOLC_FOUND)
diff --git a/eigen/unsupported/test/autodiff.cpp b/eigen/unsupported/test/autodiff.cpp
index 8574313..1c5e0dc 100644
--- a/eigen/unsupported/test/autodiff.cpp
+++ b/eigen/unsupported/test/autodiff.cpp
@@ -306,6 +306,8 @@ double bug_1222() {
   return denom.value();
 }
 
+#ifdef EIGEN_TEST_PART_5
+
 double bug_1223() {
   using std::min;
   typedef Eigen::AutoDiffScalar<Eigen::Vector3d> AD;
@@ -326,8 +328,8 @@ double bug_1223() {
 // regression test for some compilation issues with specializations of ScalarBinaryOpTraits
 void bug_1260() {
-  Matrix4d A;
-  Vector4d v;
+  Matrix4d A = Matrix4d::Ones();
+  Vector4d v = Vector4d::Ones();
   A*v;
 }
 
@@ -336,7 +338,7 @@ double bug_1261() {
   typedef AutoDiffScalar<Matrix2d> AD;
   typedef Matrix<AD,2,1> VectorAD;
 
-  VectorAD v;
+  VectorAD v(0.,0.);
   const AD maxVal = v.maxCoeff();
   const AD minVal = v.minCoeff();
   return maxVal.value() + minVal.value();
@@ -344,12 +346,14 @@ double bug_1261() {
 
 double bug_1264() {
   typedef AutoDiffScalar<Vector2d> AD;
-  const AD s;
-  const Matrix<AD, 3, 1> v1;
+  const AD s = 0.;
+  const Matrix<AD, 3, 1> v1(0.,0.,0.);
   const Matrix<AD, 3, 1> v2 = (s + 3.0) * v1;
   return v2(0).value();
 }
 
+#endif
+
 void test_autodiff()
 {
   for(int i = 0; i < g_repeat; i++) {
@@ -359,9 +363,9 @@ void test_autodiff()
     CALL_SUBTEST_4( test_autodiff_hessian<1>() );
   }
 
-  bug_1222();
-  bug_1223();
-  bug_1260();
-  bug_1261();
+  CALL_SUBTEST_5( bug_1222() );
+  CALL_SUBTEST_5( bug_1223() );
+  CALL_SUBTEST_5( bug_1260() );
+  CALL_SUBTEST_5( bug_1261() );
 }
diff --git a/eigen/unsupported/test/autodiff_scalar.cpp b/eigen/unsupported/test/autodiff_scalar.cpp
index 9cf1128..a917ec3 100644
--- a/eigen/unsupported/test/autodiff_scalar.cpp
+++ b/eigen/unsupported/test/autodiff_scalar.cpp
@@ -81,6 +81,9 @@ void check_limits_specialization()
   typedef std::numeric_limits<AD> A;
   typedef std::numeric_limits<Scalar> B;
 
+  // workaround "unsed typedef" warning:
+  VERIFY(!bool(internal::is_same<B, A>::value));
+
 #if EIGEN_HAS_CXX11
   VERIFY(bool(std::is_base_of<B, A>::value));
 #endif
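[Editor's note: illustration only. The bug_126x fixtures above now initialize every AutoDiffScalar, since reading an uninitialized one is undefined behavior. A minimal sketch of a properly seeded AutoDiffScalar:]

```cpp
#include <iostream>
#include <unsupported/Eigen/AutoDiff>

int main()
{
  typedef Eigen::AutoDiffScalar<Eigen::Vector2d> AD;
  AD x(3.0, 2, 0);              // value 3, variable 0 of 2 -> derivative seed (1,0)
  AD y = x * x + AD(1.0, 2, 1); // second term is variable 1 with value 1
  std::cout << y.value() << "\n";                   // 10
  std::cout << y.derivatives().transpose() << "\n"; // 6 1
}
```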
diff --git a/eigen/unsupported/test/cxx11_tensor_argmax_cuda.cu b/eigen/unsupported/test/cxx11_tensor_argmax_cuda.cu
index 653443d..3d73d49 100644
--- a/eigen/unsupported/test/cxx11_tensor_argmax_cuda.cu
+++ b/eigen/unsupported/test/cxx11_tensor_argmax_cuda.cu
@@ -12,9 +12,6 @@
 #define EIGEN_TEST_FUNC cxx11_tensor_cuda
 #define EIGEN_USE_GPU
 
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
-#include <cuda_fp16.h>
-#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
diff --git a/eigen/unsupported/test/cxx11_tensor_cast_float16_cuda.cu b/eigen/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
index 88c2339..816e032 100644
--- a/eigen/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
+++ b/eigen/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
@@ -13,9 +13,6 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_GPU
 
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
-#include <cuda_fp16.h>
-#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
diff --git a/eigen/unsupported/test/cxx11_tensor_complex_cuda.cu b/eigen/unsupported/test/cxx11_tensor_complex_cuda.cu
index d4e111f..916f12a 100644
--- a/eigen/unsupported/test/cxx11_tensor_complex_cuda.cu
+++ b/eigen/unsupported/test/cxx11_tensor_complex_cuda.cu
@@ -11,9 +11,6 @@
 #define EIGEN_TEST_FUNC cxx11_tensor_complex
 #define EIGEN_USE_GPU
 
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
-#include <cuda_fp16.h>
-#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
diff --git a/eigen/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu b/eigen/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu
index 2baf5ea..aac7809 100644
--- a/eigen/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu
+++ b/eigen/unsupported/test/cxx11_tensor_complex_cwise_ops_cuda.cu
@@ -11,9 +11,6 @@
 #define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops
 #define EIGEN_USE_GPU
 
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
-#include <cuda_fp16.h>
-#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
diff --git a/eigen/unsupported/test/cxx11_tensor_contract_cuda.cu b/eigen/unsupported/test/cxx11_tensor_contract_cuda.cu
index dd68430..e821ccf 100644
--- a/eigen/unsupported/test/cxx11_tensor_contract_cuda.cu
+++ b/eigen/unsupported/test/cxx11_tensor_contract_cuda.cu
@@ -14,9 +14,6 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_GPU
 
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
-#include <cuda_fp16.h>
-#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
diff --git a/eigen/unsupported/test/cxx11_tensor_cuda.cu b/eigen/unsupported/test/cxx11_tensor_cuda.cu
index 0ba9d52..9584a53 100644
--- a/eigen/unsupported/test/cxx11_tensor_cuda.cu
+++ b/eigen/unsupported/test/cxx11_tensor_cuda.cu
@@ -12,9 +12,6 @@
 #define EIGEN_TEST_FUNC cxx11_tensor_cuda
 #define EIGEN_USE_GPU
 
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
-#include <cuda_fp16.h>
-#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
diff --git a/eigen/unsupported/test/cxx11_tensor_device.cu b/eigen/unsupported/test/cxx11_tensor_device.cu
index fde20dd..cbb43e2 100644
--- a/eigen/unsupported/test/cxx11_tensor_device.cu
+++ b/eigen/unsupported/test/cxx11_tensor_device.cu
@@ -13,9 +13,6 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_GPU
 
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
-#include <cuda_fp16.h>
-#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
diff --git a/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu
index 2f86980..e296bf9 100644
--- a/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu
+++ b/eigen/unsupported/test/cxx11_tensor_of_float16_cuda.cu
@@ -13,9 +13,6 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_GPU
 
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
-#include <cuda_fp16.h>
-#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
diff --git a/eigen/unsupported/test/cxx11_tensor_random_cuda.cu b/eigen/unsupported/test/cxx11_tensor_random_cuda.cu
index b3be199..fa1a467 100644
--- a/eigen/unsupported/test/cxx11_tensor_random_cuda.cu
+++ b/eigen/unsupported/test/cxx11_tensor_random_cuda.cu
@@ -13,9 +13,6 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_GPU
 
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
-#include <cuda_fp16.h>
-#endif
 #include "main.h"
 #include <Eigen/CXX11/Tensor>
diff --git a/eigen/unsupported/test/cxx11_tensor_reduction_cuda.cu b/eigen/unsupported/test/cxx11_tensor_reduction_cuda.cu
index 6858b43..ec06697 100644
--- a/eigen/unsupported/test/cxx11_tensor_reduction_cuda.cu
+++ b/eigen/unsupported/test/cxx11_tensor_reduction_cuda.cu
@@ -12,9 +12,6 @@
 #define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda
 #define EIGEN_USE_GPU
 
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
-#include <cuda_fp16.h>
-#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
diff --git a/eigen/unsupported/test/cxx11_tensor_scan_cuda.cu b/eigen/unsupported/test/cxx11_tensor_scan_cuda.cu
index 5f146f3..de1c0ac 100644
--- a/eigen/unsupported/test/cxx11_tensor_scan_cuda.cu
+++ b/eigen/unsupported/test/cxx11_tensor_scan_cuda.cu
@@ -13,9 +13,6 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_GPU
 
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
-#include <cuda_fp16.h>
-#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>