diff options
Diffstat (limited to 'eigen/Eigen')
95 files changed, 1247 insertions, 3432 deletions
diff --git a/eigen/Eigen/Core b/eigen/Eigen/Core index d188356..0f7fa63 100644 --- a/eigen/Eigen/Core +++ b/eigen/Eigen/Core @@ -43,8 +43,10 @@ #else #define EIGEN_DEVICE_FUNC #endif + #else #define EIGEN_DEVICE_FUNC + #endif // When compiling CUDA device code with NVCC, pull in math functions from the @@ -141,24 +143,15 @@ #endif #ifdef __AVX2__ #define EIGEN_VECTORIZE_AVX2 - #define EIGEN_VECTORIZE_AVX - #define EIGEN_VECTORIZE_SSE3 - #define EIGEN_VECTORIZE_SSSE3 - #define EIGEN_VECTORIZE_SSE4_1 - #define EIGEN_VECTORIZE_SSE4_2 #endif #ifdef __FMA__ #define EIGEN_VECTORIZE_FMA #endif - #if defined(__AVX512F__) + #if defined(__AVX512F__) && defined(EIGEN_ENABLE_AVX512) #define EIGEN_VECTORIZE_AVX512 #define EIGEN_VECTORIZE_AVX2 #define EIGEN_VECTORIZE_AVX #define EIGEN_VECTORIZE_FMA - #define EIGEN_VECTORIZE_SSE3 - #define EIGEN_VECTORIZE_SSSE3 - #define EIGEN_VECTORIZE_SSE4_1 - #define EIGEN_VECTORIZE_SSE4_2 #ifdef __AVX512DQ__ #define EIGEN_VECTORIZE_AVX512DQ #endif @@ -290,15 +283,6 @@ #include <intrin.h> #endif -#if defined(__SYCL_DEVICE_ONLY__) - #undef min - #undef max - #undef isnan - #undef isinf - #undef isfinite - #include <SYCL/sycl.hpp> -#endif - /** \brief Namespace containing all symbols from the %Eigen library. */ namespace Eigen { @@ -363,9 +347,6 @@ using std::ptrdiff_t; #include "src/Core/util/StaticAssert.h" #include "src/Core/util/XprHelper.h" #include "src/Core/util/Memory.h" -#include "src/Core/util/IntegralConstant.h" -#include "src/Core/util/SymbolicIndex.h" - #include "src/Core/NumTraits.h" #include "src/Core/MathFunctions.h" @@ -376,8 +357,6 @@ using std::ptrdiff_t; #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/AVX/PacketMath.h" #include "src/Core/arch/AVX512/PacketMath.h" - #include "src/Core/arch/SSE/MathFunctions.h" - #include "src/Core/arch/AVX/MathFunctions.h" #include "src/Core/arch/AVX512/MathFunctions.h" #elif defined EIGEN_VECTORIZE_AVX // Use AVX for floats and doubles, SSE for integers @@ -430,8 +409,6 @@ using std::ptrdiff_t; // on CUDA devices #include "src/Core/arch/CUDA/Complex.h" -#include "src/Core/util/IndexedViewHelper.h" -#include "src/Core/ArithmeticSequence.h" #include "src/Core/IO.h" #include "src/Core/DenseCoeffsBase.h" #include "src/Core/DenseBase.h" @@ -473,7 +450,6 @@ using std::ptrdiff_t; #include "src/Core/Ref.h" #include "src/Core/Block.h" #include "src/Core/VectorBlock.h" -#include "src/Core/IndexedView.h" #include "src/Core/Transpose.h" #include "src/Core/DiagonalMatrix.h" #include "src/Core/Diagonal.h" diff --git a/eigen/Eigen/Geometry b/eigen/Eigen/Geometry index 131a4ed..716d529 100644 --- a/eigen/Eigen/Geometry +++ b/eigen/Eigen/Geometry @@ -59,3 +59,4 @@ #endif // EIGEN_GEOMETRY_MODULE_H /* vim: set filetype=cpp et sw=2 ts=2 ai: */ + diff --git a/eigen/Eigen/src/Cholesky/LDLT.h b/eigen/Eigen/src/Cholesky/LDLT.h index 9b4fdb4..fcee7b2 100644 --- a/eigen/Eigen/src/Cholesky/LDLT.h +++ b/eigen/Eigen/src/Cholesky/LDLT.h @@ -258,6 +258,7 @@ template<typename _MatrixType, int _UpLo> class LDLT #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif diff --git a/eigen/Eigen/src/Cholesky/LLT.h b/eigen/Eigen/src/Cholesky/LLT.h index e6c02d8..87ca8d4 100644 --- a/eigen/Eigen/src/Cholesky/LLT.h +++ b/eigen/Eigen/src/Cholesky/LLT.h @@ -200,6 +200,7 @@ template<typename _MatrixType, int _UpLo> class LLT #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif diff --git a/eigen/Eigen/src/CholmodSupport/CholmodSupport.h b/eigen/Eigen/src/CholmodSupport/CholmodSupport.h index 61faf43..5719720 100644 --- a/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/eigen/Eigen/src/CholmodSupport/CholmodSupport.h @@ -32,7 +32,7 @@ template<> struct cholmod_configure_matrix<std::complex<double> > { } }; -// Other scalar types are not yet supported by Cholmod +// Other scalar types are not yet suppotred by Cholmod // template<> struct cholmod_configure_matrix<float> { // template<typename CholmodType> // static void run(CholmodType& mat) { @@ -124,9 +124,6 @@ cholmod_sparse viewAsCholmod(const SparseSelfAdjointView<const SparseMatrix<_Sca if(UpLo==Upper) res.stype = 1; if(UpLo==Lower) res.stype = -1; - // swap stype for rowmajor matrices (only works for real matrices) - EIGEN_STATIC_ASSERT((_Options & RowMajorBit) == 0 || NumTraits<_Scalar>::IsComplex == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); - if(_Options & RowMajorBit) res.stype *=-1; return res; } @@ -162,44 +159,6 @@ MappedSparseMatrix<Scalar,Flags,StorageIndex> viewAsEigen(cholmod_sparse& cm) static_cast<StorageIndex*>(cm.p), static_cast<StorageIndex*>(cm.i),static_cast<Scalar*>(cm.x) ); } -namespace internal { - -// template specializations for int and long that call the correct cholmod method - -#define EIGEN_CHOLMOD_SPECIALIZE0(ret, name) \ - template<typename _StorageIndex> ret cm_ ## name (cholmod_common &Common) { return cholmod_ ## name (&Common); } \ - template<> ret cm_ ## name<long> (cholmod_common &Common) { return cholmod_l_ ## name (&Common); } - -#define EIGEN_CHOLMOD_SPECIALIZE1(ret, name, t1, a1) \ - template<typename _StorageIndex> ret cm_ ## name (t1& a1, cholmod_common &Common) { return cholmod_ ## name (&a1, &Common); } \ - template<> ret cm_ ## name<long> (t1& a1, cholmod_common &Common) { return cholmod_l_ ## name (&a1, &Common); } - -EIGEN_CHOLMOD_SPECIALIZE0(int, start) -EIGEN_CHOLMOD_SPECIALIZE0(int, finish) - -EIGEN_CHOLMOD_SPECIALIZE1(int, free_factor, cholmod_factor*, L) -EIGEN_CHOLMOD_SPECIALIZE1(int, free_dense, cholmod_dense*, X) -EIGEN_CHOLMOD_SPECIALIZE1(int, free_sparse, cholmod_sparse*, A) - -EIGEN_CHOLMOD_SPECIALIZE1(cholmod_factor*, analyze, cholmod_sparse, A) - -template<typename _StorageIndex> cholmod_dense* cm_solve (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_solve (sys, &L, &B, &Common); } -template<> cholmod_dense* cm_solve<long> (int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common &Common) { return cholmod_l_solve (sys, &L, &B, &Common); } - -template<typename _StorageIndex> cholmod_sparse* cm_spsolve (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_spsolve (sys, &L, &B, &Common); } -template<> cholmod_sparse* cm_spsolve<long> (int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common &Common) { return cholmod_l_spsolve (sys, &L, &B, &Common); } - -template<typename _StorageIndex> -int cm_factorize_p (cholmod_sparse* A, double beta[2], _StorageIndex* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_factorize_p (A, beta, fset, fsize, L, &Common); } -template<> -int cm_factorize_p<long> (cholmod_sparse* A, double beta[2], long* fset, std::size_t fsize, cholmod_factor* L, cholmod_common &Common) { return cholmod_l_factorize_p (A, beta, fset, fsize, L, &Common); } - -#undef EIGEN_CHOLMOD_SPECIALIZE0 -#undef EIGEN_CHOLMOD_SPECIALIZE1 - -} // namespace internal - - enum CholmodMode { CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt }; @@ -236,7 +195,7 @@ class CholmodBase : public SparseSolverBase<Derived> { EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); m_shiftOffset[0] = m_shiftOffset[1] = 0.0; - internal::cm_start<StorageIndex>(m_cholmod); + cholmod_start(&m_cholmod); } explicit CholmodBase(const MatrixType& matrix) @@ -244,15 +203,15 @@ class CholmodBase : public SparseSolverBase<Derived> { EIGEN_STATIC_ASSERT((internal::is_same<double,RealScalar>::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); m_shiftOffset[0] = m_shiftOffset[1] = 0.0; - internal::cm_start<StorageIndex>(m_cholmod); + cholmod_start(&m_cholmod); compute(matrix); } ~CholmodBase() { if(m_cholmodFactor) - internal::cm_free_factor<StorageIndex>(m_cholmodFactor, m_cholmod); - internal::cm_finish<StorageIndex>(m_cholmod); + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); + cholmod_finish(&m_cholmod); } inline StorageIndex cols() const { return internal::convert_index<StorageIndex, Index>(m_cholmodFactor->n); } @@ -260,7 +219,7 @@ class CholmodBase : public SparseSolverBase<Derived> /** \brief Reports whether previous computation was successful. * - * \returns \c Success if computation was successful, + * \returns \c Success if computation was succesful, * \c NumericalIssue if the matrix.appears to be negative. */ ComputationInfo info() const @@ -287,11 +246,11 @@ class CholmodBase : public SparseSolverBase<Derived> { if(m_cholmodFactor) { - internal::cm_free_factor<StorageIndex>(m_cholmodFactor, m_cholmod); + cholmod_free_factor(&m_cholmodFactor, &m_cholmod); m_cholmodFactor = 0; } cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>()); - m_cholmodFactor = internal::cm_analyze<StorageIndex>(A, m_cholmod); + m_cholmodFactor = cholmod_analyze(&A, &m_cholmod); this->m_isInitialized = true; this->m_info = Success; @@ -309,7 +268,7 @@ class CholmodBase : public SparseSolverBase<Derived> { eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView<UpLo>()); - internal::cm_factorize_p<StorageIndex>(&A, m_shiftOffset, 0, 0, m_cholmodFactor, m_cholmod); + cholmod_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, &m_cholmod); // If the factorization failed, minor is the column at which it did. On success minor == n. this->m_info = (m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); @@ -330,20 +289,19 @@ class CholmodBase : public SparseSolverBase<Derived> EIGEN_UNUSED_VARIABLE(size); eigen_assert(size==b.rows()); - // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref. + // Cholmod needs column-major stoarge without inner-stride, which corresponds to the default behavior of Ref. Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived()); cholmod_dense b_cd = viewAsCholmod(b_ref); - cholmod_dense* x_cd = internal::cm_solve<StorageIndex>(CHOLMOD_A, *m_cholmodFactor, b_cd, m_cholmod); + cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod); if(!x_cd) { this->m_info = NumericalIssue; return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) - // NOTE Actually, the copy can be avoided by calling cholmod_solve2 instead of cholmod_solve dest = Matrix<Scalar,Dest::RowsAtCompileTime,Dest::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x),b.rows(),b.cols()); - internal::cm_free_dense<StorageIndex>(x_cd, m_cholmod); + cholmod_free_dense(&x_cd, &m_cholmod); } /** \internal */ @@ -358,16 +316,15 @@ class CholmodBase : public SparseSolverBase<Derived> // note: cs stands for Cholmod Sparse Ref<SparseMatrix<typename RhsDerived::Scalar,ColMajor,typename RhsDerived::StorageIndex> > b_ref(b.const_cast_derived()); cholmod_sparse b_cs = viewAsCholmod(b_ref); - cholmod_sparse* x_cs = internal::cm_spsolve<StorageIndex>(CHOLMOD_A, *m_cholmodFactor, b_cs, m_cholmod); + cholmod_sparse* x_cs = cholmod_spsolve(CHOLMOD_A, m_cholmodFactor, &b_cs, &m_cholmod); if(!x_cs) { this->m_info = NumericalIssue; return; } // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) - // NOTE cholmod_spsolve in fact just calls the dense solver for blocks of 4 columns at a time (similar to Eigen's sparse solver) dest.derived() = viewAsEigen<typename DestDerived::Scalar,ColMajor,typename DestDerived::StorageIndex>(*x_cs); - internal::cm_free_sparse<StorageIndex>(x_cs, m_cholmod); + cholmod_free_sparse(&x_cs, &m_cholmod); } #endif // EIGEN_PARSED_BY_DOXYGEN diff --git a/eigen/Eigen/src/Core/ArithmeticSequence.h b/eigen/Eigen/src/Core/ArithmeticSequence.h deleted file mode 100644 index ada1571..0000000 --- a/eigen/Eigen/src/Core/ArithmeticSequence.h +++ /dev/null @@ -1,350 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_ARITHMETIC_SEQUENCE_H -#define EIGEN_ARITHMETIC_SEQUENCE_H - -namespace Eigen { - -namespace internal { - -#if (!EIGEN_HAS_CXX11) || !((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48) -template<typename T> struct aseq_negate {}; - -template<> struct aseq_negate<Index> { - typedef Index type; -}; - -template<int N> struct aseq_negate<FixedInt<N> > { - typedef FixedInt<-N> type; -}; - -// Compilation error in the following case: -template<> struct aseq_negate<FixedInt<DynamicIndex> > {}; - -template<typename FirstType,typename SizeType,typename IncrType, - bool FirstIsSymbolic=Symbolic::is_symbolic<FirstType>::value, - bool SizeIsSymbolic =Symbolic::is_symbolic<SizeType>::value> -struct aseq_reverse_first_type { - typedef Index type; -}; - -template<typename FirstType,typename SizeType,typename IncrType> -struct aseq_reverse_first_type<FirstType,SizeType,IncrType,true,true> { - typedef Symbolic::AddExpr<FirstType, - Symbolic::ProductExpr<Symbolic::AddExpr<SizeType,Symbolic::ValueExpr<FixedInt<-1> > >, - Symbolic::ValueExpr<IncrType> > - > type; -}; - -template<typename SizeType,typename IncrType,typename EnableIf = void> -struct aseq_reverse_first_type_aux { - typedef Index type; -}; - -template<typename SizeType,typename IncrType> -struct aseq_reverse_first_type_aux<SizeType,IncrType,typename internal::enable_if<bool((SizeType::value+IncrType::value)|0x1)>::type> { - typedef FixedInt<(SizeType::value-1)*IncrType::value> type; -}; - -template<typename FirstType,typename SizeType,typename IncrType> -struct aseq_reverse_first_type<FirstType,SizeType,IncrType,true,false> { - typedef typename aseq_reverse_first_type_aux<SizeType,IncrType>::type Aux; - typedef Symbolic::AddExpr<FirstType,Symbolic::ValueExpr<Aux> > type; -}; - -template<typename FirstType,typename SizeType,typename IncrType> -struct aseq_reverse_first_type<FirstType,SizeType,IncrType,false,true> { - typedef Symbolic::AddExpr<Symbolic::ProductExpr<Symbolic::AddExpr<SizeType,Symbolic::ValueExpr<FixedInt<-1> > >, - Symbolic::ValueExpr<IncrType> >, - Symbolic::ValueExpr<> > type; -}; -#endif - -// Helper to cleanup the type of the increment: -template<typename T> struct cleanup_seq_incr { - typedef typename cleanup_index_type<T,DynamicIndex>::type type; -}; - -} - -//-------------------------------------------------------------------------------- -// seq(first,last,incr) and seqN(first,size,incr) -//-------------------------------------------------------------------------------- - -template<typename FirstType=Index,typename SizeType=Index,typename IncrType=internal::FixedInt<1> > -class ArithmeticSequence; - -template<typename FirstType,typename SizeType,typename IncrType> -ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type, - typename internal::cleanup_index_type<SizeType>::type, - typename internal::cleanup_seq_incr<IncrType>::type > -seqN(FirstType first, SizeType size, IncrType incr); - -/** \class ArithmeticSequence - * \ingroup Core_Module - * - * This class represents an arithmetic progression \f$ a_0, a_1, a_2, ..., a_{n-1}\f$ defined by - * its \em first value \f$ a_0 \f$, its \em size (aka length) \em n, and the \em increment (aka stride) - * that is equal to \f$ a_{i+1}-a_{i}\f$ for any \em i. - * - * It is internally used as the return type of the Eigen::seq and Eigen::seqN functions, and as the input arguments - * of DenseBase::operator()(const RowIndices&, const ColIndices&), and most of the time this is the - * only way it is used. - * - * \tparam FirstType type of the first element, usually an Index, - * but internally it can be a symbolic expression - * \tparam SizeType type representing the size of the sequence, usually an Index - * or a compile time integral constant. Internally, it can also be a symbolic expression - * \tparam IncrType type of the increment, can be a runtime Index, or a compile time integral constant (default is compile-time 1) - * - * \sa Eigen::seq, Eigen::seqN, DenseBase::operator()(const RowIndices&, const ColIndices&), class IndexedView - */ -template<typename FirstType,typename SizeType,typename IncrType> -class ArithmeticSequence -{ -public: - ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {} - ArithmeticSequence(FirstType first, SizeType size, IncrType incr) : m_first(first), m_size(size), m_incr(incr) {} - - enum { - SizeAtCompileTime = internal::get_fixed_value<SizeType>::value, - IncrAtCompileTime = internal::get_fixed_value<IncrType,DynamicIndex>::value - }; - - /** \returns the size, i.e., number of elements, of the sequence */ - Index size() const { return m_size; } - - /** \returns the first element \f$ a_0 \f$ in the sequence */ - Index first() const { return m_first; } - - /** \returns the value \f$ a_i \f$ at index \a i in the sequence. */ - Index operator[](Index i) const { return m_first + i * m_incr; } - - const FirstType& firstObject() const { return m_first; } - const SizeType& sizeObject() const { return m_size; } - const IncrType& incrObject() const { return m_incr; } - -protected: - FirstType m_first; - SizeType m_size; - IncrType m_incr; - -public: - -#if EIGEN_HAS_CXX11 && ((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48) - auto reverse() const -> decltype(Eigen::seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr)) { - return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr); - } -#else -protected: - typedef typename internal::aseq_negate<IncrType>::type ReverseIncrType; - typedef typename internal::aseq_reverse_first_type<FirstType,SizeType,IncrType>::type ReverseFirstType; -public: - ArithmeticSequence<ReverseFirstType,SizeType,ReverseIncrType> - reverse() const { - return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr); - } -#endif -}; - -/** \returns an ArithmeticSequence starting at \a first, of length \a size, and increment \a incr - * - * \sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) */ -template<typename FirstType,typename SizeType,typename IncrType> -ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type,typename internal::cleanup_seq_incr<IncrType>::type > -seqN(FirstType first, SizeType size, IncrType incr) { - return ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type,typename internal::cleanup_seq_incr<IncrType>::type>(first,size,incr); -} - -/** \returns an ArithmeticSequence starting at \a first, of length \a size, and unit increment - * - * \sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType) */ -template<typename FirstType,typename SizeType> -ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type > -seqN(FirstType first, SizeType size) { - return ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type>(first,size); -} - -#ifdef EIGEN_PARSED_BY_DOXYGEN - -/** \returns an ArithmeticSequence starting at \a f, up (or down) to \a l, and with positive (or negative) increment \a incr - * - * It is essentially an alias to: - * \code - * seqN(f, (l-f+incr)/incr, incr); - * \endcode - * - * \sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType) - */ -template<typename FirstType,typename LastType, typename IncrType> -auto seq(FirstType f, LastType l, IncrType incr); - -/** \returns an ArithmeticSequence starting at \a f, up (or down) to \a l, and unit increment - * - * It is essentially an alias to: - * \code - * seqN(f,l-f+1); - * \endcode - * - * \sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) - */ -template<typename FirstType,typename LastType> -auto seq(FirstType f, LastType l); - -#else // EIGEN_PARSED_BY_DOXYGEN - -#if EIGEN_HAS_CXX11 -template<typename FirstType,typename LastType> -auto seq(FirstType f, LastType l) -> decltype(seqN(typename internal::cleanup_index_type<FirstType>::type(f), - ( typename internal::cleanup_index_type<LastType>::type(l) - - typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>()))) -{ - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - (typename internal::cleanup_index_type<LastType>::type(l) - -typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>())); -} - -template<typename FirstType,typename LastType, typename IncrType> -auto seq(FirstType f, LastType l, IncrType incr) - -> decltype(seqN(typename internal::cleanup_index_type<FirstType>::type(f), - ( typename internal::cleanup_index_type<LastType>::type(l) - - typename internal::cleanup_index_type<FirstType>::type(f)+typename internal::cleanup_seq_incr<IncrType>::type(incr) - ) / typename internal::cleanup_seq_incr<IncrType>::type(incr), - typename internal::cleanup_seq_incr<IncrType>::type(incr))) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - ( typename internal::cleanup_index_type<LastType>::type(l) - -typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr)) / CleanedIncrType(incr), - CleanedIncrType(incr)); -} -#else - -template<typename FirstType,typename LastType> -typename internal::enable_if<!(Symbolic::is_symbolic<FirstType>::value || Symbolic::is_symbolic<LastType>::value), - ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,Index> >::type -seq(FirstType f, LastType l) -{ - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - Index((typename internal::cleanup_index_type<LastType>::type(l)-typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>()))); -} - -template<typename FirstTypeDerived,typename LastType> -typename internal::enable_if<!Symbolic::is_symbolic<LastType>::value, - ArithmeticSequence<FirstTypeDerived, Symbolic::AddExpr<Symbolic::AddExpr<Symbolic::NegateExpr<FirstTypeDerived>,Symbolic::ValueExpr<> >, - Symbolic::ValueExpr<internal::FixedInt<1> > > > >::type -seq(const Symbolic::BaseExpr<FirstTypeDerived> &f, LastType l) -{ - return seqN(f.derived(),(typename internal::cleanup_index_type<LastType>::type(l)-f.derived()+fix<1>())); -} - -template<typename FirstType,typename LastTypeDerived> -typename internal::enable_if<!Symbolic::is_symbolic<FirstType>::value, - ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type, - Symbolic::AddExpr<Symbolic::AddExpr<LastTypeDerived,Symbolic::ValueExpr<> >, - Symbolic::ValueExpr<internal::FixedInt<1> > > > >::type -seq(FirstType f, const Symbolic::BaseExpr<LastTypeDerived> &l) -{ - return seqN(typename internal::cleanup_index_type<FirstType>::type(f),(l.derived()-typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>())); -} - -template<typename FirstTypeDerived,typename LastTypeDerived> -ArithmeticSequence<FirstTypeDerived, - Symbolic::AddExpr<Symbolic::AddExpr<LastTypeDerived,Symbolic::NegateExpr<FirstTypeDerived> >,Symbolic::ValueExpr<internal::FixedInt<1> > > > -seq(const Symbolic::BaseExpr<FirstTypeDerived> &f, const Symbolic::BaseExpr<LastTypeDerived> &l) -{ - return seqN(f.derived(),(l.derived()-f.derived()+fix<1>())); -} - - -template<typename FirstType,typename LastType, typename IncrType> -typename internal::enable_if<!(Symbolic::is_symbolic<FirstType>::value || Symbolic::is_symbolic<LastType>::value), - ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,Index,typename internal::cleanup_seq_incr<IncrType>::type> >::type -seq(FirstType f, LastType l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - Index((typename internal::cleanup_index_type<LastType>::type(l)-typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr)), incr); -} - -template<typename FirstTypeDerived,typename LastType, typename IncrType> -typename internal::enable_if<!Symbolic::is_symbolic<LastType>::value, - ArithmeticSequence<FirstTypeDerived, - Symbolic::QuotientExpr<Symbolic::AddExpr<Symbolic::AddExpr<Symbolic::NegateExpr<FirstTypeDerived>, - Symbolic::ValueExpr<> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - typename internal::cleanup_seq_incr<IncrType>::type> >::type -seq(const Symbolic::BaseExpr<FirstTypeDerived> &f, LastType l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(f.derived(),(typename internal::cleanup_index_type<LastType>::type(l)-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} - -template<typename FirstType,typename LastTypeDerived, typename IncrType> -typename internal::enable_if<!Symbolic::is_symbolic<FirstType>::value, - ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type, - Symbolic::QuotientExpr<Symbolic::AddExpr<Symbolic::AddExpr<LastTypeDerived,Symbolic::ValueExpr<> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - typename internal::cleanup_seq_incr<IncrType>::type> >::type -seq(FirstType f, const Symbolic::BaseExpr<LastTypeDerived> &l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - (l.derived()-typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} - -template<typename FirstTypeDerived,typename LastTypeDerived, typename IncrType> -ArithmeticSequence<FirstTypeDerived, - Symbolic::QuotientExpr<Symbolic::AddExpr<Symbolic::AddExpr<LastTypeDerived, - Symbolic::NegateExpr<FirstTypeDerived> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - typename internal::cleanup_seq_incr<IncrType>::type> -seq(const Symbolic::BaseExpr<FirstTypeDerived> &f, const Symbolic::BaseExpr<LastTypeDerived> &l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(f.derived(),(l.derived()-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} -#endif - -#endif // EIGEN_PARSED_BY_DOXYGEN - -namespace internal { - -// Convert a symbolic span into a usable one (i.e., remove last/end "keywords") -template<typename T> -struct make_size_type { - typedef typename internal::conditional<Symbolic::is_symbolic<T>::value, Index, T>::type type; -}; - -template<typename FirstType,typename SizeType,typename IncrType,int XprSize> -struct IndexedViewCompatibleType<ArithmeticSequence<FirstType,SizeType,IncrType>, XprSize> { - typedef ArithmeticSequence<Index,typename make_size_type<SizeType>::type,IncrType> type; -}; - -template<typename FirstType,typename SizeType,typename IncrType> -ArithmeticSequence<Index,typename make_size_type<SizeType>::type,IncrType> -makeIndexedViewCompatible(const ArithmeticSequence<FirstType,SizeType,IncrType>& ids, Index size,SpecializedType) { - return ArithmeticSequence<Index,typename make_size_type<SizeType>::type,IncrType>( - eval_expr_given_size(ids.firstObject(),size),eval_expr_given_size(ids.sizeObject(),size),ids.incrObject()); -} - -template<typename FirstType,typename SizeType,typename IncrType> -struct get_compile_time_incr<ArithmeticSequence<FirstType,SizeType,IncrType> > { - enum { value = get_fixed_value<IncrType,DynamicIndex>::value }; -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_ARITHMETIC_SEQUENCE_H diff --git a/eigen/Eigen/src/Core/Array.h b/eigen/Eigen/src/Core/Array.h index 0d34269..e10020d 100644 --- a/eigen/Eigen/src/Core/Array.h +++ b/eigen/Eigen/src/Core/Array.h @@ -231,10 +231,16 @@ class Array : Base(other) { } + private: + struct PrivateType {}; + public: + /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */ template<typename OtherDerived> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other) + EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other, + typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value, + PrivateType>::type = PrivateType()) : Base(other.derived()) { } diff --git a/eigen/Eigen/src/Core/ArrayBase.h b/eigen/Eigen/src/Core/ArrayBase.h index 9da960f..3dbc708 100644 --- a/eigen/Eigen/src/Core/ArrayBase.h +++ b/eigen/Eigen/src/Core/ArrayBase.h @@ -69,7 +69,6 @@ template<typename Derived> class ArrayBase using Base::coeff; using Base::coeffRef; using Base::lazyAssign; - using Base::operator-; using Base::operator=; using Base::operator+=; using Base::operator-=; @@ -89,6 +88,7 @@ template<typename Derived> class ArrayBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase #define EIGEN_DOC_UNARY_ADDONS(X,Y) +# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" # include "../plugins/ArrayCwiseUnaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h" diff --git a/eigen/Eigen/src/Core/ArrayWrapper.h b/eigen/Eigen/src/Core/ArrayWrapper.h index a04521a..688aadd 100644 --- a/eigen/Eigen/src/Core/ArrayWrapper.h +++ b/eigen/Eigen/src/Core/ArrayWrapper.h @@ -32,7 +32,8 @@ struct traits<ArrayWrapper<ExpressionType> > // Let's remove NestByRefBit enum { Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } @@ -129,7 +130,8 @@ struct traits<MatrixWrapper<ExpressionType> > // Let's remove NestByRefBit enum { Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } diff --git a/eigen/Eigen/src/Core/Assign.h b/eigen/Eigen/src/Core/Assign.h index 655412e..53806ba 100644 --- a/eigen/Eigen/src/Core/Assign.h +++ b/eigen/Eigen/src/Core/Assign.h @@ -16,7 +16,7 @@ namespace Eigen { template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived> +EIGEN_STRONG_INLINE Derived& DenseBase<Derived> ::lazyAssign(const DenseBase<OtherDerived>& other) { enum{ diff --git a/eigen/Eigen/src/Core/BooleanRedux.h b/eigen/Eigen/src/Core/BooleanRedux.h index ccf5190..8409d87 100644 --- a/eigen/Eigen/src/Core/BooleanRedux.h +++ b/eigen/Eigen/src/Core/BooleanRedux.h @@ -14,54 +14,56 @@ namespace Eigen { namespace internal { -template<typename Derived, int UnrollCount, int Rows> +template<typename Derived, int UnrollCount> struct all_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Rows, - row = (UnrollCount-1) % Rows + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) { - return all_unroller<Derived, UnrollCount-1, Rows>::run(mat) && mat.coeff(row, col); + return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col); } }; -template<typename Derived, int Rows> -struct all_unroller<Derived, 0, Rows> +template<typename Derived> +struct all_unroller<Derived, 0> { static inline bool run(const Derived &/*mat*/) { return true; } }; -template<typename Derived, int Rows> -struct all_unroller<Derived, Dynamic, Rows> +template<typename Derived> +struct all_unroller<Derived, Dynamic> { static inline bool run(const Derived &) { return false; } }; -template<typename Derived, int UnrollCount, int Rows> +template<typename Derived, int UnrollCount> struct any_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Rows, - row = (UnrollCount-1) % Rows + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) { - return any_unroller<Derived, UnrollCount-1, Rows>::run(mat) || mat.coeff(row, col); + return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col); } }; -template<typename Derived, int Rows> -struct any_unroller<Derived, 0, Rows> +template<typename Derived> +struct any_unroller<Derived, 0> { static inline bool run(const Derived & /*mat*/) { return false; } }; -template<typename Derived, int Rows> -struct any_unroller<Derived, Dynamic, Rows> +template<typename Derived> +struct any_unroller<Derived, Dynamic> { static inline bool run(const Derived &) { return false; } }; @@ -76,7 +78,7 @@ struct any_unroller<Derived, Dynamic, Rows> * \sa any(), Cwise::operator<() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const +inline bool DenseBase<Derived>::all() const { typedef internal::evaluator<Derived> Evaluator; enum { @@ -85,7 +87,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const }; Evaluator evaluator(derived()); if(unroll) - return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator); + return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator); else { for(Index j = 0; j < cols(); ++j) @@ -100,7 +102,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const * \sa all() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const +inline bool DenseBase<Derived>::any() const { typedef internal::evaluator<Derived> Evaluator; enum { @@ -109,7 +111,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const }; Evaluator evaluator(derived()); if(unroll) - return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator); + return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator); else { for(Index j = 0; j < cols(); ++j) @@ -124,7 +126,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const * \sa all(), any() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline Eigen::Index DenseBase<Derived>::count() const +inline Eigen::Index DenseBase<Derived>::count() const { return derived().template cast<bool>().template cast<Index>().sum(); } diff --git a/eigen/Eigen/src/Core/CommaInitializer.h b/eigen/Eigen/src/Core/CommaInitializer.h index 35fdbb8..d218e98 100644 --- a/eigen/Eigen/src/Core/CommaInitializer.h +++ b/eigen/Eigen/src/Core/CommaInitializer.h @@ -141,7 +141,7 @@ struct CommaInitializer * \sa CommaInitializer::finished(), class CommaInitializer */ template<typename Derived> -EIGEN_DEVICE_FUNC inline CommaInitializer<Derived> DenseBase<Derived>::operator<< (const Scalar& s) +inline CommaInitializer<Derived> DenseBase<Derived>::operator<< (const Scalar& s) { return CommaInitializer<Derived>(*static_cast<Derived*>(this), s); } @@ -149,7 +149,7 @@ EIGEN_DEVICE_FUNC inline CommaInitializer<Derived> DenseBase<Derived>::operator< /** \sa operator<<(const Scalar&) */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC inline CommaInitializer<Derived> +inline CommaInitializer<Derived> DenseBase<Derived>::operator<<(const DenseBase<OtherDerived>& other) { return CommaInitializer<Derived>(*static_cast<Derived *>(this), other); diff --git a/eigen/Eigen/src/Core/CoreEvaluators.h b/eigen/Eigen/src/Core/CoreEvaluators.h index 15b361b..f7c1eff 100644 --- a/eigen/Eigen/src/Core/CoreEvaluators.h +++ b/eigen/Eigen/src/Core/CoreEvaluators.h @@ -106,7 +106,7 @@ struct evaluator<const T> // ---------- base class for all evaluators ---------- template<typename ExpressionType> -struct evaluator_base +struct evaluator_base : public noncopyable { // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. typedef traits<ExpressionType> ExpressionTraits; @@ -114,14 +114,6 @@ struct evaluator_base enum { Alignment = 0 }; - // noncopyable: - // Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization) - // and make complex evaluator much larger than then should do. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator_base() {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~evaluator_base() {} -private: - EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&); - EIGEN_DEVICE_FUNC const evaluator_base& operator=(const evaluator_base&); }; // -------------------- Matrix and Array -------------------- @@ -131,27 +123,6 @@ private: // Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, // so no need for more sophisticated dispatching. -// this helper permits to completely eliminate m_outerStride if it is known at compiletime. -template<typename Scalar,int OuterStride> class plainobjectbase_evaluator_data { -public: - EIGEN_DEVICE_FUNC plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr) - { - EIGEN_ONLY_USED_FOR_DEBUG(outerStride); - eigen_internal_assert(outerStride==OuterStride); - } - EIGEN_DEVICE_FUNC Index outerStride() const { return OuterStride; } - const Scalar *data; -}; - -template<typename Scalar> class plainobjectbase_evaluator_data<Scalar,Dynamic> { -public: - EIGEN_DEVICE_FUNC plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr), m_outerStride(outerStride) {} - EIGEN_DEVICE_FUNC Index outerStride() const { return m_outerStride; } - const Scalar *data; -protected: - Index m_outerStride; -}; - template<typename Derived> struct evaluator<PlainObjectBase<Derived> > : evaluator_base<Derived> @@ -170,21 +141,18 @@ struct evaluator<PlainObjectBase<Derived> > Flags = traits<Derived>::EvaluatorFlags, Alignment = traits<Derived>::Alignment }; - enum { - // We do not need to know the outer stride for vectors - OuterStrideAtCompileTime = IsVectorAtCompileTime ? 0 - : int(IsRowMajor) ? ColsAtCompileTime - : RowsAtCompileTime - }; - + EIGEN_DEVICE_FUNC evaluator() - : m_d(0,OuterStrideAtCompileTime) + : m_data(0), + m_outerStride(IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) - : m_d(m.data(),IsVectorAtCompileTime ? 0 : m.outerStride()) + : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } @@ -193,30 +161,30 @@ struct evaluator<PlainObjectBase<Derived> > CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) - return m_d.data[row * m_d.outerStride() + col]; + return m_data[row * m_outerStride.value() + col]; else - return m_d.data[row + col * m_d.outerStride()]; + return m_data[row + col * m_outerStride.value()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.data[index]; + return m_data[index]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) - return const_cast<Scalar*>(m_d.data)[row * m_d.outerStride() + col]; + return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col]; else - return const_cast<Scalar*>(m_d.data)[row + col * m_d.outerStride()]; + return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return const_cast<Scalar*>(m_d.data)[index]; + return const_cast<Scalar*>(m_data)[index]; } template<int LoadMode, typename PacketType> @@ -224,16 +192,16 @@ struct evaluator<PlainObjectBase<Derived> > PacketType packet(Index row, Index col) const { if (IsRowMajor) - return ploadt<PacketType, LoadMode>(m_d.data + row * m_d.outerStride() + col); + return ploadt<PacketType, LoadMode>(m_data + row * m_outerStride.value() + col); else - return ploadt<PacketType, LoadMode>(m_d.data + row + col * m_d.outerStride()); + return ploadt<PacketType, LoadMode>(m_data + row + col * m_outerStride.value()); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return ploadt<PacketType, LoadMode>(m_d.data + index); + return ploadt<PacketType, LoadMode>(m_data + index); } template<int StoreMode,typename PacketType> @@ -242,22 +210,26 @@ struct evaluator<PlainObjectBase<Derived> > { if (IsRowMajor) return pstoret<Scalar, PacketType, StoreMode> - (const_cast<Scalar*>(m_d.data) + row * m_d.outerStride() + col, x); + (const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x); else return pstoret<Scalar, PacketType, StoreMode> - (const_cast<Scalar*>(m_d.data) + row + col * m_d.outerStride(), x); + (const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x); } template<int StoreMode, typename PacketType> EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + index, x); + return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x); } protected: + const Scalar *m_data; - plainobjectbase_evaluator_data<Scalar,OuterStrideAtCompileTime> m_d; + // We do not need to know the outer stride for vectors + variable_if_dynamic<Index, IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime> m_outerStride; }; template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> @@ -555,7 +527,9 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - explicit unary_evaluator(const XprType& op) : m_d(op) + explicit unary_evaluator(const XprType& op) + : m_functor(op.functor()), + m_argImpl(op.nestedExpression()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -566,43 +540,32 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.argImpl.coeff(row, col)); + return m_functor(m_argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.argImpl.coeff(index)); + return m_functor(m_argImpl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(row, col)); + return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(index)); + return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - class Data : private UnaryOp - { - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const UnaryOp& func() const { return static_cast<const UnaryOp&>(*this); } - evaluator<ArgType> argImpl; - }; - - Data m_d; + const UnaryOp m_functor; + evaluator<ArgType> m_argImpl; }; // -------------------- CwiseTernaryOp -------------------- @@ -646,7 +609,11 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased evaluator<Arg3>::Alignment) }; - EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) : m_d(xpr) + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_arg1Impl(xpr.arg1()), + m_arg2Impl(xpr.arg2()), + m_arg3Impl(xpr.arg3()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<TernaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -657,47 +624,38 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.arg1Impl.coeff(row, col), m_d.arg2Impl.coeff(row, col), m_d.arg3Impl.coeff(row, col)); + return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.arg1Impl.coeff(index), m_d.arg2Impl.coeff(index), m_d.arg3Impl.coeff(index)); + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(row, col), - m_d.arg2Impl.template packet<LoadMode,PacketType>(row, col), - m_d.arg3Impl.template packet<LoadMode,PacketType>(row, col)); + return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(row, col), + m_arg2Impl.template packet<LoadMode,PacketType>(row, col), + m_arg3Impl.template packet<LoadMode,PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(index), - m_d.arg2Impl.template packet<LoadMode,PacketType>(index), - m_d.arg3Impl.template packet<LoadMode,PacketType>(index)); + return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(index), + m_arg2Impl.template packet<LoadMode,PacketType>(index), + m_arg3Impl.template packet<LoadMode,PacketType>(index)); } protected: - // this helper permits to completely eliminate the functor if it is empty - struct Data : private TernaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : TernaryOp(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TernaryOp& func() const { return static_cast<const TernaryOp&>(*this); } - evaluator<Arg1> arg1Impl; - evaluator<Arg2> arg2Impl; - evaluator<Arg3> arg3Impl; - }; - - Data m_d; + const TernaryOp m_functor; + evaluator<Arg1> m_arg1Impl; + evaluator<Arg2> m_arg2Impl; + evaluator<Arg3> m_arg3Impl; }; // -------------------- CwiseBinaryOp -------------------- @@ -738,7 +696,10 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment) }; - EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) : m_d(xpr) + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -749,45 +710,35 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.lhsImpl.coeff(row, col), m_d.rhsImpl.coeff(row, col)); + return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.lhsImpl.coeff(index), m_d.rhsImpl.coeff(index)); + return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(row, col), - m_d.rhsImpl.template packet<LoadMode,PacketType>(row, col)); + return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col), + m_rhsImpl.template packet<LoadMode,PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(index), - m_d.rhsImpl.template packet<LoadMode,PacketType>(index)); + return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index), + m_rhsImpl.template packet<LoadMode,PacketType>(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - struct Data : private BinaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : BinaryOp(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const BinaryOp& func() const { return static_cast<const BinaryOp&>(*this); } - evaluator<Lhs> lhsImpl; - evaluator<Rhs> rhsImpl; - }; - - Data m_d; + const BinaryOp m_functor; + evaluator<Lhs> m_lhsImpl; + evaluator<Rhs> m_rhsImpl; }; // -------------------- CwiseUnaryView -------------------- @@ -806,7 +757,9 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... }; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_d(op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + : m_unaryOp(op.functor()), + m_argImpl(op.nestedExpression()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -818,40 +771,30 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.argImpl.coeff(row, col)); + return m_unaryOp(m_argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.argImpl.coeff(index)); + return m_unaryOp(m_argImpl.coeff(index)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { - return m_d.func()(m_d.argImpl.coeffRef(row, col)); + return m_unaryOp(m_argImpl.coeffRef(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return m_d.func()(m_d.argImpl.coeffRef(index)); + return m_unaryOp(m_argImpl.coeffRef(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - struct Data : private UnaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const UnaryOp& func() const { return static_cast<const UnaryOp&>(*this); } - evaluator<ArgType> argImpl; - }; - - Data m_d; + const UnaryOp m_unaryOp; + evaluator<ArgType> m_argImpl; }; // -------------------- Map -------------------- diff --git a/eigen/Eigen/src/Core/CoreIterators.h b/eigen/Eigen/src/Core/CoreIterators.h index b967196..4eb42b9 100644 --- a/eigen/Eigen/src/Core/CoreIterators.h +++ b/eigen/Eigen/src/Core/CoreIterators.h @@ -48,11 +48,6 @@ public: * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView */ EIGEN_STRONG_INLINE InnerIterator& operator++() { m_iter.operator++(); return *this; } - EIGEN_STRONG_INLINE InnerIterator& operator+=(Index i) { m_iter.operator+=(i); return *this; } - EIGEN_STRONG_INLINE InnerIterator operator+(Index i) - { InnerIterator result(*this); result+=i; return result; } - - /// \returns the column or row index of the current coefficient. EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); } /// \returns the row index of the current coefficient. diff --git a/eigen/Eigen/src/Core/CwiseBinaryOp.h b/eigen/Eigen/src/Core/CwiseBinaryOp.h index bf2632d..a36765e 100644 --- a/eigen/Eigen/src/Core/CwiseBinaryOp.h +++ b/eigen/Eigen/src/Core/CwiseBinaryOp.h @@ -158,7 +158,7 @@ public: */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & +EIGEN_STRONG_INLINE Derived & MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other) { call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -171,7 +171,7 @@ MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other) */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & +EIGEN_STRONG_INLINE Derived & MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other) { call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -181,3 +181,4 @@ MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other) } // end namespace Eigen #endif // EIGEN_CWISE_BINARY_OP_H + diff --git a/eigen/Eigen/src/Core/CwiseNullaryOp.h b/eigen/Eigen/src/Core/CwiseNullaryOp.h index 144608e..ddd607e 100644 --- a/eigen/Eigen/src/Core/CwiseNullaryOp.h +++ b/eigen/Eigen/src/Core/CwiseNullaryOp.h @@ -131,7 +131,7 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f */ template<typename Derived> template<typename CustomNullaryOp> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> +EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -170,7 +170,7 @@ DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func) * \sa class CwiseNullaryOp */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value) { return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value)); diff --git a/eigen/Eigen/src/Core/DenseBase.h b/eigen/Eigen/src/Core/DenseBase.h index fd933ee..90066ae 100644 --- a/eigen/Eigen/src/Core/DenseBase.h +++ b/eigen/Eigen/src/Core/DenseBase.h @@ -570,17 +570,13 @@ template<typename Derived> class DenseBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase #define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL #define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) -#define EIGEN_DOC_UNARY_ADDONS(X,Y) -# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/BlockMethods.h" -# include "../plugins/IndexedViewMethods.h" # ifdef EIGEN_DENSEBASE_PLUGIN # include EIGEN_DENSEBASE_PLUGIN # endif #undef EIGEN_CURRENT_STORAGE_BASE_CLASS #undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL #undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF -#undef EIGEN_DOC_UNARY_ADDONS // disable the use of evalTo for dense objects with a nice compilation error template<typename Dest> diff --git a/eigen/Eigen/src/Core/Diagonal.h b/eigen/Eigen/src/Core/Diagonal.h index c62f5ff..49e7112 100644 --- a/eigen/Eigen/src/Core/Diagonal.h +++ b/eigen/Eigen/src/Core/Diagonal.h @@ -184,7 +184,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal * * \sa class Diagonal */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalReturnType +inline typename MatrixBase<Derived>::DiagonalReturnType MatrixBase<Derived>::diagonal() { return DiagonalReturnType(derived()); @@ -192,7 +192,7 @@ MatrixBase<Derived>::diagonal() /** This is the const version of diagonal(). */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalReturnType +inline typename MatrixBase<Derived>::ConstDiagonalReturnType MatrixBase<Derived>::diagonal() const { return ConstDiagonalReturnType(derived()); @@ -210,7 +210,7 @@ MatrixBase<Derived>::diagonal() const * * \sa MatrixBase::diagonal(), class Diagonal */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType +inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType MatrixBase<Derived>::diagonal(Index index) { return DiagonalDynamicIndexReturnType(derived(), index); @@ -218,7 +218,7 @@ MatrixBase<Derived>::diagonal(Index index) /** This is the const version of diagonal(Index). */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType +inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType MatrixBase<Derived>::diagonal(Index index) const { return ConstDiagonalDynamicIndexReturnType(derived(), index); @@ -237,7 +237,6 @@ MatrixBase<Derived>::diagonal(Index index) const * \sa MatrixBase::diagonal(), class Diagonal */ template<typename Derived> template<int Index_> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type MatrixBase<Derived>::diagonal() { @@ -247,7 +246,6 @@ MatrixBase<Derived>::diagonal() /** This is the const version of diagonal<int>(). */ template<typename Derived> template<int Index_> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type MatrixBase<Derived>::diagonal() const { diff --git a/eigen/Eigen/src/Core/DiagonalMatrix.h b/eigen/Eigen/src/Core/DiagonalMatrix.h index 4e8297e..ecfdce8 100644 --- a/eigen/Eigen/src/Core/DiagonalMatrix.h +++ b/eigen/Eigen/src/Core/DiagonalMatrix.h @@ -44,7 +44,7 @@ class DiagonalBase : public EigenBase<Derived> EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } - + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } EIGEN_DEVICE_FUNC @@ -273,7 +273,7 @@ class DiagonalWrapper * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal() **/ template<typename Derived> -EIGEN_DEVICE_FUNC inline const DiagonalWrapper<const Derived> +inline const DiagonalWrapper<const Derived> MatrixBase<Derived>::asDiagonal() const { return DiagonalWrapper<const Derived>(derived()); diff --git a/eigen/Eigen/src/Core/DiagonalProduct.h b/eigen/Eigen/src/Core/DiagonalProduct.h index 7911d1c..d372b93 100644 --- a/eigen/Eigen/src/Core/DiagonalProduct.h +++ b/eigen/Eigen/src/Core/DiagonalProduct.h @@ -17,7 +17,7 @@ namespace Eigen { */ template<typename Derived> template<typename DiagonalDerived> -EIGEN_DEVICE_FUNC inline const Product<Derived, DiagonalDerived, LazyProduct> +inline const Product<Derived, DiagonalDerived, LazyProduct> MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const { return Product<Derived, DiagonalDerived, LazyProduct>(derived(),a_diagonal.derived()); diff --git a/eigen/Eigen/src/Core/Dot.h b/eigen/Eigen/src/Core/Dot.h index bb8e3fe..06ef18b 100644 --- a/eigen/Eigen/src/Core/Dot.h +++ b/eigen/Eigen/src/Core/Dot.h @@ -90,7 +90,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const * \sa dot(), norm(), lpNorm() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const +EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const { return numext::real((*this).cwiseAbs2().sum()); } @@ -102,7 +102,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::trai * \sa lpNorm(), dot(), squaredNorm() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const +inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const { return numext::sqrt(squaredNorm()); } @@ -117,7 +117,7 @@ EIGEN_DEVICE_FUNC inline typename NumTraits<typename internal::traits<Derived>:: * \sa norm(), normalize() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::PlainObject +inline const typename MatrixBase<Derived>::PlainObject MatrixBase<Derived>::normalized() const { typedef typename internal::nested_eval<Derived,2>::type _Nested; @@ -139,7 +139,7 @@ MatrixBase<Derived>::normalized() const * \sa norm(), normalized() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::normalize() +inline void MatrixBase<Derived>::normalize() { RealScalar z = squaredNorm(); // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU @@ -160,7 +160,7 @@ EIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::normalize() * \sa stableNorm(), stableNormalize(), normalized() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::PlainObject +inline const typename MatrixBase<Derived>::PlainObject MatrixBase<Derived>::stableNormalized() const { typedef typename internal::nested_eval<Derived,3>::type _Nested; @@ -185,7 +185,7 @@ MatrixBase<Derived>::stableNormalized() const * \sa stableNorm(), stableNormalized(), normalize() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::stableNormalize() +inline void MatrixBase<Derived>::stableNormalize() { RealScalar w = cwiseAbs().maxCoeff(); RealScalar z = (derived()/w).squaredNorm(); @@ -257,9 +257,9 @@ struct lpNorm_selector<Derived, Infinity> template<typename Derived> template<int p> #ifndef EIGEN_PARSED_BY_DOXYGEN -EIGEN_DEVICE_FUNC inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real +inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real #else -EIGEN_DEVICE_FUNC MatrixBase<Derived>::RealScalar +MatrixBase<Derived>::RealScalar #endif MatrixBase<Derived>::lpNorm() const { diff --git a/eigen/Eigen/src/Core/EigenBase.h b/eigen/Eigen/src/Core/EigenBase.h index ccc122c..b195506 100644 --- a/eigen/Eigen/src/Core/EigenBase.h +++ b/eigen/Eigen/src/Core/EigenBase.h @@ -14,6 +14,7 @@ namespace Eigen { /** \class EigenBase + * \ingroup Core_Module * * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). * diff --git a/eigen/Eigen/src/Core/Fuzzy.h b/eigen/Eigen/src/Core/Fuzzy.h index 43aa49b..3e403a0 100644 --- a/eigen/Eigen/src/Core/Fuzzy.h +++ b/eigen/Eigen/src/Core/Fuzzy.h @@ -100,7 +100,7 @@ struct isMuchSmallerThan_scalar_selector<Derived, true> */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApprox( +bool DenseBase<Derived>::isApprox( const DenseBase<OtherDerived>& other, const RealScalar& prec ) const @@ -122,7 +122,7 @@ EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApprox( * \sa isApprox(), isMuchSmallerThan(const DenseBase<OtherDerived>&, RealScalar) const */ template<typename Derived> -EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan( +bool DenseBase<Derived>::isMuchSmallerThan( const typename NumTraits<Scalar>::Real& other, const RealScalar& prec ) const @@ -142,7 +142,7 @@ EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan( */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan( +bool DenseBase<Derived>::isMuchSmallerThan( const DenseBase<OtherDerived>& other, const RealScalar& prec ) const diff --git a/eigen/Eigen/src/Core/GeneralProduct.h b/eigen/Eigen/src/Core/GeneralProduct.h index b206b0a..0f16cd8 100644 --- a/eigen/Eigen/src/Core/GeneralProduct.h +++ b/eigen/Eigen/src/Core/GeneralProduct.h @@ -428,7 +428,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const template<typename Derived> template<typename OtherDerived> const Product<Derived,OtherDerived,LazyProduct> -EIGEN_DEVICE_FUNC MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const +MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const { enum { ProductIsValid = Derived::ColsAtCompileTime==Dynamic diff --git a/eigen/Eigen/src/Core/GenericPacketMath.h b/eigen/Eigen/src/Core/GenericPacketMath.h index d19d5bb..029f8ac 100644 --- a/eigen/Eigen/src/Core/GenericPacketMath.h +++ b/eigen/Eigen/src/Core/GenericPacketMath.h @@ -61,7 +61,6 @@ struct default_packet_traits HasSqrt = 0, HasRsqrt = 0, HasExp = 0, - HasExpm1 = 0, HasLog = 0, HasLog1p = 0, HasLog10 = 0, @@ -402,10 +401,6 @@ Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { using std::exp; return exp(a); } -/** \internal \returns the expm1 of \a a (coeff-wise) */ -template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet pexpm1(const Packet& a) { return numext::expm1(a); } - /** \internal \returns the log of \a a (coeff-wise) */ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) { using std::log; return log(a); } diff --git a/eigen/Eigen/src/Core/GlobalFunctions.h b/eigen/Eigen/src/Core/GlobalFunctions.h index 12828a7..769dc25 100644 --- a/eigen/Eigen/src/Core/GlobalFunctions.h +++ b/eigen/Eigen/src/Core/GlobalFunctions.h @@ -71,7 +71,6 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log) diff --git a/eigen/Eigen/src/Core/IndexedView.h b/eigen/Eigen/src/Core/IndexedView.h deleted file mode 100644 index 8c57a27..0000000 --- a/eigen/Eigen/src/Core/IndexedView.h +++ /dev/null @@ -1,207 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_INDEXED_VIEW_H -#define EIGEN_INDEXED_VIEW_H - -namespace Eigen { - -namespace internal { - -template<typename XprType, typename RowIndices, typename ColIndices> -struct traits<IndexedView<XprType, RowIndices, ColIndices> > - : traits<XprType> -{ - enum { - RowsAtCompileTime = int(array_size<RowIndices>::value), - ColsAtCompileTime = int(array_size<ColIndices>::value), - MaxRowsAtCompileTime = RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime) : int(traits<XprType>::MaxRowsAtCompileTime), - MaxColsAtCompileTime = ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) : int(traits<XprType>::MaxColsAtCompileTime), - - XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0, - IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 - : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 - : XprTypeIsRowMajor, - - RowIncr = int(get_compile_time_incr<RowIndices>::value), - ColIncr = int(get_compile_time_incr<ColIndices>::value), - InnerIncr = IsRowMajor ? ColIncr : RowIncr, - OuterIncr = IsRowMajor ? RowIncr : ColIncr, - - HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor), - XprInnerStride = HasSameStorageOrderAsXprType ? int(inner_stride_at_compile_time<XprType>::ret) : int(outer_stride_at_compile_time<XprType>::ret), - XprOuterstride = HasSameStorageOrderAsXprType ? int(outer_stride_at_compile_time<XprType>::ret) : int(inner_stride_at_compile_time<XprType>::ret), - - InnerSize = XprTypeIsRowMajor ? ColsAtCompileTime : RowsAtCompileTime, - IsBlockAlike = InnerIncr==1 && OuterIncr==1, - IsInnerPannel = HasSameStorageOrderAsXprType && is_same<AllRange<InnerSize>,typename conditional<XprTypeIsRowMajor,ColIndices,RowIndices>::type>::value, - - InnerStrideAtCompileTime = InnerIncr<0 || InnerIncr==DynamicIndex || XprInnerStride==Dynamic ? Dynamic : XprInnerStride * InnerIncr, - OuterStrideAtCompileTime = OuterIncr<0 || OuterIncr==DynamicIndex || XprOuterstride==Dynamic ? Dynamic : XprOuterstride * OuterIncr, - - ReturnAsScalar = is_same<RowIndices,SingleRange>::value && is_same<ColIndices,SingleRange>::value, - ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike, - ReturnAsIndexedView = (!ReturnAsScalar) && (!ReturnAsBlock), - - // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag, - // but this is too strict regarding negative strides... - DirectAccessMask = (int(InnerIncr)!=UndefinedIncr && int(OuterIncr)!=UndefinedIncr && InnerIncr>=0 && OuterIncr>=0) ? DirectAccessBit : 0, - FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, - FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0, - Flags = (traits<XprType>::Flags & (HereditaryBits | DirectAccessMask)) | FlagsLvalueBit | FlagsRowMajorBit - }; - - typedef Block<XprType,RowsAtCompileTime,ColsAtCompileTime,IsInnerPannel> BlockType; -}; - -} - -template<typename XprType, typename RowIndices, typename ColIndices, typename StorageKind> -class IndexedViewImpl; - - -/** \class IndexedView - * \ingroup Core_Module - * - * \brief Expression of a non-sequential sub-matrix defined by arbitrary sequences of row and column indices - * - * \tparam XprType the type of the expression in which we are taking the intersections of sub-rows and sub-columns - * \tparam RowIndices the type of the object defining the sequence of row indices - * \tparam ColIndices the type of the object defining the sequence of column indices - * - * This class represents an expression of a sub-matrix (or sub-vector) defined as the intersection - * of sub-sets of rows and columns, that are themself defined by generic sequences of row indices \f$ \{r_0,r_1,..r_{m-1}\} \f$ - * and column indices \f$ \{c_0,c_1,..c_{n-1} \}\f$. Let \f$ A \f$ be the nested matrix, then the resulting matrix \f$ B \f$ has \c m - * rows and \c n columns, and its entries are given by: \f$ B(i,j) = A(r_i,c_j) \f$. - * - * The \c RowIndices and \c ColIndices types must be compatible with the following API: - * \code - * <integral type> operator[](Index) const; - * Index size() const; - * \endcode - * - * Typical supported types thus include: - * - std::vector<int> - * - std::valarray<int> - * - std::array<int> - * - Plain C arrays: int[N] - * - Eigen::ArrayXi - * - decltype(ArrayXi::LinSpaced(...)) - * - Any view/expressions of the previous types - * - Eigen::ArithmeticSequence - * - Eigen::internal::AllRange (helper for Eigen::all) - * - Eigen::internal::SingleRange (helper for single index) - * - etc. - * - * In typical usages of %Eigen, this class should never be used directly. It is the return type of - * DenseBase::operator()(const RowIndices&, const ColIndices&). - * - * \sa class Block - */ -template<typename XprType, typename RowIndices, typename ColIndices> -class IndexedView : public IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind> -{ -public: - typedef typename IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind>::Base Base; - EIGEN_GENERIC_PUBLIC_INTERFACE(IndexedView) - EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedView) - - typedef typename internal::ref_selector<XprType>::non_const_type MatrixTypeNested; - typedef typename internal::remove_all<XprType>::type NestedExpression; - - template<typename T0, typename T1> - IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices) - : m_xpr(xpr), m_rowIndices(rowIndices), m_colIndices(colIndices) - {} - - /** \returns number of rows */ - Index rows() const { return internal::size(m_rowIndices); } - - /** \returns number of columns */ - Index cols() const { return internal::size(m_colIndices); } - - /** \returns the nested expression */ - const typename internal::remove_all<XprType>::type& - nestedExpression() const { return m_xpr; } - - /** \returns the nested expression */ - typename internal::remove_reference<XprType>::type& - nestedExpression() { return m_xpr.const_cast_derived(); } - - /** \returns a const reference to the object storing/generating the row indices */ - const RowIndices& rowIndices() const { return m_rowIndices; } - - /** \returns a const reference to the object storing/generating the column indices */ - const ColIndices& colIndices() const { return m_colIndices; } - -protected: - MatrixTypeNested m_xpr; - RowIndices m_rowIndices; - ColIndices m_colIndices; -}; - - -// Generic API dispatcher -template<typename XprType, typename RowIndices, typename ColIndices, typename StorageKind> -class IndexedViewImpl - : public internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices> >::type -{ -public: - typedef typename internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices> >::type Base; -}; - -namespace internal { - - -template<typename ArgType, typename RowIndices, typename ColIndices> -struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased> - : evaluator_base<IndexedView<ArgType, RowIndices, ColIndices> > -{ - typedef IndexedView<ArgType, RowIndices, ColIndices> XprType; - - enum { - CoeffReadCost = evaluator<ArgType>::CoeffReadCost /* TODO + cost of row/col index */, - - Flags = (evaluator<ArgType>::Flags & (HereditaryBits /*| LinearAccessBit | DirectAccessBit*/)), - - Alignment = 0 - }; - - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_xpr(xpr) - { - EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); - } - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - CoeffReturnType coeff(Index row, Index col) const - { - return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Scalar& coeffRef(Index row, Index col) - { - return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); - } - -protected: - - evaluator<ArgType> m_argImpl; - const XprType& m_xpr; - -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_INDEXED_VIEW_H diff --git a/eigen/Eigen/src/Core/MathFunctions.h b/eigen/Eigen/src/Core/MathFunctions.h index 5ec6c39..a648aa0 100644 --- a/eigen/Eigen/src/Core/MathFunctions.h +++ b/eigen/Eigen/src/Core/MathFunctions.h @@ -14,6 +14,7 @@ // TODO this should better be moved to NumTraits #define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L + namespace Eigen { // On WINCE, std::abs is defined for int only, so let's defined our own overloads: @@ -412,7 +413,7 @@ inline NewType cast(const OldType& x) static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) - EIGEN_USING_STD_MATH(round); + using std::round; return round(x); } }; @@ -482,55 +483,6 @@ struct arg_retval }; /**************************************************************************** -* Implementation of expm1 * -****************************************************************************/ - -// This implementation is based on GSL Math's expm1. -namespace std_fallback { - // fallback expm1 implementation in case there is no expm1(Scalar) function in namespace of Scalar, - // or that there is no suitable std::expm1 function available. Implementation - // attributed to Kahan. See: http://www.plunk.org/~hatch/rightway.php. - template<typename Scalar> - EIGEN_DEVICE_FUNC inline Scalar expm1(const Scalar& x) { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - typedef typename NumTraits<Scalar>::Real RealScalar; - - EIGEN_USING_STD_MATH(exp); - Scalar u = exp(x); - if (u == Scalar(1)) { - return x; - } - Scalar um1 = u - RealScalar(1); - if (um1 == Scalar(-1)) { - return RealScalar(-1); - } - - EIGEN_USING_STD_MATH(log); - return (u - RealScalar(1)) * x / log(u); - } -} - -template<typename Scalar> -struct expm1_impl { - EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - #if EIGEN_HAS_CXX11_MATH - using std::expm1; - #endif - using std_fallback::expm1; - return expm1(x); - } -}; - - -template<typename Scalar> -struct expm1_retval -{ - typedef Scalar type; -}; - -/**************************************************************************** * Implementation of log1p * ****************************************************************************/ @@ -549,7 +501,7 @@ namespace std_fallback { template<typename Scalar> struct log1p_impl { - EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) + static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) #if EIGEN_HAS_CXX11_MATH @@ -688,7 +640,7 @@ template<typename Scalar> struct random_default_impl<Scalar, false, true> { static inline Scalar run(const Scalar& x, const Scalar& y) - { + { typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; if(y<x) return x; @@ -874,7 +826,7 @@ template<typename T> T generic_fast_tanh_float(const T& a_x); namespace numext { -#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__) +#ifndef __CUDA_ARCH__ template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) @@ -890,84 +842,6 @@ EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) EIGEN_USING_STD_MATH(max); return max EIGEN_NOT_A_MACRO (x,y); } - - -#elif defined(__SYCL_DEVICE_ONLY__) -template<typename T> -EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) -{ - - return y < x ? y : x; -} - -template<typename T> -EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) -{ - - return x < y ? y : x; -} - -EIGEN_ALWAYS_INLINE int mini(const int& x, const int& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE int maxi(const int& x, const int& y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned int mini(const unsigned int& x, const unsigned int& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned int maxi(const unsigned int& x, const unsigned int& y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE long mini(const long & x, const long & y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE long maxi(const long & x, const long & y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned long mini(const unsigned long& x, const unsigned long& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned long maxi(const unsigned long& x, const unsigned long& y) -{ - return cl::sycl::max(x,y); -} - - -EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) -{ - return cl::sycl::fmin(x,y); -} - -EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) -{ - return cl::sycl::fmax(x,y); -} - -EIGEN_ALWAYS_INLINE double mini(const double& x, const double& y) -{ - return cl::sycl::fmin(x,y); -} - -EIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y) -{ - return cl::sycl::fmax(x,y); -} - #else template<typename T> EIGEN_DEVICE_FUNC @@ -1080,11 +954,6 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float log1p(float x) { return cl::sycl::log1p(x); } -EIGEN_ALWAYS_INLINE double log1p(double x) { return cl::sycl::log1p(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log1p(const float &x) { return ::log1pf(x); } @@ -1100,24 +969,10 @@ inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const Scala return internal::pow_impl<ScalarX,ScalarY>::run(x, y); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float pow(float x, float y) { return cl::sycl::pow(x, y); } -EIGEN_ALWAYS_INLINE double pow(double x, double y) { return cl::sycl::pow(x, y); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } template<typename T> EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float isnan(float x) { return cl::sycl::isnan(x); } -EIGEN_ALWAYS_INLINE double isnan(double x) { return cl::sycl::isnan(x); } -EIGEN_ALWAYS_INLINE float isinf(float x) { return cl::sycl::isinf(x); } -EIGEN_ALWAYS_INLINE double isinf(double x) { return cl::sycl::isinf(x); } -EIGEN_ALWAYS_INLINE float isfinite(float x) { return cl::sycl::isfinite(x); } -EIGEN_ALWAYS_INLINE double isfinite(double x) { return cl::sycl::isfinite(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template<typename Scalar> EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) @@ -1125,11 +980,6 @@ inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float round(float x) { return cl::sycl::round(x); } -EIGEN_ALWAYS_INLINE double round(double x) { return cl::sycl::round(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template<typename T> EIGEN_DEVICE_FUNC T (floor)(const T& x) @@ -1138,11 +988,6 @@ T (floor)(const T& x) return floor(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float floor(float x) { return cl::sycl::floor(x); } -EIGEN_ALWAYS_INLINE double floor(double x) { return cl::sycl::floor(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float floor(const float &x) { return ::floorf(x); } @@ -1159,11 +1004,6 @@ T (ceil)(const T& x) return ceil(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float ceil(float x) { return cl::sycl::ceil(x); } -EIGEN_ALWAYS_INLINE double ceil(double x) { return cl::sycl::ceil(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float ceil(const float &x) { return ::ceilf(x); } @@ -1204,11 +1044,6 @@ T sqrt(const T &x) return sqrt(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sqrt(float x) { return cl::sycl::sqrt(x); } -EIGEN_ALWAYS_INLINE double sqrt(double x) { return cl::sycl::sqrt(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T log(const T &x) { @@ -1216,12 +1051,6 @@ T log(const T &x) { return log(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float log(float x) { return cl::sycl::log(x); } -EIGEN_ALWAYS_INLINE double log(double x) { return cl::sycl::log(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log(const float &x) { return ::logf(x); } @@ -1232,11 +1061,19 @@ double log(const double &x) { return ::log(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -typename NumTraits<T>::Real abs(const T &x) { +typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type +abs(const T &x) { EIGEN_USING_STD_MATH(abs); return abs(x); } +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type +abs(const T &x) { + return x; +} + #if defined(__SYCL_DEVICE_ONLY__) EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); } EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); } @@ -1267,11 +1104,6 @@ T exp(const T &x) { return exp(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float exp(float x) { return cl::sycl::exp(x); } -EIGEN_ALWAYS_INLINE double exp(double x) { return cl::sycl::exp(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float exp(const float &x) { return ::expf(x); } @@ -1280,26 +1112,6 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp(const double &x) { return ::exp(x); } #endif -template<typename Scalar> -EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x) -{ - return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x); -} - -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float expm1(float x) { return cl::sycl::expm1(x); } -EIGEN_ALWAYS_INLINE double expm1(double x) { return cl::sycl::expm1(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - -#ifdef __CUDACC__ -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -float expm1(const float &x) { return ::expm1f(x); } - -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -double expm1(const double &x) { return ::expm1(x); } -#endif - template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cos(const T &x) { @@ -1307,11 +1119,6 @@ T cos(const T &x) { return cos(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float cos(float x) { return cl::sycl::cos(x); } -EIGEN_ALWAYS_INLINE double cos(double x) { return cl::sycl::cos(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cos(const float &x) { return ::cosf(x); } @@ -1327,11 +1134,6 @@ T sin(const T &x) { return sin(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sin(float x) { return cl::sycl::sin(x); } -EIGEN_ALWAYS_INLINE double sin(double x) { return cl::sycl::sin(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sin(const float &x) { return ::sinf(x); } @@ -1347,11 +1149,6 @@ T tan(const T &x) { return tan(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float tan(float x) { return cl::sycl::tan(x); } -EIGEN_ALWAYS_INLINE double tan(double x) { return cl::sycl::tan(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tan(const float &x) { return ::tanf(x); } @@ -1367,11 +1164,6 @@ T acos(const T &x) { return acos(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float acos(float x) { return cl::sycl::acos(x); } -EIGEN_ALWAYS_INLINE double acos(double x) { return cl::sycl::acos(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float acos(const float &x) { return ::acosf(x); } @@ -1387,11 +1179,6 @@ T asin(const T &x) { return asin(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float asin(float x) { return cl::sycl::asin(x); } -EIGEN_ALWAYS_INLINE double asin(double x) { return cl::sycl::asin(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float asin(const float &x) { return ::asinf(x); } @@ -1407,11 +1194,6 @@ T atan(const T &x) { return atan(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float atan(float x) { return cl::sycl::atan(x); } -EIGEN_ALWAYS_INLINE double atan(double x) { return cl::sycl::atan(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float atan(const float &x) { return ::atanf(x); } @@ -1428,11 +1210,6 @@ T cosh(const T &x) { return cosh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float cosh(float x) { return cl::sycl::cosh(x); } -EIGEN_ALWAYS_INLINE double cosh(double x) { return cl::sycl::cosh(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cosh(const float &x) { return ::coshf(x); } @@ -1448,11 +1225,6 @@ T sinh(const T &x) { return sinh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sinh(float x) { return cl::sycl::sinh(x); } -EIGEN_ALWAYS_INLINE double sinh(double x) { return cl::sycl::sinh(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sinh(const float &x) { return ::sinhf(x); } @@ -1468,10 +1240,7 @@ T tanh(const T &x) { return tanh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float tanh(float x) { return cl::sycl::tanh(x); } -EIGEN_ALWAYS_INLINE double tanh(double x) { return cl::sycl::tanh(x); } -#elif (!defined(__CUDACC__)) && EIGEN_FAST_MATH +#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::generic_fast_tanh_float(x); } #endif @@ -1491,11 +1260,6 @@ T fmod(const T& a, const T& b) { return fmod(a, b); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float fmod(float x, float y) { return cl::sycl::fmod(x, y); } -EIGEN_ALWAYS_INLINE double fmod(double x, double y) { return cl::sycl::fmod(x, y); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1638,13 +1402,13 @@ template<> struct random_impl<bool> template<> struct scalar_fuzzy_impl<bool> { typedef bool RealScalar; - + template<typename OtherScalar> EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) { return !x; } - + EIGEN_DEVICE_FUNC static inline bool isApprox(bool x, bool y, bool) { @@ -1656,10 +1420,10 @@ template<> struct scalar_fuzzy_impl<bool> { return (!x) || y; } - + }; - + } // end namespace internal } // end namespace Eigen diff --git a/eigen/Eigen/src/Core/MathFunctionsImpl.h b/eigen/Eigen/src/Core/MathFunctionsImpl.h index ae1386b..3c9ef22 100644 --- a/eigen/Eigen/src/Core/MathFunctionsImpl.h +++ b/eigen/Eigen/src/Core/MathFunctionsImpl.h @@ -29,7 +29,12 @@ T generic_fast_tanh_float(const T& a_x) // this range is +/-1.0f in single-precision. const T plus_9 = pset1<T>(9.f); const T minus_9 = pset1<T>(-9.f); - const T x = pmax(pmin(a_x, plus_9), minus_9); + // NOTE GCC prior to 6.3 might improperly optimize this max/min + // step such that if a_x is nan, x will be either 9 or -9, + // and tanh will return 1 or -1 instead of nan. + // This is supposed to be fixed in gcc6.3, + // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 + const T x = pmax(minus_9,pmin(plus_9,a_x)); // The monomial coefficients of the numerator polynomial (odd). const T alpha_1 = pset1<T>(4.89352455891786e-03f); const T alpha_3 = pset1<T>(6.37261928875436e-04f); diff --git a/eigen/Eigen/src/Core/MatrixBase.h b/eigen/Eigen/src/Core/MatrixBase.h index 200e577..ce41218 100644 --- a/eigen/Eigen/src/Core/MatrixBase.h +++ b/eigen/Eigen/src/Core/MatrixBase.h @@ -76,7 +76,6 @@ template<typename Derived> class MatrixBase using Base::coeffRef; using Base::lazyAssign; using Base::eval; - using Base::operator-; using Base::operator+=; using Base::operator-=; using Base::operator*=; @@ -123,6 +122,7 @@ template<typename Derived> class MatrixBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase #define EIGEN_DOC_UNARY_ADDONS(X,Y) +# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" # include "../plugins/MatrixCwiseBinaryOps.h" diff --git a/eigen/Eigen/src/Core/NestByValue.h b/eigen/Eigen/src/Core/NestByValue.h index 01cf192..13adf07 100644 --- a/eigen/Eigen/src/Core/NestByValue.h +++ b/eigen/Eigen/src/Core/NestByValue.h @@ -67,25 +67,25 @@ template<typename ExpressionType> class NestByValue } template<int LoadMode> - EIGEN_DEVICE_FUNC inline const PacketScalar packet(Index row, Index col) const + inline const PacketScalar packet(Index row, Index col) const { return m_expression.template packet<LoadMode>(row, col); } template<int LoadMode> - EIGEN_DEVICE_FUNC inline void writePacket(Index row, Index col, const PacketScalar& x) + inline void writePacket(Index row, Index col, const PacketScalar& x) { m_expression.const_cast_derived().template writePacket<LoadMode>(row, col, x); } template<int LoadMode> - EIGEN_DEVICE_FUNC inline const PacketScalar packet(Index index) const + inline const PacketScalar packet(Index index) const { return m_expression.template packet<LoadMode>(index); } template<int LoadMode> - EIGEN_DEVICE_FUNC inline void writePacket(Index index, const PacketScalar& x) + inline void writePacket(Index index, const PacketScalar& x) { m_expression.const_cast_derived().template writePacket<LoadMode>(index, x); } @@ -99,7 +99,7 @@ template<typename ExpressionType> class NestByValue /** \returns an expression of the temporary version of *this. */ template<typename Derived> -EIGEN_DEVICE_FUNC inline const NestByValue<Derived> +inline const NestByValue<Derived> DenseBase<Derived>::nestByValue() const { return NestByValue<Derived>(derived()); diff --git a/eigen/Eigen/src/Core/NumTraits.h b/eigen/Eigen/src/Core/NumTraits.h index aebc0c2..daf4898 100644 --- a/eigen/Eigen/src/Core/NumTraits.h +++ b/eigen/Eigen/src/Core/NumTraits.h @@ -71,7 +71,7 @@ struct default_digits10_impl<T,false,true> // Integer * and to \c 0 otherwise. * \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed * to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers. - * Stay vague here. No need to do architecture-specific stuff. If you don't know what this means, just use \c Eigen::HugeCost. + * Stay vague here. No need to do architecture-specific stuff. * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. * \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must * be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise. @@ -215,6 +215,8 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); } EIGEN_DEVICE_FUNC static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); } + + static inline int digits10() { return NumTraits<Scalar>::digits10(); } }; template<> struct NumTraits<std::string> diff --git a/eigen/Eigen/src/Core/ProductEvaluators.h b/eigen/Eigen/src/Core/ProductEvaluators.h index 583b7f5..c42725d 100644 --- a/eigen/Eigen/src/Core/ProductEvaluators.h +++ b/eigen/Eigen/src/Core/ProductEvaluators.h @@ -207,6 +207,12 @@ struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename static const bool value = true; }; +template<typename OtherXpr, typename Lhs, typename Rhs> +struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr, + const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > { + static const bool value = true; +}; + template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2> struct assignment_from_xpr_op_product { diff --git a/eigen/Eigen/src/Core/Random.h b/eigen/Eigen/src/Core/Random.h index 486e9ed..6faf789 100644 --- a/eigen/Eigen/src/Core/Random.h +++ b/eigen/Eigen/src/Core/Random.h @@ -128,7 +128,7 @@ DenseBase<Derived>::Random() * \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index) */ template<typename Derived> -EIGEN_DEVICE_FUNC inline Derived& DenseBase<Derived>::setRandom() +inline Derived& DenseBase<Derived>::setRandom() { return *this = Random(rows(), cols()); } diff --git a/eigen/Eigen/src/Core/Redux.h b/eigen/Eigen/src/Core/Redux.h index 2b5b73b..b6e8f88 100644 --- a/eigen/Eigen/src/Core/Redux.h +++ b/eigen/Eigen/src/Core/Redux.h @@ -407,7 +407,7 @@ protected: */ template<typename Derived> template<typename Func> -EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar +typename internal::traits<Derived>::Scalar DenseBase<Derived>::redux(const Func& func) const { eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); @@ -422,7 +422,7 @@ DenseBase<Derived>::redux(const Func& func) const * \warning the result is undefined if \c *this contains NaN. */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::minCoeff() const { return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>()); @@ -432,7 +432,7 @@ DenseBase<Derived>::minCoeff() const * \warning the result is undefined if \c *this contains NaN. */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::maxCoeff() const { return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>()); @@ -445,7 +445,7 @@ DenseBase<Derived>::maxCoeff() const * \sa trace(), prod(), mean() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) @@ -458,7 +458,7 @@ DenseBase<Derived>::sum() const * \sa trace(), prod(), sum() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::mean() const { #ifdef __INTEL_COMPILER @@ -479,7 +479,7 @@ DenseBase<Derived>::mean() const * \sa sum(), mean(), trace() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::prod() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) @@ -494,7 +494,7 @@ DenseBase<Derived>::prod() const * \sa diagonal(), sum() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar MatrixBase<Derived>::trace() const { return derived().diagonal().sum(); diff --git a/eigen/Eigen/src/Core/Ref.h b/eigen/Eigen/src/Core/Ref.h index abb1e51..bdf24f5 100644 --- a/eigen/Eigen/src/Core/Ref.h +++ b/eigen/Eigen/src/Core/Ref.h @@ -184,8 +184,6 @@ protected: * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); } * \endcode * - * See also the following stackoverflow questions for further references: - * - <a href="http://stackoverflow.com/questions/21132538/correct-usage-of-the-eigenref-class">Correct usage of the Eigen::Ref<> class</a> * * \sa PlainObjectBase::Map(), \ref TopicStorageOrders */ diff --git a/eigen/Eigen/src/Core/Replicate.h b/eigen/Eigen/src/Core/Replicate.h index 0b2d6d7..9960ef8 100644 --- a/eigen/Eigen/src/Core/Replicate.h +++ b/eigen/Eigen/src/Core/Replicate.h @@ -115,7 +115,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate */ template<typename Derived> template<int RowFactor, int ColFactor> -EIGEN_DEVICE_FUNC const Replicate<Derived,RowFactor,ColFactor> +const Replicate<Derived,RowFactor,ColFactor> DenseBase<Derived>::replicate() const { return Replicate<Derived,RowFactor,ColFactor>(derived()); @@ -130,7 +130,7 @@ DenseBase<Derived>::replicate() const * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate */ template<typename ExpressionType, int Direction> -EIGEN_DEVICE_FUNC const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType +const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType VectorwiseOp<ExpressionType,Direction>::replicate(Index factor) const { return typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType diff --git a/eigen/Eigen/src/Core/ReturnByValue.h b/eigen/Eigen/src/Core/ReturnByValue.h index 11dc86d..c44b767 100644 --- a/eigen/Eigen/src/Core/ReturnByValue.h +++ b/eigen/Eigen/src/Core/ReturnByValue.h @@ -79,7 +79,7 @@ template<typename Derived> class ReturnByValue template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other) +Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other) { other.evalTo(derived()); return derived(); diff --git a/eigen/Eigen/src/Core/Reverse.h b/eigen/Eigen/src/Core/Reverse.h index 8b6b3ab..0640cda 100644 --- a/eigen/Eigen/src/Core/Reverse.h +++ b/eigen/Eigen/src/Core/Reverse.h @@ -114,7 +114,7 @@ template<typename MatrixType, int Direction> class Reverse * */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ReverseReturnType +inline typename DenseBase<Derived>::ReverseReturnType DenseBase<Derived>::reverse() { return ReverseReturnType(derived()); @@ -136,7 +136,7 @@ DenseBase<Derived>::reverse() * * \sa VectorwiseOp::reverseInPlace(), reverse() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void DenseBase<Derived>::reverseInPlace() +inline void DenseBase<Derived>::reverseInPlace() { if(cols()>rows()) { @@ -201,7 +201,7 @@ struct vectorwise_reverse_inplace_impl<Horizontal> * * \sa DenseBase::reverseInPlace(), reverse() */ template<typename ExpressionType, int Direction> -EIGEN_DEVICE_FUNC void VectorwiseOp<ExpressionType,Direction>::reverseInPlace() +void VectorwiseOp<ExpressionType,Direction>::reverseInPlace() { internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived()); } diff --git a/eigen/Eigen/src/Core/SelfAdjointView.h b/eigen/Eigen/src/Core/SelfAdjointView.h index 7e71fe3..504c98f 100644 --- a/eigen/Eigen/src/Core/SelfAdjointView.h +++ b/eigen/Eigen/src/Core/SelfAdjointView.h @@ -322,7 +322,7 @@ public: /** This is the const version of MatrixBase::selfadjointView() */ template<typename Derived> template<unsigned int UpLo> -EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type +typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type MatrixBase<Derived>::selfadjointView() const { return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived()); @@ -339,7 +339,7 @@ MatrixBase<Derived>::selfadjointView() const */ template<typename Derived> template<unsigned int UpLo> -EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type +typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type MatrixBase<Derived>::selfadjointView() { return typename SelfAdjointViewReturnType<UpLo>::Type(derived()); diff --git a/eigen/Eigen/src/Core/Solve.h b/eigen/Eigen/src/Core/Solve.h index 960a585..a8daea5 100644 --- a/eigen/Eigen/src/Core/Solve.h +++ b/eigen/Eigen/src/Core/Solve.h @@ -34,12 +34,12 @@ template<typename Decomposition, typename RhsType,typename StorageKind> struct s template<typename Decomposition, typename RhsType> struct solve_traits<Decomposition,RhsType,Dense> { - typedef Matrix<typename RhsType::Scalar, + typedef typename make_proper_matrix_type<typename RhsType::Scalar, Decomposition::ColsAtCompileTime, RhsType::ColsAtCompileTime, RhsType::PlainObject::Options, Decomposition::MaxColsAtCompileTime, - RhsType::MaxColsAtCompileTime> PlainObject; + RhsType::MaxColsAtCompileTime>::type PlainObject; }; template<typename Decomposition, typename RhsType> diff --git a/eigen/Eigen/src/Core/SolveTriangular.h b/eigen/Eigen/src/Core/SolveTriangular.h index a0011d4..049890b 100644 --- a/eigen/Eigen/src/Core/SolveTriangular.h +++ b/eigen/Eigen/src/Core/SolveTriangular.h @@ -164,7 +164,7 @@ struct triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> { #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename MatrixType, unsigned int Mode> template<int Side, typename OtherDerived> -EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const +void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const { OtherDerived& other = _other.const_cast_derived(); eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) ); diff --git a/eigen/Eigen/src/Core/Transpose.h b/eigen/Eigen/src/Core/Transpose.h index ba7d6e6..79b767b 100644 --- a/eigen/Eigen/src/Core/Transpose.h +++ b/eigen/Eigen/src/Core/Transpose.h @@ -168,7 +168,7 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense> * * \sa transposeInPlace(), adjoint() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline Transpose<Derived> +inline Transpose<Derived> DenseBase<Derived>::transpose() { return TransposeReturnType(derived()); @@ -180,7 +180,7 @@ DenseBase<Derived>::transpose() * * \sa transposeInPlace(), adjoint() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ConstTransposeReturnType +inline typename DenseBase<Derived>::ConstTransposeReturnType DenseBase<Derived>::transpose() const { return ConstTransposeReturnType(derived()); @@ -206,7 +206,7 @@ DenseBase<Derived>::transpose() const * * \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */ template<typename Derived> -EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::AdjointReturnType +inline const typename MatrixBase<Derived>::AdjointReturnType MatrixBase<Derived>::adjoint() const { return AdjointReturnType(this->transpose()); @@ -281,7 +281,7 @@ struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non squ * * \sa transpose(), adjoint(), adjointInPlace() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void DenseBase<Derived>::transposeInPlace() +inline void DenseBase<Derived>::transposeInPlace() { eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic)) && "transposeInPlace() called on a non-square non-resizable matrix"); @@ -312,7 +312,7 @@ EIGEN_DEVICE_FUNC inline void DenseBase<Derived>::transposeInPlace() * * \sa transpose(), adjoint(), transposeInPlace() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::adjointInPlace() +inline void MatrixBase<Derived>::adjointInPlace() { derived() = adjoint().eval(); } diff --git a/eigen/Eigen/src/Core/TriangularMatrix.h b/eigen/Eigen/src/Core/TriangularMatrix.h index ed80da3..667ef09 100644 --- a/eigen/Eigen/src/Core/TriangularMatrix.h +++ b/eigen/Eigen/src/Core/TriangularMatrix.h @@ -488,6 +488,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat * \sa TriangularView::solveInPlace() */ template<int Side, typename Other> + EIGEN_DEVICE_FUNC inline const internal::triangular_solve_retval<Side,TriangularViewType, Other> solve(const MatrixBase<Other>& other) const; @@ -553,7 +554,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat // FIXME should we keep that possibility template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -EIGEN_DEVICE_FUNC inline TriangularView<MatrixType, Mode>& +inline TriangularView<MatrixType, Mode>& TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other) { internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -563,7 +564,7 @@ TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDer // FIXME should we keep that possibility template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other) +void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other) { internal::call_assignment_no_alias(derived(), other.template triangularView<Mode>()); } @@ -572,7 +573,7 @@ EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(c template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -EIGEN_DEVICE_FUNC inline TriangularView<MatrixType, Mode>& +inline TriangularView<MatrixType, Mode>& TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<OtherDerived>& other) { eigen_assert(Mode == int(OtherDerived::Mode)); @@ -582,7 +583,7 @@ TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<Othe template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other) +void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other) { eigen_assert(Mode == int(OtherDerived::Mode)); internal::call_assignment_no_alias(derived(), other.derived()); @@ -597,7 +598,7 @@ EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(c * If the matrix is triangular, the opposite part is set to zero. */ template<typename Derived> template<typename DenseDerived> -EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const +void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const { evalToLazy(other.derived()); } @@ -623,7 +624,6 @@ EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> */ template<typename Derived> template<unsigned int Mode> -EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type MatrixBase<Derived>::triangularView() { @@ -633,7 +633,6 @@ MatrixBase<Derived>::triangularView() /** This is the const version of MatrixBase::triangularView() */ template<typename Derived> template<unsigned int Mode> -EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type MatrixBase<Derived>::triangularView() const { @@ -931,7 +930,7 @@ struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite> * If the matrix is triangular, the opposite part is set to zero. */ template<typename Derived> template<typename DenseDerived> -EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const +void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const { other.derived().resize(this->rows(), this->cols()); internal::call_triangular_assignment_loop<Derived::Mode,(Derived::Mode&SelfAdjoint)==0 /* SetOpposite */>(other.derived(), derived().nestedExpression()); diff --git a/eigen/Eigen/src/Core/VectorwiseOp.h b/eigen/Eigen/src/Core/VectorwiseOp.h index 893bc79..4fe267e 100644 --- a/eigen/Eigen/src/Core/VectorwiseOp.h +++ b/eigen/Eigen/src/Core/VectorwiseOp.h @@ -670,7 +670,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ColwiseReturnType +inline typename DenseBase<Derived>::ColwiseReturnType DenseBase<Derived>::colwise() { return ColwiseReturnType(derived()); @@ -684,7 +684,7 @@ DenseBase<Derived>::colwise() * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::RowwiseReturnType +inline typename DenseBase<Derived>::RowwiseReturnType DenseBase<Derived>::rowwise() { return RowwiseReturnType(derived()); diff --git a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h index 6362309..195d40f 100644 --- a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h @@ -183,22 +183,12 @@ template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& } #endif -template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { - // Arguments are swapped to match NaN propagation behavior of std::min. - return _mm256_min_ps(b,a); -} -template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { - // Arguments are swapped to match NaN propagation behavior of std::min. - return _mm256_min_pd(b,a); -} -template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { - // Arguments are swapped to match NaN propagation behavior of std::max. - return _mm256_max_ps(b,a); -} -template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { - // Arguments are swapped to match NaN propagation behavior of std::max. - return _mm256_max_pd(b,a); -} +template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); } + +template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); } + template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); } template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); } @@ -235,7 +225,7 @@ template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from) // Packet8f tmp = _mm256_castps128_ps256(_mm_loadu_ps(from)); // tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1); // return _mm256_unpacklo_ps(tmp,tmp); - + // _mm256_insertf128_ps is very slow on Haswell, thus: Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from); // mimic an "inplace" permutation of the lower 128bits using a blend diff --git a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h index 12b8975..f6500a1 100644 --- a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -59,8 +59,8 @@ template<> struct packet_traits<float> : default_packet_traits HasLog = 1, #endif HasExp = 1, - HasSqrt = EIGEN_FAST_MATH, - HasRsqrt = EIGEN_FAST_MATH, + HasSqrt = 1, + HasRsqrt = 1, #endif HasDiv = 1 }; @@ -75,7 +75,7 @@ template<> struct packet_traits<double> : default_packet_traits size = 8, HasHalfPacket = 1, #if EIGEN_GNUC_AT_LEAST(5, 3) - HasSqrt = EIGEN_FAST_MATH, + HasSqrt = 1, HasRsqrt = EIGEN_FAST_MATH, #endif HasDiv = 1 @@ -230,27 +230,23 @@ EIGEN_STRONG_INLINE Packet8d pmadd(const Packet8d& a, const Packet8d& b, template <> EIGEN_STRONG_INLINE Packet16f pmin<Packet16f>(const Packet16f& a, const Packet16f& b) { - // Arguments are reversed to match NaN propagation behavior of std::min. - return _mm512_min_ps(b, a); + return _mm512_min_ps(a, b); } template <> EIGEN_STRONG_INLINE Packet8d pmin<Packet8d>(const Packet8d& a, const Packet8d& b) { - // Arguments are reversed to match NaN propagation behavior of std::min. - return _mm512_min_pd(b, a); + return _mm512_min_pd(a, b); } template <> EIGEN_STRONG_INLINE Packet16f pmax<Packet16f>(const Packet16f& a, const Packet16f& b) { - // Arguments are reversed to match NaN propagation behavior of std::max. - return _mm512_max_ps(b, a); + return _mm512_max_ps(a, b); } template <> EIGEN_STRONG_INLINE Packet8d pmax<Packet8d>(const Packet8d& a, const Packet8d& b) { - // Arguments are reversed to match NaN propagation behavior of std::max. - return _mm512_max_pd(b, a); + return _mm512_max_pd(a, b); } template <> @@ -465,21 +461,53 @@ EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) { // {a0, a0 a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7} template <> EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) { - __m256i low_half = _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); - __m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half)); - __m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0)); - return pairs; + Packet8f lane0 = _mm256_broadcast_ps((const __m128*)(const void*)from); + // mimic an "inplace" permutation of the lower 128bits using a blend + lane0 = _mm256_blend_ps( + lane0, _mm256_castps128_ps256(_mm_permute_ps( + _mm256_castps256_ps128(lane0), _MM_SHUFFLE(1, 0, 1, 0))), + 15); + // then we can perform a consistent permutation on the global register to get + // everything in shape: + lane0 = _mm256_permute_ps(lane0, _MM_SHUFFLE(3, 3, 2, 2)); + + Packet8f lane1 = _mm256_broadcast_ps((const __m128*)(const void*)(from + 4)); + // mimic an "inplace" permutation of the lower 128bits using a blend + lane1 = _mm256_blend_ps( + lane1, _mm256_castps128_ps256(_mm_permute_ps( + _mm256_castps256_ps128(lane1), _MM_SHUFFLE(1, 0, 1, 0))), + 15); + // then we can perform a consistent permutation on the global register to get + // everything in shape: + lane1 = _mm256_permute_ps(lane1, _MM_SHUFFLE(3, 3, 2, 2)); + +#ifdef EIGEN_VECTORIZE_AVX512DQ + Packet16f res = _mm512_undefined_ps(); + return _mm512_insertf32x8(res, lane0, 0); + return _mm512_insertf32x8(res, lane1, 1); + return res; +#else + Packet16f res = _mm512_undefined_ps(); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 0), 0); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 1), 1); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 0), 2); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 1), 3); + return res; +#endif } // Loads 4 doubles from memory a returns the packet {a0, a0 a1, a1, a2, a2, a3, // a3} template <> EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) { - __m512d x = _mm512_setzero_pd(); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[0]), 0); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[1]), 1); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[2]), 2); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[3]), 3); - return x; + Packet4d lane0 = _mm256_broadcast_pd((const __m128d*)(const void*)from); + lane0 = _mm256_permute_pd(lane0, 3 << 2); + + Packet4d lane1 = _mm256_broadcast_pd((const __m128d*)(const void*)(from + 2)); + lane1 = _mm256_permute_pd(lane1, 3 << 2); + + Packet8d res = _mm512_undefined_pd(); + res = _mm512_insertf64x4(res, lane0, 0); + return _mm512_insertf64x4(res, lane1, 1); } // Loads 4 floats from memory a returns the packet @@ -497,11 +525,11 @@ EIGEN_STRONG_INLINE Packet16f ploadquad<Packet16f>(const float* from) { // {a0, a0 a0, a0, a1, a1, a1, a1} template <> EIGEN_STRONG_INLINE Packet8d ploadquad<Packet8d>(const double* from) { - __m128d tmp0 = _mm_load_pd1(from); - __m256d lane0 = _mm256_broadcastsd_pd(tmp0); - __m128d tmp1 = _mm_load_pd1(from + 1); - __m256d lane1 = _mm256_broadcastsd_pd(tmp1); - __m512d tmp = _mm512_undefined_pd(); + Packet8d tmp = _mm512_undefined_pd(); + Packet2d tmp0 = _mm_load_pd1(from); + Packet2d tmp1 = _mm_load_pd1(from + 1); + Packet4d lane0 = _mm256_broadcastsd_pd(tmp0); + Packet4d lane1 = _mm256_broadcastsd_pd(tmp1); tmp = _mm512_insertf64x4(tmp, lane0, 0); return _mm512_insertf64x4(tmp, lane1, 1); } @@ -632,8 +660,8 @@ EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) { #ifdef EIGEN_VECTORIZE_AVX512DQ // AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512 #define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \ - __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0); \ - __m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1) + __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0) __m256 OUTPUT##_1 = \ + _mm512_extractf32x8_ps(INPUT, 1) #else #define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \ __m256 OUTPUT##_0 = _mm256_insertf128_ps( \ @@ -723,7 +751,7 @@ vecs) blend1 = _mm256_blend_ps(sum1, sum2, 0xcc); blend2 = _mm256_blend_ps(sum3, sum4, 0xcc); - final = _mm256_add_ps(final, _mm256_blend_ps(blend1, blend2, 0xf0)); + final = padd(final, _mm256_blend_ps(blend1, blend2, 0xf0)); hsum1 = _mm256_hadd_ps(vecs8_0, vecs9_0); hsum2 = _mm256_hadd_ps(vecs10_0, vecs11_0); @@ -773,7 +801,7 @@ vecs) blend1 = _mm256_blend_ps(sum1, sum2, 0xcc); blend2 = _mm256_blend_ps(sum3, sum4, 0xcc); - final_1 = _mm256_add_ps(final_1, _mm256_blend_ps(blend1, blend2, 0xf0)); + final_1 = padd(final_1, _mm256_blend_ps(blend1, blend2, 0xf0)); __m512 final_output; @@ -823,7 +851,7 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs) tmp1 = _mm256_hadd_pd(vecs2_1, vecs3_1); tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1)); - final_0 = _mm256_add_pd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC)); + final_0 = padd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC)); tmp0 = _mm256_hadd_pd(vecs4_0, vecs5_0); tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1)); @@ -839,7 +867,7 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs) tmp1 = _mm256_hadd_pd(vecs6_1, vecs7_1); tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1)); - final_1 = _mm256_add_pd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC)); + final_1 = padd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC)); __m512d final_output = _mm512_insertf64x4(final_output, final_0, 0); @@ -848,52 +876,55 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs) template <> EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) { -#ifdef EIGEN_VECTORIZE_AVX512DQ - __m256 lane0 = _mm512_extractf32x8_ps(a, 0); - __m256 lane1 = _mm512_extractf32x8_ps(a, 1); - Packet8f x = _mm256_add_ps(lane0, lane1); - return predux<Packet8f>(x); + //#ifdef EIGEN_VECTORIZE_AVX512DQ +#if 0 + Packet8f lane0 = _mm512_extractf32x8_ps(a, 0); + Packet8f lane1 = _mm512_extractf32x8_ps(a, 1); + Packet8f sum = padd(lane0, lane1); + Packet8f tmp0 = _mm256_hadd_ps(sum, _mm256_permute2f128_ps(a, a, 1)); + tmp0 = _mm256_hadd_ps(tmp0, tmp0); + return pfirst(_mm256_hadd_ps(tmp0, tmp0)); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 sum = _mm_add_ps(_mm_add_ps(lane0, lane1), _mm_add_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f sum = padd(padd(lane0, lane1), padd(lane2, lane3)); sum = _mm_hadd_ps(sum, sum); sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1)); - return _mm_cvtss_f32(sum); + return pfirst(sum); #endif } template <> EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d sum = _mm256_add_pd(lane0, lane1); - __m256d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1)); - return _mm_cvtsd_f64(_mm256_castpd256_pd128(_mm256_hadd_pd(tmp0, tmp0))); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d sum = padd(lane0, lane1); + Packet4d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1)); + return pfirst(_mm256_hadd_pd(tmp0, tmp0)); } template <> EIGEN_STRONG_INLINE Packet8f predux_downto4<Packet16f>(const Packet16f& a) { #ifdef EIGEN_VECTORIZE_AVX512DQ - __m256 lane0 = _mm512_extractf32x8_ps(a, 0); - __m256 lane1 = _mm512_extractf32x8_ps(a, 1); - return _mm256_add_ps(lane0, lane1); + Packet8f lane0 = _mm512_extractf32x8_ps(a, 0); + Packet8f lane1 = _mm512_extractf32x8_ps(a, 1); + return padd(lane0, lane1); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 sum0 = _mm_add_ps(lane0, lane2); - __m128 sum1 = _mm_add_ps(lane1, lane3); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f sum0 = padd(lane0, lane2); + Packet4f sum1 = padd(lane1, lane3); return _mm256_insertf128_ps(_mm256_castps128_ps256(sum0), sum1, 1); #endif } template <> EIGEN_STRONG_INLINE Packet4d predux_downto4<Packet8d>(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_add_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = padd(lane0, lane1); return res; } @@ -908,59 +939,58 @@ EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) { res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res = pmul(pmul(lane0, lane1), pmul(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = pmul(pmul(lane0, lane1), pmul(lane2, lane3)); res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); #endif } template <> EIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = pmul(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = pmul(lane0, lane1); res = pmul(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1))); } template <> EIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) { - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3)); res = _mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(_mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); } template <> EIGEN_STRONG_INLINE double predux_min<Packet8d>(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_min_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = _mm256_min_pd(lane0, lane1); res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1))); } template <> EIGEN_STRONG_INLINE float predux_max<Packet16f>(const Packet16f& a) { - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3)); res = _mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(_mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); } - template <> EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_max_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = _mm256_max_pd(lane0, lane1); res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1))); } diff --git a/eigen/Eigen/src/Core/arch/CUDA/Half.h b/eigen/Eigen/src/Core/arch/CUDA/Half.h index 67518da..294c517 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/Half.h +++ b/eigen/Eigen/src/Core/arch/CUDA/Half.h @@ -53,7 +53,7 @@ namespace half_impl { // Make our own __half definition that is similar to CUDA's. struct __half { - EIGEN_DEVICE_FUNC __half() : x(0) {} + EIGEN_DEVICE_FUNC __half() {} explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {} unsigned short x; }; @@ -386,18 +386,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) { return result; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 - return half(hexp(a)); -#else - return half(::expf(float(a))); -#endif -} -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) { - return half(numext::expm1(float(a))); + return half(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { #if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 - return half(::hlog(a)); + return Eigen::half(::hlog(a)); #else return half(::logf(float(a))); #endif @@ -409,11 +402,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 - return half(hsqrt(a)); -#else - return half(::sqrtf(float(a))); -#endif + return half(::sqrtf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) { return half(::powf(float(a), float(b))); @@ -431,18 +420,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { return half(::tanhf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 - return half(hfloor(a)); -#else return half(::floorf(float(a))); -#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 - return half(hceil(a)); -#else return half(::ceilf(float(a))); -#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { @@ -493,9 +474,59 @@ template<> struct is_arithmetic<half> { enum { value = true }; }; } // end namespace internal +} // end namespace Eigen + +namespace std { +template<> +struct numeric_limits<Eigen::half> { + static const bool is_specialized = true; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const bool has_infinity = true; + static const bool has_quiet_NaN = true; + static const bool has_signaling_NaN = true; + static const float_denorm_style has_denorm = denorm_present; + static const bool has_denorm_loss = false; + static const std::float_round_style round_style = std::round_to_nearest; + static const bool is_iec559 = false; + static const bool is_bounded = false; + static const bool is_modulo = false; + static const int digits = 11; + static const int digits10 = 2; + //static const int max_digits10 = ; + static const int radix = 2; + static const int min_exponent = -13; + static const int min_exponent10 = -4; + static const int max_exponent = 16; + static const int max_exponent10 = 4; + static const bool traps = true; + static const bool tinyness_before = false; + + static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } + static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } + static Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); } + static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } + static Eigen::half round_error() { return Eigen::half(0.5); } + static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } + static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } +}; +} + +namespace Eigen { + template<> struct NumTraits<Eigen::half> : GenericNumTraits<Eigen::half> { + enum { + IsSigned = true, + IsInteger = false, + IsComplex = false, + RequireInitialization = false + }; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() { return half_impl::raw_uint16_to_half(0x0800); } diff --git a/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h b/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h index 987a529..0348b41 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -57,18 +57,6 @@ double2 pexp<double2>(const double2& a) } template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pexpm1<float4>(const float4& a) -{ - return make_float4(expm1f(a.x), expm1f(a.y), expm1f(a.z), expm1f(a.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pexpm1<double2>(const double2& a) -{ - return make_double2(expm1(a.x), expm1(a.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psqrt<float4>(const float4& a) { return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w)); diff --git a/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h b/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h index 8c46af0..4dda631 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -167,10 +167,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const d return make_double2(from[0], from[1]); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) { +template<> EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) { return make_float4(from[0], from[0], from[1], from[1]); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) { +template<> EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) { return make_double2(from[0], from[0]); } diff --git a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index b9a125b..ae54225 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -34,7 +34,6 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits HasSqrt = 1, HasRsqrt = 1, HasExp = 1, - HasExpm1 = 1, HasLog = 1, HasLog1p = 1 }; @@ -276,14 +275,6 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) { return __floats2half2_rn(r1, r2); } -template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1<half2>(const half2& a) { - float a1 = __low2float(a); - float a2 = __high2float(a); - float r1 = expm1f(a1); - float r2 = expm1f(a2); - return __floats2half2_rn(r1, r2); -} - #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 template<> __device__ EIGEN_STRONG_INLINE diff --git a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h index aede4a6..836fbc0 100644 --- a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h @@ -116,7 +116,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { - const float32_t f[] = {0, 1, 2, 3}; + const float f[] = {0, 1, 2, 3}; Packet4f countdown = vld1q_f32(f); return vaddq_f32(pset1<Packet4f>(a), countdown); } diff --git a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h index 03c8a2c..3832de1 100644 --- a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h @@ -45,7 +45,7 @@ struct eigen_packet_wrapper m_val = v; return *this; } - + T m_val; }; typedef eigen_packet_wrapper<__m128> Packet4f; @@ -69,7 +69,7 @@ template<> struct is_arithmetic<__m128d> { enum { value = true }; }; #define vec2d_swizzle1(v,p,q) \ (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2))))) - + #define vec4f_swizzle2(a,b,p,q,r,s) \ (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p)))) @@ -190,7 +190,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) { return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0); } #endif - + template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); } template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); } template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); } @@ -250,34 +250,8 @@ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); } #endif -template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_min_ps, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet4f res = b; - asm("minps %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::min. - return _mm_min_ps(b, a); -#endif -} -template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_min_pd, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet2d res = b; - asm("minpd %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::min. - return _mm_min_pd(b, a); -#endif -} +template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { #ifdef EIGEN_VECTORIZE_SSE4_1 @@ -289,34 +263,8 @@ template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const #endif } -template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_max_ps, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet4f res = b; - asm("maxps %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::max. - return _mm_max_ps(b, a); -#endif -} -template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_max_pd, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet2d res = b; - asm("maxpd %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::max. - return _mm_max_pd(b, a); -#endif -} +template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { #ifdef EIGEN_VECTORIZE_SSE4_1 diff --git a/eigen/Eigen/src/Core/functors/NullaryFunctors.h b/eigen/Eigen/src/Core/functors/NullaryFunctors.h index 6a30466..b03be02 100644 --- a/eigen/Eigen/src/Core/functors/NullaryFunctors.h +++ b/eigen/Eigen/src/Core/functors/NullaryFunctors.h @@ -44,16 +44,16 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> { linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)), - m_interPacket(plset<Packet>(0)), m_flip(numext::abs(high)<numext::abs(low)) {} template<typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { + typedef typename NumTraits<Scalar>::Real RealScalar; if(m_flip) - return (i==0)? m_low : (m_high - (m_size1-i)*m_step); + return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step); else - return (i==m_size1)? m_high : (m_low + i*m_step); + return (i==m_size1)? m_high : (m_low + RealScalar(i)*m_step); } template<typename IndexType> @@ -63,7 +63,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) if(m_flip) { - Packet pi = padd(pset1<Packet>(Scalar(i-m_size1)),m_interPacket); + Packet pi = plset<Packet>(Scalar(i-m_size1)); Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi)); if(i==0) res = pinsertfirst(res, m_low); @@ -71,7 +71,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> } else { - Packet pi = padd(pset1<Packet>(Scalar(i)),m_interPacket); + Packet pi = plset<Packet>(Scalar(i)); Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi)); if(i==m_size1-unpacket_traits<Packet>::size+1) res = pinsertlast(res, m_high); @@ -83,7 +83,6 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> const Scalar m_high; const Index m_size1; const Scalar m_step; - const Packet m_interPacket; const bool m_flip; }; diff --git a/eigen/Eigen/src/Core/functors/UnaryFunctors.h b/eigen/Eigen/src/Core/functors/UnaryFunctors.h index bfc0465..2e6a00f 100644 --- a/eigen/Eigen/src/Core/functors/UnaryFunctors.h +++ b/eigen/Eigen/src/Core/functors/UnaryFunctors.h @@ -264,26 +264,6 @@ struct functor_traits<scalar_exp_op<Scalar> > { /** \internal * - * \brief Template functor to compute the exponential of a scalar - 1. - * - * \sa class CwiseUnaryOp, ArrayBase::expm1() - */ -template<typename Scalar> struct scalar_expm1_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_expm1_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::expm1(a); } - template <typename Packet> - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexpm1(a); } -}; -template <typename Scalar> -struct functor_traits<scalar_expm1_op<Scalar> > { - enum { - PacketAccess = packet_traits<Scalar>::HasExpm1, - Cost = functor_traits<scalar_exp_op<Scalar> >::Cost // TODO measure cost of expm1 - }; -}; - -/** \internal - * * \brief Template functor to compute the logarithm of a scalar * * \sa class CwiseUnaryOp, ArrayBase::log() @@ -698,13 +678,7 @@ struct functor_traits<scalar_ceil_op<Scalar> > template<typename Scalar> struct scalar_isnan_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isnan(a); -#else - return (numext::isnan)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isnan)(a); } }; template<typename Scalar> struct functor_traits<scalar_isnan_op<Scalar> > @@ -722,13 +696,7 @@ struct functor_traits<scalar_isnan_op<Scalar> > template<typename Scalar> struct scalar_isinf_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isinf(a); -#else - return (numext::isinf)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isinf)(a); } }; template<typename Scalar> struct functor_traits<scalar_isinf_op<Scalar> > @@ -746,13 +714,7 @@ struct functor_traits<scalar_isinf_op<Scalar> > template<typename Scalar> struct scalar_isfinite_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isfinite(a); -#else - return (numext::isfinite)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isfinite)(a); } }; template<typename Scalar> struct functor_traits<scalar_isfinite_op<Scalar> > diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index ad38bcf..e844e37 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -269,10 +269,13 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0, LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0, - RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0 + RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0, + SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0 }; Index size = mat.cols(); + if(SkipDiag) + size--; Index depth = actualLhs.cols(); typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar, @@ -283,21 +286,23 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> internal::general_matrix_matrix_triangular_product<Index, typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, - IsRowMajor ? RowMajor : ColMajor, UpLo> + IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)> ::run(size, depth, - &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), - mat.data(), mat.outerStride(), actualAlpha, blocking); + &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(), + &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(), + mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking); } }; template<typename MatrixType, unsigned int UpLo> template<typename ProductType> -EIGEN_DEVICE_FUNC TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) +TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) { + EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED); eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols()); - + general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta); - + return derived(); } diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 5b7c15c..41e18ff 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \ { \ - if (lhs==rhs) { \ + if ( lhs==rhs && ((UpLo&(Lower|Upper)==UpLo)) ) { \ general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \ } else { \ diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h index 41d8242..3c1a7fc 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,8 +15,10 @@ namespace Eigen { namespace internal { /* Optimized col-major matrix * vector product: - * This algorithm processes the matrix per vertical panels, - * which are then processed horizontaly per chunck of 8*PacketSize x 1 vertical segments. + * This algorithm processes 4 columns at onces that allows to both reduce + * the number of load/stores of the result by a factor 4 and to reduce + * the instruction dependency. Moreover, we know that all bands have the + * same alignment pattern. * * Mixing type logic: C += alpha * A * B * | A | B |alpha| comments @@ -25,7 +27,33 @@ namespace internal { * |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp * |cplx |real |real | optimal case, vectorization possible via real-cplx mul * + * Accesses to the matrix coefficients follow the following logic: + * + * - if all columns have the same alignment then + * - if the columns have the same alignment as the result vector, then easy! (-> AllAligned case) + * - otherwise perform unaligned loads only (-> NoneAligned case) + * - otherwise + * - if even columns have the same alignment then + * // odd columns are guaranteed to have the same alignment too + * - if even or odd columns have the same alignment as the result, then + * // for a register size of 2 scalars, this is guarantee to be the case (e.g., SSE with double) + * - perform half aligned and half unaligned loads (-> EvenAligned case) + * - otherwise perform unaligned loads only (-> NoneAligned case) + * - otherwise, if the register size is 4 scalars (e.g., SSE with float) then + * - one over 4 consecutive columns is guaranteed to be aligned with the result vector, + * perform simple aligned loads for this column and aligned loads plus re-alignment for the other. (-> FirstAligned case) + * // this re-alignment is done by the palign function implemented for SSE in Eigen/src/Core/arch/SSE/PacketMath.h + * - otherwise, + * // if we get here, this means the register size is greater than 4 (e.g., AVX with floats), + * // we currently fall back to the NoneAligned case + * * The same reasoning apply for the transposed case. + * + * The last case (PacketSize>4) could probably be improved by generalizing the FirstAligned case, but since we do not support AVX yet... + * One might also wonder why in the EvenAligned case we perform unaligned loads instead of using the aligned-loads plus re-alignment + * strategy as in the FirstAligned case. The reason is that we observed that unaligned loads on a 8 byte boundary are not too slow + * compared to unaligned loads on a 4 byte boundary. + * */ template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version> struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version> @@ -59,145 +87,238 @@ EIGEN_DONT_INLINE static void run( template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version> EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run( Index rows, Index cols, - const LhsMapper& alhs, + const LhsMapper& lhs, const RhsMapper& rhs, ResScalar* res, Index resIncr, RhsScalar alpha) { EIGEN_UNUSED_VARIABLE(resIncr); eigen_internal_assert(resIncr==1); - - // The following copy tells the compiler that lhs's attributes are not modified outside this function - // This helps GCC to generate propoer code. - LhsMapper lhs(alhs); + #ifdef _EIGEN_ACCUMULATE_PACKETS + #error _EIGEN_ACCUMULATE_PACKETS has already been defined + #endif + #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) \ + pstore(&res[j], \ + padd(pload<ResPacket>(&res[j]), \ + padd( \ + padd(pcj.pmul(lhs0.template load<LhsPacket, Alignment0>(j), ptmp0), \ + pcj.pmul(lhs1.template load<LhsPacket, Alignment13>(j), ptmp1)), \ + padd(pcj.pmul(lhs2.template load<LhsPacket, Alignment2>(j), ptmp2), \ + pcj.pmul(lhs3.template load<LhsPacket, Alignment13>(j), ptmp3)) ))) + + typedef typename LhsMapper::VectorMapper LhsScalars; conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; + if(ConjugateRhs) + alpha = numext::conj(alpha); + + enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned }; + const Index columnsAtOnce = 4; + const Index peels = 2; + const Index LhsPacketAlignedMask = LhsPacketSize-1; + const Index ResPacketAlignedMask = ResPacketSize-1; +// const Index PeelAlignedMask = ResPacketSize*peels-1; + const Index size = rows; + const Index lhsStride = lhs.stride(); - // TODO: for padded aligned inputs, we could enable aligned reads - enum { LhsAlignment = Unaligned }; - const Index n8 = rows-8*ResPacketSize+1; - const Index n4 = rows-4*ResPacketSize+1; - const Index n3 = rows-3*ResPacketSize+1; - const Index n2 = rows-2*ResPacketSize+1; - const Index n1 = rows-1*ResPacketSize+1; + // How many coeffs of the result do we have to skip to be aligned. + // Here we assume data are at least aligned on the base scalar type. + Index alignedStart = internal::first_default_aligned(res,size); + Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0; + const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; + + const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; + Index alignmentPattern = alignmentStep==0 ? AllAligned + : alignmentStep==(LhsPacketSize/2) ? EvenAligned + : FirstAligned; - // TODO: improve the following heuristic: - const Index block_cols = cols<128 ? cols : (lhsStride*sizeof(LhsScalar)<32000?16:4); - ResPacket palpha = pset1<ResPacket>(alpha); + // we cannot assume the first element is aligned because of sub-matrices + const Index lhsAlignmentOffset = lhs.firstAligned(size); - for(Index j2=0; j2<cols; j2+=block_cols) + // find how many columns do we have to skip to be aligned with the result (if possible) + Index skipColumns = 0; + // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats) + if( (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == size) || (UIntPtr(res)%sizeof(ResScalar)) ) { - Index jend = numext::mini(j2+block_cols,cols); - Index i=0; - for(; i<n8; i+=ResPacketSize*8) + alignedSize = 0; + alignedStart = 0; + alignmentPattern = NoneAligned; + } + else if(LhsPacketSize > 4) + { + // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4. + // Currently, it seems to be better to perform unaligned loads anyway + alignmentPattern = NoneAligned; + } + else if (LhsPacketSize>1) + { + // eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize); + + while (skipColumns<LhsPacketSize && + alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize)) + ++skipColumns; + if (skipColumns==LhsPacketSize) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)), - c3 = pset1<ResPacket>(ResScalar(0)), - c4 = pset1<ResPacket>(ResScalar(0)), - c5 = pset1<ResPacket>(ResScalar(0)), - c6 = pset1<ResPacket>(ResScalar(0)), - c7 = pset1<ResPacket>(ResScalar(0)); - - for(Index j=j2; j<jend; j+=1) - { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*3,j),b0,c3); - c4 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*4,j),b0,c4); - c5 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*5,j),b0,c5); - c6 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*6,j),b0,c6); - c7 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*7,j),b0,c7); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu<ResPacket>(res+i+ResPacketSize*2))); - pstoreu(res+i+ResPacketSize*3, pmadd(c3,palpha,ploadu<ResPacket>(res+i+ResPacketSize*3))); - pstoreu(res+i+ResPacketSize*4, pmadd(c4,palpha,ploadu<ResPacket>(res+i+ResPacketSize*4))); - pstoreu(res+i+ResPacketSize*5, pmadd(c5,palpha,ploadu<ResPacket>(res+i+ResPacketSize*5))); - pstoreu(res+i+ResPacketSize*6, pmadd(c6,palpha,ploadu<ResPacket>(res+i+ResPacketSize*6))); - pstoreu(res+i+ResPacketSize*7, pmadd(c7,palpha,ploadu<ResPacket>(res+i+ResPacketSize*7))); + // nothing can be aligned, no need to skip any column + alignmentPattern = NoneAligned; + skipColumns = 0; } - if(i<n4) + else { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)), - c3 = pset1<ResPacket>(ResScalar(0)); + skipColumns = (std::min)(skipColumns,cols); + // note that the skiped columns are processed later. + } - for(Index j=j2; j<jend; j+=1) - { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*3,j),b0,c3); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu<ResPacket>(res+i+ResPacketSize*2))); - pstoreu(res+i+ResPacketSize*3, pmadd(c3,palpha,ploadu<ResPacket>(res+i+ResPacketSize*3))); + /* eigen_internal_assert( (alignmentPattern==NoneAligned) + || (skipColumns + columnsAtOnce >= cols) + || LhsPacketSize > size + || (size_t(firstLhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);*/ + } + else if(Vectorizable) + { + alignedStart = 0; + alignedSize = size; + alignmentPattern = AllAligned; + } - i+=ResPacketSize*4; - } - if(i<n3) - { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)); + const Index offset1 = (FirstAligned && alignmentStep==1)?3:1; + const Index offset3 = (FirstAligned && alignmentStep==1)?1:3; - for(Index j=j2; j<jend; j+=1) - { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*2,j),b0,c2); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu<ResPacket>(res+i+ResPacketSize*2))); + Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; + for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce) + { + RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(i, 0)), + ptmp1 = pset1<RhsPacket>(alpha*rhs(i+offset1, 0)), + ptmp2 = pset1<RhsPacket>(alpha*rhs(i+2, 0)), + ptmp3 = pset1<RhsPacket>(alpha*rhs(i+offset3, 0)); - i+=ResPacketSize*3; - } - if(i<n2) + // this helps a lot generating better binary code + const LhsScalars lhs0 = lhs.getVectorMapper(0, i+0), lhs1 = lhs.getVectorMapper(0, i+offset1), + lhs2 = lhs.getVectorMapper(0, i+2), lhs3 = lhs.getVectorMapper(0, i+offset3); + + if (Vectorizable) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)); + /* explicit vectorization */ + // process initial unaligned coeffs + for (Index j=0; j<alignedStart; ++j) + { + res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]); + res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]); + res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]); + res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]); + } - for(Index j=j2; j<jend; j+=1) + if (alignedSize>alignedStart) { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1); + switch(alignmentPattern) + { + case AllAligned: + for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned); + break; + case EvenAligned: + for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned); + break; + case FirstAligned: + { + Index j = alignedStart; + if(peels>1) + { + LhsPacket A00, A01, A02, A03, A10, A11, A12, A13; + ResPacket T0, T1; + + A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1); + A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2); + A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3); + + for (; j<peeledSize; j+=peels*ResPacketSize) + { + A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize); palign<1>(A01,A11); + A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize); palign<2>(A02,A12); + A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize); palign<3>(A03,A13); + + A00 = lhs0.template load<LhsPacket, Aligned>(j); + A10 = lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize); + T0 = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j])); + T1 = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize])); + + T0 = pcj.pmadd(A01, ptmp1, T0); + A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize); palign<1>(A11,A01); + T0 = pcj.pmadd(A02, ptmp2, T0); + A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize); palign<2>(A12,A02); + T0 = pcj.pmadd(A03, ptmp3, T0); + pstore(&res[j],T0); + A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize); palign<3>(A13,A03); + T1 = pcj.pmadd(A11, ptmp1, T1); + T1 = pcj.pmadd(A12, ptmp2, T1); + T1 = pcj.pmadd(A13, ptmp3, T1); + pstore(&res[j+ResPacketSize],T1); + } + } + for (; j<alignedSize; j+=ResPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned); + break; + } + default: + for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned); + break; + } } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1))); - i+=ResPacketSize*2; + } // end explicit vectorization + + /* process remaining coeffs (or all if there is no explicit vectorization) */ + for (Index j=alignedSize; j<size; ++j) + { + res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]); + res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]); + res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]); + res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]); } - if(i<n1) + } + + // process remaining first and last columns (at most columnsAtOnce-1) + Index end = cols; + Index start = columnBound; + do + { + for (Index k=start; k<end; ++k) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)); - for(Index j=j2; j<jend; j+=1) + RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(k, 0)); + const LhsScalars lhs0 = lhs.getVectorMapper(0, k); + + if (Vectorizable) { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0); + /* explicit vectorization */ + // process first unaligned result's coeffs + for (Index j=0; j<alignedStart; ++j) + res[j] += cj.pmul(lhs0(j), pfirst(ptmp0)); + // process aligned result's coeffs + if (lhs0.template aligned<LhsPacket>(alignedStart)) + for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize) + pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(i), ptmp0, pload<ResPacket>(&res[i]))); + else + for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize) + pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(i), ptmp0, pload<ResPacket>(&res[i]))); } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - i+=ResPacketSize; + + // process remaining scalars (or all if no explicit vectorization) + for (Index i=alignedSize; i<size; ++i) + res[i] += cj.pmul(lhs0(i), pfirst(ptmp0)); } - for(;i<rows;++i) + if (skipColumns) { - ResScalar c0(0); - for(Index j=j2; j<jend; j+=1) - c0 += cj.pmul(lhs(i,j), rhs(j,0)); - res[i] += alpha*c0; + start = 0; + end = skipColumns; + skipColumns = 0; } - } + else + break; + } while(Vectorizable); + #undef _EIGEN_ACCUMULATE_PACKETS } /* Optimized row-major matrix * vector product: @@ -242,160 +363,253 @@ EIGEN_DONT_INLINE static void run( template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version> EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run( Index rows, Index cols, - const LhsMapper& alhs, + const LhsMapper& lhs, const RhsMapper& rhs, ResScalar* res, Index resIncr, ResScalar alpha) { - // The following copy tells the compiler that lhs's attributes are not modified outside this function - // This helps GCC to generate propoer code. - LhsMapper lhs(alhs); - eigen_internal_assert(rhs.stride()==1); + + #ifdef _EIGEN_ACCUMULATE_PACKETS + #error _EIGEN_ACCUMULATE_PACKETS has already been defined + #endif + + #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) {\ + RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0); \ + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Alignment0>(j), b, ptmp0); \ + ptmp1 = pcj.pmadd(lhs1.template load<LhsPacket, Alignment13>(j), b, ptmp1); \ + ptmp2 = pcj.pmadd(lhs2.template load<LhsPacket, Alignment2>(j), b, ptmp2); \ + ptmp3 = pcj.pmadd(lhs3.template load<LhsPacket, Alignment13>(j), b, ptmp3); } + conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; - // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large, - // processing 8 rows at once might be counter productive wrt cache. - const Index n8 = lhs.stride()*sizeof(LhsScalar)>32000 ? 0 : rows-7; - const Index n4 = rows-3; - const Index n2 = rows-1; + typedef typename LhsMapper::VectorMapper LhsScalars; - // TODO: for padded aligned inputs, we could enable aligned reads - enum { LhsAlignment = Unaligned }; + enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 }; + const Index rowsAtOnce = 4; + const Index peels = 2; + const Index RhsPacketAlignedMask = RhsPacketSize-1; + const Index LhsPacketAlignedMask = LhsPacketSize-1; + const Index depth = cols; + const Index lhsStride = lhs.stride(); - Index i=0; - for(; i<n8; i+=8) + // How many coeffs of the result do we have to skip to be aligned. + // Here we assume data are at least aligned on the base scalar type + // if that's not the case then vectorization is discarded, see below. + Index alignedStart = rhs.firstAligned(depth); + Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0; + const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; + + const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; + Index alignmentPattern = alignmentStep==0 ? AllAligned + : alignmentStep==(LhsPacketSize/2) ? EvenAligned + : FirstAligned; + + // we cannot assume the first element is aligned because of sub-matrices + const Index lhsAlignmentOffset = lhs.firstAligned(depth); + const Index rhsAlignmentOffset = rhs.firstAligned(rows); + + // find how many rows do we have to skip to be aligned with rhs (if possible) + Index skipRows = 0; + // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats) + if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) || + (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == depth) || + (rhsAlignmentOffset < 0) || (rhsAlignmentOffset == rows) ) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)), - c3 = pset1<ResPacket>(ResScalar(0)), - c4 = pset1<ResPacket>(ResScalar(0)), - c5 = pset1<ResPacket>(ResScalar(0)), - c6 = pset1<ResPacket>(ResScalar(0)), - c7 = pset1<ResPacket>(ResScalar(0)); - - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) - { - RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0); - - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+3,j),b0,c3); - c4 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+4,j),b0,c4); - c5 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+5,j),b0,c5); - c6 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+6,j),b0,c6); - c7 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+7,j),b0,c7); - } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - ResScalar cc2 = predux(c2); - ResScalar cc3 = predux(c3); - ResScalar cc4 = predux(c4); - ResScalar cc5 = predux(c5); - ResScalar cc6 = predux(c6); - ResScalar cc7 = predux(c7); - for(; j<cols; ++j) - { - RhsScalar b0 = rhs(j,0); - - cc0 += cj.pmul(lhs(i+0,j), b0); - cc1 += cj.pmul(lhs(i+1,j), b0); - cc2 += cj.pmul(lhs(i+2,j), b0); - cc3 += cj.pmul(lhs(i+3,j), b0); - cc4 += cj.pmul(lhs(i+4,j), b0); - cc5 += cj.pmul(lhs(i+5,j), b0); - cc6 += cj.pmul(lhs(i+6,j), b0); - cc7 += cj.pmul(lhs(i+7,j), b0); - } - res[(i+0)*resIncr] += alpha*cc0; - res[(i+1)*resIncr] += alpha*cc1; - res[(i+2)*resIncr] += alpha*cc2; - res[(i+3)*resIncr] += alpha*cc3; - res[(i+4)*resIncr] += alpha*cc4; - res[(i+5)*resIncr] += alpha*cc5; - res[(i+6)*resIncr] += alpha*cc6; - res[(i+7)*resIncr] += alpha*cc7; + alignedSize = 0; + alignedStart = 0; + alignmentPattern = NoneAligned; } - for(; i<n4; i+=4) + else if(LhsPacketSize > 4) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)), - c3 = pset1<ResPacket>(ResScalar(0)); + // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4. + alignmentPattern = NoneAligned; + } + else if (LhsPacketSize>1) + { + // eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize); - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) + while (skipRows<LhsPacketSize && + alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize)) + ++skipRows; + if (skipRows==LhsPacketSize) { - RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0); - - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+3,j),b0,c3); + // nothing can be aligned, no need to skip any column + alignmentPattern = NoneAligned; + skipRows = 0; } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - ResScalar cc2 = predux(c2); - ResScalar cc3 = predux(c3); - for(; j<cols; ++j) + else { - RhsScalar b0 = rhs(j,0); - - cc0 += cj.pmul(lhs(i+0,j), b0); - cc1 += cj.pmul(lhs(i+1,j), b0); - cc2 += cj.pmul(lhs(i+2,j), b0); - cc3 += cj.pmul(lhs(i+3,j), b0); + skipRows = (std::min)(skipRows,Index(rows)); + // note that the skiped columns are processed later. } - res[(i+0)*resIncr] += alpha*cc0; - res[(i+1)*resIncr] += alpha*cc1; - res[(i+2)*resIncr] += alpha*cc2; - res[(i+3)*resIncr] += alpha*cc3; + /* eigen_internal_assert( alignmentPattern==NoneAligned + || LhsPacketSize==1 + || (skipRows + rowsAtOnce >= rows) + || LhsPacketSize > depth + || (size_t(firstLhs+alignedStart+lhsStride*skipRows)%sizeof(LhsPacket))==0);*/ } - for(; i<n2; i+=2) + else if(Vectorizable) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)); + alignedStart = 0; + alignedSize = depth; + alignmentPattern = AllAligned; + } - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) - { - RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0); + const Index offset1 = (FirstAligned && alignmentStep==1)?3:1; + const Index offset3 = (FirstAligned && alignmentStep==1)?1:3; - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+1,j),b0,c1); - } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - for(; j<cols; ++j) + Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; + for (Index i=skipRows; i<rowBound; i+=rowsAtOnce) + { + // FIXME: what is the purpose of this EIGEN_ALIGN_DEFAULT ?? + EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0); + ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0); + + // this helps the compiler generating good binary code + const LhsScalars lhs0 = lhs.getVectorMapper(i+0, 0), lhs1 = lhs.getVectorMapper(i+offset1, 0), + lhs2 = lhs.getVectorMapper(i+2, 0), lhs3 = lhs.getVectorMapper(i+offset3, 0); + + if (Vectorizable) { - RhsScalar b0 = rhs(j,0); + /* explicit vectorization */ + ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)), + ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0)); + + // process initial unaligned coeffs + // FIXME this loop get vectorized by the compiler ! + for (Index j=0; j<alignedStart; ++j) + { + RhsScalar b = rhs(j, 0); + tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b); + tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b); + } + + if (alignedSize>alignedStart) + { + switch(alignmentPattern) + { + case AllAligned: + for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned); + break; + case EvenAligned: + for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned); + break; + case FirstAligned: + { + Index j = alignedStart; + if (peels>1) + { + /* Here we proccess 4 rows with with two peeled iterations to hide + * the overhead of unaligned loads. Moreover unaligned loads are handled + * using special shift/move operations between the two aligned packets + * overlaping the desired unaligned packet. This is *much* more efficient + * than basic unaligned loads. + */ + LhsPacket A01, A02, A03, A11, A12, A13; + A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1); + A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2); + A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3); + + for (; j<peeledSize; j+=peels*RhsPacketSize) + { + RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0); + A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize); palign<1>(A01,A11); + A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize); palign<2>(A02,A12); + A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize); palign<3>(A03,A13); + + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), b, ptmp0); + ptmp1 = pcj.pmadd(A01, b, ptmp1); + A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize); palign<1>(A11,A01); + ptmp2 = pcj.pmadd(A02, b, ptmp2); + A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize); palign<2>(A12,A02); + ptmp3 = pcj.pmadd(A03, b, ptmp3); + A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize); palign<3>(A13,A03); + + b = rhs.getVectorMapper(j+RhsPacketSize, 0).template load<RhsPacket, Aligned>(0); + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize), b, ptmp0); + ptmp1 = pcj.pmadd(A11, b, ptmp1); + ptmp2 = pcj.pmadd(A12, b, ptmp2); + ptmp3 = pcj.pmadd(A13, b, ptmp3); + } + } + for (; j<alignedSize; j+=RhsPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned); + break; + } + default: + for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned); + break; + } + tmp0 += predux(ptmp0); + tmp1 += predux(ptmp1); + tmp2 += predux(ptmp2); + tmp3 += predux(ptmp3); + } + } // end explicit vectorization - cc0 += cj.pmul(lhs(i+0,j), b0); - cc1 += cj.pmul(lhs(i+1,j), b0); + // process remaining coeffs (or all if no explicit vectorization) + // FIXME this loop get vectorized by the compiler ! + for (Index j=alignedSize; j<depth; ++j) + { + RhsScalar b = rhs(j, 0); + tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b); + tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b); } - res[(i+0)*resIncr] += alpha*cc0; - res[(i+1)*resIncr] += alpha*cc1; + res[i*resIncr] += alpha*tmp0; + res[(i+offset1)*resIncr] += alpha*tmp1; + res[(i+2)*resIncr] += alpha*tmp2; + res[(i+offset3)*resIncr] += alpha*tmp3; } - for(; i<rows; ++i) + + // process remaining first and last rows (at most columnsAtOnce-1) + Index end = rows; + Index start = rowBound; + do { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)); - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) + for (Index i=start; i<end; ++i) { - RhsPacket b0 = rhs.template load<RhsPacket,Unaligned>(j,0); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i,j),b0,c0); + EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0); + ResPacket ptmp0 = pset1<ResPacket>(tmp0); + const LhsScalars lhs0 = lhs.getVectorMapper(i, 0); + // process first unaligned result's coeffs + // FIXME this loop get vectorized by the compiler ! + for (Index j=0; j<alignedStart; ++j) + tmp0 += cj.pmul(lhs0(j), rhs(j, 0)); + + if (alignedSize>alignedStart) + { + // process aligned rhs coeffs + if (lhs0.template aligned<LhsPacket>(alignedStart)) + for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize) + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0); + else + for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize) + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0); + tmp0 += predux(ptmp0); + } + + // process remaining scalars + // FIXME this loop get vectorized by the compiler ! + for (Index j=alignedSize; j<depth; ++j) + tmp0 += cj.pmul(lhs0(j), rhs(j, 0)); + res[i*resIncr] += alpha*tmp0; } - ResScalar cc0 = predux(c0); - for(; j<cols; ++j) + if (skipRows) { - cc0 += cj.pmul(lhs(i,j), rhs(j,0)); + start = 0; + end = skipRows; + skipRows = 0; } - res[i*resIncr] += alpha*cc0; - } + else + break; + } while(Vectorizable); + + #undef _EIGEN_ACCUMULATE_PACKETS } } // end namespace internal diff --git a/eigen/Eigen/src/Core/products/SelfadjointProduct.h b/eigen/Eigen/src/Core/products/SelfadjointProduct.h index 39c5b59..f038d68 100644 --- a/eigen/Eigen/src/Core/products/SelfadjointProduct.h +++ b/eigen/Eigen/src/Core/products/SelfadjointProduct.h @@ -120,7 +120,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false> template<typename MatrixType, unsigned int UpLo> template<typename DerivedU> -EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> +SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> ::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha) { selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha); diff --git a/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h b/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h index d395888..2ae3641 100644 --- a/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +++ b/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h @@ -57,7 +57,7 @@ template<bool Cond, typename T> struct conj_expr_if template<typename MatrixType, unsigned int UpLo> template<typename DerivedU, typename DerivedV> -EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> +SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> ::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha) { typedef internal::blas_traits<DerivedU> UBlasTraits; diff --git a/eigen/Eigen/src/Core/util/BlasUtil.h b/eigen/Eigen/src/Core/util/BlasUtil.h index b1791fb..6e6ee11 100644 --- a/eigen/Eigen/src/Core/util/BlasUtil.h +++ b/eigen/Eigen/src/Core/util/BlasUtil.h @@ -222,11 +222,6 @@ class blas_data_mapper { return ploadt<Packet, AlignmentType>(&operator()(i, j)); } - template <typename PacketT, int AlignmentT> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const { - return ploadt<PacketT, AlignmentT>(&operator()(i, j)); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { return ploadt<HalfPacket, AlignmentType>(&operator()(i, j)); } diff --git a/eigen/Eigen/src/Core/util/Constants.h b/eigen/Eigen/src/Core/util/Constants.h index 5d37e5d..7587d68 100644 --- a/eigen/Eigen/src/Core/util/Constants.h +++ b/eigen/Eigen/src/Core/util/Constants.h @@ -25,10 +25,6 @@ const int Dynamic = -1; */ const int DynamicIndex = 0xffffff; -/** This value means that the increment to go from one value to another in a sequence is not constant for each step. - */ -const int UndefinedIncr = 0xfffffe; - /** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>(). * The value Infinity there means the L-infinity norm. */ diff --git a/eigen/Eigen/src/Core/util/DisableStupidWarnings.h b/eigen/Eigen/src/Core/util/DisableStupidWarnings.h index 4431f2f..7559e12 100644 --- a/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/eigen/Eigen/src/Core/util/DisableStupidWarnings.h @@ -4,6 +4,7 @@ #ifdef _MSC_VER // 4100 - unreferenced formal parameter (occurred e.g. in aligned_allocator::destroy(pointer p)) // 4101 - unreferenced local variable + // 4127 - conditional expression is constant // 4181 - qualifier applied to reference type ignored // 4211 - nonstandard extension used : redefined extern to static // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data @@ -19,7 +20,7 @@ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) + #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) @@ -41,9 +42,6 @@ #pragma clang diagnostic push #endif #pragma clang diagnostic ignored "-Wconstant-logical-operand" - #if __clang_major__ >= 3 && __clang_minor__ >= 5 - #pragma clang diagnostic ignored "-Wabsolute-value" - #endif #elif defined __GNUC__ && __GNUC__>=6 diff --git a/eigen/Eigen/src/Core/util/ForwardDeclarations.h b/eigen/Eigen/src/Core/util/ForwardDeclarations.h index 1a48cff..ea10739 100644 --- a/eigen/Eigen/src/Core/util/ForwardDeclarations.h +++ b/eigen/Eigen/src/Core/util/ForwardDeclarations.h @@ -83,7 +83,6 @@ template<typename ExpressionType> class ForceAlignedAccess; template<typename ExpressionType> class SwapWrapper; template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block; -template<typename XprType, typename RowIndices, typename ColIndices> class IndexedView; template<typename MatrixType, int Size=Dynamic> class VectorBlock; template<typename MatrixType> class Transpose; diff --git a/eigen/Eigen/src/Core/util/IndexedViewHelper.h b/eigen/Eigen/src/Core/util/IndexedViewHelper.h deleted file mode 100644 index ab01c85..0000000 --- a/eigen/Eigen/src/Core/util/IndexedViewHelper.h +++ /dev/null @@ -1,187 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#ifndef EIGEN_INDEXED_VIEW_HELPER_H -#define EIGEN_INDEXED_VIEW_HELPER_H - -namespace Eigen { - -/** \namespace Eigen::placeholders - * \ingroup Core_Module - * - * Namespace containing symbolic placeholder and identifiers - */ -namespace placeholders { - -namespace internal { -struct symbolic_last_tag {}; -} - -/** \var last - * \ingroup Core_Module - * - * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last element/row/columns - * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&). - * - * This symbolic placeholder support standard arithmetic operation. - * - * A typical usage example would be: - * \code - * using namespace Eigen; - * using Eigen::placeholders::last; - * VectorXd v(n); - * v(seq(2,last-2)).setOnes(); - * \endcode - * - * \sa end - */ -static const Symbolic::SymbolExpr<internal::symbolic_last_tag> last; - -/** \var end - * \ingroup Core_Module - * - * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last+1 element/row/columns - * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&). - * - * This symbolic placeholder support standard arithmetic operation. - * It is essentially an alias to last+1 - * - * \sa last - */ -#ifdef EIGEN_PARSED_BY_DOXYGEN -static const auto end = last+1; -#else -// Using a FixedExpr<1> expression is important here to make sure the compiler -// can fully optimize the computation starting indices with zero overhead. -static const Symbolic::AddExpr<Symbolic::SymbolExpr<internal::symbolic_last_tag>,Symbolic::ValueExpr<Eigen::internal::FixedInt<1> > > end(last+fix<1>()); -#endif - -} // end namespace placeholders - -namespace internal { - - // Replace symbolic last/end "keywords" by their true runtime value -inline Index eval_expr_given_size(Index x, Index /* size */) { return x; } - -template<int N> -FixedInt<N> eval_expr_given_size(FixedInt<N> x, Index /*size*/) { return x; } - -template<typename Derived> -Index eval_expr_given_size(const Symbolic::BaseExpr<Derived> &x, Index size) -{ - return x.derived().eval(placeholders::last=size-1); -} - -// Extract increment/step at compile time -template<typename T, typename EnableIf = void> struct get_compile_time_incr { - enum { value = UndefinedIncr }; -}; - -// Analogue of std::get<0>(x), but tailored for our needs. -template<typename T> -Index first(const T& x) { return x.first(); } - -// IndexedViewCompatibleType/makeIndexedViewCompatible turn an arbitrary object of type T into something usable by MatrixSlice -// The generic implementation is a no-op -template<typename T,int XprSize,typename EnableIf=void> -struct IndexedViewCompatibleType { - typedef T type; -}; - -template<typename T,typename Q> -const T& makeIndexedViewCompatible(const T& x, Index /*size*/, Q) { return x; } - -//-------------------------------------------------------------------------------- -// Handling of a single Index -//-------------------------------------------------------------------------------- - -struct SingleRange { - enum { - SizeAtCompileTime = 1 - }; - SingleRange(Index val) : m_value(val) {} - Index operator[](Index) const { return m_value; } - Index size() const { return 1; } - Index first() const { return m_value; } - Index m_value; -}; - -template<> struct get_compile_time_incr<SingleRange> { - enum { value = 1 }; // 1 or 0 ?? -}; - -// Turn a single index into something that looks like an array (i.e., that exposes a .size(), and operatro[](int) methods) -template<typename T, int XprSize> -struct IndexedViewCompatibleType<T,XprSize,typename internal::enable_if<internal::is_integral<T>::value>::type> { - // Here we could simply use Array, but maybe it's less work for the compiler to use - // a simpler wrapper as SingleRange - //typedef Eigen::Array<Index,1,1> type; - typedef SingleRange type; -}; - -template<typename T, int XprSize> -struct IndexedViewCompatibleType<T, XprSize, typename enable_if<Symbolic::is_symbolic<T>::value>::type> { - typedef SingleRange type; -}; - - -template<typename T> -typename enable_if<Symbolic::is_symbolic<T>::value,SingleRange>::type -makeIndexedViewCompatible(const T& id, Index size, SpecializedType) { - return eval_expr_given_size(id,size); -} - -//-------------------------------------------------------------------------------- -// Handling of all -//-------------------------------------------------------------------------------- - -struct all_t { all_t() {} }; - -// Convert a symbolic 'all' into a usable range type -template<int XprSize> -struct AllRange { - enum { SizeAtCompileTime = XprSize }; - AllRange(Index size = XprSize) : m_size(size) {} - Index operator[](Index i) const { return i; } - Index size() const { return m_size.value(); } - Index first() const { return 0; } - variable_if_dynamic<Index,XprSize> m_size; -}; - -template<int XprSize> -struct IndexedViewCompatibleType<all_t,XprSize> { - typedef AllRange<XprSize> type; -}; - -template<typename XprSizeType> -inline AllRange<get_fixed_value<XprSizeType>::value> makeIndexedViewCompatible(all_t , XprSizeType size, SpecializedType) { - return AllRange<get_fixed_value<XprSizeType>::value>(size); -} - -template<int Size> struct get_compile_time_incr<AllRange<Size> > { - enum { value = 1 }; -}; - -} // end namespace internal - - -namespace placeholders { - -/** \var all - * \ingroup Core_Module - * Can be used as a parameter to DenseBase::operator()(const RowIndices&, const ColIndices&) to index all rows or columns - */ -static const Eigen::internal::all_t all; - -} - -} // end namespace Eigen - -#endif // EIGEN_INDEXED_VIEW_HELPER_H diff --git a/eigen/Eigen/src/Core/util/IntegralConstant.h b/eigen/Eigen/src/Core/util/IntegralConstant.h deleted file mode 100644 index 78a4705..0000000 --- a/eigen/Eigen/src/Core/util/IntegralConstant.h +++ /dev/null @@ -1,270 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#ifndef EIGEN_INTEGRAL_CONSTANT_H -#define EIGEN_INTEGRAL_CONSTANT_H - -namespace Eigen { - -namespace internal { - -template<int N> class FixedInt; -template<int N> class VariableAndFixedInt; - -/** \internal - * \class FixedInt - * - * This class embeds a compile-time integer \c N. - * - * It is similar to c++11 std::integral_constant<int,N> but with some additional features - * such as: - * - implicit conversion to int - * - arithmetic and some bitwise operators: -, +, *, /, %, &, | - * - c++98/14 compatibility with fix<N> and fix<N>() syntax to define integral constants. - * - * It is strongly discouraged to directly deal with this class FixedInt. Instances are expcected to - * be created by the user using Eigen::fix<N> or Eigen::fix<N>(). In C++98-11, the former syntax does - * not create a FixedInt<N> instance but rather a point to function that needs to be \em cleaned-up - * using the generic helper: - * \code - * internal::cleanup_index_type<T>::type - * internal::cleanup_index_type<T,DynamicKey>::type - * \endcode - * where T can a FixedInt<N>, a pointer to function FixedInt<N> (*)(), or numerous other integer-like representations. - * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values. - * - * For convenience, you can extract the compile-time value \c N in a generic way using the following helper: - * \code - * internal::get_fixed_value<T,DefaultVal>::value - * \endcode - * that will give you \c N if T equals FixedInt<N> or FixedInt<N> (*)(), and \c DefaultVal if T does not embed any compile-time value (e.g., T==int). - * - * \sa fix<N>, class VariableAndFixedInt - */ -template<int N> class FixedInt -{ -public: - static const int value = N; - operator int() const { return value; } - FixedInt() {} - FixedInt( VariableAndFixedInt<N> other) { - EIGEN_ONLY_USED_FOR_DEBUG(other); - eigen_internal_assert(int(other)==N); - } - - FixedInt<-N> operator-() const { return FixedInt<-N>(); } - template<int M> - FixedInt<N+M> operator+( FixedInt<M>) const { return FixedInt<N+M>(); } - template<int M> - FixedInt<N-M> operator-( FixedInt<M>) const { return FixedInt<N-M>(); } - template<int M> - FixedInt<N*M> operator*( FixedInt<M>) const { return FixedInt<N*M>(); } - template<int M> - FixedInt<N/M> operator/( FixedInt<M>) const { return FixedInt<N/M>(); } - template<int M> - FixedInt<N%M> operator%( FixedInt<M>) const { return FixedInt<N%M>(); } - template<int M> - FixedInt<N|M> operator|( FixedInt<M>) const { return FixedInt<N|M>(); } - template<int M> - FixedInt<N&M> operator&( FixedInt<M>) const { return FixedInt<N&M>(); } - -#if EIGEN_HAS_CXX14 - // Needed in C++14 to allow fix<N>(): - FixedInt operator() () const { return *this; } - - VariableAndFixedInt<N> operator() (int val) const { return VariableAndFixedInt<N>(val); } -#else - FixedInt ( FixedInt<N> (*)() ) {} -#endif - -#if EIGEN_HAS_CXX11 - FixedInt(std::integral_constant<int,N>) {} -#endif -}; - -/** \internal - * \class VariableAndFixedInt - * - * This class embeds both a compile-time integer \c N and a runtime integer. - * Both values are supposed to be equal unless the compile-time value \c N has a special - * value meaning that the runtime-value should be used. Depending on the context, this special - * value can be either Eigen::Dynamic (for positive quantities) or Eigen::DynamicIndex (for - * quantities that can be negative). - * - * It is the return-type of the function Eigen::fix<N>(int), and most of the time this is the only - * way it is used. It is strongly discouraged to directly deal with instances of VariableAndFixedInt. - * Indeed, in order to write generic code, it is the responsibility of the callee to properly convert - * it to either a true compile-time quantity (i.e. a FixedInt<N>), or to a runtime quantity (e.g., an Index) - * using the following generic helper: - * \code - * internal::cleanup_index_type<T>::type - * internal::cleanup_index_type<T,DynamicKey>::type - * \endcode - * where T can be a template instantiation of VariableAndFixedInt or numerous other integer-like representations. - * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values. - * - * For convenience, you can also extract the compile-time value \c N using the following helper: - * \code - * internal::get_fixed_value<T,DefaultVal>::value - * \endcode - * that will give you \c N if T equals VariableAndFixedInt<N>, and \c DefaultVal if T does not embed any compile-time value (e.g., T==int). - * - * \sa fix<N>(int), class FixedInt - */ -template<int N> class VariableAndFixedInt -{ -public: - static const int value = N; - operator int() const { return m_value; } - VariableAndFixedInt(int val) { m_value = val; } -protected: - int m_value; -}; - -template<typename T, int Default=Dynamic> struct get_fixed_value { - static const int value = Default; -}; - -template<int N,int Default> struct get_fixed_value<FixedInt<N>,Default> { - static const int value = N; -}; - -#if !EIGEN_HAS_CXX14 -template<int N,int Default> struct get_fixed_value<FixedInt<N> (*)(),Default> { - static const int value = N; -}; -#endif - -template<int N,int Default> struct get_fixed_value<VariableAndFixedInt<N>,Default> { - static const int value = N ; -}; - -template<typename T, int N, int Default> -struct get_fixed_value<variable_if_dynamic<T,N>,Default> { - static const int value = N; -}; - -template<typename T> EIGEN_DEVICE_FUNC Index get_runtime_value(const T &x) { return x; } -#if !EIGEN_HAS_CXX14 -template<int N> EIGEN_DEVICE_FUNC Index get_runtime_value(FixedInt<N> (*)()) { return N; } -#endif - -// Cleanup integer/FixedInt/VariableAndFixedInt/etc types: - -// By default, no cleanup: -template<typename T, int DynamicKey=Dynamic, typename EnableIf=void> struct cleanup_index_type { typedef T type; }; - -// Convert any integral type (e.g., short, int, unsigned int, etc.) to Eigen::Index -template<typename T, int DynamicKey> struct cleanup_index_type<T,DynamicKey,typename internal::enable_if<internal::is_integral<T>::value>::type> { typedef Index type; }; - -#if !EIGEN_HAS_CXX14 -// In c++98/c++11, fix<N> is a pointer to function that we better cleanup to a true FixedInt<N>: -template<int N, int DynamicKey> struct cleanup_index_type<FixedInt<N> (*)(), DynamicKey> { typedef FixedInt<N> type; }; -#endif - -// If VariableAndFixedInt does not match DynamicKey, then we turn it to a pure compile-time value: -template<int N, int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<N>, DynamicKey> { typedef FixedInt<N> type; }; -// If VariableAndFixedInt matches DynamicKey, then we turn it to a pure runtime-value (aka Index): -template<int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<DynamicKey>, DynamicKey> { typedef Index type; }; - -#if EIGEN_HAS_CXX11 -template<int N, int DynamicKey> struct cleanup_index_type<std::integral_constant<int,N>, DynamicKey> { typedef FixedInt<N> type; }; -#endif - -} // end namespace internal - -#ifndef EIGEN_PARSED_BY_DOXYGEN - -#if EIGEN_HAS_CXX14 -template<int N> -static const internal::FixedInt<N> fix{}; -#else -template<int N> -inline internal::FixedInt<N> fix() { return internal::FixedInt<N>(); } - -// The generic typename T is mandatory. Otherwise, a code like fix<N> could refer to either the function above or this next overload. -// This way a code like fix<N> can only refer to the previous function. -template<int N,typename T> -inline internal::VariableAndFixedInt<N> fix(T val) { return internal::VariableAndFixedInt<N>(val); } -#endif - -#else // EIGEN_PARSED_BY_DOXYGEN - -/** \var fix<N>() - * \ingroup Core_Module - * - * This \em identifier permits to construct an object embedding a compile-time integer \c N. - * - * \tparam N the compile-time integer value - * - * It is typically used in conjunction with the Eigen::seq and Eigen::seqN functions to pass compile-time values to them: - * \code - * seqN(10,fix<4>,fix<-3>) // <=> [10 7 4 1] - * \endcode - * - * See also the function fix(int) to pass both a compile-time and runtime value. - * - * In c++14, it is implemented as: - * \code - * template<int N> static const internal::FixedInt<N> fix{}; - * \endcode - * where internal::FixedInt<N> is an internal template class similar to - * <a href="http://en.cppreference.com/w/cpp/types/integral_constant">\c std::integral_constant </a><tt> <int,N> </tt> - * Here, \c fix<N> is thus an object of type \c internal::FixedInt<N>. - * - * In c++98/11, it is implemented as a function: - * \code - * template<int N> inline internal::FixedInt<N> fix(); - * \endcode - * Here internal::FixedInt<N> is thus a pointer to function. - * - * If for some reason you want a true object in c++98 then you can write: \code fix<N>() \endcode which is also valid in c++14. - * - * \sa fix<N>(int), seq, seqN - */ -template<int N> -static const auto fix(); - -/** \fn fix<N>(int) - * \ingroup Core_Module - * - * This function returns an object embedding both a compile-time integer \c N, and a fallback runtime value \a val. - * - * \tparam N the compile-time integer value - * \param val the fallback runtime integer value - * - * This function is a more general version of the \ref fix identifier/function that can be used in template code - * where the compile-time value could turn out to actually mean "undefined at compile-time". For positive integers - * such as a size or a dimension, this case is identified by Eigen::Dynamic, whereas runtime signed integers - * (e.g., an increment/stride) are identified as Eigen::DynamicIndex. In such a case, the runtime value \a val - * will be used as a fallback. - * - * A typical use case would be: - * \code - * template<typename Derived> void foo(const MatrixBase<Derived> &mat) { - * const int N = Derived::RowsAtCompileTime==Dynamic ? Dynamic : Derived::RowsAtCompileTime/2; - * const int n = mat.rows()/2; - * ... mat( seqN(0,fix<N>(n) ) ...; - * } - * \endcode - * In this example, the function Eigen::seqN knows that the second argument is expected to be a size. - * If the passed compile-time value N equals Eigen::Dynamic, then the proxy object returned by fix will be dissmissed, and converted to an Eigen::Index of value \c n. - * Otherwise, the runtime-value \c n will be dissmissed, and the returned ArithmeticSequence will be of the exact same type as <tt> seqN(0,fix<N>) </tt>. - * - * \sa fix, seqN, class ArithmeticSequence - */ -template<int N> -static const auto fix(int val); - -#endif // EIGEN_PARSED_BY_DOXYGEN - -} // end namespace Eigen - -#endif // EIGEN_INTEGRAL_CONSTANT_H diff --git a/eigen/Eigen/src/Core/util/Macros.h b/eigen/Eigen/src/Core/util/Macros.h index 14ec87d..38d6ddb 100644 --- a/eigen/Eigen/src/Core/util/Macros.h +++ b/eigen/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 3 -#define EIGEN_MINOR_VERSION 90 +#define EIGEN_MINOR_VERSION 4 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -23,7 +23,7 @@ /// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC #ifdef __GNUC__ - #define EIGEN_COMP_GNUC (__GNUC__*10+__GNUC_MINOR__) + #define EIGEN_COMP_GNUC 1 #else #define EIGEN_COMP_GNUC 0 #endif @@ -349,14 +349,6 @@ # define __has_feature(x) 0 #endif -// Some old compilers do not support template specializations like: -// template<typename T,int N> void foo(const T x[N]); -#if !( EIGEN_COMP_CLANG && ((EIGEN_COMP_CLANG<309) || defined(__apple_build_version__)) || EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<49) -#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 1 -#else -#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 0 -#endif - // Upperbound on the C++ version to use. // Expected values are 03, 11, 14, 17, etc. // By default, let's use an arbitrarily large C++ version. @@ -370,11 +362,6 @@ #define EIGEN_HAS_CXX11 0 #endif -#if EIGEN_MAX_CPP_VER>=14 && (defined(__cplusplus) && (__cplusplus > 201103L) || EIGEN_COMP_MSVC >= 1910) -#define EIGEN_HAS_CXX14 1 -#else -#define EIGEN_HAS_CXX14 0 -#endif // Do we support r-value references? #ifndef EIGEN_HAS_RVALUE_REFERENCES @@ -393,8 +380,7 @@ #if EIGEN_MAX_CPP_VER>=11 && \ ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ - || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \ - || (EIGEN_COMP_MSVC >= 1900) || defined(__SYCL_DEVICE_ONLY__)) + || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))) #define EIGEN_HAS_C99_MATH 1 #else #define EIGEN_HAS_C99_MATH 0 @@ -413,12 +399,10 @@ // Does the compiler support variadic templates? #ifndef EIGEN_HAS_VARIADIC_TEMPLATES #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ - && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) + && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // this prevents nvcc from crashing when compiling Eigen on Tegra X1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 -#elif EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) && defined(__SYCL_DEVICE_ONLY__) -#define EIGEN_HAS_VARIADIC_TEMPLATES 1 #else #define EIGEN_HAS_VARIADIC_TEMPLATES 0 #endif @@ -427,14 +411,13 @@ // Does the compiler fully support const expressions? (as in c++14) #ifndef EIGEN_HAS_CONSTEXPR -#if defined(__CUDACC__) +#ifdef __CUDACC__ // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) #define EIGEN_HAS_CONSTEXPR 1 #endif #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ - (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)) || \ - (EIGEN_COMP_CLANG >= 306 && (__cplusplus > 199711L))) + (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L))) #define EIGEN_HAS_CONSTEXPR 1 #endif @@ -542,8 +525,8 @@ // - static is not very good because it prevents definitions from different object files to be merged. // So static causes the resulting linked executable to be bloated with multiple copies of the same function. // - inline is not perfect either as it unwantedly hints the compiler toward inlining the function. -#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC -#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC inline +#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline #ifdef NDEBUG # ifndef EIGEN_NO_DEBUG @@ -641,14 +624,6 @@ namespace Eigen { #endif -#if EIGEN_COMP_MSVC - // NOTE MSVC often gives C4127 warnings with compiletime if statements. See bug 1362. - // This workaround is ugly, but it does the job. -# define EIGEN_CONST_CONDITIONAL(cond) (void)0, cond -#else -# define EIGEN_CONST_CONDITIONAL(cond) cond -#endif - //------------------------------------------------------------------------------------------ // Static and dynamic alignment control // @@ -878,8 +853,7 @@ namespace Eigen { typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \ typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \ typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \ - enum CompileTimeTraits \ - { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \ + enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \ ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \ Flags = Eigen::internal::traits<Derived>::Flags, \ SizeAtCompileTime = Base::SizeAtCompileTime, \ diff --git a/eigen/Eigen/src/Core/util/Memory.h b/eigen/Eigen/src/Core/util/Memory.h index 7d90534..c634d7e 100644 --- a/eigen/Eigen/src/Core/util/Memory.h +++ b/eigen/Eigen/src/Core/util/Memory.h @@ -63,7 +63,7 @@ namespace Eigen { namespace internal { -EIGEN_DEVICE_FUNC +EIGEN_DEVICE_FUNC inline void throw_std_bad_alloc() { #ifdef EIGEN_EXCEPTIONS @@ -114,7 +114,7 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = void *previous_aligned = static_cast<char *>(original)+previous_offset; if(aligned!=previous_aligned) std::memmove(aligned, previous_aligned, size); - + *(reinterpret_cast<void**>(aligned) - 1) = original; return aligned; } @@ -142,7 +142,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() { eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)"); } -#else +#else EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {} #endif @@ -471,8 +471,8 @@ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index } /** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size - */ -template<typename Index> + */ +template<typename Index> inline Index first_multiple(Index size, Index base) { return ((size+base-1)/base)*base; @@ -502,7 +502,7 @@ template<typename T> struct smart_copy_helper<T,false> { { std::copy(start, end, target); } }; -// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. +// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. template<typename T, bool UseMemmove> struct smart_memmove_helper; template<typename T> void smart_memmove(const T* start, const T* end, T* target) @@ -522,15 +522,15 @@ template<typename T> struct smart_memmove_helper<T,true> { template<typename T> struct smart_memmove_helper<T,false> { static inline void run(const T* start, const T* end, T* target) - { + { if (UIntPtr(target) < UIntPtr(start)) { std::copy(start, end, target); } - else + else { std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T); - std::copy_backward(start, end, target + count); + std::copy_backward(start, end, target + count); } } }; @@ -603,7 +603,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) { std::swap(a.ptr(),b.ptr()); } - + } // end namespace internal /** \internal @@ -622,7 +622,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) * The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token. */ #ifdef EIGEN_ALLOCA - + #if EIGEN_DEFAULT_ALIGN_BYTES>0 // We always manually re-align the result of EIGEN_ALLOCA. // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. @@ -645,7 +645,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \ TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true) - + #endif @@ -701,7 +701,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) * Example: * \code * // Matrix4f requires 16 bytes alignment: -* std::map< int, Matrix4f, std::less<int>, +* std::map< int, Matrix4f, std::less<int>, * aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4; * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator: * std::map< int, Vector3f > my_map_vec3; diff --git a/eigen/Eigen/src/Core/util/Meta.h b/eigen/Eigen/src/Core/util/Meta.h index 8de6055..7f63707 100644 --- a/eigen/Eigen/src/Core/util/Meta.h +++ b/eigen/Eigen/src/Core/util/Meta.h @@ -97,22 +97,17 @@ template<> struct is_arithmetic<unsigned int> { enum { value = true }; }; template<> struct is_arithmetic<signed long> { enum { value = true }; }; template<> struct is_arithmetic<unsigned long> { enum { value = true }; }; -#if EIGEN_HAS_CXX11 -using std::is_integral; -#else -template<typename T> struct is_integral { enum { value = false }; }; -template<> struct is_integral<bool> { enum { value = true }; }; -template<> struct is_integral<char> { enum { value = true }; }; -template<> struct is_integral<signed char> { enum { value = true }; }; -template<> struct is_integral<unsigned char> { enum { value = true }; }; -template<> struct is_integral<signed short> { enum { value = true }; }; -template<> struct is_integral<unsigned short> { enum { value = true }; }; -template<> struct is_integral<signed int> { enum { value = true }; }; -template<> struct is_integral<unsigned int> { enum { value = true }; }; -template<> struct is_integral<signed long> { enum { value = true }; }; -template<> struct is_integral<unsigned long> { enum { value = true }; }; -#endif - +template<typename T> struct is_integral { enum { value = false }; }; +template<> struct is_integral<bool> { enum { value = true }; }; +template<> struct is_integral<char> { enum { value = true }; }; +template<> struct is_integral<signed char> { enum { value = true }; }; +template<> struct is_integral<unsigned char> { enum { value = true }; }; +template<> struct is_integral<signed short> { enum { value = true }; }; +template<> struct is_integral<unsigned short> { enum { value = true }; }; +template<> struct is_integral<signed int> { enum { value = true }; }; +template<> struct is_integral<unsigned int> { enum { value = true }; }; +template<> struct is_integral<signed long> { enum { value = true }; }; +template<> struct is_integral<unsigned long> { enum { value = true }; }; template <typename T> struct add_const { typedef const T type; }; template <typename T> struct add_const<T&> { typedef T& type; }; @@ -284,59 +279,6 @@ protected: }; /** \internal - * Provides access to the number of elements in the object of as a compile-time constant expression. - * It "returns" Eigen::Dynamic if the size cannot be resolved at compile-time (default). - * - * Similar to std::tuple_size, but more general. - * - * It currently supports: - * - any types T defining T::SizeAtCompileTime - * - plain C arrays as T[N] - * - std::array (c++11) - * - some internal types such as SingleRange and AllRange - * - * The second template parameter eases SFINAE-based specializations. - */ -template<typename T, typename EnableIf = void> struct array_size { - enum { value = Dynamic }; -}; - -template<typename T> struct array_size<T,typename internal::enable_if<((T::SizeAtCompileTime&0)==0)>::type> { - enum { value = T::SizeAtCompileTime }; -}; - -template<typename T, int N> struct array_size<const T (&)[N]> { - enum { value = N }; -}; -template<typename T, int N> struct array_size<T (&)[N]> { - enum { value = N }; -}; - -#if EIGEN_HAS_CXX11 -template<typename T, std::size_t N> struct array_size<const std::array<T,N> > { - enum { value = N }; -}; -template<typename T, std::size_t N> struct array_size<std::array<T,N> > { - enum { value = N }; -}; -#endif - -/** \internal - * Analogue of the std::size free function. - * It returns the size of the container or view \a x of type \c T - * - * It currently supports: - * - any types T defining a member T::size() const - * - plain C arrays as T[N] - * - */ -template<typename T> -Index size(const T& x) { return x.size(); } - -template<typename T,std::size_t N> -Index size(const T (&) [N]) { return N; } - -/** \internal * Convenient struct to get the result type of a unary or binary functor. * * It supports both the current STL mechanism (using the result_type member) as well as @@ -433,10 +375,10 @@ struct meta_no { char a[2]; }; template <typename T> struct has_ReturnType { - template <typename C> static meta_yes testFunctor(C const *, typename C::ReturnType const * = 0); - template <typename C> static meta_no testFunctor(...); + template <typename C> static meta_yes testFunctor(typename C::ReturnType const *); + template <typename C> static meta_no testFunctor(...); - enum { value = sizeof(testFunctor<T>(static_cast<T*>(0))) == sizeof(meta_yes) }; + enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) }; }; template<typename T> const T* return_ptr(); diff --git a/eigen/Eigen/src/Core/util/SymbolicIndex.h b/eigen/Eigen/src/Core/util/SymbolicIndex.h deleted file mode 100644 index bb6349e..0000000 --- a/eigen/Eigen/src/Core/util/SymbolicIndex.h +++ /dev/null @@ -1,300 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SYMBOLIC_INDEX_H -#define EIGEN_SYMBOLIC_INDEX_H - -namespace Eigen { - -/** \namespace Eigen::Symbolic - * \ingroup Core_Module - * - * This namespace defines a set of classes and functions to build and evaluate symbolic expressions of scalar type Index. - * Here is a simple example: - * - * \code - * // First step, defines symbols: - * struct x_tag {}; static const Symbolic::SymbolExpr<x_tag> x; - * struct y_tag {}; static const Symbolic::SymbolExpr<y_tag> y; - * struct z_tag {}; static const Symbolic::SymbolExpr<z_tag> z; - * - * // Defines an expression: - * auto expr = (x+3)/y+z; - * - * // And evaluate it: (c++14) - * std::cout << expr.eval(x=6,y=3,z=-13) << "\n"; - * - * // In c++98/11, only one symbol per expression is supported for now: - * auto expr98 = (3-x)/2; - * std::cout << expr98.eval(x=6) << "\n"; - * \endcode - * - * It is currently only used internally to define and minipulate the placeholders::last and placeholders::end symbols in Eigen::seq and Eigen::seqN. - * - */ -namespace Symbolic { - -template<typename Tag> class Symbol; -template<typename Arg0> class NegateExpr; -template<typename Arg1,typename Arg2> class AddExpr; -template<typename Arg1,typename Arg2> class ProductExpr; -template<typename Arg1,typename Arg2> class QuotientExpr; - -// A simple wrapper around an integral value to provide the eval method. -// We could also use a free-function symbolic_eval... -template<typename IndexType=Index> -class ValueExpr { -public: - ValueExpr(IndexType val) : m_value(val) {} - template<typename T> - IndexType eval_impl(const T&) const { return m_value; } -protected: - IndexType m_value; -}; - -// Specialization for compile-time value, -// It is similar to ValueExpr(N) but this version helps the compiler to generate better code. -template<int N> -class ValueExpr<internal::FixedInt<N> > { -public: - ValueExpr() {} - template<typename T> - Index eval_impl(const T&) const { return N; } -}; - - -/** \class BaseExpr - * \ingroup Core_Module - * Common base class of any symbolic expressions - */ -template<typename Derived> -class BaseExpr -{ -public: - const Derived& derived() const { return *static_cast<const Derived*>(this); } - - /** Evaluate the expression given the \a values of the symbols. - * - * \param values defines the values of the symbols, it can either be a SymbolValue or a std::tuple of SymbolValue - * as constructed by SymbolExpr::operator= operator. - * - */ - template<typename T> - Index eval(const T& values) const { return derived().eval_impl(values); } - -#if EIGEN_HAS_CXX14 - template<typename... Types> - Index eval(Types&&... values) const { return derived().eval_impl(std::make_tuple(values...)); } -#endif - - NegateExpr<Derived> operator-() const { return NegateExpr<Derived>(derived()); } - - AddExpr<Derived,ValueExpr<> > operator+(Index b) const - { return AddExpr<Derived,ValueExpr<> >(derived(), b); } - AddExpr<Derived,ValueExpr<> > operator-(Index a) const - { return AddExpr<Derived,ValueExpr<> >(derived(), -a); } - ProductExpr<Derived,ValueExpr<> > operator*(Index a) const - { return ProductExpr<Derived,ValueExpr<> >(derived(),a); } - QuotientExpr<Derived,ValueExpr<> > operator/(Index a) const - { return QuotientExpr<Derived,ValueExpr<> >(derived(),a); } - - friend AddExpr<Derived,ValueExpr<> > operator+(Index a, const BaseExpr& b) - { return AddExpr<Derived,ValueExpr<> >(b.derived(), a); } - friend AddExpr<NegateExpr<Derived>,ValueExpr<> > operator-(Index a, const BaseExpr& b) - { return AddExpr<NegateExpr<Derived>,ValueExpr<> >(-b.derived(), a); } - friend ProductExpr<ValueExpr<>,Derived> operator*(Index a, const BaseExpr& b) - { return ProductExpr<ValueExpr<>,Derived>(a,b.derived()); } - friend QuotientExpr<ValueExpr<>,Derived> operator/(Index a, const BaseExpr& b) - { return QuotientExpr<ValueExpr<>,Derived>(a,b.derived()); } - - template<int N> - AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>) const - { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N>) const - { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); } - template<int N> - ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N>) const - { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } - template<int N> - QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N>) const - { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } - - template<int N> - friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>, const BaseExpr& b) - { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N>, const BaseExpr& b) - { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - friend ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N>, const BaseExpr& b) - { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } - template<int N> - friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N>, const BaseExpr& b) - { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } - -#if (!EIGEN_HAS_CXX14) - template<int N> - AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)()) const - { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N> (*)()) const - { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); } - template<int N> - ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N> (*)()) const - { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } - template<int N> - QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N> (*)()) const - { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } - - template<int N> - friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)(), const BaseExpr& b) - { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N> (*)(), const BaseExpr& b) - { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - friend ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N> (*)(), const BaseExpr& b) - { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } - template<int N> - friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N> (*)(), const BaseExpr& b) - { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } -#endif - - - template<typename OtherDerived> - AddExpr<Derived,OtherDerived> operator+(const BaseExpr<OtherDerived> &b) const - { return AddExpr<Derived,OtherDerived>(derived(), b.derived()); } - - template<typename OtherDerived> - AddExpr<Derived,NegateExpr<OtherDerived> > operator-(const BaseExpr<OtherDerived> &b) const - { return AddExpr<Derived,NegateExpr<OtherDerived> >(derived(), -b.derived()); } - - template<typename OtherDerived> - ProductExpr<Derived,OtherDerived> operator*(const BaseExpr<OtherDerived> &b) const - { return ProductExpr<Derived,OtherDerived>(derived(), b.derived()); } - - template<typename OtherDerived> - QuotientExpr<Derived,OtherDerived> operator/(const BaseExpr<OtherDerived> &b) const - { return QuotientExpr<Derived,OtherDerived>(derived(), b.derived()); } -}; - -template<typename T> -struct is_symbolic { - // BaseExpr has no conversion ctor, so we only have to check whether T can be staticaly cast to its base class BaseExpr<T>. - enum { value = internal::is_convertible<T,BaseExpr<T> >::value }; -}; - -// Specialization for functions, because is_convertible fails in this case. -// Useful in c++98/11 mode when testing is_symbolic<decltype(fix<N>)> -template<typename T> -struct is_symbolic<T (*)()> { - enum { value = false }; -}; - -/** Represents the actual value of a symbol identified by its tag - * - * It is the return type of SymbolValue::operator=, and most of the time this is only way it is used. - */ -template<typename Tag> -class SymbolValue -{ -public: - /** Default constructor from the value \a val */ - SymbolValue(Index val) : m_value(val) {} - - /** \returns the stored value of the symbol */ - Index value() const { return m_value; } -protected: - Index m_value; -}; - -/** Expression of a symbol uniquely identified by the template parameter type \c tag */ -template<typename tag> -class SymbolExpr : public BaseExpr<SymbolExpr<tag> > -{ -public: - /** Alias to the template parameter \c tag */ - typedef tag Tag; - - SymbolExpr() {} - - /** Associate the value \a val to the given symbol \c *this, uniquely identified by its \c Tag. - * - * The returned object should be passed to ExprBase::eval() to evaluate a given expression with this specified runtime-time value. - */ - SymbolValue<Tag> operator=(Index val) const { - return SymbolValue<Tag>(val); - } - - Index eval_impl(const SymbolValue<Tag> &values) const { return values.value(); } - -#if EIGEN_HAS_CXX14 - // C++14 versions suitable for multiple symbols - template<typename... Types> - Index eval_impl(const std::tuple<Types...>& values) const { return std::get<SymbolValue<Tag> >(values).value(); } -#endif -}; - -template<typename Arg0> -class NegateExpr : public BaseExpr<NegateExpr<Arg0> > -{ -public: - NegateExpr(const Arg0& arg0) : m_arg0(arg0) {} - - template<typename T> - Index eval_impl(const T& values) const { return -m_arg0.eval_impl(values); } -protected: - Arg0 m_arg0; -}; - -template<typename Arg0, typename Arg1> -class AddExpr : public BaseExpr<AddExpr<Arg0,Arg1> > -{ -public: - AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - - template<typename T> - Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) + m_arg1.eval_impl(values); } -protected: - Arg0 m_arg0; - Arg1 m_arg1; -}; - -template<typename Arg0, typename Arg1> -class ProductExpr : public BaseExpr<ProductExpr<Arg0,Arg1> > -{ -public: - ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - - template<typename T> - Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) * m_arg1.eval_impl(values); } -protected: - Arg0 m_arg0; - Arg1 m_arg1; -}; - -template<typename Arg0, typename Arg1> -class QuotientExpr : public BaseExpr<QuotientExpr<Arg0,Arg1> > -{ -public: - QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - - template<typename T> - Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) / m_arg1.eval_impl(values); } -protected: - Arg0 m_arg0; - Arg1 m_arg1; -}; - -} // end namespace Symbolic - -} // end namespace Eigen - -#endif // EIGEN_SYMBOLIC_INDEX_H diff --git a/eigen/Eigen/src/Core/util/XprHelper.h b/eigen/Eigen/src/Core/util/XprHelper.h index 4b337f2..ba5bd18 100644 --- a/eigen/Eigen/src/Core/util/XprHelper.h +++ b/eigen/Eigen/src/Core/util/XprHelper.h @@ -109,7 +109,6 @@ template<typename T, int Value> class variable_if_dynamic EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return T(Value); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {} }; @@ -120,7 +119,6 @@ template<typename T> class variable_if_dynamic<T, Dynamic> public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return m_value; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; } }; @@ -673,7 +671,7 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_acces // Internal helper defining the cost of a scalar division for the type T. // The default heuristic can be specialized for each scalar type and architecture. -template<typename T,bool Vectorized=false,typename EnableIf = void> +template<typename T,bool Vectorized=false,typename EnaleIf = void> struct scalar_div_cost { enum { value = 8*NumTraits<T>::MulCost }; }; diff --git a/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h b/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h index dbbd480..4fec8af 100644 --- a/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +++ b/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h @@ -85,7 +85,7 @@ MatrixBase<Derived>::eigenvalues() const * \sa SelfAdjointEigenSolver::eigenvalues(), MatrixBase::eigenvalues() */ template<typename MatrixType, unsigned int UpLo> -EIGEN_DEVICE_FUNC inline typename SelfAdjointView<MatrixType, UpLo>::EigenvaluesReturnType +inline typename SelfAdjointView<MatrixType, UpLo>::EigenvaluesReturnType SelfAdjointView<MatrixType, UpLo>::eigenvalues() const { typedef typename SelfAdjointView<MatrixType, UpLo>::PlainObject PlainObject; @@ -149,7 +149,7 @@ MatrixBase<Derived>::operatorNorm() const * \sa eigenvalues(), MatrixBase::operatorNorm() */ template<typename MatrixType, unsigned int UpLo> -EIGEN_DEVICE_FUNC inline typename SelfAdjointView<MatrixType, UpLo>::RealScalar +inline typename SelfAdjointView<MatrixType, UpLo>::RealScalar SelfAdjointView<MatrixType, UpLo>::operatorNorm() const { return eigenvalues().cwiseAbs().maxCoeff(); diff --git a/eigen/Eigen/src/Geometry/AlignedBox.h b/eigen/Eigen/src/Geometry/AlignedBox.h index c902d8f..066eae4 100644 --- a/eigen/Eigen/src/Geometry/AlignedBox.h +++ b/eigen/Eigen/src/Geometry/AlignedBox.h @@ -63,7 +63,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** Default constructor initializing a null box. */ EIGEN_DEVICE_FUNC inline AlignedBox() - { if (EIGEN_CONST_CONDITIONAL(AmbientDimAtCompileTime!=Dynamic)) setEmpty(); } + { if (AmbientDimAtCompileTime!=Dynamic) setEmpty(); } /** Constructs a null box with \a _dim the dimension of the ambient space. */ EIGEN_DEVICE_FUNC inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim) diff --git a/eigen/Eigen/src/Geometry/ParametrizedLine.h b/eigen/Eigen/src/Geometry/ParametrizedLine.h index 3929ca8..1e985d8 100644 --- a/eigen/Eigen/src/Geometry/ParametrizedLine.h +++ b/eigen/Eigen/src/Geometry/ParametrizedLine.h @@ -104,44 +104,7 @@ public: template <int OtherOptions> EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; - /** Applies the transformation matrix \a mat to \c *this and returns a reference to \c *this. - * - * \param mat the Dim x Dim transformation matrix - * \param traits specifies whether the matrix \a mat represents an #Isometry - * or a more generic #Affine transformation. The default is #Affine. - */ - template<typename XprType> - EIGEN_DEVICE_FUNC inline ParametrizedLine& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine) - { - if (traits==Affine) - direction() = (mat * direction()).normalized(); - else if (traits==Isometry) - direction() = mat * direction(); - else - { - eigen_assert(0 && "invalid traits value in ParametrizedLine::transform()"); - } - origin() = mat * origin(); - return *this; - } - - /** Applies the transformation \a t to \c *this and returns a reference to \c *this. - * - * \param t the transformation of dimension Dim - * \param traits specifies whether the transformation \a t represents an #Isometry - * or a more generic #Affine transformation. The default is #Affine. - * Other kind of transformations are not supported. - */ - template<int TrOptions> - EIGEN_DEVICE_FUNC inline ParametrizedLine& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t, - TransformTraits traits = Affine) - { - transform(t.linear(), traits); - origin() += t.translation(); - return *this; - } - -/** \returns \c *this with scalar type casted to \a NewScalarType + /** \returns \c *this with scalar type casted to \a NewScalarType * * Note that if \a NewScalarType is equal to the current scalar type of \c *this * then this function smartly returns a const reference to \c *this. diff --git a/eigen/Eigen/src/Geometry/Quaternion.h b/eigen/Eigen/src/Geometry/Quaternion.h index f6ef1bc..3e5a9ba 100644 --- a/eigen/Eigen/src/Geometry/Quaternion.h +++ b/eigen/Eigen/src/Geometry/Quaternion.h @@ -423,7 +423,7 @@ typedef Map<Quaternion<double>, Aligned> QuaternionMapAlignedd; // Generic Quaternion * Quaternion product // This product can be specialized for a given architecture via the Arch template argument. namespace internal { -template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options> struct quat_product +template<int Arch, class Derived1, class Derived2, typename Scalar> struct quat_product { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){ return Quaternion<Scalar> @@ -446,8 +446,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) return internal::quat_product<Architecture::Target, Derived, OtherDerived, - typename internal::traits<Derived>::Scalar, - EIGEN_PLAIN_ENUM_MIN(internal::traits<Derived>::Alignment, internal::traits<OtherDerived>::Alignment)>::run(*this, other); + typename internal::traits<Derived>::Scalar>::run(*this, other); } /** \sa operator*(Quaternion) */ @@ -672,7 +671,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> // Generic conjugate of a Quaternion namespace internal { -template<int Arch, class Derived, typename Scalar, int _Options> struct quat_conj +template<int Arch, class Derived, typename Scalar> struct quat_conj { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){ return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z()); @@ -691,8 +690,7 @@ EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::conjugate() const { return internal::quat_conj<Architecture::Target, Derived, - typename internal::traits<Derived>::Scalar, - internal::traits<Derived>::Alignment>::run(*this); + typename internal::traits<Derived>::Scalar>::run(*this); } diff --git a/eigen/Eigen/src/Geometry/Transform.h b/eigen/Eigen/src/Geometry/Transform.h index 2d36dfa..3f31ee4 100644 --- a/eigen/Eigen/src/Geometry/Transform.h +++ b/eigen/Eigen/src/Geometry/Transform.h @@ -335,7 +335,7 @@ public: OtherModeIsAffineCompact = OtherMode == int(AffineCompact) }; - if(EIGEN_CONST_CONDITIONAL(ModeIsAffineCompact == OtherModeIsAffineCompact)) + if(ModeIsAffineCompact == OtherModeIsAffineCompact) { // We need the block expression because the code is compiled for all // combinations of transformations and will trigger a compile time error @@ -343,7 +343,7 @@ public: m_matrix.template block<Dim,Dim+1>(0,0) = other.matrix().template block<Dim,Dim+1>(0,0); makeAffine(); } - else if(EIGEN_CONST_CONDITIONAL(OtherModeIsAffineCompact)) + else if(OtherModeIsAffineCompact) { typedef typename Transform<Scalar,Dim,OtherMode,OtherOptions>::MatrixType OtherMatrixType; internal::transform_construct_from_matrix<OtherMatrixType,Mode,Options,Dim,HDim>::run(this, other.matrix()); @@ -481,7 +481,7 @@ public: TransformTimeDiagonalReturnType res; res.linear().noalias() = a*b.linear(); res.translation().noalias() = a*b.translation(); - if (EIGEN_CONST_CONDITIONAL(Mode!=int(AffineCompact))) + if (Mode!=int(AffineCompact)) res.matrix().row(Dim) = b.matrix().row(Dim); return res; } @@ -755,7 +755,7 @@ template<typename Scalar, int Dim, int Mode,int Options> Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const QMatrix& other) { EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) + if (Mode == int(AffineCompact)) m_matrix << other.m11(), other.m21(), other.dx(), other.m12(), other.m22(), other.dy(); else @@ -801,7 +801,7 @@ Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator { check_template_params(); EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) + if (Mode == int(AffineCompact)) m_matrix << other.m11(), other.m21(), other.dx(), other.m12(), other.m22(), other.dy(); else @@ -819,7 +819,7 @@ template<typename Scalar, int Dim, int Mode, int Options> QTransform Transform<Scalar,Dim,Mode,Options>::toQTransform(void) const { EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE) - if (EIGEN_CONST_CONDITIONAL(Mode == int(AffineCompact))) + if (Mode == int(AffineCompact)) return QTransform(m_matrix.coeff(0,0), m_matrix.coeff(1,0), m_matrix.coeff(0,1), m_matrix.coeff(1,1), m_matrix.coeff(0,2), m_matrix.coeff(1,2)); @@ -912,7 +912,7 @@ EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::pretranslate(const MatrixBase<OtherDerived> &other) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) - if(EIGEN_CONST_CONDITIONAL(int(Mode)==int(Projective))) + if(int(Mode)==int(Projective)) affine() += other * m_matrix.row(Dim); else translation() += other; diff --git a/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h b/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h index 1a86ff8..f68cab5 100644 --- a/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -16,17 +16,23 @@ namespace Eigen { namespace internal { template<class Derived, class OtherDerived> -struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16> +struct quat_product<Architecture::SSE, Derived, OtherDerived, float> { + enum { + AAlignment = traits<Derived>::Alignment, + BAlignment = traits<OtherDerived>::Alignment, + ResAlignment = traits<Quaternion<float> >::Alignment + }; static inline Quaternion<float> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) { Quaternion<float> res; const __m128 mask = _mm_setr_ps(0.f,0.f,0.f,-0.f); - __m128 a = _a.coeffs().template packet<Aligned16>(0); - __m128 b = _b.coeffs().template packet<Aligned16>(0); + __m128 a = _a.coeffs().template packet<AAlignment>(0); + __m128 b = _b.coeffs().template packet<BAlignment>(0); __m128 s1 = _mm_mul_ps(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); __m128 s2 = _mm_mul_ps(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); - pstore(&res.x(), + pstoret<float,Packet4f,ResAlignment>( + &res.x(), _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,vec4f_swizzle1(b,3,3,3,3)), _mm_mul_ps(vec4f_swizzle1(a,2,0,1,0), vec4f_swizzle1(b,1,2,0,0))), @@ -36,14 +42,17 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, float, Aligned16> } }; -template<class Derived, int Alignment> -struct quat_conj<Architecture::SSE, Derived, float, Alignment> +template<class Derived> +struct quat_conj<Architecture::SSE, Derived, float> { + enum { + ResAlignment = traits<Quaternion<float> >::Alignment + }; static inline Quaternion<float> run(const QuaternionBase<Derived>& q) { Quaternion<float> res; const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f); - pstore(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<Alignment>(0))); + pstoret<float,Packet4f,ResAlignment>(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet<traits<Derived>::Alignment>(0))); return res; } }; @@ -52,6 +61,9 @@ struct quat_conj<Architecture::SSE, Derived, float, Alignment> template<typename VectorLhs,typename VectorRhs> struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> { + enum { + ResAlignment = traits<typename plain_matrix_type<VectorLhs>::type>::Alignment + }; static inline typename plain_matrix_type<VectorLhs>::type run(const VectorLhs& lhs, const VectorRhs& rhs) { @@ -60,7 +72,7 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); typename plain_matrix_type<VectorLhs>::type res; - pstore(&res.x(),_mm_sub_ps(mul1,mul2)); + pstoret<float,Packet4f,ResAlignment>(&res.x(),_mm_sub_ps(mul1,mul2)); return res; } }; @@ -68,9 +80,14 @@ struct cross3_impl<Architecture::SSE,VectorLhs,VectorRhs,float,true> -template<class Derived, class OtherDerived, int Alignment> -struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> +template<class Derived, class OtherDerived> +struct quat_product<Architecture::SSE, Derived, OtherDerived, double> { + enum { + BAlignment = traits<OtherDerived>::Alignment, + ResAlignment = traits<Quaternion<double> >::Alignment + }; + static inline Quaternion<double> run(const QuaternionBase<Derived>& _a, const QuaternionBase<OtherDerived>& _b) { const Packet2d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); @@ -78,8 +95,8 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> Quaternion<double> res; const double* a = _a.coeffs().data(); - Packet2d b_xy = _b.coeffs().template packet<Alignment>(0); - Packet2d b_zw = _b.coeffs().template packet<Alignment>(2); + Packet2d b_xy = _b.coeffs().template packet<BAlignment>(0); + Packet2d b_zw = _b.coeffs().template packet<BAlignment>(2); Packet2d a_xx = pset1<Packet2d>(a[0]); Packet2d a_yy = pset1<Packet2d>(a[1]); Packet2d a_zz = pset1<Packet2d>(a[2]); @@ -97,9 +114,9 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> t2 = psub(pmul(a_zz, b_xy), pmul(a_xx, b_zw)); #ifdef EIGEN_VECTORIZE_SSE3 EIGEN_UNUSED_VARIABLE(mask) - pstore(&res.x(), _mm_addsub_pd(t1, preverse(t2))); + pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_addsub_pd(t1, preverse(t2))); #else - pstore(&res.x(), padd(t1, pxor(mask,preverse(t2)))); + pstoret<double,Packet2d,ResAlignment>(&res.x(), padd(t1, pxor(mask,preverse(t2)))); #endif /* @@ -111,25 +128,28 @@ struct quat_product<Architecture::SSE, Derived, OtherDerived, double, Alignment> t2 = padd(pmul(a_zz, b_zw), pmul(a_xx, b_xy)); #ifdef EIGEN_VECTORIZE_SSE3 EIGEN_UNUSED_VARIABLE(mask) - pstore(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); + pstoret<double,Packet2d,ResAlignment>(&res.z(), preverse(_mm_addsub_pd(preverse(t1), t2))); #else - pstore(&res.z(), psub(t1, pxor(mask,preverse(t2)))); + pstoret<double,Packet2d,ResAlignment>(&res.z(), psub(t1, pxor(mask,preverse(t2)))); #endif return res; } }; -template<class Derived, int Alignment> -struct quat_conj<Architecture::SSE, Derived, double, Alignment> +template<class Derived> +struct quat_conj<Architecture::SSE, Derived, double> { + enum { + ResAlignment = traits<Quaternion<double> >::Alignment + }; static inline Quaternion<double> run(const QuaternionBase<Derived>& q) { Quaternion<double> res; const __m128d mask0 = _mm_setr_pd(-0.,-0.); const __m128d mask2 = _mm_setr_pd(-0.,0.); - pstore(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<Alignment>(0))); - pstore(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<Alignment>(2))); + pstoret<double,Packet2d,ResAlignment>(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet<traits<Derived>::Alignment>(0))); + pstoret<double,Packet2d,ResAlignment>(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet<traits<Derived>::Alignment>(2))); return res; } }; diff --git a/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h b/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h index 358444a..facdaf8 100644 --- a/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +++ b/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h @@ -152,13 +152,28 @@ class LeastSquareDiagonalPreconditioner : public DiagonalPreconditioner<_Scalar> { // Compute the inverse squared-norm of each column of mat m_invdiag.resize(mat.cols()); - for(Index j=0; j<mat.outerSize(); ++j) + if(MatType::IsRowMajor) { - RealScalar sum = mat.innerVector(j).squaredNorm(); - if(sum>0) - m_invdiag(j) = RealScalar(1)/sum; - else - m_invdiag(j) = RealScalar(1); + m_invdiag.setZero(); + for(Index j=0; j<mat.outerSize(); ++j) + { + for(typename MatType::InnerIterator it(mat,j); it; ++it) + m_invdiag(it.index()) += numext::abs2(it.value()); + } + for(Index j=0; j<mat.cols(); ++j) + if(numext::real(m_invdiag(j))>RealScalar(0)) + m_invdiag(j) = RealScalar(1)/numext::real(m_invdiag(j)); + } + else + { + for(Index j=0; j<mat.outerSize(); ++j) + { + RealScalar sum = mat.innerVector(j).squaredNorm(); + if(sum>RealScalar(0)) + m_invdiag(j) = RealScalar(1)/sum; + else + m_invdiag(j) = RealScalar(1); + } } Base::m_isInitialized = true; return *this; diff --git a/eigen/Eigen/src/Jacobi/Jacobi.h b/eigen/Eigen/src/Jacobi/Jacobi.h index d25af8e..c30326e 100644 --- a/eigen/Eigen/src/Jacobi/Jacobi.h +++ b/eigen/Eigen/src/Jacobi/Jacobi.h @@ -302,8 +302,12 @@ template<typename VectorX, typename VectorY, typename OtherScalar> void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j) { typedef typename VectorX::Scalar Scalar; - enum { PacketSize = packet_traits<Scalar>::size }; + enum { + PacketSize = packet_traits<Scalar>::size, + OtherPacketSize = packet_traits<OtherScalar>::size + }; typedef typename packet_traits<Scalar>::type Packet; + typedef typename packet_traits<OtherScalar>::type OtherPacket; eigen_assert(xpr_x.size() == xpr_y.size()); Index size = xpr_x.size(); Index incrx = xpr_x.derived().innerStride(); @@ -321,6 +325,7 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x if(VectorX::SizeAtCompileTime == Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && + (PacketSize == OtherPacketSize) && ((incrx==1 && incry==1) || PacketSize == 1)) { // both vectors are sequentially stored in memory => vectorization @@ -329,9 +334,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x Index alignedStart = internal::first_default_aligned(y, size); Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; - const Packet pc = pset1<Packet>(c); - const Packet ps = pset1<Packet>(s); - conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; for(Index i=0; i<alignedStart; ++i) { @@ -350,8 +356,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x { Packet xi = pload<Packet>(px); Packet yi = pload<Packet>(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); px += PacketSize; py += PacketSize; } @@ -365,10 +371,10 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x Packet xi1 = ploadu<Packet>(px+PacketSize); Packet yi = pload <Packet>(py); Packet yi1 = pload <Packet>(py+PacketSize); - pstoreu(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstoreu(px+PacketSize, padd(pmul(pc,xi1),pcj.pmul(ps,yi1))); - pstore (py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); - pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pmul(ps,xi1))); + pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); + pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); px += Peeling*PacketSize; py += Peeling*PacketSize; } @@ -376,8 +382,8 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x { Packet xi = ploadu<Packet>(x+peelingEnd); Packet yi = pload <Packet>(y+peelingEnd); - pstoreu(x+peelingEnd, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); } } @@ -393,19 +399,21 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x /*** fixed-size vectorized path ***/ else if(VectorX::SizeAtCompileTime != Dynamic && (VectorX::Flags & VectorY::Flags & PacketAccessBit) && + (PacketSize == OtherPacketSize) && (EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment { - const Packet pc = pset1<Packet>(c); - const Packet ps = pset1<Packet>(s); - conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex,false> pcj; + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; Scalar* EIGEN_RESTRICT px = x; Scalar* EIGEN_RESTRICT py = y; for(Index i=0; i<size; i+=PacketSize) { Packet xi = pload<Packet>(px); Packet yi = pload<Packet>(py); - pstore(px, padd(pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pmul(ps,xi))); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); px += PacketSize; py += PacketSize; } diff --git a/eigen/Eigen/src/LU/FullPivLU.h b/eigen/Eigen/src/LU/FullPivLU.h index ec61086..03b6af7 100644 --- a/eigen/Eigen/src/LU/FullPivLU.h +++ b/eigen/Eigen/src/LU/FullPivLU.h @@ -411,9 +411,11 @@ template<typename _MatrixType> class FullPivLU #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; template<bool Conjugate, typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const; #endif diff --git a/eigen/Eigen/src/QR/ColPivHouseholderQR.h b/eigen/Eigen/src/QR/ColPivHouseholderQR.h index d35395d..a7b47d5 100644 --- a/eigen/Eigen/src/QR/ColPivHouseholderQR.h +++ b/eigen/Eigen/src/QR/ColPivHouseholderQR.h @@ -416,6 +416,7 @@ template<typename _MatrixType> class ColPivHouseholderQR #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif @@ -505,8 +506,8 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace() m_colNormsUpdated.coeffRef(k) = m_colNormsDirect.coeffRef(k); } - RealScalar threshold_helper = numext::abs2<Scalar>(m_colNormsUpdated.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows); - RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<Scalar>::epsilon()); + RealScalar threshold_helper = numext::abs2<RealScalar>(m_colNormsUpdated.maxCoeff() * NumTraits<RealScalar>::epsilon()) / RealScalar(rows); + RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<RealScalar>::epsilon()); m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) m_maxpivot = RealScalar(0); @@ -552,12 +553,12 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace() // http://www.netlib.org/lapack/lawnspdf/lawn176.pdf // and used in LAPACK routines xGEQPF and xGEQP3. // See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html - if (m_colNormsUpdated.coeffRef(j) != 0) { + if (m_colNormsUpdated.coeffRef(j) != RealScalar(0)) { RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNormsUpdated.coeffRef(j); temp = (RealScalar(1) + temp) * (RealScalar(1) - temp); - temp = temp < 0 ? 0 : temp; - RealScalar temp2 = temp * numext::abs2<Scalar>(m_colNormsUpdated.coeffRef(j) / - m_colNormsDirect.coeffRef(j)); + temp = temp < RealScalar(0) ? RealScalar(0) : temp; + RealScalar temp2 = temp * numext::abs2<RealScalar>(m_colNormsUpdated.coeffRef(j) / + m_colNormsDirect.coeffRef(j)); if (temp2 <= norm_downdate_threshold) { // The updated norm has become too inaccurate so re-compute the column // norm directly. diff --git a/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h index 13b61fc..34c637b 100644 --- a/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +++ b/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h @@ -367,7 +367,7 @@ class CompleteOrthogonalDecomposition { #ifndef EIGEN_PARSED_BY_DOXYGEN template <typename RhsType, typename DstType> - void _solve_impl(const RhsType& rhs, DstType& dst) const; + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType& rhs, DstType& dst) const; #endif protected: diff --git a/eigen/Eigen/src/QR/FullPivHouseholderQR.h b/eigen/Eigen/src/QR/FullPivHouseholderQR.h index c31e47c..e489bdd 100644 --- a/eigen/Eigen/src/QR/FullPivHouseholderQR.h +++ b/eigen/Eigen/src/QR/FullPivHouseholderQR.h @@ -392,21 +392,22 @@ template<typename _MatrixType> class FullPivHouseholderQR * diagonal coefficient of U. */ RealScalar maxPivot() const { return m_maxpivot; } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + void computeInPlace(); - + MatrixType m_qr; HCoeffsType m_hCoeffs; IntDiagSizeVectorType m_rows_transpositions; diff --git a/eigen/Eigen/src/QR/HouseholderQR.h b/eigen/Eigen/src/QR/HouseholderQR.h index 762b21c..3513d99 100644 --- a/eigen/Eigen/src/QR/HouseholderQR.h +++ b/eigen/Eigen/src/QR/HouseholderQR.h @@ -204,27 +204,28 @@ template<typename _MatrixType> class HouseholderQR inline Index rows() const { return m_qr.rows(); } inline Index cols() const { return m_qr.cols(); } - + /** \returns a const reference to the vector of Householder coefficients used to represent the factor \c Q. * * For advanced uses only. */ const HCoeffsType& hCoeffs() const { return m_hCoeffs; } - + #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } void computeInPlace(); - + MatrixType m_qr; HCoeffsType m_hCoeffs; RowVectorType m_temp; diff --git a/eigen/Eigen/src/SVD/BDCSVD.h b/eigen/Eigen/src/SVD/BDCSVD.h index 25fca6f..d7a4271 100644 --- a/eigen/Eigen/src/SVD/BDCSVD.h +++ b/eigen/Eigen/src/SVD/BDCSVD.h @@ -77,6 +77,7 @@ public: typedef _MatrixType MatrixType; typedef typename MatrixType::Scalar Scalar; typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; + typedef typename NumTraits<RealScalar>::Literal Literal; enum { RowsAtCompileTime = MatrixType::RowsAtCompileTime, ColsAtCompileTime = MatrixType::ColsAtCompileTime, @@ -259,7 +260,7 @@ BDCSVD<MatrixType>& BDCSVD<MatrixType>::compute(const MatrixType& matrix, unsign //**** step 0 - Copy the input matrix and apply scaling to reduce over/under-flows RealScalar scale = matrix.cwiseAbs().maxCoeff(); - if(scale==RealScalar(0)) scale = RealScalar(1); + if(scale==Literal(0)) scale = Literal(1); MatrixX copy; if (m_isTranspose) copy = matrix.adjoint()/scale; else copy = matrix/scale; @@ -351,13 +352,13 @@ void BDCSVD<MatrixType>::structured_update(Block<MatrixXr,Dynamic,Dynamic> A, co Index k1=0, k2=0; for(Index j=0; j<n; ++j) { - if( (A.col(j).head(n1).array()!=0).any() ) + if( (A.col(j).head(n1).array()!=Literal(0)).any() ) { A1.col(k1) = A.col(j).head(n1); B1.row(k1) = B.row(j); ++k1; } - if( (A.col(j).tail(n2).array()!=0).any() ) + if( (A.col(j).tail(n2).array()!=Literal(0)).any() ) { A2.col(k2) = A.col(j).tail(n2); B2.row(k2) = B.row(j); @@ -449,11 +450,11 @@ void BDCSVD<MatrixType>::divide (Index firstCol, Index lastCol, Index firstRowW, l = m_naiveU.row(1).segment(firstCol, k); f = m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1); } - if (m_compV) m_naiveV(firstRowW+k, firstColW) = 1; + if (m_compV) m_naiveV(firstRowW+k, firstColW) = Literal(1); if (r0<considerZero) { - c0 = 1; - s0 = 0; + c0 = Literal(1); + s0 = Literal(0); } else { @@ -574,7 +575,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec ArrayRef col0 = m_computed.col(firstCol).segment(firstCol, n); m_workspace.head(n) = m_computed.block(firstCol, firstCol, n, n).diagonal(); ArrayRef diag = m_workspace.head(n); - diag(0) = 0; + diag(0) = Literal(0); // Allocate space for singular values and vectors singVals.resize(n); @@ -590,7 +591,7 @@ void BDCSVD<MatrixType>::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec // but others are interleaved and we must ignore them at this stage. // To this end, let's compute a permutation skipping them: Index actual_n = n; - while(actual_n>1 && diag(actual_n-1)==0) --actual_n; + while(actual_n>1 && diag(actual_n-1)==Literal(0)) --actual_n; Index m = 0; // size of the deflated problem for(Index k=0;k<actual_n;++k) if(abs(col0(k))>considerZero) @@ -691,7 +692,7 @@ template <typename MatrixType> typename BDCSVD<MatrixType>::RealScalar BDCSVD<MatrixType>::secularEq(RealScalar mu, const ArrayRef& col0, const ArrayRef& diag, const IndicesRef &perm, const ArrayRef& diagShifted, RealScalar shift) { Index m = perm.size(); - RealScalar res = 1; + RealScalar res = Literal(1); for(Index i=0; i<m; ++i) { Index j = perm(i); @@ -710,16 +711,16 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d Index n = col0.size(); Index actual_n = n; - while(actual_n>1 && col0(actual_n-1)==0) --actual_n; + while(actual_n>1 && col0(actual_n-1)==Literal(0)) --actual_n; for (Index k = 0; k < n; ++k) { - if (col0(k) == 0 || actual_n==1) + if (col0(k) == Literal(0) || actual_n==1) { // if col0(k) == 0, then entry is deflated, so singular value is on diagonal // if actual_n==1, then the deflated problem is already diagonalized singVals(k) = k==0 ? col0(0) : diag(k); - mus(k) = 0; + mus(k) = Literal(0); shifts(k) = k==0 ? col0(0) : diag(k); continue; } @@ -733,13 +734,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d { // Skip deflated singular values Index l = k+1; - while(col0(l)==0) { ++l; eigen_internal_assert(l<actual_n); } + while(col0(l)==Literal(0)) { ++l; eigen_internal_assert(l<actual_n); } right = diag(l); } // first decide whether it's closer to the left end or the right end - RealScalar mid = left + (right-left) / 2; - RealScalar fMid = secularEq(mid, col0, diag, perm, diag, 0); + RealScalar mid = left + (right-left) / Literal(2); + RealScalar fMid = secularEq(mid, col0, diag, perm, diag, Literal(0)); #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << right-left << "\n"; std::cout << "fMid = " << fMid << " " << secularEq(mid-left, col0, diag, perm, diag-left, left) << " " << secularEq(mid-right, col0, diag, perm, diag-right, right) << "\n"; @@ -755,7 +756,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d << " " << secularEq(0.8*(left+right), col0, diag, perm, diag, 0) << " " << secularEq(0.9*(left+right), col0, diag, perm, diag, 0) << "\n"; #endif - RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right; + RealScalar shift = (k == actual_n-1 || fMid > Literal(0)) ? left : right; // measure everything relative to shift Map<ArrayXr> diagShifted(m_workspace.data()+4*n, n); @@ -785,13 +786,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d // rational interpolation: fit a function of the form a / mu + b through the two previous // iterates and use its zero to compute the next iterate - bool useBisection = fPrev*fCur>0; - while (fCur!=0 && abs(muCur - muPrev) > 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection) + bool useBisection = fPrev*fCur>Literal(0); + while (fCur!=Literal(0) && abs(muCur - muPrev) > Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits<RealScalar>::epsilon() && !useBisection) { ++m_numIters; // Find a and b such that the function f(mu) = a / mu + b matches the current and previous samples. - RealScalar a = (fCur - fPrev) / (1/muCur - 1/muPrev); + RealScalar a = (fCur - fPrev) / (Literal(1)/muCur - Literal(1)/muPrev); RealScalar b = fCur - a / muCur; // And find mu such that f(mu)==0: RealScalar muZero = -a/b; @@ -803,8 +804,8 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d fCur = fZero; - if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true; - if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true; + if (shift == left && (muCur < Literal(0) || muCur > right - left)) useBisection = true; + if (shift == right && (muCur < -(right - left) || muCur > Literal(0))) useBisection = true; if (abs(fCur)>abs(fPrev)) useBisection = true; } @@ -841,13 +842,13 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d std::cout << k << " : " << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n"; } #endif - eigen_internal_assert(fLeft * fRight < 0); + eigen_internal_assert(fLeft * fRight < Literal(0)); - while (rightShifted - leftShifted > 2 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted))) + while (rightShifted - leftShifted > Literal(2) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(abs(leftShifted), abs(rightShifted))) { - RealScalar midShifted = (leftShifted + rightShifted) / 2; + RealScalar midShifted = (leftShifted + rightShifted) / Literal(2); fMid = secularEq(midShifted, col0, diag, perm, diagShifted, shift); - if (fLeft * fMid < 0) + if (fLeft * fMid < Literal(0)) { rightShifted = midShifted; } @@ -858,7 +859,7 @@ void BDCSVD<MatrixType>::computeSingVals(const ArrayRef& col0, const ArrayRef& d } } - muCur = (leftShifted + rightShifted) / 2; + muCur = (leftShifted + rightShifted) / Literal(2); } singVals[k] = shift + muCur; @@ -892,8 +893,8 @@ void BDCSVD<MatrixType>::perturbCol0 // The offset permits to skip deflated entries while computing zhat for (Index k = 0; k < n; ++k) { - if (col0(k) == 0) // deflated - zhat(k) = 0; + if (col0(k) == Literal(0)) // deflated + zhat(k) = Literal(0); else { // see equation (3.6) @@ -918,7 +919,7 @@ void BDCSVD<MatrixType>::perturbCol0 std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(last) + dk) << " * " << mus(last) + shifts(last) << " - " << dk << "\n"; #endif RealScalar tmp = sqrt(prod); - zhat(k) = col0(k) > 0 ? tmp : -tmp; + zhat(k) = col0(k) > Literal(0) ? tmp : -tmp; } } } @@ -934,7 +935,7 @@ void BDCSVD<MatrixType>::computeSingVecs for (Index k = 0; k < n; ++k) { - if (zhat(k) == 0) + if (zhat(k) == Literal(0)) { U.col(k) = VectorType::Unit(n+1, k); if (m_compV) V.col(k) = VectorType::Unit(n, k); @@ -947,7 +948,7 @@ void BDCSVD<MatrixType>::computeSingVecs Index i = perm(l); U(i,k) = zhat(i)/(((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); } - U(n,k) = 0; + U(n,k) = Literal(0); U.col(k).normalize(); if (m_compV) @@ -958,7 +959,7 @@ void BDCSVD<MatrixType>::computeSingVecs Index i = perm(l); V(i,k) = diag(i) * zhat(i) / (((diag(i) - shifts(k)) - mus(k)) )/( (diag(i) + singVals[k])); } - V(0,k) = -1; + V(0,k) = Literal(-1); V.col(k).normalize(); } } @@ -980,14 +981,14 @@ void BDCSVD<MatrixType>::deflation43(Index firstCol, Index shift, Index i, Index RealScalar c = m_computed(start, start); RealScalar s = m_computed(start+i, start); RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); - if (r == 0) + if (r == Literal(0)) { - m_computed(start+i, start+i) = 0; + m_computed(start+i, start+i) = Literal(0); return; } m_computed(start,start) = r; - m_computed(start+i, start) = 0; - m_computed(start+i, start+i) = 0; + m_computed(start+i, start) = Literal(0); + m_computed(start+i, start+i) = Literal(0); JacobiRotation<RealScalar> J(c/r,-s/r); if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J); @@ -1020,7 +1021,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi << m_computed(firstColm + i+1, firstColm+i+1) << " " << m_computed(firstColm + i+2, firstColm+i+2) << "\n"; #endif - if (r==0) + if (r==Literal(0)) { m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); return; @@ -1029,7 +1030,7 @@ void BDCSVD<MatrixType>::deflation44(Index firstColu , Index firstColm, Index fi s/=r; m_computed(firstColm + i, firstColm) = r; m_computed(firstColm + j, firstColm + j) = m_computed(firstColm + i, firstColm + i); - m_computed(firstColm + j, firstColm) = 0; + m_computed(firstColm + j, firstColm) = Literal(0); JacobiRotation<RealScalar> J(c,-s); if (m_compU) m_naiveU.middleRows(firstColu, size+1).applyOnTheRight(firstColu + i, firstColu + j, J); @@ -1053,7 +1054,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index const RealScalar considerZero = (std::numeric_limits<RealScalar>::min)(); RealScalar maxDiag = diag.tail((std::max)(Index(1),length-1)).cwiseAbs().maxCoeff(); RealScalar epsilon_strict = numext::maxi<RealScalar>(considerZero,NumTraits<RealScalar>::epsilon() * maxDiag); - RealScalar epsilon_coarse = 8 * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag); + RealScalar epsilon_coarse = Literal(8) * NumTraits<RealScalar>::epsilon() * numext::maxi<RealScalar>(col0.cwiseAbs().maxCoeff(), maxDiag); #ifdef EIGEN_BDCSVD_SANITY_CHECKS assert(m_naiveU.allFinite()); @@ -1081,7 +1082,7 @@ void BDCSVD<MatrixType>::deflation(Index firstCol, Index lastCol, Index k, Index #ifdef EIGEN_BDCSVD_DEBUG_VERBOSE std::cout << "deflation 4.2, set z(" << i << ") to zero because " << abs(col0(i)) << " < " << epsilon_strict << " (diag(" << i << ")=" << diag(i) << ")\n"; #endif - col0(i) = 0; + col0(i) = Literal(0); } //condition 4.3 diff --git a/eigen/Eigen/src/SVD/SVDBase.h b/eigen/Eigen/src/SVD/SVDBase.h index 4294147..cc90a3b 100644 --- a/eigen/Eigen/src/SVD/SVDBase.h +++ b/eigen/Eigen/src/SVD/SVDBase.h @@ -212,6 +212,7 @@ public: #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC void _solve_impl(const RhsType &rhs, DstType &dst) const; #endif diff --git a/eigen/Eigen/src/SVD/UpperBidiagonalization.h b/eigen/Eigen/src/SVD/UpperBidiagonalization.h index 0b14608..11ac847 100644 --- a/eigen/Eigen/src/SVD/UpperBidiagonalization.h +++ b/eigen/Eigen/src/SVD/UpperBidiagonalization.h @@ -159,6 +159,8 @@ void upperbidiagonalization_blocked_helper(MatrixType& A, traits<MatrixType>::Flags & RowMajorBit> > Y) { typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename NumTraits<RealScalar>::Literal Literal; enum { StorageOrder = traits<MatrixType>::Flags & RowMajorBit }; typedef InnerStride<int(StorageOrder) == int(ColMajor) ? 1 : Dynamic> ColInnerStride; typedef InnerStride<int(StorageOrder) == int(ColMajor) ? Dynamic : 1> RowInnerStride; @@ -263,7 +265,7 @@ void upperbidiagonalization_blocked_helper(MatrixType& A, SubMatType A10( A.block(bs,0, brows-bs,bs) ); SubMatType A01( A.block(0,bs, bs,bcols-bs) ); Scalar tmp = A01(bs-1,0); - A01(bs-1,0) = 1; + A01(bs-1,0) = Literal(1); A11.noalias() -= A10 * Y.topLeftCorner(bcols,bs).bottomRows(bcols-bs).adjoint(); A11.noalias() -= X.topLeftCorner(brows,bs).bottomRows(brows-bs) * A01; A01(bs-1,0) = tmp; diff --git a/eigen/Eigen/src/SparseCore/SparseCompressedBase.h b/eigen/Eigen/src/SparseCore/SparseCompressedBase.h index e0b3c22..5ccb466 100644 --- a/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +++ b/eigen/Eigen/src/SparseCore/SparseCompressedBase.h @@ -185,14 +185,6 @@ class SparseCompressedBase<Derived>::InnerIterator } inline InnerIterator& operator++() { m_id++; return *this; } - inline InnerIterator& operator+=(Index i) { m_id += i ; return *this; } - - inline InnerIterator operator+(Index i) - { - InnerIterator result = *this; - result += i; - return result; - } inline const Scalar& value() const { return m_values[m_id]; } inline Scalar& valueRef() { return const_cast<Scalar&>(m_values[m_id]); } @@ -253,14 +245,6 @@ class SparseCompressedBase<Derived>::ReverseInnerIterator } inline ReverseInnerIterator& operator--() { --m_id; return *this; } - inline ReverseInnerIterator& operator-=(Index i) { m_id -= i; return *this; } - - inline ReverseInnerIterator operator-(Index i) - { - ReverseInnerIterator result = *this; - result -= i; - return result; - } inline const Scalar& value() const { return m_values[m_id-1]; } inline Scalar& valueRef() { return const_cast<Scalar&>(m_values[m_id-1]); } diff --git a/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h b/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h index c41c07a..e315e35 100644 --- a/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +++ b/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h @@ -212,7 +212,8 @@ public: enum { CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit) }; explicit binary_evaluator(const XprType& xpr) @@ -299,7 +300,8 @@ public: enum { CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit) }; explicit binary_evaluator(const XprType& xpr) @@ -531,7 +533,8 @@ public: enum { CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(RhsArg::Flags)&RowMajorBit) }; explicit sparse_conjunction_evaluator(const XprType& xpr) @@ -605,7 +608,8 @@ public: enum { CoeffReadCost = evaluator<LhsArg>::CoeffReadCost + evaluator<RhsArg>::CoeffReadCost + functor_traits<BinaryOp>::Cost, - Flags = XprType::Flags + // Expose storage order of the sparse expression + Flags = (XprType::Flags & ~RowMajorBit) | (int(LhsArg::Flags)&RowMajorBit) }; explicit sparse_conjunction_evaluator(const XprType& xpr) diff --git a/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h b/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h index 9e39be7..5ab64f1 100644 --- a/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +++ b/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h @@ -47,6 +47,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView enum { Mode = _Mode, + TransposeMode = ((Mode & Upper) ? Lower : 0) | ((Mode & Lower) ? Upper : 0), RowsAtCompileTime = internal::traits<SparseSelfAdjointView>::RowsAtCompileTime, ColsAtCompileTime = internal::traits<SparseSelfAdjointView>::ColsAtCompileTime }; @@ -368,7 +369,7 @@ struct generic_product_impl<Lhs, RhsView, DenseShape, SparseSelfAdjointShape, Pr // transpose everything Transpose<Dest> dstT(dst); - internal::sparse_selfadjoint_time_dense_product<RhsView::Mode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); + internal::sparse_selfadjoint_time_dense_product<RhsView::TransposeMode>(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha); } }; diff --git a/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h b/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h index 9568cc1..91c09ab 100644 --- a/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +++ b/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h @@ -320,7 +320,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > * * \sa umfpackControl() */ - void printUmfpackControl() + void umfpackReportControl() { umfpack_report_control(m_control.data(), Scalar()); } @@ -329,7 +329,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > * * \sa analyzePattern(), compute() */ - void printUmfpackInfo() + void umfpackReportInfo() { eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); umfpack_report_info(m_control.data(), m_umfpackInfo.data(), Scalar()); @@ -339,7 +339,7 @@ class UmfPackLU : public SparseSolverBase<UmfPackLU<_MatrixType> > * * \sa analyzePattern(), compute() */ - void printUmfpackStatus() { + void umfpackReportStatus() { eigen_assert(m_analysisIsOk && "UmfPackLU: you must first call analyzePattern()"); umfpack_report_status(m_control.data(), m_fact_errorCode, Scalar()); } diff --git a/eigen/Eigen/src/misc/lapacke.h b/eigen/Eigen/src/misc/lapacke.h index 3d8e24f..8c7e79b 100644 --- a/eigen/Eigen/src/misc/lapacke.h +++ b/eigen/Eigen/src/misc/lapacke.h @@ -43,6 +43,10 @@ #include "lapacke_config.h" #endif +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + #include <stdlib.h> #ifndef lapack_int @@ -104,11 +108,6 @@ lapack_complex_double lapack_make_complex_double( double re, double im ); #endif - -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ - #ifndef LAPACKE_malloc #define LAPACKE_malloc( size ) malloc( size ) #endif diff --git a/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 43615bd..ebaa3f1 100644 --- a/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -10,7 +10,6 @@ typedef CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> Inverse typedef CwiseUnaryOp<internal::scalar_boolean_not_op<Scalar>, const Derived> BooleanNotReturnType; typedef CwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived> ExpReturnType; -typedef CwiseUnaryOp<internal::scalar_expm1_op<Scalar>, const Derived> Expm1ReturnType; typedef CwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived> LogReturnType; typedef CwiseUnaryOp<internal::scalar_log1p_op<Scalar>, const Derived> Log1pReturnType; typedef CwiseUnaryOp<internal::scalar_log10_op<Scalar>, const Derived> Log10ReturnType; @@ -91,20 +90,6 @@ exp() const return ExpReturnType(derived()); } -/** \returns an expression of the coefficient-wise exponential of *this minus 1. - * - * In exact arithmetic, \c x.expm1() is equivalent to \c x.exp() - 1, - * however, with finite precision, this function is much more accurate when \c x is close to zero. - * - * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_expm1">Math functions</a>, exp() - */ -EIGEN_DEVICE_FUNC -inline const Expm1ReturnType -expm1() const -{ - return Expm1ReturnType(derived()); -} - /** \returns an expression of the coefficient-wise logarithm of *this. * * This function computes the coefficient-wise logarithm. The function MatrixBase::log() in the @@ -113,7 +98,7 @@ expm1() const * Example: \include Cwise_log.cpp * Output: \verbinclude Cwise_log.out * - * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_log">Math functions</a>, log() + * \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_log">Math functions</a>, exp() */ EIGEN_DEVICE_FUNC inline const LogReturnType diff --git a/eigen/Eigen/src/plugins/BlockMethods.h b/eigen/Eigen/src/plugins/BlockMethods.h index 5caf144..ac35a00 100644 --- a/eigen/Eigen/src/plugins/BlockMethods.h +++ b/eigen/Eigen/src/plugins/BlockMethods.h @@ -42,116 +42,66 @@ template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBloc #endif // not EIGEN_PARSED_BY_DOXYGEN -/// \returns an expression of a block in \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a block in *this. /// -/// \param startRow the first row in the block -/// \param startCol the first column in the block -/// \param blockRows number of rows in the block, specified at either run-time or compile-time -/// \param blockCols number of columns in the block, specified at either run-time or compile-time -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. +/// \param startRow the first row in the block +/// \param startCol the first column in the block +/// \param blockRows the number of rows in the block +/// \param blockCols the number of columns in the block /// -/// Example using runtime (aka dynamic) sizes: \include MatrixBase_block_int_int_int_int.cpp +/// Example: \include MatrixBase_block_int_int_int_int.cpp /// Output: \verbinclude MatrixBase_block_int_int_int_int.out /// -/// \newin{3.4}: -/// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. In the later case, \c n plays the role of a runtime fallback value in case \c N equals Eigen::Dynamic. -/// Here is an example with a fixed number of rows \c NRows and dynamic number of columns \c cols: -/// \code -/// mat.block(i,j,fix<NRows>,cols) -/// \endcode -/// -/// This function thus fully covers the features offered by the following overloads block<NRows,NCols>(Index, Index), -/// and block<NRows,NCols>(Index, Index, Index, Index) that are thus obsolete. Indeed, this generic version avoids -/// redundancy, it preserves the argument order, and prevents the need to rely on the template keyword in templated code. -/// -/// but with less redundancy and more consistency as it does not modify the argument order -/// and seamlessly enable hybrid fixed/dynamic sizes. -/// -/// \note Even in the case that the returned expression has dynamic size, in the case +/// \note Even though the returned expression has dynamic size, in the case /// when it is applied to a fixed-size matrix, it inherits a fixed maximal size, /// which means that evaluating it does not cause a dynamic memory allocation. /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa class Block, fix, fix<N>(int) +/// \sa class Block, block(Index,Index) /// -template<typename NRowsType, typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -block(Index startRow, Index startCol, NRowsType blockRows, NColsType blockCols) +inline BlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) { - return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type( - derived(), startRow, startCol, internal::get_runtime_value(blockRows), internal::get_runtime_value(blockCols)); + return BlockXpr(derived(), startRow, startCol, blockRows, blockCols); } -/// This is the const version of block(Index,Index,NRowsType,NColsType) -template<typename NRowsType, typename NColsType> +/// This is the const version of block(Index,Index,Index,Index). */ EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstFixedBlockXpr<...,...>::Type -#endif -block(Index startRow, Index startCol, NRowsType blockRows, NColsType blockCols) const +inline const ConstBlockXpr block(Index startRow, Index startCol, Index blockRows, Index blockCols) const { - return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type( - derived(), startRow, startCol, internal::get_runtime_value(blockRows), internal::get_runtime_value(blockCols)); + return ConstBlockXpr(derived(), startRow, startCol, blockRows, blockCols); } -/// \returns a expression of a top-right corner of \c *this with either dynamic or fixed sizes. + +/// \returns a dynamic-size expression of a top-right corner of *this. /// /// \param cRows the number of rows in the corner /// \param cCols the number of columns in the corner -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// -/// Example with dynamic sizes: \include MatrixBase_topRightCorner_int_int.cpp +/// Example: \include MatrixBase_topRightCorner_int_int.cpp /// Output: \verbinclude MatrixBase_topRightCorner_int_int.out /// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType, typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -topRightCorner(NRowsType cRows, NColsType cCols) +inline BlockXpr topRightCorner(Index cRows, Index cCols) { - return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, cols() - internal::get_runtime_value(cCols), internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return BlockXpr(derived(), 0, cols() - cCols, cRows, cCols); } -/// This is the const version of topRightCorner(NRowsType, NColsType). -template<typename NRowsType, typename NColsType> +/// This is the const version of topRightCorner(Index, Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstFixedBlockXpr<...,...>::Type -#endif -topRightCorner(NRowsType cRows, NColsType cCols) const +inline const ConstBlockXpr topRightCorner(Index cRows, Index cCols) const { - return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, cols() - internal::get_runtime_value(cCols), internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return ConstBlockXpr(derived(), 0, cols() - cCols, cRows, cCols); } -/// \returns an expression of a fixed-size top-right corner of \c *this. +/// \returns an expression of a fixed-size top-right corner of *this. /// /// \tparam CRows the number of rows in the corner /// \tparam CCols the number of columns in the corner @@ -178,7 +128,7 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner() con return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, cols() - CCols); } -/// \returns an expression of a top-right corner of \c *this. +/// \returns an expression of a top-right corner of *this. /// /// \tparam CRows number of rows in corner as specified at compile-time /// \tparam CCols number of columns in corner as specified at compile-time @@ -212,51 +162,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topRightCorner(Index -/// \returns an expression of a top-left corner of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a top-left corner of *this. /// /// \param cRows the number of rows in the corner /// \param cCols the number of columns in the corner -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_topLeftCorner_int_int.cpp /// Output: \verbinclude MatrixBase_topLeftCorner_int_int.out /// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType, typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -topLeftCorner(NRowsType cRows, NColsType cCols) +inline BlockXpr topLeftCorner(Index cRows, Index cCols) { - return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, 0, internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return BlockXpr(derived(), 0, 0, cRows, cCols); } /// This is the const version of topLeftCorner(Index, Index). -template<typename NRowsType, typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstFixedBlockXpr<...,...>::Type -#endif -topLeftCorner(NRowsType cRows, NColsType cCols) const +inline const ConstBlockXpr topLeftCorner(Index cRows, Index cCols) const { - return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, 0, internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return ConstBlockXpr(derived(), 0, 0, cRows, cCols); } -/// \returns an expression of a fixed-size top-left corner of \c *this. +/// \returns an expression of a fixed-size top-left corner of *this. /// /// The template parameters CRows and CCols are the number of rows and columns in the corner. /// @@ -265,7 +196,7 @@ topLeftCorner(NRowsType cRows, NColsType cCols) const /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int CRows, int CCols> EIGEN_DEVICE_FUNC @@ -282,7 +213,7 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner() cons return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), 0, 0); } -/// \returns an expression of a top-left corner of \c *this. +/// \returns an expression of a top-left corner of *this. /// /// \tparam CRows number of rows in corner as specified at compile-time /// \tparam CCols number of columns in corner as specified at compile-time @@ -316,53 +247,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type topLeftCorner(Index -/// \returns an expression of a bottom-right corner of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a bottom-right corner of *this. /// /// \param cRows the number of rows in the corner /// \param cCols the number of columns in the corner -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_bottomRightCorner_int_int.cpp /// Output: \verbinclude MatrixBase_bottomRightCorner_int_int.out /// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType, typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -bottomRightCorner(NRowsType cRows, NColsType cCols) +inline BlockXpr bottomRightCorner(Index cRows, Index cCols) { - return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), rows() - internal::get_runtime_value(cRows), cols() - internal::get_runtime_value(cCols), - internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return BlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } -/// This is the const version of bottomRightCorner(NRowsType, NColsType). -template<typename NRowsType, typename NColsType> +/// This is the const version of bottomRightCorner(Index, Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstFixedBlockXpr<...,...>::Type -#endif -bottomRightCorner(NRowsType cRows, NColsType cCols) const +inline const ConstBlockXpr bottomRightCorner(Index cRows, Index cCols) const { - return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), rows() - internal::get_runtime_value(cRows), cols() - internal::get_runtime_value(cCols), - internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return ConstBlockXpr(derived(), rows() - cRows, cols() - cCols, cRows, cCols); } -/// \returns an expression of a fixed-size bottom-right corner of \c *this. +/// \returns an expression of a fixed-size bottom-right corner of *this. /// /// The template parameters CRows and CCols are the number of rows and columns in the corner. /// @@ -371,7 +281,7 @@ bottomRightCorner(NRowsType cRows, NColsType cCols) const /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int CRows, int CCols> EIGEN_DEVICE_FUNC @@ -388,7 +298,7 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner() return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, cols() - CCols); } -/// \returns an expression of a bottom-right corner of \c *this. +/// \returns an expression of a bottom-right corner of *this. /// /// \tparam CRows number of rows in corner as specified at compile-time /// \tparam CCols number of columns in corner as specified at compile-time @@ -422,53 +332,32 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomRightCorner(In -/// \returns an expression of a bottom-left corner of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a bottom-left corner of *this. /// /// \param cRows the number of rows in the corner /// \param cCols the number of columns in the corner -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_bottomLeftCorner_int_int.cpp /// Output: \verbinclude MatrixBase_bottomLeftCorner_int_int.out /// -/// The number of rows \a blockRows and columns \a blockCols can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType, typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename FixedBlockXpr<...,...>::Type -#endif -bottomLeftCorner(NRowsType cRows, NColsType cCols) +inline BlockXpr bottomLeftCorner(Index cRows, Index cCols) { - return typename FixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), rows() - internal::get_runtime_value(cRows), 0, - internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return BlockXpr(derived(), rows() - cRows, 0, cRows, cCols); } -/// This is the const version of bottomLeftCorner(NRowsType, NColsType). -template<typename NRowsType, typename NColsType> +/// This is the const version of bottomLeftCorner(Index, Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename ConstFixedBlockXpr<...,...>::Type -#endif -bottomLeftCorner(NRowsType cRows, NColsType cCols) const +inline const ConstBlockXpr bottomLeftCorner(Index cRows, Index cCols) const { - return typename ConstFixedBlockXpr<internal::get_fixed_value<NRowsType>::value,internal::get_fixed_value<NColsType>::value>::Type - (derived(), rows() - internal::get_runtime_value(cRows), 0, - internal::get_runtime_value(cRows), internal::get_runtime_value(cCols)); + return ConstBlockXpr(derived(), rows() - cRows, 0, cRows, cCols); } -/// \returns an expression of a fixed-size bottom-left corner of \c *this. +/// \returns an expression of a fixed-size bottom-left corner of *this. /// /// The template parameters CRows and CCols are the number of rows and columns in the corner. /// @@ -477,7 +366,7 @@ bottomLeftCorner(NRowsType cRows, NColsType cCols) const /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int CRows, int CCols> EIGEN_DEVICE_FUNC @@ -494,7 +383,7 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner() c return typename ConstFixedBlockXpr<CRows,CCols>::Type(derived(), rows() - CRows, 0); } -/// \returns an expression of a bottom-left corner of \c *this. +/// \returns an expression of a bottom-left corner of *this. /// /// \tparam CRows number of rows in corner as specified at compile-time /// \tparam CCols number of columns in corner as specified at compile-time @@ -528,50 +417,31 @@ inline const typename ConstFixedBlockXpr<CRows,CCols>::Type bottomLeftCorner(Ind -/// \returns a block consisting of the top rows of \c *this. +/// \returns a block consisting of the top rows of *this. /// /// \param n the number of rows in the block -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. /// /// Example: \include MatrixBase_topRows_int.cpp /// Output: \verbinclude MatrixBase_topRows_int.out /// -/// The number of rows \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type -#else -inline typename NRowsBlockXpr<...>::Type -#endif -topRows(NRowsType n) +inline RowsBlockXpr topRows(Index n) { - return typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type - (derived(), 0, 0, internal::get_runtime_value(n), cols()); + return RowsBlockXpr(derived(), 0, 0, n, cols()); } -/// This is the const version of topRows(NRowsType). -template<typename NRowsType> +/// This is the const version of topRows(Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type -#else -inline const typename ConstNRowsBlockXpr<...>::Type -#endif -topRows(NRowsType n) const +inline ConstRowsBlockXpr topRows(Index n) const { - return typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type - (derived(), 0, 0, internal::get_runtime_value(n), cols()); + return ConstRowsBlockXpr(derived(), 0, 0, n, cols()); } -/// \returns a block consisting of the top rows of \c *this. +/// \returns a block consisting of the top rows of *this. /// /// \tparam N the number of rows in the block as specified at compile-time /// \param n the number of rows in the block as specified at run-time @@ -584,7 +454,7 @@ topRows(NRowsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int N> EIGEN_DEVICE_FUNC @@ -603,50 +473,31 @@ inline typename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const -/// \returns a block consisting of the bottom rows of \c *this. +/// \returns a block consisting of the bottom rows of *this. /// /// \param n the number of rows in the block -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. /// /// Example: \include MatrixBase_bottomRows_int.cpp /// Output: \verbinclude MatrixBase_bottomRows_int.out /// -/// The number of rows \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type -#else -inline typename NRowsBlockXpr<...>::Type -#endif -bottomRows(NRowsType n) +inline RowsBlockXpr bottomRows(Index n) { - return typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type - (derived(), rows() - internal::get_runtime_value(n), 0, internal::get_runtime_value(n), cols()); + return RowsBlockXpr(derived(), rows() - n, 0, n, cols()); } -/// This is the const version of bottomRows(NRowsType). -template<typename NRowsType> +/// This is the const version of bottomRows(Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type -#else -inline const typename ConstNRowsBlockXpr<...>::Type -#endif -bottomRows(NRowsType n) const +inline ConstRowsBlockXpr bottomRows(Index n) const { - return typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type - (derived(), rows() - internal::get_runtime_value(n), 0, internal::get_runtime_value(n), cols()); + return ConstRowsBlockXpr(derived(), rows() - n, 0, n, cols()); } -/// \returns a block consisting of the bottom rows of \c *this. +/// \returns a block consisting of the bottom rows of *this. /// /// \tparam N the number of rows in the block as specified at compile-time /// \param n the number of rows in the block as specified at run-time @@ -659,7 +510,7 @@ bottomRows(NRowsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int N> EIGEN_DEVICE_FUNC @@ -678,51 +529,32 @@ inline typename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const -/// \returns a block consisting of a range of rows of \c *this. +/// \returns a block consisting of a range of rows of *this. /// /// \param startRow the index of the first row in the block /// \param n the number of rows in the block -/// \tparam NRowsType the type of the value handling the number of rows in the block, typically Index. /// /// Example: \include DenseBase_middleRows_int.cpp /// Output: \verbinclude DenseBase_middleRows_int.out /// -/// The number of rows \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NRowsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type -#else -inline typename NRowsBlockXpr<...>::Type -#endif -middleRows(Index startRow, NRowsType n) +inline RowsBlockXpr middleRows(Index startRow, Index n) { - return typename NRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type - (derived(), startRow, 0, internal::get_runtime_value(n), cols()); + return RowsBlockXpr(derived(), startRow, 0, n, cols()); } -/// This is the const version of middleRows(Index,NRowsType). -template<typename NRowsType> +/// This is the const version of middleRows(Index,Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type -#else -inline const typename ConstNRowsBlockXpr<...>::Type -#endif -middleRows(Index startRow, NRowsType n) const +inline ConstRowsBlockXpr middleRows(Index startRow, Index n) const { - return typename ConstNRowsBlockXpr<internal::get_fixed_value<NRowsType>::value>::Type - (derived(), startRow, 0, internal::get_runtime_value(n), cols()); + return ConstRowsBlockXpr(derived(), startRow, 0, n, cols()); } -/// \returns a block consisting of a range of rows of \c *this. +/// \returns a block consisting of a range of rows of *this. /// /// \tparam N the number of rows in the block as specified at compile-time /// \param startRow the index of the first row in the block @@ -736,7 +568,7 @@ middleRows(Index startRow, NRowsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(row-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int N> EIGEN_DEVICE_FUNC @@ -755,50 +587,31 @@ inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = -/// \returns a block consisting of the left columns of \c *this. +/// \returns a block consisting of the left columns of *this. /// /// \param n the number of columns in the block -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_leftCols_int.cpp /// Output: \verbinclude MatrixBase_leftCols_int.out /// -/// The number of columns \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename NColsBlockXpr<...>::Type -#endif -leftCols(NColsType n) +inline ColsBlockXpr leftCols(Index n) { - return typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, 0, rows(), internal::get_runtime_value(n)); + return ColsBlockXpr(derived(), 0, 0, rows(), n); } -/// This is the const version of leftCols(NColsType). -template<typename NColsType> +/// This is the const version of leftCols(Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstNColsBlockXpr<...>::Type -#endif -leftCols(NColsType n) const +inline ConstColsBlockXpr leftCols(Index n) const { - return typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, 0, rows(), internal::get_runtime_value(n)); + return ConstColsBlockXpr(derived(), 0, 0, rows(), n); } -/// \returns a block consisting of the left columns of \c *this. +/// \returns a block consisting of the left columns of *this. /// /// \tparam N the number of columns in the block as specified at compile-time /// \param n the number of columns in the block as specified at run-time @@ -811,7 +624,7 @@ leftCols(NColsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int N> EIGEN_DEVICE_FUNC @@ -830,50 +643,31 @@ inline typename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const -/// \returns a block consisting of the right columns of \c *this. +/// \returns a block consisting of the right columns of *this. /// /// \param n the number of columns in the block -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include MatrixBase_rightCols_int.cpp /// Output: \verbinclude MatrixBase_rightCols_int.out /// -/// The number of columns \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename NColsBlockXpr<...>::Type -#endif -rightCols(NColsType n) +inline ColsBlockXpr rightCols(Index n) { - return typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, cols() - internal::get_runtime_value(n), rows(), internal::get_runtime_value(n)); + return ColsBlockXpr(derived(), 0, cols() - n, rows(), n); } -/// This is the const version of rightCols(NColsType). -template<typename NColsType> +/// This is the const version of rightCols(Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstNColsBlockXpr<...>::Type -#endif -rightCols(NColsType n) const +inline ConstColsBlockXpr rightCols(Index n) const { - return typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, cols() - internal::get_runtime_value(n), rows(), internal::get_runtime_value(n)); + return ConstColsBlockXpr(derived(), 0, cols() - n, rows(), n); } -/// \returns a block consisting of the right columns of \c *this. +/// \returns a block consisting of the right columns of *this. /// /// \tparam N the number of columns in the block as specified at compile-time /// \param n the number of columns in the block as specified at run-time @@ -886,7 +680,7 @@ rightCols(NColsType n) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int N> EIGEN_DEVICE_FUNC @@ -905,51 +699,32 @@ inline typename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const -/// \returns a block consisting of a range of columns of \c *this. +/// \returns a block consisting of a range of columns of *this. /// /// \param startCol the index of the first column in the block /// \param numCols the number of columns in the block -/// \tparam NColsType the type of the value handling the number of columns in the block, typically Index. /// /// Example: \include DenseBase_middleCols_int.cpp /// Output: \verbinclude DenseBase_middleCols_int.out /// -/// The number of columns \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// -template<typename NColsType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type -#else -inline typename NColsBlockXpr<...>::Type -#endif -middleCols(Index startCol, NColsType numCols) +inline ColsBlockXpr middleCols(Index startCol, Index numCols) { - return typename NColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, startCol, rows(), internal::get_runtime_value(numCols)); + return ColsBlockXpr(derived(), 0, startCol, rows(), numCols); } -/// This is the const version of middleCols(Index,NColsType). -template<typename NColsType> +/// This is the const version of middleCols(Index,Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type -#else -inline const typename ConstNColsBlockXpr<...>::Type -#endif -middleCols(Index startCol, NColsType numCols) const +inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const { - return typename ConstNColsBlockXpr<internal::get_fixed_value<NColsType>::value>::Type - (derived(), 0, startCol, rows(), internal::get_runtime_value(numCols)); + return ConstColsBlockXpr(derived(), 0, startCol, rows(), numCols); } -/// \returns a block consisting of a range of columns of \c *this. +/// \returns a block consisting of a range of columns of *this. /// /// \tparam N the number of columns in the block as specified at compile-time /// \param startCol the index of the first column in the block @@ -963,7 +738,7 @@ middleCols(Index startCol, NColsType numCols) const /// EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(column-major) /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int N> EIGEN_DEVICE_FUNC @@ -982,7 +757,7 @@ inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = -/// \returns a fixed-size expression of a block of \c *this. +/// \returns a fixed-size expression of a block in *this. /// /// The template parameters \a NRows and \a NCols are the number of /// rows and columns in the block. @@ -993,18 +768,12 @@ inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = /// Example: \include MatrixBase_block_int_int.cpp /// Output: \verbinclude MatrixBase_block_int_int.out /// -/// \note The usage of of this overload is discouraged from %Eigen 3.4, better used the generic -/// block(Index,Index,NRowsType,NColsType), here is the one-to-one equivalence: -/// \code -/// mat.template block<NRows,NCols>(i,j) <--> mat.block(i,j,fix<NRows>,fix<NCols>) -/// \endcode -/// /// \note since block is a templated member, the keyword template has to be used /// if the matrix type is also a template parameter: \code m.template block<3,3>(1,1); \endcode /// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int NRows, int NCols> EIGEN_DEVICE_FUNC @@ -1021,7 +790,7 @@ inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol); } -/// \returns an expression of a block of \c *this. +/// \returns an expression of a block in *this. /// /// \tparam NRows number of rows in block as specified at compile-time /// \tparam NCols number of columns in block as specified at compile-time @@ -1038,19 +807,9 @@ inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow /// Example: \include MatrixBase_template_int_int_block_int_int_int_int.cpp /// Output: \verbinclude MatrixBase_template_int_int_block_int_int_int_int.cpp /// -/// \note The usage of of this overload is discouraged from %Eigen 3.4, better used the generic -/// block(Index,Index,NRowsType,NColsType), here is the one-to-one complete equivalence: -/// \code -/// mat.template block<NRows,NCols>(i,j,rows,cols) <--> mat.block(i,j,fix<NRows>(rows),fix<NCols>(cols)) -/// \endcode -/// If we known that, e.g., NRows==Dynamic and NCols!=Dynamic, then the equivalence becomes: -/// \code -/// mat.template block<Dynamic,NCols>(i,j,rows,NCols) <--> mat.block(i,j,rows,fix<NCols>) -/// \endcode -/// EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL /// -/// \sa block(Index,Index,NRowsType,NColsType), class Block +/// \sa class Block, block(Index,Index,Index,Index) /// template<int NRows, int NCols> inline typename FixedBlockXpr<NRows,NCols>::Type block(Index startRow, Index startCol, @@ -1067,7 +826,7 @@ inline const typename ConstFixedBlockXpr<NRows,NCols>::Type block(Index startRow return typename ConstFixedBlockXpr<NRows,NCols>::Type(derived(), startRow, startCol, blockRows, blockCols); } -/// \returns an expression of the \a i-th column of \c *this. Note that the numbering starts at 0. +/// \returns an expression of the \a i-th column of *this. Note that the numbering starts at 0. /// /// Example: \include MatrixBase_col.cpp /// Output: \verbinclude MatrixBase_col.out @@ -1088,7 +847,7 @@ inline ConstColXpr col(Index i) const return ConstColXpr(derived(), i); } -/// \returns an expression of the \a i-th row of \c *this. Note that the numbering starts at 0. +/// \returns an expression of the \a i-th row of *this. Note that the numbering starts at 0. /// /// Example: \include MatrixBase_row.cpp /// Output: \verbinclude MatrixBase_row.out @@ -1109,153 +868,96 @@ inline ConstRowXpr row(Index i) const return ConstRowXpr(derived(), i); } -/// \returns an expression of a segment (i.e. a vector block) in \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of a segment (i.e. a vector block) in *this. /// /// \only_for_vectors /// /// \param start the first coefficient in the segment /// \param n the number of coefficients in the segment -/// \tparam NType the type of the value handling the number of coefficients in the segment, typically Index. /// /// Example: \include MatrixBase_segment_int_int.cpp /// Output: \verbinclude MatrixBase_segment_int_int.out /// -/// The number of coefficients \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// -/// \note Even in the case that the returned expression has dynamic size, in the case +/// \note Even though the returned expression has dynamic size, in the case /// when it is applied to a fixed-size vector, it inherits a fixed maximal size, /// which means that evaluating it does not cause a dynamic memory allocation. /// -/// \sa block(Index,Index,NRowsType,NColsType), fix<N>, fix<N>(int), class Block +/// \sa class Block, segment(Index) /// -template<typename NType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type -#else -inline typename FixedSegmentReturnType<...>::Type -#endif -segment(Index start, NType n) +inline SegmentReturnType segment(Index start, Index n) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type - (derived(), start, internal::get_runtime_value(n)); + return SegmentReturnType(derived(), start, n); } -/// This is the const version of segment(Index,NType). -template<typename NType> +/// This is the const version of segment(Index,Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type -#else -inline const typename ConstFixedSegmentReturnType<...>::Type -#endif -segment(Index start, NType n) const +inline ConstSegmentReturnType segment(Index start, Index n) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type - (derived(), start, internal::get_runtime_value(n)); + return ConstSegmentReturnType(derived(), start, n); } -/// \returns an expression of the first coefficients of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of the first coefficients of *this. /// /// \only_for_vectors /// /// \param n the number of coefficients in the segment -/// \tparam NType the type of the value handling the number of coefficients in the segment, typically Index. /// /// Example: \include MatrixBase_start_int.cpp /// Output: \verbinclude MatrixBase_start_int.out /// -/// The number of coefficients \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// -/// \note Even in the case that the returned expression has dynamic size, in the case +/// \note Even though the returned expression has dynamic size, in the case /// when it is applied to a fixed-size vector, it inherits a fixed maximal size, /// which means that evaluating it does not cause a dynamic memory allocation. /// /// \sa class Block, block(Index,Index) /// -template<typename NType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type -#else -inline typename FixedSegmentReturnType<...>::Type -#endif -head(NType n) +inline SegmentReturnType head(Index n) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type - (derived(), 0, internal::get_runtime_value(n)); + return SegmentReturnType(derived(), 0, n); } -/// This is the const version of head(NType). -template<typename NType> +/// This is the const version of head(Index). EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type -#else -inline const typename ConstFixedSegmentReturnType<...>::Type -#endif -head(NType n) const +inline ConstSegmentReturnType head(Index n) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type - (derived(), 0, internal::get_runtime_value(n)); + return ConstSegmentReturnType(derived(), 0, n); } -/// \returns an expression of a last coefficients of \c *this with either dynamic or fixed sizes. +/// \returns a dynamic-size expression of the last coefficients of *this. /// /// \only_for_vectors /// /// \param n the number of coefficients in the segment -/// \tparam NType the type of the value handling the number of coefficients in the segment, typically Index. /// /// Example: \include MatrixBase_end_int.cpp /// Output: \verbinclude MatrixBase_end_int.out /// -/// The number of coefficients \a n can also be specified at compile-time by passing Eigen::fix<N>, -/// or Eigen::fix<N>(n) as arguments. -/// See \link block(Index,Index,NRowsType,NColsType) block() \endlink for the details. -/// -/// \note Even in the case that the returned expression has dynamic size, in the case +/// \note Even though the returned expression has dynamic size, in the case /// when it is applied to a fixed-size vector, it inherits a fixed maximal size, /// which means that evaluating it does not cause a dynamic memory allocation. /// /// \sa class Block, block(Index,Index) /// -template<typename NType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type -#else -inline typename FixedSegmentReturnType<...>::Type -#endif -tail(NType n) +inline SegmentReturnType tail(Index n) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename FixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type - (derived(), this->size() - internal::get_runtime_value(n), internal::get_runtime_value(n)); + return SegmentReturnType(derived(), this->size() - n, n); } /// This is the const version of tail(Index). -template<typename NType> EIGEN_DEVICE_FUNC -#ifndef EIGEN_PARSED_BY_DOXYGEN -inline const typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type -#else -inline const typename ConstFixedSegmentReturnType<...>::Type -#endif -tail(NType n) const +inline ConstSegmentReturnType tail(Index n) const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return typename ConstFixedSegmentReturnType<internal::get_fixed_value<NType>::value>::Type - (derived(), this->size() - internal::get_runtime_value(n), internal::get_runtime_value(n)); + return ConstSegmentReturnType(derived(), this->size() - n, n); } /// \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this @@ -1272,7 +974,7 @@ tail(NType n) const /// Example: \include MatrixBase_template_int_segment.cpp /// Output: \verbinclude MatrixBase_template_int_segment.out /// -/// \sa segment(Index,NType), class Block +/// \sa class Block /// template<int N> EIGEN_DEVICE_FUNC @@ -1291,7 +993,7 @@ inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index return typename ConstFixedSegmentReturnType<N>::Type(derived(), start, n); } -/// \returns a fixed-size expression of the first coefficients of \c *this. +/// \returns a fixed-size expression of the first coefficients of *this. /// /// \only_for_vectors /// @@ -1304,7 +1006,7 @@ inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index /// Example: \include MatrixBase_template_int_start.cpp /// Output: \verbinclude MatrixBase_template_int_start.out /// -/// \sa head(NType), class Block +/// \sa class Block /// template<int N> EIGEN_DEVICE_FUNC @@ -1323,7 +1025,7 @@ inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const return typename ConstFixedSegmentReturnType<N>::Type(derived(), 0, n); } -/// \returns a fixed-size expression of the last coefficients of \c *this. +/// \returns a fixed-size expression of the last coefficients of *this. /// /// \only_for_vectors /// @@ -1336,7 +1038,7 @@ inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const /// Example: \include MatrixBase_template_int_end.cpp /// Output: \verbinclude MatrixBase_template_int_end.out /// -/// \sa tail(NType), class Block +/// \sa class Block /// template<int N> EIGEN_DEVICE_FUNC diff --git a/eigen/Eigen/src/plugins/IndexedViewMethods.h b/eigen/Eigen/src/plugins/IndexedViewMethods.h deleted file mode 100644 index 22c1666..0000000 --- a/eigen/Eigen/src/plugins/IndexedViewMethods.h +++ /dev/null @@ -1,267 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#if !defined(EIGEN_PARSED_BY_DOXYGEN) - -// This file is automatically included twice to generate const and non-const versions - -#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -#define EIGEN_INDEXED_VIEW_METHOD_CONST const -#define EIGEN_INDEXED_VIEW_METHOD_TYPE ConstIndexedViewType -#else -#define EIGEN_INDEXED_VIEW_METHOD_CONST -#define EIGEN_INDEXED_VIEW_METHOD_TYPE IndexedViewType -#endif - -#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -protected: - -// define some aliases to ease readability - -template<typename Indices> -struct IvcRowType : public internal::IndexedViewCompatibleType<Indices,RowsAtCompileTime> {}; - -template<typename Indices> -struct IvcColType : public internal::IndexedViewCompatibleType<Indices,ColsAtCompileTime> {}; - -template<typename Indices> -struct IvcType : public internal::IndexedViewCompatibleType<Indices,SizeAtCompileTime> {}; - -typedef typename internal::IndexedViewCompatibleType<Index,1>::type IvcIndex; - -template<typename Indices> -typename IvcRowType<Indices>::type -ivcRow(const Indices& indices) const { - return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic<Index,RowsAtCompileTime>(derived().rows()),Specialized); -} - -template<typename Indices> -typename IvcColType<Indices>::type -ivcCol(const Indices& indices) const { - return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic<Index,ColsAtCompileTime>(derived().cols()),Specialized); -} - -template<typename Indices> -typename IvcColType<Indices>::type -ivcSize(const Indices& indices) const { - return internal::makeIndexedViewCompatible(indices, internal::variable_if_dynamic<Index,SizeAtCompileTime>(derived().size()),Specialized); -} - -template<typename RowIndices, typename ColIndices> -struct valid_indexed_view_overload { - // Here we use is_convertible to Index instead of is_integral in order to treat enums as Index. - // In c++11 we could use is_integral<T> && is_enum<T> if is_convertible appears to be too permissive. - enum { value = !(internal::is_convertible<RowIndices,Index>::value && internal::is_convertible<ColIndices,Index>::value) }; -}; - -public: - -#endif - -template<typename RowIndices, typename ColIndices> -struct EIGEN_INDEXED_VIEW_METHOD_TYPE { - typedef IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived, - typename IvcRowType<RowIndices>::type, - typename IvcColType<ColIndices>::type> type; -}; - -// This is the generic version - -template<typename RowIndices, typename ColIndices> -typename internal::enable_if<valid_indexed_view_overload<RowIndices,ColIndices>::value - && internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::ReturnAsIndexedView, - typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type >::type -operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type - (derived(), ivcRow(rowIndices), ivcCol(colIndices)); -} - -// The following overload returns a Block<> object - -template<typename RowIndices, typename ColIndices> -typename internal::enable_if<valid_indexed_view_overload<RowIndices,ColIndices>::value - && internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::ReturnAsBlock, - typename internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::BlockType>::type -operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - typedef typename internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::BlockType BlockType; - typename IvcRowType<RowIndices>::type actualRowIndices = ivcRow(rowIndices); - typename IvcColType<ColIndices>::type actualColIndices = ivcCol(colIndices); - return BlockType(derived(), - internal::first(actualRowIndices), - internal::first(actualColIndices), - internal::size(actualRowIndices), - internal::size(actualColIndices)); -} - -// The following overload returns a Scalar - -template<typename RowIndices, typename ColIndices> -typename internal::enable_if<valid_indexed_view_overload<RowIndices,ColIndices>::value - && internal::traits<typename EIGEN_INDEXED_VIEW_METHOD_TYPE<RowIndices,ColIndices>::type>::ReturnAsScalar, - CoeffReturnType >::type -operator()(const RowIndices& rowIndices, const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return Base::operator()(internal::eval_expr_given_size(rowIndices,rows()),internal::eval_expr_given_size(colIndices,cols())); -} - -#if EIGEN_HAS_STATIC_ARRAY_TEMPLATE - -// The folowing three overloads are needed to handle raw Index[N] arrays. - -template<typename RowIndicesT, std::size_t RowIndicesN, typename ColIndices> -IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN],typename IvcColType<ColIndices>::type> -operator()(const RowIndicesT (&rowIndices)[RowIndicesN], const ColIndices& colIndices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN],typename IvcColType<ColIndices>::type> - (derived(), rowIndices, ivcCol(colIndices)); -} - -template<typename RowIndices, typename ColIndicesT, std::size_t ColIndicesN> -IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcRowType<RowIndices>::type, const ColIndicesT (&)[ColIndicesN]> -operator()(const RowIndices& rowIndices, const ColIndicesT (&colIndices)[ColIndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcRowType<RowIndices>::type,const ColIndicesT (&)[ColIndicesN]> - (derived(), ivcRow(rowIndices), colIndices); -} - -template<typename RowIndicesT, std::size_t RowIndicesN, typename ColIndicesT, std::size_t ColIndicesN> -IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN], const ColIndicesT (&)[ColIndicesN]> -operator()(const RowIndicesT (&rowIndices)[RowIndicesN], const ColIndicesT (&colIndices)[ColIndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const RowIndicesT (&)[RowIndicesN],const ColIndicesT (&)[ColIndicesN]> - (derived(), rowIndices, colIndices); -} - -#endif // EIGEN_HAS_STATIC_ARRAY_TEMPLATE - -// Overloads for 1D vectors/arrays - -template<typename Indices> -typename internal::enable_if< - IsRowMajor && (!(internal::get_compile_time_incr<typename IvcType<Indices>::type>::value==1 || internal::is_integral<Indices>::value)), - IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,typename IvcType<Indices>::type> >::type -operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,typename IvcType<Indices>::type> - (derived(), IvcIndex(0), ivcCol(indices)); -} - -template<typename Indices> -typename internal::enable_if< - (!IsRowMajor) && (!(internal::get_compile_time_incr<typename IvcType<Indices>::type>::value==1 || internal::is_integral<Indices>::value)), - IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcType<Indices>::type,IvcIndex> >::type -operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,typename IvcType<Indices>::type,IvcIndex> - (derived(), ivcRow(indices), IvcIndex(0)); -} - -template<typename Indices> -typename internal::enable_if< - (internal::get_compile_time_incr<typename IvcType<Indices>::type>::value==1) && (!internal::is_integral<Indices>::value) && (!Symbolic::is_symbolic<Indices>::value), - VectorBlock<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,internal::array_size<Indices>::value> >::type -operator()(const Indices& indices) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - typename IvcType<Indices>::type actualIndices = ivcSize(indices); - return VectorBlock<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,internal::array_size<Indices>::value> - (derived(), internal::first(actualIndices), internal::size(actualIndices)); -} - -template<typename IndexType> -typename internal::enable_if<Symbolic::is_symbolic<IndexType>::value, CoeffReturnType >::type -operator()(const IndexType& id) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - return Base::operator()(internal::eval_expr_given_size(id,size())); -} - -#if EIGEN_HAS_STATIC_ARRAY_TEMPLATE - -template<typename IndicesT, std::size_t IndicesN> -typename internal::enable_if<IsRowMajor, - IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,const IndicesT (&)[IndicesN]> >::type -operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,IvcIndex,const IndicesT (&)[IndicesN]> - (derived(), IvcIndex(0), indices); -} - -template<typename IndicesT, std::size_t IndicesN> -typename internal::enable_if<!IsRowMajor, - IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const IndicesT (&)[IndicesN],IvcIndex> >::type -operator()(const IndicesT (&indices)[IndicesN]) EIGEN_INDEXED_VIEW_METHOD_CONST -{ - EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return IndexedView<EIGEN_INDEXED_VIEW_METHOD_CONST Derived,const IndicesT (&)[IndicesN],IvcIndex> - (derived(), indices, IvcIndex(0)); -} - -#endif // EIGEN_HAS_STATIC_ARRAY_TEMPLATE - -#undef EIGEN_INDEXED_VIEW_METHOD_CONST -#undef EIGEN_INDEXED_VIEW_METHOD_TYPE - -#ifndef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -#define EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -#include "IndexedViewMethods.h" -#undef EIGEN_INDEXED_VIEW_METHOD_2ND_PASS -#endif - -#else // EIGEN_PARSED_BY_DOXYGEN - -/** - * \returns a generic submatrix view defined by the rows and columns indexed \a rowIndices and \a colIndices respectively. - * - * Each parameter must either be: - * - An integer indexing a single row or column - * - Eigen::all indexing the full set of respective rows or columns in increasing order - * - An ArithmeticSequence as returned by the Eigen::seq and Eigen::seqN functions - * - Any %Eigen's vector/array of integers or expressions - * - Plain C arrays: \c int[N] - * - And more generally any type exposing the following two member functions: - * \code - * <integral type> operator[](<integral type>) const; - * <integral type> size() const; - * \endcode - * where \c <integral \c type> stands for any integer type compatible with Eigen::Index (i.e. \c std::ptrdiff_t). - * - * The last statement implies compatibility with \c std::vector, \c std::valarray, \c std::array, many of the Range-v3's ranges, etc. - * - * If the submatrix can be represented using a starting position \c (i,j) and positive sizes \c (rows,columns), then this - * method will returns a Block object after extraction of the relevant information from the passed arguments. This is the case - * when all arguments are either: - * - An integer - * - Eigen::all - * - An ArithmeticSequence with compile-time increment strictly equal to 1, as returned by Eigen::seq(a,b), and Eigen::seqN(a,N). - * - * Otherwise a more general IndexedView<Derived,RowIndices',ColIndices'> object will be returned, after conversion of the inputs - * to more suitable types \c RowIndices' and \c ColIndices'. - * - * For 1D vectors and arrays, you better use the operator()(const Indices&) overload, which behave the same way but taking a single parameter. - * - * \sa operator()(const Indices&), class Block, class IndexedView, DenseBase::block(Index,Index,Index,Index) - */ -template<typename RowIndices, typename ColIndices> -IndexedView_or_Block -operator()(const RowIndices& rowIndices, const ColIndices& colIndices); - -/** This is an overload of operator()(const RowIndices&, const ColIndices&) for 1D vectors or arrays - * - * \only_for_vectors - */ -template<typename Indices> -IndexedView_or_VectorBlock -operator()(const Indices& indices); - -#endif // EIGEN_PARSED_BY_DOXYGEN |