author | Stanislaw Halik <sthalik@misaki.pl> | 2018-07-03 07:37:12 +0200
---|---|---
committer | Stanislaw Halik <sthalik@misaki.pl> | 2018-07-03 08:13:09 +0200
commit | 88534ba623421c956d8ffcda2d27f41d704d15ef (patch) |
tree | fccc55245aec3f7381cd525a1355568e10ea37f4 /eigen/Eigen/src/Core |
parent | 3ee09beb3f0458fbeb0b0e816f854b9d5b406e6b (diff) |
update eigen
Diffstat (limited to 'eigen/Eigen/src/Core')
66 files changed, 904 insertions, 2446 deletions
diff --git a/eigen/Eigen/src/Core/ArithmeticSequence.h b/eigen/Eigen/src/Core/ArithmeticSequence.h deleted file mode 100644 index ada1571..0000000 --- a/eigen/Eigen/src/Core/ArithmeticSequence.h +++ /dev/null @@ -1,350 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_ARITHMETIC_SEQUENCE_H -#define EIGEN_ARITHMETIC_SEQUENCE_H - -namespace Eigen { - -namespace internal { - -#if (!EIGEN_HAS_CXX11) || !((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48) -template<typename T> struct aseq_negate {}; - -template<> struct aseq_negate<Index> { - typedef Index type; -}; - -template<int N> struct aseq_negate<FixedInt<N> > { - typedef FixedInt<-N> type; -}; - -// Compilation error in the following case: -template<> struct aseq_negate<FixedInt<DynamicIndex> > {}; - -template<typename FirstType,typename SizeType,typename IncrType, - bool FirstIsSymbolic=Symbolic::is_symbolic<FirstType>::value, - bool SizeIsSymbolic =Symbolic::is_symbolic<SizeType>::value> -struct aseq_reverse_first_type { - typedef Index type; -}; - -template<typename FirstType,typename SizeType,typename IncrType> -struct aseq_reverse_first_type<FirstType,SizeType,IncrType,true,true> { - typedef Symbolic::AddExpr<FirstType, - Symbolic::ProductExpr<Symbolic::AddExpr<SizeType,Symbolic::ValueExpr<FixedInt<-1> > >, - Symbolic::ValueExpr<IncrType> > - > type; -}; - -template<typename SizeType,typename IncrType,typename EnableIf = void> -struct aseq_reverse_first_type_aux { - typedef Index type; -}; - -template<typename SizeType,typename IncrType> -struct aseq_reverse_first_type_aux<SizeType,IncrType,typename internal::enable_if<bool((SizeType::value+IncrType::value)|0x1)>::type> { - typedef FixedInt<(SizeType::value-1)*IncrType::value> type; -}; - -template<typename FirstType,typename SizeType,typename IncrType> -struct aseq_reverse_first_type<FirstType,SizeType,IncrType,true,false> { - typedef typename aseq_reverse_first_type_aux<SizeType,IncrType>::type Aux; - typedef Symbolic::AddExpr<FirstType,Symbolic::ValueExpr<Aux> > type; -}; - -template<typename FirstType,typename SizeType,typename IncrType> -struct aseq_reverse_first_type<FirstType,SizeType,IncrType,false,true> { - typedef Symbolic::AddExpr<Symbolic::ProductExpr<Symbolic::AddExpr<SizeType,Symbolic::ValueExpr<FixedInt<-1> > >, - Symbolic::ValueExpr<IncrType> >, - Symbolic::ValueExpr<> > type; -}; -#endif - -// Helper to cleanup the type of the increment: -template<typename T> struct cleanup_seq_incr { - typedef typename cleanup_index_type<T,DynamicIndex>::type type; -}; - -} - -//-------------------------------------------------------------------------------- -// seq(first,last,incr) and seqN(first,size,incr) -//-------------------------------------------------------------------------------- - -template<typename FirstType=Index,typename SizeType=Index,typename IncrType=internal::FixedInt<1> > -class ArithmeticSequence; - -template<typename FirstType,typename SizeType,typename IncrType> -ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type, - typename internal::cleanup_index_type<SizeType>::type, - typename internal::cleanup_seq_incr<IncrType>::type > -seqN(FirstType first, SizeType size, IncrType incr); - -/** \class 
ArithmeticSequence - * \ingroup Core_Module - * - * This class represents an arithmetic progression \f$ a_0, a_1, a_2, ..., a_{n-1}\f$ defined by - * its \em first value \f$ a_0 \f$, its \em size (aka length) \em n, and the \em increment (aka stride) - * that is equal to \f$ a_{i+1}-a_{i}\f$ for any \em i. - * - * It is internally used as the return type of the Eigen::seq and Eigen::seqN functions, and as the input arguments - * of DenseBase::operator()(const RowIndices&, const ColIndices&), and most of the time this is the - * only way it is used. - * - * \tparam FirstType type of the first element, usually an Index, - * but internally it can be a symbolic expression - * \tparam SizeType type representing the size of the sequence, usually an Index - * or a compile time integral constant. Internally, it can also be a symbolic expression - * \tparam IncrType type of the increment, can be a runtime Index, or a compile time integral constant (default is compile-time 1) - * - * \sa Eigen::seq, Eigen::seqN, DenseBase::operator()(const RowIndices&, const ColIndices&), class IndexedView - */ -template<typename FirstType,typename SizeType,typename IncrType> -class ArithmeticSequence -{ -public: - ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {} - ArithmeticSequence(FirstType first, SizeType size, IncrType incr) : m_first(first), m_size(size), m_incr(incr) {} - - enum { - SizeAtCompileTime = internal::get_fixed_value<SizeType>::value, - IncrAtCompileTime = internal::get_fixed_value<IncrType,DynamicIndex>::value - }; - - /** \returns the size, i.e., number of elements, of the sequence */ - Index size() const { return m_size; } - - /** \returns the first element \f$ a_0 \f$ in the sequence */ - Index first() const { return m_first; } - - /** \returns the value \f$ a_i \f$ at index \a i in the sequence. 
*/ - Index operator[](Index i) const { return m_first + i * m_incr; } - - const FirstType& firstObject() const { return m_first; } - const SizeType& sizeObject() const { return m_size; } - const IncrType& incrObject() const { return m_incr; } - -protected: - FirstType m_first; - SizeType m_size; - IncrType m_incr; - -public: - -#if EIGEN_HAS_CXX11 && ((!EIGEN_COMP_GNUC) || EIGEN_COMP_GNUC>=48) - auto reverse() const -> decltype(Eigen::seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr)) { - return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr); - } -#else -protected: - typedef typename internal::aseq_negate<IncrType>::type ReverseIncrType; - typedef typename internal::aseq_reverse_first_type<FirstType,SizeType,IncrType>::type ReverseFirstType; -public: - ArithmeticSequence<ReverseFirstType,SizeType,ReverseIncrType> - reverse() const { - return seqN(m_first+(m_size+fix<-1>())*m_incr,m_size,-m_incr); - } -#endif -}; - -/** \returns an ArithmeticSequence starting at \a first, of length \a size, and increment \a incr - * - * \sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) */ -template<typename FirstType,typename SizeType,typename IncrType> -ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type,typename internal::cleanup_seq_incr<IncrType>::type > -seqN(FirstType first, SizeType size, IncrType incr) { - return ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type,typename internal::cleanup_seq_incr<IncrType>::type>(first,size,incr); -} - -/** \returns an ArithmeticSequence starting at \a first, of length \a size, and unit increment - * - * \sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType) */ -template<typename FirstType,typename SizeType> -ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type > -seqN(FirstType first, SizeType size) { - return ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,typename internal::cleanup_index_type<SizeType>::type>(first,size); -} - -#ifdef EIGEN_PARSED_BY_DOXYGEN - -/** \returns an ArithmeticSequence starting at \a f, up (or down) to \a l, and with positive (or negative) increment \a incr - * - * It is essentially an alias to: - * \code - * seqN(f, (l-f+incr)/incr, incr); - * \endcode - * - * \sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType) - */ -template<typename FirstType,typename LastType, typename IncrType> -auto seq(FirstType f, LastType l, IncrType incr); - -/** \returns an ArithmeticSequence starting at \a f, up (or down) to \a l, and unit increment - * - * It is essentially an alias to: - * \code - * seqN(f,l-f+1); - * \endcode - * - * \sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) - */ -template<typename FirstType,typename LastType> -auto seq(FirstType f, LastType l); - -#else // EIGEN_PARSED_BY_DOXYGEN - -#if EIGEN_HAS_CXX11 -template<typename FirstType,typename LastType> -auto seq(FirstType f, LastType l) -> decltype(seqN(typename internal::cleanup_index_type<FirstType>::type(f), - ( typename internal::cleanup_index_type<LastType>::type(l) - - typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>()))) -{ - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - (typename internal::cleanup_index_type<LastType>::type(l) - -typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>())); -} - -template<typename 
FirstType,typename LastType, typename IncrType> -auto seq(FirstType f, LastType l, IncrType incr) - -> decltype(seqN(typename internal::cleanup_index_type<FirstType>::type(f), - ( typename internal::cleanup_index_type<LastType>::type(l) - - typename internal::cleanup_index_type<FirstType>::type(f)+typename internal::cleanup_seq_incr<IncrType>::type(incr) - ) / typename internal::cleanup_seq_incr<IncrType>::type(incr), - typename internal::cleanup_seq_incr<IncrType>::type(incr))) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - ( typename internal::cleanup_index_type<LastType>::type(l) - -typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr)) / CleanedIncrType(incr), - CleanedIncrType(incr)); -} -#else - -template<typename FirstType,typename LastType> -typename internal::enable_if<!(Symbolic::is_symbolic<FirstType>::value || Symbolic::is_symbolic<LastType>::value), - ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,Index> >::type -seq(FirstType f, LastType l) -{ - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - Index((typename internal::cleanup_index_type<LastType>::type(l)-typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>()))); -} - -template<typename FirstTypeDerived,typename LastType> -typename internal::enable_if<!Symbolic::is_symbolic<LastType>::value, - ArithmeticSequence<FirstTypeDerived, Symbolic::AddExpr<Symbolic::AddExpr<Symbolic::NegateExpr<FirstTypeDerived>,Symbolic::ValueExpr<> >, - Symbolic::ValueExpr<internal::FixedInt<1> > > > >::type -seq(const Symbolic::BaseExpr<FirstTypeDerived> &f, LastType l) -{ - return seqN(f.derived(),(typename internal::cleanup_index_type<LastType>::type(l)-f.derived()+fix<1>())); -} - -template<typename FirstType,typename LastTypeDerived> -typename internal::enable_if<!Symbolic::is_symbolic<FirstType>::value, - ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type, - Symbolic::AddExpr<Symbolic::AddExpr<LastTypeDerived,Symbolic::ValueExpr<> >, - Symbolic::ValueExpr<internal::FixedInt<1> > > > >::type -seq(FirstType f, const Symbolic::BaseExpr<LastTypeDerived> &l) -{ - return seqN(typename internal::cleanup_index_type<FirstType>::type(f),(l.derived()-typename internal::cleanup_index_type<FirstType>::type(f)+fix<1>())); -} - -template<typename FirstTypeDerived,typename LastTypeDerived> -ArithmeticSequence<FirstTypeDerived, - Symbolic::AddExpr<Symbolic::AddExpr<LastTypeDerived,Symbolic::NegateExpr<FirstTypeDerived> >,Symbolic::ValueExpr<internal::FixedInt<1> > > > -seq(const Symbolic::BaseExpr<FirstTypeDerived> &f, const Symbolic::BaseExpr<LastTypeDerived> &l) -{ - return seqN(f.derived(),(l.derived()-f.derived()+fix<1>())); -} - - -template<typename FirstType,typename LastType, typename IncrType> -typename internal::enable_if<!(Symbolic::is_symbolic<FirstType>::value || Symbolic::is_symbolic<LastType>::value), - ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type,Index,typename internal::cleanup_seq_incr<IncrType>::type> >::type -seq(FirstType f, LastType l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - Index((typename internal::cleanup_index_type<LastType>::type(l)-typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr)), incr); -} - 
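The deleted doc comments above specify the seq/seqN API precisely: seqN(f, n, incr) enumerates f, f+incr, ..., f+(n-1)*incr, and seq(f, l, incr) is documented as an alias for seqN(f, (l-f+incr)/incr, incr). A minimal usage sketch of what this removal takes away (hypothetical user code, not part of the diff, assuming an Eigen build that still ships Eigen::seq/seqN, e.g. Eigen >= 3.4):

```cpp
// Illustrative only -- assumes an Eigen version with Eigen::seq/seqN available.
#include <Eigen/Dense>
#include <iostream>

int main()
{
    Eigen::MatrixXd A = Eigen::MatrixXd::Random(10, 10);

    // seqN(first, size, incr): the 4 indices {1, 3, 5, 7}
    auto rows = Eigen::seqN(1, 4, 2);
    // seq(first, last) is inclusive, an alias for seqN(f, l - f + 1): {0, 1, 2, 3}
    auto cols = Eigen::seq(0, 3);

    // DenseBase::operator()(rows, cols) builds the view B with B(i,j) = A(rows[i], cols[j])
    Eigen::MatrixXd B = A(rows, cols);  // 4 x 4
    std::cout << B << "\n";
}
```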
-template<typename FirstTypeDerived,typename LastType, typename IncrType> -typename internal::enable_if<!Symbolic::is_symbolic<LastType>::value, - ArithmeticSequence<FirstTypeDerived, - Symbolic::QuotientExpr<Symbolic::AddExpr<Symbolic::AddExpr<Symbolic::NegateExpr<FirstTypeDerived>, - Symbolic::ValueExpr<> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - typename internal::cleanup_seq_incr<IncrType>::type> >::type -seq(const Symbolic::BaseExpr<FirstTypeDerived> &f, LastType l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(f.derived(),(typename internal::cleanup_index_type<LastType>::type(l)-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} - -template<typename FirstType,typename LastTypeDerived, typename IncrType> -typename internal::enable_if<!Symbolic::is_symbolic<FirstType>::value, - ArithmeticSequence<typename internal::cleanup_index_type<FirstType>::type, - Symbolic::QuotientExpr<Symbolic::AddExpr<Symbolic::AddExpr<LastTypeDerived,Symbolic::ValueExpr<> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - typename internal::cleanup_seq_incr<IncrType>::type> >::type -seq(FirstType f, const Symbolic::BaseExpr<LastTypeDerived> &l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(typename internal::cleanup_index_type<FirstType>::type(f), - (l.derived()-typename internal::cleanup_index_type<FirstType>::type(f)+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} - -template<typename FirstTypeDerived,typename LastTypeDerived, typename IncrType> -ArithmeticSequence<FirstTypeDerived, - Symbolic::QuotientExpr<Symbolic::AddExpr<Symbolic::AddExpr<LastTypeDerived, - Symbolic::NegateExpr<FirstTypeDerived> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - Symbolic::ValueExpr<typename internal::cleanup_seq_incr<IncrType>::type> >, - typename internal::cleanup_seq_incr<IncrType>::type> -seq(const Symbolic::BaseExpr<FirstTypeDerived> &f, const Symbolic::BaseExpr<LastTypeDerived> &l, IncrType incr) -{ - typedef typename internal::cleanup_seq_incr<IncrType>::type CleanedIncrType; - return seqN(f.derived(),(l.derived()-f.derived()+CleanedIncrType(incr))/CleanedIncrType(incr), incr); -} -#endif - -#endif // EIGEN_PARSED_BY_DOXYGEN - -namespace internal { - -// Convert a symbolic span into a usable one (i.e., remove last/end "keywords") -template<typename T> -struct make_size_type { - typedef typename internal::conditional<Symbolic::is_symbolic<T>::value, Index, T>::type type; -}; - -template<typename FirstType,typename SizeType,typename IncrType,int XprSize> -struct IndexedViewCompatibleType<ArithmeticSequence<FirstType,SizeType,IncrType>, XprSize> { - typedef ArithmeticSequence<Index,typename make_size_type<SizeType>::type,IncrType> type; -}; - -template<typename FirstType,typename SizeType,typename IncrType> -ArithmeticSequence<Index,typename make_size_type<SizeType>::type,IncrType> -makeIndexedViewCompatible(const ArithmeticSequence<FirstType,SizeType,IncrType>& ids, Index size,SpecializedType) { - return ArithmeticSequence<Index,typename make_size_type<SizeType>::type,IncrType>( - eval_expr_given_size(ids.firstObject(),size),eval_expr_given_size(ids.sizeObject(),size),ids.incrObject()); -} - 
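The enable_if overloads above exist so that either endpoint of seq() may be a symbolic expression rather than a plain Index; makeIndexedViewCompatible() then resolves such expressions against the actual dimension via eval_expr_given_size(). A sketch of the behaviour this machinery enables, assuming an Eigen that ships the symbolic Eigen::last placeholder:

```cpp
// Illustrative only -- assumes an Eigen version providing the symbolic Eigen::last.
#include <Eigen/Dense>
#include <iostream>

int main()
{
    Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(10, 0.0, 9.0);

    // 'last' stays symbolic until the expression is bound to v.size(),
    // so seq(last - 4, last) selects indices {5, ..., 9} here.
    std::cout << v(Eigen::seq(Eigen::last - 4, Eigen::last)).transpose() << "\n";
}
```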
-template<typename FirstType,typename SizeType,typename IncrType> -struct get_compile_time_incr<ArithmeticSequence<FirstType,SizeType,IncrType> > { - enum { value = get_fixed_value<IncrType,DynamicIndex>::value }; -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_ARITHMETIC_SEQUENCE_H diff --git a/eigen/Eigen/src/Core/Array.h b/eigen/Eigen/src/Core/Array.h index 0d34269..e10020d 100644 --- a/eigen/Eigen/src/Core/Array.h +++ b/eigen/Eigen/src/Core/Array.h @@ -231,10 +231,16 @@ class Array : Base(other) { } + private: + struct PrivateType {}; + public: + /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */ template<typename OtherDerived> EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other) + EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other, + typename internal::enable_if<internal::is_convertible<typename OtherDerived::Scalar,Scalar>::value, + PrivateType>::type = PrivateType()) : Base(other.derived()) { } diff --git a/eigen/Eigen/src/Core/ArrayBase.h b/eigen/Eigen/src/Core/ArrayBase.h index 9da960f..3dbc708 100644 --- a/eigen/Eigen/src/Core/ArrayBase.h +++ b/eigen/Eigen/src/Core/ArrayBase.h @@ -69,7 +69,6 @@ template<typename Derived> class ArrayBase using Base::coeff; using Base::coeffRef; using Base::lazyAssign; - using Base::operator-; using Base::operator=; using Base::operator+=; using Base::operator-=; @@ -89,6 +88,7 @@ template<typename Derived> class ArrayBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase #define EIGEN_DOC_UNARY_ADDONS(X,Y) +# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" # include "../plugins/ArrayCwiseUnaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h" diff --git a/eigen/Eigen/src/Core/ArrayWrapper.h b/eigen/Eigen/src/Core/ArrayWrapper.h index a04521a..688aadd 100644 --- a/eigen/Eigen/src/Core/ArrayWrapper.h +++ b/eigen/Eigen/src/Core/ArrayWrapper.h @@ -32,7 +32,8 @@ struct traits<ArrayWrapper<ExpressionType> > // Let's remove NestByRefBit enum { Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue<ExpressionType>::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } @@ -129,7 +130,8 @@ struct traits<MatrixWrapper<ExpressionType> > // Let's remove NestByRefBit enum { Flags0 = traits<typename remove_all<typename ExpressionType::Nested>::type >::Flags, - Flags = Flags0 & ~NestByRefBit + LvalueBitFlag = is_lvalue<ExpressionType>::value ? 
LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag }; }; } diff --git a/eigen/Eigen/src/Core/Assign.h b/eigen/Eigen/src/Core/Assign.h index 655412e..53806ba 100644 --- a/eigen/Eigen/src/Core/Assign.h +++ b/eigen/Eigen/src/Core/Assign.h @@ -16,7 +16,7 @@ namespace Eigen { template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived> +EIGEN_STRONG_INLINE Derived& DenseBase<Derived> ::lazyAssign(const DenseBase<OtherDerived>& other) { enum{ diff --git a/eigen/Eigen/src/Core/BooleanRedux.h b/eigen/Eigen/src/Core/BooleanRedux.h index ccf5190..8409d87 100644 --- a/eigen/Eigen/src/Core/BooleanRedux.h +++ b/eigen/Eigen/src/Core/BooleanRedux.h @@ -14,54 +14,56 @@ namespace Eigen { namespace internal { -template<typename Derived, int UnrollCount, int Rows> +template<typename Derived, int UnrollCount> struct all_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Rows, - row = (UnrollCount-1) % Rows + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) { - return all_unroller<Derived, UnrollCount-1, Rows>::run(mat) && mat.coeff(row, col); + return all_unroller<Derived, UnrollCount-1>::run(mat) && mat.coeff(row, col); } }; -template<typename Derived, int Rows> -struct all_unroller<Derived, 0, Rows> +template<typename Derived> +struct all_unroller<Derived, 0> { static inline bool run(const Derived &/*mat*/) { return true; } }; -template<typename Derived, int Rows> -struct all_unroller<Derived, Dynamic, Rows> +template<typename Derived> +struct all_unroller<Derived, Dynamic> { static inline bool run(const Derived &) { return false; } }; -template<typename Derived, int UnrollCount, int Rows> +template<typename Derived, int UnrollCount> struct any_unroller { + typedef typename Derived::ExpressionTraits Traits; enum { - col = (UnrollCount-1) / Rows, - row = (UnrollCount-1) % Rows + col = (UnrollCount-1) / Traits::RowsAtCompileTime, + row = (UnrollCount-1) % Traits::RowsAtCompileTime }; static inline bool run(const Derived &mat) { - return any_unroller<Derived, UnrollCount-1, Rows>::run(mat) || mat.coeff(row, col); + return any_unroller<Derived, UnrollCount-1>::run(mat) || mat.coeff(row, col); } }; -template<typename Derived, int Rows> -struct any_unroller<Derived, 0, Rows> +template<typename Derived> +struct any_unroller<Derived, 0> { static inline bool run(const Derived & /*mat*/) { return false; } }; -template<typename Derived, int Rows> -struct any_unroller<Derived, Dynamic, Rows> +template<typename Derived> +struct any_unroller<Derived, Dynamic> { static inline bool run(const Derived &) { return false; } }; @@ -76,7 +78,7 @@ struct any_unroller<Derived, Dynamic, Rows> * \sa any(), Cwise::operator<() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const +inline bool DenseBase<Derived>::all() const { typedef internal::evaluator<Derived> Evaluator; enum { @@ -85,7 +87,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const }; Evaluator evaluator(derived()); if(unroll) - return internal::all_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator); + return internal::all_unroller<Evaluator, unroll ? 
int(SizeAtCompileTime) : Dynamic>::run(evaluator); else { for(Index j = 0; j < cols(); ++j) @@ -100,7 +102,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::all() const * \sa all() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const +inline bool DenseBase<Derived>::any() const { typedef internal::evaluator<Derived> Evaluator; enum { @@ -109,7 +111,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const }; Evaluator evaluator(derived()); if(unroll) - return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic, internal::traits<Derived>::RowsAtCompileTime>::run(evaluator); + return internal::any_unroller<Evaluator, unroll ? int(SizeAtCompileTime) : Dynamic>::run(evaluator); else { for(Index j = 0; j < cols(); ++j) @@ -124,7 +126,7 @@ EIGEN_DEVICE_FUNC inline bool DenseBase<Derived>::any() const * \sa all(), any() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline Eigen::Index DenseBase<Derived>::count() const +inline Eigen::Index DenseBase<Derived>::count() const { return derived().template cast<bool>().template cast<Index>().sum(); } diff --git a/eigen/Eigen/src/Core/CommaInitializer.h b/eigen/Eigen/src/Core/CommaInitializer.h index 35fdbb8..d218e98 100644 --- a/eigen/Eigen/src/Core/CommaInitializer.h +++ b/eigen/Eigen/src/Core/CommaInitializer.h @@ -141,7 +141,7 @@ struct CommaInitializer * \sa CommaInitializer::finished(), class CommaInitializer */ template<typename Derived> -EIGEN_DEVICE_FUNC inline CommaInitializer<Derived> DenseBase<Derived>::operator<< (const Scalar& s) +inline CommaInitializer<Derived> DenseBase<Derived>::operator<< (const Scalar& s) { return CommaInitializer<Derived>(*static_cast<Derived*>(this), s); } @@ -149,7 +149,7 @@ EIGEN_DEVICE_FUNC inline CommaInitializer<Derived> DenseBase<Derived>::operator< /** \sa operator<<(const Scalar&) */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC inline CommaInitializer<Derived> +inline CommaInitializer<Derived> DenseBase<Derived>::operator<<(const DenseBase<OtherDerived>& other) { return CommaInitializer<Derived>(*static_cast<Derived *>(this), other); diff --git a/eigen/Eigen/src/Core/CoreEvaluators.h b/eigen/Eigen/src/Core/CoreEvaluators.h index 15b361b..f7c1eff 100644 --- a/eigen/Eigen/src/Core/CoreEvaluators.h +++ b/eigen/Eigen/src/Core/CoreEvaluators.h @@ -106,7 +106,7 @@ struct evaluator<const T> // ---------- base class for all evaluators ---------- template<typename ExpressionType> -struct evaluator_base +struct evaluator_base : public noncopyable { // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. typedef traits<ExpressionType> ExpressionTraits; @@ -114,14 +114,6 @@ struct evaluator_base enum { Alignment = 0 }; - // noncopyable: - // Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization) - // and make complex evaluator much larger than then should do. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE evaluator_base() {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~evaluator_base() {} -private: - EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&); - EIGEN_DEVICE_FUNC const evaluator_base& operator=(const evaluator_base&); }; // -------------------- Matrix and Array -------------------- @@ -131,27 +123,6 @@ private: // Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, // so no need for more sophisticated dispatching. 
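The evaluator changes in this region revert two empty-base tricks to plain members: the hand-written noncopyable boilerplate in evaluator_base (replaced by inheriting internal::noncopyable) and, further down, the `Data : private UnaryOp` wrappers. The removed comments' concern, that storing an empty functor as a member instead of a base enlarges every evaluator, is plain C++ and can be shown with hypothetical types unrelated to Eigen:

```cpp
// Standalone demonstration of the Empty Base Optimization trade-off mentioned
// in the removed comments (hypothetical types, not Eigen's).
#include <cstdio>

struct Empty {};                                     // e.g. a stateless functor

struct AsMember { Empty f; const double* p; };       // empty member: >= 1 byte plus padding
struct AsBase : private Empty { const double* p; };  // EBO: the empty base adds no size

int main()
{
    // Commonly prints "16 8" on LP64 targets.
    std::printf("%zu %zu\n", sizeof(AsMember), sizeof(AsBase));
}
```

On a typical LP64 ABI the member version pads the empty functor up to pointer alignment, which is exactly the growth the removed comment warned about for deeply nested expression evaluators.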
-// this helper permits to completely eliminate m_outerStride if it is known at compiletime. -template<typename Scalar,int OuterStride> class plainobjectbase_evaluator_data { -public: - EIGEN_DEVICE_FUNC plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr) - { - EIGEN_ONLY_USED_FOR_DEBUG(outerStride); - eigen_internal_assert(outerStride==OuterStride); - } - EIGEN_DEVICE_FUNC Index outerStride() const { return OuterStride; } - const Scalar *data; -}; - -template<typename Scalar> class plainobjectbase_evaluator_data<Scalar,Dynamic> { -public: - EIGEN_DEVICE_FUNC plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) : data(ptr), m_outerStride(outerStride) {} - EIGEN_DEVICE_FUNC Index outerStride() const { return m_outerStride; } - const Scalar *data; -protected: - Index m_outerStride; -}; - template<typename Derived> struct evaluator<PlainObjectBase<Derived> > : evaluator_base<Derived> @@ -170,21 +141,18 @@ struct evaluator<PlainObjectBase<Derived> > Flags = traits<Derived>::EvaluatorFlags, Alignment = traits<Derived>::Alignment }; - enum { - // We do not need to know the outer stride for vectors - OuterStrideAtCompileTime = IsVectorAtCompileTime ? 0 - : int(IsRowMajor) ? ColsAtCompileTime - : RowsAtCompileTime - }; - + EIGEN_DEVICE_FUNC evaluator() - : m_d(0,OuterStrideAtCompileTime) + : m_data(0), + m_outerStride(IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } - + EIGEN_DEVICE_FUNC explicit evaluator(const PlainObjectType& m) - : m_d(m.data(),IsVectorAtCompileTime ? 0 : m.outerStride()) + : m_data(m.data()), m_outerStride(IsVectorAtCompileTime ? 0 : m.outerStride()) { EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); } @@ -193,30 +161,30 @@ struct evaluator<PlainObjectBase<Derived> > CoeffReturnType coeff(Index row, Index col) const { if (IsRowMajor) - return m_d.data[row * m_d.outerStride() + col]; + return m_data[row * m_outerStride.value() + col]; else - return m_d.data[row + col * m_d.outerStride()]; + return m_data[row + col * m_outerStride.value()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.data[index]; + return m_data[index]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { if (IsRowMajor) - return const_cast<Scalar*>(m_d.data)[row * m_d.outerStride() + col]; + return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col]; else - return const_cast<Scalar*>(m_d.data)[row + col * m_d.outerStride()]; + return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()]; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return const_cast<Scalar*>(m_d.data)[index]; + return const_cast<Scalar*>(m_data)[index]; } template<int LoadMode, typename PacketType> @@ -224,16 +192,16 @@ struct evaluator<PlainObjectBase<Derived> > PacketType packet(Index row, Index col) const { if (IsRowMajor) - return ploadt<PacketType, LoadMode>(m_d.data + row * m_d.outerStride() + col); + return ploadt<PacketType, LoadMode>(m_data + row * m_outerStride.value() + col); else - return ploadt<PacketType, LoadMode>(m_d.data + row + col * m_d.outerStride()); + return ploadt<PacketType, LoadMode>(m_data + row + col * m_outerStride.value()); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return ploadt<PacketType, LoadMode>(m_d.data + index); + return ploadt<PacketType, LoadMode>(m_data + index); } template<int 
StoreMode,typename PacketType> @@ -242,22 +210,26 @@ struct evaluator<PlainObjectBase<Derived> > { if (IsRowMajor) return pstoret<Scalar, PacketType, StoreMode> - (const_cast<Scalar*>(m_d.data) + row * m_d.outerStride() + col, x); + (const_cast<Scalar*>(m_data) + row * m_outerStride.value() + col, x); else return pstoret<Scalar, PacketType, StoreMode> - (const_cast<Scalar*>(m_d.data) + row + col * m_d.outerStride(), x); + (const_cast<Scalar*>(m_data) + row + col * m_outerStride.value(), x); } template<int StoreMode, typename PacketType> EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + index, x); + return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x); } protected: + const Scalar *m_data; - plainobjectbase_evaluator_data<Scalar,OuterStrideAtCompileTime> m_d; + // We do not need to know the outer stride for vectors + variable_if_dynamic<Index, IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime> m_outerStride; }; template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols> @@ -555,7 +527,9 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - explicit unary_evaluator(const XprType& op) : m_d(op) + explicit unary_evaluator(const XprType& op) + : m_functor(op.functor()), + m_argImpl(op.nestedExpression()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -566,43 +540,32 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased > EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.argImpl.coeff(row, col)); + return m_functor(m_argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.argImpl.coeff(index)); + return m_functor(m_argImpl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(row, col)); + return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(index)); + return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - class Data : private UnaryOp - { - public: - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const UnaryOp& func() const { return static_cast<const UnaryOp&>(*this); } - evaluator<ArgType> argImpl; - }; - - Data m_d; + const UnaryOp m_functor; + evaluator<ArgType> m_argImpl; }; // -------------------- CwiseTernaryOp -------------------- @@ -646,7 +609,11 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased evaluator<Arg3>::Alignment) }; - EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) : m_d(xpr) + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_arg1Impl(xpr.arg1()), + m_arg2Impl(xpr.arg2()), + m_arg3Impl(xpr.arg3()) { 
EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<TernaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -657,47 +624,38 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.arg1Impl.coeff(row, col), m_d.arg2Impl.coeff(row, col), m_d.arg3Impl.coeff(row, col)); + return m_functor(m_arg1Impl.coeff(row, col), m_arg2Impl.coeff(row, col), m_arg3Impl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.arg1Impl.coeff(index), m_d.arg2Impl.coeff(index), m_d.arg3Impl.coeff(index)); + return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(row, col), - m_d.arg2Impl.template packet<LoadMode,PacketType>(row, col), - m_d.arg3Impl.template packet<LoadMode,PacketType>(row, col)); + return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(row, col), + m_arg2Impl.template packet<LoadMode,PacketType>(row, col), + m_arg3Impl.template packet<LoadMode,PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.arg1Impl.template packet<LoadMode,PacketType>(index), - m_d.arg2Impl.template packet<LoadMode,PacketType>(index), - m_d.arg3Impl.template packet<LoadMode,PacketType>(index)); + return m_functor.packetOp(m_arg1Impl.template packet<LoadMode,PacketType>(index), + m_arg2Impl.template packet<LoadMode,PacketType>(index), + m_arg3Impl.template packet<LoadMode,PacketType>(index)); } protected: - // this helper permits to completely eliminate the functor if it is empty - struct Data : private TernaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : TernaryOp(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const TernaryOp& func() const { return static_cast<const TernaryOp&>(*this); } - evaluator<Arg1> arg1Impl; - evaluator<Arg2> arg2Impl; - evaluator<Arg3> arg3Impl; - }; - - Data m_d; + const TernaryOp m_functor; + evaluator<Arg1> m_arg1Impl; + evaluator<Arg2> m_arg2Impl; + evaluator<Arg3> m_arg3Impl; }; // -------------------- CwiseBinaryOp -------------------- @@ -738,7 +696,10 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<Lhs>::Alignment,evaluator<Rhs>::Alignment) }; - EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) : m_d(xpr) + EIGEN_DEVICE_FUNC explicit binary_evaluator(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -749,45 +710,35 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.lhsImpl.coeff(row, col), m_d.rhsImpl.coeff(row, col)); + return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.lhsImpl.coeff(index), 
m_d.rhsImpl.coeff(index)); + return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { - return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(row, col), - m_d.rhsImpl.template packet<LoadMode,PacketType>(row, col)); + return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col), + m_rhsImpl.template packet<LoadMode,PacketType>(row, col)); } template<int LoadMode, typename PacketType> EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return m_d.func().packetOp(m_d.lhsImpl.template packet<LoadMode,PacketType>(index), - m_d.rhsImpl.template packet<LoadMode,PacketType>(index)); + return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index), + m_rhsImpl.template packet<LoadMode,PacketType>(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - struct Data : private BinaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : BinaryOp(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const BinaryOp& func() const { return static_cast<const BinaryOp&>(*this); } - evaluator<Lhs> lhsImpl; - evaluator<Rhs> rhsImpl; - }; - - Data m_d; + const BinaryOp m_functor; + evaluator<Lhs> m_lhsImpl; + evaluator<Rhs> m_rhsImpl; }; // -------------------- CwiseUnaryView -------------------- @@ -806,7 +757,9 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... }; - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_d(op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) + : m_unaryOp(op.functor()), + m_argImpl(op.nestedExpression()) { EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<UnaryOp>::Cost); EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); @@ -818,40 +771,30 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { - return m_d.func()(m_d.argImpl.coeff(row, col)); + return m_unaryOp(m_argImpl.coeff(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return m_d.func()(m_d.argImpl.coeff(index)); + return m_unaryOp(m_argImpl.coeff(index)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { - return m_d.func()(m_d.argImpl.coeffRef(row, col)); + return m_unaryOp(m_argImpl.coeffRef(row, col)); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return m_d.func()(m_d.argImpl.coeffRef(index)); + return m_unaryOp(m_argImpl.coeffRef(index)); } protected: - - // this helper permits to completely eliminate the functor if it is empty - struct Data : private UnaryOp - { - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Data(const XprType& xpr) : UnaryOp(xpr.functor()), argImpl(xpr.nestedExpression()) {} - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const UnaryOp& func() const { return static_cast<const UnaryOp&>(*this); } - evaluator<ArgType> argImpl; - }; - - Data m_d; + const UnaryOp m_unaryOp; + evaluator<ArgType> m_argImpl; }; // -------------------- Map -------------------- diff --git a/eigen/Eigen/src/Core/CoreIterators.h b/eigen/Eigen/src/Core/CoreIterators.h index b967196..4eb42b9 100644 --- a/eigen/Eigen/src/Core/CoreIterators.h +++ b/eigen/Eigen/src/Core/CoreIterators.h @@ -48,11 +48,6 @@ 
public: * Explicit zeros are not skipped over. To skip explicit zeros, see class SparseView */ EIGEN_STRONG_INLINE InnerIterator& operator++() { m_iter.operator++(); return *this; } - EIGEN_STRONG_INLINE InnerIterator& operator+=(Index i) { m_iter.operator+=(i); return *this; } - EIGEN_STRONG_INLINE InnerIterator operator+(Index i) - { InnerIterator result(*this); result+=i; return result; } - - /// \returns the column or row index of the current coefficient. EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); } /// \returns the row index of the current coefficient. diff --git a/eigen/Eigen/src/Core/CwiseBinaryOp.h b/eigen/Eigen/src/Core/CwiseBinaryOp.h index bf2632d..a36765e 100644 --- a/eigen/Eigen/src/Core/CwiseBinaryOp.h +++ b/eigen/Eigen/src/Core/CwiseBinaryOp.h @@ -158,7 +158,7 @@ public: */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & +EIGEN_STRONG_INLINE Derived & MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other) { call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -171,7 +171,7 @@ MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other) */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived & +EIGEN_STRONG_INLINE Derived & MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other) { call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -181,3 +181,4 @@ MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other) } // end namespace Eigen #endif // EIGEN_CWISE_BINARY_OP_H + diff --git a/eigen/Eigen/src/Core/CwiseNullaryOp.h b/eigen/Eigen/src/Core/CwiseNullaryOp.h index 144608e..ddd607e 100644 --- a/eigen/Eigen/src/Core/CwiseNullaryOp.h +++ b/eigen/Eigen/src/Core/CwiseNullaryOp.h @@ -131,7 +131,7 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f */ template<typename Derived> template<typename CustomNullaryOp> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> +EIGEN_STRONG_INLINE const CwiseNullaryOp<CustomNullaryOp, typename DenseBase<Derived>::PlainObject> DenseBase<Derived>::NullaryExpr(Index size, const CustomNullaryOp& func) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) @@ -170,7 +170,7 @@ DenseBase<Derived>::NullaryExpr(const CustomNullaryOp& func) * \sa class CwiseNullaryOp */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType +EIGEN_STRONG_INLINE const typename DenseBase<Derived>::ConstantReturnType DenseBase<Derived>::Constant(Index rows, Index cols, const Scalar& value) { return DenseBase<Derived>::NullaryExpr(rows, cols, internal::scalar_constant_op<Scalar>(value)); diff --git a/eigen/Eigen/src/Core/DenseBase.h b/eigen/Eigen/src/Core/DenseBase.h index fd933ee..90066ae 100644 --- a/eigen/Eigen/src/Core/DenseBase.h +++ b/eigen/Eigen/src/Core/DenseBase.h @@ -570,17 +570,13 @@ template<typename Derived> class DenseBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase #define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL #define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) -#define EIGEN_DOC_UNARY_ADDONS(X,Y) -# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/BlockMethods.h" -# include "../plugins/IndexedViewMethods.h" # ifdef EIGEN_DENSEBASE_PLUGIN # include EIGEN_DENSEBASE_PLUGIN # endif 
#undef EIGEN_CURRENT_STORAGE_BASE_CLASS #undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL #undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF -#undef EIGEN_DOC_UNARY_ADDONS // disable the use of evalTo for dense objects with a nice compilation error template<typename Dest> diff --git a/eigen/Eigen/src/Core/Diagonal.h b/eigen/Eigen/src/Core/Diagonal.h index c62f5ff..49e7112 100644 --- a/eigen/Eigen/src/Core/Diagonal.h +++ b/eigen/Eigen/src/Core/Diagonal.h @@ -184,7 +184,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal * * \sa class Diagonal */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalReturnType +inline typename MatrixBase<Derived>::DiagonalReturnType MatrixBase<Derived>::diagonal() { return DiagonalReturnType(derived()); @@ -192,7 +192,7 @@ MatrixBase<Derived>::diagonal() /** This is the const version of diagonal(). */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalReturnType +inline typename MatrixBase<Derived>::ConstDiagonalReturnType MatrixBase<Derived>::diagonal() const { return ConstDiagonalReturnType(derived()); @@ -210,7 +210,7 @@ MatrixBase<Derived>::diagonal() const * * \sa MatrixBase::diagonal(), class Diagonal */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType +inline typename MatrixBase<Derived>::DiagonalDynamicIndexReturnType MatrixBase<Derived>::diagonal(Index index) { return DiagonalDynamicIndexReturnType(derived(), index); @@ -218,7 +218,7 @@ MatrixBase<Derived>::diagonal(Index index) /** This is the const version of diagonal(Index). */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType +inline typename MatrixBase<Derived>::ConstDiagonalDynamicIndexReturnType MatrixBase<Derived>::diagonal(Index index) const { return ConstDiagonalDynamicIndexReturnType(derived(), index); @@ -237,7 +237,6 @@ MatrixBase<Derived>::diagonal(Index index) const * \sa MatrixBase::diagonal(), class Diagonal */ template<typename Derived> template<int Index_> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::template DiagonalIndexReturnType<Index_>::Type MatrixBase<Derived>::diagonal() { @@ -247,7 +246,6 @@ MatrixBase<Derived>::diagonal() /** This is the const version of diagonal<int>(). 
*/ template<typename Derived> template<int Index_> -EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::template ConstDiagonalIndexReturnType<Index_>::Type MatrixBase<Derived>::diagonal() const { diff --git a/eigen/Eigen/src/Core/DiagonalMatrix.h b/eigen/Eigen/src/Core/DiagonalMatrix.h index 4e8297e..ecfdce8 100644 --- a/eigen/Eigen/src/Core/DiagonalMatrix.h +++ b/eigen/Eigen/src/Core/DiagonalMatrix.h @@ -44,7 +44,7 @@ class DiagonalBase : public EigenBase<Derived> EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } - + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } EIGEN_DEVICE_FUNC @@ -273,7 +273,7 @@ class DiagonalWrapper * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal() **/ template<typename Derived> -EIGEN_DEVICE_FUNC inline const DiagonalWrapper<const Derived> +inline const DiagonalWrapper<const Derived> MatrixBase<Derived>::asDiagonal() const { return DiagonalWrapper<const Derived>(derived()); diff --git a/eigen/Eigen/src/Core/DiagonalProduct.h b/eigen/Eigen/src/Core/DiagonalProduct.h index 7911d1c..d372b93 100644 --- a/eigen/Eigen/src/Core/DiagonalProduct.h +++ b/eigen/Eigen/src/Core/DiagonalProduct.h @@ -17,7 +17,7 @@ namespace Eigen { */ template<typename Derived> template<typename DiagonalDerived> -EIGEN_DEVICE_FUNC inline const Product<Derived, DiagonalDerived, LazyProduct> +inline const Product<Derived, DiagonalDerived, LazyProduct> MatrixBase<Derived>::operator*(const DiagonalBase<DiagonalDerived> &a_diagonal) const { return Product<Derived, DiagonalDerived, LazyProduct>(derived(),a_diagonal.derived()); diff --git a/eigen/Eigen/src/Core/Dot.h b/eigen/Eigen/src/Core/Dot.h index bb8e3fe..06ef18b 100644 --- a/eigen/Eigen/src/Core/Dot.h +++ b/eigen/Eigen/src/Core/Dot.h @@ -90,7 +90,7 @@ MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const * \sa dot(), norm(), lpNorm() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const +EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::squaredNorm() const { return numext::real((*this).cwiseAbs2().sum()); } @@ -102,7 +102,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits<typename internal::trai * \sa lpNorm(), dot(), squaredNorm() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const +inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const { return numext::sqrt(squaredNorm()); } @@ -117,7 +117,7 @@ EIGEN_DEVICE_FUNC inline typename NumTraits<typename internal::traits<Derived>:: * \sa norm(), normalize() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::PlainObject +inline const typename MatrixBase<Derived>::PlainObject MatrixBase<Derived>::normalized() const { typedef typename internal::nested_eval<Derived,2>::type _Nested; @@ -139,7 +139,7 @@ MatrixBase<Derived>::normalized() const * \sa norm(), normalized() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::normalize() +inline void MatrixBase<Derived>::normalize() { RealScalar z = squaredNorm(); // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU @@ -160,7 +160,7 @@ EIGEN_DEVICE_FUNC inline 
void MatrixBase<Derived>::normalize() * \sa stableNorm(), stableNormalize(), normalized() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::PlainObject +inline const typename MatrixBase<Derived>::PlainObject MatrixBase<Derived>::stableNormalized() const { typedef typename internal::nested_eval<Derived,3>::type _Nested; @@ -185,7 +185,7 @@ MatrixBase<Derived>::stableNormalized() const * \sa stableNorm(), stableNormalized(), normalize() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::stableNormalize() +inline void MatrixBase<Derived>::stableNormalize() { RealScalar w = cwiseAbs().maxCoeff(); RealScalar z = (derived()/w).squaredNorm(); @@ -257,9 +257,9 @@ struct lpNorm_selector<Derived, Infinity> template<typename Derived> template<int p> #ifndef EIGEN_PARSED_BY_DOXYGEN -EIGEN_DEVICE_FUNC inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real +inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real #else -EIGEN_DEVICE_FUNC MatrixBase<Derived>::RealScalar +MatrixBase<Derived>::RealScalar #endif MatrixBase<Derived>::lpNorm() const { diff --git a/eigen/Eigen/src/Core/EigenBase.h b/eigen/Eigen/src/Core/EigenBase.h index ccc122c..b195506 100644 --- a/eigen/Eigen/src/Core/EigenBase.h +++ b/eigen/Eigen/src/Core/EigenBase.h @@ -14,6 +14,7 @@ namespace Eigen { /** \class EigenBase + * \ingroup Core_Module * * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). * diff --git a/eigen/Eigen/src/Core/Fuzzy.h b/eigen/Eigen/src/Core/Fuzzy.h index 43aa49b..3e403a0 100644 --- a/eigen/Eigen/src/Core/Fuzzy.h +++ b/eigen/Eigen/src/Core/Fuzzy.h @@ -100,7 +100,7 @@ struct isMuchSmallerThan_scalar_selector<Derived, true> */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApprox( +bool DenseBase<Derived>::isApprox( const DenseBase<OtherDerived>& other, const RealScalar& prec ) const @@ -122,7 +122,7 @@ EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isApprox( * \sa isApprox(), isMuchSmallerThan(const DenseBase<OtherDerived>&, RealScalar) const */ template<typename Derived> -EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan( +bool DenseBase<Derived>::isMuchSmallerThan( const typename NumTraits<Scalar>::Real& other, const RealScalar& prec ) const @@ -142,7 +142,7 @@ EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan( */ template<typename Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC bool DenseBase<Derived>::isMuchSmallerThan( +bool DenseBase<Derived>::isMuchSmallerThan( const DenseBase<OtherDerived>& other, const RealScalar& prec ) const diff --git a/eigen/Eigen/src/Core/GeneralProduct.h b/eigen/Eigen/src/Core/GeneralProduct.h index b206b0a..0f16cd8 100644 --- a/eigen/Eigen/src/Core/GeneralProduct.h +++ b/eigen/Eigen/src/Core/GeneralProduct.h @@ -428,7 +428,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const template<typename Derived> template<typename OtherDerived> const Product<Derived,OtherDerived,LazyProduct> -EIGEN_DEVICE_FUNC MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const +MatrixBase<Derived>::lazyProduct(const MatrixBase<OtherDerived> &other) const { enum { ProductIsValid = Derived::ColsAtCompileTime==Dynamic diff --git a/eigen/Eigen/src/Core/GenericPacketMath.h b/eigen/Eigen/src/Core/GenericPacketMath.h index d19d5bb..029f8ac 100644 --- a/eigen/Eigen/src/Core/GenericPacketMath.h +++ 
b/eigen/Eigen/src/Core/GenericPacketMath.h @@ -61,7 +61,6 @@ struct default_packet_traits HasSqrt = 0, HasRsqrt = 0, HasExp = 0, - HasExpm1 = 0, HasLog = 0, HasLog1p = 0, HasLog10 = 0, @@ -402,10 +401,6 @@ Packet ptanh(const Packet& a) { using std::tanh; return tanh(a); } template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { using std::exp; return exp(a); } -/** \internal \returns the expm1 of \a a (coeff-wise) */ -template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet pexpm1(const Packet& a) { return numext::expm1(a); } - /** \internal \returns the log of \a a (coeff-wise) */ template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) { using std::log; return log(a); } diff --git a/eigen/Eigen/src/Core/GlobalFunctions.h b/eigen/Eigen/src/Core/GlobalFunctions.h index 12828a7..769dc25 100644 --- a/eigen/Eigen/src/Core/GlobalFunctions.h +++ b/eigen/Eigen/src/Core/GlobalFunctions.h @@ -71,7 +71,6 @@ namespace Eigen EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp) - EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p,scalar_log1p_op,natural logarithm of 1 plus the value,\sa ArrayBase::log1p) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10,scalar_log10_op,base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log) diff --git a/eigen/Eigen/src/Core/IndexedView.h b/eigen/Eigen/src/Core/IndexedView.h deleted file mode 100644 index 8c57a27..0000000 --- a/eigen/Eigen/src/Core/IndexedView.h +++ /dev/null @@ -1,207 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_INDEXED_VIEW_H -#define EIGEN_INDEXED_VIEW_H - -namespace Eigen { - -namespace internal { - -template<typename XprType, typename RowIndices, typename ColIndices> -struct traits<IndexedView<XprType, RowIndices, ColIndices> > - : traits<XprType> -{ - enum { - RowsAtCompileTime = int(array_size<RowIndices>::value), - ColsAtCompileTime = int(array_size<ColIndices>::value), - MaxRowsAtCompileTime = RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime) : int(traits<XprType>::MaxRowsAtCompileTime), - MaxColsAtCompileTime = ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) : int(traits<XprType>::MaxColsAtCompileTime), - - XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0, - IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 - : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 - : XprTypeIsRowMajor, - - RowIncr = int(get_compile_time_incr<RowIndices>::value), - ColIncr = int(get_compile_time_incr<ColIndices>::value), - InnerIncr = IsRowMajor ? ColIncr : RowIncr, - OuterIncr = IsRowMajor ? RowIncr : ColIncr, - - HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor), - XprInnerStride = HasSameStorageOrderAsXprType ? 
int(inner_stride_at_compile_time<XprType>::ret) : int(outer_stride_at_compile_time<XprType>::ret), - XprOuterstride = HasSameStorageOrderAsXprType ? int(outer_stride_at_compile_time<XprType>::ret) : int(inner_stride_at_compile_time<XprType>::ret), - - InnerSize = XprTypeIsRowMajor ? ColsAtCompileTime : RowsAtCompileTime, - IsBlockAlike = InnerIncr==1 && OuterIncr==1, - IsInnerPannel = HasSameStorageOrderAsXprType && is_same<AllRange<InnerSize>,typename conditional<XprTypeIsRowMajor,ColIndices,RowIndices>::type>::value, - - InnerStrideAtCompileTime = InnerIncr<0 || InnerIncr==DynamicIndex || XprInnerStride==Dynamic ? Dynamic : XprInnerStride * InnerIncr, - OuterStrideAtCompileTime = OuterIncr<0 || OuterIncr==DynamicIndex || XprOuterstride==Dynamic ? Dynamic : XprOuterstride * OuterIncr, - - ReturnAsScalar = is_same<RowIndices,SingleRange>::value && is_same<ColIndices,SingleRange>::value, - ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike, - ReturnAsIndexedView = (!ReturnAsScalar) && (!ReturnAsBlock), - - // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag, - // but this is too strict regarding negative strides... - DirectAccessMask = (int(InnerIncr)!=UndefinedIncr && int(OuterIncr)!=UndefinedIncr && InnerIncr>=0 && OuterIncr>=0) ? DirectAccessBit : 0, - FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, - FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0, - Flags = (traits<XprType>::Flags & (HereditaryBits | DirectAccessMask)) | FlagsLvalueBit | FlagsRowMajorBit - }; - - typedef Block<XprType,RowsAtCompileTime,ColsAtCompileTime,IsInnerPannel> BlockType; -}; - -} - -template<typename XprType, typename RowIndices, typename ColIndices, typename StorageKind> -class IndexedViewImpl; - - -/** \class IndexedView - * \ingroup Core_Module - * - * \brief Expression of a non-sequential sub-matrix defined by arbitrary sequences of row and column indices - * - * \tparam XprType the type of the expression in which we are taking the intersections of sub-rows and sub-columns - * \tparam RowIndices the type of the object defining the sequence of row indices - * \tparam ColIndices the type of the object defining the sequence of column indices - * - * This class represents an expression of a sub-matrix (or sub-vector) defined as the intersection - * of sub-sets of rows and columns, that are themself defined by generic sequences of row indices \f$ \{r_0,r_1,..r_{m-1}\} \f$ - * and column indices \f$ \{c_0,c_1,..c_{n-1} \}\f$. Let \f$ A \f$ be the nested matrix, then the resulting matrix \f$ B \f$ has \c m - * rows and \c n columns, and its entries are given by: \f$ B(i,j) = A(r_i,c_j) \f$. - * - * The \c RowIndices and \c ColIndices types must be compatible with the following API: - * \code - * <integral type> operator[](Index) const; - * Index size() const; - * \endcode - * - * Typical supported types thus include: - * - std::vector<int> - * - std::valarray<int> - * - std::array<int> - * - Plain C arrays: int[N] - * - Eigen::ArrayXi - * - decltype(ArrayXi::LinSpaced(...)) - * - Any view/expressions of the previous types - * - Eigen::ArithmeticSequence - * - Eigen::internal::AllRange (helper for Eigen::all) - * - Eigen::internal::SingleRange (helper for single index) - * - etc. - * - * In typical usages of %Eigen, this class should never be used directly. It is the return type of - * DenseBase::operator()(const RowIndices&, const ColIndices&). 
- * - * \sa class Block - */ -template<typename XprType, typename RowIndices, typename ColIndices> -class IndexedView : public IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind> -{ -public: - typedef typename IndexedViewImpl<XprType, RowIndices, ColIndices, typename internal::traits<XprType>::StorageKind>::Base Base; - EIGEN_GENERIC_PUBLIC_INTERFACE(IndexedView) - EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedView) - - typedef typename internal::ref_selector<XprType>::non_const_type MatrixTypeNested; - typedef typename internal::remove_all<XprType>::type NestedExpression; - - template<typename T0, typename T1> - IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices) - : m_xpr(xpr), m_rowIndices(rowIndices), m_colIndices(colIndices) - {} - - /** \returns number of rows */ - Index rows() const { return internal::size(m_rowIndices); } - - /** \returns number of columns */ - Index cols() const { return internal::size(m_colIndices); } - - /** \returns the nested expression */ - const typename internal::remove_all<XprType>::type& - nestedExpression() const { return m_xpr; } - - /** \returns the nested expression */ - typename internal::remove_reference<XprType>::type& - nestedExpression() { return m_xpr.const_cast_derived(); } - - /** \returns a const reference to the object storing/generating the row indices */ - const RowIndices& rowIndices() const { return m_rowIndices; } - - /** \returns a const reference to the object storing/generating the column indices */ - const ColIndices& colIndices() const { return m_colIndices; } - -protected: - MatrixTypeNested m_xpr; - RowIndices m_rowIndices; - ColIndices m_colIndices; -}; - - -// Generic API dispatcher -template<typename XprType, typename RowIndices, typename ColIndices, typename StorageKind> -class IndexedViewImpl - : public internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices> >::type -{ -public: - typedef typename internal::generic_xpr_base<IndexedView<XprType, RowIndices, ColIndices> >::type Base; -}; - -namespace internal { - - -template<typename ArgType, typename RowIndices, typename ColIndices> -struct unary_evaluator<IndexedView<ArgType, RowIndices, ColIndices>, IndexBased> - : evaluator_base<IndexedView<ArgType, RowIndices, ColIndices> > -{ - typedef IndexedView<ArgType, RowIndices, ColIndices> XprType; - - enum { - CoeffReadCost = evaluator<ArgType>::CoeffReadCost /* TODO + cost of row/col index */, - - Flags = (evaluator<ArgType>::Flags & (HereditaryBits /*| LinearAccessBit | DirectAccessBit*/)), - - Alignment = 0 - }; - - EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_xpr(xpr) - { - EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); - } - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - CoeffReturnType coeff(Index row, Index col) const - { - return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - Scalar& coeffRef(Index row, Index col) - { - return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); - } - -protected: - - evaluator<ArgType> m_argImpl; - const XprType& m_xpr; - -}; - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_INDEXED_VIEW_H diff --git a/eigen/Eigen/src/Core/MathFunctions.h b/eigen/Eigen/src/Core/MathFunctions.h index 5ec6c39..a648aa0 100644 --- a/eigen/Eigen/src/Core/MathFunctions.h 
+++ b/eigen/Eigen/src/Core/MathFunctions.h @@ -14,6 +14,7 @@ // TODO this should better be moved to NumTraits #define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L + namespace Eigen { // On WINCE, std::abs is defined for int only, so let's defined our own overloads: @@ -412,7 +413,7 @@ inline NewType cast(const OldType& x) static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT((!NumTraits<Scalar>::IsComplex), NUMERIC_TYPE_MUST_BE_REAL) - EIGEN_USING_STD_MATH(round); + using std::round; return round(x); } }; @@ -482,55 +483,6 @@ struct arg_retval }; /**************************************************************************** -* Implementation of expm1 * -****************************************************************************/ - -// This implementation is based on GSL Math's expm1. -namespace std_fallback { - // fallback expm1 implementation in case there is no expm1(Scalar) function in namespace of Scalar, - // or that there is no suitable std::expm1 function available. Implementation - // attributed to Kahan. See: http://www.plunk.org/~hatch/rightway.php. - template<typename Scalar> - EIGEN_DEVICE_FUNC inline Scalar expm1(const Scalar& x) { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - typedef typename NumTraits<Scalar>::Real RealScalar; - - EIGEN_USING_STD_MATH(exp); - Scalar u = exp(x); - if (u == Scalar(1)) { - return x; - } - Scalar um1 = u - RealScalar(1); - if (um1 == Scalar(-1)) { - return RealScalar(-1); - } - - EIGEN_USING_STD_MATH(log); - return (u - RealScalar(1)) * x / log(u); - } -} - -template<typename Scalar> -struct expm1_impl { - EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) - { - EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) - #if EIGEN_HAS_CXX11_MATH - using std::expm1; - #endif - using std_fallback::expm1; - return expm1(x); - } -}; - - -template<typename Scalar> -struct expm1_retval -{ - typedef Scalar type; -}; - -/**************************************************************************** * Implementation of log1p * ****************************************************************************/ @@ -549,7 +501,7 @@ namespace std_fallback { template<typename Scalar> struct log1p_impl { - EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) + static inline Scalar run(const Scalar& x) { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) #if EIGEN_HAS_CXX11_MATH @@ -688,7 +640,7 @@ template<typename Scalar> struct random_default_impl<Scalar, false, true> { static inline Scalar run(const Scalar& x, const Scalar& y) - { + { typedef typename conditional<NumTraits<Scalar>::IsSigned,std::ptrdiff_t,std::size_t>::type ScalarX; if(y<x) return x; @@ -874,7 +826,7 @@ template<typename T> T generic_fast_tanh_float(const T& a_x); namespace numext { -#if !defined(__CUDA_ARCH__) && !defined(__SYCL_DEVICE_ONLY__) +#ifndef __CUDA_ARCH__ template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) @@ -890,84 +842,6 @@ EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) EIGEN_USING_STD_MATH(max); return max EIGEN_NOT_A_MACRO (x,y); } - - -#elif defined(__SYCL_DEVICE_ONLY__) -template<typename T> -EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) -{ - - return y < x ? y : x; -} - -template<typename T> -EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) -{ - - return x < y ? 
y : x; -} - -EIGEN_ALWAYS_INLINE int mini(const int& x, const int& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE int maxi(const int& x, const int& y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned int mini(const unsigned int& x, const unsigned int& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned int maxi(const unsigned int& x, const unsigned int& y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE long mini(const long & x, const long & y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE long maxi(const long & x, const long & y) -{ - return cl::sycl::max(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned long mini(const unsigned long& x, const unsigned long& y) -{ - return cl::sycl::min(x,y); -} - -EIGEN_ALWAYS_INLINE unsigned long maxi(const unsigned long& x, const unsigned long& y) -{ - return cl::sycl::max(x,y); -} - - -EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) -{ - return cl::sycl::fmin(x,y); -} - -EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) -{ - return cl::sycl::fmax(x,y); -} - -EIGEN_ALWAYS_INLINE double mini(const double& x, const double& y) -{ - return cl::sycl::fmin(x,y); -} - -EIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y) -{ - return cl::sycl::fmax(x,y); -} - #else template<typename T> EIGEN_DEVICE_FUNC @@ -1080,11 +954,6 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float log1p(float x) { return cl::sycl::log1p(x); } -EIGEN_ALWAYS_INLINE double log1p(double x) { return cl::sycl::log1p(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log1p(const float &x) { return ::log1pf(x); } @@ -1100,24 +969,10 @@ inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const Scala return internal::pow_impl<ScalarX,ScalarY>::run(x, y); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float pow(float x, float y) { return cl::sycl::pow(x, y); } -EIGEN_ALWAYS_INLINE double pow(double x, double y) { return cl::sycl::pow(x, y); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } template<typename T> EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } template<typename T> EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float isnan(float x) { return cl::sycl::isnan(x); } -EIGEN_ALWAYS_INLINE double isnan(double x) { return cl::sycl::isnan(x); } -EIGEN_ALWAYS_INLINE float isinf(float x) { return cl::sycl::isinf(x); } -EIGEN_ALWAYS_INLINE double isinf(double x) { return cl::sycl::isinf(x); } -EIGEN_ALWAYS_INLINE float isfinite(float x) { return cl::sycl::isfinite(x); } -EIGEN_ALWAYS_INLINE double isfinite(double x) { return cl::sycl::isfinite(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template<typename Scalar> EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) @@ -1125,11 +980,6 @@ inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float round(float x) { return cl::sycl::round(x); } -EIGEN_ALWAYS_INLINE double round(double x) { return cl::sycl::round(x); } -#endif // 
defined(__SYCL_DEVICE_ONLY__) - template<typename T> EIGEN_DEVICE_FUNC T (floor)(const T& x) @@ -1138,11 +988,6 @@ T (floor)(const T& x) return floor(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float floor(float x) { return cl::sycl::floor(x); } -EIGEN_ALWAYS_INLINE double floor(double x) { return cl::sycl::floor(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float floor(const float &x) { return ::floorf(x); } @@ -1159,11 +1004,6 @@ T (ceil)(const T& x) return ceil(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float ceil(float x) { return cl::sycl::ceil(x); } -EIGEN_ALWAYS_INLINE double ceil(double x) { return cl::sycl::ceil(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float ceil(const float &x) { return ::ceilf(x); } @@ -1204,11 +1044,6 @@ T sqrt(const T &x) return sqrt(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sqrt(float x) { return cl::sycl::sqrt(x); } -EIGEN_ALWAYS_INLINE double sqrt(double x) { return cl::sycl::sqrt(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T log(const T &x) { @@ -1216,12 +1051,6 @@ T log(const T &x) { return log(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float log(float x) { return cl::sycl::log(x); } -EIGEN_ALWAYS_INLINE double log(double x) { return cl::sycl::log(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log(const float &x) { return ::logf(x); } @@ -1232,11 +1061,19 @@ double log(const double &x) { return ::log(x); } template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -typename NumTraits<T>::Real abs(const T &x) { +typename internal::enable_if<NumTraits<T>::IsSigned || NumTraits<T>::IsComplex,typename NumTraits<T>::Real>::type +abs(const T &x) { EIGEN_USING_STD_MATH(abs); return abs(x); } +template<typename T> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +typename internal::enable_if<!(NumTraits<T>::IsSigned || NumTraits<T>::IsComplex),typename NumTraits<T>::Real>::type +abs(const T &x) { + return x; +} + #if defined(__SYCL_DEVICE_ONLY__) EIGEN_ALWAYS_INLINE float abs(float x) { return cl::sycl::fabs(x); } EIGEN_ALWAYS_INLINE double abs(double x) { return cl::sycl::fabs(x); } @@ -1267,11 +1104,6 @@ T exp(const T &x) { return exp(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float exp(float x) { return cl::sycl::exp(x); } -EIGEN_ALWAYS_INLINE double exp(double x) { return cl::sycl::exp(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float exp(const float &x) { return ::expf(x); } @@ -1280,26 +1112,6 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp(const double &x) { return ::exp(x); } #endif -template<typename Scalar> -EIGEN_DEVICE_FUNC -inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x) -{ - return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x); -} - -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float expm1(float x) { return cl::sycl::expm1(x); } -EIGEN_ALWAYS_INLINE double expm1(double x) { return cl::sycl::expm1(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - -#ifdef __CUDACC__ -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -float expm1(const float &x) { return ::expm1f(x); } - -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE -double expm1(const double &x) { return ::expm1(x); } 
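
The std_fallback::expm1 deleted above implements Kahan's correction (u - 1) * x / log(u), which avoids the catastrophic cancellation of a naive exp(x) - 1 near x = 0. A standalone restatement of that fallback, written for double only with the standard <cmath> functions:

#include <cmath>
#include <cstdio>

// Restatement of the fallback deleted above (Kahan's trick,
// http://www.plunk.org/~hatch/rightway.php).
static double expm1_fallback(double x) {
  double u = std::exp(x);
  if (u == 1.0) return x;        // exp(x) rounded to 1: x itself is accurate
  double um1 = u - 1.0;
  if (um1 == -1.0) return -1.0;  // exp(x) underflowed to 0
  return um1 * x / std::log(u);  // rescale by the x' that u actually encodes
}

int main() {
  double x = 1e-12;
  std::printf("naive    exp(x)-1 : %.17g\n", std::exp(x) - 1.0);  // ~1.00009e-12
  std::printf("fallback expm1(x) : %.17g\n", expm1_fallback(x));  // ~1e-12
  std::printf("std::expm1(x)     : %.17g\n", std::expm1(x));      // ~1e-12
  return 0;
}

For x = 1e-12 the naive form is already wrong in the fifth significant digit, while the fallback agrees with std::expm1.
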
-#endif - template<typename T> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cos(const T &x) { @@ -1307,11 +1119,6 @@ T cos(const T &x) { return cos(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float cos(float x) { return cl::sycl::cos(x); } -EIGEN_ALWAYS_INLINE double cos(double x) { return cl::sycl::cos(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cos(const float &x) { return ::cosf(x); } @@ -1327,11 +1134,6 @@ T sin(const T &x) { return sin(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sin(float x) { return cl::sycl::sin(x); } -EIGEN_ALWAYS_INLINE double sin(double x) { return cl::sycl::sin(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sin(const float &x) { return ::sinf(x); } @@ -1347,11 +1149,6 @@ T tan(const T &x) { return tan(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float tan(float x) { return cl::sycl::tan(x); } -EIGEN_ALWAYS_INLINE double tan(double x) { return cl::sycl::tan(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tan(const float &x) { return ::tanf(x); } @@ -1367,11 +1164,6 @@ T acos(const T &x) { return acos(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float acos(float x) { return cl::sycl::acos(x); } -EIGEN_ALWAYS_INLINE double acos(double x) { return cl::sycl::acos(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float acos(const float &x) { return ::acosf(x); } @@ -1387,11 +1179,6 @@ T asin(const T &x) { return asin(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float asin(float x) { return cl::sycl::asin(x); } -EIGEN_ALWAYS_INLINE double asin(double x) { return cl::sycl::asin(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float asin(const float &x) { return ::asinf(x); } @@ -1407,11 +1194,6 @@ T atan(const T &x) { return atan(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float atan(float x) { return cl::sycl::atan(x); } -EIGEN_ALWAYS_INLINE double atan(double x) { return cl::sycl::atan(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float atan(const float &x) { return ::atanf(x); } @@ -1428,11 +1210,6 @@ T cosh(const T &x) { return cosh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float cosh(float x) { return cl::sycl::cosh(x); } -EIGEN_ALWAYS_INLINE double cosh(double x) { return cl::sycl::cosh(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cosh(const float &x) { return ::coshf(x); } @@ -1448,11 +1225,6 @@ T sinh(const T &x) { return sinh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float sinh(float x) { return cl::sycl::sinh(x); } -EIGEN_ALWAYS_INLINE double sinh(double x) { return cl::sycl::sinh(x); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sinh(const float &x) { return ::sinhf(x); } @@ -1468,10 +1240,7 @@ T tanh(const T &x) { return tanh(x); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float tanh(float x) { return cl::sycl::tanh(x); } -EIGEN_ALWAYS_INLINE double tanh(double x) { return cl::sycl::tanh(x); } -#elif (!defined(__CUDACC__)) && 
EIGEN_FAST_MATH +#if (!defined(__CUDACC__)) && EIGEN_FAST_MATH EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::generic_fast_tanh_float(x); } #endif @@ -1491,11 +1260,6 @@ T fmod(const T& a, const T& b) { return fmod(a, b); } -#if defined(__SYCL_DEVICE_ONLY__) -EIGEN_ALWAYS_INLINE float fmod(float x, float y) { return cl::sycl::fmod(x, y); } -EIGEN_ALWAYS_INLINE double fmod(double x, double y) { return cl::sycl::fmod(x, y); } -#endif // defined(__SYCL_DEVICE_ONLY__) - #ifdef __CUDACC__ template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1638,13 +1402,13 @@ template<> struct random_impl<bool> template<> struct scalar_fuzzy_impl<bool> { typedef bool RealScalar; - + template<typename OtherScalar> EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) { return !x; } - + EIGEN_DEVICE_FUNC static inline bool isApprox(bool x, bool y, bool) { @@ -1656,10 +1420,10 @@ template<> struct scalar_fuzzy_impl<bool> { return (!x) || y; } - + }; - + } // end namespace internal } // end namespace Eigen diff --git a/eigen/Eigen/src/Core/MathFunctionsImpl.h b/eigen/Eigen/src/Core/MathFunctionsImpl.h index ae1386b..3c9ef22 100644 --- a/eigen/Eigen/src/Core/MathFunctionsImpl.h +++ b/eigen/Eigen/src/Core/MathFunctionsImpl.h @@ -29,7 +29,12 @@ T generic_fast_tanh_float(const T& a_x) // this range is +/-1.0f in single-precision. const T plus_9 = pset1<T>(9.f); const T minus_9 = pset1<T>(-9.f); - const T x = pmax(pmin(a_x, plus_9), minus_9); + // NOTE GCC prior to 6.3 might improperly optimize this max/min + // step such that if a_x is nan, x will be either 9 or -9, + // and tanh will return 1 or -1 instead of nan. + // This is supposed to be fixed in gcc6.3, + // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 + const T x = pmax(minus_9,pmin(plus_9,a_x)); // The monomial coefficients of the numerator polynomial (odd). 
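
The reordered clamp above relies on the x86 min/max instruction semantics, where the second operand is returned whenever the comparison is false, including every comparison against NaN; the note also cites a GCC < 6.3 bug affecting the generic path. A scalar model of the two operand orders (a sketch of the instruction semantics, not the packet code itself):

#include <cmath>
#include <cstdio>

// Scalar model of minps/maxps: the SECOND operand is returned whenever the
// comparison is false, which includes every comparison against NaN.
static float sse_min(float a, float b) { return a < b ? a : b; }
static float sse_max(float a, float b) { return a > b ? a : b; }

int main() {
  const float nan = std::nanf("");
  // Old order pmax(pmin(a_x, plus_9), minus_9): NaN is silently replaced by
  // 9, so generic_fast_tanh_float(NaN) would come out near tanh(9) ~ 1.
  std::printf("old order: %g\n", sse_max(sse_min(nan, 9.0f), -9.0f));  // 9
  // New order pmax(minus_9, pmin(plus_9, a_x)): NaN stays in the second slot
  // of both operations and propagates to the result.
  std::printf("new order: %g\n", sse_max(-9.0f, sse_min(9.0f, nan)));  // nan
  return 0;
}
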
const T alpha_1 = pset1<T>(4.89352455891786e-03f); const T alpha_3 = pset1<T>(6.37261928875436e-04f); diff --git a/eigen/Eigen/src/Core/MatrixBase.h b/eigen/Eigen/src/Core/MatrixBase.h index 200e577..ce41218 100644 --- a/eigen/Eigen/src/Core/MatrixBase.h +++ b/eigen/Eigen/src/Core/MatrixBase.h @@ -76,7 +76,6 @@ template<typename Derived> class MatrixBase using Base::coeffRef; using Base::lazyAssign; using Base::eval; - using Base::operator-; using Base::operator+=; using Base::operator-=; using Base::operator*=; @@ -123,6 +122,7 @@ template<typename Derived> class MatrixBase #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase #define EIGEN_DOC_UNARY_ADDONS(X,Y) +# include "../plugins/CommonCwiseUnaryOps.h" # include "../plugins/CommonCwiseBinaryOps.h" # include "../plugins/MatrixCwiseUnaryOps.h" # include "../plugins/MatrixCwiseBinaryOps.h" diff --git a/eigen/Eigen/src/Core/NestByValue.h b/eigen/Eigen/src/Core/NestByValue.h index 01cf192..13adf07 100644 --- a/eigen/Eigen/src/Core/NestByValue.h +++ b/eigen/Eigen/src/Core/NestByValue.h @@ -67,25 +67,25 @@ template<typename ExpressionType> class NestByValue } template<int LoadMode> - EIGEN_DEVICE_FUNC inline const PacketScalar packet(Index row, Index col) const + inline const PacketScalar packet(Index row, Index col) const { return m_expression.template packet<LoadMode>(row, col); } template<int LoadMode> - EIGEN_DEVICE_FUNC inline void writePacket(Index row, Index col, const PacketScalar& x) + inline void writePacket(Index row, Index col, const PacketScalar& x) { m_expression.const_cast_derived().template writePacket<LoadMode>(row, col, x); } template<int LoadMode> - EIGEN_DEVICE_FUNC inline const PacketScalar packet(Index index) const + inline const PacketScalar packet(Index index) const { return m_expression.template packet<LoadMode>(index); } template<int LoadMode> - EIGEN_DEVICE_FUNC inline void writePacket(Index index, const PacketScalar& x) + inline void writePacket(Index index, const PacketScalar& x) { m_expression.const_cast_derived().template writePacket<LoadMode>(index, x); } @@ -99,7 +99,7 @@ template<typename ExpressionType> class NestByValue /** \returns an expression of the temporary version of *this. */ template<typename Derived> -EIGEN_DEVICE_FUNC inline const NestByValue<Derived> +inline const NestByValue<Derived> DenseBase<Derived>::nestByValue() const { return NestByValue<Derived>(derived()); diff --git a/eigen/Eigen/src/Core/NumTraits.h b/eigen/Eigen/src/Core/NumTraits.h index aebc0c2..daf4898 100644 --- a/eigen/Eigen/src/Core/NumTraits.h +++ b/eigen/Eigen/src/Core/NumTraits.h @@ -71,7 +71,7 @@ struct default_digits10_impl<T,false,true> // Integer * and to \c 0 otherwise. * \li Enum values ReadCost, AddCost and MulCost representing a rough estimate of the number of CPU cycles needed * to by move / add / mul instructions respectively, assuming the data is already stored in CPU registers. - * Stay vague here. No need to do architecture-specific stuff. If you don't know what this means, just use \c Eigen::HugeCost. + * Stay vague here. No need to do architecture-specific stuff. * \li An enum value \a IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. * \li An enum value \a RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must * be called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise. 
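
The NumTraits documentation above lists the members every specialization must provide, and the hunk below adds a digits10() forwarder for array-valued scalars. A minimal sketch of that contract for a custom scalar (MyFixed and all of its constants are invented for illustration):

#include <Eigen/Core>

// MyFixed is a hypothetical scalar; the specialization below is a minimal
// sketch of the contract documented above.
struct MyFixed { int raw; };

namespace Eigen {
template<> struct NumTraits<MyFixed> : GenericNumTraits<MyFixed> {
  typedef MyFixed Real;        // real (non-complex) scalar: Real is T itself
  typedef MyFixed NonInteger;  // result type of sqrt() etc.
  typedef MyFixed Nested;
  enum {
    IsComplex = 0,
    IsInteger = 0,
    IsSigned = 1,
    RequireInitialization = 1,              // the constructor must be called
    ReadCost = 1, AddCost = 1, MulCost = 2  // rough cycle estimates only
  };
  static inline MyFixed epsilon()         { return MyFixed{1}; }
  static inline MyFixed dummy_precision() { return MyFixed{2}; }
  static inline int digits10()            { return 4; }  // cf. the hunk below
};
} // namespace Eigen
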
@@ -215,6 +215,8 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> > static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); } EIGEN_DEVICE_FUNC static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); } + + static inline int digits10() { return NumTraits<Scalar>::digits10(); } }; template<> struct NumTraits<std::string> diff --git a/eigen/Eigen/src/Core/ProductEvaluators.h b/eigen/Eigen/src/Core/ProductEvaluators.h index 583b7f5..c42725d 100644 --- a/eigen/Eigen/src/Core/ProductEvaluators.h +++ b/eigen/Eigen/src/Core/ProductEvaluators.h @@ -207,6 +207,12 @@ struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename static const bool value = true; }; +template<typename OtherXpr, typename Lhs, typename Rhs> +struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_difference_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr, + const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > { + static const bool value = true; +}; + template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2> struct assignment_from_xpr_op_product { diff --git a/eigen/Eigen/src/Core/Random.h b/eigen/Eigen/src/Core/Random.h index 486e9ed..6faf789 100644 --- a/eigen/Eigen/src/Core/Random.h +++ b/eigen/Eigen/src/Core/Random.h @@ -128,7 +128,7 @@ DenseBase<Derived>::Random() * \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index) */ template<typename Derived> -EIGEN_DEVICE_FUNC inline Derived& DenseBase<Derived>::setRandom() +inline Derived& DenseBase<Derived>::setRandom() { return *this = Random(rows(), cols()); } diff --git a/eigen/Eigen/src/Core/Redux.h b/eigen/Eigen/src/Core/Redux.h index 2b5b73b..b6e8f88 100644 --- a/eigen/Eigen/src/Core/Redux.h +++ b/eigen/Eigen/src/Core/Redux.h @@ -407,7 +407,7 @@ protected: */ template<typename Derived> template<typename Func> -EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar +typename internal::traits<Derived>::Scalar DenseBase<Derived>::redux(const Func& func) const { eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); @@ -422,7 +422,7 @@ DenseBase<Derived>::redux(const Func& func) const * \warning the result is undefined if \c *this contains NaN. */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::minCoeff() const { return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>()); @@ -432,7 +432,7 @@ DenseBase<Derived>::minCoeff() const * \warning the result is undefined if \c *this contains NaN. 
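
Given the \warning above, the caller is responsible for excluding NaN before reducing. A small guard using DenseBase::hasNaN() (a sketch; the assertion policy is the caller's choice):

#include <Eigen/Dense>
#include <cassert>

// minCoeff()/maxCoeff() are undefined when NaN is present, so check first.
double safe_min(const Eigen::VectorXd& v) {
  assert(!v.hasNaN() && "minCoeff() is undefined when NaN is present");
  return v.minCoeff();
}
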
*/ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::maxCoeff() const { return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>()); @@ -445,7 +445,7 @@ DenseBase<Derived>::maxCoeff() const * \sa trace(), prod(), mean() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::sum() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) @@ -458,7 +458,7 @@ DenseBase<Derived>::sum() const * \sa trace(), prod(), sum() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::mean() const { #ifdef __INTEL_COMPILER @@ -479,7 +479,7 @@ DenseBase<Derived>::mean() const * \sa sum(), mean(), trace() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar DenseBase<Derived>::prod() const { if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0)) @@ -494,7 +494,7 @@ DenseBase<Derived>::prod() const * \sa diagonal(), sum() */ template<typename Derived> -EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar +EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar MatrixBase<Derived>::trace() const { return derived().diagonal().sum(); diff --git a/eigen/Eigen/src/Core/Ref.h b/eigen/Eigen/src/Core/Ref.h index abb1e51..bdf24f5 100644 --- a/eigen/Eigen/src/Core/Ref.h +++ b/eigen/Eigen/src/Core/Ref.h @@ -184,8 +184,6 @@ protected: * void foo(const Ref<MatrixXf,0,Stride<> >& A) { foo_impl(A); } * \endcode * - * See also the following stackoverflow questions for further references: - * - <a href="http://stackoverflow.com/questions/21132538/correct-usage-of-the-eigenref-class">Correct usage of the Eigen::Ref<> class</a> * * \sa PlainObjectBase::Map(), \ref TopicStorageOrders */ diff --git a/eigen/Eigen/src/Core/Replicate.h b/eigen/Eigen/src/Core/Replicate.h index 0b2d6d7..9960ef8 100644 --- a/eigen/Eigen/src/Core/Replicate.h +++ b/eigen/Eigen/src/Core/Replicate.h @@ -115,7 +115,7 @@ template<typename MatrixType,int RowFactor,int ColFactor> class Replicate */ template<typename Derived> template<int RowFactor, int ColFactor> -EIGEN_DEVICE_FUNC const Replicate<Derived,RowFactor,ColFactor> +const Replicate<Derived,RowFactor,ColFactor> DenseBase<Derived>::replicate() const { return Replicate<Derived,RowFactor,ColFactor>(derived()); @@ -130,7 +130,7 @@ DenseBase<Derived>::replicate() const * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate */ template<typename ExpressionType, int Direction> -EIGEN_DEVICE_FUNC const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType +const typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType VectorwiseOp<ExpressionType,Direction>::replicate(Index factor) const { return typename VectorwiseOp<ExpressionType,Direction>::ReplicateReturnType diff --git a/eigen/Eigen/src/Core/ReturnByValue.h b/eigen/Eigen/src/Core/ReturnByValue.h index 11dc86d..c44b767 100644 --- a/eigen/Eigen/src/Core/ReturnByValue.h +++ b/eigen/Eigen/src/Core/ReturnByValue.h @@ -79,7 +79,7 @@ template<typename Derived> class ReturnByValue template<typename 
Derived> template<typename OtherDerived> -EIGEN_DEVICE_FUNC Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other) +Derived& DenseBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other) { other.evalTo(derived()); return derived(); diff --git a/eigen/Eigen/src/Core/Reverse.h b/eigen/Eigen/src/Core/Reverse.h index 8b6b3ab..0640cda 100644 --- a/eigen/Eigen/src/Core/Reverse.h +++ b/eigen/Eigen/src/Core/Reverse.h @@ -114,7 +114,7 @@ template<typename MatrixType, int Direction> class Reverse * */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ReverseReturnType +inline typename DenseBase<Derived>::ReverseReturnType DenseBase<Derived>::reverse() { return ReverseReturnType(derived()); @@ -136,7 +136,7 @@ DenseBase<Derived>::reverse() * * \sa VectorwiseOp::reverseInPlace(), reverse() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void DenseBase<Derived>::reverseInPlace() +inline void DenseBase<Derived>::reverseInPlace() { if(cols()>rows()) { @@ -201,7 +201,7 @@ struct vectorwise_reverse_inplace_impl<Horizontal> * * \sa DenseBase::reverseInPlace(), reverse() */ template<typename ExpressionType, int Direction> -EIGEN_DEVICE_FUNC void VectorwiseOp<ExpressionType,Direction>::reverseInPlace() +void VectorwiseOp<ExpressionType,Direction>::reverseInPlace() { internal::vectorwise_reverse_inplace_impl<Direction>::run(_expression().const_cast_derived()); } diff --git a/eigen/Eigen/src/Core/SelfAdjointView.h b/eigen/Eigen/src/Core/SelfAdjointView.h index 7e71fe3..504c98f 100644 --- a/eigen/Eigen/src/Core/SelfAdjointView.h +++ b/eigen/Eigen/src/Core/SelfAdjointView.h @@ -322,7 +322,7 @@ public: /** This is the const version of MatrixBase::selfadjointView() */ template<typename Derived> template<unsigned int UpLo> -EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type +typename MatrixBase<Derived>::template ConstSelfAdjointViewReturnType<UpLo>::Type MatrixBase<Derived>::selfadjointView() const { return typename ConstSelfAdjointViewReturnType<UpLo>::Type(derived()); @@ -339,7 +339,7 @@ MatrixBase<Derived>::selfadjointView() const */ template<typename Derived> template<unsigned int UpLo> -EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type +typename MatrixBase<Derived>::template SelfAdjointViewReturnType<UpLo>::Type MatrixBase<Derived>::selfadjointView() { return typename SelfAdjointViewReturnType<UpLo>::Type(derived()); diff --git a/eigen/Eigen/src/Core/Solve.h b/eigen/Eigen/src/Core/Solve.h index 960a585..a8daea5 100644 --- a/eigen/Eigen/src/Core/Solve.h +++ b/eigen/Eigen/src/Core/Solve.h @@ -34,12 +34,12 @@ template<typename Decomposition, typename RhsType,typename StorageKind> struct s template<typename Decomposition, typename RhsType> struct solve_traits<Decomposition,RhsType,Dense> { - typedef Matrix<typename RhsType::Scalar, + typedef typename make_proper_matrix_type<typename RhsType::Scalar, Decomposition::ColsAtCompileTime, RhsType::ColsAtCompileTime, RhsType::PlainObject::Options, Decomposition::MaxColsAtCompileTime, - RhsType::MaxColsAtCompileTime> PlainObject; + RhsType::MaxColsAtCompileTime>::type PlainObject; }; template<typename Decomposition, typename RhsType> diff --git a/eigen/Eigen/src/Core/SolveTriangular.h b/eigen/Eigen/src/Core/SolveTriangular.h index a0011d4..049890b 100644 --- a/eigen/Eigen/src/Core/SolveTriangular.h +++ b/eigen/Eigen/src/Core/SolveTriangular.h @@ -164,7 +164,7 @@ struct 
triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,CompleteUnrolling,1> { #ifndef EIGEN_PARSED_BY_DOXYGEN template<typename MatrixType, unsigned int Mode> template<int Side, typename OtherDerived> -EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const +void TriangularViewImpl<MatrixType,Mode,Dense>::solveInPlace(const MatrixBase<OtherDerived>& _other) const { OtherDerived& other = _other.const_cast_derived(); eigen_assert( derived().cols() == derived().rows() && ((Side==OnTheLeft && derived().cols() == other.rows()) || (Side==OnTheRight && derived().cols() == other.cols())) ); diff --git a/eigen/Eigen/src/Core/Transpose.h b/eigen/Eigen/src/Core/Transpose.h index ba7d6e6..79b767b 100644 --- a/eigen/Eigen/src/Core/Transpose.h +++ b/eigen/Eigen/src/Core/Transpose.h @@ -168,7 +168,7 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense> * * \sa transposeInPlace(), adjoint() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline Transpose<Derived> +inline Transpose<Derived> DenseBase<Derived>::transpose() { return TransposeReturnType(derived()); @@ -180,7 +180,7 @@ DenseBase<Derived>::transpose() * * \sa transposeInPlace(), adjoint() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ConstTransposeReturnType +inline typename DenseBase<Derived>::ConstTransposeReturnType DenseBase<Derived>::transpose() const { return ConstTransposeReturnType(derived()); @@ -206,7 +206,7 @@ DenseBase<Derived>::transpose() const * * \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */ template<typename Derived> -EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::AdjointReturnType +inline const typename MatrixBase<Derived>::AdjointReturnType MatrixBase<Derived>::adjoint() const { return AdjointReturnType(this->transpose()); @@ -281,7 +281,7 @@ struct inplace_transpose_selector<MatrixType,false,MatchPacketSize> { // non squ * * \sa transpose(), adjoint(), adjointInPlace() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void DenseBase<Derived>::transposeInPlace() +inline void DenseBase<Derived>::transposeInPlace() { eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic)) && "transposeInPlace() called on a non-square non-resizable matrix"); @@ -312,7 +312,7 @@ EIGEN_DEVICE_FUNC inline void DenseBase<Derived>::transposeInPlace() * * \sa transpose(), adjoint(), transposeInPlace() */ template<typename Derived> -EIGEN_DEVICE_FUNC inline void MatrixBase<Derived>::adjointInPlace() +inline void MatrixBase<Derived>::adjointInPlace() { derived() = adjoint().eval(); } diff --git a/eigen/Eigen/src/Core/TriangularMatrix.h b/eigen/Eigen/src/Core/TriangularMatrix.h index ed80da3..667ef09 100644 --- a/eigen/Eigen/src/Core/TriangularMatrix.h +++ b/eigen/Eigen/src/Core/TriangularMatrix.h @@ -488,6 +488,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat * \sa TriangularView::solveInPlace() */ template<int Side, typename Other> + EIGEN_DEVICE_FUNC inline const internal::triangular_solve_retval<Side,TriangularViewType, Other> solve(const MatrixBase<Other>& other) const; @@ -553,7 +554,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat // FIXME should we keep that possibility template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -EIGEN_DEVICE_FUNC inline TriangularView<MatrixType, Mode>& +inline 
TriangularView<MatrixType, Mode>& TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other) { internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>()); @@ -563,7 +564,7 @@ TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDer // FIXME should we keep that possibility template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other) +void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const MatrixBase<OtherDerived>& other) { internal::call_assignment_no_alias(derived(), other.template triangularView<Mode>()); } @@ -572,7 +573,7 @@ EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(c template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -EIGEN_DEVICE_FUNC inline TriangularView<MatrixType, Mode>& +inline TriangularView<MatrixType, Mode>& TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<OtherDerived>& other) { eigen_assert(Mode == int(OtherDerived::Mode)); @@ -582,7 +583,7 @@ TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const TriangularBase<Othe template<typename MatrixType, unsigned int Mode> template<typename OtherDerived> -EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other) +void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(const TriangularBase<OtherDerived>& other) { eigen_assert(Mode == int(OtherDerived::Mode)); internal::call_assignment_no_alias(derived(), other.derived()); @@ -597,7 +598,7 @@ EIGEN_DEVICE_FUNC void TriangularViewImpl<MatrixType, Mode, Dense>::lazyAssign(c * If the matrix is triangular, the opposite part is set to zero. */ template<typename Derived> template<typename DenseDerived> -EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const +void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> &other) const { evalToLazy(other.derived()); } @@ -623,7 +624,6 @@ EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalTo(MatrixBase<DenseDerived> */ template<typename Derived> template<unsigned int Mode> -EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template TriangularViewReturnType<Mode>::Type MatrixBase<Derived>::triangularView() { @@ -633,7 +633,6 @@ MatrixBase<Derived>::triangularView() /** This is the const version of MatrixBase::triangularView() */ template<typename Derived> template<unsigned int Mode> -EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::template ConstTriangularViewReturnType<Mode>::Type MatrixBase<Derived>::triangularView() const { @@ -931,7 +930,7 @@ struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite> * If the matrix is triangular, the opposite part is set to zero. 
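
As the evalToLazy() documentation above states, writing a triangular view into a dense matrix sets the opposite part to zero (the assignment routes through triangular_assignment_loop with SetOpposite enabled). A small sketch:

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::Matrix3d m;
  m << 1, 2, 3,
       4, 5, 6,
       7, 8, 9;
  // Dense assignment from a triangular view goes through evalToLazy(); as
  // documented above, the opposite (here lower) part is set to zero.
  Eigen::Matrix3d u = m.triangularView<Eigen::Upper>();
  std::cout << u << "\n";  // 1 2 3
                           // 0 5 6
                           // 0 0 9
  return 0;
}
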
*/ template<typename Derived> template<typename DenseDerived> -EIGEN_DEVICE_FUNC void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const +void TriangularBase<Derived>::evalToLazy(MatrixBase<DenseDerived> &other) const { other.derived().resize(this->rows(), this->cols()); internal::call_triangular_assignment_loop<Derived::Mode,(Derived::Mode&SelfAdjoint)==0 /* SetOpposite */>(other.derived(), derived().nestedExpression()); diff --git a/eigen/Eigen/src/Core/VectorwiseOp.h b/eigen/Eigen/src/Core/VectorwiseOp.h index 893bc79..4fe267e 100644 --- a/eigen/Eigen/src/Core/VectorwiseOp.h +++ b/eigen/Eigen/src/Core/VectorwiseOp.h @@ -670,7 +670,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::ColwiseReturnType +inline typename DenseBase<Derived>::ColwiseReturnType DenseBase<Derived>::colwise() { return ColwiseReturnType(derived()); @@ -684,7 +684,7 @@ DenseBase<Derived>::colwise() * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting */ template<typename Derived> -EIGEN_DEVICE_FUNC inline typename DenseBase<Derived>::RowwiseReturnType +inline typename DenseBase<Derived>::RowwiseReturnType DenseBase<Derived>::rowwise() { return RowwiseReturnType(derived()); diff --git a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h index 6362309..195d40f 100644 --- a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h @@ -183,22 +183,12 @@ template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& } #endif -template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { - // Arguments are swapped to match NaN propagation behavior of std::min. - return _mm256_min_ps(b,a); -} -template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { - // Arguments are swapped to match NaN propagation behavior of std::min. - return _mm256_min_pd(b,a); -} -template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { - // Arguments are swapped to match NaN propagation behavior of std::max. - return _mm256_max_ps(b,a); -} -template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { - // Arguments are swapped to match NaN propagation behavior of std::max. 
- return _mm256_max_pd(b,a); -} +template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); } + +template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); } + template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); } template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); } @@ -235,7 +225,7 @@ template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from) // Packet8f tmp = _mm256_castps128_ps256(_mm_loadu_ps(from)); // tmp = _mm256_insertf128_ps(tmp, _mm_movehl_ps(_mm256_castps256_ps128(tmp),_mm256_castps256_ps128(tmp)), 1); // return _mm256_unpacklo_ps(tmp,tmp); - + // _mm256_insertf128_ps is very slow on Haswell, thus: Packet8f tmp = _mm256_broadcast_ps((const __m128*)(const void*)from); // mimic an "inplace" permutation of the lower 128bits using a blend diff --git a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h index 12b8975..f6500a1 100644 --- a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -59,8 +59,8 @@ template<> struct packet_traits<float> : default_packet_traits HasLog = 1, #endif HasExp = 1, - HasSqrt = EIGEN_FAST_MATH, - HasRsqrt = EIGEN_FAST_MATH, + HasSqrt = 1, + HasRsqrt = 1, #endif HasDiv = 1 }; @@ -75,7 +75,7 @@ template<> struct packet_traits<double> : default_packet_traits size = 8, HasHalfPacket = 1, #if EIGEN_GNUC_AT_LEAST(5, 3) - HasSqrt = EIGEN_FAST_MATH, + HasSqrt = 1, HasRsqrt = EIGEN_FAST_MATH, #endif HasDiv = 1 @@ -230,27 +230,23 @@ EIGEN_STRONG_INLINE Packet8d pmadd(const Packet8d& a, const Packet8d& b, template <> EIGEN_STRONG_INLINE Packet16f pmin<Packet16f>(const Packet16f& a, const Packet16f& b) { - // Arguments are reversed to match NaN propagation behavior of std::min. - return _mm512_min_ps(b, a); + return _mm512_min_ps(a, b); } template <> EIGEN_STRONG_INLINE Packet8d pmin<Packet8d>(const Packet8d& a, const Packet8d& b) { - // Arguments are reversed to match NaN propagation behavior of std::min. - return _mm512_min_pd(b, a); + return _mm512_min_pd(a, b); } template <> EIGEN_STRONG_INLINE Packet16f pmax<Packet16f>(const Packet16f& a, const Packet16f& b) { - // Arguments are reversed to match NaN propagation behavior of std::max. - return _mm512_max_ps(b, a); + return _mm512_max_ps(a, b); } template <> EIGEN_STRONG_INLINE Packet8d pmax<Packet8d>(const Packet8d& a, const Packet8d& b) { - // Arguments are reversed to match NaN propagation behavior of std::max. 
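
Both hunks above drop the operand swap that made the AVX and AVX512 kernels mimic the NaN behavior of std::min/std::max. The asymmetry is easy to observe directly; a sketch assuming an AVX-capable build (compile with -mavx):

#include <immintrin.h>
#include <cmath>
#include <cstdio>

// vminps returns its SECOND operand when the comparison is false, so the
// result under NaN depends entirely on argument order.
int main() {
  __m256 a = _mm256_set1_ps(std::nanf(""));
  __m256 b = _mm256_set1_ps(1.0f);
  float r1[8], r2[8];
  _mm256_storeu_ps(r1, _mm256_min_ps(a, b));  // NaN in first slot -> returns b
  _mm256_storeu_ps(r2, _mm256_min_ps(b, a));  // NaN in second slot -> returns a
  std::printf("min(a,b) = %g\n", r1[0]);  // 1: the NaN is dropped
  std::printf("min(b,a) = %g\n", r2[0]);  // nan: matches std::min(a,b)
  return 0;
}
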
- return _mm512_max_pd(b, a); + return _mm512_max_pd(a, b); } template <> @@ -465,21 +461,53 @@ EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) { // {a0, a0 a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7} template <> EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) { - __m256i low_half = _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); - __m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half)); - __m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0)); - return pairs; + Packet8f lane0 = _mm256_broadcast_ps((const __m128*)(const void*)from); + // mimic an "inplace" permutation of the lower 128bits using a blend + lane0 = _mm256_blend_ps( + lane0, _mm256_castps128_ps256(_mm_permute_ps( + _mm256_castps256_ps128(lane0), _MM_SHUFFLE(1, 0, 1, 0))), + 15); + // then we can perform a consistent permutation on the global register to get + // everything in shape: + lane0 = _mm256_permute_ps(lane0, _MM_SHUFFLE(3, 3, 2, 2)); + + Packet8f lane1 = _mm256_broadcast_ps((const __m128*)(const void*)(from + 4)); + // mimic an "inplace" permutation of the lower 128bits using a blend + lane1 = _mm256_blend_ps( + lane1, _mm256_castps128_ps256(_mm_permute_ps( + _mm256_castps256_ps128(lane1), _MM_SHUFFLE(1, 0, 1, 0))), + 15); + // then we can perform a consistent permutation on the global register to get + // everything in shape: + lane1 = _mm256_permute_ps(lane1, _MM_SHUFFLE(3, 3, 2, 2)); + +#ifdef EIGEN_VECTORIZE_AVX512DQ + Packet16f res = _mm512_undefined_ps(); + return _mm512_insertf32x8(res, lane0, 0); + return _mm512_insertf32x8(res, lane1, 1); + return res; +#else + Packet16f res = _mm512_undefined_ps(); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 0), 0); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane0, 1), 1); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 0), 2); + res = _mm512_insertf32x4(res, _mm256_extractf128_ps(lane1, 1), 3); + return res; +#endif } // Loads 4 doubles from memory a returns the packet {a0, a0 a1, a1, a2, a2, a3, // a3} template <> EIGEN_STRONG_INLINE Packet8d ploaddup<Packet8d>(const double* from) { - __m512d x = _mm512_setzero_pd(); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[0]), 0); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[1]), 1); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[2]), 2); - x = _mm512_insertf64x2(x, _mm_loaddup_pd(&from[3]), 3); - return x; + Packet4d lane0 = _mm256_broadcast_pd((const __m128d*)(const void*)from); + lane0 = _mm256_permute_pd(lane0, 3 << 2); + + Packet4d lane1 = _mm256_broadcast_pd((const __m128d*)(const void*)(from + 2)); + lane1 = _mm256_permute_pd(lane1, 3 << 2); + + Packet8d res = _mm512_undefined_pd(); + res = _mm512_insertf64x4(res, lane0, 0); + return _mm512_insertf64x4(res, lane1, 1); } // Loads 4 floats from memory a returns the packet @@ -497,11 +525,11 @@ EIGEN_STRONG_INLINE Packet16f ploadquad<Packet16f>(const float* from) { // {a0, a0 a0, a0, a1, a1, a1, a1} template <> EIGEN_STRONG_INLINE Packet8d ploadquad<Packet8d>(const double* from) { - __m128d tmp0 = _mm_load_pd1(from); - __m256d lane0 = _mm256_broadcastsd_pd(tmp0); - __m128d tmp1 = _mm_load_pd1(from + 1); - __m256d lane1 = _mm256_broadcastsd_pd(tmp1); - __m512d tmp = _mm512_undefined_pd(); + Packet8d tmp = _mm512_undefined_pd(); + Packet2d tmp0 = _mm_load_pd1(from); + Packet2d tmp1 = _mm_load_pd1(from + 1); + Packet4d lane0 = _mm256_broadcastsd_pd(tmp0); + Packet4d lane1 = _mm256_broadcastsd_pd(tmp1); tmp = _mm512_insertf64x4(tmp, 
lane0, 0); return _mm512_insertf64x4(tmp, lane1, 1); } @@ -632,8 +660,8 @@ EIGEN_STRONG_INLINE Packet8d pabs(const Packet8d& a) { #ifdef EIGEN_VECTORIZE_AVX512DQ // AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512 #define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \ - __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0); \ - __m256 OUTPUT##_1 = _mm512_extractf32x8_ps(INPUT, 1) + __m256 OUTPUT##_0 = _mm512_extractf32x8_ps(INPUT, 0) __m256 OUTPUT##_1 = \ + _mm512_extractf32x8_ps(INPUT, 1) #else #define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \ __m256 OUTPUT##_0 = _mm256_insertf128_ps( \ @@ -723,7 +751,7 @@ vecs) blend1 = _mm256_blend_ps(sum1, sum2, 0xcc); blend2 = _mm256_blend_ps(sum3, sum4, 0xcc); - final = _mm256_add_ps(final, _mm256_blend_ps(blend1, blend2, 0xf0)); + final = padd(final, _mm256_blend_ps(blend1, blend2, 0xf0)); hsum1 = _mm256_hadd_ps(vecs8_0, vecs9_0); hsum2 = _mm256_hadd_ps(vecs10_0, vecs11_0); @@ -773,7 +801,7 @@ vecs) blend1 = _mm256_blend_ps(sum1, sum2, 0xcc); blend2 = _mm256_blend_ps(sum3, sum4, 0xcc); - final_1 = _mm256_add_ps(final_1, _mm256_blend_ps(blend1, blend2, 0xf0)); + final_1 = padd(final_1, _mm256_blend_ps(blend1, blend2, 0xf0)); __m512 final_output; @@ -823,7 +851,7 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs) tmp1 = _mm256_hadd_pd(vecs2_1, vecs3_1); tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1)); - final_0 = _mm256_add_pd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC)); + final_0 = padd(final_0, _mm256_blend_pd(tmp0, tmp1, 0xC)); tmp0 = _mm256_hadd_pd(vecs4_0, vecs5_0); tmp0 = _mm256_add_pd(tmp0, _mm256_permute2f128_pd(tmp0, tmp0, 1)); @@ -839,7 +867,7 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs) tmp1 = _mm256_hadd_pd(vecs6_1, vecs7_1); tmp1 = _mm256_add_pd(tmp1, _mm256_permute2f128_pd(tmp1, tmp1, 1)); - final_1 = _mm256_add_pd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC)); + final_1 = padd(final_1, _mm256_blend_pd(tmp0, tmp1, 0xC)); __m512d final_output = _mm512_insertf64x4(final_output, final_0, 0); @@ -848,52 +876,55 @@ template<> EIGEN_STRONG_INLINE Packet8d preduxp<Packet8d>(const Packet8d* vecs) template <> EIGEN_STRONG_INLINE float predux<Packet16f>(const Packet16f& a) { -#ifdef EIGEN_VECTORIZE_AVX512DQ - __m256 lane0 = _mm512_extractf32x8_ps(a, 0); - __m256 lane1 = _mm512_extractf32x8_ps(a, 1); - Packet8f x = _mm256_add_ps(lane0, lane1); - return predux<Packet8f>(x); + //#ifdef EIGEN_VECTORIZE_AVX512DQ +#if 0 + Packet8f lane0 = _mm512_extractf32x8_ps(a, 0); + Packet8f lane1 = _mm512_extractf32x8_ps(a, 1); + Packet8f sum = padd(lane0, lane1); + Packet8f tmp0 = _mm256_hadd_ps(sum, _mm256_permute2f128_ps(a, a, 1)); + tmp0 = _mm256_hadd_ps(tmp0, tmp0); + return pfirst(_mm256_hadd_ps(tmp0, tmp0)); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 sum = _mm_add_ps(_mm_add_ps(lane0, lane1), _mm_add_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f sum = padd(padd(lane0, lane1), padd(lane2, lane3)); sum = _mm_hadd_ps(sum, sum); sum = _mm_hadd_ps(sum, _mm_permute_ps(sum, 1)); - return _mm_cvtss_f32(sum); + return pfirst(sum); #endif } template <> EIGEN_STRONG_INLINE double predux<Packet8d>(const Packet8d& a) { - __m256d 
lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d sum = _mm256_add_pd(lane0, lane1); - __m256d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1)); - return _mm_cvtsd_f64(_mm256_castpd256_pd128(_mm256_hadd_pd(tmp0, tmp0))); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d sum = padd(lane0, lane1); + Packet4d tmp0 = _mm256_hadd_pd(sum, _mm256_permute2f128_pd(sum, sum, 1)); + return pfirst(_mm256_hadd_pd(tmp0, tmp0)); } template <> EIGEN_STRONG_INLINE Packet8f predux_downto4<Packet16f>(const Packet16f& a) { #ifdef EIGEN_VECTORIZE_AVX512DQ - __m256 lane0 = _mm512_extractf32x8_ps(a, 0); - __m256 lane1 = _mm512_extractf32x8_ps(a, 1); - return _mm256_add_ps(lane0, lane1); + Packet8f lane0 = _mm512_extractf32x8_ps(a, 0); + Packet8f lane1 = _mm512_extractf32x8_ps(a, 1); + return padd(lane0, lane1); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 sum0 = _mm_add_ps(lane0, lane2); - __m128 sum1 = _mm_add_ps(lane1, lane3); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f sum0 = padd(lane0, lane2); + Packet4f sum1 = padd(lane1, lane3); return _mm256_insertf128_ps(_mm256_castps128_ps256(sum0), sum1, 1); #endif } template <> EIGEN_STRONG_INLINE Packet4d predux_downto4<Packet8d>(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_add_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = padd(lane0, lane1); return res; } @@ -908,59 +939,58 @@ EIGEN_STRONG_INLINE float predux_mul<Packet16f>(const Packet16f& a) { res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); #else - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res = pmul(pmul(lane0, lane1), pmul(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = pmul(pmul(lane0, lane1), pmul(lane2, lane3)); res = pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(pmul(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); #endif } template <> EIGEN_STRONG_INLINE double predux_mul<Packet8d>(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = pmul(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = pmul(lane0, lane1); res = pmul(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(pmul(res, _mm256_shuffle_pd(res, res, 1))); } template <> EIGEN_STRONG_INLINE float predux_min<Packet16f>(const Packet16f& a) { - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res 
= _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = _mm_min_ps(_mm_min_ps(lane0, lane1), _mm_min_ps(lane2, lane3)); res = _mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(_mm_min_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); } template <> EIGEN_STRONG_INLINE double predux_min<Packet8d>(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_min_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = _mm256_min_pd(lane0, lane1); res = _mm256_min_pd(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(_mm256_min_pd(res, _mm256_shuffle_pd(res, res, 1))); } template <> EIGEN_STRONG_INLINE float predux_max<Packet16f>(const Packet16f& a) { - __m128 lane0 = _mm512_extractf32x4_ps(a, 0); - __m128 lane1 = _mm512_extractf32x4_ps(a, 1); - __m128 lane2 = _mm512_extractf32x4_ps(a, 2); - __m128 lane3 = _mm512_extractf32x4_ps(a, 3); - __m128 res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3)); + Packet4f lane0 = _mm512_extractf32x4_ps(a, 0); + Packet4f lane1 = _mm512_extractf32x4_ps(a, 1); + Packet4f lane2 = _mm512_extractf32x4_ps(a, 2); + Packet4f lane3 = _mm512_extractf32x4_ps(a, 3); + Packet4f res = _mm_max_ps(_mm_max_ps(lane0, lane1), _mm_max_ps(lane2, lane3)); res = _mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 3, 2))); return pfirst(_mm_max_ps(res, _mm_permute_ps(res, _MM_SHUFFLE(0, 0, 0, 1)))); } - template <> EIGEN_STRONG_INLINE double predux_max<Packet8d>(const Packet8d& a) { - __m256d lane0 = _mm512_extractf64x4_pd(a, 0); - __m256d lane1 = _mm512_extractf64x4_pd(a, 1); - __m256d res = _mm256_max_pd(lane0, lane1); + Packet4d lane0 = _mm512_extractf64x4_pd(a, 0); + Packet4d lane1 = _mm512_extractf64x4_pd(a, 1); + Packet4d res = _mm256_max_pd(lane0, lane1); res = _mm256_max_pd(res, _mm256_permute2f128_pd(res, res, 1)); return pfirst(_mm256_max_pd(res, _mm256_shuffle_pd(res, res, 1))); } diff --git a/eigen/Eigen/src/Core/arch/CUDA/Half.h b/eigen/Eigen/src/Core/arch/CUDA/Half.h index 67518da..294c517 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/Half.h +++ b/eigen/Eigen/src/Core/arch/CUDA/Half.h @@ -53,7 +53,7 @@ namespace half_impl { // Make our own __half definition that is similar to CUDA's. 
struct __half { - EIGEN_DEVICE_FUNC __half() : x(0) {} + EIGEN_DEVICE_FUNC __half() {} explicit EIGEN_DEVICE_FUNC __half(unsigned short raw) : x(raw) {} unsigned short x; }; @@ -386,18 +386,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) { return result; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 - return half(hexp(a)); -#else - return half(::expf(float(a))); -#endif -} -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half expm1(const half& a) { - return half(numext::expm1(float(a))); + return half(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { #if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 - return half(::hlog(a)); + return Eigen::half(::hlog(a)); #else return half(::logf(float(a))); #endif @@ -409,11 +402,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 - return half(hsqrt(a)); -#else - return half(::sqrtf(float(a))); -#endif + return half(::sqrtf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) { return half(::powf(float(a), float(b))); @@ -431,18 +420,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { return half(::tanhf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 - return half(hfloor(a)); -#else return half(::floorf(float(a))); -#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 300 - return half(hceil(a)); -#else return half(::ceilf(float(a))); -#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { @@ -493,9 +474,59 @@ template<> struct is_arithmetic<half> { enum { value = true }; }; } // end namespace internal +} // end namespace Eigen + +namespace std { +template<> +struct numeric_limits<Eigen::half> { + static const bool is_specialized = true; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const bool has_infinity = true; + static const bool has_quiet_NaN = true; + static const bool has_signaling_NaN = true; + static const float_denorm_style has_denorm = denorm_present; + static const bool has_denorm_loss = false; + static const std::float_round_style round_style = std::round_to_nearest; + static const bool is_iec559 = false; + static const bool is_bounded = false; + static const bool is_modulo = false; + static const int digits = 11; + static const int digits10 = 2; + //static const int max_digits10 = ; + static const int radix = 2; + static const int min_exponent = -13; + static const int min_exponent10 = -4; + static const int max_exponent = 16; + static const int max_exponent10 = 4; + static const bool traps = true; + static const bool tinyness_before = false; + + static Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } + static Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } + static Eigen::half (max)() { 
return Eigen::half_impl::raw_uint16_to_half(0x7bff); } + static Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } + static Eigen::half round_error() { return Eigen::half(0.5); } + static Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } + static Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } + static Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } +}; +} + +namespace Eigen { + template<> struct NumTraits<Eigen::half> : GenericNumTraits<Eigen::half> { + enum { + IsSigned = true, + IsInteger = false, + IsComplex = false, + RequireInitialization = false + }; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() { return half_impl::raw_uint16_to_half(0x0800); } diff --git a/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h b/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h index 987a529..0348b41 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -57,18 +57,6 @@ double2 pexp<double2>(const double2& a) } template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -float4 pexpm1<float4>(const float4& a) -{ - return make_float4(expm1f(a.x), expm1f(a.y), expm1f(a.z), expm1f(a.w)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -double2 pexpm1<double2>(const double2& a) -{ - return make_double2(expm1(a.x), expm1(a.y)); -} - -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 psqrt<float4>(const float4& a) { return make_float4(sqrtf(a.x), sqrtf(a.y), sqrtf(a.z), sqrtf(a.w)); diff --git a/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h b/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h index 8c46af0..4dda631 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -167,10 +167,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploadu<double2>(const d return make_double2(from[0], from[1]); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) { +template<> EIGEN_STRONG_INLINE float4 ploaddup<float4>(const float* from) { return make_float4(from[0], from[0], from[1], from[1]); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) { +template<> EIGEN_STRONG_INLINE double2 ploaddup<double2>(const double* from) { return make_double2(from[0], from[0]); } diff --git a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index b9a125b..ae54225 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -34,7 +34,6 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits HasSqrt = 1, HasRsqrt = 1, HasExp = 1, - HasExpm1 = 1, HasLog = 1, HasLog1p = 1 }; @@ -276,14 +275,6 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) { return __floats2half2_rn(r1, r2); } -template<> __device__ EIGEN_STRONG_INLINE half2 pexpm1<half2>(const half2& a) { - float a1 = __low2float(a); - float a2 = __high2float(a); - float r1 = expm1f(a1); - float r2 = expm1f(a2); - return __floats2half2_rn(r1, r2); -} - #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 template<> __device__ EIGEN_STRONG_INLINE diff --git a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h index 
aede4a6..836fbc0 100644 --- a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h @@ -116,7 +116,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { - const float32_t f[] = {0, 1, 2, 3}; + const float f[] = {0, 1, 2, 3}; Packet4f countdown = vld1q_f32(f); return vaddq_f32(pset1<Packet4f>(a), countdown); } diff --git a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h index 03c8a2c..3832de1 100644 --- a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h @@ -45,7 +45,7 @@ struct eigen_packet_wrapper m_val = v; return *this; } - + T m_val; }; typedef eigen_packet_wrapper<__m128> Packet4f; @@ -69,7 +69,7 @@ template<> struct is_arithmetic<__m128d> { enum { value = true }; }; #define vec2d_swizzle1(v,p,q) \ (_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), ((q*2+1)<<6|(q*2)<<4|(p*2+1)<<2|(p*2))))) - + #define vec4f_swizzle2(a,b,p,q,r,s) \ (_mm_shuffle_ps( (a), (b), ((s)<<6|(r)<<4|(q)<<2|(p)))) @@ -190,7 +190,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float *from) { return vec4f_swizzle1(_mm_load_ss(from),0,0,0,0); } #endif - + template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); } template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return _mm_add_pd(pset1<Packet2d>(a),_mm_set_pd(1,0)); } template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return _mm_add_epi32(pset1<Packet4i>(a),_mm_set_epi32(3,2,1,0)); } @@ -250,34 +250,8 @@ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); } #endif -template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_min_ps, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet4f res = b; - asm("minps %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::min. - return _mm_min_ps(b, a); -#endif -} -template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_min_pd, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet2d res = b; - asm("minpd %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::min. 
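The comment being removed here refers to SSE's NaN semantics. As a scalar model of why the operands were swapped (illustrative only, not part of the patch):

    // _mm_min_ps computes x < y ? x : y per lane, so it returns its *second*
    // operand when either input is NaN, whereas std::min(a, b) returns its
    // first. Swapping the arguments makes the SSE result match std::min
    // lane-wise, including for NaN inputs.
    inline float sse_min_model(float x, float y) { return x < y ? x : y; } // _mm_min_ps(x, y)
    inline float pmin_model(float a, float b)    { return sse_min_model(b, a); } // == std::min(a, b)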
- return _mm_min_pd(b, a); -#endif -} +template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { #ifdef EIGEN_VECTORIZE_SSE4_1 @@ -289,34 +263,8 @@ template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const #endif } -template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_max_ps, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet4f res = b; - asm("maxps %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::max. - return _mm_max_ps(b, a); -#endif -} -template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { -#if EIGEN_COMP_GNUC - // There appears to be a bug in GCC, by which the optimizer may - // flip the argument order in calls to _mm_max_pd, so we have to - // resort to inline ASM here. This is supposed to be fixed in gcc6.3, - // see also: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 - Packet2d res = b; - asm("maxpd %[a], %[res]" : [res] "+x" (res) : [a] "x" (a)); - return res; -#else - // Arguments are reversed to match NaN propagation behavior of std::max. - return _mm_max_pd(b, a); -#endif -} +template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); } +template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { #ifdef EIGEN_VECTORIZE_SSE4_1 diff --git a/eigen/Eigen/src/Core/functors/NullaryFunctors.h b/eigen/Eigen/src/Core/functors/NullaryFunctors.h index 6a30466..b03be02 100644 --- a/eigen/Eigen/src/Core/functors/NullaryFunctors.h +++ b/eigen/Eigen/src/Core/functors/NullaryFunctors.h @@ -44,16 +44,16 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> { linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) : m_low(low), m_high(high), m_size1(num_steps==1 ? 1 : num_steps-1), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)), - m_interPacket(plset<Packet>(0)), m_flip(numext::abs(high)<numext::abs(low)) {} template<typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { + typedef typename NumTraits<Scalar>::Real RealScalar; if(m_flip) - return (i==0)? m_low : (m_high - (m_size1-i)*m_step); + return (i==0)? m_low : (m_high - RealScalar(m_size1-i)*m_step); else - return (i==m_size1)? m_high : (m_low + i*m_step); + return (i==m_size1)? 
m_high : (m_low + RealScalar(i)*m_step); } template<typename IndexType> @@ -63,7 +63,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) if(m_flip) { - Packet pi = padd(pset1<Packet>(Scalar(i-m_size1)),m_interPacket); + Packet pi = plset<Packet>(Scalar(i-m_size1)); Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi)); if(i==0) res = pinsertfirst(res, m_low); @@ -71,7 +71,7 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> } else { - Packet pi = padd(pset1<Packet>(Scalar(i)),m_interPacket); + Packet pi = plset<Packet>(Scalar(i)); Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi)); if(i==m_size1-unpacket_traits<Packet>::size+1) res = pinsertlast(res, m_high); @@ -83,7 +83,6 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/false> const Scalar m_high; const Index m_size1; const Scalar m_step; - const Packet m_interPacket; const bool m_flip; }; diff --git a/eigen/Eigen/src/Core/functors/UnaryFunctors.h b/eigen/Eigen/src/Core/functors/UnaryFunctors.h index bfc0465..2e6a00f 100644 --- a/eigen/Eigen/src/Core/functors/UnaryFunctors.h +++ b/eigen/Eigen/src/Core/functors/UnaryFunctors.h @@ -264,26 +264,6 @@ struct functor_traits<scalar_exp_op<Scalar> > { /** \internal * - * \brief Template functor to compute the exponential of a scalar - 1. - * - * \sa class CwiseUnaryOp, ArrayBase::expm1() - */ -template<typename Scalar> struct scalar_expm1_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_expm1_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::expm1(a); } - template <typename Packet> - EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pexpm1(a); } -}; -template <typename Scalar> -struct functor_traits<scalar_expm1_op<Scalar> > { - enum { - PacketAccess = packet_traits<Scalar>::HasExpm1, - Cost = functor_traits<scalar_exp_op<Scalar> >::Cost // TODO measure cost of expm1 - }; -}; - -/** \internal - * * \brief Template functor to compute the logarithm of a scalar * * \sa class CwiseUnaryOp, ArrayBase::log() @@ -698,13 +678,7 @@ struct functor_traits<scalar_ceil_op<Scalar> > template<typename Scalar> struct scalar_isnan_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isnan(a); -#else - return (numext::isnan)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isnan)(a); } }; template<typename Scalar> struct functor_traits<scalar_isnan_op<Scalar> > @@ -722,13 +696,7 @@ struct functor_traits<scalar_isnan_op<Scalar> > template<typename Scalar> struct scalar_isinf_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isinf(a); -#else - return (numext::isinf)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isinf)(a); } }; template<typename Scalar> struct functor_traits<scalar_isinf_op<Scalar> > @@ -746,13 +714,7 @@ struct functor_traits<scalar_isinf_op<Scalar> > template<typename Scalar> struct scalar_isfinite_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op) typedef bool result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type 
operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) - return numext::isfinite(a); -#else - return (numext::isfinite)(a); -#endif - } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return (numext::isfinite)(a); } }; template<typename Scalar> struct functor_traits<scalar_isfinite_op<Scalar> > diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h index ad38bcf..e844e37 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h @@ -269,10 +269,13 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0, LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0, - RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0 + RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0, + SkipDiag = (UpLo&(UnitDiag|ZeroDiag))!=0 }; Index size = mat.cols(); + if(SkipDiag) + size--; Index depth = actualLhs.cols(); typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar, @@ -283,21 +286,23 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false> internal::general_matrix_matrix_triangular_product<Index, typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, - IsRowMajor ? RowMajor : ColMajor, UpLo> + IsRowMajor ? RowMajor : ColMajor, UpLo&(Lower|Upper)> ::run(size, depth, - &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), - mat.data(), mat.outerStride(), actualAlpha, blocking); + &actualLhs.coeffRef(SkipDiag&&(UpLo&Lower)==Lower ? 1 : 0,0), actualLhs.outerStride(), + &actualRhs.coeffRef(0,SkipDiag&&(UpLo&Upper)==Upper ? 1 : 0), actualRhs.outerStride(), + mat.data() + (SkipDiag ? (bool(IsRowMajor) != ((UpLo&Lower)==Lower) ? 
1 : mat.outerStride() ) : 0), mat.outerStride(), actualAlpha, blocking); } }; template<typename MatrixType, unsigned int UpLo> template<typename ProductType> -EIGEN_DEVICE_FUNC TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) +TriangularView<MatrixType,UpLo>& TriangularViewImpl<MatrixType,UpLo,Dense>::_assignProduct(const ProductType& prod, const Scalar& alpha, bool beta) { + EIGEN_STATIC_ASSERT((UpLo&UnitDiag)==0, WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED); eigen_assert(derived().nestedExpression().rows() == prod.rows() && derived().cols() == prod.cols()); - + general_product_to_triangular_selector<MatrixType, ProductType, UpLo, internal::traits<ProductType>::InnerSize==1>::run(derived().nestedExpression().const_cast_derived(), prod, alpha, beta); - + return derived(); } diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 5b7c15c..41e18ff 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -52,7 +52,7 @@ struct general_matrix_matrix_triangular_product<Index,Scalar,LhsStorageOrder,Con static EIGEN_STRONG_INLINE void run(Index size, Index depth,const Scalar* lhs, Index lhsStride, \ const Scalar* rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar, Scalar>& blocking) \ { \ - if (lhs==rhs) { \ + if ( lhs==rhs && ((UpLo&(Lower|Upper)==UpLo)) ) { \ general_matrix_matrix_rankupdate<Index,Scalar,LhsStorageOrder,ConjugateLhs,ColMajor,UpLo> \ ::run(size,depth,lhs,lhsStride,rhs,rhsStride,res,resStride,alpha,blocking); \ } else { \ diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h index 41d8242..3c1a7fc 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr> +// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -15,8 +15,10 @@ namespace Eigen { namespace internal { /* Optimized col-major matrix * vector product: - * This algorithm processes the matrix per vertical panels, - * which are then processed horizontaly per chunck of 8*PacketSize x 1 vertical segments. + * This algorithm processes 4 columns at onces that allows to both reduce + * the number of load/stores of the result by a factor 4 and to reduce + * the instruction dependency. Moreover, we know that all bands have the + * same alignment pattern. * * Mixing type logic: C += alpha * A * B * | A | B |alpha| comments @@ -25,7 +27,33 @@ namespace internal { * |cplx |real |cplx | invalid, the caller has to do tmp: = A * B; C += alpha*tmp * |cplx |real |real | optimal case, vectorization possible via real-cplx mul * + * Accesses to the matrix coefficients follow the following logic: + * + * - if all columns have the same alignment then + * - if the columns have the same alignment as the result vector, then easy! 
(-> AllAligned case)
+ *    - otherwise perform unaligned loads only (-> NoneAligned case)
+ *  - otherwise
+ *    - if even columns have the same alignment then
+ *      // odd columns are guaranteed to have the same alignment too
+ *      - if even or odd columns have the same alignment as the result, then
+ *        // for a register size of 2 scalars, this is guaranteed to be the case (e.g., SSE with double)
+ *        - perform half aligned and half unaligned loads (-> EvenAligned case)
+ *      - otherwise perform unaligned loads only (-> NoneAligned case)
+ *  - otherwise, if the register size is 4 scalars (e.g., SSE with float) then
+ *    - one over 4 consecutive columns is guaranteed to be aligned with the result vector,
+ *      perform simple aligned loads for this column and aligned loads plus re-alignment for the other. (-> FirstAligned case)
+ *      // this re-alignment is done by the palign function implemented for SSE in Eigen/src/Core/arch/SSE/PacketMath.h
+ *    - otherwise,
+ *      // if we get here, this means the register size is greater than 4 (e.g., AVX with floats),
+ *      // we currently fall back to the NoneAligned case
+ *
  * The same reasoning applies for the transposed case.
+ *
+ * The last case (PacketSize>4) could probably be improved by generalizing the FirstAligned case, but since we do not support AVX yet...
+ * One might also wonder why in the EvenAligned case we perform unaligned loads instead of using the aligned-loads plus re-alignment
+ * strategy as in the FirstAligned case. The reason is that we observed that unaligned loads on an 8 byte boundary are not too slow
+ * compared to unaligned loads on a 4 byte boundary.
+ *
  */
template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
@@ -59,145 +87,238 @@ EIGEN_DONT_INLINE static void run(
template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run(
  Index rows, Index cols,
-  const LhsMapper& alhs,
+  const LhsMapper& lhs,
  const RhsMapper& rhs,
  ResScalar* res, Index resIncr,
  RhsScalar alpha)
{
  EIGEN_UNUSED_VARIABLE(resIncr);
  eigen_internal_assert(resIncr==1);
-
-  // The following copy tells the compiler that lhs's attributes are not modified outside this function
-  // This helps GCC to generate proper code.
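The alignment cases enumerated above hinge on how many leading scalars must be skipped before packet loads become aligned. A rough scalar model of that computation (names hypothetical; it mirrors internal::first_default_aligned only in spirit, assuming 16-byte Packet4f):

    #include <cstdint>
    #include <cstddef>

    // Number of floats to skip so that &p[skip] is 16-byte aligned;
    // returns size when the pointer cannot be packet-aligned at all,
    // which callers treat the same as "no aligned region".
    inline std::ptrdiff_t first_aligned_offset(const float* p, std::ptrdiff_t size) {
      const std::uintptr_t addr = reinterpret_cast<std::uintptr_t>(p);
      if (addr % sizeof(float) != 0) return size;  // not even scalar-aligned
      const std::ptrdiff_t skip =
          std::ptrdiff_t((16 - addr % 16) % 16) / std::ptrdiff_t(sizeof(float));
      return skip < size ? skip : size;
    }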
- LhsMapper lhs(alhs); + #ifdef _EIGEN_ACCUMULATE_PACKETS + #error _EIGEN_ACCUMULATE_PACKETS has already been defined + #endif + #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) \ + pstore(&res[j], \ + padd(pload<ResPacket>(&res[j]), \ + padd( \ + padd(pcj.pmul(lhs0.template load<LhsPacket, Alignment0>(j), ptmp0), \ + pcj.pmul(lhs1.template load<LhsPacket, Alignment13>(j), ptmp1)), \ + padd(pcj.pmul(lhs2.template load<LhsPacket, Alignment2>(j), ptmp2), \ + pcj.pmul(lhs3.template load<LhsPacket, Alignment13>(j), ptmp3)) ))) + + typedef typename LhsMapper::VectorMapper LhsScalars; conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; + if(ConjugateRhs) + alpha = numext::conj(alpha); + + enum { AllAligned = 0, EvenAligned, FirstAligned, NoneAligned }; + const Index columnsAtOnce = 4; + const Index peels = 2; + const Index LhsPacketAlignedMask = LhsPacketSize-1; + const Index ResPacketAlignedMask = ResPacketSize-1; +// const Index PeelAlignedMask = ResPacketSize*peels-1; + const Index size = rows; + const Index lhsStride = lhs.stride(); - // TODO: for padded aligned inputs, we could enable aligned reads - enum { LhsAlignment = Unaligned }; - const Index n8 = rows-8*ResPacketSize+1; - const Index n4 = rows-4*ResPacketSize+1; - const Index n3 = rows-3*ResPacketSize+1; - const Index n2 = rows-2*ResPacketSize+1; - const Index n1 = rows-1*ResPacketSize+1; + // How many coeffs of the result do we have to skip to be aligned. + // Here we assume data are at least aligned on the base scalar type. + Index alignedStart = internal::first_default_aligned(res,size); + Index alignedSize = ResPacketSize>1 ? alignedStart + ((size-alignedStart) & ~ResPacketAlignedMask) : 0; + const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; + + const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; + Index alignmentPattern = alignmentStep==0 ? AllAligned + : alignmentStep==(LhsPacketSize/2) ? EvenAligned + : FirstAligned; - // TODO: improve the following heuristic: - const Index block_cols = cols<128 ? cols : (lhsStride*sizeof(LhsScalar)<32000?16:4); - ResPacket palpha = pset1<ResPacket>(alpha); + // we cannot assume the first element is aligned because of sub-matrices + const Index lhsAlignmentOffset = lhs.firstAligned(size); - for(Index j2=0; j2<cols; j2+=block_cols) + // find how many columns do we have to skip to be aligned with the result (if possible) + Index skipColumns = 0; + // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats) + if( (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == size) || (UIntPtr(res)%sizeof(ResScalar)) ) { - Index jend = numext::mini(j2+block_cols,cols); - Index i=0; - for(; i<n8; i+=ResPacketSize*8) + alignedSize = 0; + alignedStart = 0; + alignmentPattern = NoneAligned; + } + else if(LhsPacketSize > 4) + { + // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4. 
+ // Currently, it seems to be better to perform unaligned loads anyway + alignmentPattern = NoneAligned; + } + else if (LhsPacketSize>1) + { + // eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || size<LhsPacketSize); + + while (skipColumns<LhsPacketSize && + alignedStart != ((lhsAlignmentOffset + alignmentStep*skipColumns)%LhsPacketSize)) + ++skipColumns; + if (skipColumns==LhsPacketSize) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)), - c3 = pset1<ResPacket>(ResScalar(0)), - c4 = pset1<ResPacket>(ResScalar(0)), - c5 = pset1<ResPacket>(ResScalar(0)), - c6 = pset1<ResPacket>(ResScalar(0)), - c7 = pset1<ResPacket>(ResScalar(0)); - - for(Index j=j2; j<jend; j+=1) - { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*3,j),b0,c3); - c4 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*4,j),b0,c4); - c5 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*5,j),b0,c5); - c6 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*6,j),b0,c6); - c7 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*7,j),b0,c7); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu<ResPacket>(res+i+ResPacketSize*2))); - pstoreu(res+i+ResPacketSize*3, pmadd(c3,palpha,ploadu<ResPacket>(res+i+ResPacketSize*3))); - pstoreu(res+i+ResPacketSize*4, pmadd(c4,palpha,ploadu<ResPacket>(res+i+ResPacketSize*4))); - pstoreu(res+i+ResPacketSize*5, pmadd(c5,palpha,ploadu<ResPacket>(res+i+ResPacketSize*5))); - pstoreu(res+i+ResPacketSize*6, pmadd(c6,palpha,ploadu<ResPacket>(res+i+ResPacketSize*6))); - pstoreu(res+i+ResPacketSize*7, pmadd(c7,palpha,ploadu<ResPacket>(res+i+ResPacketSize*7))); + // nothing can be aligned, no need to skip any column + alignmentPattern = NoneAligned; + skipColumns = 0; } - if(i<n4) + else { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)), - c3 = pset1<ResPacket>(ResScalar(0)); + skipColumns = (std::min)(skipColumns,cols); + // note that the skiped columns are processed later. 
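The skipped-columns bookkeeping resolves into a two-pass traversal; a condensed model of that control flow (hypothetical helper, with Index assumed to be std::ptrdiff_t and the 4-column blocking stride omitted for clarity):

    #include <cstddef>
    #include <functional>

    using Index = std::ptrdiff_t;

    // Visit [skipColumns, columnBound) first (the aligned bulk), then the
    // tail [columnBound, cols), then wrap around to the skipped prefix
    // [0, skipColumns), exactly as the do/while at the end of run() does.
    inline void visit_columns(Index skipColumns, Index columnBound, Index cols,
                              const std::function<void(Index)>& body) {
      for (Index i = skipColumns; i < columnBound; ++i) body(i);
      Index start = columnBound, end = cols;
      for (;;) {
        for (Index k = start; k < end; ++k) body(k);
        if (!skipColumns) break;
        start = 0; end = skipColumns; skipColumns = 0;
      }
    }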
+ } - for(Index j=j2; j<jend; j+=1) - { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*3,j),b0,c3); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu<ResPacket>(res+i+ResPacketSize*2))); - pstoreu(res+i+ResPacketSize*3, pmadd(c3,palpha,ploadu<ResPacket>(res+i+ResPacketSize*3))); + /* eigen_internal_assert( (alignmentPattern==NoneAligned) + || (skipColumns + columnsAtOnce >= cols) + || LhsPacketSize > size + || (size_t(firstLhs+alignedStart+lhsStride*skipColumns)%sizeof(LhsPacket))==0);*/ + } + else if(Vectorizable) + { + alignedStart = 0; + alignedSize = size; + alignmentPattern = AllAligned; + } - i+=ResPacketSize*4; - } - if(i<n3) - { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)); + const Index offset1 = (FirstAligned && alignmentStep==1)?3:1; + const Index offset3 = (FirstAligned && alignmentStep==1)?1:3; - for(Index j=j2; j<jend; j+=1) - { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*2,j),b0,c2); - } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1))); - pstoreu(res+i+ResPacketSize*2, pmadd(c2,palpha,ploadu<ResPacket>(res+i+ResPacketSize*2))); + Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns; + for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce) + { + RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(i, 0)), + ptmp1 = pset1<RhsPacket>(alpha*rhs(i+offset1, 0)), + ptmp2 = pset1<RhsPacket>(alpha*rhs(i+2, 0)), + ptmp3 = pset1<RhsPacket>(alpha*rhs(i+offset3, 0)); - i+=ResPacketSize*3; - } - if(i<n2) + // this helps a lot generating better binary code + const LhsScalars lhs0 = lhs.getVectorMapper(0, i+0), lhs1 = lhs.getVectorMapper(0, i+offset1), + lhs2 = lhs.getVectorMapper(0, i+2), lhs3 = lhs.getVectorMapper(0, i+offset3); + + if (Vectorizable) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)); + /* explicit vectorization */ + // process initial unaligned coeffs + for (Index j=0; j<alignedStart; ++j) + { + res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]); + res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]); + res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]); + res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]); + } - for(Index j=j2; j<jend; j+=1) + if (alignedSize>alignedStart) { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+LhsPacketSize*1,j),b0,c1); + switch(alignmentPattern) + { + case AllAligned: + for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize) + 
_EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned); + break; + case EvenAligned: + for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned); + break; + case FirstAligned: + { + Index j = alignedStart; + if(peels>1) + { + LhsPacket A00, A01, A02, A03, A10, A11, A12, A13; + ResPacket T0, T1; + + A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1); + A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2); + A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3); + + for (; j<peeledSize; j+=peels*ResPacketSize) + { + A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize); palign<1>(A01,A11); + A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize); palign<2>(A02,A12); + A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize); palign<3>(A03,A13); + + A00 = lhs0.template load<LhsPacket, Aligned>(j); + A10 = lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize); + T0 = pcj.pmadd(A00, ptmp0, pload<ResPacket>(&res[j])); + T1 = pcj.pmadd(A10, ptmp0, pload<ResPacket>(&res[j+ResPacketSize])); + + T0 = pcj.pmadd(A01, ptmp1, T0); + A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize); palign<1>(A11,A01); + T0 = pcj.pmadd(A02, ptmp2, T0); + A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize); palign<2>(A12,A02); + T0 = pcj.pmadd(A03, ptmp3, T0); + pstore(&res[j],T0); + A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize); palign<3>(A13,A03); + T1 = pcj.pmadd(A11, ptmp1, T1); + T1 = pcj.pmadd(A12, ptmp2, T1); + T1 = pcj.pmadd(A13, ptmp3, T1); + pstore(&res[j+ResPacketSize],T1); + } + } + for (; j<alignedSize; j+=ResPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned); + break; + } + default: + for (Index j = alignedStart; j<alignedSize; j+=ResPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned); + break; + } } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - pstoreu(res+i+ResPacketSize*1, pmadd(c1,palpha,ploadu<ResPacket>(res+i+ResPacketSize*1))); - i+=ResPacketSize*2; + } // end explicit vectorization + + /* process remaining coeffs (or all if there is no explicit vectorization) */ + for (Index j=alignedSize; j<size; ++j) + { + res[j] = cj.pmadd(lhs0(j), pfirst(ptmp0), res[j]); + res[j] = cj.pmadd(lhs1(j), pfirst(ptmp1), res[j]); + res[j] = cj.pmadd(lhs2(j), pfirst(ptmp2), res[j]); + res[j] = cj.pmadd(lhs3(j), pfirst(ptmp3), res[j]); } - if(i<n1) + } + + // process remaining first and last columns (at most columnsAtOnce-1) + Index end = cols; + Index start = columnBound; + do + { + for (Index k=start; k<end; ++k) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)); - for(Index j=j2; j<jend; j+=1) + RhsPacket ptmp0 = pset1<RhsPacket>(alpha*rhs(k, 0)); + const LhsScalars lhs0 = lhs.getVectorMapper(0, k); + + if (Vectorizable) { - RhsPacket b0 = pset1<RhsPacket>(rhs(j,0)); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0); + /* explicit vectorization */ + // process first unaligned result's coeffs + for (Index j=0; j<alignedStart; ++j) + res[j] += cj.pmul(lhs0(j), pfirst(ptmp0)); + // process aligned result's coeffs + if (lhs0.template aligned<LhsPacket>(alignedStart)) + for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize) + pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(i), ptmp0, pload<ResPacket>(&res[i]))); + else + for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize) + pstore(&res[i], pcj.pmadd(lhs0.template load<LhsPacket, 
Unaligned>(i), ptmp0, pload<ResPacket>(&res[i]))); } - pstoreu(res+i+ResPacketSize*0, pmadd(c0,palpha,ploadu<ResPacket>(res+i+ResPacketSize*0))); - i+=ResPacketSize; + + // process remaining scalars (or all if no explicit vectorization) + for (Index i=alignedSize; i<size; ++i) + res[i] += cj.pmul(lhs0(i), pfirst(ptmp0)); } - for(;i<rows;++i) + if (skipColumns) { - ResScalar c0(0); - for(Index j=j2; j<jend; j+=1) - c0 += cj.pmul(lhs(i,j), rhs(j,0)); - res[i] += alpha*c0; + start = 0; + end = skipColumns; + skipColumns = 0; } - } + else + break; + } while(Vectorizable); + #undef _EIGEN_ACCUMULATE_PACKETS } /* Optimized row-major matrix * vector product: @@ -242,160 +363,253 @@ EIGEN_DONT_INLINE static void run( template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version> EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>::run( Index rows, Index cols, - const LhsMapper& alhs, + const LhsMapper& lhs, const RhsMapper& rhs, ResScalar* res, Index resIncr, ResScalar alpha) { - // The following copy tells the compiler that lhs's attributes are not modified outside this function - // This helps GCC to generate propoer code. - LhsMapper lhs(alhs); - eigen_internal_assert(rhs.stride()==1); + + #ifdef _EIGEN_ACCUMULATE_PACKETS + #error _EIGEN_ACCUMULATE_PACKETS has already been defined + #endif + + #define _EIGEN_ACCUMULATE_PACKETS(Alignment0,Alignment13,Alignment2) {\ + RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0); \ + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Alignment0>(j), b, ptmp0); \ + ptmp1 = pcj.pmadd(lhs1.template load<LhsPacket, Alignment13>(j), b, ptmp1); \ + ptmp2 = pcj.pmadd(lhs2.template load<LhsPacket, Alignment2>(j), b, ptmp2); \ + ptmp3 = pcj.pmadd(lhs3.template load<LhsPacket, Alignment13>(j), b, ptmp3); } + conj_helper<LhsScalar,RhsScalar,ConjugateLhs,ConjugateRhs> cj; conj_helper<LhsPacket,RhsPacket,ConjugateLhs,ConjugateRhs> pcj; - // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large, - // processing 8 rows at once might be counter productive wrt cache. - const Index n8 = lhs.stride()*sizeof(LhsScalar)>32000 ? 0 : rows-7; - const Index n4 = rows-3; - const Index n2 = rows-1; + typedef typename LhsMapper::VectorMapper LhsScalars; - // TODO: for padded aligned inputs, we could enable aligned reads - enum { LhsAlignment = Unaligned }; + enum { AllAligned=0, EvenAligned=1, FirstAligned=2, NoneAligned=3 }; + const Index rowsAtOnce = 4; + const Index peels = 2; + const Index RhsPacketAlignedMask = RhsPacketSize-1; + const Index LhsPacketAlignedMask = LhsPacketSize-1; + const Index depth = cols; + const Index lhsStride = lhs.stride(); - Index i=0; - for(; i<n8; i+=8) + // How many coeffs of the result do we have to skip to be aligned. + // Here we assume data are at least aligned on the base scalar type + // if that's not the case then vectorization is discarded, see below. + Index alignedStart = rhs.firstAligned(depth); + Index alignedSize = RhsPacketSize>1 ? alignedStart + ((depth-alignedStart) & ~RhsPacketAlignedMask) : 0; + const Index peeledSize = alignedSize - RhsPacketSize*peels - RhsPacketSize + 1; + + const Index alignmentStep = LhsPacketSize>1 ? (LhsPacketSize - lhsStride % LhsPacketSize) & LhsPacketAlignedMask : 0; + Index alignmentPattern = alignmentStep==0 ? AllAligned + : alignmentStep==(LhsPacketSize/2) ? 
EvenAligned + : FirstAligned; + + // we cannot assume the first element is aligned because of sub-matrices + const Index lhsAlignmentOffset = lhs.firstAligned(depth); + const Index rhsAlignmentOffset = rhs.firstAligned(rows); + + // find how many rows do we have to skip to be aligned with rhs (if possible) + Index skipRows = 0; + // if the data cannot be aligned (TODO add some compile time tests when possible, e.g. for floats) + if( (sizeof(LhsScalar)!=sizeof(RhsScalar)) || + (lhsAlignmentOffset < 0) || (lhsAlignmentOffset == depth) || + (rhsAlignmentOffset < 0) || (rhsAlignmentOffset == rows) ) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)), - c3 = pset1<ResPacket>(ResScalar(0)), - c4 = pset1<ResPacket>(ResScalar(0)), - c5 = pset1<ResPacket>(ResScalar(0)), - c6 = pset1<ResPacket>(ResScalar(0)), - c7 = pset1<ResPacket>(ResScalar(0)); - - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) - { - RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0); - - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+3,j),b0,c3); - c4 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+4,j),b0,c4); - c5 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+5,j),b0,c5); - c6 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+6,j),b0,c6); - c7 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+7,j),b0,c7); - } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - ResScalar cc2 = predux(c2); - ResScalar cc3 = predux(c3); - ResScalar cc4 = predux(c4); - ResScalar cc5 = predux(c5); - ResScalar cc6 = predux(c6); - ResScalar cc7 = predux(c7); - for(; j<cols; ++j) - { - RhsScalar b0 = rhs(j,0); - - cc0 += cj.pmul(lhs(i+0,j), b0); - cc1 += cj.pmul(lhs(i+1,j), b0); - cc2 += cj.pmul(lhs(i+2,j), b0); - cc3 += cj.pmul(lhs(i+3,j), b0); - cc4 += cj.pmul(lhs(i+4,j), b0); - cc5 += cj.pmul(lhs(i+5,j), b0); - cc6 += cj.pmul(lhs(i+6,j), b0); - cc7 += cj.pmul(lhs(i+7,j), b0); - } - res[(i+0)*resIncr] += alpha*cc0; - res[(i+1)*resIncr] += alpha*cc1; - res[(i+2)*resIncr] += alpha*cc2; - res[(i+3)*resIncr] += alpha*cc3; - res[(i+4)*resIncr] += alpha*cc4; - res[(i+5)*resIncr] += alpha*cc5; - res[(i+6)*resIncr] += alpha*cc6; - res[(i+7)*resIncr] += alpha*cc7; + alignedSize = 0; + alignedStart = 0; + alignmentPattern = NoneAligned; } - for(; i<n4; i+=4) + else if(LhsPacketSize > 4) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)), - c2 = pset1<ResPacket>(ResScalar(0)), - c3 = pset1<ResPacket>(ResScalar(0)); + // TODO: extend the code to support aligned loads whenever possible when LhsPacketSize > 4. 
+ alignmentPattern = NoneAligned; + } + else if (LhsPacketSize>1) + { + // eigen_internal_assert(size_t(firstLhs+lhsAlignmentOffset)%sizeof(LhsPacket)==0 || depth<LhsPacketSize); - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) + while (skipRows<LhsPacketSize && + alignedStart != ((lhsAlignmentOffset + alignmentStep*skipRows)%LhsPacketSize)) + ++skipRows; + if (skipRows==LhsPacketSize) { - RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0); - - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+1,j),b0,c1); - c2 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+2,j),b0,c2); - c3 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+3,j),b0,c3); + // nothing can be aligned, no need to skip any column + alignmentPattern = NoneAligned; + skipRows = 0; } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - ResScalar cc2 = predux(c2); - ResScalar cc3 = predux(c3); - for(; j<cols; ++j) + else { - RhsScalar b0 = rhs(j,0); - - cc0 += cj.pmul(lhs(i+0,j), b0); - cc1 += cj.pmul(lhs(i+1,j), b0); - cc2 += cj.pmul(lhs(i+2,j), b0); - cc3 += cj.pmul(lhs(i+3,j), b0); + skipRows = (std::min)(skipRows,Index(rows)); + // note that the skiped columns are processed later. } - res[(i+0)*resIncr] += alpha*cc0; - res[(i+1)*resIncr] += alpha*cc1; - res[(i+2)*resIncr] += alpha*cc2; - res[(i+3)*resIncr] += alpha*cc3; + /* eigen_internal_assert( alignmentPattern==NoneAligned + || LhsPacketSize==1 + || (skipRows + rowsAtOnce >= rows) + || LhsPacketSize > depth + || (size_t(firstLhs+alignedStart+lhsStride*skipRows)%sizeof(LhsPacket))==0);*/ } - for(; i<n2; i+=2) + else if(Vectorizable) { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)), - c1 = pset1<ResPacket>(ResScalar(0)); + alignedStart = 0; + alignedSize = depth; + alignmentPattern = AllAligned; + } - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) - { - RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0); + const Index offset1 = (FirstAligned && alignmentStep==1)?3:1; + const Index offset3 = (FirstAligned && alignmentStep==1)?1:3; - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+0,j),b0,c0); - c1 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i+1,j),b0,c1); - } - ResScalar cc0 = predux(c0); - ResScalar cc1 = predux(c1); - for(; j<cols; ++j) + Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows; + for (Index i=skipRows; i<rowBound; i+=rowsAtOnce) + { + // FIXME: what is the purpose of this EIGEN_ALIGN_DEFAULT ?? + EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0); + ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0); + + // this helps the compiler generating good binary code + const LhsScalars lhs0 = lhs.getVectorMapper(i+0, 0), lhs1 = lhs.getVectorMapper(i+offset1, 0), + lhs2 = lhs.getVectorMapper(i+2, 0), lhs3 = lhs.getVectorMapper(i+offset3, 0); + + if (Vectorizable) { - RhsScalar b0 = rhs(j,0); + /* explicit vectorization */ + ResPacket ptmp0 = pset1<ResPacket>(ResScalar(0)), ptmp1 = pset1<ResPacket>(ResScalar(0)), + ptmp2 = pset1<ResPacket>(ResScalar(0)), ptmp3 = pset1<ResPacket>(ResScalar(0)); + + // process initial unaligned coeffs + // FIXME this loop get vectorized by the compiler ! 
+ for (Index j=0; j<alignedStart; ++j) + { + RhsScalar b = rhs(j, 0); + tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b); + tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b); + } + + if (alignedSize>alignedStart) + { + switch(alignmentPattern) + { + case AllAligned: + for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Aligned,Aligned); + break; + case EvenAligned: + for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Aligned); + break; + case FirstAligned: + { + Index j = alignedStart; + if (peels>1) + { + /* Here we proccess 4 rows with with two peeled iterations to hide + * the overhead of unaligned loads. Moreover unaligned loads are handled + * using special shift/move operations between the two aligned packets + * overlaping the desired unaligned packet. This is *much* more efficient + * than basic unaligned loads. + */ + LhsPacket A01, A02, A03, A11, A12, A13; + A01 = lhs1.template load<LhsPacket, Aligned>(alignedStart-1); + A02 = lhs2.template load<LhsPacket, Aligned>(alignedStart-2); + A03 = lhs3.template load<LhsPacket, Aligned>(alignedStart-3); + + for (; j<peeledSize; j+=peels*RhsPacketSize) + { + RhsPacket b = rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0); + A11 = lhs1.template load<LhsPacket, Aligned>(j-1+LhsPacketSize); palign<1>(A01,A11); + A12 = lhs2.template load<LhsPacket, Aligned>(j-2+LhsPacketSize); palign<2>(A02,A12); + A13 = lhs3.template load<LhsPacket, Aligned>(j-3+LhsPacketSize); palign<3>(A03,A13); + + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), b, ptmp0); + ptmp1 = pcj.pmadd(A01, b, ptmp1); + A01 = lhs1.template load<LhsPacket, Aligned>(j-1+2*LhsPacketSize); palign<1>(A11,A01); + ptmp2 = pcj.pmadd(A02, b, ptmp2); + A02 = lhs2.template load<LhsPacket, Aligned>(j-2+2*LhsPacketSize); palign<2>(A12,A02); + ptmp3 = pcj.pmadd(A03, b, ptmp3); + A03 = lhs3.template load<LhsPacket, Aligned>(j-3+2*LhsPacketSize); palign<3>(A13,A03); + + b = rhs.getVectorMapper(j+RhsPacketSize, 0).template load<RhsPacket, Aligned>(0); + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j+LhsPacketSize), b, ptmp0); + ptmp1 = pcj.pmadd(A11, b, ptmp1); + ptmp2 = pcj.pmadd(A12, b, ptmp2); + ptmp3 = pcj.pmadd(A13, b, ptmp3); + } + } + for (; j<alignedSize; j+=RhsPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Aligned,Unaligned,Unaligned); + break; + } + default: + for (Index j = alignedStart; j<alignedSize; j+=RhsPacketSize) + _EIGEN_ACCUMULATE_PACKETS(Unaligned,Unaligned,Unaligned); + break; + } + tmp0 += predux(ptmp0); + tmp1 += predux(ptmp1); + tmp2 += predux(ptmp2); + tmp3 += predux(ptmp3); + } + } // end explicit vectorization - cc0 += cj.pmul(lhs(i+0,j), b0); - cc1 += cj.pmul(lhs(i+1,j), b0); + // process remaining coeffs (or all if no explicit vectorization) + // FIXME this loop get vectorized by the compiler ! 
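The peeled FirstAligned loop above emulates unaligned loads by combining two aligned loads with palign<Offset>. A scalar model of that primitive (illustrative only; fixed-size arrays stand in for packets):

    // After the call, first holds the packet starting Offset elements into
    // the original first, with the high lanes taken from second. For
    // Offset==1 and 4-wide packets:
    //   first = [a0 a1 a2 a3], second = [a4 a5 a6 a7]  ->  first = [a1 a2 a3 a4]
    template <int Offset, typename T, int N>
    void palign_model(T (&first)[N], const T (&second)[N]) {
      for (int i = 0; i < N; ++i)
        first[i] = (i + Offset < N) ? first[i + Offset] : second[i + Offset - N];
    }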
+ for (Index j=alignedSize; j<depth; ++j) + { + RhsScalar b = rhs(j, 0); + tmp0 += cj.pmul(lhs0(j),b); tmp1 += cj.pmul(lhs1(j),b); + tmp2 += cj.pmul(lhs2(j),b); tmp3 += cj.pmul(lhs3(j),b); } - res[(i+0)*resIncr] += alpha*cc0; - res[(i+1)*resIncr] += alpha*cc1; + res[i*resIncr] += alpha*tmp0; + res[(i+offset1)*resIncr] += alpha*tmp1; + res[(i+2)*resIncr] += alpha*tmp2; + res[(i+offset3)*resIncr] += alpha*tmp3; } - for(; i<rows; ++i) + + // process remaining first and last rows (at most columnsAtOnce-1) + Index end = rows; + Index start = rowBound; + do { - ResPacket c0 = pset1<ResPacket>(ResScalar(0)); - Index j=0; - for(; j+LhsPacketSize<=cols; j+=LhsPacketSize) + for (Index i=start; i<end; ++i) { - RhsPacket b0 = rhs.template load<RhsPacket,Unaligned>(j,0); - c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i,j),b0,c0); + EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0); + ResPacket ptmp0 = pset1<ResPacket>(tmp0); + const LhsScalars lhs0 = lhs.getVectorMapper(i, 0); + // process first unaligned result's coeffs + // FIXME this loop get vectorized by the compiler ! + for (Index j=0; j<alignedStart; ++j) + tmp0 += cj.pmul(lhs0(j), rhs(j, 0)); + + if (alignedSize>alignedStart) + { + // process aligned rhs coeffs + if (lhs0.template aligned<LhsPacket>(alignedStart)) + for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize) + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Aligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0); + else + for (Index j = alignedStart;j<alignedSize;j+=RhsPacketSize) + ptmp0 = pcj.pmadd(lhs0.template load<LhsPacket, Unaligned>(j), rhs.getVectorMapper(j, 0).template load<RhsPacket, Aligned>(0), ptmp0); + tmp0 += predux(ptmp0); + } + + // process remaining scalars + // FIXME this loop get vectorized by the compiler ! 
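predux, used above to fold each accumulator packet into the scalar added to the result, is a horizontal reduction; a scalar model (an array stands in for the packet):

    // Horizontal sum of a packet's lanes; the kernels use it to collapse
    // ptmp0..ptmp3 into the tmp0..tmp3 scalars written to res.
    template <typename T, int N>
    T predux_model(const T (&p)[N]) {
      T s = T(0);
      for (int i = 0; i < N; ++i) s += p[i];
      return s;
    }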
+ for (Index j=alignedSize; j<depth; ++j) + tmp0 += cj.pmul(lhs0(j), rhs(j, 0)); + res[i*resIncr] += alpha*tmp0; } - ResScalar cc0 = predux(c0); - for(; j<cols; ++j) + if (skipRows) { - cc0 += cj.pmul(lhs(i,j), rhs(j,0)); + start = 0; + end = skipRows; + skipRows = 0; } - res[i*resIncr] += alpha*cc0; - } + else + break; + } while(Vectorizable); + + #undef _EIGEN_ACCUMULATE_PACKETS } } // end namespace internal diff --git a/eigen/Eigen/src/Core/products/SelfadjointProduct.h b/eigen/Eigen/src/Core/products/SelfadjointProduct.h index 39c5b59..f038d68 100644 --- a/eigen/Eigen/src/Core/products/SelfadjointProduct.h +++ b/eigen/Eigen/src/Core/products/SelfadjointProduct.h @@ -120,7 +120,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false> template<typename MatrixType, unsigned int UpLo> template<typename DerivedU> -EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> +SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> ::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha) { selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha); diff --git a/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h b/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h index d395888..2ae3641 100644 --- a/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +++ b/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h @@ -57,7 +57,7 @@ template<bool Cond, typename T> struct conj_expr_if template<typename MatrixType, unsigned int UpLo> template<typename DerivedU, typename DerivedV> -EIGEN_DEVICE_FUNC SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> +SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo> ::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha) { typedef internal::blas_traits<DerivedU> UBlasTraits; diff --git a/eigen/Eigen/src/Core/util/BlasUtil.h b/eigen/Eigen/src/Core/util/BlasUtil.h index b1791fb..6e6ee11 100644 --- a/eigen/Eigen/src/Core/util/BlasUtil.h +++ b/eigen/Eigen/src/Core/util/BlasUtil.h @@ -222,11 +222,6 @@ class blas_data_mapper { return ploadt<Packet, AlignmentType>(&operator()(i, j)); } - template <typename PacketT, int AlignmentT> - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const { - return ploadt<PacketT, AlignmentT>(&operator()(i, j)); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { return ploadt<HalfPacket, AlignmentType>(&operator()(i, j)); } diff --git a/eigen/Eigen/src/Core/util/Constants.h b/eigen/Eigen/src/Core/util/Constants.h index 5d37e5d..7587d68 100644 --- a/eigen/Eigen/src/Core/util/Constants.h +++ b/eigen/Eigen/src/Core/util/Constants.h @@ -25,10 +25,6 @@ const int Dynamic = -1; */ const int DynamicIndex = 0xffffff; -/** This value means that the increment to go from one value to another in a sequence is not constant for each step. - */ -const int UndefinedIncr = 0xfffffe; - /** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm<int>(). * The value Infinity there means the L-infinity norm. */ diff --git a/eigen/Eigen/src/Core/util/DisableStupidWarnings.h b/eigen/Eigen/src/Core/util/DisableStupidWarnings.h index 4431f2f..7559e12 100644 --- a/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/eigen/Eigen/src/Core/util/DisableStupidWarnings.h @@ -4,6 +4,7 @@ #ifdef _MSC_VER // 4100 - unreferenced formal parameter (occurred e.g. 
in aligned_allocator::destroy(pointer p)) // 4101 - unreferenced local variable + // 4127 - conditional expression is constant // 4181 - qualifier applied to reference type ignored // 4211 - nonstandard extension used : redefined extern to static // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data @@ -19,7 +20,7 @@ #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) + #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) @@ -41,9 +42,6 @@ #pragma clang diagnostic push #endif #pragma clang diagnostic ignored "-Wconstant-logical-operand" - #if __clang_major__ >= 3 && __clang_minor__ >= 5 - #pragma clang diagnostic ignored "-Wabsolute-value" - #endif #elif defined __GNUC__ && __GNUC__>=6 diff --git a/eigen/Eigen/src/Core/util/ForwardDeclarations.h b/eigen/Eigen/src/Core/util/ForwardDeclarations.h index 1a48cff..ea10739 100644 --- a/eigen/Eigen/src/Core/util/ForwardDeclarations.h +++ b/eigen/Eigen/src/Core/util/ForwardDeclarations.h @@ -83,7 +83,6 @@ template<typename ExpressionType> class ForceAlignedAccess; template<typename ExpressionType> class SwapWrapper; template<typename XprType, int BlockRows=Dynamic, int BlockCols=Dynamic, bool InnerPanel = false> class Block; -template<typename XprType, typename RowIndices, typename ColIndices> class IndexedView; template<typename MatrixType, int Size=Dynamic> class VectorBlock; template<typename MatrixType> class Transpose; diff --git a/eigen/Eigen/src/Core/util/IndexedViewHelper.h b/eigen/Eigen/src/Core/util/IndexedViewHelper.h deleted file mode 100644 index ab01c85..0000000 --- a/eigen/Eigen/src/Core/util/IndexedViewHelper.h +++ /dev/null @@ -1,187 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#ifndef EIGEN_INDEXED_VIEW_HELPER_H -#define EIGEN_INDEXED_VIEW_HELPER_H - -namespace Eigen { - -/** \namespace Eigen::placeholders - * \ingroup Core_Module - * - * Namespace containing symbolic placeholder and identifiers - */ -namespace placeholders { - -namespace internal { -struct symbolic_last_tag {}; -} - -/** \var last - * \ingroup Core_Module - * - * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last element/row/columns - * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&). - * - * This symbolic placeholder support standard arithmetic operation. - * - * A typical usage example would be: - * \code - * using namespace Eigen; - * using Eigen::placeholders::last; - * VectorXd v(n); - * v(seq(2,last-2)).setOnes(); - * \endcode - * - * \sa end - */ -static const Symbolic::SymbolExpr<internal::symbolic_last_tag> last; - -/** \var end - * \ingroup Core_Module - * - * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last+1 element/row/columns - * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&). 
- * - * This symbolic placeholder supports standard arithmetic operations. - * It is essentially an alias for last+1. - * - * \sa last - */ -#ifdef EIGEN_PARSED_BY_DOXYGEN -static const auto end = last+1; -#else -// Using a FixedExpr<1> expression is important here to make sure the compiler -// can fully optimize the computation of starting indices with zero overhead. -static const Symbolic::AddExpr<Symbolic::SymbolExpr<internal::symbolic_last_tag>,Symbolic::ValueExpr<Eigen::internal::FixedInt<1> > > end(last+fix<1>()); -#endif - -} // end namespace placeholders - -namespace internal { - - // Replace symbolic last/end "keywords" by their true runtime value -inline Index eval_expr_given_size(Index x, Index /* size */) { return x; } - -template<int N> -FixedInt<N> eval_expr_given_size(FixedInt<N> x, Index /*size*/) { return x; } - -template<typename Derived> -Index eval_expr_given_size(const Symbolic::BaseExpr<Derived> &x, Index size) -{ - return x.derived().eval(placeholders::last=size-1); } - -// Extract increment/step at compile time -template<typename T, typename EnableIf = void> struct get_compile_time_incr { - enum { value = UndefinedIncr }; -}; - -// Analogue of std::get<0>(x), but tailored for our needs. -template<typename T> -Index first(const T& x) { return x.first(); } - -// IndexedViewCompatibleType/makeIndexedViewCompatible turn an arbitrary object of type T into something usable by MatrixSlice -// The generic implementation is a no-op -template<typename T,int XprSize,typename EnableIf=void> -struct IndexedViewCompatibleType { - typedef T type; -}; - -template<typename T,typename Q> -const T& makeIndexedViewCompatible(const T& x, Index /*size*/, Q) { return x; } - -//-------------------------------------------------------------------------------- -// Handling of a single Index -//-------------------------------------------------------------------------------- - -struct SingleRange { - enum { - SizeAtCompileTime = 1 - }; - SingleRange(Index val) : m_value(val) {} - Index operator[](Index) const { return m_value; } - Index size() const { return 1; } - Index first() const { return m_value; } - Index m_value; -}; - -template<> struct get_compile_time_incr<SingleRange> { - enum { value = 1 }; // 1 or 0 ??
-}; - -// Turn a single index into something that looks like an array (i.e., that exposes .size() and operator[](int) methods) -template<typename T, int XprSize> -struct IndexedViewCompatibleType<T,XprSize,typename internal::enable_if<internal::is_integral<T>::value>::type> { - // Here we could simply use Array, but maybe it's less work for the compiler to use - // a simpler wrapper such as SingleRange - //typedef Eigen::Array<Index,1,1> type; - typedef SingleRange type; -}; - -template<typename T, int XprSize> -struct IndexedViewCompatibleType<T, XprSize, typename enable_if<Symbolic::is_symbolic<T>::value>::type> { - typedef SingleRange type; -}; - - -template<typename T> -typename enable_if<Symbolic::is_symbolic<T>::value,SingleRange>::type -makeIndexedViewCompatible(const T& id, Index size, SpecializedType) { - return eval_expr_given_size(id,size); -} - -//-------------------------------------------------------------------------------- -// Handling of all -//-------------------------------------------------------------------------------- - -struct all_t { all_t() {} }; - -// Convert a symbolic 'all' into a usable range type -template<int XprSize> -struct AllRange { - enum { SizeAtCompileTime = XprSize }; - AllRange(Index size = XprSize) : m_size(size) {} - Index operator[](Index i) const { return i; } - Index size() const { return m_size.value(); } - Index first() const { return 0; } - variable_if_dynamic<Index,XprSize> m_size; -}; - -template<int XprSize> -struct IndexedViewCompatibleType<all_t,XprSize> { - typedef AllRange<XprSize> type; -}; - -template<typename XprSizeType> -inline AllRange<get_fixed_value<XprSizeType>::value> makeIndexedViewCompatible(all_t , XprSizeType size, SpecializedType) { - return AllRange<get_fixed_value<XprSizeType>::value>(size); -} - -template<int Size> struct get_compile_time_incr<AllRange<Size> > { - enum { value = 1 }; -}; - -} // end namespace internal - - -namespace placeholders { - -/** \var all - * \ingroup Core_Module - * Can be used as a parameter to DenseBase::operator()(const RowIndices&, const ColIndices&) to index all rows or columns - */ -static const Eigen::internal::all_t all; - -} - -} // end namespace Eigen - -#endif // EIGEN_INDEXED_VIEW_HELPER_H diff --git a/eigen/Eigen/src/Core/util/IntegralConstant.h b/eigen/Eigen/src/Core/util/IntegralConstant.h deleted file mode 100644 index 78a4705..0000000 --- a/eigen/Eigen/src/Core/util/IntegralConstant.h +++ /dev/null @@ -1,270 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - - -#ifndef EIGEN_INTEGRAL_CONSTANT_H -#define EIGEN_INTEGRAL_CONSTANT_H - -namespace Eigen { - -namespace internal { - -template<int N> class FixedInt; -template<int N> class VariableAndFixedInt; - -/** \internal - * \class FixedInt - * - * This class embeds a compile-time integer \c N. - * - * It is similar to c++11 std::integral_constant<int,N> but with some additional features - * such as: - * - implicit conversion to int - * - arithmetic and some bitwise operators: -, +, *, /, %, &, | - * - c++98/14 compatibility with fix<N> and fix<N>() syntax to define integral constants. - * - * Dealing directly with this class FixedInt is strongly discouraged.
Instances are expected to - * be created by the user using Eigen::fix<N> or Eigen::fix<N>(). In C++98-11, the former syntax does - * not create a FixedInt<N> instance but rather a pointer to function that needs to be \em cleaned-up - * using the generic helper: - * \code - * internal::cleanup_index_type<T>::type - * internal::cleanup_index_type<T,DynamicKey>::type - * \endcode - * where T can be a FixedInt<N>, a pointer to function FixedInt<N> (*)(), or numerous other integer-like representations. - * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values. - * - * For convenience, you can extract the compile-time value \c N in a generic way using the following helper: - * \code - * internal::get_fixed_value<T,DefaultVal>::value - * \endcode - * that will give you \c N if T equals FixedInt<N> or FixedInt<N> (*)(), and \c DefaultVal if T does not embed any compile-time value (e.g., T==int). - * - * \sa fix<N>, class VariableAndFixedInt - */ -template<int N> class FixedInt -{ -public: - static const int value = N; - operator int() const { return value; } - FixedInt() {} - FixedInt( VariableAndFixedInt<N> other) { - EIGEN_ONLY_USED_FOR_DEBUG(other); - eigen_internal_assert(int(other)==N); - } - - FixedInt<-N> operator-() const { return FixedInt<-N>(); } - template<int M> - FixedInt<N+M> operator+( FixedInt<M>) const { return FixedInt<N+M>(); } - template<int M> - FixedInt<N-M> operator-( FixedInt<M>) const { return FixedInt<N-M>(); } - template<int M> - FixedInt<N*M> operator*( FixedInt<M>) const { return FixedInt<N*M>(); } - template<int M> - FixedInt<N/M> operator/( FixedInt<M>) const { return FixedInt<N/M>(); } - template<int M> - FixedInt<N%M> operator%( FixedInt<M>) const { return FixedInt<N%M>(); } - template<int M> - FixedInt<N|M> operator|( FixedInt<M>) const { return FixedInt<N|M>(); } - template<int M> - FixedInt<N&M> operator&( FixedInt<M>) const { return FixedInt<N&M>(); } - -#if EIGEN_HAS_CXX14 - // Needed in C++14 to allow fix<N>(): - FixedInt operator() () const { return *this; } - - VariableAndFixedInt<N> operator() (int val) const { return VariableAndFixedInt<N>(val); } -#else - FixedInt ( FixedInt<N> (*)() ) {} -#endif - -#if EIGEN_HAS_CXX11 - FixedInt(std::integral_constant<int,N>) {} -#endif -}; - -/** \internal - * \class VariableAndFixedInt - * - * This class embeds both a compile-time integer \c N and a runtime integer. - * Both values are supposed to be equal unless the compile-time value \c N has a special - * value meaning that the runtime-value should be used. Depending on the context, this special - * value can be either Eigen::Dynamic (for positive quantities) or Eigen::DynamicIndex (for - * quantities that can be negative). - * - * It is the return-type of the function Eigen::fix<N>(int), and most of the time this is the only - * way it is used. Dealing directly with instances of VariableAndFixedInt is strongly discouraged. - * Indeed, in order to write generic code, it is the responsibility of the callee to properly convert - * it to either a true compile-time quantity (i.e. a FixedInt<N>), or to a runtime quantity (e.g., an Index) - * using the following generic helper: - * \code - * internal::cleanup_index_type<T>::type - * internal::cleanup_index_type<T,DynamicKey>::type - * \endcode - * where T can be a template instantiation of VariableAndFixedInt or numerous other integer-like representations. - * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values.
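- *
- * As an editor's sketch (not part of the original comment), the two cleanup rules above act as follows:
- * \code
- * // N != DynamicKey: cleaned up to the pure compile-time FixedInt<3>
- * internal::cleanup_index_type<internal::VariableAndFixedInt<3> >::type
- * // N == DynamicKey: cleaned up to the pure runtime Index
- * internal::cleanup_index_type<internal::VariableAndFixedInt<Dynamic> >::type
- * \endcode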
- * - * For convenience, you can also extract the compile-time value \c N using the following helper: - * \code - * internal::get_fixed_value<T,DefaultVal>::value - * \endcode - * that will give you \c N if T equals VariableAndFixedInt<N>, and \c DefaultVal if T does not embed any compile-time value (e.g., T==int). - * - * \sa fix<N>(int), class FixedInt - */ -template<int N> class VariableAndFixedInt -{ -public: - static const int value = N; - operator int() const { return m_value; } - VariableAndFixedInt(int val) { m_value = val; } -protected: - int m_value; -}; - -template<typename T, int Default=Dynamic> struct get_fixed_value { - static const int value = Default; -}; - -template<int N,int Default> struct get_fixed_value<FixedInt<N>,Default> { - static const int value = N; -}; - -#if !EIGEN_HAS_CXX14 -template<int N,int Default> struct get_fixed_value<FixedInt<N> (*)(),Default> { - static const int value = N; -}; -#endif - -template<int N,int Default> struct get_fixed_value<VariableAndFixedInt<N>,Default> { - static const int value = N; -}; - -template<typename T, int N, int Default> -struct get_fixed_value<variable_if_dynamic<T,N>,Default> { - static const int value = N; -}; - -template<typename T> EIGEN_DEVICE_FUNC Index get_runtime_value(const T &x) { return x; } -#if !EIGEN_HAS_CXX14 -template<int N> EIGEN_DEVICE_FUNC Index get_runtime_value(FixedInt<N> (*)()) { return N; } -#endif - -// Cleanup integer/FixedInt/VariableAndFixedInt/etc types: - -// By default, no cleanup: -template<typename T, int DynamicKey=Dynamic, typename EnableIf=void> struct cleanup_index_type { typedef T type; }; - -// Convert any integral type (e.g., short, int, unsigned int, etc.) to Eigen::Index -template<typename T, int DynamicKey> struct cleanup_index_type<T,DynamicKey,typename internal::enable_if<internal::is_integral<T>::value>::type> { typedef Index type; }; - -#if !EIGEN_HAS_CXX14 -// In c++98/c++11, fix<N> is a pointer to function that we better cleanup to a true FixedInt<N>: -template<int N, int DynamicKey> struct cleanup_index_type<FixedInt<N> (*)(), DynamicKey> { typedef FixedInt<N> type; }; -#endif - -// If VariableAndFixedInt does not match DynamicKey, then we turn it to a pure compile-time value: -template<int N, int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<N>, DynamicKey> { typedef FixedInt<N> type; }; -// If VariableAndFixedInt matches DynamicKey, then we turn it to a pure runtime-value (aka Index): -template<int DynamicKey> struct cleanup_index_type<VariableAndFixedInt<DynamicKey>, DynamicKey> { typedef Index type; }; - -#if EIGEN_HAS_CXX11 -template<int N, int DynamicKey> struct cleanup_index_type<std::integral_constant<int,N>, DynamicKey> { typedef FixedInt<N> type; }; -#endif - -} // end namespace internal - -#ifndef EIGEN_PARSED_BY_DOXYGEN - -#if EIGEN_HAS_CXX14 -template<int N> -static const internal::FixedInt<N> fix{}; -#else -template<int N> -inline internal::FixedInt<N> fix() { return internal::FixedInt<N>(); } - -// The generic typename T is mandatory. Otherwise, code like fix<N> could refer to either the function above or this next overload. -// This way code like fix<N> can only refer to the previous function. -template<int N,typename T> -inline internal::VariableAndFixedInt<N> fix(T val) { return internal::VariableAndFixedInt<N>(val); } -#endif - -#else // EIGEN_PARSED_BY_DOXYGEN - -/** \var fix<N>() - * \ingroup Core_Module - * - * This \em identifier makes it possible to construct an object embedding a compile-time integer \c N.
- * - * \tparam N the compile-time integer value - * - * It is typically used in conjunction with the Eigen::seq and Eigen::seqN functions to pass compile-time values to them: - * \code - * seqN(10,fix<4>,fix<-3>) // <=> [10 7 4 1] - * \endcode - * - * See also the function fix(int) to pass both a compile-time and runtime value. - * - * In c++14, it is implemented as: - * \code - * template<int N> static const internal::FixedInt<N> fix{}; - * \endcode - * where internal::FixedInt<N> is an internal template class similar to - * <a href="http://en.cppreference.com/w/cpp/types/integral_constant">\c std::integral_constant </a><tt> <int,N> </tt> - * Here, \c fix<N> is thus an object of type \c internal::FixedInt<N>. - * - * In c++98/11, it is implemented as a function: - * \code - * template<int N> inline internal::FixedInt<N> fix(); - * \endcode - * Here, \c fix<N> is thus a pointer to function. - * - * If for some reason you want a true object in c++98 then you can write: \code fix<N>() \endcode which is also valid in c++14. - * - * \sa fix<N>(int), seq, seqN - */ -template<int N> -static const auto fix(); - -/** \fn fix<N>(int) - * \ingroup Core_Module - * - * This function returns an object embedding both a compile-time integer \c N and a fallback runtime value \a val. - * - * \tparam N the compile-time integer value - * \param val the fallback runtime integer value - * - * This function is a more general version of the \ref fix identifier/function that can be used in template code - * where the compile-time value could turn out to actually mean "undefined at compile-time". For positive integers - * such as a size or a dimension, this case is identified by Eigen::Dynamic, whereas runtime signed integers - * (e.g., an increment/stride) are identified as Eigen::DynamicIndex. In such a case, the runtime value \a val - * will be used as a fallback. - * - * A typical use case would be: - * \code - * template<typename Derived> void foo(const MatrixBase<Derived> &mat) { - * const int N = Derived::RowsAtCompileTime==Dynamic ? Dynamic : Derived::RowsAtCompileTime/2; - * const int n = mat.rows()/2; - * ... mat( seqN(0,fix<N>(n)) ) ...; - * } - * \endcode - * In this example, the function Eigen::seqN knows that the second argument is expected to be a size. - * If the passed compile-time value N equals Eigen::Dynamic, then the proxy object returned by fix will be dismissed, and converted to an Eigen::Index of value \c n. - * Otherwise, the runtime value \c n will be dismissed, and the returned ArithmeticSequence will be of the exact same type as <tt> seqN(0,fix<N>) </tt>.
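- *
- * To make the two outcomes concrete, an editor's sketch reusing the hypothetical \c foo template above:
- * \code
- * Matrix4d m4;         // RowsAtCompileTime==4, so N==2 and the runtime n==2 is dismissed
- * MatrixXd md(10,10);  // RowsAtCompileTime==Dynamic, so N==Dynamic and the runtime n==5 is used
- * foo(m4);
- * foo(md);
- * \endcode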
- * - * \sa fix, seqN, class ArithmeticSequence - */ -template<int N> -static const auto fix(int val); - -#endif // EIGEN_PARSED_BY_DOXYGEN - -} // end namespace Eigen - -#endif // EIGEN_INTEGRAL_CONSTANT_H diff --git a/eigen/Eigen/src/Core/util/Macros.h b/eigen/Eigen/src/Core/util/Macros.h index 14ec87d..38d6ddb 100644 --- a/eigen/Eigen/src/Core/util/Macros.h +++ b/eigen/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 3 -#define EIGEN_MINOR_VERSION 90 +#define EIGEN_MINOR_VERSION 4 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -23,7 +23,7 @@ /// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC #ifdef __GNUC__ - #define EIGEN_COMP_GNUC (__GNUC__*10+__GNUC_MINOR__) + #define EIGEN_COMP_GNUC 1 #else #define EIGEN_COMP_GNUC 0 #endif @@ -349,14 +349,6 @@ # define __has_feature(x) 0 #endif -// Some old compilers do not support template specializations like: -// template<typename T,int N> void foo(const T x[N]); -#if !( EIGEN_COMP_CLANG && ((EIGEN_COMP_CLANG<309) || defined(__apple_build_version__)) || EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<49) -#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 1 -#else -#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 0 -#endif - // Upperbound on the C++ version to use. // Expected values are 03, 11, 14, 17, etc. // By default, let's use an arbitrarily large C++ version. @@ -370,11 +362,6 @@ #define EIGEN_HAS_CXX11 0 #endif -#if EIGEN_MAX_CPP_VER>=14 && (defined(__cplusplus) && (__cplusplus > 201103L) || EIGEN_COMP_MSVC >= 1910) -#define EIGEN_HAS_CXX14 1 -#else -#define EIGEN_HAS_CXX14 0 -#endif // Do we support r-value references? #ifndef EIGEN_HAS_RVALUE_REFERENCES @@ -393,8 +380,7 @@ #if EIGEN_MAX_CPP_VER>=11 && \ ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ - || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \ - || (EIGEN_COMP_MSVC >= 1900) || defined(__SYCL_DEVICE_ONLY__)) + || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER))) #define EIGEN_HAS_C99_MATH 1 #else #define EIGEN_HAS_C99_MATH 0 @@ -413,12 +399,10 @@ // Does the compiler support variadic templates? #ifndef EIGEN_HAS_VARIADIC_TEMPLATES #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ - && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) + && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // this prevents nvcc from crashing when compiling Eigen on Tegra X1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 -#elif EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) && defined(__SYCL_DEVICE_ONLY__) -#define EIGEN_HAS_VARIADIC_TEMPLATES 1 #else #define EIGEN_HAS_VARIADIC_TEMPLATES 0 #endif @@ -427,14 +411,13 @@ // Does the compiler fully support const expressions? 
(as in c++14) #ifndef EIGEN_HAS_CONSTEXPR -#if defined(__CUDACC__) +#ifdef __CUDACC__ // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) #define EIGEN_HAS_CONSTEXPR 1 #endif #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ - (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)) || \ - (EIGEN_COMP_CLANG >= 306 && (__cplusplus > 199711L))) + (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L))) #define EIGEN_HAS_CONSTEXPR 1 #endif @@ -542,8 +525,8 @@ // - static is not very good because it prevents definitions from different object files to be merged. // So static causes the resulting linked executable to be bloated with multiple copies of the same function. // - inline is not perfect either as it unwantedly hints the compiler toward inlining the function. -#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC -#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC inline +#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline #ifdef NDEBUG # ifndef EIGEN_NO_DEBUG @@ -641,14 +624,6 @@ namespace Eigen { #endif -#if EIGEN_COMP_MSVC - // NOTE MSVC often gives C4127 warnings with compiletime if statements. See bug 1362. - // This workaround is ugly, but it does the job. -# define EIGEN_CONST_CONDITIONAL(cond) (void)0, cond -#else -# define EIGEN_CONST_CONDITIONAL(cond) cond -#endif - //------------------------------------------------------------------------------------------ // Static and dynamic alignment control // @@ -878,8 +853,7 @@ namespace Eigen { typedef typename Eigen::internal::ref_selector<Derived>::type Nested; \ typedef typename Eigen::internal::traits<Derived>::StorageKind StorageKind; \ typedef typename Eigen::internal::traits<Derived>::StorageIndex StorageIndex; \ - enum CompileTimeTraits \ - { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \ + enum { RowsAtCompileTime = Eigen::internal::traits<Derived>::RowsAtCompileTime, \ ColsAtCompileTime = Eigen::internal::traits<Derived>::ColsAtCompileTime, \ Flags = Eigen::internal::traits<Derived>::Flags, \ SizeAtCompileTime = Base::SizeAtCompileTime, \ diff --git a/eigen/Eigen/src/Core/util/Memory.h b/eigen/Eigen/src/Core/util/Memory.h index 7d90534..c634d7e 100644 --- a/eigen/Eigen/src/Core/util/Memory.h +++ b/eigen/Eigen/src/Core/util/Memory.h @@ -63,7 +63,7 @@ namespace Eigen { namespace internal { -EIGEN_DEVICE_FUNC +EIGEN_DEVICE_FUNC inline void throw_std_bad_alloc() { #ifdef EIGEN_EXCEPTIONS @@ -114,7 +114,7 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = void *previous_aligned = static_cast<char *>(original)+previous_offset; if(aligned!=previous_aligned) std::memmove(aligned, previous_aligned, size); - + *(reinterpret_cast<void**>(aligned) - 1) = original; return aligned; } @@ -142,7 +142,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() { eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)"); } -#else +#else EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {} #endif @@ -471,8 +471,8 @@ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index } /** 
\internal Returns the smallest integer multiple of \a base that is greater than or equal to \a size - */ -template<typename Index> + */ +template<typename Index> inline Index first_multiple(Index size, Index base) { return ((size+base-1)/base)*base; @@ -502,7 +502,7 @@ template<typename T> struct smart_copy_helper<T,false> { { std::copy(start, end, target); } }; -// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. +// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. template<typename T, bool UseMemmove> struct smart_memmove_helper; template<typename T> void smart_memmove(const T* start, const T* end, T* target) @@ -522,15 +522,15 @@ template<typename T> struct smart_memmove_helper<T,true> { template<typename T> struct smart_memmove_helper<T,false> { static inline void run(const T* start, const T* end, T* target) - { + { if (UIntPtr(target) < UIntPtr(start)) { std::copy(start, end, target); } - else + else { std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T); - std::copy_backward(start, end, target + count); + std::copy_backward(start, end, target + count); } } }; @@ -603,7 +603,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) { std::swap(a.ptr(),b.ptr()); } - + } // end namespace internal /** \internal @@ -622,7 +622,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) * The underlying stack allocation function can be controlled with the EIGEN_ALLOCA preprocessor token. */ #ifdef EIGEN_ALLOCA - + #if EIGEN_DEFAULT_ALIGN_BYTES>0 // We always manually re-align the result of EIGEN_ALLOCA. // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. @@ -645,7 +645,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \ TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \ Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ?
NAME : 0,SIZE,true) - + #endif @@ -701,7 +701,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b) * Example: * \code * // Matrix4f requires 16 bytes alignment: -* std::map< int, Matrix4f, std::less<int>, +* std::map< int, Matrix4f, std::less<int>, * aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4; * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator: * std::map< int, Vector3f > my_map_vec3; diff --git a/eigen/Eigen/src/Core/util/Meta.h b/eigen/Eigen/src/Core/util/Meta.h index 8de6055..7f63707 100644 --- a/eigen/Eigen/src/Core/util/Meta.h +++ b/eigen/Eigen/src/Core/util/Meta.h @@ -97,22 +97,17 @@ template<> struct is_arithmetic<unsigned int> { enum { value = true }; }; template<> struct is_arithmetic<signed long> { enum { value = true }; }; template<> struct is_arithmetic<unsigned long> { enum { value = true }; }; -#if EIGEN_HAS_CXX11 -using std::is_integral; -#else -template<typename T> struct is_integral { enum { value = false }; }; -template<> struct is_integral<bool> { enum { value = true }; }; -template<> struct is_integral<char> { enum { value = true }; }; -template<> struct is_integral<signed char> { enum { value = true }; }; -template<> struct is_integral<unsigned char> { enum { value = true }; }; -template<> struct is_integral<signed short> { enum { value = true }; }; -template<> struct is_integral<unsigned short> { enum { value = true }; }; -template<> struct is_integral<signed int> { enum { value = true }; }; -template<> struct is_integral<unsigned int> { enum { value = true }; }; -template<> struct is_integral<signed long> { enum { value = true }; }; -template<> struct is_integral<unsigned long> { enum { value = true }; }; -#endif - +template<typename T> struct is_integral { enum { value = false }; }; +template<> struct is_integral<bool> { enum { value = true }; }; +template<> struct is_integral<char> { enum { value = true }; }; +template<> struct is_integral<signed char> { enum { value = true }; }; +template<> struct is_integral<unsigned char> { enum { value = true }; }; +template<> struct is_integral<signed short> { enum { value = true }; }; +template<> struct is_integral<unsigned short> { enum { value = true }; }; +template<> struct is_integral<signed int> { enum { value = true }; }; +template<> struct is_integral<unsigned int> { enum { value = true }; }; +template<> struct is_integral<signed long> { enum { value = true }; }; +template<> struct is_integral<unsigned long> { enum { value = true }; } template <typename T> struct add_const { typedef const T type; }; template <typename T> struct add_const<T&> { typedef T& type; }; @@ -284,59 +279,6 @@ protected: }; /** \internal - * Provides access to the number of elements of the object as a compile-time constant expression. - * It "returns" Eigen::Dynamic if the size cannot be resolved at compile-time (default). - * - * Similar to std::tuple_size, but more general. - * - * It currently supports: - * - any type T defining T::SizeAtCompileTime - * - plain C arrays as T[N] - * - std::array (c++11) - * - some internal types such as SingleRange and AllRange - * - * The second template parameter eases SFINAE-based specializations.
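- *
- * A few illustrative instantiations (an editor's addition, not in the original comment):
- * \code
- * internal::array_size<Vector3f>::value           // 3, via Vector3f::SizeAtCompileTime
- * internal::array_size<int(&)[4]>::value          // 4, via the plain C array specialization
- * internal::array_size<std::vector<int> >::value  // Dynamic: no compile-time size available
- * \endcode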
- */ -template<typename T, typename EnableIf = void> struct array_size { - enum { value = Dynamic }; -}; - -template<typename T> struct array_size<T,typename internal::enable_if<((T::SizeAtCompileTime&0)==0)>::type> { - enum { value = T::SizeAtCompileTime }; -}; - -template<typename T, int N> struct array_size<const T (&)[N]> { - enum { value = N }; -}; -template<typename T, int N> struct array_size<T (&)[N]> { - enum { value = N }; -}; - -#if EIGEN_HAS_CXX11 -template<typename T, std::size_t N> struct array_size<const std::array<T,N> > { - enum { value = N }; -}; -template<typename T, std::size_t N> struct array_size<std::array<T,N> > { - enum { value = N }; -}; -#endif - -/** \internal - * Analogue of the std::size free function. - * It returns the size of the container or view \a x of type \c T - * - * It currently supports: - * - any type T defining a member T::size() const - * - plain C arrays as T[N] - * - */ -template<typename T> -Index size(const T& x) { return x.size(); } - -template<typename T,std::size_t N> -Index size(const T (&) [N]) { return N; } - -/** \internal * Convenient struct to get the result type of a unary or binary functor. * * It supports both the current STL mechanism (using the result_type member) as well as @@ -433,10 +375,10 @@ struct meta_no { char a[2]; }; template <typename T> struct has_ReturnType { - template <typename C> static meta_yes testFunctor(C const *, typename C::ReturnType const * = 0); - template <typename C> static meta_no testFunctor(...); + template <typename C> static meta_yes testFunctor(typename C::ReturnType const *); + template <typename C> static meta_no testFunctor(...); - enum { value = sizeof(testFunctor<T>(static_cast<T*>(0))) == sizeof(meta_yes) }; + enum { value = sizeof(testFunctor<T>(0)) == sizeof(meta_yes) }; }; template<typename T> const T* return_ptr(); diff --git a/eigen/Eigen/src/Core/util/SymbolicIndex.h b/eigen/Eigen/src/Core/util/SymbolicIndex.h deleted file mode 100644 index bb6349e..0000000 --- a/eigen/Eigen/src/Core/util/SymbolicIndex.h +++ /dev/null @@ -1,300 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2017 Gael Guennebaud <gael.guennebaud@inria.fr> -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_SYMBOLIC_INDEX_H -#define EIGEN_SYMBOLIC_INDEX_H - -namespace Eigen { - -/** \namespace Eigen::Symbolic - * \ingroup Core_Module - * - * This namespace defines a set of classes and functions to build and evaluate symbolic expressions of scalar type Index. - * Here is a simple example: - * - * \code - * // First step, defines symbols: - * struct x_tag {}; static const Symbolic::SymbolExpr<x_tag> x; - * struct y_tag {}; static const Symbolic::SymbolExpr<y_tag> y; - * struct z_tag {}; static const Symbolic::SymbolExpr<z_tag> z; - * - * // Defines an expression: - * auto expr = (x+3)/y+z; - * - * // And evaluate it: (c++14) - * std::cout << expr.eval(x=6,y=3,z=-13) << "\n"; - * - * // In c++98/11, only one symbol per expression is supported for now: - * auto expr98 = (3-x)/2; - * std::cout << expr98.eval(x=6) << "\n"; - * \endcode - * - * It is currently only used internally to define and manipulate the placeholders::last and placeholders::end symbols in Eigen::seq and Eigen::seqN.
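- *
- * For instance (an editor's sketch of that internal use, not in the original comment), resolving
- * \c last against a concrete size boils down to:
- * \code
- * Index size = 10;
- * Index i = (last-1).eval(last=size-1); // i == 8
- * \endcode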
- * - */ -namespace Symbolic { - -template<typename Tag> class Symbol; -template<typename Arg0> class NegateExpr; -template<typename Arg1,typename Arg2> class AddExpr; -template<typename Arg1,typename Arg2> class ProductExpr; -template<typename Arg1,typename Arg2> class QuotientExpr; - -// A simple wrapper around an integral value to provide the eval method. -// We could also use a free-function symbolic_eval... -template<typename IndexType=Index> -class ValueExpr { -public: - ValueExpr(IndexType val) : m_value(val) {} - template<typename T> - IndexType eval_impl(const T&) const { return m_value; } -protected: - IndexType m_value; -}; - -// Specialization for compile-time value, -// It is similar to ValueExpr(N) but this version helps the compiler to generate better code. -template<int N> -class ValueExpr<internal::FixedInt<N> > { -public: - ValueExpr() {} - template<typename T> - Index eval_impl(const T&) const { return N; } -}; - - -/** \class BaseExpr - * \ingroup Core_Module - * Common base class of any symbolic expressions - */ -template<typename Derived> -class BaseExpr -{ -public: - const Derived& derived() const { return *static_cast<const Derived*>(this); } - - /** Evaluate the expression given the \a values of the symbols. - * - * \param values defines the values of the symbols, it can either be a SymbolValue or a std::tuple of SymbolValue - * as constructed by SymbolExpr::operator= operator. - * - */ - template<typename T> - Index eval(const T& values) const { return derived().eval_impl(values); } - -#if EIGEN_HAS_CXX14 - template<typename... Types> - Index eval(Types&&... values) const { return derived().eval_impl(std::make_tuple(values...)); } -#endif - - NegateExpr<Derived> operator-() const { return NegateExpr<Derived>(derived()); } - - AddExpr<Derived,ValueExpr<> > operator+(Index b) const - { return AddExpr<Derived,ValueExpr<> >(derived(), b); } - AddExpr<Derived,ValueExpr<> > operator-(Index a) const - { return AddExpr<Derived,ValueExpr<> >(derived(), -a); } - ProductExpr<Derived,ValueExpr<> > operator*(Index a) const - { return ProductExpr<Derived,ValueExpr<> >(derived(),a); } - QuotientExpr<Derived,ValueExpr<> > operator/(Index a) const - { return QuotientExpr<Derived,ValueExpr<> >(derived(),a); } - - friend AddExpr<Derived,ValueExpr<> > operator+(Index a, const BaseExpr& b) - { return AddExpr<Derived,ValueExpr<> >(b.derived(), a); } - friend AddExpr<NegateExpr<Derived>,ValueExpr<> > operator-(Index a, const BaseExpr& b) - { return AddExpr<NegateExpr<Derived>,ValueExpr<> >(-b.derived(), a); } - friend ProductExpr<ValueExpr<>,Derived> operator*(Index a, const BaseExpr& b) - { return ProductExpr<ValueExpr<>,Derived>(a,b.derived()); } - friend QuotientExpr<ValueExpr<>,Derived> operator/(Index a, const BaseExpr& b) - { return QuotientExpr<ValueExpr<>,Derived>(a,b.derived()); } - - template<int N> - AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>) const - { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N>) const - { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); } - template<int N> - ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N>) const - { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } - template<int N> - 
QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N>) const - { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } - - template<int N> - friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N>, const BaseExpr& b) - { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N>, const BaseExpr& b) - { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - friend ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N>, const BaseExpr& b) - { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } - template<int N> - friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N>, const BaseExpr& b) - { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } - -#if (!EIGEN_HAS_CXX14) - template<int N> - AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)()) const - { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > > operator-(internal::FixedInt<N> (*)()) const - { return AddExpr<Derived,ValueExpr<internal::FixedInt<-N> > >(derived(), ValueExpr<internal::FixedInt<-N> >()); } - template<int N> - ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator*(internal::FixedInt<N> (*)()) const - { return ProductExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } - template<int N> - QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator/(internal::FixedInt<N> (*)()) const - { return QuotientExpr<Derived,ValueExpr<internal::FixedInt<N> > >(derived(),ValueExpr<internal::FixedInt<N> >()); } - - template<int N> - friend AddExpr<Derived,ValueExpr<internal::FixedInt<N> > > operator+(internal::FixedInt<N> (*)(), const BaseExpr& b) - { return AddExpr<Derived,ValueExpr<internal::FixedInt<N> > >(b.derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - friend AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > > operator-(internal::FixedInt<N> (*)(), const BaseExpr& b) - { return AddExpr<NegateExpr<Derived>,ValueExpr<internal::FixedInt<N> > >(-b.derived(), ValueExpr<internal::FixedInt<N> >()); } - template<int N> - friend ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator*(internal::FixedInt<N> (*)(), const BaseExpr& b) - { return ProductExpr<ValueExpr<internal::FixedInt<N> >,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } - template<int N> - friend QuotientExpr<ValueExpr<internal::FixedInt<N> >,Derived> operator/(internal::FixedInt<N> (*)(), const BaseExpr& b) - { return QuotientExpr<ValueExpr<internal::FixedInt<N> > ,Derived>(ValueExpr<internal::FixedInt<N> >(),b.derived()); } -#endif - - - template<typename OtherDerived> - AddExpr<Derived,OtherDerived> operator+(const BaseExpr<OtherDerived> &b) const - { return AddExpr<Derived,OtherDerived>(derived(), b.derived()); } - - template<typename OtherDerived> - AddExpr<Derived,NegateExpr<OtherDerived> > operator-(const 
BaseExpr<OtherDerived> &b) const - { return AddExpr<Derived,NegateExpr<OtherDerived> >(derived(), -b.derived()); } - - template<typename OtherDerived> - ProductExpr<Derived,OtherDerived> operator*(const BaseExpr<OtherDerived> &b) const - { return ProductExpr<Derived,OtherDerived>(derived(), b.derived()); } - - template<typename OtherDerived> - QuotientExpr<Derived,OtherDerived> operator/(const BaseExpr<OtherDerived> &b) const - { return QuotientExpr<Derived,OtherDerived>(derived(), b.derived()); } -}; - -template<typename T> -struct is_symbolic { - // BaseExpr has no conversion ctor, so we only have to check whether T can be statically cast to its base class BaseExpr<T>. - enum { value = internal::is_convertible<T,BaseExpr<T> >::value }; -}; - -// Specialization for functions, because is_convertible fails in this case. -// Useful in c++98/11 mode when testing is_symbolic<decltype(fix<N>)> -template<typename T> -struct is_symbolic<T (*)()> { - enum { value = false }; -}; - -/** Represents the actual value of a symbol identified by its tag - * - * It is the return type of SymbolExpr::operator=, and most of the time this is the only way it is used. - */ -template<typename Tag> -class SymbolValue -{ -public: - /** Constructor from the value \a val */ - SymbolValue(Index val) : m_value(val) {} - - /** \returns the stored value of the symbol */ - Index value() const { return m_value; } -protected: - Index m_value; -}; - -/** Expression of a symbol uniquely identified by the template parameter type \c tag */ -template<typename tag> -class SymbolExpr : public BaseExpr<SymbolExpr<tag> > -{ -public: - /** Alias for the template parameter \c tag */ - typedef tag Tag; - - SymbolExpr() {} - - /** Associates the value \a val with the given symbol \c *this, uniquely identified by its \c Tag. - * - * The returned object should be passed to BaseExpr::eval() to evaluate a given expression with this specified runtime value. - */ - SymbolValue<Tag> operator=(Index val) const { - return SymbolValue<Tag>(val); - } - - Index eval_impl(const SymbolValue<Tag> &values) const { return values.value(); } - -#if EIGEN_HAS_CXX14 - // C++14 versions suitable for multiple symbols - template<typename...
Types> - Index eval_impl(const std::tuple<Types...>& values) const { return std::get<SymbolValue<Tag> >(values).value(); } -#endif -}; - -template<typename Arg0> -class NegateExpr : public BaseExpr<NegateExpr<Arg0> > -{ -public: - NegateExpr(const Arg0& arg0) : m_arg0(arg0) {} - - template<typename T> - Index eval_impl(const T& values) const { return -m_arg0.eval_impl(values); } -protected: - Arg0 m_arg0; -}; - -template<typename Arg0, typename Arg1> -class AddExpr : public BaseExpr<AddExpr<Arg0,Arg1> > -{ -public: - AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - - template<typename T> - Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) + m_arg1.eval_impl(values); } -protected: - Arg0 m_arg0; - Arg1 m_arg1; -}; - -template<typename Arg0, typename Arg1> -class ProductExpr : public BaseExpr<ProductExpr<Arg0,Arg1> > -{ -public: - ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - - template<typename T> - Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) * m_arg1.eval_impl(values); } -protected: - Arg0 m_arg0; - Arg1 m_arg1; -}; - -template<typename Arg0, typename Arg1> -class QuotientExpr : public BaseExpr<QuotientExpr<Arg0,Arg1> > -{ -public: - QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} - - template<typename T> - Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) / m_arg1.eval_impl(values); } -protected: - Arg0 m_arg0; - Arg1 m_arg1; -}; - -} // end namespace Symbolic - -} // end namespace Eigen - -#endif // EIGEN_SYMBOLIC_INDEX_H diff --git a/eigen/Eigen/src/Core/util/XprHelper.h b/eigen/Eigen/src/Core/util/XprHelper.h index 4b337f2..ba5bd18 100644 --- a/eigen/Eigen/src/Core/util/XprHelper.h +++ b/eigen/Eigen/src/Core/util/XprHelper.h @@ -109,7 +109,6 @@ template<typename T, int Value> class variable_if_dynamic EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic) EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return T(Value); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {} }; @@ -120,7 +119,6 @@ template<typename T> class variable_if_dynamic<T, Dynamic> public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return m_value; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; } }; @@ -673,7 +671,7 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_acces // Internal helper defining the cost of a scalar division for the type T. // The default heuristic can be specialized for each scalar type and architecture. -template<typename T,bool Vectorized=false,typename EnableIf = void> +template<typename T,bool Vectorized=false,typename EnaleIf = void> struct scalar_div_cost { enum { value = 8*NumTraits<T>::MulCost }; };
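The third template parameter of scalar_div_cost above is only an unused SFINAE hook; as the comment in the hunk says, the heuristic is meant to be specialized per scalar type and architecture. A hedged sketch of what such an override could look like (the cost value 14 is purely illustrative and not part of this patch):

namespace Eigen { namespace internal {
// Suppose float division costs roughly 14 multiply-equivalents on some target,
// whether vectorized or not; a partial specialization over Vectorized covers both:
template<bool Vectorized>
struct scalar_div_cost<float, Vectorized> { enum { value = 14 }; };
}} // end namespace internal, end namespace Eigen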