From 35f7829af10c61e33dd2e2a7a015058e11a11ea0 Mon Sep 17 00:00:00 2001 From: Stanislaw Halik Date: Sat, 25 Mar 2017 14:17:07 +0100 Subject: update --- eigen/Eigen/src/Core/util/BlasUtil.h | 239 +++++++-- eigen/Eigen/src/Core/util/CMakeLists.txt | 6 - eigen/Eigen/src/Core/util/Constants.h | 164 ++++-- eigen/Eigen/src/Core/util/DisableStupidWarnings.h | 35 +- eigen/Eigen/src/Core/util/ForwardDeclarations.h | 135 ++--- eigen/Eigen/src/Core/util/IndexedViewHelper.h | 187 +++++++ eigen/Eigen/src/Core/util/IntegralConstant.h | 270 ++++++++++ eigen/Eigen/src/Core/util/MKL_support.h | 50 +- eigen/Eigen/src/Core/util/Macros.h | 578 +++++++++++++++----- eigen/Eigen/src/Core/util/Memory.h | 491 +++++++++-------- eigen/Eigen/src/Core/util/Meta.h | 407 ++++++++++++-- eigen/Eigen/src/Core/util/ReenableStupidWarnings.h | 10 + eigen/Eigen/src/Core/util/StaticAssert.h | 40 +- eigen/Eigen/src/Core/util/SymbolicIndex.h | 300 +++++++++++ eigen/Eigen/src/Core/util/XprHelper.h | 584 +++++++++++++++++---- 15 files changed, 2749 insertions(+), 747 deletions(-) delete mode 100644 eigen/Eigen/src/Core/util/CMakeLists.txt create mode 100644 eigen/Eigen/src/Core/util/IndexedViewHelper.h create mode 100644 eigen/Eigen/src/Core/util/IntegralConstant.h create mode 100644 eigen/Eigen/src/Core/util/SymbolicIndex.h (limited to 'eigen/Eigen/src/Core/util') diff --git a/eigen/Eigen/src/Core/util/BlasUtil.h b/eigen/Eigen/src/Core/util/BlasUtil.h index 9d03af3..b1791fb 100644 --- a/eigen/Eigen/src/Core/util/BlasUtil.h +++ b/eigen/Eigen/src/Core/util/BlasUtil.h @@ -18,13 +18,13 @@ namespace Eigen { namespace internal { // forward declarations -template +template struct gebp_kernel; -template +template struct gemm_pack_rhs; -template +template struct gemm_pack_lhs; template< @@ -34,7 +34,9 @@ template< int ResStorageOrder> struct general_matrix_matrix_product; -template +template struct general_matrix_vector_product; @@ -42,22 +44,35 @@ template struct conj_if; template<> struct conj_if { template - inline T operator()(const T& x) { return numext::conj(x); } + inline T operator()(const T& x) const { return numext::conj(x); } template - inline T pconj(const T& x) { return internal::pconj(x); } + inline T pconj(const T& x) const { return internal::pconj(x); } }; template<> struct conj_if { template - inline const T& operator()(const T& x) { return x; } + inline const T& operator()(const T& x) const { return x; } template - inline const T& pconj(const T& x) { return x; } + inline const T& pconj(const T& x) const { return x; } +}; + +// Generic implementation for custom complex types. 
+template +struct conj_helper +{ + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; + + EIGEN_STRONG_INLINE Scalar pmadd(const LhsScalar& x, const RhsScalar& y, const Scalar& c) const + { return padd(c, pmul(x,y)); } + + EIGEN_STRONG_INLINE Scalar pmul(const LhsScalar& x, const RhsScalar& y) const + { return conj_if()(x) * conj_if()(y); } }; template struct conj_helper { - EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); } - EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); } }; template struct conj_helper, std::complex, false,true> @@ -109,39 +124,147 @@ template struct conj_helper struct get_factor { - static EIGEN_STRONG_INLINE To run(const From& x) { return x; } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return To(x); } }; template struct get_factor::Real> { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE typename NumTraits::Real run(const Scalar& x) { return numext::real(x); } }; + +template +class BlasVectorMapper { + public: + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasVectorMapper(Scalar *data) : m_data(data) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { + return m_data[i]; + } + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet load(Index i) const { + return ploadt(m_data + i); + } + + template + EIGEN_DEVICE_FUNC bool aligned(Index i) const { + return (UIntPtr(m_data+i)%sizeof(Packet))==0; + } + + protected: + Scalar* m_data; +}; + +template +class BlasLinearMapper { + public: + typedef typename packet_traits::type Packet; + typedef typename packet_traits::half HalfPacket; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE BlasLinearMapper(Scalar *data) : m_data(data) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void prefetch(int i) const { + internal::prefetch(&operator()(i)); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar& operator()(Index i) const { + return m_data[i]; + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { + return ploadt(m_data + i); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i) const { + return ploadt(m_data + i); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacket(Index i, const Packet &p) const { + pstoret(m_data + i, p); + } + + protected: + Scalar *m_data; +}; + // Lightweight helper class to access matrix coefficients. -// Yes, this is somehow redundant with Map<>, but this version is much much lighter, -// and so I hope better compilation performance (time and code quality). -template -class blas_data_mapper -{ +template +class blas_data_mapper { public: - blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {} - EIGEN_STRONG_INLINE Scalar& operator()(Index i, Index j) - { return m_data[StorageOrder==RowMajor ? 
j + i*m_stride : i + j*m_stride]; } + typedef typename packet_traits::type Packet; + typedef typename packet_traits::half HalfPacket; + + typedef BlasLinearMapper LinearMapper; + typedef BlasVectorMapper VectorMapper; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper(Scalar* data, Index stride) : m_data(data), m_stride(stride) {} + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE blas_data_mapper + getSubMapper(Index i, Index j) const { + return blas_data_mapper(&operator()(i, j), m_stride); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + return LinearMapper(&operator()(i, j)); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE VectorMapper getVectorMapper(Index i, Index j) const { + return VectorMapper(&operator()(i, j)); + } + + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Scalar& operator()(Index i, Index j) const { + return m_data[StorageOrder==RowMajor ? j + i*m_stride : i + j*m_stride]; + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { + return ploadt(&operator()(i, j)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketT load(Index i, Index j) const { + return ploadt(&operator()(i, j)); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE HalfPacket loadHalfPacket(Index i, Index j) const { + return ploadt(&operator()(i, j)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void scatterPacket(Index i, Index j, const SubPacket &p) const { + pscatter(&operator()(i, j), p, m_stride); + } + + template + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SubPacket gatherPacket(Index i, Index j) const { + return pgather(&operator()(i, j), m_stride); + } + + EIGEN_DEVICE_FUNC const Index stride() const { return m_stride; } + EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; } + + EIGEN_DEVICE_FUNC Index firstAligned(Index size) const { + if (UIntPtr(m_data)%sizeof(Scalar)) { + return -1; + } + return internal::first_default_aligned(m_data, size); + } + protected: - Scalar* EIGEN_RESTRICT m_data; - Index m_stride; + Scalar* EIGEN_RESTRICT m_data; + const Index m_stride; }; // lightweight helper class to access matrix coefficients (const version) template -class const_blas_data_mapper -{ +class const_blas_data_mapper : public blas_data_mapper { public: - const_blas_data_mapper(const Scalar* data, Index stride) : m_data(data), m_stride(stride) {} - EIGEN_STRONG_INLINE const Scalar& operator()(Index i, Index j) const - { return m_data[StorageOrder==RowMajor ? 
j + i*m_stride : i + j*m_stride]; } - protected: - const Scalar* EIGEN_RESTRICT m_data; - Index m_stride; + EIGEN_ALWAYS_INLINE const_blas_data_mapper(const Scalar *data, Index stride) : blas_data_mapper(data, stride) {} + + EIGEN_ALWAYS_INLINE const_blas_data_mapper getSubMapper(Index i, Index j) const { + return const_blas_data_mapper(&(this->operator()(i, j)), this->m_stride); + } }; @@ -171,13 +294,12 @@ template struct blas_traits }; // pop conjugate -template -struct blas_traits, Xpr> > - : blas_traits::type> +template +struct blas_traits, NestedXpr> > + : blas_traits { - typedef typename internal::remove_all::type NestedXpr; typedef blas_traits Base; - typedef CwiseUnaryOp, Xpr> XprType; + typedef CwiseUnaryOp, NestedXpr> XprType; typedef typename Base::ExtractType ExtractType; enum { @@ -189,27 +311,41 @@ struct blas_traits, Xpr> > }; // pop scalar multiple -template -struct blas_traits, Xpr> > - : blas_traits::type> +template +struct blas_traits, const CwiseNullaryOp,Plain>, NestedXpr> > + : blas_traits { - typedef typename internal::remove_all::type NestedXpr; typedef blas_traits Base; - typedef CwiseUnaryOp, Xpr> XprType; + typedef CwiseBinaryOp, const CwiseNullaryOp,Plain>, NestedXpr> XprType; typedef typename Base::ExtractType ExtractType; - static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); } + static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); } static inline Scalar extractScalarFactor(const XprType& x) - { return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); } + { return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); } }; +template +struct blas_traits, NestedXpr, const CwiseNullaryOp,Plain> > > + : blas_traits +{ + typedef blas_traits Base; + typedef CwiseBinaryOp, NestedXpr, const CwiseNullaryOp,Plain> > XprType; + typedef typename Base::ExtractType ExtractType; + static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); } + static inline Scalar extractScalarFactor(const XprType& x) + { return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; } +}; +template +struct blas_traits, const CwiseNullaryOp,Plain1>, + const CwiseNullaryOp,Plain2> > > + : blas_traits,Plain1> > +{}; // pop opposite -template -struct blas_traits, Xpr> > - : blas_traits::type> +template +struct blas_traits, NestedXpr> > + : blas_traits { - typedef typename internal::remove_all::type NestedXpr; typedef blas_traits Base; - typedef CwiseUnaryOp, Xpr> XprType; + typedef CwiseUnaryOp, NestedXpr> XprType; typedef typename Base::ExtractType ExtractType; static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); } static inline Scalar extractScalarFactor(const XprType& x) @@ -217,14 +353,13 @@ struct blas_traits, Xpr> > }; // pop/push transpose -template -struct blas_traits > - : blas_traits::type> +template +struct blas_traits > + : blas_traits { - typedef typename internal::remove_all::type NestedXpr; typedef typename NestedXpr::Scalar Scalar; typedef blas_traits Base; - typedef Transpose XprType; + typedef Transpose XprType; typedef Transpose ExtractType; // const to get rid of a compile error; anyway blas traits are only used on the RHS typedef Transpose _ExtractType; typedef typename conditional > enum { IsTransposed = Base::IsTransposed ? 
0 : 1 }; - static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); } + static inline ExtractType extract(const XprType& x) { return ExtractType(Base::extract(x.nestedExpression())); } static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); } }; diff --git a/eigen/Eigen/src/Core/util/CMakeLists.txt b/eigen/Eigen/src/Core/util/CMakeLists.txt deleted file mode 100644 index a1e2e52..0000000 --- a/eigen/Eigen/src/Core/util/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Core_util_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Core_util_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/Core/util COMPONENT Devel - ) diff --git a/eigen/Eigen/src/Core/util/Constants.h b/eigen/Eigen/src/Core/util/Constants.h index 78499ff..5d37e5d 100644 --- a/eigen/Eigen/src/Core/util/Constants.h +++ b/eigen/Eigen/src/Core/util/Constants.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // Copyright (C) 2007-2009 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -25,11 +25,23 @@ const int Dynamic = -1; */ const int DynamicIndex = 0xffffff; +/** This value means that the increment to go from one value to another in a sequence is not constant for each step. + */ +const int UndefinedIncr = 0xfffffe; + /** This value means +Infinity; it is currently used only as the p parameter to MatrixBase::lpNorm(). * The value Infinity there means the L-infinity norm. */ const int Infinity = -1; +/** This value means that the cost to evaluate an expression coefficient is either very expensive or + * cannot be known at compile time. + * + * This value has to be positive to (1) simplify cost computation, and (2) allow to distinguish between a very expensive and very very expensive expressions. + * It thus must also be large enough to make sure unrolling won't happen and that sub expressions will be evaluated, but not too large to avoid overflow. + */ +const int HugeCost = 10000; + /** \defgroup flags Flags * \ingroup Core_Module * @@ -48,19 +60,19 @@ const int Infinity = -1; * for a matrix, this means that the storage order is row-major. * If this bit is not set, the storage order is column-major. * For an expression, this determines the storage order of - * the matrix created by evaluation of that expression. - * \sa \ref TopicStorageOrders */ + * the matrix created by evaluation of that expression. + * \sa \blank \ref TopicStorageOrders */ const unsigned int RowMajorBit = 0x1; /** \ingroup flags - * * means the expression should be evaluated by the calling expression */ const unsigned int EvalBeforeNestingBit = 0x2; /** \ingroup flags - * + * \deprecated * means the expression should be evaluated before any assignment */ -const unsigned int EvalBeforeAssigningBit = 0x4; +EIGEN_DEPRECATED +const unsigned int EvalBeforeAssigningBit = 0x4; // FIXME deprecated /** \ingroup flags * @@ -141,17 +153,46 @@ const unsigned int LvalueBit = 0x20; */ const unsigned int DirectAccessBit = 0x40; -/** \ingroup flags +/** \deprecated \ingroup flags * - * means the first coefficient packet is guaranteed to be aligned */ -const unsigned int AlignedBit = 0x80; + * means the first coefficient packet is guaranteed to be aligned. + * An expression cannot has the AlignedBit without the PacketAccessBit flag. 
+ * In other words, this means we are allow to perform an aligned packet access to the first element regardless + * of the expression kind: + * \code + * expression.packet(0); + * \endcode + */ +EIGEN_DEPRECATED const unsigned int AlignedBit = 0x80; const unsigned int NestByRefBit = 0x100; +/** \ingroup flags + * + * for an expression, this means that the storage order + * can be either row-major or column-major. + * The precise choice will be decided at evaluation time or when + * combined with other expressions. + * \sa \blank \ref RowMajorBit, \ref TopicStorageOrders */ +const unsigned int NoPreferredStorageOrderBit = 0x200; + +/** \ingroup flags + * + * Means that the underlying coefficients can be accessed through pointers to the sparse (un)compressed storage format, + * that is, the expression provides: + * \code + inline const Scalar* valuePtr() const; + inline const Index* innerIndexPtr() const; + inline const Index* outerIndexPtr() const; + inline const Index* innerNonZeroPtr() const; + \endcode + */ +const unsigned int CompressedAccessBit = 0x400; + + // list of flags that are inherited by default const unsigned int HereditaryBits = RowMajorBit - | EvalBeforeNestingBit - | EvalBeforeAssigningBit; + | EvalBeforeNestingBit; /** \defgroup enums Enumerations * \ingroup Core_Module @@ -160,8 +201,8 @@ const unsigned int HereditaryBits = RowMajorBit */ /** \ingroup enums - * Enum containing possible values for the \p Mode parameter of - * MatrixBase::selfadjointView() and MatrixBase::triangularView(). */ + * Enum containing possible values for the \c Mode or \c UpLo parameter of + * MatrixBase::selfadjointView() and MatrixBase::triangularView(), and selfadjoint solvers. */ enum UpLoType { /** View matrix as a lower triangular matrix. */ Lower=0x1, @@ -186,12 +227,31 @@ enum UpLoType { }; /** \ingroup enums - * Enum for indicating whether an object is aligned or not. */ + * Enum for indicating whether a buffer is aligned or not. */ enum AlignmentType { - /** Object is not correctly aligned for vectorization. */ - Unaligned=0, - /** Object is aligned for vectorization. */ - Aligned=1 + Unaligned=0, /**< Data pointer has no specific alignment. */ + Aligned8=8, /**< Data pointer is aligned on a 8 bytes boundary. */ + Aligned16=16, /**< Data pointer is aligned on a 16 bytes boundary. */ + Aligned32=32, /**< Data pointer is aligned on a 32 bytes boundary. */ + Aligned64=64, /**< Data pointer is aligned on a 64 bytes boundary. */ + Aligned128=128, /**< Data pointer is aligned on a 128 bytes boundary. */ + AlignedMask=255, + Aligned=16, /**< \deprecated Synonym for Aligned16. */ +#if EIGEN_MAX_ALIGN_BYTES==128 + AlignedMax = Aligned128 +#elif EIGEN_MAX_ALIGN_BYTES==64 + AlignedMax = Aligned64 +#elif EIGEN_MAX_ALIGN_BYTES==32 + AlignedMax = Aligned32 +#elif EIGEN_MAX_ALIGN_BYTES==16 + AlignedMax = Aligned16 +#elif EIGEN_MAX_ALIGN_BYTES==8 + AlignedMax = Aligned8 +#elif EIGEN_MAX_ALIGN_BYTES==0 + AlignedMax = Unaligned +#else +#error Invalid value for EIGEN_MAX_ALIGN_BYTES +#endif }; /** \ingroup enums @@ -297,7 +357,7 @@ enum Default_t { Default }; /** \internal \ingroup enums * Used in AmbiVector. 
*/ -enum { +enum AmbiVectorMode { IsDense = 0, IsSparse }; @@ -406,10 +466,16 @@ namespace Architecture Generic = 0x0, SSE = 0x1, AltiVec = 0x2, + VSX = 0x3, + NEON = 0x4, #if defined EIGEN_VECTORIZE_SSE Target = SSE #elif defined EIGEN_VECTORIZE_ALTIVEC Target = AltiVec +#elif defined EIGEN_VECTORIZE_VSX + Target = VSX +#elif defined EIGEN_VECTORIZE_NEON + Target = NEON #else Target = Generic #endif @@ -417,8 +483,9 @@ namespace Architecture } /** \internal \ingroup enums - * Enum used as template parameter in GeneralProduct. */ -enum ProductImplType { CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct }; + * Enum used as template parameter in Product and product evaluators. */ +enum ProductImplType +{ DefaultProduct=0, LazyProduct, AliasFreeProduct, CoeffBasedProductMode, LazyCoeffBasedProductMode, OuterProduct, InnerProduct, GemvProduct, GemmProduct }; /** \internal \ingroup enums * Enum used in experimental parallel implementation. */ @@ -427,24 +494,57 @@ enum Action {GetAction, SetAction}; /** The type used to identify a dense storage. */ struct Dense {}; +/** The type used to identify a general sparse storage. */ +struct Sparse {}; + +/** The type used to identify a general solver (factored) storage. */ +struct SolverStorage {}; + +/** The type used to identify a permutation storage. */ +struct PermutationStorage {}; + +/** The type used to identify a permutation storage. */ +struct TranspositionsStorage {}; + /** The type used to identify a matrix expression */ struct MatrixXpr {}; /** The type used to identify an array expression */ struct ArrayXpr {}; +// An evaluator must define its shape. By default, it can be one of the following: +struct DenseShape { static std::string debugName() { return "DenseShape"; } }; +struct SolverShape { static std::string debugName() { return "SolverShape"; } }; +struct HomogeneousShape { static std::string debugName() { return "HomogeneousShape"; } }; +struct DiagonalShape { static std::string debugName() { return "DiagonalShape"; } }; +struct BandShape { static std::string debugName() { return "BandShape"; } }; +struct TriangularShape { static std::string debugName() { return "TriangularShape"; } }; +struct SelfAdjointShape { static std::string debugName() { return "SelfAdjointShape"; } }; +struct PermutationShape { static std::string debugName() { return "PermutationShape"; } }; +struct TranspositionsShape { static std::string debugName() { return "TranspositionsShape"; } }; +struct SparseShape { static std::string debugName() { return "SparseShape"; } }; + namespace internal { - /** \internal - * Constants for comparison functors - */ - enum ComparisonName { - cmp_EQ = 0, - cmp_LT = 1, - cmp_LE = 2, - cmp_UNORD = 3, - cmp_NEQ = 4 - }; -} + + // random access iterators based on coeff*() accessors. +struct IndexBased {}; + +// evaluator based on iterators to access coefficients. +struct IteratorBased {}; + +/** \internal + * Constants for comparison functors + */ +enum ComparisonName { + cmp_EQ = 0, + cmp_LT = 1, + cmp_LE = 2, + cmp_UNORD = 3, + cmp_NEQ = 4, + cmp_GT = 5, + cmp_GE = 6 +}; +} // end namespace internal } // end namespace Eigen diff --git a/eigen/Eigen/src/Core/util/DisableStupidWarnings.h b/eigen/Eigen/src/Core/util/DisableStupidWarnings.h index c53457a..4431f2f 100644 --- a/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +++ b/eigen/Eigen/src/Core/util/DisableStupidWarnings.h @@ -4,30 +4,36 @@ #ifdef _MSC_VER // 4100 - unreferenced formal parameter (occurred e.g. 
in aligned_allocator::destroy(pointer p)) // 4101 - unreferenced local variable - // 4127 - conditional expression is constant // 4181 - qualifier applied to reference type ignored // 4211 - nonstandard extension used : redefined extern to static // 4244 - 'argument' : conversion from 'type1' to 'type2', possible loss of data // 4273 - QtAlignedMalloc, inconsistent DLL linkage // 4324 - structure was padded due to declspec(align()) + // 4503 - decorated name length exceeded, name was truncated // 4512 - assignment operator could not be generated // 4522 - 'class' : multiple assignment operators specified // 4700 - uninitialized local variable 'xyz' used + // 4714 - function marked as __forceinline not inlined // 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow + // 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning) #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning( push ) #endif - #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4512 4522 4700 4717 ) + #pragma warning( disable : 4100 4101 4181 4211 4244 4273 4324 4503 4512 4522 4700 4714 4717 4800) + #elif defined __INTEL_COMPILER // 2196 - routine is both "inline" and "noinline" ("noinline" assumed) // ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body // typedef that may be a reference type. // 279 - controlling expression is constant // ICC 12 generates this warning on assert(constant_expression_depending_on_template_params) and frankly this is a legitimate use case. + // 1684 - conversion from pointer to same-sized integral type (potential portability problem) + // 2259 - non-pointer conversion from "Eigen::Index={ptrdiff_t={long}}" to "int" may lose significant bits #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #pragma warning push #endif - #pragma warning disable 2196 279 + #pragma warning disable 2196 279 1684 2259 + #elif defined __clang__ // -Wconstant-logical-operand - warning: use of logical && with constant operand; switch to bitwise & or remove constant // this is really a stupid warning as it warns on compile-time expressions involving enums @@ -35,6 +41,9 @@ #pragma clang diagnostic push #endif #pragma clang diagnostic ignored "-Wconstant-logical-operand" + #if __clang_major__ >= 3 && __clang_minor__ >= 5 + #pragma clang diagnostic ignored "-Wabsolute-value" + #endif #elif defined __GNUC__ && __GNUC__>=6 @@ -45,4 +54,24 @@ #endif +#if defined __NVCC__ + // Disable the "statement is unreachable" message + #pragma diag_suppress code_is_unreachable + // Disable the "dynamic initialization in unreachable code" message + #pragma diag_suppress initialization_not_reachable + // Disable the "invalid error number" message that we get with older versions of nvcc + #pragma diag_suppress 1222 + // Disable the "calling a __host__ function from a __host__ __device__ function is not allowed" messages (yes, there are many of them and they seem to change with every version of the compiler) + #pragma diag_suppress 2527 + #pragma diag_suppress 2529 + #pragma diag_suppress 2651 + #pragma diag_suppress 2653 + #pragma diag_suppress 2668 + #pragma diag_suppress 2669 + #pragma diag_suppress 2670 + #pragma diag_suppress 2671 + #pragma diag_suppress 2735 + #pragma diag_suppress 2737 +#endif + #endif // not EIGEN_WARNINGS_DISABLED diff --git a/eigen/Eigen/src/Core/util/ForwardDeclarations.h b/eigen/Eigen/src/Core/util/ForwardDeclarations.h index f277720..1a48cff 100644 
--- a/eigen/Eigen/src/Core/util/ForwardDeclarations.h +++ b/eigen/Eigen/src/Core/util/ForwardDeclarations.h @@ -36,6 +36,10 @@ template struct accessors_level }; }; +template struct evaluator_traits; + +template< typename T> struct evaluator; + } // end namespace internal template struct NumTraits; @@ -51,18 +55,18 @@ class DenseCoeffsBase; template class ForceAlignedAccess; template class SwapWrapper; template class Block; +template class IndexedView; template class VectorBlock; template class Transpose; @@ -87,10 +92,11 @@ template class CwiseNullaryOp; template class CwiseUnaryOp; template class CwiseUnaryView; template class CwiseBinaryOp; -template class SelfCwiseBinaryOp; -template class ProductBase; -template class GeneralProduct; -template class CoeffBasedProduct; +template class CwiseTernaryOp; +template class Solve; +template class Inverse; + +template class Product; template class DiagonalBase; template class DiagonalWrapper; @@ -108,7 +114,12 @@ template::has_write_access ? WriteAccessors : ReadOnlyAccessors > class MapBase; template class Stride; +template class InnerStride; +template class OuterStride; template > class Map; +template class RefBase; +template,OuterStride<> >::type > class Ref; template class TriangularBase; template class TriangularView; @@ -119,10 +130,10 @@ template struct CommaInitializer; template class ReturnByValue; template class ArrayWrapper; template class MatrixWrapper; +template class SolverBase; +template class InnerIterator; namespace internal { -template struct solve_retval_base; -template struct solve_retval; template struct kernel_retval_base; template struct kernel_retval; template struct image_retval_base; @@ -135,6 +146,21 @@ template struct product_type; + +template struct EnableIf; + +/** \internal + * \class product_evaluator + * Products need their own evaluator with more template arguments allowing for + * easier partial template specializations. 
+ */ +template< typename T, + int ProductTag = internal::product_type::ret, + typename LhsShape = typename evaluator_traits::Shape, + typename RhsShape = typename evaluator_traits::Shape, + typename LhsScalar = typename traits::Scalar, + typename RhsScalar = typename traits::Scalar + > struct product_evaluator; } template struct conj_helper; -template struct scalar_sum_op; -template struct scalar_difference_op; -template struct scalar_conj_product_op; +template struct scalar_sum_op; +template struct scalar_difference_op; +template struct scalar_conj_product_op; +template struct scalar_min_op; +template struct scalar_max_op; template struct scalar_opposite_op; template struct scalar_conjugate_op; template struct scalar_real_op; @@ -160,6 +188,7 @@ template struct scalar_imag_op; template struct scalar_abs_op; template struct scalar_abs2_op; template struct scalar_sqrt_op; +template struct scalar_rsqrt_op; template struct scalar_exp_op; template struct scalar_log_op; template struct scalar_cos_op; @@ -167,24 +196,29 @@ template struct scalar_sin_op; template struct scalar_acos_op; template struct scalar_asin_op; template struct scalar_tan_op; -template struct scalar_pow_op; template struct scalar_inverse_op; template struct scalar_square_op; template struct scalar_cube_op; template struct scalar_cast_op; -template struct scalar_multiple_op; -template struct scalar_quotient1_op; -template struct scalar_min_op; -template struct scalar_max_op; template struct scalar_random_op; -template struct scalar_add_op; template struct scalar_constant_op; template struct scalar_identity_op; - +template struct scalar_sign_op; +template struct scalar_pow_op; +template struct scalar_hypot_op; template struct scalar_product_op; -template struct scalar_multiple2_op; template struct scalar_quotient_op; +// SpecialFunctions module +template struct scalar_lgamma_op; +template struct scalar_digamma_op; +template struct scalar_erf_op; +template struct scalar_erfc_op; +template struct scalar_igamma_op; +template struct scalar_igammac_op; +template struct scalar_zeta_op; +template struct scalar_betainc_op; + } // end namespace internal struct IOFormat; @@ -192,18 +226,18 @@ struct IOFormat; // Array module template class Array; @@ -221,7 +255,9 @@ template struct inverse_impl; template class HouseholderQR; template class ColPivHouseholderQR; template class FullPivHouseholderQR; +template class CompleteOrthogonalDecomposition; template class JacobiSVD; +template class BDCSVD; template class LLT; template class LDLT; template class HouseholderSequence; @@ -234,39 +270,16 @@ template class QuaternionBase; template class Rotation2D; template class AngleAxis; template class Translation; - -// Sparse module: -template class SparseMatrixBase; - -#ifdef EIGEN2_SUPPORT -template class eigen2_RotationBase; -template class eigen2_Cross; -template class eigen2_Quaternion; -template class eigen2_Rotation2D; -template class eigen2_AngleAxis; -template class eigen2_Transform; -template class eigen2_ParametrizedLine; -template class eigen2_Hyperplane; -template class eigen2_Translation; -template class eigen2_Scaling; -#endif - -#if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS -template class Quaternion; -template class Transform; -template class ParametrizedLine; -template class Hyperplane; -template class Scaling; -#endif - -#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS +template class AlignedBox; template class Quaternion; template class Transform; template class ParametrizedLine; template class Hyperplane; 
template class UniformScaling; template class Homogeneous; -#endif + +// Sparse module: +template class SparseMatrixBase; // MatrixFunctions module template struct MatrixExponentialReturnValue; @@ -274,7 +287,7 @@ template class MatrixFunctionReturnValue; template class MatrixSquareRootReturnValue; template class MatrixLogarithmReturnValue; template class MatrixPowerReturnValue; -template class MatrixPowerProduct; +template class MatrixComplexPowerReturnValue; namespace internal { template @@ -285,18 +298,6 @@ struct stem_function }; } - -#ifdef EIGEN2_SUPPORT -template class Cwise; -template class Minor; -template class LU; -template class QR; -template class SVD; -namespace internal { -template struct eigen2_part_return_type; -} -#endif - } // end namespace Eigen #endif // EIGEN_FORWARDDECLARATIONS_H diff --git a/eigen/Eigen/src/Core/util/IndexedViewHelper.h b/eigen/Eigen/src/Core/util/IndexedViewHelper.h new file mode 100644 index 0000000..ab01c85 --- /dev/null +++ b/eigen/Eigen/src/Core/util/IndexedViewHelper.h @@ -0,0 +1,187 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_INDEXED_VIEW_HELPER_H +#define EIGEN_INDEXED_VIEW_HELPER_H + +namespace Eigen { + +/** \namespace Eigen::placeholders + * \ingroup Core_Module + * + * Namespace containing symbolic placeholder and identifiers + */ +namespace placeholders { + +namespace internal { +struct symbolic_last_tag {}; +} + +/** \var last + * \ingroup Core_Module + * + * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last element/row/columns + * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&). + * + * This symbolic placeholder support standard arithmetic operation. + * + * A typical usage example would be: + * \code + * using namespace Eigen; + * using Eigen::placeholders::last; + * VectorXd v(n); + * v(seq(2,last-2)).setOnes(); + * \endcode + * + * \sa end + */ +static const Symbolic::SymbolExpr last; + +/** \var end + * \ingroup Core_Module + * + * Can be used as a parameter to Eigen::seq and Eigen::seqN functions to symbolically reference the last+1 element/row/columns + * of the underlying vector or matrix once passed to DenseBase::operator()(const RowIndices&, const ColIndices&). + * + * This symbolic placeholder support standard arithmetic operation. + * It is essentially an alias to last+1 + * + * \sa last + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN +static const auto end = last+1; +#else +// Using a FixedExpr<1> expression is important here to make sure the compiler +// can fully optimize the computation starting indices with zero overhead. 
+static const Symbolic::AddExpr,Symbolic::ValueExpr > > end(last+fix<1>()); +#endif + +} // end namespace placeholders + +namespace internal { + + // Replace symbolic last/end "keywords" by their true runtime value +inline Index eval_expr_given_size(Index x, Index /* size */) { return x; } + +template +FixedInt eval_expr_given_size(FixedInt x, Index /*size*/) { return x; } + +template +Index eval_expr_given_size(const Symbolic::BaseExpr &x, Index size) +{ + return x.derived().eval(placeholders::last=size-1); +} + +// Extract increment/step at compile time +template struct get_compile_time_incr { + enum { value = UndefinedIncr }; +}; + +// Analogue of std::get<0>(x), but tailored for our needs. +template +Index first(const T& x) { return x.first(); } + +// IndexedViewCompatibleType/makeIndexedViewCompatible turn an arbitrary object of type T into something usable by MatrixSlice +// The generic implementation is a no-op +template +struct IndexedViewCompatibleType { + typedef T type; +}; + +template +const T& makeIndexedViewCompatible(const T& x, Index /*size*/, Q) { return x; } + +//-------------------------------------------------------------------------------- +// Handling of a single Index +//-------------------------------------------------------------------------------- + +struct SingleRange { + enum { + SizeAtCompileTime = 1 + }; + SingleRange(Index val) : m_value(val) {} + Index operator[](Index) const { return m_value; } + Index size() const { return 1; } + Index first() const { return m_value; } + Index m_value; +}; + +template<> struct get_compile_time_incr { + enum { value = 1 }; // 1 or 0 ?? +}; + +// Turn a single index into something that looks like an array (i.e., that exposes a .size(), and operatro[](int) methods) +template +struct IndexedViewCompatibleType::value>::type> { + // Here we could simply use Array, but maybe it's less work for the compiler to use + // a simpler wrapper as SingleRange + //typedef Eigen::Array type; + typedef SingleRange type; +}; + +template +struct IndexedViewCompatibleType::value>::type> { + typedef SingleRange type; +}; + + +template +typename enable_if::value,SingleRange>::type +makeIndexedViewCompatible(const T& id, Index size, SpecializedType) { + return eval_expr_given_size(id,size); +} + +//-------------------------------------------------------------------------------- +// Handling of all +//-------------------------------------------------------------------------------- + +struct all_t { all_t() {} }; + +// Convert a symbolic 'all' into a usable range type +template +struct AllRange { + enum { SizeAtCompileTime = XprSize }; + AllRange(Index size = XprSize) : m_size(size) {} + Index operator[](Index i) const { return i; } + Index size() const { return m_size.value(); } + Index first() const { return 0; } + variable_if_dynamic m_size; +}; + +template +struct IndexedViewCompatibleType { + typedef AllRange type; +}; + +template +inline AllRange::value> makeIndexedViewCompatible(all_t , XprSizeType size, SpecializedType) { + return AllRange::value>(size); +} + +template struct get_compile_time_incr > { + enum { value = 1 }; +}; + +} // end namespace internal + + +namespace placeholders { + +/** \var all + * \ingroup Core_Module + * Can be used as a parameter to DenseBase::operator()(const RowIndices&, const ColIndices&) to index all rows or columns + */ +static const Eigen::internal::all_t all; + +} + +} // end namespace Eigen + +#endif // EIGEN_INDEXED_VIEW_HELPER_H diff --git a/eigen/Eigen/src/Core/util/IntegralConstant.h 
b/eigen/Eigen/src/Core/util/IntegralConstant.h new file mode 100644 index 0000000..78a4705 --- /dev/null +++ b/eigen/Eigen/src/Core/util/IntegralConstant.h @@ -0,0 +1,270 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_INTEGRAL_CONSTANT_H +#define EIGEN_INTEGRAL_CONSTANT_H + +namespace Eigen { + +namespace internal { + +template class FixedInt; +template class VariableAndFixedInt; + +/** \internal + * \class FixedInt + * + * This class embeds a compile-time integer \c N. + * + * It is similar to c++11 std::integral_constant but with some additional features + * such as: + * - implicit conversion to int + * - arithmetic and some bitwise operators: -, +, *, /, %, &, | + * - c++98/14 compatibility with fix and fix() syntax to define integral constants. + * + * It is strongly discouraged to directly deal with this class FixedInt. Instances are expcected to + * be created by the user using Eigen::fix or Eigen::fix(). In C++98-11, the former syntax does + * not create a FixedInt instance but rather a point to function that needs to be \em cleaned-up + * using the generic helper: + * \code + * internal::cleanup_index_type::type + * internal::cleanup_index_type::type + * \endcode + * where T can a FixedInt, a pointer to function FixedInt (*)(), or numerous other integer-like representations. + * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values. + * + * For convenience, you can extract the compile-time value \c N in a generic way using the following helper: + * \code + * internal::get_fixed_value::value + * \endcode + * that will give you \c N if T equals FixedInt or FixedInt (*)(), and \c DefaultVal if T does not embed any compile-time value (e.g., T==int). + * + * \sa fix, class VariableAndFixedInt + */ +template class FixedInt +{ +public: + static const int value = N; + operator int() const { return value; } + FixedInt() {} + FixedInt( VariableAndFixedInt other) { + EIGEN_ONLY_USED_FOR_DEBUG(other); + eigen_internal_assert(int(other)==N); + } + + FixedInt<-N> operator-() const { return FixedInt<-N>(); } + template + FixedInt operator+( FixedInt) const { return FixedInt(); } + template + FixedInt operator-( FixedInt) const { return FixedInt(); } + template + FixedInt operator*( FixedInt) const { return FixedInt(); } + template + FixedInt operator/( FixedInt) const { return FixedInt(); } + template + FixedInt operator%( FixedInt) const { return FixedInt(); } + template + FixedInt operator|( FixedInt) const { return FixedInt(); } + template + FixedInt operator&( FixedInt) const { return FixedInt(); } + +#if EIGEN_HAS_CXX14 + // Needed in C++14 to allow fix(): + FixedInt operator() () const { return *this; } + + VariableAndFixedInt operator() (int val) const { return VariableAndFixedInt(val); } +#else + FixedInt ( FixedInt (*)() ) {} +#endif + +#if EIGEN_HAS_CXX11 + FixedInt(std::integral_constant) {} +#endif +}; + +/** \internal + * \class VariableAndFixedInt + * + * This class embeds both a compile-time integer \c N and a runtime integer. + * Both values are supposed to be equal unless the compile-time value \c N has a special + * value meaning that the runtime-value should be used. 
Depending on the context, this special + * value can be either Eigen::Dynamic (for positive quantities) or Eigen::DynamicIndex (for + * quantities that can be negative). + * + * It is the return-type of the function Eigen::fix(int), and most of the time this is the only + * way it is used. It is strongly discouraged to directly deal with instances of VariableAndFixedInt. + * Indeed, in order to write generic code, it is the responsibility of the callee to properly convert + * it to either a true compile-time quantity (i.e. a FixedInt), or to a runtime quantity (e.g., an Index) + * using the following generic helper: + * \code + * internal::cleanup_index_type::type + * internal::cleanup_index_type::type + * \endcode + * where T can be a template instantiation of VariableAndFixedInt or numerous other integer-like representations. + * \c DynamicKey is either Dynamic (default) or DynamicIndex and used to identify true compile-time values. + * + * For convenience, you can also extract the compile-time value \c N using the following helper: + * \code + * internal::get_fixed_value::value + * \endcode + * that will give you \c N if T equals VariableAndFixedInt, and \c DefaultVal if T does not embed any compile-time value (e.g., T==int). + * + * \sa fix(int), class FixedInt + */ +template class VariableAndFixedInt +{ +public: + static const int value = N; + operator int() const { return m_value; } + VariableAndFixedInt(int val) { m_value = val; } +protected: + int m_value; +}; + +template struct get_fixed_value { + static const int value = Default; +}; + +template struct get_fixed_value,Default> { + static const int value = N; +}; + +#if !EIGEN_HAS_CXX14 +template struct get_fixed_value (*)(),Default> { + static const int value = N; +}; +#endif + +template struct get_fixed_value,Default> { + static const int value = N ; +}; + +template +struct get_fixed_value,Default> { + static const int value = N; +}; + +template EIGEN_DEVICE_FUNC Index get_runtime_value(const T &x) { return x; } +#if !EIGEN_HAS_CXX14 +template EIGEN_DEVICE_FUNC Index get_runtime_value(FixedInt (*)()) { return N; } +#endif + +// Cleanup integer/FixedInt/VariableAndFixedInt/etc types: + +// By default, no cleanup: +template struct cleanup_index_type { typedef T type; }; + +// Convert any integral type (e.g., short, int, unsigned int, etc.) to Eigen::Index +template struct cleanup_index_type::value>::type> { typedef Index type; }; + +#if !EIGEN_HAS_CXX14 +// In c++98/c++11, fix is a pointer to function that we better cleanup to a true FixedInt: +template struct cleanup_index_type (*)(), DynamicKey> { typedef FixedInt type; }; +#endif + +// If VariableAndFixedInt does not match DynamicKey, then we turn it to a pure compile-time value: +template struct cleanup_index_type, DynamicKey> { typedef FixedInt type; }; +// If VariableAndFixedInt matches DynamicKey, then we turn it to a pure runtime-value (aka Index): +template struct cleanup_index_type, DynamicKey> { typedef Index type; }; + +#if EIGEN_HAS_CXX11 +template struct cleanup_index_type, DynamicKey> { typedef FixedInt type; }; +#endif + +} // end namespace internal + +#ifndef EIGEN_PARSED_BY_DOXYGEN + +#if EIGEN_HAS_CXX14 +template +static const internal::FixedInt fix{}; +#else +template +inline internal::FixedInt fix() { return internal::FixedInt(); } + +// The generic typename T is mandatory. Otherwise, a code like fix could refer to either the function above or this next overload. +// This way a code like fix can only refer to the previous function. 
+template +inline internal::VariableAndFixedInt fix(T val) { return internal::VariableAndFixedInt(val); } +#endif + +#else // EIGEN_PARSED_BY_DOXYGEN + +/** \var fix() + * \ingroup Core_Module + * + * This \em identifier permits to construct an object embedding a compile-time integer \c N. + * + * \tparam N the compile-time integer value + * + * It is typically used in conjunction with the Eigen::seq and Eigen::seqN functions to pass compile-time values to them: + * \code + * seqN(10,fix<4>,fix<-3>) // <=> [10 7 4 1] + * \endcode + * + * See also the function fix(int) to pass both a compile-time and runtime value. + * + * In c++14, it is implemented as: + * \code + * template static const internal::FixedInt fix{}; + * \endcode + * where internal::FixedInt is an internal template class similar to + * \c std::integral_constant + * Here, \c fix is thus an object of type \c internal::FixedInt. + * + * In c++98/11, it is implemented as a function: + * \code + * template inline internal::FixedInt fix(); + * \endcode + * Here internal::FixedInt is thus a pointer to function. + * + * If for some reason you want a true object in c++98 then you can write: \code fix() \endcode which is also valid in c++14. + * + * \sa fix(int), seq, seqN + */ +template +static const auto fix(); + +/** \fn fix(int) + * \ingroup Core_Module + * + * This function returns an object embedding both a compile-time integer \c N, and a fallback runtime value \a val. + * + * \tparam N the compile-time integer value + * \param val the fallback runtime integer value + * + * This function is a more general version of the \ref fix identifier/function that can be used in template code + * where the compile-time value could turn out to actually mean "undefined at compile-time". For positive integers + * such as a size or a dimension, this case is identified by Eigen::Dynamic, whereas runtime signed integers + * (e.g., an increment/stride) are identified as Eigen::DynamicIndex. In such a case, the runtime value \a val + * will be used as a fallback. + * + * A typical use case would be: + * \code + * template void foo(const MatrixBase &mat) { + * const int N = Derived::RowsAtCompileTime==Dynamic ? Dynamic : Derived::RowsAtCompileTime/2; + * const int n = mat.rows()/2; + * ... mat( seqN(0,fix(n) ) ...; + * } + * \endcode + * In this example, the function Eigen::seqN knows that the second argument is expected to be a size. + * If the passed compile-time value N equals Eigen::Dynamic, then the proxy object returned by fix will be dissmissed, and converted to an Eigen::Index of value \c n. + * Otherwise, the runtime-value \c n will be dissmissed, and the returned ArithmeticSequence will be of the exact same type as seqN(0,fix) . 
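To ground the compile-time/runtime duality documented above, here is a minimal sketch of a caller following the seqN(0,fix<N>(n)) pattern from the example in the doc comment; the function name is illustrative, and the use of placeholders::all assumes the IndexedView API introduced alongside this header:

#include <Eigen/Dense>

template<typename Derived>
void zeroTopHalf(Eigen::MatrixBase<Derived>& mat)
{
  // Half the row count as a compile-time value when it is known, Eigen::Dynamic otherwise.
  const int N = Derived::RowsAtCompileTime==Eigen::Dynamic ? Eigen::Dynamic
                                                           : Derived::RowsAtCompileTime/2;
  const Eigen::Index n = mat.rows()/2;
  // fix<N>(n) keeps the compile-time size when N!=Dynamic and falls back to the runtime n otherwise,
  // so the resulting sequence has a fixed size exactly when the matrix does.
  mat(Eigen::seqN(0, Eigen::fix<N>(n)), Eigen::placeholders::all).setZero();
}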
+ * + * \sa fix, seqN, class ArithmeticSequence + */ +template +static const auto fix(int val); + +#endif // EIGEN_PARSED_BY_DOXYGEN + +} // end namespace Eigen + +#endif // EIGEN_INTEGRAL_CONSTANT_H diff --git a/eigen/Eigen/src/Core/util/MKL_support.h b/eigen/Eigen/src/Core/util/MKL_support.h index 1ef3b61..26b5966 100644 --- a/eigen/Eigen/src/Core/util/MKL_support.h +++ b/eigen/Eigen/src/Core/util/MKL_support.h @@ -49,7 +49,7 @@ #define EIGEN_USE_LAPACKE #endif -#if defined(EIGEN_USE_BLAS) || defined(EIGEN_USE_LAPACKE) || defined(EIGEN_USE_MKL_VML) +#if defined(EIGEN_USE_MKL_VML) #define EIGEN_USE_MKL #endif @@ -64,7 +64,6 @@ # ifndef EIGEN_USE_MKL /*If the MKL version is too old, undef everything*/ # undef EIGEN_USE_MKL_ALL -# undef EIGEN_USE_BLAS # undef EIGEN_USE_LAPACKE # undef EIGEN_USE_MKL_VML # undef EIGEN_USE_LAPACKE_STRICT @@ -73,7 +72,7 @@ #endif #if defined EIGEN_USE_MKL -#include + #define EIGEN_MKL_VML_THRESHOLD 128 /* MKL_DOMAIN_BLAS, etc are defined only in 10.3 update 7 */ @@ -107,52 +106,23 @@ #else #define EIGEN_MKL_DOMAIN_PARDISO MKL_PARDISO #endif +#endif namespace Eigen { typedef std::complex dcomplex; typedef std::complex scomplex; -namespace internal { - -template -static inline void assign_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) { - mklScalar=eigenScalar; -} - -template -static inline void assign_conj_scalar_eig2mkl(MKLType& mklScalar, const EigenType& eigenScalar) { - mklScalar=eigenScalar; -} - -template <> -inline void assign_scalar_eig2mkl(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) { - mklScalar.real=eigenScalar.real(); - mklScalar.imag=eigenScalar.imag(); -} - -template <> -inline void assign_scalar_eig2mkl(MKL_Complex8& mklScalar, const scomplex& eigenScalar) { - mklScalar.real=eigenScalar.real(); - mklScalar.imag=eigenScalar.imag(); -} - -template <> -inline void assign_conj_scalar_eig2mkl(MKL_Complex16& mklScalar, const dcomplex& eigenScalar) { - mklScalar.real=eigenScalar.real(); - mklScalar.imag=-eigenScalar.imag(); -} - -template <> -inline void assign_conj_scalar_eig2mkl(MKL_Complex8& mklScalar, const scomplex& eigenScalar) { - mklScalar.real=eigenScalar.real(); - mklScalar.imag=-eigenScalar.imag(); -} - -} // end namespace internal +#if defined(EIGEN_USE_MKL) +typedef MKL_INT BlasIndex; +#else +typedef int BlasIndex; +#endif } // end namespace Eigen +#if defined(EIGEN_USE_BLAS) +#include "../../misc/blas.h" #endif #endif // EIGEN_MKL_SUPPORT_H diff --git a/eigen/Eigen/src/Core/util/Macros.h b/eigen/Eigen/src/Core/util/Macros.h index 16c248c..14ec87d 100644 --- a/eigen/Eigen/src/Core/util/Macros.h +++ b/eigen/Eigen/src/Core/util/Macros.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. 
// -// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -12,26 +12,25 @@ #define EIGEN_MACROS_H #define EIGEN_WORLD_VERSION 3 -#define EIGEN_MAJOR_VERSION 2 -#define EIGEN_MINOR_VERSION 9 +#define EIGEN_MAJOR_VERSION 3 +#define EIGEN_MINOR_VERSION 90 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ EIGEN_MINOR_VERSION>=z)))) - // Compiler identification, EIGEN_COMP_* /// \internal EIGEN_COMP_GNUC set to 1 for all compilers compatible with GCC #ifdef __GNUC__ - #define EIGEN_COMP_GNUC 1 + #define EIGEN_COMP_GNUC (__GNUC__*10+__GNUC_MINOR__) #else #define EIGEN_COMP_GNUC 0 #endif -/// \internal EIGEN_COMP_CLANG set to 1 if the compiler is clang (alias for __clang__) +/// \internal EIGEN_COMP_CLANG set to major+minor version (e.g., 307 for clang 3.7) if the compiler is clang #if defined(__clang__) - #define EIGEN_COMP_CLANG 1 + #define EIGEN_COMP_CLANG (__clang_major__*100+__clang_minor__) #else #define EIGEN_COMP_CLANG 0 #endif @@ -72,8 +71,17 @@ #define EIGEN_COMP_MSVC 0 #endif -/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC -#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC) +// For the record, here is a table summarizing the possible values for EIGEN_COMP_MSVC: +// name ver MSC_VER +// 2008 9 1500 +// 2010 10 1600 +// 2012 11 1700 +// 2013 12 1800 +// 2015 14 1900 +// "15" 15 1900 + +/// \internal EIGEN_COMP_MSVC_STRICT set to 1 if the compiler is really Microsoft Visual C++ and not ,e.g., ICC or clang-cl +#if EIGEN_COMP_MSVC && !(EIGEN_COMP_ICC || EIGEN_COMP_LLVM || EIGEN_COMP_CLANG) #define EIGEN_COMP_MSVC_STRICT _MSC_VER #else #define EIGEN_COMP_MSVC_STRICT 0 @@ -100,9 +108,16 @@ #define EIGEN_COMP_ARM 0 #endif +/// \internal EIGEN_COMP_ARM set to 1 if the compiler is ARM Compiler +#if defined(__EMSCRIPTEN__) + #define EIGEN_COMP_EMSCRIPTEN 1 +#else + #define EIGEN_COMP_EMSCRIPTEN 0 +#endif + /// \internal EIGEN_GNUC_STRICT set to 1 if the compiler is really GCC and not a compatible compiler (e.g., ICC, clang, mingw, etc.) -#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM ) +#if EIGEN_COMP_GNUC && !(EIGEN_COMP_CLANG || EIGEN_COMP_ICC || EIGEN_COMP_MINGW || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM || EIGEN_COMP_EMSCRIPTEN) #define EIGEN_COMP_GNUC_STRICT 1 #else #define EIGEN_COMP_GNUC_STRICT 0 @@ -299,91 +314,176 @@ #endif -#if EIGEN_GNUC_AT_MOST(4,3) && !defined(__clang__) + +#if EIGEN_GNUC_AT_MOST(4,3) && !EIGEN_COMP_CLANG // see bug 89 #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 0 #else #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1 #endif -// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable -// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always -// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in -// certain common platform (compiler+architecture combinations) to avoid these problems. -// Only static alignment is really problematic (relies on nonstandard compiler extensions that don't -// work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even -// when we have to disable static alignment. 
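For reference, the version and compiler-identification macros touched above compose as in this small illustrative guard (the comments inside the guards stand in for hypothetical workaround code, they are not part of the patch):

#if EIGEN_VERSION_AT_LEAST(3,3,90)
  // Code paths that rely on APIs introduced in this development branch (3.3.90).
#endif

#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 49
  // Workaround for GCC older than 4.9; EIGEN_COMP_GNUC is now __GNUC__*10 + __GNUC_MINOR__.
#elif EIGEN_COMP_CLANG && EIGEN_COMP_CLANG < 307
  // Workaround for clang older than 3.7; EIGEN_COMP_CLANG is __clang_major__*100 + __clang_minor__.
#endif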
-#if defined(__GNUC__) && !(defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ppc__) || defined(__ia64__)) -#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 +// This macro can be used to prevent from macro expansion, e.g.: +// std::max EIGEN_NOT_A_MACRO(a,b) +#define EIGEN_NOT_A_MACRO + +#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR +#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor #else -#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 +#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor #endif -// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX -#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \ - && !EIGEN_GCC3_OR_OLDER \ - && !defined(__SUNPRO_CC) \ - && !defined(__QNXNTO__) - #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 -#else - #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0 +#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t #endif -#ifdef EIGEN_DONT_ALIGN - #ifndef EIGEN_DONT_ALIGN_STATICALLY - #define EIGEN_DONT_ALIGN_STATICALLY - #endif - #define EIGEN_ALIGN 0 +// Cross compiler wrapper around LLVM's __has_builtin +#ifdef __has_builtin +# define EIGEN_HAS_BUILTIN(x) __has_builtin(x) #else - #define EIGEN_ALIGN 1 +# define EIGEN_HAS_BUILTIN(x) 0 #endif -// EIGEN_ALIGN_STATICALLY is the true test whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable -// alignment (EIGEN_DONT_ALIGN_STATICALLY) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only EIGEN_ALIGN_STATICALLY should be used. -#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT && !defined(EIGEN_DONT_ALIGN_STATICALLY) - #define EIGEN_ALIGN_STATICALLY 1 -#else - #define EIGEN_ALIGN_STATICALLY 0 - #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT - #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT - #endif +// A Clang feature extension to determine compiler features. +// We use it to determine 'cxx_rvalue_references' +#ifndef __has_feature +# define __has_feature(x) 0 #endif -#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR -#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION RowMajor +// Some old compilers do not support template specializations like: +// template void foo(const T x[N]); +#if !( EIGEN_COMP_CLANG && ((EIGEN_COMP_CLANG<309) || defined(__apple_build_version__)) || EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<49) +#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 1 #else -#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ColMajor +#define EIGEN_HAS_STATIC_ARRAY_TEMPLATE 0 #endif -#ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE std::ptrdiff_t +// Upperbound on the C++ version to use. +// Expected values are 03, 11, 14, 17, etc. +// By default, let's use an arbitrarily large C++ version. +#ifndef EIGEN_MAX_CPP_VER +#define EIGEN_MAX_CPP_VER 99 #endif -// A Clang feature extension to determine compiler features. -// We use it to determine 'cxx_rvalue_references' -#ifndef __has_feature -# define __has_feature(x) 0 +#if EIGEN_MAX_CPP_VER>=11 && (defined(__cplusplus) && (__cplusplus >= 201103L) || EIGEN_COMP_MSVC >= 1900) +#define EIGEN_HAS_CXX11 1 +#else +#define EIGEN_HAS_CXX11 0 +#endif + +#if EIGEN_MAX_CPP_VER>=14 && (defined(__cplusplus) && (__cplusplus > 201103L) || EIGEN_COMP_MSVC >= 1910) +#define EIGEN_HAS_CXX14 1 +#else +#define EIGEN_HAS_CXX14 0 #endif // Do we support r-value references? 
-#if (__has_feature(cxx_rvalue_references) || \ - (defined(__cplusplus) && __cplusplus >= 201103L) || \ - (defined(_MSC_VER) && _MSC_VER >= 1600)) - #define EIGEN_HAVE_RVALUE_REFERENCES +#ifndef EIGEN_HAS_RVALUE_REFERENCES +#if EIGEN_MAX_CPP_VER>=11 && \ + (__has_feature(cxx_rvalue_references) || \ + (defined(__cplusplus) && __cplusplus >= 201103L) || \ + (EIGEN_COMP_MSVC >= 1600)) + #define EIGEN_HAS_RVALUE_REFERENCES 1 +#else + #define EIGEN_HAS_RVALUE_REFERENCES 0 +#endif #endif +// Does the compiler support C99? +#ifndef EIGEN_HAS_C99_MATH +#if EIGEN_MAX_CPP_VER>=11 && \ + ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ + || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ + || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \ + || (EIGEN_COMP_MSVC >= 1900) || defined(__SYCL_DEVICE_ONLY__)) + #define EIGEN_HAS_C99_MATH 1 +#else + #define EIGEN_HAS_C99_MATH 0 +#endif +#endif -// Cross compiler wrapper around LLVM's __has_builtin -#ifdef __has_builtin -# define EIGEN_HAS_BUILTIN(x) __has_builtin(x) +// Does the compiler support result_of? +#ifndef EIGEN_HAS_STD_RESULT_OF +#if EIGEN_MAX_CPP_VER>=11 && ((__has_feature(cxx_lambdas) || (defined(__cplusplus) && __cplusplus >= 201103L))) +#define EIGEN_HAS_STD_RESULT_OF 1 #else -# define EIGEN_HAS_BUILTIN(x) 0 +#define EIGEN_HAS_STD_RESULT_OF 0 +#endif +#endif + +// Does the compiler support variadic templates? +#ifndef EIGEN_HAS_VARIADIC_TEMPLATES +#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ + && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) + // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: + // this prevents nvcc from crashing when compiling Eigen on Tegra X1 +#define EIGEN_HAS_VARIADIC_TEMPLATES 1 +#elif EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) && defined(__SYCL_DEVICE_ONLY__) +#define EIGEN_HAS_VARIADIC_TEMPLATES 1 +#else +#define EIGEN_HAS_VARIADIC_TEMPLATES 0 +#endif +#endif + +// Does the compiler fully support const expressions? (as in c++14) +#ifndef EIGEN_HAS_CONSTEXPR + +#if defined(__CUDACC__) +// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above +#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) + #define EIGEN_HAS_CONSTEXPR 1 +#endif +#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ + (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)) || \ + (EIGEN_COMP_CLANG >= 306 && (__cplusplus > 199711L))) +#define EIGEN_HAS_CONSTEXPR 1 +#endif + +#ifndef EIGEN_HAS_CONSTEXPR +#define EIGEN_HAS_CONSTEXPR 0 +#endif + +#endif + +// Does the compiler support C++11 math? +// Let's be conservative and enable the default C++11 implementation only if we are sure it exists +#ifndef EIGEN_HAS_CXX11_MATH + #if EIGEN_MAX_CPP_VER>=11 && ((__cplusplus > 201103L) || (__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC || EIGEN_COMP_ICC) \ + && (EIGEN_ARCH_i386_OR_x86_64) && (EIGEN_OS_GNULINUX || EIGEN_OS_WIN_STRICT || EIGEN_OS_MAC)) + #define EIGEN_HAS_CXX11_MATH 1 + #else + #define EIGEN_HAS_CXX11_MATH 0 + #endif +#endif + +// Does the compiler support proper C++11 containers? 
+#ifndef EIGEN_HAS_CXX11_CONTAINERS + #if EIGEN_MAX_CPP_VER>=11 && \ + ((__cplusplus > 201103L) \ + || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \ + || EIGEN_COMP_MSVC >= 1900) + #define EIGEN_HAS_CXX11_CONTAINERS 1 + #else + #define EIGEN_HAS_CXX11_CONTAINERS 0 + #endif +#endif + +// Does the compiler support C++11 noexcept? +#ifndef EIGEN_HAS_CXX11_NOEXCEPT + #if EIGEN_MAX_CPP_VER>=11 && \ + (__has_feature(cxx_noexcept) \ + || (__cplusplus > 201103L) \ + || ((__cplusplus >= 201103L) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_CLANG || EIGEN_COMP_ICC>=1400)) \ + || EIGEN_COMP_MSVC >= 1900) + #define EIGEN_HAS_CXX11_NOEXCEPT 1 + #else + #define EIGEN_HAS_CXX11_NOEXCEPT 0 + #endif #endif /** Allows to disable some optimizations which might affect the accuracy of the result. * Such optimization are enabled by default, and set EIGEN_FAST_MATH to 0 to disable them. * They currently include: - * - single precision Cwise::sin() and Cwise::cos() when SSE vectorization is enabled. + * - single precision ArrayBase::sin() and ArrayBase::cos() for SSE and AVX vectorization. */ #ifndef EIGEN_FAST_MATH #define EIGEN_FAST_MATH 1 @@ -395,6 +495,8 @@ #define EIGEN_CAT2(a,b) a ## b #define EIGEN_CAT(a,b) EIGEN_CAT2(a,b) +#define EIGEN_COMMA , + // convert a token to a string #define EIGEN_MAKESTRING2(a) #a #define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a) @@ -402,7 +504,7 @@ // EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC, // but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline // but GCC is still doing fine with just inline. -#if (defined _MSC_VER) || (defined __INTEL_COMPILER) +#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC #define EIGEN_STRONG_INLINE __forceinline #else #define EIGEN_STRONG_INLINE inline @@ -412,24 +514,25 @@ // attribute to maximize inlining. This should only be used when really necessary: in particular, // it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times. // FIXME with the always_inline attribute, -// gcc 3.4.x reports the following compilation error: +// gcc 3.4.x and 4.1 reports the following compilation error: // Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval Eigen::MatrixBase::eval() const' // : function body not available -#if EIGEN_GNUC_AT_LEAST(4,0) +// See also bug 1367 +#if EIGEN_GNUC_AT_LEAST(4,2) #define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline #else #define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE #endif -#if (defined __GNUC__) +#if EIGEN_COMP_GNUC #define EIGEN_DONT_INLINE __attribute__((noinline)) -#elif (defined _MSC_VER) +#elif EIGEN_COMP_MSVC #define EIGEN_DONT_INLINE __declspec(noinline) #else #define EIGEN_DONT_INLINE #endif -#if (defined __GNUC__) +#if EIGEN_COMP_GNUC #define EIGEN_PERMISSIVE_EXPR __extension__ #else #define EIGEN_PERMISSIVE_EXPR @@ -439,8 +542,8 @@ // - static is not very good because it prevents definitions from different object files to be merged. // So static causes the resulting linked executable to be bloated with multiple copies of the same function. // - inline is not perfect either as it unwantedly hints the compiler toward inlining the function. 
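A quick worked expansion of the token helpers a few hunks above (EIGEN_CAT and EIGEN_MAKESTRING go through a second level on purpose, so their arguments are macro-expanded before pasting or stringizing); the sum example is illustrative:

// EIGEN_CAT(scalar_, sum)                              ->  the single token  scalar_sum
// EIGEN_CAT(EIGEN_CAT(internal::scalar_, sum), _op)    ->  internal::scalar_sum_op
// EIGEN_MAKESTRING(EIGEN_MAX_CPP_VER)                  ->  "99" by default (the expanded value, not the macro name)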
-#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS inline +#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC +#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC inline #ifdef NDEBUG # ifndef EIGEN_NO_DEBUG @@ -498,15 +601,15 @@ #endif #ifdef EIGEN_NO_DEBUG -#define EIGEN_ONLY_USED_FOR_DEBUG(x) (void)x +#define EIGEN_ONLY_USED_FOR_DEBUG(x) EIGEN_UNUSED_VARIABLE(x) #else #define EIGEN_ONLY_USED_FOR_DEBUG(x) #endif #ifndef EIGEN_NO_DEPRECATED_WARNING - #if (defined __GNUC__) + #if EIGEN_COMP_GNUC #define EIGEN_DEPRECATED __attribute__((deprecated)) - #elif (defined _MSC_VER) + #elif EIGEN_COMP_MSVC #define EIGEN_DEPRECATED __declspec(deprecated) #else #define EIGEN_DEPRECATED @@ -515,7 +618,7 @@ #define EIGEN_DEPRECATED #endif -#if (defined __GNUC__) +#if EIGEN_COMP_GNUC #define EIGEN_UNUSED __attribute__((unused)) #else #define EIGEN_UNUSED @@ -524,19 +627,41 @@ // Suppresses 'unused variable' warnings. namespace Eigen { namespace internal { - template void ignore_unused_variable(const T&) {} + template EIGEN_DEVICE_FUNC void ignore_unused_variable(const T&) {} } } #define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var); #if !defined(EIGEN_ASM_COMMENT) - #if (defined __GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) + #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64) #define EIGEN_ASM_COMMENT(X) __asm__("#" X) #else #define EIGEN_ASM_COMMENT(X) #endif #endif + +#if EIGEN_COMP_MSVC + // NOTE MSVC often gives C4127 warnings with compiletime if statements. See bug 1362. + // This workaround is ugly, but it does the job. +# define EIGEN_CONST_CONDITIONAL(cond) (void)0, cond +#else +# define EIGEN_CONST_CONDITIONAL(cond) cond +#endif + +//------------------------------------------------------------------------------------------ +// Static and dynamic alignment control +// +// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES +// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively. +// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not, +// a default value is automatically computed based on architecture, compiler, and OS. +// +// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX} +// to be used to declare statically aligned buffers. +//------------------------------------------------------------------------------------------ + + /* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements. * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled, * so that vectorization doesn't affect binary compatibility. @@ -544,28 +669,149 @@ namespace Eigen { * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link * vectorized and non-vectorized code. 
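Sketch of how the two unused-variable helpers above behave at a call site (check_index is an illustrative function, not from the patch):

void check_index(int i, int size) {
  EIGEN_ONLY_USED_FOR_DEBUG(size);   // expands to EIGEN_UNUSED_VARIABLE(size) only when EIGEN_NO_DEBUG is set
  eigen_assert(i >= 0 && i < size);  // in release builds this assert vanishes, hence the helper above
}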
*/ -#if (defined __GNUC__) || (defined __PGI) || (defined __IBMCPP__) || (defined __ARMCC_VERSION) +#if (defined __CUDACC__) + #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n) +#elif EIGEN_COMP_GNUC || EIGEN_COMP_PGI || EIGEN_COMP_IBM || EIGEN_COMP_ARM #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) -#elif (defined _MSC_VER) +#elif EIGEN_COMP_MSVC #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n)) -#elif (defined __SUNPRO_CC) +#elif EIGEN_COMP_SUNCC // FIXME not sure about this one: #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) #else #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler #endif +// If the user explicitly disable vectorization, then we also disable alignment +#if defined(EIGEN_DONT_VECTORIZE) + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0 +#elif defined(EIGEN_VECTORIZE_AVX512) + // 64 bytes static alignmeent is preferred only if really required + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64 +#elif defined(__AVX__) + // 32 bytes static alignmeent is preferred only if really required + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32 +#else + #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16 +#endif + + +// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense +#define EIGEN_MIN_ALIGN_BYTES 16 + +// Defined the boundary (in bytes) on which the data needs to be aligned. Note +// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be +// aligned at all regardless of the value of this #define. + +#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0 +#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY. +#endif + +// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprectated +// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0 +#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN) + #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES + #undef EIGEN_MAX_STATIC_ALIGN_BYTES + #endif + #define EIGEN_MAX_STATIC_ALIGN_BYTES 0 +#endif + +#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES + + // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES + + // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable + // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always + // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in + // certain common platform (compiler+architecture combinations) to avoid these problems. + // Only static alignment is really problematic (relies on nonstandard compiler extensions), + // try to keep heap alignment even when we have to disable static alignment. + #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64) + #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 + #elif EIGEN_ARCH_ARM_OR_ARM64 && EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_AT_MOST(4, 6) + // Old versions of GCC on ARM, at least 4.4, were once seen to have buggy static alignment support. + // Not sure which version fixed it, hopefully it doesn't affect 4.7, which is still somewhat in use. + // 4.8 and newer seem definitely unaffected. 
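Illustrative use of the boundary macro defined above (the buffer name is made up): EIGEN_ALIGN_TO_BOUNDARY(n) attaches the compiler-specific alignment attribute to a declaration, independently of whether vectorization is enabled:

EIGEN_ALIGN_TO_BOUNDARY(16) float packet_buffer[8];
// expands to __attribute__((aligned(16))) on GCC-compatible compilers, __declspec(align(16))
// on MSVC, and __align__(16) when compiled by nvcc (__CUDACC__), as selected just above.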
+ #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 + #else + #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 + #endif + + // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX + #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \ + && !EIGEN_GCC3_OR_OLDER \ + && !EIGEN_COMP_SUNCC \ + && !EIGEN_OS_QNX + #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 + #else + #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0 + #endif + + #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT + #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES + #else + #define EIGEN_MAX_STATIC_ALIGN_BYTES 0 + #endif + +#endif + +// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_ALIGN_BYTES +#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES0 is the true test whether we want to align arrays on the stack or not. +// It takes into account both the user choice to explicitly enable/disable alignment (by settting EIGEN_MAX_STATIC_ALIGN_BYTES) +// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). +// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used. + + +// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY #define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8) #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16) +#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32) +#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64) +#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 +#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES) +#else +#define EIGEN_ALIGN_MAX +#endif + + +// Dynamic alignment control + +#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0 +#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN. +#endif -#if EIGEN_ALIGN_STATICALLY -#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) EIGEN_ALIGN_TO_BOUNDARY(n) -#define EIGEN_USER_ALIGN16 EIGEN_ALIGN16 +#ifdef EIGEN_DONT_ALIGN + #ifdef EIGEN_MAX_ALIGN_BYTES + #undef EIGEN_MAX_ALIGN_BYTES + #endif + #define EIGEN_MAX_ALIGN_BYTES 0 +#elif !defined(EIGEN_MAX_ALIGN_BYTES) + #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES +#endif + +#if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES +#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES #else -#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) -#define EIGEN_USER_ALIGN16 +#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES #endif + +#ifndef EIGEN_UNALIGNED_VECTORIZE +#define EIGEN_UNALIGNED_VECTORIZE 1 +#endif + +//---------------------------------------------------------------------- + + #ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD #define EIGEN_RESTRICT #endif @@ -591,25 +837,26 @@ namespace Eigen { // just an empty macro ! 
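A consumer-side sketch of the knobs introduced above (the struct name is illustrative): EIGEN_ALIGN_MAX requests the strongest static alignment currently enabled and degrades to nothing when EIGEN_MAX_STATIC_ALIGN_BYTES is forced to 0 (for instance via the deprecated EIGEN_DONT_ALIGN_STATICALLY):

struct MyPayload {
  EIGEN_ALIGN_MAX double data[4];   // aligned to EIGEN_MAX_STATIC_ALIGN_BYTES, or unaligned if that is 0
};
// Building with -DEIGEN_MAX_STATIC_ALIGN_BYTES=0 keeps heap alignment (EIGEN_MAX_ALIGN_BYTES)
// while turning every EIGEN_ALIGN_MAX above into a no-op.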
#define EIGEN_EMPTY -#if defined(_MSC_VER) && (_MSC_VER < 1900) && (!defined(__INTEL_COMPILER)) -#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ - using Base::operator =; -#elif defined(__clang__) // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) -#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ - using Base::operator =; \ - EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \ - template \ - EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other) { Base::operator=(other.derived()); return *this; } -#else -#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ - using Base::operator =; \ - EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \ - { \ - Base::operator=(other); \ - return *this; \ - } +#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) + #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + using Base::operator =; +#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) + #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + using Base::operator =; \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) { Base::operator=(other); return *this; } \ + template \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other) { Base::operator=(other.derived()); return *this; } +#else + #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ + using Base::operator =; \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Derived& other) \ + { \ + Base::operator=(other); \ + return *this; \ + } #endif + /** \internal * \brief Macro to manually inherit assignment operators. * This is necessary, because the implicitly defined assignment operator gets deleted when a custom operator= is defined. @@ -628,32 +875,13 @@ namespace Eigen { typedef typename Eigen::internal::traits::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex. */ \ typedef typename Eigen::NumTraits::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex, T were corresponding to RealScalar. */ \ typedef typename Base::CoeffReturnType CoeffReturnType; /*!< \brief The return type for coefficient access. \details Depending on whether the object allows direct coefficient access (e.g. for a MatrixXd), this type is either 'const Scalar&' or simply 'Scalar' for objects that do not allow direct coefficient access. 
*/ \ - typedef typename Eigen::internal::nested::type Nested; \ + typedef typename Eigen::internal::ref_selector::type Nested; \ typedef typename Eigen::internal::traits::StorageKind StorageKind; \ - typedef typename Eigen::internal::traits::Index Index; \ - enum { RowsAtCompileTime = Eigen::internal::traits::RowsAtCompileTime, \ + typedef typename Eigen::internal::traits::StorageIndex StorageIndex; \ + enum CompileTimeTraits \ + { RowsAtCompileTime = Eigen::internal::traits::RowsAtCompileTime, \ ColsAtCompileTime = Eigen::internal::traits::ColsAtCompileTime, \ Flags = Eigen::internal::traits::Flags, \ - CoeffReadCost = Eigen::internal::traits::CoeffReadCost, \ - SizeAtCompileTime = Base::SizeAtCompileTime, \ - MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \ - IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; - - -#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \ - typedef typename Eigen::internal::traits::Scalar Scalar; /*!< \brief Numeric type, e.g. float, double, int or std::complex. */ \ - typedef typename Eigen::NumTraits::Real RealScalar; /*!< \brief The underlying numeric type for composed scalar types. \details In cases where Scalar is e.g. std::complex, T were corresponding to RealScalar. */ \ - typedef typename Base::PacketScalar PacketScalar; \ - typedef typename Base::CoeffReturnType CoeffReturnType; /*!< \brief The return type for coefficient access. \details Depending on whether the object allows direct coefficient access (e.g. for a MatrixXd), this type is either 'const Scalar&' or simply 'Scalar' for objects that do not allow direct coefficient access. */ \ - typedef typename Eigen::internal::nested::type Nested; \ - typedef typename Eigen::internal::traits::StorageKind StorageKind; \ - typedef typename Eigen::internal::traits::Index Index; \ - enum { RowsAtCompileTime = Eigen::internal::traits::RowsAtCompileTime, \ - ColsAtCompileTime = Eigen::internal::traits::ColsAtCompileTime, \ - MaxRowsAtCompileTime = Eigen::internal::traits::MaxRowsAtCompileTime, \ - MaxColsAtCompileTime = Eigen::internal::traits::MaxColsAtCompileTime, \ - Flags = Eigen::internal::traits::Flags, \ - CoeffReadCost = Eigen::internal::traits::CoeffReadCost, \ SizeAtCompileTime = Base::SizeAtCompileTime, \ MaxSizeAtCompileTime = Base::MaxSizeAtCompileTime, \ IsVectorAtCompileTime = Base::IsVectorAtCompileTime }; \ @@ -661,6 +889,12 @@ namespace Eigen { using Base::const_cast_derived; +// FIXME Maybe the EIGEN_DENSE_PUBLIC_INTERFACE could be removed as importing PacketScalar is rarely needed +#define EIGEN_DENSE_PUBLIC_INTERFACE(Derived) \ + EIGEN_GENERIC_PUBLIC_INTERFACE(Derived) \ + typedef typename Base::PacketScalar PacketScalar; + + #define EIGEN_PLAIN_ENUM_MIN(a,b) (((int)a <= (int)b) ? (int)a : (int)b) #define EIGEN_PLAIN_ENUM_MAX(a,b) (((int)a >= (int)b) ? (int)a : (int)b) @@ -686,24 +920,14 @@ namespace Eigen { #define EIGEN_SIZE_MAX(a,b) (((int)a == Dynamic || (int)b == Dynamic) ? Dynamic \ : ((int)a >= (int)b) ? (int)a : (int)b) -#define EIGEN_ADD_COST(a,b) int(a)==Dynamic || int(b)==Dynamic ? 
Dynamic : int(a)+int(b) - #define EIGEN_LOGICAL_XOR(a,b) (((a) || (b)) && !((a) && (b))) #define EIGEN_IMPLIES(a,b) (!(a) || (b)) -#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR) \ - template \ - EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const OtherDerived> \ - (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ - { \ - return CwiseBinaryOp, const Derived, const OtherDerived>(derived(), other.derived()); \ - } - -// the expression type of a cwise product -#define EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS) \ +// the expression type of a standard coefficient wise binary operation +#define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \ CwiseBinaryOp< \ - internal::scalar_product_op< \ + EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \ typename internal::traits::Scalar, \ typename internal::traits::Scalar \ >, \ @@ -711,4 +935,84 @@ namespace Eigen { const RHS \ > +#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \ + template \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \ + (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS &other) const \ + { \ + return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \ + } + +#define EIGEN_SCALAR_BINARY_SUPPORTED(OPNAME,TYPEA,TYPEB) \ + (Eigen::internal::has_ReturnType > >::value) + +#define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR,SCALAR,OPNAME) \ + CwiseBinaryOp::Scalar,SCALAR>, const EXPR, \ + const typename internal::plain_constant_type::type> + +#define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR,EXPR,OPNAME) \ + CwiseBinaryOp::Scalar>, \ + const typename internal::plain_constant_type::type, const EXPR> + +// Workaround for MSVC 2010 (see ML thread "patch with compile for for MSVC 2010") +#if EIGEN_COMP_MSVC_STRICT<=1600 +#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) typename internal::enable_if::type +#else +#define EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(X) X +#endif + +#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \ + template EIGEN_DEVICE_FUNC inline \ + EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg::type,OPNAME))\ + (METHOD)(const T& scalar) const { \ + typedef typename internal::promote_scalar_arg::type PromotedT; \ + return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \ + typename internal::plain_constant_type::type(derived().rows(), derived().cols(), internal::scalar_constant_op(scalar))); \ + } + +#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ + template EIGEN_DEVICE_FUNC inline friend \ + EIGEN_MSVC10_WORKAROUND_BINARYOP_RETURN_TYPE(const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg::type,Derived,OPNAME)) \ + (METHOD)(const T& scalar, const StorageBaseType& matrix) { \ + typedef typename internal::promote_scalar_arg::type PromotedT; \ + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \ + typename internal::plain_constant_type::type(matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op(scalar)), matrix.derived()); \ + } + +#define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \ + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \ + EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) + + +#ifdef EIGEN_EXCEPTIONS +# define EIGEN_THROW_X(X) throw X +# define EIGEN_THROW throw +# define EIGEN_TRY try +# define EIGEN_CATCH(X) catch (X) +#else +# ifdef 
__CUDA_ARCH__ +# define EIGEN_THROW_X(X) asm("trap;") +# define EIGEN_THROW asm("trap;") +# else +# define EIGEN_THROW_X(X) std::abort() +# define EIGEN_THROW std::abort() +# endif +# define EIGEN_TRY if (true) +# define EIGEN_CATCH(X) else +#endif + + +#if EIGEN_HAS_CXX11_NOEXCEPT +# define EIGEN_INCLUDE_TYPE_TRAITS +# define EIGEN_NOEXCEPT noexcept +# define EIGEN_NOEXCEPT_IF(x) noexcept(x) +# define EIGEN_NO_THROW noexcept(true) +# define EIGEN_EXCEPTION_SPEC(X) noexcept(false) +#else +# define EIGEN_NOEXCEPT +# define EIGEN_NOEXCEPT_IF(x) +# define EIGEN_NO_THROW throw() +# define EIGEN_EXCEPTION_SPEC(X) throw(X) +#endif + #endif // EIGEN_MACROS_H diff --git a/eigen/Eigen/src/Core/util/Memory.h b/eigen/Eigen/src/Core/util/Memory.h index 74e37a4..7d90534 100644 --- a/eigen/Eigen/src/Core/util/Memory.h +++ b/eigen/Eigen/src/Core/util/Memory.h @@ -1,11 +1,12 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // Copyright (C) 2008-2009 Benoit Jacob // Copyright (C) 2009 Kenneth Riddile // Copyright (C) 2010 Hauke Heibel // Copyright (C) 2010 Thomas Capricelli +// Copyright (C) 2013 Pavel Holoborodko // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -31,7 +32,7 @@ // page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed // quite safe, at least within the context of glibc, to equate 64-bit with LP64. #if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \ - && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) + && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16) #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1 #else #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0 @@ -41,15 +42,15 @@ // See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup // FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures // See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup -#if defined(__FreeBSD__) && !defined(__arm__) && !defined(__mips__) +#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16) #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1 #else #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0 #endif -#if defined(__APPLE__) \ - || defined(_WIN64) \ - || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \ +#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \ + || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \ + || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \ || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED #define EIGEN_MALLOC_ALREADY_ALIGNED 1 #else @@ -58,36 +59,17 @@ #endif -// See bug 554 (http://eigen.tuxfamily.org/bz/show_bug.cgi?id=554) -// It seems to be unsafe to check _POSIX_ADVISORY_INFO without including unistd.h first. 
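Sketch of how the exception shims above keep a single code path valid with exceptions enabled, with -fno-exceptions, and inside CUDA device code (checked_copy is an illustrative helper, not part of the patch):

template<typename T>
void checked_copy(const T* src, T* dst, std::size_t n) {
  EIGEN_TRY {
    for (std::size_t i = 0; i < n; ++i) dst[i] = src[i];
  }
  EIGEN_CATCH(...) {
    EIGEN_THROW;   // rethrow, or std::abort() / a device-side trap when exceptions are unavailable
  }
}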
-// Currently, let's include it only on unix systems: -#if defined(__unix__) || defined(__unix) - #include - #if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || (defined __PGI) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) && (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0) - #define EIGEN_HAS_POSIX_MEMALIGN 1 - #endif -#endif - -#ifndef EIGEN_HAS_POSIX_MEMALIGN - #define EIGEN_HAS_POSIX_MEMALIGN 0 -#endif - -#ifdef EIGEN_VECTORIZE_SSE - #define EIGEN_HAS_MM_MALLOC 1 -#else - #define EIGEN_HAS_MM_MALLOC 0 -#endif - namespace Eigen { namespace internal { +EIGEN_DEVICE_FUNC inline void throw_std_bad_alloc() { #ifdef EIGEN_EXCEPTIONS throw std::bad_alloc(); #else - std::size_t huge = -1; + std::size_t huge = static_cast(-1); new int[huge]; #endif } @@ -103,9 +85,9 @@ inline void throw_std_bad_alloc() */ inline void* handmade_aligned_malloc(std::size_t size) { - void *original = std::malloc(size+16); + void *original = std::malloc(size+EIGEN_DEFAULT_ALIGN_BYTES); if (original == 0) return 0; - void *aligned = reinterpret_cast((reinterpret_cast(original) & ~(std::size_t(15))) + 16); + void *aligned = reinterpret_cast((reinterpret_cast(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES); *(reinterpret_cast(aligned) - 1) = original; return aligned; } @@ -118,7 +100,7 @@ inline void handmade_aligned_free(void *ptr) /** \internal * \brief Reallocates aligned memory. - * Since we know that our handmade version is based on std::realloc + * Since we know that our handmade version is based on std::malloc * we can use std::realloc to implement efficient reallocation. */ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0) @@ -126,9 +108,9 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = if (ptr == 0) return handmade_aligned_malloc(size); void *original = *(reinterpret_cast(ptr) - 1); std::ptrdiff_t previous_offset = static_cast(ptr)-static_cast(original); - original = std::realloc(original,size+16); + original = std::realloc(original,size+EIGEN_DEFAULT_ALIGN_BYTES); if (original == 0) return 0; - void *aligned = reinterpret_cast((reinterpret_cast(original) & ~(std::size_t(15))) + 16); + void *aligned = reinterpret_cast((reinterpret_cast(original) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) + EIGEN_DEFAULT_ALIGN_BYTES); void *previous_aligned = static_cast(original)+previous_offset; if(aligned!=previous_aligned) std::memmove(aligned, previous_aligned, size); @@ -137,93 +119,47 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = return aligned; } -/***************************************************************************** -*** Implementation of generic aligned realloc (when no realloc can be used)*** -*****************************************************************************/ - -void* aligned_malloc(std::size_t size); -void aligned_free(void *ptr); - -/** \internal - * \brief Reallocates aligned memory. - * Allows reallocation with aligned ptr types. This implementation will - * always create a new memory chunk and copy the old data. 
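The over-allocate-and-round-up scheme used by handmade_aligned_malloc above can be re-derived in a few lines (standalone sketch assuming a 16-byte boundary; the toy_ name is made up). It relies on std::malloc returning at least pointer-aligned storage so that the stashed pointer fits below the aligned block:

#include <cstdlib>
#include <cstdint>
void* toy_aligned_malloc(std::size_t size) {
  const std::uintptr_t A = 16;                          // stand-in for EIGEN_DEFAULT_ALIGN_BYTES
  void* original = std::malloc(size + A);               // over-allocate by one alignment unit
  if (original == 0) return 0;
  void* aligned = reinterpret_cast<void*>(
      (reinterpret_cast<std::uintptr_t>(original) & ~(A - 1)) + A);  // round down, then step one unit up
  *(reinterpret_cast<void**>(aligned) - 1) = original;  // stash the raw pointer just below the block
  return aligned;
}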
- */ -inline void* generic_aligned_realloc(void* ptr, size_t size, size_t old_size) -{ - if (ptr==0) - return aligned_malloc(size); - - if (size==0) - { - aligned_free(ptr); - return 0; - } - - void* newptr = aligned_malloc(size); - if (newptr == 0) - { - #ifdef EIGEN_HAS_ERRNO - errno = ENOMEM; // according to the standard - #endif - return 0; - } - - if (ptr != 0) - { - std::memcpy(newptr, ptr, (std::min)(size,old_size)); - aligned_free(ptr); - } - - return newptr; -} - /***************************************************************************** *** Implementation of portable aligned versions of malloc/free/realloc *** *****************************************************************************/ #ifdef EIGEN_NO_MALLOC -inline void check_that_malloc_is_allowed() +EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() { eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)"); } #elif defined EIGEN_RUNTIME_NO_MALLOC -inline bool is_malloc_allowed_impl(bool update, bool new_value = false) +EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false) { static bool value = true; if (update == 1) value = new_value; return value; } -inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); } -inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); } -inline void check_that_malloc_is_allowed() +EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); } +EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); } +EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() { eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)"); } #else -inline void check_that_malloc_is_allowed() +EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed() {} #endif -/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment. +/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements. * On allocation error, the returned pointer is null, and std::bad_alloc is thrown. */ -inline void* aligned_malloc(size_t size) +EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size) { check_that_malloc_is_allowed(); void *result; - #if !EIGEN_ALIGN - result = std::malloc(size); - #elif EIGEN_MALLOC_ALREADY_ALIGNED + #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED result = std::malloc(size); - #elif EIGEN_HAS_POSIX_MEMALIGN - if(posix_memalign(&result, 16, size)) result = 0; - #elif EIGEN_HAS_MM_MALLOC - result = _mm_malloc(size, 16); - #elif defined(_MSC_VER) && (!defined(_WIN32_WCE)) - result = _aligned_malloc(size, 16); + #if EIGEN_DEFAULT_ALIGN_BYTES==16 + eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade alignd memory allocator."); + #endif #else result = handmade_aligned_malloc(size); #endif @@ -235,50 +171,27 @@ inline void* aligned_malloc(size_t size) } /** \internal Frees memory allocated with aligned_malloc. 
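Usage sketch for the runtime switch above: when the whole program is compiled with EIGEN_RUNTIME_NO_MALLOC, a test can assert that a code region performs no heap allocation (A, b and x stand in for dense Eigen matrices):

// compile the test with -DEIGEN_RUNTIME_NO_MALLOC
Eigen::internal::set_is_malloc_allowed(false);
x.noalias() = A * b;                            // any aligned_malloc here would hit the eigen_assert above
Eigen::internal::set_is_malloc_allowed(true);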
*/ -inline void aligned_free(void *ptr) +EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr) { - #if !EIGEN_ALIGN - std::free(ptr); - #elif EIGEN_MALLOC_ALREADY_ALIGNED + #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED std::free(ptr); - #elif EIGEN_HAS_POSIX_MEMALIGN - std::free(ptr); - #elif EIGEN_HAS_MM_MALLOC - _mm_free(ptr); - #elif defined(_MSC_VER) && (!defined(_WIN32_WCE)) - _aligned_free(ptr); #else handmade_aligned_free(ptr); #endif } /** -* \internal -* \brief Reallocates an aligned block of memory. -* \throws std::bad_alloc on allocation failure -**/ -inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size) + * \internal + * \brief Reallocates an aligned block of memory. + * \throws std::bad_alloc on allocation failure + */ +inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size) { EIGEN_UNUSED_VARIABLE(old_size); void *result; -#if !EIGEN_ALIGN - result = std::realloc(ptr,new_size); -#elif EIGEN_MALLOC_ALREADY_ALIGNED +#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED result = std::realloc(ptr,new_size); -#elif EIGEN_HAS_POSIX_MEMALIGN - result = generic_aligned_realloc(ptr,new_size,old_size); -#elif EIGEN_HAS_MM_MALLOC - // The defined(_mm_free) is just here to verify that this MSVC version - // implements _mm_malloc/_mm_free based on the corresponding _aligned_ - // functions. This may not always be the case and we just try to be safe. - #if defined(_MSC_VER) && (!defined(_WIN32_WCE)) && defined(_mm_free) - result = _aligned_realloc(ptr,new_size,16); - #else - result = generic_aligned_realloc(ptr,new_size,old_size); - #endif -#elif defined(_MSC_VER) && (!defined(_WIN32_WCE)) - result = _aligned_realloc(ptr,new_size,16); #else result = handmade_aligned_realloc(ptr,new_size,old_size); #endif @@ -296,12 +209,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size) /** \internal Allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned. * On allocation error, the returned pointer is null, and a std::bad_alloc is thrown. 
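Caller-side round trip for the allocation trio above (sizes and the buffer name are illustrative); all three entry points take the already-aligned system path when EIGEN_MALLOC_ALREADY_ALIGNED holds and fall back to the handmade versions otherwise:

std::size_t old_n = 32, new_n = 64;
float* buf = static_cast<float*>(Eigen::internal::aligned_malloc(old_n * sizeof(float)));
buf = static_cast<float*>(Eigen::internal::aligned_realloc(buf, new_n * sizeof(float), old_n * sizeof(float)));
Eigen::internal::aligned_free(buf);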
*/ -template inline void* conditional_aligned_malloc(size_t size) +template EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size) { return aligned_malloc(size); } -template<> inline void* conditional_aligned_malloc(size_t size) +template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size) { check_that_malloc_is_allowed(); @@ -312,22 +225,22 @@ template<> inline void* conditional_aligned_malloc(size_t size) } /** \internal Frees memory allocated with conditional_aligned_malloc */ -template inline void conditional_aligned_free(void *ptr) +template EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr) { aligned_free(ptr); } -template<> inline void conditional_aligned_free(void *ptr) +template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr) { std::free(ptr); } -template inline void* conditional_aligned_realloc(void* ptr, size_t new_size, size_t old_size) +template inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size) { return aligned_realloc(ptr, new_size, old_size); } -template<> inline void* conditional_aligned_realloc(void* ptr, size_t new_size, size_t) +template<> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t) { return std::realloc(ptr, new_size); } @@ -336,33 +249,43 @@ template<> inline void* conditional_aligned_realloc(void* ptr, size_t new *** Construction/destruction of array elements *** *****************************************************************************/ -/** \internal Constructs the elements of an array. - * The \a size parameter tells on how many objects to call the constructor of T. - */ -template inline T* construct_elements_of_array(T *ptr, size_t size) -{ - for (size_t i=0; i < size; ++i) ::new (ptr + i) T; - return ptr; -} - /** \internal Destructs the elements of an array. * The \a size parameters tells on how many objects to call the destructor of T. */ -template inline void destruct_elements_of_array(T *ptr, size_t size) +template EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size) { // always destruct an array starting from the end. if(ptr) while(size) ptr[--size].~T(); } +/** \internal Constructs the elements of an array. + * The \a size parameter tells on how many objects to call the constructor of T. + */ +template EIGEN_DEVICE_FUNC inline T* construct_elements_of_array(T *ptr, std::size_t size) +{ + std::size_t i; + EIGEN_TRY + { + for (i = 0; i < size; ++i) ::new (ptr + i) T; + return ptr; + } + EIGEN_CATCH(...) + { + destruct_elements_of_array(ptr, i); + EIGEN_THROW; + } + return NULL; +} + /***************************************************************************** *** Implementation of aligned new/delete-like functions *** *****************************************************************************/ template -EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size) +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size) { - if(size > size_t(-1) / sizeof(T)) + if(size > std::size_t(-1) / sizeof(T)) throw_std_bad_alloc(); } @@ -370,24 +293,42 @@ EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size) * On allocation error, the returned pointer is undefined, but a std::bad_alloc is thrown. * The default constructor of T is called. 
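The construction helper above gains rollback semantics in this patch; its logic boils down to the following standalone sketch (toy_construct is a made-up name), reusing the exception shims defined earlier:

#include <new>
#include <cstddef>
template<typename T>
T* toy_construct(T* ptr, std::size_t n) {
  std::size_t i = 0;
  EIGEN_TRY {
    for (; i < n; ++i) ::new (ptr + i) T;   // placement-new, default-construct each element
    return ptr;
  }
  EIGEN_CATCH(...) {
    while (i) ptr[--i].~T();                // undo the elements already built, newest first
    EIGEN_THROW;
  }
  return 0;                                 // not reached; silences missing-return warnings
}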
*/ -template inline T* aligned_new(size_t size) +template EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size) { check_size_for_overflow(size); T *result = reinterpret_cast(aligned_malloc(sizeof(T)*size)); - return construct_elements_of_array(result, size); + EIGEN_TRY + { + return construct_elements_of_array(result, size); + } + EIGEN_CATCH(...) + { + aligned_free(result); + EIGEN_THROW; + } + return result; } -template inline T* conditional_aligned_new(size_t size) +template EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size) { check_size_for_overflow(size); T *result = reinterpret_cast(conditional_aligned_malloc(sizeof(T)*size)); - return construct_elements_of_array(result, size); + EIGEN_TRY + { + return construct_elements_of_array(result, size); + } + EIGEN_CATCH(...) + { + conditional_aligned_free(result); + EIGEN_THROW; + } + return result; } /** \internal Deletes objects constructed with aligned_new * The \a size parameters tells on how many objects to call the destructor of T. */ -template inline void aligned_delete(T *ptr, size_t size) +template EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size) { destruct_elements_of_array(ptr, size); aligned_free(ptr); @@ -396,13 +337,13 @@ template inline void aligned_delete(T *ptr, size_t size) /** \internal Deletes objects constructed with conditional_aligned_new * The \a size parameters tells on how many objects to call the destructor of T. */ -template inline void conditional_aligned_delete(T *ptr, size_t size) +template EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size) { destruct_elements_of_array(ptr, size); conditional_aligned_free(ptr); } -template inline T* conditional_aligned_realloc_new(T* pts, size_t new_size, size_t old_size) +template EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size) { check_size_for_overflow(new_size); check_size_for_overflow(old_size); @@ -410,23 +351,43 @@ template inline T* conditional_aligned_realloc_new(T* pt destruct_elements_of_array(pts+new_size, old_size-new_size); T *result = reinterpret_cast(conditional_aligned_realloc(reinterpret_cast(pts), sizeof(T)*new_size, sizeof(T)*old_size)); if(new_size > old_size) - construct_elements_of_array(result+old_size, new_size-old_size); + { + EIGEN_TRY + { + construct_elements_of_array(result+old_size, new_size-old_size); + } + EIGEN_CATCH(...) + { + conditional_aligned_free(result); + EIGEN_THROW; + } + } return result; } -template inline T* conditional_aligned_new_auto(size_t size) +template EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size) { if(size==0) return 0; // short-cut. Also fixes Bug 884 check_size_for_overflow(size); T *result = reinterpret_cast(conditional_aligned_malloc(sizeof(T)*size)); if(NumTraits::RequireInitialization) - construct_elements_of_array(result, size); + { + EIGEN_TRY + { + construct_elements_of_array(result, size); + } + EIGEN_CATCH(...) 
+ { + conditional_aligned_free(result); + EIGEN_THROW; + } + } return result; } -template inline T* conditional_aligned_realloc_new_auto(T* pts, size_t new_size, size_t old_size) +template inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size) { check_size_for_overflow(new_size); check_size_for_overflow(old_size); @@ -434,11 +395,21 @@ template inline T* conditional_aligned_realloc_new_auto( destruct_elements_of_array(pts+new_size, old_size-new_size); T *result = reinterpret_cast(conditional_aligned_realloc(reinterpret_cast(pts), sizeof(T)*new_size, sizeof(T)*old_size)); if(NumTraits::RequireInitialization && (new_size > old_size)) - construct_elements_of_array(result+old_size, new_size-old_size); + { + EIGEN_TRY + { + construct_elements_of_array(result+old_size, new_size-old_size); + } + EIGEN_CATCH(...) + { + conditional_aligned_free(result); + EIGEN_THROW; + } + } return result; } -template inline void conditional_aligned_delete_auto(T *ptr, size_t size) +template EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size) { if(NumTraits::RequireInitialization) destruct_elements_of_array(ptr, size); @@ -447,51 +418,62 @@ template inline void conditional_aligned_delete_auto(T * /****************************************************************************/ -/** \internal Returns the index of the first element of the array that is well aligned for vectorization. +/** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a Alignment. * + * \tparam Alignment requested alignment in Bytes. * \param array the address of the start of the array * \param size the size of the array * - * \note If no element of the array is well aligned, the size of the array is returned. Typically, - * for example with SSE, "well aligned" means 16-byte-aligned. If vectorization is disabled or if the + * \note If no element of the array is well aligned or the requested alignment is not a multiple of a scalar, + * the size of the array is returned. For example with SSE, the requested alignment is typically 16-bytes. If * packet size for the given scalar type is 1, then everything is considered well-aligned. * - * \note If the scalar type is vectorizable, we rely on the following assumptions: sizeof(Scalar) is a - * power of 2, the packet size in bytes is also a power of 2, and is a multiple of sizeof(Scalar). On the - * other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for + * \note Otherwise, if the Alignment is larger that the scalar size, we rely on the assumptions that sizeof(Scalar) is a + * power of 2. On the other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for * example with Scalar=double on certain 32-bit platforms, see bug #79. * * There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h. 
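Worked instance of the index computation above, assuming Alignment = 16 and Scalar = float (so ScalarSize = 4, AlignmentSize = 4, AlignmentMask = 3) and a hypothetical array starting at address 0x1008:

//   UIntPtr(array)/sizeof(Scalar) = 0x1008/4 = 0x402,  0x402 & 3 = 2
//   first = (4 - 2) & 3 = 2  ->  element 2 lives at 0x1008 + 2*4 = 0x1010, a 16-byte boundary
//   the function then returns min(first, size), i.e. 'size' itself if the array is shorter.
// Call-site form:  Index start = Eigen::internal::first_aligned<16>(array, size);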
+ * \sa first_default_aligned() */ -template -static inline Index first_aligned(const Scalar* array, Index size) +template +EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size) { - static const Index PacketSize = packet_traits::size; - static const Index PacketAlignedMask = PacketSize-1; + const Index ScalarSize = sizeof(Scalar); + const Index AlignmentSize = Alignment / ScalarSize; + const Index AlignmentMask = AlignmentSize-1; - if(PacketSize==1) + if(AlignmentSize<=1) { - // Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements - // of the array have the same alignment. + // Either the requested alignment if smaller than a scalar, or it exactly match a 1 scalar + // so that all elements of the array have the same alignment. return 0; } - else if(size_t(array) & (sizeof(Scalar)-1)) + else if( (UIntPtr(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0) { - // There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar. + // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size. // Consequently, no element of the array is well aligned. return size; } else { - return std::min( (PacketSize - (Index((size_t(array)/sizeof(Scalar))) & PacketAlignedMask)) - & PacketAlignedMask, size); + Index first = (AlignmentSize - (Index((UIntPtr(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask; + return (first < size) ? first : size; } } +/** \internal Returns the index of the first element of the array that is well aligned with respect the largest packet requirement. + * \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase) */ +template +EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size) +{ + typedef typename packet_traits::type DefaultPacketType; + return first_aligned::alignment>(array, size); +} + /** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size */ template -inline static Index first_multiple(Index size, Index base) +inline Index first_multiple(Index size, Index base) { return ((size+base-1)/base)*base; } @@ -500,15 +482,15 @@ inline static Index first_multiple(Index size, Index base) // use memcpy on trivial types, i.e., on types that does not require an initialization ctor. template struct smart_copy_helper; -template void smart_copy(const T* start, const T* end, T* target) +template EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target) { smart_copy_helper::RequireInitialization>::run(start, end, target); } template struct smart_copy_helper { - static inline void run(const T* start, const T* end, T* target) + EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) { - std::ptrdiff_t size = std::ptrdiff_t(end)-std::ptrdiff_t(start); + IntPtr size = IntPtr(end)-IntPtr(start); if(size==0) return; eigen_internal_assert(start!=0 && end!=0 && target!=0); memcpy(target, start, size); @@ -516,10 +498,44 @@ template struct smart_copy_helper { }; template struct smart_copy_helper { - static inline void run(const T* start, const T* end, T* target) + EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target) { std::copy(start, end, target); } }; +// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise. 
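Caller-side sketch for smart_copy above (the arrays are illustrative): the dispatch on NumTraits<T>::RequireInitialization is invisible at the call site:

double src[4] = {1, 2, 3, 4}, dst[4];
Eigen::internal::smart_copy(src, src + 4, dst);   // for a trivial scalar like double this is a single memcpy
// For a scalar type whose NumTraits<T>::RequireInitialization is nonzero (e.g. a custom
// multiprecision type), the same call dispatches to element-wise std::copy instead.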
+template struct smart_memmove_helper; + +template void smart_memmove(const T* start, const T* end, T* target) +{ + smart_memmove_helper::RequireInitialization>::run(start, end, target); +} + +template struct smart_memmove_helper { + static inline void run(const T* start, const T* end, T* target) + { + IntPtr size = IntPtr(end)-IntPtr(start); + if(size==0) return; + eigen_internal_assert(start!=0 && end!=0 && target!=0); + std::memmove(target, start, size); + } +}; + +template struct smart_memmove_helper { + static inline void run(const T* start, const T* end, T* target) + { + if (UIntPtr(target) < UIntPtr(start)) + { + std::copy(start, end, target); + } + else + { + std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T); + std::copy_backward(start, end, target + count); + } + } +}; + + /***************************************************************************** *** Implementation of runtime stack allocation (falling back to malloc) *** *****************************************************************************/ @@ -527,16 +543,16 @@ template struct smart_copy_helper { // you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA // to the appropriate stack allocation function #ifndef EIGEN_ALLOCA - #if (defined __linux__) || (defined __APPLE__) || (defined alloca) + #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca) #define EIGEN_ALLOCA alloca - #elif defined(_MSC_VER) + #elif EIGEN_COMP_MSVC #define EIGEN_ALLOCA _alloca #endif #endif // This helper class construct the allocated memory, and takes care of destructing and freeing the handled data // at destruction time. In practice this helper class is mainly useful to avoid memory leak in case of exceptions. -template class aligned_stack_memory_handler +template class aligned_stack_memory_handler : noncopyable { public: /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size. @@ -545,7 +561,7 @@ template class aligned_stack_memory_handler * In this case, the buffer elements will also be destructed when this handler will be destructed. * Finally, if \a dealloc is true, then the pointer \a ptr is freed. **/ - aligned_stack_memory_handler(T* ptr, size_t size, bool dealloc) + aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc) : m_ptr(ptr), m_size(size), m_deallocate(dealloc) { if(NumTraits::RequireInitialization && m_ptr) @@ -560,10 +576,34 @@ template class aligned_stack_memory_handler } protected: T* m_ptr; - size_t m_size; + std::size_t m_size; bool m_deallocate; }; +template class scoped_array : noncopyable +{ + T* m_ptr; +public: + explicit scoped_array(std::ptrdiff_t size) + { + m_ptr = new T[size]; + } + ~scoped_array() + { + delete[] m_ptr; + } + T& operator[](std::ptrdiff_t i) { return m_ptr[i]; } + const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; } + T* &ptr() { return m_ptr; } + const T* ptr() const { return m_ptr; } + operator const T*() const { return m_ptr; } +}; + +template void swap(scoped_array &a,scoped_array &b) +{ + std::swap(a.ptr(),b.ptr()); +} + } // end namespace internal /** \internal @@ -583,10 +623,12 @@ template class aligned_stack_memory_handler */ #ifdef EIGEN_ALLOCA - #if defined(__arm__) || defined(_WIN32) - #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast((reinterpret_cast(EIGEN_ALLOCA(SIZE+16)) & ~(size_t(15))) + 16) + #if EIGEN_DEFAULT_ALIGN_BYTES>0 + // We always manually re-align the result of EIGEN_ALLOCA. 
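Usage sketch for the scoped_array helper above (n is illustrative): it is a bare-bones, noncopyable RAII wrapper around new[]/delete[], used internally where pulling in std::vector would be overkill:

{
  std::ptrdiff_t n = 8;
  Eigen::internal::scoped_array<int> tmp(n);       // new int[8]
  for (std::ptrdiff_t i = 0; i < n; ++i) tmp[i] = int(i);
}                                                  // delete[] runs here when tmp leaves scope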
+ // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment. + #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast((internal::UIntPtr(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1))) #else - #define EIGEN_ALIGNED_ALLOCA EIGEN_ALLOCA + #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE) #endif #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \ @@ -611,41 +653,33 @@ template class aligned_stack_memory_handler *** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] *** *****************************************************************************/ -#if EIGEN_ALIGN - #ifdef EIGEN_EXCEPTIONS - #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ - void* operator new(size_t size, const std::nothrow_t&) noexcept { \ - try { return Eigen::internal::conditional_aligned_malloc(size); } \ - catch (...) { return 0; } \ +#if EIGEN_MAX_ALIGN_BYTES!=0 + #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ + void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \ + EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc(size); } \ + EIGEN_CATCH (...) { return 0; } \ } - #else - #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ - void* operator new(size_t size, const std::nothrow_t&) noexcept { \ - return Eigen::internal::conditional_aligned_malloc(size); \ - } - #endif - #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \ - void *operator new(size_t size) { \ + void *operator new(std::size_t size) { \ return Eigen::internal::conditional_aligned_malloc(size); \ } \ - void *operator new[](size_t size) { \ + void *operator new[](std::size_t size) { \ return Eigen::internal::conditional_aligned_malloc(size); \ } \ - void operator delete(void * ptr) noexcept { Eigen::internal::conditional_aligned_free(ptr); } \ - void operator delete[](void * ptr) noexcept { Eigen::internal::conditional_aligned_free(ptr); } \ - void operator delete(void * ptr, std::size_t /* sz */) noexcept { Eigen::internal::conditional_aligned_free(ptr); } \ - void operator delete[](void * ptr, std::size_t /* sz */) noexcept { Eigen::internal::conditional_aligned_free(ptr); } \ + void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free(ptr); } \ + void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free(ptr); } \ + void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free(ptr); } \ + void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free(ptr); } \ /* in-place new and delete. since (at least afaik) there is no actual */ \ /* memory allocated we can safely let the default implementation handle */ \ /* this particular case. 
*/ \ - static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \ - static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \ - void operator delete(void * memory, void *ptr) noexcept { return ::operator delete(memory,ptr); } \ - void operator delete[](void * memory, void *ptr) noexcept { return ::operator delete[](memory,ptr); } \ + static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \ + static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \ + void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \ + void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \ /* nothrow-new (returns zero instead of std::bad_alloc) */ \ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ - void operator delete(void *ptr, const std::nothrow_t&) noexcept { \ + void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \ Eigen::internal::conditional_aligned_free(ptr); \ } \ typedef void eigen_aligned_operator_new_marker_type; @@ -655,32 +689,31 @@ template class aligned_stack_memory_handler #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true) #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \ - EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%16==0))) + EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0))) /****************************************************************************/ - /** \class aligned_allocator - * \ingroup Core_Module - * - * \brief STL compatible allocator to use with with 16 byte aligned types - * - * Example: - * \code - * // Matrix4f requires 16 bytes alignment: - * std::map< int, Matrix4f, std::less, - * aligned_allocator > > my_map_mat4; - * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator: - * std::map< int, Vector3f > my_map_vec3; - * \endcode - * - * \sa \blank \ref TopicStlContainers. - */ +* \ingroup Core_Module +* +* \brief STL compatible allocator to use with with 16 byte aligned types +* +* Example: +* \code +* // Matrix4f requires 16 bytes alignment: +* std::map< int, Matrix4f, std::less, +* aligned_allocator > > my_map_mat4; +* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator: +* std::map< int, Vector3f > my_map_vec3; +* \endcode +* +* \sa \blank \ref TopicStlContainers. +*/ template class aligned_allocator : public std::allocator { public: - typedef size_t size_type; + typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; typedef T* pointer; typedef const T* const_pointer; @@ -718,12 +751,12 @@ public: //---------- Cache sizes ---------- #if !defined(EIGEN_NO_CPUID) -# if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) ) -# if defined(__PIC__) && defined(__i386__) +# if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64 +# if defined(__PIC__) && EIGEN_ARCH_i386 // Case for x86 with PIC # define EIGEN_CPUID(abcd,func,id) \ __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id)); -# elif defined(__PIC__) && defined(__x86_64__) +# elif defined(__PIC__) && EIGEN_ARCH_x86_64 // Case for x64 with PIC. 
In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model. // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway. # define EIGEN_CPUID(abcd,func,id) \ @@ -733,8 +766,8 @@ public: # define EIGEN_CPUID(abcd,func,id) \ __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) ); # endif -# elif defined(_MSC_VER) -# if (_MSC_VER > 1500) && ( defined(_M_IX86) || defined(_M_X64) ) +# elif EIGEN_COMP_MSVC +# if (EIGEN_COMP_MSVC > 1500) && EIGEN_ARCH_i386_OR_x86_64 # define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id) # endif # endif diff --git a/eigen/Eigen/src/Core/util/Meta.h b/eigen/Eigen/src/Core/util/Meta.h index 71d5871..8de6055 100644 --- a/eigen/Eigen/src/Core/util/Meta.h +++ b/eigen/Eigen/src/Core/util/Meta.h @@ -1,7 +1,7 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2008-2015 Gael Guennebaud // Copyright (C) 2006-2008 Benoit Jacob // // This Source Code Form is subject to the terms of the Mozilla @@ -11,8 +11,27 @@ #ifndef EIGEN_META_H #define EIGEN_META_H +#if defined(__CUDA_ARCH__) +#include +#include +#endif + +#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L +#include +#endif + namespace Eigen { +typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex; + +/** + * \brief The Index type as used for the API. + * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. + * \sa \blank \ref TopicPreprocessorDirectives, StorageIndex. + */ + +typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE Index; + namespace internal { /** \internal @@ -22,6 +41,16 @@ namespace internal { * we however don't want to add a dependency to Boost. */ +// Only recent versions of ICC complain about using ptrdiff_t to hold pointers, +// and older versions do not provide *intptr_t types. 
+#if EIGEN_COMP_ICC>=1600 && __cplusplus >= 201103L +typedef std::intptr_t IntPtr; +typedef std::uintptr_t UIntPtr; +#else +typedef std::ptrdiff_t IntPtr; +typedef std::size_t UIntPtr; +#endif + struct true_type { enum { value = 1 }; }; struct false_type { enum { value = 0 }; }; @@ -68,6 +97,23 @@ template<> struct is_arithmetic { enum { value = true }; }; template<> struct is_arithmetic { enum { value = true }; }; template<> struct is_arithmetic { enum { value = true }; }; +#if EIGEN_HAS_CXX11 +using std::is_integral; +#else +template struct is_integral { enum { value = false }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +template<> struct is_integral { enum { value = true }; }; +#endif + + template struct add_const { typedef const T type; }; template struct add_const { typedef T& type; }; @@ -80,28 +126,215 @@ template struct add_const_on_value_type { typedef T const template struct add_const_on_value_type { typedef T const* const type; }; template struct add_const_on_value_type { typedef T const* const type; }; + +template +struct is_convertible_impl +{ +private: + struct any_conversion + { + template any_conversion(const volatile T&); + template any_conversion(T&); + }; + struct yes {int a[1];}; + struct no {int a[2];}; + + static yes test(const To&, int); + static no test(any_conversion, ...); + +public: + static From ms_from; +#ifdef __INTEL_COMPILER + #pragma warning push + #pragma warning ( disable : 2259 ) +#endif + enum { value = sizeof(test(ms_from, 0))==sizeof(yes) }; +#ifdef __INTEL_COMPILER + #pragma warning pop +#endif +}; + +template +struct is_convertible +{ + enum { value = is_convertible_impl::type, + typename remove_all::type>::value }; +}; + /** \internal Allows to enable/disable an overload * according to a compile time condition. 
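For context, the enable_if/is_integral helpers above are used for SFINAE much like their std counterparts; a minimal sketch with a hypothetical helper function twice():

#include <Eigen/Core>

// Participates in overload resolution only when T is an integral type.
template<typename T>
typename Eigen::internal::enable_if<Eigen::internal::is_integral<T>::value, T>::type
twice(T x) { return T(2) * x; }
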
*/ -template struct enable_if; +template struct enable_if; template struct enable_if { typedef T type; }; +#if defined(__CUDA_ARCH__) +#if !defined(__FLT_EPSILON__) +#define __FLT_EPSILON__ FLT_EPSILON +#define __DBL_EPSILON__ DBL_EPSILON +#endif +namespace device { + +template struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static T epsilon() { return 0; } + static T (max)() { assert(false && "Highest not supported for this type"); } + static T (min)() { assert(false && "Lowest not supported for this type"); } + static T infinity() { assert(false && "Infinity not supported for this type"); } + static T quiet_NaN() { assert(false && "quiet_NaN not supported for this type"); } +}; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static float epsilon() { return __FLT_EPSILON__; } + EIGEN_DEVICE_FUNC + static float (max)() { return CUDART_MAX_NORMAL_F; } + EIGEN_DEVICE_FUNC + static float (min)() { return FLT_MIN; } + EIGEN_DEVICE_FUNC + static float infinity() { return CUDART_INF_F; } + EIGEN_DEVICE_FUNC + static float quiet_NaN() { return CUDART_NAN_F; } +}; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static double epsilon() { return __DBL_EPSILON__; } + EIGEN_DEVICE_FUNC + static double (max)() { return DBL_MAX; } + EIGEN_DEVICE_FUNC + static double (min)() { return DBL_MIN; } + EIGEN_DEVICE_FUNC + static double infinity() { return CUDART_INF; } + EIGEN_DEVICE_FUNC + static double quiet_NaN() { return CUDART_NAN; } +}; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static int epsilon() { return 0; } + EIGEN_DEVICE_FUNC + static int (max)() { return INT_MAX; } + EIGEN_DEVICE_FUNC + static int (min)() { return INT_MIN; } +}; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static unsigned int epsilon() { return 0; } + EIGEN_DEVICE_FUNC + static unsigned int (max)() { return UINT_MAX; } + EIGEN_DEVICE_FUNC + static unsigned int (min)() { return 0; } +}; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static long epsilon() { return 0; } + EIGEN_DEVICE_FUNC + static long (max)() { return LONG_MAX; } + EIGEN_DEVICE_FUNC + static long (min)() { return LONG_MIN; } +}; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static unsigned long epsilon() { return 0; } + EIGEN_DEVICE_FUNC + static unsigned long (max)() { return ULONG_MAX; } + EIGEN_DEVICE_FUNC + static unsigned long (min)() { return 0; } +}; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static long long epsilon() { return 0; } + EIGEN_DEVICE_FUNC + static long long (max)() { return LLONG_MAX; } + EIGEN_DEVICE_FUNC + static long long (min)() { return LLONG_MIN; } +}; +template<> struct numeric_limits +{ + EIGEN_DEVICE_FUNC + static unsigned long long epsilon() { return 0; } + EIGEN_DEVICE_FUNC + static unsigned long long (max)() { return ULLONG_MAX; } + EIGEN_DEVICE_FUNC + static unsigned long long (min)() { return 0; } +}; + +} + +#endif /** \internal * A base class do disable default copy ctor and copy assignement operator. */ class noncopyable { - noncopyable(const noncopyable&); - const noncopyable& operator=(const noncopyable&); + EIGEN_DEVICE_FUNC noncopyable(const noncopyable&); + EIGEN_DEVICE_FUNC const noncopyable& operator=(const noncopyable&); protected: - noncopyable() {} - ~noncopyable() {} + EIGEN_DEVICE_FUNC noncopyable() {} + EIGEN_DEVICE_FUNC ~noncopyable() {} +}; + +/** \internal + * Provides access to the number of elements in the object of as a compile-time constant expression. 
+ * It "returns" Eigen::Dynamic if the size cannot be resolved at compile-time (default). + * + * Similar to std::tuple_size, but more general. + * + * It currently supports: + * - any types T defining T::SizeAtCompileTime + * - plain C arrays as T[N] + * - std::array (c++11) + * - some internal types such as SingleRange and AllRange + * + * The second template parameter eases SFINAE-based specializations. + */ +template struct array_size { + enum { value = Dynamic }; }; +template struct array_size::type> { + enum { value = T::SizeAtCompileTime }; +}; + +template struct array_size { + enum { value = N }; +}; +template struct array_size { + enum { value = N }; +}; + +#if EIGEN_HAS_CXX11 +template struct array_size > { + enum { value = N }; +}; +template struct array_size > { + enum { value = N }; +}; +#endif + +/** \internal + * Analogue of the std::size free function. + * It returns the size of the container or view \a x of type \c T + * + * It currently supports: + * - any types T defining a member T::size() const + * - plain C arrays as T[N] + * + */ +template +Index size(const T& x) { return x.size(); } + +template +Index size(const T (&) [N]) { return N; } /** \internal * Convenient struct to get the result type of a unary or binary functor. @@ -110,14 +343,20 @@ protected: * upcoming next STL generation (using a templated result member). * If none of these members is provided, then the type of the first argument is returned. FIXME, that behavior is a pretty bad hack. */ -template struct result_of {}; +#if EIGEN_HAS_STD_RESULT_OF +template struct result_of { + typedef typename std::result_of::type type1; + typedef typename remove_all::type type; +}; +#else +template struct result_of { }; struct has_none {int a[1];}; struct has_std_result_type {int a[2];}; struct has_tr1_result {int a[3];}; template -struct unary_result_of_select {typedef ArgType type;}; +struct unary_result_of_select {typedef typename internal::remove_all::type type;}; template struct unary_result_of_select {typedef typename Func::result_type type;}; @@ -128,10 +367,10 @@ struct unary_result_of_select {typedef ty template struct result_of { template - static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); template - static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); - static has_none testFunctor(...); + static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); + static has_none testFunctor(...); // note that the following indirection is needed for gcc-3.3 enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; @@ -139,7 +378,7 @@ struct result_of { }; template -struct binary_result_of_select {typedef ArgType0 type;}; +struct binary_result_of_select {typedef typename internal::remove_all::type type;}; template struct binary_result_of_select @@ -152,16 +391,83 @@ struct binary_result_of_select template struct result_of { template - static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); template - static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); - static has_none testFunctor(...); + static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); + static has_none testFunctor(...); // note that the following indirection is needed 
for gcc-3.3 enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; typedef typename binary_result_of_select::type type; }; +template +struct ternary_result_of_select {typedef typename internal::remove_all::type type;}; + +template +struct ternary_result_of_select +{typedef typename Func::result_type type;}; + +template +struct ternary_result_of_select +{typedef typename Func::template result::type type;}; + +template +struct result_of { + template + static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0); + template + static has_tr1_result testFunctor(T const *, typename T::template result::type const * = 0); + static has_none testFunctor(...); + + // note that the following indirection is needed for gcc-3.3 + enum {FunctorType = sizeof(testFunctor(static_cast(0)))}; + typedef typename ternary_result_of_select::type type; +}; +#endif + +struct meta_yes { char a[1]; }; +struct meta_no { char a[2]; }; + +// Check whether T::ReturnType does exist +template +struct has_ReturnType +{ + template static meta_yes testFunctor(C const *, typename C::ReturnType const * = 0); + template static meta_no testFunctor(...); + + enum { value = sizeof(testFunctor(static_cast(0))) == sizeof(meta_yes) }; +}; + +template const T* return_ptr(); + +template +struct has_nullary_operator +{ + template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr()->operator()())>0)>::type * = 0); + static meta_no testFunctor(...); + + enum { value = sizeof(testFunctor(static_cast(0))) == sizeof(meta_yes) }; +}; + +template +struct has_unary_operator +{ + template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr()->operator()(IndexType(0)))>0)>::type * = 0); + static meta_no testFunctor(...); + + enum { value = sizeof(testFunctor(static_cast(0))) == sizeof(meta_yes) }; +}; + +template +struct has_binary_operator +{ + template static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ptr()->operator()(IndexType(0),IndexType(0)))>0)>::type * = 0); + static meta_no testFunctor(...); + + enum { value = sizeof(testFunctor(static_cast(0))) == sizeof(meta_yes) }; +}; + /** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer. * Usage example: \code meta_sqrt<1023>::ret \endcode */ @@ -185,37 +491,26 @@ class meta_sqrt template class meta_sqrt { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; }; -/** \internal determines whether the product of two numeric types is allowed and what the return type is */ -template struct scalar_product_traits -{ - enum { Defined = 0 }; -}; -template struct scalar_product_traits +/** \internal Computes the least common multiple of two positive integer A and B + * at compile-time. It implements a naive algorithm testing all multiples of A. + * It thus works better if A>=B. 
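As a quick illustration of the meta_sqrt helper above and the meta_least_common_multiple helper introduced next (the default recursion parameters, e.g. K starting at 1, are assumptions based on this diff):

#include <Eigen/Core>

static_assert(Eigen::internal::meta_sqrt<1023>::ret == 31,
              "meta_sqrt<Y>::ret == int(sqrt(Y))");
static_assert(Eigen::internal::meta_least_common_multiple<6, 4>::ret == 12,
              "LCM computed by testing successive multiples of the first argument");
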
+ */ +template +struct meta_least_common_multiple { - enum { - // Cost = NumTraits::MulCost, - Defined = 1 - }; - typedef T ReturnType; + enum { ret = meta_least_common_multiple::ret }; }; - -template struct scalar_product_traits > +template +struct meta_least_common_multiple { - enum { - // Cost = 2*NumTraits::MulCost, - Defined = 1 - }; - typedef std::complex ReturnType; + enum { ret = A*K }; }; -template struct scalar_product_traits, T> +/** \internal determines whether the product of two numeric types is allowed and what the return type is */ +template struct scalar_product_traits { - enum { - // Cost = 2*NumTraits::MulCost, - Defined = 1 - }; - typedef std::complex ReturnType; + enum { Defined = 0 }; }; // FIXME quick workaround around current limitation of result_of @@ -224,19 +519,31 @@ template struct scalar_product_traits, T> // typedef typename scalar_product_traits::type, typename remove_all::type>::ReturnType type; // }; -template struct is_diagonal -{ enum { ret = false }; }; - -template struct is_diagonal > -{ enum { ret = true }; }; - -template struct is_diagonal > -{ enum { ret = true }; }; +} // end namespace internal -template struct is_diagonal > -{ enum { ret = true }; }; +namespace numext { + +#if defined(__CUDA_ARCH__) +template EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; } +#else +template EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); } +#endif + +#if defined(__CUDA_ARCH__) +using internal::device::numeric_limits; +#else +using std::numeric_limits; +#endif + +// Integer division with rounding up. +// T is assumed to be an integer type with a>=0, and b>0 +template +T div_ceil(const T &a, const T &b) +{ + return (a+b-1) / b; +} -} // end namespace internal +} // end namespace numext } // end namespace Eigen diff --git a/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h b/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h index d573bbd..86b60f5 100644 --- a/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +++ b/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h @@ -12,6 +12,16 @@ #pragma GCC diagnostic pop #endif + #if defined __NVCC__ +// Don't reenable the diagnostic messages, as it turns out these messages need +// to be disabled at the point of the template instantiation (i.e the user code) +// otherwise they'll be triggered by nvcc. 
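The numext::div_ceil helper added to Meta.h above is handy for computing block counts; a one-line usage sketch (the variable name is illustrative):

#include <Eigen/Core>

// ceil(1000/128) == 8; a is assumed non-negative and b positive, as stated in the comment.
Eigen::Index blocks = Eigen::numext::div_ceil<Eigen::Index>(1000, 128);
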
+// #pragma diag_default code_is_unreachable +// #pragma diag_default initialization_not_reachable +// #pragma diag_default 2651 +// #pragma diag_default 2653 + #endif + #endif #endif // EIGEN_WARNINGS_DISABLED diff --git a/eigen/Eigen/src/Core/util/StaticAssert.h b/eigen/Eigen/src/Core/util/StaticAssert.h index e53d2b8..983361a 100644 --- a/eigen/Eigen/src/Core/util/StaticAssert.h +++ b/eigen/Eigen/src/Core/util/StaticAssert.h @@ -26,7 +26,7 @@ #ifndef EIGEN_NO_STATIC_ASSERT - #if __has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600) + #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)) // if native static_assert is enabled, let's use it #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG); @@ -50,6 +50,7 @@ THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE, THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE, THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE, + OUT_OF_RANGE_ACCESS, YOU_MADE_A_PROGRAMMING_MISTAKE, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT, EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE, @@ -84,6 +85,7 @@ THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY, YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT, THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS, + THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS, THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL, THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES, YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED, @@ -92,7 +94,14 @@ THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, - STORAGE_LAYOUT_DOES_NOT_MATCH + STORAGE_LAYOUT_DOES_NOT_MATCH, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, + MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, + THIS_TYPE_IS_NOT_SUPPORTED, + STORAGE_KIND_MUST_MATCH, + STORAGE_INDEX_MUST_MATCH, + CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY }; }; @@ -103,15 +112,15 @@ // Specialized implementation for MSVC to avoid "conditional // expression is constant" warnings. This implementation doesn't // appear to work under GCC, hence the multiple implementations. 
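To show how the extended message enum feeds into EIGEN_STATIC_ASSERT, a small sketch with a hypothetical helper (not part of this patch):

#include <Eigen/Core>

template<typename Derived>
void check_not_dynamic(const Eigen::MatrixBase<Derived>&)
{
  // Fails to compile for dynamically sized expressions, surfacing the enum name in the error.
  EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime != Eigen::Dynamic,
                      THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE)
}
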
- #ifdef _MSC_VER + #if EIGEN_COMP_MSVC #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \ {Eigen::internal::static_assertion::MSG;} #else - + // In some cases clang interprets bool(CONDITION) as function declaration #define EIGEN_STATIC_ASSERT(CONDITION,MSG) \ - if (Eigen::internal::static_assertion::MSG) {} + if (Eigen::internal::static_assertion(CONDITION)>::MSG) {} #endif @@ -159,7 +168,7 @@ #define EIGEN_PREDICATE_SAME_MATRIX_SIZE(TYPE0,TYPE1) \ ( \ - (int(TYPE0::SizeAtCompileTime)==0 && int(TYPE1::SizeAtCompileTime)==0) \ + (int(Eigen::internal::size_of_xpr_at_compile_time::ret)==0 && int(Eigen::internal::size_of_xpr_at_compile_time::ret)==0) \ || (\ (int(TYPE0::RowsAtCompileTime)==Eigen::Dynamic \ || int(TYPE1::RowsAtCompileTime)==Eigen::Dynamic \ @@ -170,13 +179,8 @@ ) \ ) -#ifdef EIGEN2_SUPPORT - #define EIGEN_STATIC_ASSERT_NON_INTEGER(TYPE) \ - eigen_assert(!NumTraits::IsInteger); -#else - #define EIGEN_STATIC_ASSERT_NON_INTEGER(TYPE) \ +#define EIGEN_STATIC_ASSERT_NON_INTEGER(TYPE) \ EIGEN_STATIC_ASSERT(!NumTraits::IsInteger, THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES) -#endif // static assertion failing if it is guaranteed at compile-time that the two matrix expression types have different sizes @@ -191,18 +195,22 @@ THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS) #define EIGEN_STATIC_ASSERT_LVALUE(Derived) \ - EIGEN_STATIC_ASSERT(internal::is_lvalue::value, \ + EIGEN_STATIC_ASSERT(Eigen::internal::is_lvalue::value, \ THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY) #define EIGEN_STATIC_ASSERT_ARRAYXPR(Derived) \ - EIGEN_STATIC_ASSERT((internal::is_same::XprKind, ArrayXpr>::value), \ + EIGEN_STATIC_ASSERT((Eigen::internal::is_same::XprKind, ArrayXpr>::value), \ THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES) #define EIGEN_STATIC_ASSERT_SAME_XPR_KIND(Derived1, Derived2) \ - EIGEN_STATIC_ASSERT((internal::is_same::XprKind, \ - typename internal::traits::XprKind \ + EIGEN_STATIC_ASSERT((Eigen::internal::is_same::XprKind, \ + typename Eigen::internal::traits::XprKind \ >::value), \ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES) +// Check that a cost value is positive, and that is stay within a reasonable range +// TODO this check could be enabled for internal debugging only +#define EIGEN_INTERNAL_CHECK_COST_VALUE(C) \ + EIGEN_STATIC_ASSERT((C)>=0 && (C)<=HugeCost*HugeCost, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE); #endif // EIGEN_STATIC_ASSERT_H diff --git a/eigen/Eigen/src/Core/util/SymbolicIndex.h b/eigen/Eigen/src/Core/util/SymbolicIndex.h new file mode 100644 index 0000000..bb6349e --- /dev/null +++ b/eigen/Eigen/src/Core/util/SymbolicIndex.h @@ -0,0 +1,300 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SYMBOLIC_INDEX_H +#define EIGEN_SYMBOLIC_INDEX_H + +namespace Eigen { + +/** \namespace Eigen::Symbolic + * \ingroup Core_Module + * + * This namespace defines a set of classes and functions to build and evaluate symbolic expressions of scalar type Index. 
+ * Here is a simple example: + * + * \code + * // First step, defines symbols: + * struct x_tag {}; static const Symbolic::SymbolExpr x; + * struct y_tag {}; static const Symbolic::SymbolExpr y; + * struct z_tag {}; static const Symbolic::SymbolExpr z; + * + * // Defines an expression: + * auto expr = (x+3)/y+z; + * + * // And evaluate it: (c++14) + * std::cout << expr.eval(x=6,y=3,z=-13) << "\n"; + * + * // In c++98/11, only one symbol per expression is supported for now: + * auto expr98 = (3-x)/2; + * std::cout << expr98.eval(x=6) << "\n"; + * \endcode + * + * It is currently only used internally to define and minipulate the placeholders::last and placeholders::end symbols in Eigen::seq and Eigen::seqN. + * + */ +namespace Symbolic { + +template class Symbol; +template class NegateExpr; +template class AddExpr; +template class ProductExpr; +template class QuotientExpr; + +// A simple wrapper around an integral value to provide the eval method. +// We could also use a free-function symbolic_eval... +template +class ValueExpr { +public: + ValueExpr(IndexType val) : m_value(val) {} + template + IndexType eval_impl(const T&) const { return m_value; } +protected: + IndexType m_value; +}; + +// Specialization for compile-time value, +// It is similar to ValueExpr(N) but this version helps the compiler to generate better code. +template +class ValueExpr > { +public: + ValueExpr() {} + template + Index eval_impl(const T&) const { return N; } +}; + + +/** \class BaseExpr + * \ingroup Core_Module + * Common base class of any symbolic expressions + */ +template +class BaseExpr +{ +public: + const Derived& derived() const { return *static_cast(this); } + + /** Evaluate the expression given the \a values of the symbols. + * + * \param values defines the values of the symbols, it can either be a SymbolValue or a std::tuple of SymbolValue + * as constructed by SymbolExpr::operator= operator. + * + */ + template + Index eval(const T& values) const { return derived().eval_impl(values); } + +#if EIGEN_HAS_CXX14 + template + Index eval(Types&&... 
values) const { return derived().eval_impl(std::make_tuple(values...)); } +#endif + + NegateExpr operator-() const { return NegateExpr(derived()); } + + AddExpr > operator+(Index b) const + { return AddExpr >(derived(), b); } + AddExpr > operator-(Index a) const + { return AddExpr >(derived(), -a); } + ProductExpr > operator*(Index a) const + { return ProductExpr >(derived(),a); } + QuotientExpr > operator/(Index a) const + { return QuotientExpr >(derived(),a); } + + friend AddExpr > operator+(Index a, const BaseExpr& b) + { return AddExpr >(b.derived(), a); } + friend AddExpr,ValueExpr<> > operator-(Index a, const BaseExpr& b) + { return AddExpr,ValueExpr<> >(-b.derived(), a); } + friend ProductExpr,Derived> operator*(Index a, const BaseExpr& b) + { return ProductExpr,Derived>(a,b.derived()); } + friend QuotientExpr,Derived> operator/(Index a, const BaseExpr& b) + { return QuotientExpr,Derived>(a,b.derived()); } + + template + AddExpr > > operator+(internal::FixedInt) const + { return AddExpr > >(derived(), ValueExpr >()); } + template + AddExpr > > operator-(internal::FixedInt) const + { return AddExpr > >(derived(), ValueExpr >()); } + template + ProductExpr > > operator*(internal::FixedInt) const + { return ProductExpr > >(derived(),ValueExpr >()); } + template + QuotientExpr > > operator/(internal::FixedInt) const + { return QuotientExpr > >(derived(),ValueExpr >()); } + + template + friend AddExpr > > operator+(internal::FixedInt, const BaseExpr& b) + { return AddExpr > >(b.derived(), ValueExpr >()); } + template + friend AddExpr,ValueExpr > > operator-(internal::FixedInt, const BaseExpr& b) + { return AddExpr,ValueExpr > >(-b.derived(), ValueExpr >()); } + template + friend ProductExpr >,Derived> operator*(internal::FixedInt, const BaseExpr& b) + { return ProductExpr >,Derived>(ValueExpr >(),b.derived()); } + template + friend QuotientExpr >,Derived> operator/(internal::FixedInt, const BaseExpr& b) + { return QuotientExpr > ,Derived>(ValueExpr >(),b.derived()); } + +#if (!EIGEN_HAS_CXX14) + template + AddExpr > > operator+(internal::FixedInt (*)()) const + { return AddExpr > >(derived(), ValueExpr >()); } + template + AddExpr > > operator-(internal::FixedInt (*)()) const + { return AddExpr > >(derived(), ValueExpr >()); } + template + ProductExpr > > operator*(internal::FixedInt (*)()) const + { return ProductExpr > >(derived(),ValueExpr >()); } + template + QuotientExpr > > operator/(internal::FixedInt (*)()) const + { return QuotientExpr > >(derived(),ValueExpr >()); } + + template + friend AddExpr > > operator+(internal::FixedInt (*)(), const BaseExpr& b) + { return AddExpr > >(b.derived(), ValueExpr >()); } + template + friend AddExpr,ValueExpr > > operator-(internal::FixedInt (*)(), const BaseExpr& b) + { return AddExpr,ValueExpr > >(-b.derived(), ValueExpr >()); } + template + friend ProductExpr >,Derived> operator*(internal::FixedInt (*)(), const BaseExpr& b) + { return ProductExpr >,Derived>(ValueExpr >(),b.derived()); } + template + friend QuotientExpr >,Derived> operator/(internal::FixedInt (*)(), const BaseExpr& b) + { return QuotientExpr > ,Derived>(ValueExpr >(),b.derived()); } +#endif + + + template + AddExpr operator+(const BaseExpr &b) const + { return AddExpr(derived(), b.derived()); } + + template + AddExpr > operator-(const BaseExpr &b) const + { return AddExpr >(derived(), -b.derived()); } + + template + ProductExpr operator*(const BaseExpr &b) const + { return ProductExpr(derived(), b.derived()); } + + template + QuotientExpr operator/(const BaseExpr &b) const + 
{ return QuotientExpr(derived(), b.derived()); } +}; + +template +struct is_symbolic { + // BaseExpr has no conversion ctor, so we only have to check whether T can be staticaly cast to its base class BaseExpr. + enum { value = internal::is_convertible >::value }; +}; + +// Specialization for functions, because is_convertible fails in this case. +// Useful in c++98/11 mode when testing is_symbolic)> +template +struct is_symbolic { + enum { value = false }; +}; + +/** Represents the actual value of a symbol identified by its tag + * + * It is the return type of SymbolValue::operator=, and most of the time this is only way it is used. + */ +template +class SymbolValue +{ +public: + /** Default constructor from the value \a val */ + SymbolValue(Index val) : m_value(val) {} + + /** \returns the stored value of the symbol */ + Index value() const { return m_value; } +protected: + Index m_value; +}; + +/** Expression of a symbol uniquely identified by the template parameter type \c tag */ +template +class SymbolExpr : public BaseExpr > +{ +public: + /** Alias to the template parameter \c tag */ + typedef tag Tag; + + SymbolExpr() {} + + /** Associate the value \a val to the given symbol \c *this, uniquely identified by its \c Tag. + * + * The returned object should be passed to ExprBase::eval() to evaluate a given expression with this specified runtime-time value. + */ + SymbolValue operator=(Index val) const { + return SymbolValue(val); + } + + Index eval_impl(const SymbolValue &values) const { return values.value(); } + +#if EIGEN_HAS_CXX14 + // C++14 versions suitable for multiple symbols + template + Index eval_impl(const std::tuple& values) const { return std::get >(values).value(); } +#endif +}; + +template +class NegateExpr : public BaseExpr > +{ +public: + NegateExpr(const Arg0& arg0) : m_arg0(arg0) {} + + template + Index eval_impl(const T& values) const { return -m_arg0.eval_impl(values); } +protected: + Arg0 m_arg0; +}; + +template +class AddExpr : public BaseExpr > +{ +public: + AddExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} + + template + Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) + m_arg1.eval_impl(values); } +protected: + Arg0 m_arg0; + Arg1 m_arg1; +}; + +template +class ProductExpr : public BaseExpr > +{ +public: + ProductExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} + + template + Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) * m_arg1.eval_impl(values); } +protected: + Arg0 m_arg0; + Arg1 m_arg1; +}; + +template +class QuotientExpr : public BaseExpr > +{ +public: + QuotientExpr(const Arg0& arg0, const Arg1& arg1) : m_arg0(arg0), m_arg1(arg1) {} + + template + Index eval_impl(const T& values) const { return m_arg0.eval_impl(values) / m_arg1.eval_impl(values); } +protected: + Arg0 m_arg0; + Arg1 m_arg1; +}; + +} // end namespace Symbolic + +} // end namespace Eigen + +#endif // EIGEN_SYMBOLIC_INDEX_H diff --git a/eigen/Eigen/src/Core/util/XprHelper.h b/eigen/Eigen/src/Core/util/XprHelper.h index d05f8e5..4b337f2 100644 --- a/eigen/Eigen/src/Core/util/XprHelper.h +++ b/eigen/Eigen/src/Core/util/XprHelper.h @@ -14,20 +14,77 @@ // just a workaround because GCC seems to not really like empty structs // FIXME: gcc 4.3 generates bad code when strict-aliasing is enabled // so currently we simply disable this optimization for gcc 4.3 -#if (defined __GNUG__) && !((__GNUC__==4) && (__GNUC_MINOR__==3)) +#if EIGEN_COMP_GNUC && !EIGEN_GNUC_AT(4,3) #define EIGEN_EMPTY_STRUCT_CTOR(X) 
\ - EIGEN_STRONG_INLINE X() {} \ - EIGEN_STRONG_INLINE X(const X& ) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {} #else #define EIGEN_EMPTY_STRUCT_CTOR(X) #endif namespace Eigen { -typedef EIGEN_DEFAULT_DENSE_INDEX_TYPE DenseIndex; - namespace internal { +template +EIGEN_DEVICE_FUNC +inline IndexDest convert_index(const IndexSrc& idx) { + // for sizeof(IndexDest)>=sizeof(IndexSrc) compilers should be able to optimize this away: + eigen_internal_assert(idx <= NumTraits::highest() && "Index value to big for target type"); + return IndexDest(idx); +} + + +// promote_scalar_arg is an helper used in operation between an expression and a scalar, like: +// expression * scalar +// Its role is to determine how the type T of the scalar operand should be promoted given the scalar type ExprScalar of the given expression. +// The IsSupported template parameter must be provided by the caller as: internal::has_ReturnType >::value using the proper order for ExprScalar and T. +// Then the logic is as follows: +// - if the operation is natively supported as defined by IsSupported, then the scalar type is not promoted, and T is returned. +// - otherwise, NumTraits::Literal is returned if T is implicitly convertible to NumTraits::Literal AND that this does not imply a float to integer conversion. +// - otherwise, ExprScalar is returned if T is implicitly convertible to ExprScalar AND that this does not imply a float to integer conversion. +// - In all other cases, the promoted type is not defined, and the respective operation is thus invalid and not available (SFINAE). +template +struct promote_scalar_arg; + +template +struct promote_scalar_arg +{ + typedef T type; +}; + +// Recursively check safe conversion to PromotedType, and then ExprScalar if they are different. +template::value, + bool IsSafe = NumTraits::IsInteger || !NumTraits::IsInteger> +struct promote_scalar_arg_unsupported; + +// Start recursion with NumTraits::Literal +template +struct promote_scalar_arg : promote_scalar_arg_unsupported::Literal> {}; + +// We found a match! +template +struct promote_scalar_arg_unsupported +{ + typedef PromotedType type; +}; + +// No match, but no real-to-integer issues, and ExprScalar and current PromotedType are different, +// so let's try to promote to ExprScalar +template +struct promote_scalar_arg_unsupported + : promote_scalar_arg_unsupported +{}; + +// Unsafe real-to-integer, let's stop. +template +struct promote_scalar_arg_unsupported {}; + +// T is not even convertible to ExprScalar, let's stop. +template +struct promote_scalar_arg_unsupported {}; + //classes inheriting no_assignment_operator don't generate a default operator=. 
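The promotion rules spelled out above are what make ordinary mixed expressions compile; a user-level illustration (the function name is illustrative):

#include <Eigen/Core>

void promotion_examples()
{
  Eigen::MatrixXd  md = Eigen::MatrixXd::Random(3, 3);
  Eigen::MatrixXcd mc = Eigen::MatrixXcd::Random(3, 3);
  Eigen::MatrixXd  r1 = md * 2;    // int literal promoted to double
  Eigen::MatrixXcd r2 = mc * 2.0;  // double promoted to std::complex<double>
}
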
class no_assignment_operator { @@ -50,19 +107,21 @@ template class variable_if_dynamic { public: EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic) - explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); assert(v == T(Value)); } - static T value() { return T(Value); } - void setValue(T) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return T(Value); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {} }; template class variable_if_dynamic { T m_value; - variable_if_dynamic() { assert(false); } + EIGEN_DEVICE_FUNC variable_if_dynamic() { eigen_assert(false); } public: - explicit variable_if_dynamic(T value) : m_value(value) {} - T value() const { return m_value; } - void setValue(T value) { m_value = value; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamic(T value) : m_value(value) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T value() const { return m_value; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator T() const { return m_value; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; } }; /** \internal like variable_if_dynamic but for DynamicIndex @@ -71,19 +130,19 @@ template class variable_if_dynamicindex { public: EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex) - explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); assert(v == T(Value)); } - static T value() { return T(Value); } - void setValue(T) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T value() { return T(Value); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T) {} }; template class variable_if_dynamicindex { T m_value; - variable_if_dynamicindex() { assert(false); } + EIGEN_DEVICE_FUNC variable_if_dynamicindex() { eigen_assert(false); } public: - explicit variable_if_dynamicindex(T value) : m_value(value) {} - T value() const { return m_value; } - void setValue(T value) { m_value = value; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit variable_if_dynamicindex(T value) : m_value(value) {} + EIGEN_DEVICE_FUNC T EIGEN_STRONG_INLINE value() const { return m_value; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void setValue(T value) { m_value = value; } }; template struct functor_traits @@ -101,7 +160,73 @@ template struct packet_traits; template struct unpacket_traits { typedef T type; - enum {size=1}; + typedef T half; + enum + { + size = 1, + alignment = 1 + }; +}; + +template::size)==0 || is_same::half>::value> +struct find_best_packet_helper; + +template< int Size, typename PacketType> +struct find_best_packet_helper +{ + typedef PacketType type; +}; + +template +struct find_best_packet_helper +{ + typedef typename find_best_packet_helper::half>::type type; +}; + +template +struct find_best_packet +{ + typedef typename find_best_packet_helper::type>::type type; +}; + +#if EIGEN_MAX_STATIC_ALIGN_BYTES>0 +template +struct compute_default_alignment_helper +{ + enum { value = 0 }; +}; + +template +struct compute_default_alignment_helper // Match +{ + enum { value = AlignmentBytes }; +}; + +template +struct compute_default_alignment_helper // Try-half +{ + // current packet too large, try with an half-packet + enum { value = compute_default_alignment_helper::value 
}; +}; +#else +// If static alignment is disabled, no need to bother. +// This also avoids a division by zero in "bool Match = bool((ArrayBytes%AlignmentBytes)==0)" +template +struct compute_default_alignment_helper +{ + enum { value = 0 }; +}; +#endif + +template struct compute_default_alignment { + enum { value = compute_default_alignment_helper::value }; +}; + +template struct compute_default_alignment { + enum { value = EIGEN_MAX_ALIGN_BYTES }; }; template class compute_matrix_flags { - enum { - row_major_bit = Options&RowMajor ? RowMajorBit : 0, - is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic, - - aligned_bit = - ( - ((Options&DontAlign)==0) - && ( -#if EIGEN_ALIGN_STATICALLY - ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % 16) == 0)) -#else - 0 -#endif - - || - -#if EIGEN_ALIGN - is_dynamic_size_storage -#else - 0 -#endif - - ) - ) ? AlignedBit : 0, - packet_access_bit = packet_traits::Vectorizable && aligned_bit ? PacketAccessBit : 0 - }; - + enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 }; public: - enum { ret = LinearAccessBit | LvalueBit | DirectAccessBit | NestByRefBit | packet_access_bit | row_major_bit | aligned_bit }; + // FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<> + // and then propagate this information to the evaluator's flags. + // However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage. + enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit }; }; template struct size_at_compile_time @@ -163,34 +265,43 @@ template struct size_at_compile_time enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols }; }; +template struct size_of_xpr_at_compile_time +{ + enum { ret = size_at_compile_time::RowsAtCompileTime,traits::ColsAtCompileTime>::ret }; +}; + /* plain_matrix_type : the difference from eval is that plain_matrix_type is always a plain matrix type, * whereas eval is a const reference in the case of a matrix */ template::StorageKind> struct plain_matrix_type; -template struct plain_matrix_type_dense; +template struct plain_matrix_type_dense; template struct plain_matrix_type { - typedef typename plain_matrix_type_dense::XprKind>::type type; + typedef typename plain_matrix_type_dense::XprKind, traits::Flags>::type type; +}; +template struct plain_matrix_type +{ + typedef typename T::PlainObject type; }; -template struct plain_matrix_type_dense +template struct plain_matrix_type_dense { typedef Matrix::Scalar, traits::RowsAtCompileTime, traits::ColsAtCompileTime, - AutoAlign | (traits::Flags&RowMajorBit ? RowMajor : ColMajor), + AutoAlign | (Flags&RowMajorBit ? RowMajor : ColMajor), traits::MaxRowsAtCompileTime, traits::MaxColsAtCompileTime > type; }; -template struct plain_matrix_type_dense +template struct plain_matrix_type_dense { typedef Array::Scalar, traits::RowsAtCompileTime, traits::ColsAtCompileTime, - AutoAlign | (traits::Flags&RowMajorBit ? RowMajor : ColMajor), + AutoAlign | (Flags&RowMajorBit ? 
RowMajor : ColMajor), traits::MaxRowsAtCompileTime, traits::MaxColsAtCompileTime > type; @@ -215,6 +326,11 @@ template struct eval // > type; }; +template struct eval +{ + typedef typename plain_matrix_type::type type; +}; + // for matrices, no need to evaluate, just use a const reference to avoid a useless copy template struct eval, Dense> @@ -229,6 +345,15 @@ struct eval, Dense> }; +/* similar to plain_matrix_type, but using the evaluator's Flags */ +template::StorageKind> struct plain_object_eval; + +template +struct plain_object_eval +{ + typedef typename plain_matrix_type_dense::XprKind, evaluator::Flags>::type type; +}; + /* plain_matrix_type_column_major : same as plain_matrix_type but guaranteed to be column-major */ @@ -266,9 +391,6 @@ template struct plain_matrix_type_row_major > type; }; -// we should be able to get rid of this one too -template struct must_nest_by_value { enum { ret = false }; }; - /** \internal The reference selector for template expressions. The idea is that we don't * need to use references for expressions since they are light weight proxy * objects which should generate no copying overhead. */ @@ -280,6 +402,12 @@ struct ref_selector T const&, const T >::type type; + + typedef typename conditional< + bool(traits::Flags & NestByRefBit), + T &, + T + >::type non_const_type; }; /** \internal Adds the const qualifier on the value-type of T2 if and only if T1 is a const type */ @@ -293,54 +421,41 @@ struct transfer_constness >::type type; }; -/** \internal Determines how a given expression should be nested into another one. + +// However, we still need a mechanism to detect whether an expression which is evaluated multiple time +// has to be evaluated into a temporary. +// That's the purpose of this new nested_eval helper: +/** \internal Determines how a given expression should be nested when evaluated multiple times. * For example, when you do a * (b+c), Eigen will determine how the expression b+c should be - * nested into the bigger product expression. The choice is between nesting the expression b+c as-is, or + * evaluated into the bigger product expression. The choice is between nesting the expression b+c as-is, or * evaluating that expression b+c into a temporary variable d, and nest d so that the resulting expression is * a*d. Evaluating can be beneficial for example if every coefficient access in the resulting expression causes * many coefficient accesses in the nested expressions -- as is the case with matrix product for example. * - * \param T the type of the expression being nested - * \param n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression. - * - * Note that if no evaluation occur, then the constness of T is preserved. - * - * Example. Suppose that a, b, and c are of type Matrix3d. The user forms the expression a*(b+c). - * b+c is an expression "sum of matrices", which we will denote by S. In order to determine how to nest it, - * the Product expression uses: nested::ret, which turns out to be Matrix3d because the internal logic of - * nested determined that in this case it was better to evaluate the expression b+c into a temporary. On the other hand, - * since a is of type Matrix3d, the Product expression nests it as nested::ret, which turns out to be - * const Matrix3d&, because the internal logic of nested determined that since a was already a matrix, there was no point - * in copying it into another matrix. + * \tparam T the type of the expression being nested. 
+ * \tparam n the number of coefficient accesses in the nested expression for each coefficient access in the bigger expression. + * \tparam PlainObject the type of the temporary if needed. */ -template::type> struct nested +template::type> struct nested_eval { enum { - // for the purpose of this test, to keep it reasonably simple, we arbitrarily choose a value of Dynamic values. - // the choice of 10000 makes it larger than any practical fixed value and even most dynamic values. - // in extreme cases where these assumptions would be wrong, we would still at worst suffer performance issues - // (poor choice of temporaries). - // it's important that this value can still be squared without integer overflowing. - DynamicAsInteger = 10000, ScalarReadCost = NumTraits::Scalar>::ReadCost, - ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost), - CoeffReadCost = traits::CoeffReadCost, - CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost), - NAsInteger = n == Dynamic ? int(DynamicAsInteger) : n, - CostEvalAsInteger = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger, - CostNoEvalAsInteger = NAsInteger * CoeffReadCostAsInteger + CoeffReadCost = evaluator::CoeffReadCost, // NOTE What if an evaluator evaluate itself into a tempory? + // Then CoeffReadCost will be small (e.g., 1) but we still have to evaluate, especially if n>1. + // This situation is already taken care by the EvalBeforeNestingBit flag, which is turned ON + // for all evaluator creating a temporary. This flag is then propagated by the parent evaluators. + // Another solution could be to count the number of temps? + NAsInteger = n == Dynamic ? HugeCost : n, + CostEval = (NAsInteger+1) * ScalarReadCost + CoeffReadCost, + CostNoEval = NAsInteger * CoeffReadCost, + Evaluate = (int(evaluator::Flags) & EvalBeforeNestingBit) || (int(CostEval) < int(CostNoEval)) }; - typedef typename conditional< - ( (int(traits::Flags) & EvalBeforeNestingBit) || - int(CostEvalAsInteger) < int(CostNoEvalAsInteger) - ), - PlainObject, - typename ref_selector::type - >::type type; + typedef typename conditional::type>::type type; }; template +EIGEN_DEVICE_FUNC inline T* const_cast_ptr(const T* ptr) { return const_cast(ptr); @@ -364,30 +479,13 @@ struct dense_xpr_base typedef ArrayBase type; }; -/** \internal Helper base class to add a scalar multiple operator - * overloads for complex types */ -template::value > -struct special_scalar_op_base : public BaseType -{ - // dummy operator* so that the - // "using special_scalar_op_base::operator*" compiles - void operator*() const; -}; +template::XprKind, typename StorageKind = typename traits::StorageKind> +struct generic_xpr_base; -template -struct special_scalar_op_base : public BaseType +template +struct generic_xpr_base { - const CwiseUnaryOp, Derived> - operator*(const OtherScalar& scalar) const - { - return CwiseUnaryOp, Derived> - (*static_cast(this), scalar_multiple2_op(scalar)); - } - - inline friend const CwiseUnaryOp, Derived> - operator*(const OtherScalar& scalar, const Derived& matrix) - { return static_cast(matrix).operator*(scalar); } + typedef typename dense_xpr_base::type type; }; template struct cast_return_type @@ -405,9 +503,79 @@ template struct promote_storage_type { typedef A ret; }; +template struct promote_storage_type +{ + typedef A ret; +}; +template struct promote_storage_type +{ + typedef A ret; +}; + +/** \internal Specify the "storage kind" of applying a coefficient-wise + * binary 
operations between two expressions of kinds A and B respectively. + * The template parameter Functor permits to specialize the resulting storage kind wrt to + * the functor. + * The default rules are as follows: + * \code + * A op A -> A + * A op dense -> dense + * dense op B -> dense + * sparse op dense -> sparse + * dense op sparse -> sparse + * \endcode + */ +template struct cwise_promote_storage_type; + +template struct cwise_promote_storage_type { typedef A ret; }; +template struct cwise_promote_storage_type { typedef Dense ret; }; +template struct cwise_promote_storage_type { typedef Dense ret; }; +template struct cwise_promote_storage_type { typedef Dense ret; }; +template struct cwise_promote_storage_type { typedef Sparse ret; }; +template struct cwise_promote_storage_type { typedef Sparse ret; }; + +template struct cwise_promote_storage_order { + enum { value = LhsOrder }; +}; + +template struct cwise_promote_storage_order { enum { value = RhsOrder }; }; +template struct cwise_promote_storage_order { enum { value = LhsOrder }; }; +template struct cwise_promote_storage_order { enum { value = Order }; }; + + +/** \internal Specify the "storage kind" of multiplying an expression of kind A with kind B. + * The template parameter ProductTag permits to specialize the resulting storage kind wrt to + * some compile-time properties of the product: GemmProduct, GemvProduct, OuterProduct, InnerProduct. + * The default rules are as follows: + * \code + * K * K -> K + * dense * K -> dense + * K * dense -> dense + * diag * K -> K + * K * diag -> K + * Perm * K -> K + * K * Perm -> K + * \endcode + */ +template struct product_promote_storage_type; + +template struct product_promote_storage_type { typedef A ret;}; +template struct product_promote_storage_type { typedef Dense ret;}; +template struct product_promote_storage_type { typedef Dense ret; }; +template struct product_promote_storage_type { typedef Dense ret; }; + +template struct product_promote_storage_type { typedef A ret; }; +template struct product_promote_storage_type { typedef B ret; }; +template struct product_promote_storage_type { typedef Dense ret; }; +template struct product_promote_storage_type { typedef Dense ret; }; + +template struct product_promote_storage_type { typedef A ret; }; +template struct product_promote_storage_type { typedef B ret; }; +template struct product_promote_storage_type { typedef Dense ret; }; +template struct product_promote_storage_type { typedef Dense ret; }; /** \internal gives the plain matrix or array type to store a row/column/diagonal of a matrix type. - * \param Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType. + * \tparam Scalar optional parameter allowing to pass a different scalar type than the one of the MatrixType. 
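A compile-time check sketching the cwise storage-kind promotion documented above; the parameter order (LHS kind, RHS kind, functor) and the functor argument are assumptions read off this diff:

#include <Eigen/Core>

typedef Eigen::internal::scalar_sum_op<double, double> SumOp;
static_assert(Eigen::internal::is_same<
                  Eigen::internal::cwise_promote_storage_type<Eigen::Dense, Eigen::Sparse, SumOp>::ret,
                  Eigen::Sparse>::value,
              "dense op sparse keeps the sparse storage kind");
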
*/ template struct plain_row_type @@ -455,15 +623,201 @@ struct plain_diag_type >::type type; }; +template +struct plain_constant_type +{ + enum { Options = (traits::Flags&RowMajorBit)?RowMajor:0 }; + + typedef Array::RowsAtCompileTime, traits::ColsAtCompileTime, + Options, traits::MaxRowsAtCompileTime,traits::MaxColsAtCompileTime> array_type; + + typedef Matrix::RowsAtCompileTime, traits::ColsAtCompileTime, + Options, traits::MaxRowsAtCompileTime,traits::MaxColsAtCompileTime> matrix_type; + + typedef CwiseNullaryOp, const typename conditional::XprKind, MatrixXpr >::value, matrix_type, array_type>::type > type; +}; + template struct is_lvalue { - enum { value = !bool(is_const::value) && + enum { value = (!bool(is_const::value)) && bool(traits::Flags & LvalueBit) }; }; +template struct is_diagonal +{ enum { ret = false }; }; + +template struct is_diagonal > +{ enum { ret = true }; }; + +template struct is_diagonal > +{ enum { ret = true }; }; + +template struct is_diagonal > +{ enum { ret = true }; }; + +template struct glue_shapes; +template<> struct glue_shapes { typedef TriangularShape type; }; + +template +bool is_same_dense(const T1 &mat1, const T2 &mat2, typename enable_if::ret&&has_direct_access::ret, T1>::type * = 0) +{ + return (mat1.data()==mat2.data()) && (mat1.innerStride()==mat2.innerStride()) && (mat1.outerStride()==mat2.outerStride()); +} + +template +bool is_same_dense(const T1 &, const T2 &, typename enable_if::ret&&has_direct_access::ret), T1>::type * = 0) +{ + return false; +} + +// Internal helper defining the cost of a scalar division for the type T. +// The default heuristic can be specialized for each scalar type and architecture. +template +struct scalar_div_cost { + enum { value = 8*NumTraits::MulCost }; +}; + +template +struct scalar_div_cost, Vectorized> { + enum { value = 2*scalar_div_cost::value + + 6*NumTraits::MulCost + + 3*NumTraits::AddCost + }; +}; + + +template +struct scalar_div_cost::type> { enum { value = 24 }; }; +template +struct scalar_div_cost::type> { enum { value = 21 }; }; + + +#ifdef EIGEN_DEBUG_ASSIGN +std::string demangle_traversal(int t) +{ + if(t==DefaultTraversal) return "DefaultTraversal"; + if(t==LinearTraversal) return "LinearTraversal"; + if(t==InnerVectorizedTraversal) return "InnerVectorizedTraversal"; + if(t==LinearVectorizedTraversal) return "LinearVectorizedTraversal"; + if(t==SliceVectorizedTraversal) return "SliceVectorizedTraversal"; + return "?"; +} +std::string demangle_unrolling(int t) +{ + if(t==NoUnrolling) return "NoUnrolling"; + if(t==InnerUnrolling) return "InnerUnrolling"; + if(t==CompleteUnrolling) return "CompleteUnrolling"; + return "?"; +} +std::string demangle_flags(int f) +{ + std::string res; + if(f&RowMajorBit) res += " | RowMajor"; + if(f&PacketAccessBit) res += " | Packet"; + if(f&LinearAccessBit) res += " | Linear"; + if(f&LvalueBit) res += " | Lvalue"; + if(f&DirectAccessBit) res += " | Direct"; + if(f&NestByRefBit) res += " | NestByRef"; + if(f&NoPreferredStorageOrderBit) res += " | NoPreferredStorageOrderBit"; + + return res; +} +#endif + } // end namespace internal + +/** \class ScalarBinaryOpTraits + * \ingroup Core_Module + * + * \brief Determines whether the given binary operation of two numeric types is allowed and what the scalar return type is. + * + * This class permits to control the scalar return type of any binary operation performed on two different scalar types through (partial) template specializations. 
+ * + * For instance, let \c U1, \c U2 and \c U3 be three user defined scalar types for which most operations between instances of \c U1 and \c U2 returns an \c U3. + * You can let %Eigen knows that by defining: + \code + template + struct ScalarBinaryOpTraits { typedef U3 ReturnType; }; + template + struct ScalarBinaryOpTraits { typedef U3 ReturnType; }; + \endcode + * You can then explicitly disable some particular operations to get more explicit error messages: + \code + template<> + struct ScalarBinaryOpTraits > {}; + \endcode + * Or customize the return type for individual operation: + \code + template<> + struct ScalarBinaryOpTraits > { typedef U1 ReturnType; }; + \endcode + * + * By default, the following generic combinations are supported: + + + + + +
+ * <table class="manual">
+ * <tr><th>ScalarA</th><th>ScalarB</th><th>BinaryOp</th><th>ReturnType</th><th>Note</th></tr>
+ * <tr><td>\c T</td><td>\c T</td><td>\c *</td><td>\c T</td><td></td></tr>
+ * <tr><td>\c NumTraits<T>::Real</td><td>\c T</td><td>\c *</td><td>\c T</td><td>Only if \c NumTraits<T>::IsComplex</td></tr>
+ * <tr><td>\c T</td><td>\c NumTraits<T>::Real</td><td>\c *</td><td>\c T</td><td>Only if \c NumTraits<T>::IsComplex</td></tr>
+ * </table>
+ * + * \sa CwiseBinaryOp + */ +template > +struct ScalarBinaryOpTraits +#ifndef EIGEN_PARSED_BY_DOXYGEN + // for backward compatibility, use the hints given by the (deprecated) internal::scalar_product_traits class. + : internal::scalar_product_traits +#endif // EIGEN_PARSED_BY_DOXYGEN +{}; + +template +struct ScalarBinaryOpTraits +{ + typedef T ReturnType; +}; + +template +struct ScalarBinaryOpTraits::IsComplex,T>::type>::Real, BinaryOp> +{ + typedef T ReturnType; +}; +template +struct ScalarBinaryOpTraits::IsComplex,T>::type>::Real, T, BinaryOp> +{ + typedef T ReturnType; +}; + +// For Matrix * Permutation +template +struct ScalarBinaryOpTraits +{ + typedef T ReturnType; +}; + +// For Permutation * Matrix +template +struct ScalarBinaryOpTraits +{ + typedef T ReturnType; +}; + +// for Permutation*Permutation +template +struct ScalarBinaryOpTraits +{ + typedef void ReturnType; +}; + +// We require Lhs and Rhs to have "compatible" scalar types. +// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths. +// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to +// add together a float matrix and a double matrix. +#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \ + EIGEN_STATIC_ASSERT((Eigen::internal::has_ReturnType >::value), \ + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + } // end namespace Eigen #endif // EIGEN_XPRHELPER_H -- cgit v1.2.3
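Finally, a self-contained sketch of the ScalarBinaryOpTraits customization point documented above, spelling out the placeholder scalar types U1/U2/U3 used there (they are illustrative only):

#include <Eigen/Core>

struct U1 { double v; };
struct U2 { double v; };
struct U3 { double v; };

namespace Eigen {
  // Declare that combining a U1 with a U2 (in either order) yields a U3.
  template<typename BinaryOp>
  struct ScalarBinaryOpTraits<U1, U2, BinaryOp> { typedef U3 ReturnType; };
  template<typename BinaryOp>
  struct ScalarBinaryOpTraits<U2, U1, BinaryOp> { typedef U3 ReturnType; };
}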