From 407b6208604d2822b1067ac64949e78a9167572b Mon Sep 17 00:00:00 2001 From: Stanislaw Halik Date: Mon, 12 Nov 2018 06:42:35 +0100 Subject: eigen update --- eigen/Eigen/src/Core/AssignEvaluator.h | 4 +- eigen/Eigen/src/Core/Assign_MKL.h | 6 +- eigen/Eigen/src/Core/CoreEvaluators.h | 39 +++++-- eigen/Eigen/src/Core/Diagonal.h | 5 +- eigen/Eigen/src/Core/Dot.h | 17 +-- eigen/Eigen/src/Core/GeneralProduct.h | 21 ++-- eigen/Eigen/src/Core/Map.h | 17 ++- eigen/Eigen/src/Core/MathFunctions.h | 28 +---- eigen/Eigen/src/Core/MathFunctionsImpl.h | 23 ++++ eigen/Eigen/src/Core/MatrixBase.h | 9 -- eigen/Eigen/src/Core/PlainObjectBase.h | 4 + eigen/Eigen/src/Core/Product.h | 10 +- eigen/Eigen/src/Core/ProductEvaluators.h | 31 +++--- eigen/Eigen/src/Core/Redux.h | 2 +- eigen/Eigen/src/Core/Ref.h | 2 + eigen/Eigen/src/Core/SelfAdjointView.h | 6 +- eigen/Eigen/src/Core/SelfCwiseBinaryOp.h | 4 - eigen/Eigen/src/Core/StableNorm.h | 2 +- eigen/Eigen/src/Core/Transpositions.h | 2 +- eigen/Eigen/src/Core/arch/AVX/Complex.h | 36 +------ eigen/Eigen/src/Core/arch/AVX/PacketMath.h | 11 +- eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h | 33 +++--- eigen/Eigen/src/Core/arch/AVX512/PacketMath.h | 6 +- eigen/Eigen/src/Core/arch/AltiVec/Complex.h | 35 +----- eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h | 44 ++++++-- eigen/Eigen/src/Core/arch/CUDA/Half.h | 64 ++++++----- eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 2 +- eigen/Eigen/src/Core/arch/NEON/Complex.h | 12 ++- eigen/Eigen/src/Core/arch/NEON/PacketMath.h | 31 ++++++ eigen/Eigen/src/Core/arch/SSE/Complex.h | 40 +------ eigen/Eigen/src/Core/arch/SSE/PacketMath.h | 22 +++- eigen/Eigen/src/Core/arch/SSE/TypeCasting.h | 28 ++--- eigen/Eigen/src/Core/arch/ZVector/Complex.h | 3 + eigen/Eigen/src/Core/functors/BinaryFunctors.h | 25 ++--- eigen/Eigen/src/Core/functors/StlFunctors.h | 4 + .../products/GeneralMatrixMatrixTriangular_BLAS.h | 8 +- .../src/Core/products/GeneralMatrixMatrix_BLAS.h | 19 ++-- .../Eigen/src/Core/products/GeneralMatrixVector.h | 8 +- .../src/Core/products/GeneralMatrixVector_BLAS.h | 19 ++-- .../Core/products/SelfadjointMatrixMatrix_BLAS.h | 48 +++++---- .../Core/products/SelfadjointMatrixVector_BLAS.h | 9 +- .../src/Core/products/TriangularMatrixMatrix.h | 35 +++++- .../Core/products/TriangularMatrixMatrix_BLAS.h | 39 ++++--- .../src/Core/products/TriangularMatrixVector.h | 22 +++- .../Core/products/TriangularMatrixVector_BLAS.h | 46 +++++--- .../Core/products/TriangularSolverMatrix_BLAS.h | 40 ++++--- eigen/Eigen/src/Core/util/MKL_support.h | 10 +- eigen/Eigen/src/Core/util/Macros.h | 19 +++- eigen/Eigen/src/Core/util/Memory.h | 14 ++- eigen/Eigen/src/Core/util/Meta.h | 20 ++++ eigen/Eigen/src/Core/util/StaticAssert.h | 120 +++++++++++---------- 51 files changed, 646 insertions(+), 458 deletions(-) (limited to 'eigen/Eigen/src/Core') diff --git a/eigen/Eigen/src/Core/AssignEvaluator.h b/eigen/Eigen/src/Core/AssignEvaluator.h index b0ec7b7..dbe435d 100644 --- a/eigen/Eigen/src/Core/AssignEvaluator.h +++ b/eigen/Eigen/src/Core/AssignEvaluator.h @@ -39,7 +39,7 @@ public: enum { DstAlignment = DstEvaluator::Alignment, SrcAlignment = SrcEvaluator::Alignment, - DstHasDirectAccess = DstFlags & DirectAccessBit, + DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit, JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment) }; @@ -83,7 +83,7 @@ private: && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0 && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)), MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), - MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess + MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess) && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. */ diff --git a/eigen/Eigen/src/Core/Assign_MKL.h b/eigen/Eigen/src/Core/Assign_MKL.h index 6c2ab92..6866095 100644 --- a/eigen/Eigen/src/Core/Assign_MKL.h +++ b/eigen/Eigen/src/Core/Assign_MKL.h @@ -84,7 +84,8 @@ class vml_assign_traits struct Assignment, SrcXprNested>, assign_op, \ Dense2Dense, typename enable_if::EnableVml>::type> { \ typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ - static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &func) { \ + resize_if_allowed(dst, src, func); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ if(vml_assign_traits::Traversal==LinearTraversal) { \ VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \ @@ -144,7 +145,8 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) Dense2Dense, typename enable_if::EnableVml>::type> { \ typedef CwiseBinaryOp, SrcXprNested, \ const CwiseNullaryOp,Plain> > SrcXprType; \ - static void run(DstXprType &dst, const SrcXprType &src, const assign_op &/*func*/) { \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &func) { \ + resize_if_allowed(dst, src, func); \ eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ VMLTYPE exponent = reinterpret_cast(src.rhs().functor().m_other); \ if(vml_assign_traits::Traversal==LinearTraversal) \ diff --git a/eigen/Eigen/src/Core/CoreEvaluators.h b/eigen/Eigen/src/Core/CoreEvaluators.h index f7c1eff..910889e 100644 --- a/eigen/Eigen/src/Core/CoreEvaluators.h +++ b/eigen/Eigen/src/Core/CoreEvaluators.h @@ -977,7 +977,7 @@ struct evaluator > OuterStrideAtCompileTime = HasSameStorageOrderAsArgType ? int(outer_stride_at_compile_time::ret) : int(inner_stride_at_compile_time::ret), - MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, + MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsRowMajorBit = XprType::Flags&RowMajorBit, @@ -987,7 +987,9 @@ struct evaluator > Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, PacketAlignment = unpacket_traits::alignment, - Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) + && (OuterStrideAtCompileTime!=0) + && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) }; typedef block_evaluator block_evaluator_type; @@ -1018,14 +1020,16 @@ struct unary_evaluator, IndexBa EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block) : m_argImpl(block.nestedExpression()), m_startRow(block.startRow()), - m_startCol(block.startCol()) + m_startCol(block.startCol()), + m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0) { } typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; enum { - RowsAtCompileTime = XprType::RowsAtCompileTime + RowsAtCompileTime = XprType::RowsAtCompileTime, + ForwardLinearAccess = InnerPanel && bool(evaluator::Flags&LinearAccessBit) }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1037,7 +1041,10 @@ struct unary_evaluator, IndexBa EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { - return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + if (ForwardLinearAccess) + return m_argImpl.coeff(m_linear_offset.value() + index); + else + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1049,7 +1056,10 @@ struct unary_evaluator, IndexBa EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + if (ForwardLinearAccess) + return m_argImpl.coeffRef(m_linear_offset.value() + index); + else + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); } template @@ -1063,8 +1073,11 @@ struct unary_evaluator, IndexBa EIGEN_STRONG_INLINE PacketType packet(Index index) const { - return packet(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + if (ForwardLinearAccess) + return m_argImpl.template packet(m_linear_offset.value() + index); + else + return packet(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); } template @@ -1078,15 +1091,19 @@ struct unary_evaluator, IndexBa EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { - return writePacket(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0, - x); + if (ForwardLinearAccess) + return m_argImpl.template writePacket(m_linear_offset.value() + index, x); + else + return writePacket(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0, + x); } protected: evaluator m_argImpl; const variable_if_dynamic m_startRow; const variable_if_dynamic m_startCol; + const variable_if_dynamic m_linear_offset; }; // TODO: This evaluator does not actually use the child evaluator; diff --git a/eigen/Eigen/src/Core/Diagonal.h b/eigen/Eigen/src/Core/Diagonal.h index 49e7112..afcaf35 100644 --- a/eigen/Eigen/src/Core/Diagonal.h +++ b/eigen/Eigen/src/Core/Diagonal.h @@ -70,7 +70,10 @@ template class Diagonal EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) EIGEN_DEVICE_FUNC - explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {} + explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) + { + eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() ); + } EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) diff --git a/eigen/Eigen/src/Core/Dot.h b/eigen/Eigen/src/Core/Dot.h index 06ef18b..1fe7a84 100644 --- a/eigen/Eigen/src/Core/Dot.h +++ b/eigen/Eigen/src/Core/Dot.h @@ -31,7 +31,8 @@ struct dot_nocheck typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC - static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) + EIGEN_STRONG_INLINE + static ResScalar run(const MatrixBase& a, const MatrixBase& b) { return a.template binaryExpr(b).sum(); } @@ -43,7 +44,8 @@ struct dot_nocheck typedef scalar_conj_product_op::Scalar,typename traits::Scalar> conj_prod; typedef typename conj_prod::result_type ResScalar; EIGEN_DEVICE_FUNC - static inline ResScalar run(const MatrixBase& a, const MatrixBase& b) + EIGEN_STRONG_INLINE + static ResScalar run(const MatrixBase& a, const MatrixBase& b) { return a.transpose().template binaryExpr(b).sum(); } @@ -65,6 +67,7 @@ struct dot_nocheck template template EIGEN_DEVICE_FUNC +EIGEN_STRONG_INLINE typename ScalarBinaryOpTraits::Scalar,typename internal::traits::Scalar>::ReturnType MatrixBase::dot(const MatrixBase& other) const { @@ -102,7 +105,7 @@ EIGEN_STRONG_INLINE typename NumTraits::Scala * \sa lpNorm(), dot(), squaredNorm() */ template -inline typename NumTraits::Scalar>::Real MatrixBase::norm() const +EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real MatrixBase::norm() const { return numext::sqrt(squaredNorm()); } @@ -117,7 +120,7 @@ inline typename NumTraits::Scalar>::Real Matr * \sa norm(), normalize() */ template -inline const typename MatrixBase::PlainObject +EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject MatrixBase::normalized() const { typedef typename internal::nested_eval::type _Nested; @@ -139,7 +142,7 @@ MatrixBase::normalized() const * \sa norm(), normalized() */ template -inline void MatrixBase::normalize() +EIGEN_STRONG_INLINE void MatrixBase::normalize() { RealScalar z = squaredNorm(); // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU @@ -160,7 +163,7 @@ inline void MatrixBase::normalize() * \sa stableNorm(), stableNormalize(), normalized() */ template -inline const typename MatrixBase::PlainObject +EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject MatrixBase::stableNormalized() const { typedef typename internal::nested_eval::type _Nested; @@ -185,7 +188,7 @@ MatrixBase::stableNormalized() const * \sa stableNorm(), stableNormalized(), normalize() */ template -inline void MatrixBase::stableNormalize() +EIGEN_STRONG_INLINE void MatrixBase::stableNormalize() { RealScalar w = cwiseAbs().maxCoeff(); RealScalar z = (derived()/w).squaredNorm(); diff --git a/eigen/Eigen/src/Core/GeneralProduct.h b/eigen/Eigen/src/Core/GeneralProduct.h index 0f16cd8..6f0cc80 100644 --- a/eigen/Eigen/src/Core/GeneralProduct.h +++ b/eigen/Eigen/src/Core/GeneralProduct.h @@ -24,12 +24,17 @@ template struct product_type_selector; template struct product_size_category { - enum { is_large = MaxSize == Dynamic || - Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || - (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), - value = is_large ? Large - : Size == 1 ? 1 - : Small + enum { + #ifndef EIGEN_CUDA_ARCH + is_large = MaxSize == Dynamic || + Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || + (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), + #else + is_large = 0, + #endif + value = is_large ? Large + : Size == 1 ? 1 + : Small }; }; @@ -379,8 +384,6 @@ template<> struct gemv_dense_selector * * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*() */ -#ifndef __CUDACC__ - template template inline const Product @@ -412,8 +415,6 @@ MatrixBase::operator*(const MatrixBase &other) const return Product(derived(), other.derived()); } -#endif // __CUDACC__ - /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. * * The returned product will behave like any other expressions: the coefficients of the product will be diff --git a/eigen/Eigen/src/Core/Map.h b/eigen/Eigen/src/Core/Map.h index 06d1967..548bf9a 100644 --- a/eigen/Eigen/src/Core/Map.h +++ b/eigen/Eigen/src/Core/Map.h @@ -20,11 +20,17 @@ struct traits > { typedef traits TraitsBase; enum { + PlainObjectTypeInnerSize = ((traits::Flags&RowMajorBit)==RowMajorBit) + ? PlainObjectType::ColsAtCompileTime + : PlainObjectType::RowsAtCompileTime, + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 ? int(PlainObjectType::InnerStrideAtCompileTime) : int(StrideType::InnerStrideAtCompileTime), OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 - ? int(PlainObjectType::OuterStrideAtCompileTime) + ? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic + ? Dynamic + : int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize)) : int(StrideType::OuterStrideAtCompileTime), Alignment = int(MapOptions)&int(AlignedMask), Flags0 = TraitsBase::Flags & (~NestByRefBit), @@ -107,10 +113,11 @@ template class Ma EIGEN_DEVICE_FUNC inline Index outerStride() const { - return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() - : IsVectorAtCompileTime ? this->size() - : int(Flags)&RowMajorBit ? this->cols() - : this->rows(); + return int(StrideType::OuterStrideAtCompileTime) != 0 ? m_stride.outer() + : int(internal::traits::OuterStrideAtCompileTime) != Dynamic ? Index(internal::traits::OuterStrideAtCompileTime) + : IsVectorAtCompileTime ? (this->size() * innerStride()) + : (int(Flags)&RowMajorBit) ? (this->cols() * innerStride()) + : (this->rows() * innerStride()); } /** Constructor in the fixed-size case. diff --git a/eigen/Eigen/src/Core/MathFunctions.h b/eigen/Eigen/src/Core/MathFunctions.h index a648aa0..6eb974d 100644 --- a/eigen/Eigen/src/Core/MathFunctions.h +++ b/eigen/Eigen/src/Core/MathFunctions.h @@ -348,31 +348,7 @@ struct norm1_retval * Implementation of hypot * ****************************************************************************/ -template -struct hypot_impl -{ - typedef typename NumTraits::Real RealScalar; - static inline RealScalar run(const Scalar& x, const Scalar& y) - { - EIGEN_USING_STD_MATH(abs); - EIGEN_USING_STD_MATH(sqrt); - RealScalar _x = abs(x); - RealScalar _y = abs(y); - Scalar p, qp; - if(_x>_y) - { - p = _x; - qp = _y / p; - } - else - { - p = _y; - qp = _x / p; - } - if(p==RealScalar(0)) return RealScalar(0); - return p * sqrt(RealScalar(1) + qp*qp); - } -}; +template struct hypot_impl; template struct hypot_retval @@ -495,7 +471,7 @@ namespace std_fallback { typedef typename NumTraits::Real RealScalar; EIGEN_USING_STD_MATH(log); Scalar x1p = RealScalar(1) + x; - return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); + return numext::equal_strict(x1p, Scalar(1)) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) ); } } diff --git a/eigen/Eigen/src/Core/MathFunctionsImpl.h b/eigen/Eigen/src/Core/MathFunctionsImpl.h index 3c9ef22..9c1ceb0 100644 --- a/eigen/Eigen/src/Core/MathFunctionsImpl.h +++ b/eigen/Eigen/src/Core/MathFunctionsImpl.h @@ -71,6 +71,29 @@ T generic_fast_tanh_float(const T& a_x) return pdiv(p, q); } +template +EIGEN_STRONG_INLINE +RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) +{ + EIGEN_USING_STD_MATH(sqrt); + RealScalar p, qp; + p = numext::maxi(x,y); + if(p==RealScalar(0)) return RealScalar(0); + qp = numext::mini(y,x) / p; + return p * sqrt(RealScalar(1) + qp*qp); +} + +template +struct hypot_impl +{ + typedef typename NumTraits::Real RealScalar; + static inline RealScalar run(const Scalar& x, const Scalar& y) + { + EIGEN_USING_STD_MATH(abs); + return positive_real_hypot(abs(x), abs(y)); + } +}; + } // end namespace internal } // end namespace Eigen diff --git a/eigen/Eigen/src/Core/MatrixBase.h b/eigen/Eigen/src/Core/MatrixBase.h index ce41218..05db488 100644 --- a/eigen/Eigen/src/Core/MatrixBase.h +++ b/eigen/Eigen/src/Core/MatrixBase.h @@ -160,20 +160,11 @@ template class MatrixBase EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const MatrixBase& other); -#ifdef __CUDACC__ template EIGEN_DEVICE_FUNC - const Product - operator*(const MatrixBase &other) const - { return this->lazyProduct(other); } -#else - - template const Product operator*(const MatrixBase &other) const; -#endif - template EIGEN_DEVICE_FUNC const Product diff --git a/eigen/Eigen/src/Core/PlainObjectBase.h b/eigen/Eigen/src/Core/PlainObjectBase.h index 77f4f60..1dc7e22 100644 --- a/eigen/Eigen/src/Core/PlainObjectBase.h +++ b/eigen/Eigen/src/Core/PlainObjectBase.h @@ -577,6 +577,10 @@ class PlainObjectBase : public internal::dense_xpr_base::type * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned * \a data pointers. * + * Here is an example using strides: + * \include Matrix_Map_stride.cpp + * Output: \verbinclude Matrix_Map_stride.out + * * \see class Map */ //@{ diff --git a/eigen/Eigen/src/Core/Product.h b/eigen/Eigen/src/Core/Product.h index ae0c94b..676c480 100644 --- a/eigen/Eigen/src/Core/Product.h +++ b/eigen/Eigen/src/Core/Product.h @@ -97,8 +97,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option, && "if you wanted a coeff-wise or a dot product use the respective explicit functions"); } - EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } - EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); } EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; } EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; } @@ -127,7 +127,7 @@ public: using Base::derived; typedef typename Base::Scalar Scalar; - operator const Scalar() const + EIGEN_STRONG_INLINE operator const Scalar() const { return internal::evaluator(derived()).coeff(0,0); } @@ -162,7 +162,7 @@ class ProductImpl public: - EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const { EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); @@ -170,7 +170,7 @@ class ProductImpl return internal::evaluator(derived()).coeff(row,col); } - EIGEN_DEVICE_FUNC Scalar coeff(Index i) const + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const { EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) ); diff --git a/eigen/Eigen/src/Core/ProductEvaluators.h b/eigen/Eigen/src/Core/ProductEvaluators.h index c42725d..9b99bd7 100644 --- a/eigen/Eigen/src/Core/ProductEvaluators.h +++ b/eigen/Eigen/src/Core/ProductEvaluators.h @@ -32,7 +32,7 @@ struct evaluator > typedef Product XprType; typedef product_evaluator Base; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {} }; // Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" @@ -55,7 +55,7 @@ struct evaluator, const Product > XprType; typedef evaluator > Base; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {} }; @@ -68,7 +68,7 @@ struct evaluator, DiagIndex> > typedef Diagonal, DiagIndex> XprType; typedef evaluator, DiagIndex> > Base; - EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(Diagonal, DiagIndex>( Product(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), xpr.index() )) @@ -246,19 +246,19 @@ template struct generic_product_impl { template - static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum(); } template - static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum(); } template - static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); } }; @@ -312,25 +312,25 @@ struct generic_product_impl }; template - static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major()); } template - static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major()); } template - static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) + static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major()); } template - static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) + static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major()); } @@ -785,7 +785,11 @@ public: _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0), - Alignment = evaluator::Alignment + Alignment = evaluator::Alignment, + + AsScalarProduct = (DiagonalType::SizeAtCompileTime==1) + || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft) + || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight) }; diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) @@ -797,7 +801,10 @@ public: EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const { - return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); + if(AsScalarProduct) + return m_diagImpl.coeff(0) * m_matImpl.coeff(idx); + else + return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); } protected: diff --git a/eigen/Eigen/src/Core/Redux.h b/eigen/Eigen/src/Core/Redux.h index b6e8f88..760e9f8 100644 --- a/eigen/Eigen/src/Core/Redux.h +++ b/eigen/Eigen/src/Core/Redux.h @@ -407,7 +407,7 @@ protected: */ template template -typename internal::traits::Scalar +EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::redux(const Func& func) const { eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); diff --git a/eigen/Eigen/src/Core/Ref.h b/eigen/Eigen/src/Core/Ref.h index bdf24f5..9c6e3c5 100644 --- a/eigen/Eigen/src/Core/Ref.h +++ b/eigen/Eigen/src/Core/Ref.h @@ -95,6 +95,8 @@ protected: template EIGEN_DEVICE_FUNC void construct(Expression& expr) { + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression); + if(PlainObjectType::RowsAtCompileTime==1) { eigen_assert(expr.rows()==1 || expr.cols()==1); diff --git a/eigen/Eigen/src/Core/SelfAdjointView.h b/eigen/Eigen/src/Core/SelfAdjointView.h index 504c98f..b2e51f3 100644 --- a/eigen/Eigen/src/Core/SelfAdjointView.h +++ b/eigen/Eigen/src/Core/SelfAdjointView.h @@ -71,7 +71,9 @@ template class SelfAdjointView EIGEN_DEVICE_FUNC explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix) - {} + { + EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY); + } EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); } @@ -189,7 +191,7 @@ template class SelfAdjointView TriangularView >::type(tmp2); } - typedef SelfAdjointView ConjugateReturnType; + typedef SelfAdjointView ConjugateReturnType; /** \sa MatrixBase::conjugate() const */ EIGEN_DEVICE_FUNC inline const ConjugateReturnType conjugate() const diff --git a/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h b/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h index 50099df..7c89c2e 100644 --- a/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +++ b/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -17,7 +17,6 @@ namespace Eigen { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op()); return derived(); } @@ -25,7 +24,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(co template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op()); return derived(); } @@ -33,7 +31,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator+=(co template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op()); return derived(); } @@ -41,7 +38,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase::operator-=(co template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const Scalar& other) { - typedef typename Derived::PlainObject PlainObject; internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op()); return derived(); } diff --git a/eigen/Eigen/src/Core/StableNorm.h b/eigen/Eigen/src/Core/StableNorm.h index be04ed4..88c8d98 100644 --- a/eigen/Eigen/src/Core/StableNorm.h +++ b/eigen/Eigen/src/Core/StableNorm.h @@ -165,7 +165,7 @@ MatrixBase::stableNorm() const typedef typename internal::nested_eval::type DerivedCopy; typedef typename internal::remove_all::type DerivedCopyClean; - DerivedCopy copy(derived()); + const DerivedCopy copy(derived()); enum { CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit) diff --git a/eigen/Eigen/src/Core/Transpositions.h b/eigen/Eigen/src/Core/Transpositions.h index 19c17bb..86da5af 100644 --- a/eigen/Eigen/src/Core/Transpositions.h +++ b/eigen/Eigen/src/Core/Transpositions.h @@ -384,7 +384,7 @@ class Transpose > const Product operator*(const MatrixBase& matrix, const Transpose& trt) { - return Product(matrix.derived(), trt.derived()); + return Product(matrix.derived(), trt); } /** \returns the \a matrix with the inverse transpositions applied to the rows. diff --git a/eigen/Eigen/src/Core/arch/AVX/Complex.h b/eigen/Eigen/src/Core/arch/AVX/Complex.h index 99439c8..7fa6196 100644 --- a/eigen/Eigen/src/Core/arch/AVX/Complex.h +++ b/eigen/Eigen/src/Core/arch/AVX/Complex.h @@ -204,23 +204,7 @@ template<> struct conj_helper } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const - { return Packet4cf(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const - { return Packet4cf(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f) template<> EIGEN_STRONG_INLINE Packet4cf pdiv(const Packet4cf& a, const Packet4cf& b) { @@ -400,23 +384,7 @@ template<> struct conj_helper } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const - { return Packet2cd(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const - { return Packet2cd(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d) template<> EIGEN_STRONG_INLINE Packet2cd pdiv(const Packet2cd& a, const Packet2cd& b) { diff --git a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h index 195d40f..61c3dfc 100644 --- a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h @@ -308,9 +308,9 @@ template<> EIGEN_STRONG_INLINE void pstore1(int* to, const int& a) } #ifndef EIGEN_VECTORIZE_AVX512 -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } #endif template<> EIGEN_STRONG_INLINE float pfirst(const Packet8f& a) { @@ -333,9 +333,12 @@ template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a) { __m256d tmp = _mm256_shuffle_pd(a,a,5); return _mm256_permute2f128_pd(tmp, tmp, 1); - + #if 0 + // This version is unlikely to be faster as _mm256_shuffle_ps and _mm256_permute_pd + // exhibit the same latency/throughput, but it is here for future reference/benchmarking... __m256d swap_halves = _mm256_permute2f128_pd(a,a,1); return _mm256_permute_pd(swap_halves,5); + #endif } // pabs should be ok diff --git a/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h b/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h index 399be0e..9c1717f 100644 --- a/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +++ b/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h @@ -88,9 +88,9 @@ plog(const Packet16f& _x) { // x = x + x - 1.0; // } else { x = x - 1.0; } __mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ); - Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps()); + Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x); x = psub(x, p16f_1); - e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps())); + e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1)); x = padd(x, tmp); Packet16f x2 = pmul(x, x); @@ -119,8 +119,9 @@ plog(const Packet16f& _x) { x = padd(x, y2); // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF. - return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf, - _mm512_mask_blend_ps(invalid_mask, p16f_nan, x)); + return _mm512_mask_blend_ps(iszero_mask, + _mm512_mask_blend_ps(invalid_mask, x, p16f_nan), + p16f_minus_inf); } #endif @@ -266,8 +267,7 @@ psqrt(const Packet16f& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ); - Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x), - _mm512_setzero_ps()); + Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x)); // Do a single step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five)); @@ -289,8 +289,7 @@ psqrt(const Packet8d& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ); - Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x), - _mm512_setzero_pd()); + Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x)); // Do a first step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); @@ -333,20 +332,18 @@ prsqrt(const Packet16f& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ); - Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), - _mm512_rsqrt14_ps(_x)); + Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps()); // Fill in NaNs and Infs for the negative/zero entries. __mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ); Packet16f infs_and_nans = _mm512_mask_blend_ps( - neg_mask, p16f_nan, - _mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps())); + neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan); // Do a single step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five)); // Insert NaNs and Infs in all the right places. - return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x); + return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans); } template <> @@ -363,14 +360,12 @@ prsqrt(const Packet8d& _x) { // select only the inverse sqrt of positive normal inputs (denormals are // flushed to zero and cause infs as well). __mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ); - Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), - _mm512_rsqrt14_pd(_x)); + Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd()); // Fill in NaNs and Infs for the negative/zero entries. __mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ); Packet8d infs_and_nans = _mm512_mask_blend_pd( - neg_mask, p8d_nan, - _mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd())); + neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan); // Do a first step of Newton's iteration. x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); @@ -379,9 +374,9 @@ prsqrt(const Packet8d& _x) { x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five)); // Insert NaNs and Infs in all the right places. - return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x); + return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans); } -#else +#elif defined(EIGEN_VECTORIZE_AVX512ER) template <> EIGEN_STRONG_INLINE Packet16f prsqrt(const Packet16f& x) { return _mm512_rsqrt28_ps(x); diff --git a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h index f6500a1..8970524 100644 --- a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -618,9 +618,9 @@ EIGEN_STRONG_INLINE void pstore1(int* to, const int& a) { pstore(to, pa); } -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template <> EIGEN_STRONG_INLINE float pfirst(const Packet16f& a) { diff --git a/eigen/Eigen/src/Core/arch/AltiVec/Complex.h b/eigen/Eigen/src/Core/arch/AltiVec/Complex.h index 67db2f8..3e66573 100644 --- a/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/eigen/Eigen/src/Core/arch/AltiVec/Complex.h @@ -224,23 +224,7 @@ template<> struct conj_helper } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const - { return Packet2cf(internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const - { return Packet2cf(internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { @@ -416,23 +400,8 @@ template<> struct conj_helper return pconj(internal::pmul(a, b)); } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const - { return Packet1cd(internal::pmul(x, y.v)); } -}; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const - { return Packet1cd(internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { diff --git a/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h b/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h index b3f1ea1..08a27d1 100644 --- a/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -103,7 +103,7 @@ static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4u static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16}; #else -static Packet16uc p16uc_FORWARD = p16uc_REVERSE32; +static Packet16uc p16uc_FORWARD = p16uc_REVERSE32; static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }; static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; @@ -388,10 +388,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv(const Packet4i& /*a*/, co template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); } template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; } -template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) +{ + #ifdef __VSX__ + Packet4f ret; + __asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; + #else + return vec_min(a, b); + #endif +} template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); } -template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); } +template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) +{ + #ifdef __VSX__ + Packet4f ret; + __asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; + #else + return vec_max(a, b); + #endif +} template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); } @@ -764,7 +782,7 @@ typedef __vector __bool long Packet2bl; static Packet2l p2l_ONE = { 1, 1 }; static Packet2l p2l_ZERO = reinterpret_cast(p4i_ZERO); -static Packet2d p2d_ONE = { 1.0, 1.0 }; +static Packet2d p2d_ONE = { 1.0, 1.0 }; static Packet2d p2d_ZERO = reinterpret_cast(p4f_ZERO); static Packet2d p2d_MZERO = { -0.0, -0.0 }; @@ -910,9 +928,19 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv(const Packet2d& a, const // for some weird raisons, it has to be overloaded for packet of integers template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); } -template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); } +template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) +{ + Packet2d ret; + __asm__ ("xvcmpgedp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; + } -template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); } +template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) +{ + Packet2d ret; + __asm__ ("xvcmpgtdp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b)); + return ret; +} template<> EIGEN_STRONG_INLINE Packet2d pand(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); } @@ -969,7 +997,7 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) Packet2d v[2], sum; v[0] = vecs[0] + reinterpret_cast(vec_sld(reinterpret_cast(vecs[0]), reinterpret_cast(vecs[0]), 8)); v[1] = vecs[1] + reinterpret_cast(vec_sld(reinterpret_cast(vecs[1]), reinterpret_cast(vecs[1]), 8)); - + #ifdef _BIG_ENDIAN sum = reinterpret_cast(vec_sld(reinterpret_cast(v[0]), reinterpret_cast(v[1]), 8)); #else @@ -1022,7 +1050,7 @@ ptranspose(PacketBlock& kernel) { template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) { Packet2l select = { ifPacket.select[0], ifPacket.select[1] }; - Packet2bl mask = vec_cmpeq(reinterpret_cast(select), reinterpret_cast(p2l_ONE)); + Packet2bl mask = reinterpret_cast( vec_cmpeq(reinterpret_cast(select), reinterpret_cast(p2l_ONE)) ); return vec_sel(elsePacket, thenPacket, mask); } #endif // __VSX__ diff --git a/eigen/Eigen/src/Core/arch/CUDA/Half.h b/eigen/Eigen/src/Core/arch/CUDA/Half.h index 294c517..02ac0c2 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/Half.h +++ b/eigen/Eigen/src/Core/arch/CUDA/Half.h @@ -147,55 +147,55 @@ namespace half_impl { // versions to get the ALU speed increased), but you do save the // conversion steps back and forth. -__device__ half operator + (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) { return __hadd(a, b); } -__device__ half operator * (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) { return __hmul(a, b); } -__device__ half operator - (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) { return __hsub(a, b); } -__device__ half operator / (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) { float num = __half2float(a); float denom = __half2float(b); return __float2half(num / denom); } -__device__ half operator - (const half& a) { +EIGEN_STRONG_INLINE __device__ half operator - (const half& a) { return __hneg(a); } -__device__ half& operator += (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) { a = a + b; return a; } -__device__ half& operator *= (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) { a = a * b; return a; } -__device__ half& operator -= (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) { a = a - b; return a; } -__device__ half& operator /= (half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) { a = a / b; return a; } -__device__ bool operator == (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) { return __heq(a, b); } -__device__ bool operator != (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) { return __hne(a, b); } -__device__ bool operator < (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) { return __hlt(a, b); } -__device__ bool operator <= (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) { return __hle(a, b); } -__device__ bool operator > (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) { return __hgt(a, b); } -__device__ bool operator >= (const half& a, const half& b) { +EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) { return __hge(a, b); } @@ -238,10 +238,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b) return a; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) { - return float(a) == float(b); + return numext::equal_strict(float(a),float(b)); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) { - return float(a) != float(b); + return numext::not_equal_strict(float(a), float(b)); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) { return float(a) < float(b); @@ -386,11 +386,15 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) { return result; } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { - return half(::expf(float(a))); +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 + return half(hexp(a)); +#else + return half(::expf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { -#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 - return Eigen::half(::hlog(a)); +#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 + return half(::hlog(a)); #else return half(::logf(float(a))); #endif @@ -402,7 +406,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) { - return half(::sqrtf(float(a))); +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 + return half(hsqrt(a)); +#else + return half(::sqrtf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) { return half(::powf(float(a), float(b))); @@ -420,10 +428,18 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) { return half(::tanhf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) { +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300 + return half(hfloor(a)); +#else return half(::floorf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) { +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300 + return half(hceil(a)); +#else return half(::ceilf(float(a))); +#endif } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) { @@ -493,8 +509,8 @@ struct numeric_limits { static const bool is_bounded = false; static const bool is_modulo = false; static const int digits = 11; - static const int digits10 = 2; - //static const int max_digits10 = ; + static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html + static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html static const int radix = 2; static const int min_exponent = -13; static const int min_exponent10 = -4; @@ -557,7 +573,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) { return Eigen::half(::expf(float(a))); } EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) { -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530 return Eigen::half(::hlog(a)); #else return Eigen::half(::logf(float(a))); diff --git a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index ae54225..943e0b0 100644 --- a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -275,7 +275,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p(const half2& a) { return __floats2half2_rn(r1, r2); } -#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 +#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530 template<> __device__ EIGEN_STRONG_INLINE half2 plog(const half2& a) { diff --git a/eigen/Eigen/src/Core/arch/NEON/Complex.h b/eigen/Eigen/src/Core/arch/NEON/Complex.h index 57e9b43..306a309 100644 --- a/eigen/Eigen/src/Core/arch/NEON/Complex.h +++ b/eigen/Eigen/src/Core/arch/NEON/Complex.h @@ -67,7 +67,7 @@ template<> struct unpacket_traits { typedef std::complex type; template<> EIGEN_STRONG_INLINE Packet2cf pset1(const std::complex& from) { float32x2_t r64; - r64 = vld1_f32((float *)&from); + r64 = vld1_f32((const float *)&from); return Packet2cf(vcombine_f32(r64, r64)); } @@ -142,7 +142,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf to[stride*1] = std::complex(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3)); } -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_ARM_PREFETCH((float *)addr); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_ARM_PREFETCH((const float *)addr); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) { @@ -265,6 +265,8 @@ template<> struct conj_helper } }; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) + template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { // TODO optimize it for NEON @@ -275,7 +277,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, con s = vmulq_f32(b.v, b.v); rev_s = vrev64q_f32(s); - return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s))); + return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s))); } EIGEN_DEVICE_FUNC inline void @@ -381,7 +383,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex< template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); } -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_ARM_PREFETCH((double *)addr); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { EIGEN_ARM_PREFETCH((const double *)addr); } template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather, Packet1cd>(const std::complex* from, Index stride) { @@ -456,6 +458,8 @@ template<> struct conj_helper } }; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) + template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { // TODO optimize it for NEON diff --git a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h index 836fbc0..3d5ed0d 100644 --- a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h @@ -36,12 +36,43 @@ namespace internal { #endif #endif +#if EIGEN_COMP_MSVC + +// In MSVC's arm_neon.h header file, all NEON vector types +// are aliases to the same underlying type __n128. +// We thus have to wrap them to make them different C++ types. +// (See also bug 1428) + +template +struct eigen_packet_wrapper +{ + operator T&() { return m_val; } + operator const T&() const { return m_val; } + eigen_packet_wrapper() {} + eigen_packet_wrapper(const T &v) : m_val(v) {} + eigen_packet_wrapper& operator=(const T &v) { + m_val = v; + return *this; + } + + T m_val; +}; +typedef eigen_packet_wrapper Packet2f; +typedef eigen_packet_wrapper Packet4f; +typedef eigen_packet_wrapper Packet4i; +typedef eigen_packet_wrapper Packet2i; +typedef eigen_packet_wrapper Packet4ui; + +#else + typedef float32x2_t Packet2f; typedef float32x4_t Packet4f; typedef int32x4_t Packet4i; typedef int32x2_t Packet2i; typedef uint32x4_t Packet4ui; +#endif // EIGEN_COMP_MSVC + #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \ const Packet4f p4f_##NAME = pset1(X) diff --git a/eigen/Eigen/src/Core/arch/SSE/Complex.h b/eigen/Eigen/src/Core/arch/SSE/Complex.h index 5607fe0..d075043 100644 --- a/eigen/Eigen/src/Core/arch/SSE/Complex.h +++ b/eigen/Eigen/src/Core/arch/SSE/Complex.h @@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter, Packet2cf _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3))); } -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet2cf& a) { @@ -229,23 +229,7 @@ template<> struct conj_helper } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const - { return Packet2cf(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const - { return Packet2cf(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) template<> EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2cf& b) { @@ -340,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup(const std::complex< template<> EIGEN_STRONG_INLINE void pstore >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); } template<> EIGEN_STRONG_INLINE void pstoreu >(std::complex * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); } -template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch >(const std::complex * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } template<> EIGEN_STRONG_INLINE std::complex pfirst(const Packet1cd& a) { @@ -430,23 +414,7 @@ template<> struct conj_helper } }; -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const - { return Packet1cd(Eigen::internal::pmul(x, y.v)); } -}; - -template<> struct conj_helper -{ - EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const - { return padd(c, pmul(x,y)); } - - EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const - { return Packet1cd(Eigen::internal::pmul(x.v, y)); } -}; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { diff --git a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h index 3832de1..5e652cc 100644 --- a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h @@ -409,10 +409,16 @@ template<> EIGEN_STRONG_INLINE void pstore1(double* to, const double& pstore(to, Packet2d(vec2d_swizzle1(pa,0,0))); } +#if EIGEN_COMP_PGI +typedef const void * SsePrefetchPtrType; +#else +typedef const char * SsePrefetchPtrType; +#endif + #ifndef EIGEN_VECTORIZE_AVX -template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } -template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE void prefetch(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); } #endif #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64 @@ -876,4 +882,14 @@ template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, co } // end namespace Eigen +#if EIGEN_COMP_PGI +// PGI++ does not define the following intrinsics in C++ mode. +static inline __m128 _mm_castpd_ps (__m128d x) { return reinterpret_cast<__m128&>(x); } +static inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); } +static inline __m128d _mm_castps_pd (__m128 x) { return reinterpret_cast<__m128d&>(x); } +static inline __m128i _mm_castps_si128(__m128 x) { return reinterpret_cast<__m128i&>(x); } +static inline __m128 _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x); } +static inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); } +#endif + #endif // EIGEN_PACKET_MATH_SSE_H diff --git a/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h b/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h index c848932..c6ca8c7 100644 --- a/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +++ b/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h @@ -14,6 +14,7 @@ namespace Eigen { namespace internal { +#ifndef EIGEN_VECTORIZE_AVX template <> struct type_casting_traits { enum { @@ -23,11 +24,6 @@ struct type_casting_traits { }; }; -template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4f& a) { - return _mm_cvttps_epi32(a); -} - - template <> struct type_casting_traits { enum { @@ -37,11 +33,6 @@ struct type_casting_traits { }; }; -template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i& a) { - return _mm_cvtepi32_ps(a); -} - - template <> struct type_casting_traits { enum { @@ -51,10 +42,6 @@ struct type_casting_traits { }; }; -template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet2d& a, const Packet2d& b) { - return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6)); -} - template <> struct type_casting_traits { enum { @@ -63,6 +50,19 @@ struct type_casting_traits { TgtCoeffRatio = 2 }; }; +#endif + +template<> EIGEN_STRONG_INLINE Packet4i pcast(const Packet4f& a) { + return _mm_cvttps_epi32(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet4i& a) { + return _mm_cvtepi32_ps(a); +} + +template<> EIGEN_STRONG_INLINE Packet4f pcast(const Packet2d& a, const Packet2d& b) { + return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6)); +} template<> EIGEN_STRONG_INLINE Packet2d pcast(const Packet4f& a) { // Simply discard the second half of the input diff --git a/eigen/Eigen/src/Core/arch/ZVector/Complex.h b/eigen/Eigen/src/Core/arch/ZVector/Complex.h index d39d2d1..1bfb733 100644 --- a/eigen/Eigen/src/Core/arch/ZVector/Complex.h +++ b/eigen/Eigen/src/Core/arch/ZVector/Complex.h @@ -336,6 +336,9 @@ template<> struct conj_helper } }; +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f) +EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d) + template<> EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1cd& b) { // TODO optimize it for AltiVec diff --git a/eigen/Eigen/src/Core/functors/BinaryFunctors.h b/eigen/Eigen/src/Core/functors/BinaryFunctors.h index 96747ba..3eae6b8 100644 --- a/eigen/Eigen/src/Core/functors/BinaryFunctors.h +++ b/eigen/Eigen/src/Core/functors/BinaryFunctors.h @@ -255,7 +255,7 @@ struct scalar_cmp_op : binary_op_base struct scalar_hypot_op : binary_op_base { EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op) -// typedef typename NumTraits::Real result_type; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar &y) const { - EIGEN_USING_STD_MATH(sqrt) - Scalar p, qp; - if(_x>_y) - { - p = _x; - qp = _y / p; - } - else - { - p = _y; - qp = _x / p; - } - return p * sqrt(Scalar(1) + qp*qp); + // This functor is used by hypotNorm only for which it is faster to first apply abs + // on all coefficients prior to reduction through hypot. + // This way we avoid calling abs on positive and real entries, and this also permits + // to seamlessly handle complexes. Otherwise we would have to handle both real and complexes + // through the same functor... + return internal::positive_real_hypot(x,y); } }; template diff --git a/eigen/Eigen/src/Core/functors/StlFunctors.h b/eigen/Eigen/src/Core/functors/StlFunctors.h index 6df3fa5..9c1d758 100644 --- a/eigen/Eigen/src/Core/functors/StlFunctors.h +++ b/eigen/Eigen/src/Core/functors/StlFunctors.h @@ -83,13 +83,17 @@ struct functor_traits > { enum { Cost = functor_traits::Cost, PacketAccess = false }; }; #endif +#if (__cplusplus < 201703L) && (EIGEN_COMP_MSVC < 1910) +// std::unary_negate is deprecated since c++17 and will be removed in c++20 template struct functor_traits > { enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; +// std::binary_negate is deprecated since c++17 and will be removed in c++20 template struct functor_traits > { enum { Cost = 1 + functor_traits::Cost, PacketAccess = false }; }; +#endif #ifdef EIGEN_STDEXT_SUPPORT diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h index 41e18ff..9176a13 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h @@ -88,7 +88,7 @@ struct general_matrix_matrix_rankupdate(lhsStride), ldc=convert_index(resStride), n=convert_index(size), k=convert_index(depth); \ char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \ EIGTYPE beta(1); \ - BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \ + BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \ } \ }; @@ -125,9 +125,13 @@ struct general_matrix_matrix_rankupdate(b_tmp.outerStride()); \ } else b = _rhs; \ \ - BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ }}; -GEMM_SPECIALIZATION(double, d, double, d) -GEMM_SPECIALIZATION(float, f, float, s) -GEMM_SPECIALIZATION(dcomplex, cd, double, z) -GEMM_SPECIALIZATION(scomplex, cf, float, c) +#ifdef EIGEN_USE_MKL +GEMM_SPECIALIZATION(double, d, double, dgemm) +GEMM_SPECIALIZATION(float, f, float, sgemm) +GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm) +GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, cgemm) +#else +GEMM_SPECIALIZATION(double, d, double, dgemm_) +GEMM_SPECIALIZATION(float, f, float, sgemm_) +GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_) +GEMM_SPECIALIZATION(scomplex, cf, float, cgemm_) +#endif } // end namespase internal diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h index 3c1a7fc..a597c1f 100644 --- a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h @@ -183,8 +183,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product \ struct general_matrix_vector_product_gemv \ { \ @@ -113,14 +113,21 @@ static void run( \ x_ptr=x_tmp.data(); \ incx=1; \ } else x_ptr=rhs; \ - BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \ + BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \ }\ }; -EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, d) -EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, s) -EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z) -EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, c) +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv) +EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv) +EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv) +EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv) +#else +EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_) +EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_) +EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_) +EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_) +#endif } // end namespase internal diff --git a/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h b/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h index a45238d..9a53185 100644 --- a/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +++ b/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h @@ -40,7 +40,7 @@ namespace internal { /* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */ -#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -81,13 +81,13 @@ struct product_selfadjoint_matrix(b_tmp.outerStride()); \ } else b = _rhs; \ \ - BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -144,20 +144,26 @@ struct product_selfadjoint_matrix(b_tmp.outerStride()); \ } \ \ - BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -EIGEN_BLAS_SYMM_L(double, double, d, d) -EIGEN_BLAS_SYMM_L(float, float, f, s) -EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z) -EIGEN_BLAS_HEMM_L(scomplex, float, cf, c) - +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_SYMM_L(double, double, d, dsymm) +EIGEN_BLAS_SYMM_L(float, float, f, ssymm) +EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm) +EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm) +#else +EIGEN_BLAS_SYMM_L(double, double, d, dsymm_) +EIGEN_BLAS_SYMM_L(float, float, f, ssymm_) +EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_) +EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_) +#endif /* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */ -#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -197,13 +203,13 @@ struct product_selfadjoint_matrix(b_tmp.outerStride()); \ } else b = _lhs; \ \ - BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ \ } \ }; -#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -259,15 +265,21 @@ struct product_selfadjoint_matrix(b_tmp.outerStride()); \ } \ \ - BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ + BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \ } \ }; -EIGEN_BLAS_SYMM_R(double, double, d, d) -EIGEN_BLAS_SYMM_R(float, float, f, s) -EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z) -EIGEN_BLAS_HEMM_R(scomplex, float, cf, c) - +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_SYMM_R(double, double, d, dsymm) +EIGEN_BLAS_SYMM_R(float, float, f, ssymm) +EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm) +EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm) +#else +EIGEN_BLAS_SYMM_R(double, double, d, dsymm_) +EIGEN_BLAS_SYMM_R(float, float, f, ssymm_) +EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_) +EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_) +#endif } // end namespace internal } // end namespace Eigen diff --git a/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h b/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h index 38f23ac..1238345 100644 --- a/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +++ b/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h @@ -95,14 +95,21 @@ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \ x_tmp=map_x.conjugate(); \ x_ptr=x_tmp.data(); \ } else x_ptr=_rhs; \ - BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \ + BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \ }\ }; +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv) +EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv) +EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv) +EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv) +#else EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_) EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_) EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_) EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_) +#endif } // end namespace internal diff --git a/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h b/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h index 6ec5a8a..f784507 100644 --- a/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +++ b/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h @@ -137,7 +137,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix triangularBuffer((internal::constructor_without_unaligned_array_assert())); + // To work around an "error: member reference base type 'Matrix<...> + // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is + // not a structure or union" compilation error in nvcc (tested V8.0.61), + // create a dummy internal::constructor_without_unaligned_array_assert + // object to pass to the Matrix constructor. + internal::constructor_without_unaligned_array_assert a; + Matrix triangularBuffer(a); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); @@ -284,7 +290,8 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix triangularBuffer((internal::constructor_without_unaligned_array_assert())); + internal::constructor_without_unaligned_array_assert a; + Matrix triangularBuffer(a); triangularBuffer.setZero(); if((Mode&ZeroDiag)==ZeroDiag) triangularBuffer.diagonal().setZero(); @@ -393,7 +400,9 @@ struct triangular_product_impl { template static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha) { - typedef typename Dest::Scalar Scalar; + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar Scalar; typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; @@ -405,8 +414,9 @@ struct triangular_product_impl typename internal::add_const_on_value_type::type lhs = LhsBlasTraits::extract(a_lhs); typename internal::add_const_on_value_type::type rhs = RhsBlasTraits::extract(a_rhs); - Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) - * RhsBlasTraits::extractScalarFactor(a_rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs); + Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha; typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar, Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType; @@ -431,6 +441,21 @@ struct triangular_product_impl &dst.coeffRef(0,0), dst.outerStride(), // result info actualAlpha, blocking ); + + // Apply correction if the diagonal is unit and a scalar factor was nested: + if ((Mode&UnitDiag)==UnitDiag) + { + if (LhsIsTriangular && lhs_alpha!=LhsScalar(1)) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize); + } + else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1)) + { + Index diagSize = (std::min)(rhs.rows(),rhs.cols()); + dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize); + } + } } }; diff --git a/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h b/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h index aecded6..a25197a 100644 --- a/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +++ b/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h @@ -75,7 +75,7 @@ EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true) EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false) // implements col-major += alpha * op(triangular) * op(general) -#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \ template \ @@ -172,7 +172,7 @@ struct product_triangular_matrix_matrix_trmm > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ @@ -180,13 +180,20 @@ struct product_triangular_matrix_matrix_trmm \ @@ -282,7 +289,7 @@ struct product_triangular_matrix_matrix_trmm > res_tmp(res,rows,cols,OuterStride<>(resStride)); \ @@ -290,11 +297,17 @@ struct product_triangular_matrix_matrix_trmm struct trmv_selector typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(lhs); typename internal::add_const_on_value_type::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) - * RhsBlasTraits::extractScalarFactor(rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs); + ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha; enum { // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 @@ -274,6 +275,12 @@ template struct trmv_selector else dest = MappedDest(actualDestPtr, dest.size()); } + + if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) ) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize); + } } }; @@ -295,8 +302,9 @@ template struct trmv_selector typename add_const::type actualLhs = LhsBlasTraits::extract(lhs); typename add_const::type actualRhs = RhsBlasTraits::extract(rhs); - ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs) - * RhsBlasTraits::extractScalarFactor(rhs); + LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs); + RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs); + ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha; enum { DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1 @@ -326,6 +334,12 @@ template struct trmv_selector actualRhsPtr,1, dest.data(),dest.innerStride(), actualAlpha); + + if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) ) + { + Index diagSize = (std::min)(lhs.rows(),lhs.cols()); + dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize); + } } }; diff --git a/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h b/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h index 07bf26c..3d47a2b 100644 --- a/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +++ b/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h @@ -71,7 +71,7 @@ EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex) EIGEN_BLAS_TRMV_SPECIALIZE(scomplex) // implements col-major: res += alpha * op(triangular) * vector -#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \ template \ struct triangular_matrix_vector_product_trmv { \ enum { \ @@ -121,10 +121,10 @@ struct triangular_matrix_vector_product_trmv(size); \ n = convert_index(cols-size); \ } \ - BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ + BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \ } \ } \ }; -EIGEN_BLAS_TRMV_CM(double, double, d, d) -EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z) -EIGEN_BLAS_TRMV_CM(float, float, f, s) -EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c) +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_TRMV_CM(double, double, d, d,) +EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,) +EIGEN_BLAS_TRMV_CM(float, float, f, s,) +EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8, cf, c,) +#else +EIGEN_BLAS_TRMV_CM(double, double, d, d, _) +EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _) +EIGEN_BLAS_TRMV_CM(float, float, f, s, _) +EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c, _) +#endif // implements row-major: res += alpha * op(triangular) * vector -#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \ +#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \ template \ struct triangular_matrix_vector_product_trmv { \ enum { \ @@ -203,10 +210,10 @@ struct triangular_matrix_vector_product_trmv(size); \ n = convert_index(cols-size); \ } \ - BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \ + BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \ } \ } \ }; -EIGEN_BLAS_TRMV_RM(double, double, d, d) -EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z) -EIGEN_BLAS_TRMV_RM(float, float, f, s) -EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c) +#ifdef EIGEN_USE_MKL +EIGEN_BLAS_TRMV_RM(double, double, d, d,) +EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,) +EIGEN_BLAS_TRMV_RM(float, float, f, s,) +EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8, cf, c,) +#else +EIGEN_BLAS_TRMV_RM(double, double, d, d,_) +EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_) +EIGEN_BLAS_TRMV_RM(float, float, f, s,_) +EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c,_) +#endif } // end namespase internal diff --git a/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h b/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h index 88c0fb7..f077511 100644 --- a/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +++ b/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h @@ -38,7 +38,7 @@ namespace Eigen { namespace internal { // implements LeftSide op(triangular)^-1 * general -#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \ +#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \ template \ struct triangular_solve_matrix \ { \ @@ -80,18 +80,24 @@ struct triangular_solve_matrix \ struct triangular_solve_matrix \ { \ @@ -133,16 +139,22 @@ struct triangular_solve_matrix /*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/ @@ -108,6 +109,10 @@ #endif #endif +#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL) +#include "../../misc/blas.h" +#endif + namespace Eigen { typedef std::complex dcomplex; @@ -121,8 +126,5 @@ typedef int BlasIndex; } // end namespace Eigen -#if defined(EIGEN_USE_BLAS) -#include "../../misc/blas.h" -#endif #endif // EIGEN_MKL_SUPPORT_H diff --git a/eigen/Eigen/src/Core/util/Macros.h b/eigen/Eigen/src/Core/util/Macros.h index 38d6ddb..02d21d2 100644 --- a/eigen/Eigen/src/Core/util/Macros.h +++ b/eigen/Eigen/src/Core/util/Macros.h @@ -13,7 +13,7 @@ #define EIGEN_WORLD_VERSION 3 #define EIGEN_MAJOR_VERSION 3 -#define EIGEN_MINOR_VERSION 4 +#define EIGEN_MINOR_VERSION 5 #define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ @@ -399,7 +399,7 @@ // Does the compiler support variadic templates? #ifndef EIGEN_HAS_VARIADIC_TEMPLATES #if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \ - && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) ) + && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) ) // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // this prevents nvcc from crashing when compiling Eigen on Tegra X1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 @@ -413,7 +413,7 @@ #ifdef __CUDACC__ // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above -#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500)) +#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500)) #define EIGEN_HAS_CONSTEXPR 1 #endif #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ @@ -487,11 +487,13 @@ // EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC, // but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline // but GCC is still doing fine with just inline. +#ifndef EIGEN_STRONG_INLINE #if EIGEN_COMP_MSVC || EIGEN_COMP_ICC #define EIGEN_STRONG_INLINE __forceinline #else #define EIGEN_STRONG_INLINE inline #endif +#endif // EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible // attribute to maximize inlining. This should only be used when really necessary: in particular, @@ -812,7 +814,8 @@ namespace Eigen { // just an empty macro ! #define EIGEN_EMPTY -#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) +#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0) + // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ using Base::operator =; #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) @@ -986,7 +989,13 @@ namespace Eigen { # define EIGEN_NOEXCEPT # define EIGEN_NOEXCEPT_IF(x) # define EIGEN_NO_THROW throw() -# define EIGEN_EXCEPTION_SPEC(X) throw(X) +# if EIGEN_COMP_MSVC + // MSVC does not support exception specifications (warning C4290), + // and they are deprecated in c++11 anyway. +# define EIGEN_EXCEPTION_SPEC(X) throw() +# else +# define EIGEN_EXCEPTION_SPEC(X) throw(X) +# endif #endif #endif // EIGEN_MACROS_H diff --git a/eigen/Eigen/src/Core/util/Memory.h b/eigen/Eigen/src/Core/util/Memory.h index c634d7e..66cdbd8 100644 --- a/eigen/Eigen/src/Core/util/Memory.h +++ b/eigen/Eigen/src/Core/util/Memory.h @@ -70,7 +70,7 @@ inline void throw_std_bad_alloc() throw std::bad_alloc(); #else std::size_t huge = static_cast(-1); - new int[huge]; + ::operator new(huge); #endif } @@ -493,7 +493,7 @@ template struct smart_copy_helper { IntPtr size = IntPtr(end)-IntPtr(start); if(size==0) return; eigen_internal_assert(start!=0 && end!=0 && target!=0); - memcpy(target, start, size); + std::memcpy(target, start, size); } }; @@ -696,7 +696,15 @@ template void swap(scoped_array &a,scoped_array &b) /** \class aligned_allocator * \ingroup Core_Module * -* \brief STL compatible allocator to use with with 16 byte aligned types +* \brief STL compatible allocator to use with types requiring a non standrad alignment. +* +* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd. +* By default, it will thus provide at least 16 bytes alignment and more in following cases: +* - 32 bytes alignment if AVX is enabled. +* - 64 bytes alignment if AVX512 is enabled. +* +* This can be controled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented +* \link TopicPreprocessorDirectivesPerformance there \endlink. * * Example: * \code diff --git a/eigen/Eigen/src/Core/util/Meta.h b/eigen/Eigen/src/Core/util/Meta.h index 7f63707..1d73f05 100644 --- a/eigen/Eigen/src/Core/util/Meta.h +++ b/eigen/Eigen/src/Core/util/Meta.h @@ -485,6 +485,26 @@ T div_ceil(const T &a, const T &b) return (a+b-1) / b; } +// The aim of the following functions is to bypass -Wfloat-equal warnings +// when we really want a strict equality comparison on floating points. +template EIGEN_STRONG_INLINE +bool equal_strict(const X& x,const Y& y) { return x == y; } + +template<> EIGEN_STRONG_INLINE +bool equal_strict(const float& x,const float& y) { return std::equal_to()(x,y); } + +template<> EIGEN_STRONG_INLINE +bool equal_strict(const double& x,const double& y) { return std::equal_to()(x,y); } + +template EIGEN_STRONG_INLINE +bool not_equal_strict(const X& x,const Y& y) { return x != y; } + +template<> EIGEN_STRONG_INLINE +bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to()(x,y); } + +template<> EIGEN_STRONG_INLINE +bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to()(x,y); } + } // end namespace numext } // end namespace Eigen diff --git a/eigen/Eigen/src/Core/util/StaticAssert.h b/eigen/Eigen/src/Core/util/StaticAssert.h index 983361a..500e477 100644 --- a/eigen/Eigen/src/Core/util/StaticAssert.h +++ b/eigen/Eigen/src/Core/util/StaticAssert.h @@ -24,6 +24,7 @@ * */ +#ifndef EIGEN_STATIC_ASSERT #ifndef EIGEN_NO_STATIC_ASSERT #if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)) @@ -44,64 +45,65 @@ struct static_assertion { enum { - YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX, - YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES, - YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES, - THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE, - THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE, - THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE, - OUT_OF_RANGE_ACCESS, - YOU_MADE_A_PROGRAMMING_MISTAKE, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT, - EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE, - YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR, - YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR, - UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC, - THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES, - FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED, - NUMERIC_TYPE_MUST_BE_REAL, - COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED, - WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED, - THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE, - INVALID_MATRIX_PRODUCT, - INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS, - INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION, - YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY, - THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES, - THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES, - INVALID_MATRIX_TEMPLATE_PARAMETERS, - INVALID_MATRIXBASE_TEMPLATE_PARAMETERS, - BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER, - THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX, - THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES, - YOU_ALREADY_SPECIFIED_THIS_STRIDE, - INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION, - THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD, - PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1, - THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS, - YOU_CANNOT_MIX_ARRAYS_AND_MATRICES, - YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION, - THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY, - YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT, - THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS, - THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS, - THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL, - THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES, - YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED, - YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED, - THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE, - THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH, - OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG, - IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY, - STORAGE_LAYOUT_DOES_NOT_MATCH, - EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE, - THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS, - MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY, - THIS_TYPE_IS_NOT_SUPPORTED, - STORAGE_KIND_MUST_MATCH, - STORAGE_INDEX_MUST_MATCH, - CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY + YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1, + YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1, + YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1, + THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1, + THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1, + THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1, + OUT_OF_RANGE_ACCESS=1, + YOU_MADE_A_PROGRAMMING_MISTAKE=1, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1, + EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1, + YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1, + YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1, + UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1, + THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1, + FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1, + NUMERIC_TYPE_MUST_BE_REAL=1, + COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1, + WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1, + THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1, + INVALID_MATRIX_PRODUCT=1, + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1, + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1, + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1, + THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1, + INVALID_MATRIX_TEMPLATE_PARAMETERS=1, + INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1, + BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1, + THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1, + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1, + YOU_ALREADY_SPECIFIED_THIS_STRIDE=1, + INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1, + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1, + THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1, + YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1, + YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1, + THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1, + YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1, + THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1, + THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1, + THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1, + THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1, + YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1, + YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1, + THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1, + THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1, + OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1, + IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1, + STORAGE_LAYOUT_DOES_NOT_MATCH=1, + EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1, + MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1, + THIS_TYPE_IS_NOT_SUPPORTED=1, + STORAGE_KIND_MUST_MATCH=1, + STORAGE_INDEX_MUST_MATCH=1, + CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1, + SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1 }; }; @@ -131,7 +133,7 @@ #define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG); #endif // EIGEN_NO_STATIC_ASSERT - +#endif // EIGEN_STATIC_ASSERT // static assertion failing if the type \a TYPE is not a vector type #define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \ -- cgit v1.2.3