summaryrefslogtreecommitdiffhomepage
path: root/eigen/Eigen/src/Core
diff options
context:
space:
mode:
authorStanislaw Halik <sthalik@misaki.pl>2018-11-12 06:42:35 +0100
committerStanislaw Halik <sthalik@misaki.pl>2018-11-12 06:42:35 +0100
commit407b6208604d2822b1067ac64949e78a9167572b (patch)
tree8e4371deef2804e77e2fe6e17158be2536de28da /eigen/Eigen/src/Core
parentca2e0fcdcfff03747500344e2522ff330ccafa14 (diff)
eigen update
Diffstat (limited to 'eigen/Eigen/src/Core')
-rw-r--r--eigen/Eigen/src/Core/AssignEvaluator.h4
-rw-r--r--eigen/Eigen/src/Core/Assign_MKL.h6
-rw-r--r--eigen/Eigen/src/Core/CoreEvaluators.h39
-rw-r--r--eigen/Eigen/src/Core/Diagonal.h5
-rw-r--r--eigen/Eigen/src/Core/Dot.h17
-rw-r--r--eigen/Eigen/src/Core/GeneralProduct.h21
-rw-r--r--eigen/Eigen/src/Core/Map.h17
-rw-r--r--eigen/Eigen/src/Core/MathFunctions.h28
-rw-r--r--eigen/Eigen/src/Core/MathFunctionsImpl.h23
-rw-r--r--eigen/Eigen/src/Core/MatrixBase.h9
-rw-r--r--eigen/Eigen/src/Core/PlainObjectBase.h4
-rw-r--r--eigen/Eigen/src/Core/Product.h10
-rw-r--r--eigen/Eigen/src/Core/ProductEvaluators.h31
-rw-r--r--eigen/Eigen/src/Core/Redux.h2
-rw-r--r--eigen/Eigen/src/Core/Ref.h2
-rw-r--r--eigen/Eigen/src/Core/SelfAdjointView.h6
-rw-r--r--eigen/Eigen/src/Core/SelfCwiseBinaryOp.h4
-rw-r--r--eigen/Eigen/src/Core/StableNorm.h2
-rw-r--r--eigen/Eigen/src/Core/Transpositions.h2
-rw-r--r--eigen/Eigen/src/Core/arch/AVX/Complex.h36
-rw-r--r--eigen/Eigen/src/Core/arch/AVX/PacketMath.h11
-rw-r--r--eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h33
-rw-r--r--eigen/Eigen/src/Core/arch/AVX512/PacketMath.h6
-rw-r--r--eigen/Eigen/src/Core/arch/AltiVec/Complex.h35
-rw-r--r--eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h44
-rw-r--r--eigen/Eigen/src/Core/arch/CUDA/Half.h64
-rw-r--r--eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h2
-rw-r--r--eigen/Eigen/src/Core/arch/NEON/Complex.h12
-rw-r--r--eigen/Eigen/src/Core/arch/NEON/PacketMath.h31
-rw-r--r--eigen/Eigen/src/Core/arch/SSE/Complex.h40
-rw-r--r--eigen/Eigen/src/Core/arch/SSE/PacketMath.h22
-rw-r--r--eigen/Eigen/src/Core/arch/SSE/TypeCasting.h28
-rw-r--r--eigen/Eigen/src/Core/arch/ZVector/Complex.h3
-rw-r--r--eigen/Eigen/src/Core/functors/BinaryFunctors.h25
-rw-r--r--eigen/Eigen/src/Core/functors/StlFunctors.h4
-rw-r--r--eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h8
-rw-r--r--eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h19
-rw-r--r--eigen/Eigen/src/Core/products/GeneralMatrixVector.h8
-rw-r--r--eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h19
-rw-r--r--eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h48
-rw-r--r--eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h9
-rw-r--r--eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h35
-rw-r--r--eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h39
-rw-r--r--eigen/Eigen/src/Core/products/TriangularMatrixVector.h22
-rw-r--r--eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h46
-rw-r--r--eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h40
-rw-r--r--eigen/Eigen/src/Core/util/MKL_support.h10
-rw-r--r--eigen/Eigen/src/Core/util/Macros.h19
-rw-r--r--eigen/Eigen/src/Core/util/Memory.h14
-rw-r--r--eigen/Eigen/src/Core/util/Meta.h20
-rw-r--r--eigen/Eigen/src/Core/util/StaticAssert.h120
51 files changed, 646 insertions, 458 deletions
diff --git a/eigen/Eigen/src/Core/AssignEvaluator.h b/eigen/Eigen/src/Core/AssignEvaluator.h
index b0ec7b7..dbe435d 100644
--- a/eigen/Eigen/src/Core/AssignEvaluator.h
+++ b/eigen/Eigen/src/Core/AssignEvaluator.h
@@ -39,7 +39,7 @@ public:
enum {
DstAlignment = DstEvaluator::Alignment,
SrcAlignment = SrcEvaluator::Alignment,
- DstHasDirectAccess = DstFlags & DirectAccessBit,
+ DstHasDirectAccess = (DstFlags & DirectAccessBit) == DirectAccessBit,
JointAlignment = EIGEN_PLAIN_ENUM_MIN(DstAlignment,SrcAlignment)
};
@@ -83,7 +83,7 @@ private:
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
&& (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
- MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
+ MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize) && bool(DstHasDirectAccess)
&& (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
so it's only good for large enough sizes. */
diff --git a/eigen/Eigen/src/Core/Assign_MKL.h b/eigen/Eigen/src/Core/Assign_MKL.h
index 6c2ab92..6866095 100644
--- a/eigen/Eigen/src/Core/Assign_MKL.h
+++ b/eigen/Eigen/src/Core/Assign_MKL.h
@@ -84,7 +84,8 @@ class vml_assign_traits
struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
- static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
+ static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
+ resize_if_allowed(dst, src, func); \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
@@ -144,7 +145,8 @@ EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _)
Dense2Dense, typename enable_if<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>::type> { \
typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
- static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &/*func*/) { \
+ static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
+ resize_if_allowed(dst, src, func); \
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
diff --git a/eigen/Eigen/src/Core/CoreEvaluators.h b/eigen/Eigen/src/Core/CoreEvaluators.h
index f7c1eff..910889e 100644
--- a/eigen/Eigen/src/Core/CoreEvaluators.h
+++ b/eigen/Eigen/src/Core/CoreEvaluators.h
@@ -977,7 +977,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
? int(outer_stride_at_compile_time<ArgType>::ret)
: int(inner_stride_at_compile_time<ArgType>::ret),
- MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
+ MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
@@ -987,7 +987,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
PacketAlignment = unpacket_traits<PacketScalar>::alignment,
- Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
+ Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)
+ && (OuterStrideAtCompileTime!=0)
+ && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
};
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
@@ -1018,14 +1020,16 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& block)
: m_argImpl(block.nestedExpression()),
m_startRow(block.startRow()),
- m_startCol(block.startCol())
+ m_startCol(block.startCol()),
+ m_linear_offset(InnerPanel?(XprType::IsRowMajor ? block.startRow()*block.cols() : block.startCol()*block.rows()):0)
{ }
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
enum {
- RowsAtCompileTime = XprType::RowsAtCompileTime
+ RowsAtCompileTime = XprType::RowsAtCompileTime,
+ ForwardLinearAccess = InnerPanel && bool(evaluator<ArgType>::Flags&LinearAccessBit)
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -1037,7 +1041,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index index) const
{
- return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+ if (ForwardLinearAccess)
+ return m_argImpl.coeff(m_linear_offset.value() + index);
+ else
+ return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -1049,7 +1056,10 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Scalar& coeffRef(Index index)
{
- return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
+ if (ForwardLinearAccess)
+ return m_argImpl.coeffRef(m_linear_offset.value() + index);
+ else
+ return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
}
template<int LoadMode, typename PacketType>
@@ -1063,8 +1073,11 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_STRONG_INLINE
PacketType packet(Index index) const
{
- return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0);
+ if (ForwardLinearAccess)
+ return m_argImpl.template packet<LoadMode,PacketType>(m_linear_offset.value() + index);
+ else
+ return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
+ RowsAtCompileTime == 1 ? index : 0);
}
template<int StoreMode, typename PacketType>
@@ -1078,15 +1091,19 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
EIGEN_STRONG_INLINE
void writePacket(Index index, const PacketType& x)
{
- return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
- RowsAtCompileTime == 1 ? index : 0,
- x);
+ if (ForwardLinearAccess)
+ return m_argImpl.template writePacket<StoreMode,PacketType>(m_linear_offset.value() + index, x);
+ else
+ return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
+ RowsAtCompileTime == 1 ? index : 0,
+ x);
}
protected:
evaluator<ArgType> m_argImpl;
const variable_if_dynamic<Index, (ArgType::RowsAtCompileTime == 1 && BlockRows==1) ? 0 : Dynamic> m_startRow;
const variable_if_dynamic<Index, (ArgType::ColsAtCompileTime == 1 && BlockCols==1) ? 0 : Dynamic> m_startCol;
+ const variable_if_dynamic<Index, InnerPanel ? Dynamic : 0> m_linear_offset;
};
// TODO: This evaluator does not actually use the child evaluator;
diff --git a/eigen/Eigen/src/Core/Diagonal.h b/eigen/Eigen/src/Core/Diagonal.h
index 49e7112..afcaf35 100644
--- a/eigen/Eigen/src/Core/Diagonal.h
+++ b/eigen/Eigen/src/Core/Diagonal.h
@@ -70,7 +70,10 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
EIGEN_DEVICE_FUNC
- explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
+ explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index)
+ {
+ eigen_assert( a_index <= m_matrix.cols() && -a_index <= m_matrix.rows() );
+ }
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
diff --git a/eigen/Eigen/src/Core/Dot.h b/eigen/Eigen/src/Core/Dot.h
index 06ef18b..1fe7a84 100644
--- a/eigen/Eigen/src/Core/Dot.h
+++ b/eigen/Eigen/src/Core/Dot.h
@@ -31,7 +31,8 @@ struct dot_nocheck
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
typedef typename conj_prod::result_type ResScalar;
EIGEN_DEVICE_FUNC
- static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
+ EIGEN_STRONG_INLINE
+ static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
return a.template binaryExpr<conj_prod>(b).sum();
}
@@ -43,7 +44,8 @@ struct dot_nocheck<T, U, true>
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
typedef typename conj_prod::result_type ResScalar;
EIGEN_DEVICE_FUNC
- static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
+ EIGEN_STRONG_INLINE
+ static ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
return a.transpose().template binaryExpr<conj_prod>(b).sum();
}
@@ -65,6 +67,7 @@ struct dot_nocheck<T, U, true>
template<typename Derived>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
+EIGEN_STRONG_INLINE
typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
{
@@ -102,7 +105,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
* \sa lpNorm(), dot(), squaredNorm()
*/
template<typename Derived>
-inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
+EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{
return numext::sqrt(squaredNorm());
}
@@ -117,7 +120,7 @@ inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real Matr
* \sa norm(), normalize()
*/
template<typename Derived>
-inline const typename MatrixBase<Derived>::PlainObject
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::normalized() const
{
typedef typename internal::nested_eval<Derived,2>::type _Nested;
@@ -139,7 +142,7 @@ MatrixBase<Derived>::normalized() const
* \sa norm(), normalized()
*/
template<typename Derived>
-inline void MatrixBase<Derived>::normalize()
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::normalize()
{
RealScalar z = squaredNorm();
// NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
@@ -160,7 +163,7 @@ inline void MatrixBase<Derived>::normalize()
* \sa stableNorm(), stableNormalize(), normalized()
*/
template<typename Derived>
-inline const typename MatrixBase<Derived>::PlainObject
+EIGEN_STRONG_INLINE const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::stableNormalized() const
{
typedef typename internal::nested_eval<Derived,3>::type _Nested;
@@ -185,7 +188,7 @@ MatrixBase<Derived>::stableNormalized() const
* \sa stableNorm(), stableNormalized(), normalize()
*/
template<typename Derived>
-inline void MatrixBase<Derived>::stableNormalize()
+EIGEN_STRONG_INLINE void MatrixBase<Derived>::stableNormalize()
{
RealScalar w = cwiseAbs().maxCoeff();
RealScalar z = (derived()/w).squaredNorm();
diff --git a/eigen/Eigen/src/Core/GeneralProduct.h b/eigen/Eigen/src/Core/GeneralProduct.h
index 0f16cd8..6f0cc80 100644
--- a/eigen/Eigen/src/Core/GeneralProduct.h
+++ b/eigen/Eigen/src/Core/GeneralProduct.h
@@ -24,12 +24,17 @@ template<int Rows, int Cols, int Depth> struct product_type_selector;
template<int Size, int MaxSize> struct product_size_category
{
- enum { is_large = MaxSize == Dynamic ||
- Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
- (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
- value = is_large ? Large
- : Size == 1 ? 1
- : Small
+ enum {
+ #ifndef EIGEN_CUDA_ARCH
+ is_large = MaxSize == Dynamic ||
+ Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD ||
+ (Size==Dynamic && MaxSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD),
+ #else
+ is_large = 0,
+ #endif
+ value = is_large ? Large
+ : Size == 1 ? 1
+ : Small
};
};
@@ -379,8 +384,6 @@ template<> struct gemv_dense_selector<OnTheRight,RowMajor,false>
*
* \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
*/
-#ifndef __CUDACC__
-
template<typename Derived>
template<typename OtherDerived>
inline const Product<Derived, OtherDerived>
@@ -412,8 +415,6 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
return Product<Derived, OtherDerived>(derived(), other.derived());
}
-#endif // __CUDACC__
-
/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
*
* The returned product will behave like any other expressions: the coefficients of the product will be
diff --git a/eigen/Eigen/src/Core/Map.h b/eigen/Eigen/src/Core/Map.h
index 06d1967..548bf9a 100644
--- a/eigen/Eigen/src/Core/Map.h
+++ b/eigen/Eigen/src/Core/Map.h
@@ -20,11 +20,17 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
{
typedef traits<PlainObjectType> TraitsBase;
enum {
+ PlainObjectTypeInnerSize = ((traits<PlainObjectType>::Flags&RowMajorBit)==RowMajorBit)
+ ? PlainObjectType::ColsAtCompileTime
+ : PlainObjectType::RowsAtCompileTime,
+
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
? int(PlainObjectType::InnerStrideAtCompileTime)
: int(StrideType::InnerStrideAtCompileTime),
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
- ? int(PlainObjectType::OuterStrideAtCompileTime)
+ ? (InnerStrideAtCompileTime==Dynamic || PlainObjectTypeInnerSize==Dynamic
+ ? Dynamic
+ : int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize))
: int(StrideType::OuterStrideAtCompileTime),
Alignment = int(MapOptions)&int(AlignedMask),
Flags0 = TraitsBase::Flags & (~NestByRefBit),
@@ -107,10 +113,11 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
EIGEN_DEVICE_FUNC
inline Index outerStride() const
{
- return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
- : IsVectorAtCompileTime ? this->size()
- : int(Flags)&RowMajorBit ? this->cols()
- : this->rows();
+ return int(StrideType::OuterStrideAtCompileTime) != 0 ? m_stride.outer()
+ : int(internal::traits<Map>::OuterStrideAtCompileTime) != Dynamic ? Index(internal::traits<Map>::OuterStrideAtCompileTime)
+ : IsVectorAtCompileTime ? (this->size() * innerStride())
+ : (int(Flags)&RowMajorBit) ? (this->cols() * innerStride())
+ : (this->rows() * innerStride());
}
/** Constructor in the fixed-size case.
diff --git a/eigen/Eigen/src/Core/MathFunctions.h b/eigen/Eigen/src/Core/MathFunctions.h
index a648aa0..6eb974d 100644
--- a/eigen/Eigen/src/Core/MathFunctions.h
+++ b/eigen/Eigen/src/Core/MathFunctions.h
@@ -348,31 +348,7 @@ struct norm1_retval
* Implementation of hypot *
****************************************************************************/
-template<typename Scalar>
-struct hypot_impl
-{
- typedef typename NumTraits<Scalar>::Real RealScalar;
- static inline RealScalar run(const Scalar& x, const Scalar& y)
- {
- EIGEN_USING_STD_MATH(abs);
- EIGEN_USING_STD_MATH(sqrt);
- RealScalar _x = abs(x);
- RealScalar _y = abs(y);
- Scalar p, qp;
- if(_x>_y)
- {
- p = _x;
- qp = _y / p;
- }
- else
- {
- p = _y;
- qp = _x / p;
- }
- if(p==RealScalar(0)) return RealScalar(0);
- return p * sqrt(RealScalar(1) + qp*qp);
- }
-};
+template<typename Scalar> struct hypot_impl;
template<typename Scalar>
struct hypot_retval
@@ -495,7 +471,7 @@ namespace std_fallback {
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_USING_STD_MATH(log);
Scalar x1p = RealScalar(1) + x;
- return ( x1p == Scalar(1) ) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
+ return numext::equal_strict(x1p, Scalar(1)) ? x : x * ( log(x1p) / (x1p - RealScalar(1)) );
}
}
diff --git a/eigen/Eigen/src/Core/MathFunctionsImpl.h b/eigen/Eigen/src/Core/MathFunctionsImpl.h
index 3c9ef22..9c1ceb0 100644
--- a/eigen/Eigen/src/Core/MathFunctionsImpl.h
+++ b/eigen/Eigen/src/Core/MathFunctionsImpl.h
@@ -71,6 +71,29 @@ T generic_fast_tanh_float(const T& a_x)
return pdiv(p, q);
}
+template<typename RealScalar>
+EIGEN_STRONG_INLINE
+RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
+{
+ EIGEN_USING_STD_MATH(sqrt);
+ RealScalar p, qp;
+ p = numext::maxi(x,y);
+ if(p==RealScalar(0)) return RealScalar(0);
+ qp = numext::mini(y,x) / p;
+ return p * sqrt(RealScalar(1) + qp*qp);
+}
+
+template<typename Scalar>
+struct hypot_impl
+{
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+ static inline RealScalar run(const Scalar& x, const Scalar& y)
+ {
+ EIGEN_USING_STD_MATH(abs);
+ return positive_real_hypot<RealScalar>(abs(x), abs(y));
+ }
+};
+
} // end namespace internal
} // end namespace Eigen
diff --git a/eigen/Eigen/src/Core/MatrixBase.h b/eigen/Eigen/src/Core/MatrixBase.h
index ce41218..05db488 100644
--- a/eigen/Eigen/src/Core/MatrixBase.h
+++ b/eigen/Eigen/src/Core/MatrixBase.h
@@ -160,20 +160,11 @@ template<typename Derived> class MatrixBase
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator-=(const MatrixBase<OtherDerived>& other);
-#ifdef __CUDACC__
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
- const Product<Derived,OtherDerived,LazyProduct>
- operator*(const MatrixBase<OtherDerived> &other) const
- { return this->lazyProduct(other); }
-#else
-
- template<typename OtherDerived>
const Product<Derived,OtherDerived>
operator*(const MatrixBase<OtherDerived> &other) const;
-#endif
-
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
const Product<Derived,OtherDerived,LazyProduct>
diff --git a/eigen/Eigen/src/Core/PlainObjectBase.h b/eigen/Eigen/src/Core/PlainObjectBase.h
index 77f4f60..1dc7e22 100644
--- a/eigen/Eigen/src/Core/PlainObjectBase.h
+++ b/eigen/Eigen/src/Core/PlainObjectBase.h
@@ -577,6 +577,10 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
* while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned
* \a data pointers.
*
+ * Here is an example using strides:
+ * \include Matrix_Map_stride.cpp
+ * Output: \verbinclude Matrix_Map_stride.out
+ *
* \see class Map
*/
//@{
diff --git a/eigen/Eigen/src/Core/Product.h b/eigen/Eigen/src/Core/Product.h
index ae0c94b..676c480 100644
--- a/eigen/Eigen/src/Core/Product.h
+++ b/eigen/Eigen/src/Core/Product.h
@@ -97,8 +97,8 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option,
&& "if you wanted a coeff-wise or a dot product use the respective explicit functions");
}
- EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
- EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
EIGEN_DEVICE_FUNC const LhsNestedCleaned& lhs() const { return m_lhs; }
EIGEN_DEVICE_FUNC const RhsNestedCleaned& rhs() const { return m_rhs; }
@@ -127,7 +127,7 @@ public:
using Base::derived;
typedef typename Base::Scalar Scalar;
- operator const Scalar() const
+ EIGEN_STRONG_INLINE operator const Scalar() const
{
return internal::evaluator<ProductXpr>(derived()).coeff(0,0);
}
@@ -162,7 +162,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
public:
- EIGEN_DEVICE_FUNC Scalar coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const
{
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
@@ -170,7 +170,7 @@ class ProductImpl<Lhs,Rhs,Option,Dense>
return internal::evaluator<Derived>(derived()).coeff(row,col);
}
- EIGEN_DEVICE_FUNC Scalar coeff(Index i) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const
{
EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS);
eigen_assert( (Option==LazyProduct) || (this->rows() == 1 && this->cols() == 1) );
diff --git a/eigen/Eigen/src/Core/ProductEvaluators.h b/eigen/Eigen/src/Core/ProductEvaluators.h
index c42725d..9b99bd7 100644
--- a/eigen/Eigen/src/Core/ProductEvaluators.h
+++ b/eigen/Eigen/src/Core/ProductEvaluators.h
@@ -32,7 +32,7 @@ struct evaluator<Product<Lhs, Rhs, Options> >
typedef Product<Lhs, Rhs, Options> XprType;
typedef product_evaluator<XprType> Base;
- EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {}
};
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
@@ -55,7 +55,7 @@ struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
const Product<Lhs, Rhs, DefaultProduct> > XprType;
typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
- EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
: Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
{}
};
@@ -68,7 +68,7 @@ struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
- EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr)
: Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
xpr.index() ))
@@ -246,19 +246,19 @@ template<typename Lhs, typename Rhs>
struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
{
template<typename Dst>
- static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
}
template<typename Dst>
- static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
}
template<typename Dst>
- static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
};
@@ -312,25 +312,25 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
};
template<typename Dst>
- static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
}
template<typename Dst>
- static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
}
template<typename Dst>
- static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
+ static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
}
template<typename Dst>
- static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
+ static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
{
internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
}
@@ -785,7 +785,11 @@ public:
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
- Alignment = evaluator<MatrixType>::Alignment
+ Alignment = evaluator<MatrixType>::Alignment,
+
+ AsScalarProduct = (DiagonalType::SizeAtCompileTime==1)
+ || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::RowsAtCompileTime==1 && ProductOrder==OnTheLeft)
+ || (DiagonalType::SizeAtCompileTime==Dynamic && MatrixType::ColsAtCompileTime==1 && ProductOrder==OnTheRight)
};
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
@@ -797,7 +801,10 @@ public:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
{
- return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
+ if(AsScalarProduct)
+ return m_diagImpl.coeff(0) * m_matImpl.coeff(idx);
+ else
+ return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
}
protected:
diff --git a/eigen/Eigen/src/Core/Redux.h b/eigen/Eigen/src/Core/Redux.h
index b6e8f88..760e9f8 100644
--- a/eigen/Eigen/src/Core/Redux.h
+++ b/eigen/Eigen/src/Core/Redux.h
@@ -407,7 +407,7 @@ protected:
*/
template<typename Derived>
template<typename Func>
-typename internal::traits<Derived>::Scalar
+EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::redux(const Func& func) const
{
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
diff --git a/eigen/Eigen/src/Core/Ref.h b/eigen/Eigen/src/Core/Ref.h
index bdf24f5..9c6e3c5 100644
--- a/eigen/Eigen/src/Core/Ref.h
+++ b/eigen/Eigen/src/Core/Ref.h
@@ -95,6 +95,8 @@ protected:
template<typename Expression>
EIGEN_DEVICE_FUNC void construct(Expression& expr)
{
+ EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(PlainObjectType,Expression);
+
if(PlainObjectType::RowsAtCompileTime==1)
{
eigen_assert(expr.rows()==1 || expr.cols()==1);
diff --git a/eigen/Eigen/src/Core/SelfAdjointView.h b/eigen/Eigen/src/Core/SelfAdjointView.h
index 504c98f..b2e51f3 100644
--- a/eigen/Eigen/src/Core/SelfAdjointView.h
+++ b/eigen/Eigen/src/Core/SelfAdjointView.h
@@ -71,7 +71,9 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
EIGEN_DEVICE_FUNC
explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
- {}
+ {
+ EIGEN_STATIC_ASSERT(UpLo==Lower || UpLo==Upper,SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY);
+ }
EIGEN_DEVICE_FUNC
inline Index rows() const { return m_matrix.rows(); }
@@ -189,7 +191,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
TriangularView<typename MatrixType::AdjointReturnType,TriMode> >::type(tmp2);
}
- typedef SelfAdjointView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
+ typedef SelfAdjointView<const MatrixConjugateReturnType,UpLo> ConjugateReturnType;
/** \sa MatrixBase::conjugate() const */
EIGEN_DEVICE_FUNC
inline const ConjugateReturnType conjugate() const
diff --git a/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h b/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h
index 50099df..7c89c2e 100644
--- a/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -17,7 +17,6 @@ namespace Eigen {
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{
- typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
return derived();
}
@@ -25,7 +24,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(co
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
{
- typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
return derived();
}
@@ -33,7 +31,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(co
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
{
- typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
return derived();
}
@@ -41,7 +38,6 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(co
template<typename Derived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{
- typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
return derived();
}
diff --git a/eigen/Eigen/src/Core/StableNorm.h b/eigen/Eigen/src/Core/StableNorm.h
index be04ed4..88c8d98 100644
--- a/eigen/Eigen/src/Core/StableNorm.h
+++ b/eigen/Eigen/src/Core/StableNorm.h
@@ -165,7 +165,7 @@ MatrixBase<Derived>::stableNorm() const
typedef typename internal::nested_eval<Derived,2>::type DerivedCopy;
typedef typename internal::remove_all<DerivedCopy>::type DerivedCopyClean;
- DerivedCopy copy(derived());
+ const DerivedCopy copy(derived());
enum {
CanAlign = ( (int(DerivedCopyClean::Flags)&DirectAccessBit)
diff --git a/eigen/Eigen/src/Core/Transpositions.h b/eigen/Eigen/src/Core/Transpositions.h
index 19c17bb..86da5af 100644
--- a/eigen/Eigen/src/Core/Transpositions.h
+++ b/eigen/Eigen/src/Core/Transpositions.h
@@ -384,7 +384,7 @@ class Transpose<TranspositionsBase<TranspositionsDerived> >
const Product<OtherDerived, Transpose, AliasFreeProduct>
operator*(const MatrixBase<OtherDerived>& matrix, const Transpose& trt)
{
- return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt.derived());
+ return Product<OtherDerived, Transpose, AliasFreeProduct>(matrix.derived(), trt);
}
/** \returns the \a matrix with the inverse transpositions applied to the rows.
diff --git a/eigen/Eigen/src/Core/arch/AVX/Complex.h b/eigen/Eigen/src/Core/arch/AVX/Complex.h
index 99439c8..7fa6196 100644
--- a/eigen/Eigen/src/Core/arch/AVX/Complex.h
+++ b/eigen/Eigen/src/Core/arch/AVX/Complex.h
@@ -204,23 +204,7 @@ template<> struct conj_helper<Packet4cf, Packet4cf, true,true>
}
};
-template<> struct conj_helper<Packet8f, Packet4cf, false,false>
-{
- EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet8f& x, const Packet4cf& y, const Packet4cf& c) const
- { return padd(c, pmul(x,y)); }
-
- EIGEN_STRONG_INLINE Packet4cf pmul(const Packet8f& x, const Packet4cf& y) const
- { return Packet4cf(Eigen::internal::pmul(x, y.v)); }
-};
-
-template<> struct conj_helper<Packet4cf, Packet8f, false,false>
-{
- EIGEN_STRONG_INLINE Packet4cf pmadd(const Packet4cf& x, const Packet8f& y, const Packet4cf& c) const
- { return padd(c, pmul(x,y)); }
-
- EIGEN_STRONG_INLINE Packet4cf pmul(const Packet4cf& x, const Packet8f& y) const
- { return Packet4cf(Eigen::internal::pmul(x.v, y)); }
-};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet4cf,Packet8f)
template<> EIGEN_STRONG_INLINE Packet4cf pdiv<Packet4cf>(const Packet4cf& a, const Packet4cf& b)
{
@@ -400,23 +384,7 @@ template<> struct conj_helper<Packet2cd, Packet2cd, true,true>
}
};
-template<> struct conj_helper<Packet4d, Packet2cd, false,false>
-{
- EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet4d& x, const Packet2cd& y, const Packet2cd& c) const
- { return padd(c, pmul(x,y)); }
-
- EIGEN_STRONG_INLINE Packet2cd pmul(const Packet4d& x, const Packet2cd& y) const
- { return Packet2cd(Eigen::internal::pmul(x, y.v)); }
-};
-
-template<> struct conj_helper<Packet2cd, Packet4d, false,false>
-{
- EIGEN_STRONG_INLINE Packet2cd pmadd(const Packet2cd& x, const Packet4d& y, const Packet2cd& c) const
- { return padd(c, pmul(x,y)); }
-
- EIGEN_STRONG_INLINE Packet2cd pmul(const Packet2cd& x, const Packet4d& y) const
- { return Packet2cd(Eigen::internal::pmul(x.v, y)); }
-};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cd,Packet4d)
template<> EIGEN_STRONG_INLINE Packet2cd pdiv<Packet2cd>(const Packet2cd& a, const Packet2cd& b)
{
diff --git a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h
index 195d40f..61c3dfc 100644
--- a/eigen/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/eigen/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -308,9 +308,9 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet8i>(int* to, const int& a)
}
#ifndef EIGEN_VECTORIZE_AVX512
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
#endif
template<> EIGEN_STRONG_INLINE float pfirst<Packet8f>(const Packet8f& a) {
@@ -333,9 +333,12 @@ template<> EIGEN_STRONG_INLINE Packet4d preverse(const Packet4d& a)
{
__m256d tmp = _mm256_shuffle_pd(a,a,5);
return _mm256_permute2f128_pd(tmp, tmp, 1);
-
+ #if 0
+ // This version is unlikely to be faster as _mm256_shuffle_ps and _mm256_permute_pd
+ // exhibit the same latency/throughput, but it is here for future reference/benchmarking...
__m256d swap_halves = _mm256_permute2f128_pd(a,a,1);
return _mm256_permute_pd(swap_halves,5);
+ #endif
}
// pabs should be ok
diff --git a/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h b/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h
index 399be0e..9c1717f 100644
--- a/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h
+++ b/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h
@@ -88,9 +88,9 @@ plog<Packet16f>(const Packet16f& _x) {
// x = x + x - 1.0;
// } else { x = x - 1.0; }
__mmask16 mask = _mm512_cmp_ps_mask(x, p16f_cephes_SQRTHF, _CMP_LT_OQ);
- Packet16f tmp = _mm512_mask_blend_ps(mask, x, _mm512_setzero_ps());
+ Packet16f tmp = _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), x);
x = psub(x, p16f_1);
- e = psub(e, _mm512_mask_blend_ps(mask, p16f_1, _mm512_setzero_ps()));
+ e = psub(e, _mm512_mask_blend_ps(mask, _mm512_setzero_ps(), p16f_1));
x = padd(x, tmp);
Packet16f x2 = pmul(x, x);
@@ -119,8 +119,9 @@ plog<Packet16f>(const Packet16f& _x) {
x = padd(x, y2);
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
- return _mm512_mask_blend_ps(iszero_mask, p16f_minus_inf,
- _mm512_mask_blend_ps(invalid_mask, p16f_nan, x));
+ return _mm512_mask_blend_ps(iszero_mask,
+ _mm512_mask_blend_ps(invalid_mask, x, p16f_nan),
+ p16f_minus_inf);
}
#endif
@@ -266,8 +267,7 @@ psqrt<Packet16f>(const Packet16f& _x) {
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask16 non_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_GE_OQ);
- Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_rsqrt14_ps(_x),
- _mm512_setzero_ps());
+ Packet16f x = _mm512_mask_blend_ps(non_zero_mask, _mm512_setzero_ps(), _mm512_rsqrt14_ps(_x));
// Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
@@ -289,8 +289,7 @@ psqrt<Packet8d>(const Packet8d& _x) {
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask8 non_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_GE_OQ);
- Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_rsqrt14_pd(_x),
- _mm512_setzero_pd());
+ Packet8d x = _mm512_mask_blend_pd(non_zero_mask, _mm512_setzero_pd(), _mm512_rsqrt14_pd(_x));
// Do a first step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
@@ -333,20 +332,18 @@ prsqrt<Packet16f>(const Packet16f& _x) {
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask16 le_zero_mask = _mm512_cmp_ps_mask(_x, p16f_flt_min, _CMP_LT_OQ);
- Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(),
- _mm512_rsqrt14_ps(_x));
+ Packet16f x = _mm512_mask_blend_ps(le_zero_mask, _mm512_rsqrt14_ps(_x), _mm512_setzero_ps());
// Fill in NaNs and Infs for the negative/zero entries.
__mmask16 neg_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LT_OQ);
Packet16f infs_and_nans = _mm512_mask_blend_ps(
- neg_mask, p16f_nan,
- _mm512_mask_blend_ps(le_zero_mask, p16f_inf, _mm512_setzero_ps()));
+ neg_mask, _mm512_mask_blend_ps(le_zero_mask, _mm512_setzero_ps(), p16f_inf), p16f_nan);
// Do a single step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p16f_one_point_five));
// Insert NaNs and Infs in all the right places.
- return _mm512_mask_blend_ps(le_zero_mask, infs_and_nans, x);
+ return _mm512_mask_blend_ps(le_zero_mask, x, infs_and_nans);
}
template <>
@@ -363,14 +360,12 @@ prsqrt<Packet8d>(const Packet8d& _x) {
// select only the inverse sqrt of positive normal inputs (denormals are
// flushed to zero and cause infs as well).
__mmask8 le_zero_mask = _mm512_cmp_pd_mask(_x, p8d_dbl_min, _CMP_LT_OQ);
- Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(),
- _mm512_rsqrt14_pd(_x));
+ Packet8d x = _mm512_mask_blend_pd(le_zero_mask, _mm512_rsqrt14_pd(_x), _mm512_setzero_pd());
// Fill in NaNs and Infs for the negative/zero entries.
__mmask8 neg_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LT_OQ);
Packet8d infs_and_nans = _mm512_mask_blend_pd(
- neg_mask, p8d_nan,
- _mm512_mask_blend_pd(le_zero_mask, p8d_inf, _mm512_setzero_pd()));
+ neg_mask, _mm512_mask_blend_pd(le_zero_mask, _mm512_setzero_pd(), p8d_inf), p8d_nan);
// Do a first step of Newton's iteration.
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
@@ -379,9 +374,9 @@ prsqrt<Packet8d>(const Packet8d& _x) {
x = pmul(x, pmadd(neg_half, pmul(x, x), p8d_one_point_five));
// Insert NaNs and Infs in all the right places.
- return _mm512_mask_blend_pd(le_zero_mask, infs_and_nans, x);
+ return _mm512_mask_blend_pd(le_zero_mask, x, infs_and_nans);
}
-#else
+#elif defined(EIGEN_VECTORIZE_AVX512ER)
template <>
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
return _mm512_rsqrt28_ps(x);
diff --git a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h
index f6500a1..8970524 100644
--- a/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -618,9 +618,9 @@ EIGEN_STRONG_INLINE void pstore1<Packet16i>(int* to, const int& a) {
pstore(to, pa);
}
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template <>
EIGEN_STRONG_INLINE float pfirst<Packet16f>(const Packet16f& a) {
diff --git a/eigen/Eigen/src/Core/arch/AltiVec/Complex.h b/eigen/Eigen/src/Core/arch/AltiVec/Complex.h
index 67db2f8..3e66573 100644
--- a/eigen/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/eigen/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -224,23 +224,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
}
};
-template<> struct conj_helper<Packet4f, Packet2cf, false,false>
-{
- EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
- { return padd(c, pmul(x,y)); }
-
- EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
- { return Packet2cf(internal::pmul<Packet4f>(x, y.v)); }
-};
-
-template<> struct conj_helper<Packet2cf, Packet4f, false,false>
-{
- EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
- { return padd(c, pmul(x,y)); }
-
- EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
- { return Packet2cf(internal::pmul<Packet4f>(x.v, y)); }
-};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
@@ -416,23 +400,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
return pconj(internal::pmul(a, b));
}
};
-template<> struct conj_helper<Packet2d, Packet1cd, false,false>
-{
- EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
- { return padd(c, pmul(x,y)); }
-
- EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
- { return Packet1cd(internal::pmul<Packet2d>(x, y.v)); }
-};
-template<> struct conj_helper<Packet1cd, Packet2d, false,false>
-{
- EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
- { return padd(c, pmul(x,y)); }
-
- EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
- { return Packet1cd(internal::pmul<Packet2d>(x.v, y)); }
-};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
diff --git a/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h b/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h
index b3f1ea1..08a27d1 100644
--- a/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -103,7 +103,7 @@ static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4u
static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
#else
-static Packet16uc p16uc_FORWARD = p16uc_REVERSE32;
+static Packet16uc p16uc_FORWARD = p16uc_REVERSE32;
static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 1), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
static Packet16uc p16uc_PSET32_WEVEN = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 };
@@ -388,10 +388,28 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a,b,c); }
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return a*b + c; }
-template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ #ifdef __VSX__
+ Packet4f ret;
+ __asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
+ return ret;
+ #else
+ return vec_min(a, b);
+ #endif
+}
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
-template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
+{
+ #ifdef __VSX__
+ Packet4f ret;
+ __asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
+ return ret;
+ #else
+ return vec_max(a, b);
+ #endif
+}
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
@@ -764,7 +782,7 @@ typedef __vector __bool long Packet2bl;
static Packet2l p2l_ONE = { 1, 1 };
static Packet2l p2l_ZERO = reinterpret_cast<Packet2l>(p4i_ZERO);
-static Packet2d p2d_ONE = { 1.0, 1.0 };
+static Packet2d p2d_ONE = { 1.0, 1.0 };
static Packet2d p2d_ZERO = reinterpret_cast<Packet2d>(p4f_ZERO);
static Packet2d p2d_MZERO = { -0.0, -0.0 };
@@ -910,9 +928,19 @@ template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const
// for some weird raisons, it has to be overloaded for packet of integers
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
-template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ Packet2d ret;
+ __asm__ ("xvcmpgedp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
+ return ret;
+ }
-template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
+template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b)
+{
+ Packet2d ret;
+ __asm__ ("xvcmpgtdp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
+ return ret;
+}
template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
@@ -969,7 +997,7 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
Packet2d v[2], sum;
v[0] = vecs[0] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[0]), reinterpret_cast<Packet4f>(vecs[0]), 8));
v[1] = vecs[1] + reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(vecs[1]), reinterpret_cast<Packet4f>(vecs[1]), 8));
-
+
#ifdef _BIG_ENDIAN
sum = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4f>(v[0]), reinterpret_cast<Packet4f>(v[1]), 8));
#else
@@ -1022,7 +1050,7 @@ ptranspose(PacketBlock<Packet2d,2>& kernel) {
template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
Packet2l select = { ifPacket.select[0], ifPacket.select[1] };
- Packet2bl mask = vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE));
+ Packet2bl mask = reinterpret_cast<Packet2bl>( vec_cmpeq(reinterpret_cast<Packet2d>(select), reinterpret_cast<Packet2d>(p2l_ONE)) );
return vec_sel(elsePacket, thenPacket, mask);
}
#endif // __VSX__
diff --git a/eigen/Eigen/src/Core/arch/CUDA/Half.h b/eigen/Eigen/src/Core/arch/CUDA/Half.h
index 294c517..02ac0c2 100644
--- a/eigen/Eigen/src/Core/arch/CUDA/Half.h
+++ b/eigen/Eigen/src/Core/arch/CUDA/Half.h
@@ -147,55 +147,55 @@ namespace half_impl {
// versions to get the ALU speed increased), but you do save the
// conversion steps back and forth.
-__device__ half operator + (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator + (const half& a, const half& b) {
return __hadd(a, b);
}
-__device__ half operator * (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator * (const half& a, const half& b) {
return __hmul(a, b);
}
-__device__ half operator - (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator - (const half& a, const half& b) {
return __hsub(a, b);
}
-__device__ half operator / (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half operator / (const half& a, const half& b) {
float num = __half2float(a);
float denom = __half2float(b);
return __float2half(num / denom);
}
-__device__ half operator - (const half& a) {
+EIGEN_STRONG_INLINE __device__ half operator - (const half& a) {
return __hneg(a);
}
-__device__ half& operator += (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator += (half& a, const half& b) {
a = a + b;
return a;
}
-__device__ half& operator *= (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator *= (half& a, const half& b) {
a = a * b;
return a;
}
-__device__ half& operator -= (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator -= (half& a, const half& b) {
a = a - b;
return a;
}
-__device__ half& operator /= (half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ half& operator /= (half& a, const half& b) {
a = a / b;
return a;
}
-__device__ bool operator == (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator == (const half& a, const half& b) {
return __heq(a, b);
}
-__device__ bool operator != (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator != (const half& a, const half& b) {
return __hne(a, b);
}
-__device__ bool operator < (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator < (const half& a, const half& b) {
return __hlt(a, b);
}
-__device__ bool operator <= (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator <= (const half& a, const half& b) {
return __hle(a, b);
}
-__device__ bool operator > (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator > (const half& a, const half& b) {
return __hgt(a, b);
}
-__device__ bool operator >= (const half& a, const half& b) {
+EIGEN_STRONG_INLINE __device__ bool operator >= (const half& a, const half& b) {
return __hge(a, b);
}
@@ -238,10 +238,10 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half& operator /= (half& a, const half& b)
return a;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const half& a, const half& b) {
- return float(a) == float(b);
+ return numext::equal_strict(float(a),float(b));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const half& a, const half& b) {
- return float(a) != float(b);
+ return numext::not_equal_strict(float(a), float(b));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const half& a, const half& b) {
return float(a) < float(b);
@@ -386,11 +386,15 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half abs(const half& a) {
return result;
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) {
- return half(::expf(float(a)));
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
+ return half(hexp(a));
+#else
+ return half(::expf(float(a)));
+#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) {
-#if defined(EIGEN_HAS_CUDA_FP16) && defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
- return Eigen::half(::hlog(a));
+#if defined(EIGEN_HAS_CUDA_FP16) && EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
+ return half(::hlog(a));
#else
return half(::logf(float(a)));
#endif
@@ -402,7 +406,11 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) {
return half(::log10f(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half sqrt(const half& a) {
- return half(::sqrtf(float(a)));
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
+ return half(hsqrt(a));
+#else
+ return half(::sqrtf(float(a)));
+#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half pow(const half& a, const half& b) {
return half(::powf(float(a), float(b)));
@@ -420,10 +428,18 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half tanh(const half& a) {
return half(::tanhf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
+ return half(hfloor(a));
+#else
return half(::floorf(float(a)));
+#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half ceil(const half& a) {
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300
+ return half(hceil(a));
+#else
return half(::ceilf(float(a)));
+#endif
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half (min)(const half& a, const half& b) {
@@ -493,8 +509,8 @@ struct numeric_limits<Eigen::half> {
static const bool is_bounded = false;
static const bool is_modulo = false;
static const int digits = 11;
- static const int digits10 = 2;
- //static const int max_digits10 = ;
+ static const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
+ static const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html
static const int radix = 2;
static const int min_exponent = -13;
static const int min_exponent10 = -4;
@@ -557,7 +573,7 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half exph(const Eigen::half& a) {
return Eigen::half(::expf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half logh(const Eigen::half& a) {
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if EIGEN_CUDACC_VER >= 80000 && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 530
return Eigen::half(::hlog(a));
#else
return Eigen::half(::logf(float(a)));
diff --git a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index ae54225..943e0b0 100644
--- a/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -275,7 +275,7 @@ template<> __device__ EIGEN_STRONG_INLINE half2 plog1p<half2>(const half2& a) {
return __floats2half2_rn(r1, r2);
}
-#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
+#if EIGEN_CUDACC_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 530
template<> __device__ EIGEN_STRONG_INLINE
half2 plog<half2>(const half2& a) {
diff --git a/eigen/Eigen/src/Core/arch/NEON/Complex.h b/eigen/Eigen/src/Core/arch/NEON/Complex.h
index 57e9b43..306a309 100644
--- a/eigen/Eigen/src/Core/arch/NEON/Complex.h
+++ b/eigen/Eigen/src/Core/arch/NEON/Complex.h
@@ -67,7 +67,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
{
float32x2_t r64;
- r64 = vld1_f32((float *)&from);
+ r64 = vld1_f32((const float *)&from);
return Packet2cf(vcombine_f32(r64, r64));
}
@@ -142,7 +142,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
}
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((const float *)addr); }
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
@@ -265,6 +265,8 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
}
};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
+
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
// TODO optimize it for NEON
@@ -275,7 +277,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, con
s = vmulq_f32(b.v, b.v);
rev_s = vrev64q_f32(s);
- return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
+ return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s,rev_s)));
}
EIGEN_DEVICE_FUNC inline void
@@ -381,7 +383,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, from.v); }
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((double *)addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { EIGEN_ARM_PREFETCH((const double *)addr); }
template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(const std::complex<double>* from, Index stride)
{
@@ -456,6 +458,8 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
}
};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
+
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for NEON
diff --git a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h
index 836fbc0..3d5ed0d 100644
--- a/eigen/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/eigen/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -36,12 +36,43 @@ namespace internal {
#endif
#endif
+#if EIGEN_COMP_MSVC
+
+// In MSVC's arm_neon.h header file, all NEON vector types
+// are aliases to the same underlying type __n128.
+// We thus have to wrap them to make them different C++ types.
+// (See also bug 1428)
+
+template<typename T,int unique_id>
+struct eigen_packet_wrapper
+{
+ operator T&() { return m_val; }
+ operator const T&() const { return m_val; }
+ eigen_packet_wrapper() {}
+ eigen_packet_wrapper(const T &v) : m_val(v) {}
+ eigen_packet_wrapper& operator=(const T &v) {
+ m_val = v;
+ return *this;
+ }
+
+ T m_val;
+};
+typedef eigen_packet_wrapper<float32x2_t,0> Packet2f;
+typedef eigen_packet_wrapper<float32x4_t,1> Packet4f;
+typedef eigen_packet_wrapper<int32x4_t ,2> Packet4i;
+typedef eigen_packet_wrapper<int32x2_t ,3> Packet2i;
+typedef eigen_packet_wrapper<uint32x4_t ,4> Packet4ui;
+
+#else
+
typedef float32x2_t Packet2f;
typedef float32x4_t Packet4f;
typedef int32x4_t Packet4i;
typedef int32x2_t Packet2i;
typedef uint32x4_t Packet4ui;
+#endif // EIGEN_COMP_MSVC
+
#define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
const Packet4f p4f_##NAME = pset1<Packet4f>(X)
diff --git a/eigen/Eigen/src/Core/arch/SSE/Complex.h b/eigen/Eigen/src/Core/arch/SSE/Complex.h
index 5607fe0..d075043 100644
--- a/eigen/Eigen/src/Core/arch/SSE/Complex.h
+++ b/eigen/Eigen/src/Core/arch/SSE/Complex.h
@@ -128,7 +128,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf
_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
}
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
@@ -229,23 +229,7 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
}
};
-template<> struct conj_helper<Packet4f, Packet2cf, false,false>
-{
- EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet4f& x, const Packet2cf& y, const Packet2cf& c) const
- { return padd(c, pmul(x,y)); }
-
- EIGEN_STRONG_INLINE Packet2cf pmul(const Packet4f& x, const Packet2cf& y) const
- { return Packet2cf(Eigen::internal::pmul<Packet4f>(x, y.v)); }
-};
-
-template<> struct conj_helper<Packet2cf, Packet4f, false,false>
-{
- EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet4f& y, const Packet2cf& c) const
- { return padd(c, pmul(x,y)); }
-
- EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& x, const Packet4f& y) const
- { return Packet2cf(Eigen::internal::pmul<Packet4f>(x.v, y)); }
-};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
@@ -340,7 +324,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
{
@@ -430,23 +414,7 @@ template<> struct conj_helper<Packet1cd, Packet1cd, true,true>
}
};
-template<> struct conj_helper<Packet2d, Packet1cd, false,false>
-{
- EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet2d& x, const Packet1cd& y, const Packet1cd& c) const
- { return padd(c, pmul(x,y)); }
-
- EIGEN_STRONG_INLINE Packet1cd pmul(const Packet2d& x, const Packet1cd& y) const
- { return Packet1cd(Eigen::internal::pmul<Packet2d>(x, y.v)); }
-};
-
-template<> struct conj_helper<Packet1cd, Packet2d, false,false>
-{
- EIGEN_STRONG_INLINE Packet1cd pmadd(const Packet1cd& x, const Packet2d& y, const Packet1cd& c) const
- { return padd(c, pmul(x,y)); }
-
- EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& x, const Packet2d& y) const
- { return Packet1cd(Eigen::internal::pmul<Packet2d>(x.v, y)); }
-};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
diff --git a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h
index 3832de1..5e652cc 100644
--- a/eigen/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/eigen/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -409,10 +409,16 @@ template<> EIGEN_STRONG_INLINE void pstore1<Packet2d>(double* to, const double&
pstore(to, Packet2d(vec2d_swizzle1(pa,0,0)));
}
+#if EIGEN_COMP_PGI
+typedef const void * SsePrefetchPtrType;
+#else
+typedef const char * SsePrefetchPtrType;
+#endif
+
#ifndef EIGEN_VECTORIZE_AVX
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
#endif
#if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
@@ -876,4 +882,14 @@ template<> EIGEN_STRONG_INLINE double pmadd(const double& a, const double& b, co
} // end namespace Eigen
+#if EIGEN_COMP_PGI
+// PGI++ does not define the following intrinsics in C++ mode.
+static inline __m128 _mm_castpd_ps (__m128d x) { return reinterpret_cast<__m128&>(x); }
+static inline __m128i _mm_castpd_si128(__m128d x) { return reinterpret_cast<__m128i&>(x); }
+static inline __m128d _mm_castps_pd (__m128 x) { return reinterpret_cast<__m128d&>(x); }
+static inline __m128i _mm_castps_si128(__m128 x) { return reinterpret_cast<__m128i&>(x); }
+static inline __m128 _mm_castsi128_ps(__m128i x) { return reinterpret_cast<__m128&>(x); }
+static inline __m128d _mm_castsi128_pd(__m128i x) { return reinterpret_cast<__m128d&>(x); }
+#endif
+
#endif // EIGEN_PACKET_MATH_SSE_H
diff --git a/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h b/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h
index c848932..c6ca8c7 100644
--- a/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h
+++ b/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h
@@ -14,6 +14,7 @@ namespace Eigen {
namespace internal {
+#ifndef EIGEN_VECTORIZE_AVX
template <>
struct type_casting_traits<float, int> {
enum {
@@ -23,11 +24,6 @@ struct type_casting_traits<float, int> {
};
};
-template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
- return _mm_cvttps_epi32(a);
-}
-
-
template <>
struct type_casting_traits<int, float> {
enum {
@@ -37,11 +33,6 @@ struct type_casting_traits<int, float> {
};
};
-template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
- return _mm_cvtepi32_ps(a);
-}
-
-
template <>
struct type_casting_traits<double, float> {
enum {
@@ -51,10 +42,6 @@ struct type_casting_traits<double, float> {
};
};
-template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
- return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
-}
-
template <>
struct type_casting_traits<float, double> {
enum {
@@ -63,6 +50,19 @@ struct type_casting_traits<float, double> {
TgtCoeffRatio = 2
};
};
+#endif
+
+template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
+ return _mm_cvttps_epi32(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
+ return _mm_cvtepi32_ps(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
+ return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
+}
template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
// Simply discard the second half of the input
diff --git a/eigen/Eigen/src/Core/arch/ZVector/Complex.h b/eigen/Eigen/src/Core/arch/ZVector/Complex.h
index d39d2d1..1bfb733 100644
--- a/eigen/Eigen/src/Core/arch/ZVector/Complex.h
+++ b/eigen/Eigen/src/Core/arch/ZVector/Complex.h
@@ -336,6 +336,9 @@ template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
}
};
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
+EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
+
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
// TODO optimize it for AltiVec
diff --git a/eigen/Eigen/src/Core/functors/BinaryFunctors.h b/eigen/Eigen/src/Core/functors/BinaryFunctors.h
index 96747ba..3eae6b8 100644
--- a/eigen/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/eigen/Eigen/src/Core/functors/BinaryFunctors.h
@@ -255,7 +255,7 @@ struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,Rh
/** \internal
- * \brief Template functor to compute the hypot of two scalars
+ * \brief Template functor to compute the hypot of two \b positive \b and \b real scalars
*
* \sa MatrixBase::stableNorm(), class Redux
*/
@@ -263,22 +263,15 @@ template<typename Scalar>
struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
{
EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
-// typedef typename NumTraits<Scalar>::Real result_type;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar &x, const Scalar &y) const
{
- EIGEN_USING_STD_MATH(sqrt)
- Scalar p, qp;
- if(_x>_y)
- {
- p = _x;
- qp = _y / p;
- }
- else
- {
- p = _y;
- qp = _x / p;
- }
- return p * sqrt(Scalar(1) + qp*qp);
+ // This functor is used by hypotNorm only for which it is faster to first apply abs
+ // on all coefficients prior to reduction through hypot.
+ // This way we avoid calling abs on positive and real entries, and this also permits
+ // to seamlessly handle complexes. Otherwise we would have to handle both real and complexes
+ // through the same functor...
+ return internal::positive_real_hypot(x,y);
}
};
template<typename Scalar>
diff --git a/eigen/Eigen/src/Core/functors/StlFunctors.h b/eigen/Eigen/src/Core/functors/StlFunctors.h
index 6df3fa5..9c1d758 100644
--- a/eigen/Eigen/src/Core/functors/StlFunctors.h
+++ b/eigen/Eigen/src/Core/functors/StlFunctors.h
@@ -83,13 +83,17 @@ struct functor_traits<std::binder1st<T> >
{ enum { Cost = functor_traits<T>::Cost, PacketAccess = false }; };
#endif
+#if (__cplusplus < 201703L) && (EIGEN_COMP_MSVC < 1910)
+// std::unary_negate is deprecated since c++17 and will be removed in c++20
template<typename T>
struct functor_traits<std::unary_negate<T> >
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
+// std::binary_negate is deprecated since c++17 and will be removed in c++20
template<typename T>
struct functor_traits<std::binary_negate<T> >
{ enum { Cost = 1 + functor_traits<T>::Cost, PacketAccess = false }; };
+#endif
#ifdef EIGEN_STDEXT_SUPPORT
diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
index 41e18ff..9176a13 100644
--- a/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
+++ b/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h
@@ -88,7 +88,7 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
BlasIndex lda=convert_index<BlasIndex>(lhsStride), ldc=convert_index<BlasIndex>(resStride), n=convert_index<BlasIndex>(size), k=convert_index<BlasIndex>(depth); \
char uplo=((IsLower) ? 'L' : 'U'), trans=((AStorageOrder==RowMajor) ? 'T':'N'); \
EIGTYPE beta(1); \
- BLASFUNC(&uplo, &trans, &n, &k, &numext::real_ref(alpha), lhs, &lda, &numext::real_ref(beta), res, &ldc); \
+ BLASFUNC(&uplo, &trans, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), lhs, &lda, (const BLASTYPE*)&numext::real_ref(beta), res, &ldc); \
} \
};
@@ -125,9 +125,13 @@ struct general_matrix_matrix_rankupdate<Index,EIGTYPE,AStorageOrder,ConjugateA,C
} \
};
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk)
+EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk)
+#else
EIGEN_BLAS_RANKUPDATE_R(double, double, dsyrk_)
EIGEN_BLAS_RANKUPDATE_R(float, float, ssyrk_)
+#endif
// TODO hanlde complex cases
// EIGEN_BLAS_RANKUPDATE_C(dcomplex, double, double, zherk_)
diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h b/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
index 7a3bdbf..b0f6b0d 100644
--- a/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
+++ b/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h
@@ -46,7 +46,7 @@ namespace internal {
// gemm specialization
-#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASPREFIX) \
+#define GEMM_SPECIALIZATION(EIGTYPE, EIGPREFIX, BLASTYPE, BLASFUNC) \
template< \
typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
@@ -100,13 +100,20 @@ static void run(Index rows, Index cols, Index depth, \
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \
\
- BLASPREFIX##gemm_(&transa, &transb, &m, &n, &k, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+ BLASFUNC(&transa, &transb, &m, &n, &k, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
}};
-GEMM_SPECIALIZATION(double, d, double, d)
-GEMM_SPECIALIZATION(float, f, float, s)
-GEMM_SPECIALIZATION(dcomplex, cd, double, z)
-GEMM_SPECIALIZATION(scomplex, cf, float, c)
+#ifdef EIGEN_USE_MKL
+GEMM_SPECIALIZATION(double, d, double, dgemm)
+GEMM_SPECIALIZATION(float, f, float, sgemm)
+GEMM_SPECIALIZATION(dcomplex, cd, MKL_Complex16, zgemm)
+GEMM_SPECIALIZATION(scomplex, cf, MKL_Complex8, cgemm)
+#else
+GEMM_SPECIALIZATION(double, d, double, dgemm_)
+GEMM_SPECIALIZATION(float, f, float, sgemm_)
+GEMM_SPECIALIZATION(dcomplex, cd, double, zgemm_)
+GEMM_SPECIALIZATION(scomplex, cf, float, cgemm_)
+#endif
} // end namespase internal
diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h
index 3c1a7fc..a597c1f 100644
--- a/eigen/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/eigen/Eigen/src/Core/products/GeneralMatrixVector.h
@@ -183,8 +183,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C
alignmentPattern = AllAligned;
}
- const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
- const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
+ const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
+ const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
Index columnBound = ((cols-skipColumns)/columnsAtOnce)*columnsAtOnce + skipColumns;
for (Index i=skipColumns; i<columnBound; i+=columnsAtOnce)
@@ -457,8 +457,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
alignmentPattern = AllAligned;
}
- const Index offset1 = (FirstAligned && alignmentStep==1)?3:1;
- const Index offset3 = (FirstAligned && alignmentStep==1)?1:3;
+ const Index offset1 = (alignmentPattern==FirstAligned && alignmentStep==1)?3:1;
+ const Index offset3 = (alignmentPattern==FirstAligned && alignmentStep==1)?1:3;
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
diff --git a/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h b/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
index e3a5d58..6e36c2b 100644
--- a/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
+++ b/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h
@@ -85,7 +85,7 @@ EIGEN_BLAS_GEMV_SPECIALIZE(float)
EIGEN_BLAS_GEMV_SPECIALIZE(dcomplex)
EIGEN_BLAS_GEMV_SPECIALIZE(scomplex)
-#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASPREFIX) \
+#define EIGEN_BLAS_GEMV_SPECIALIZATION(EIGTYPE,BLASTYPE,BLASFUNC) \
template<typename Index, int LhsStorageOrder, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,ConjugateLhs,EIGTYPE,ConjugateRhs> \
{ \
@@ -113,14 +113,21 @@ static void run( \
x_ptr=x_tmp.data(); \
incx=1; \
} else x_ptr=rhs; \
- BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
+ BLASFUNC(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\
};
-EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, d)
-EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, s)
-EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, z)
-EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, MKL_Complex16, zgemv)
+EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, MKL_Complex8 , cgemv)
+#else
+EIGEN_BLAS_GEMV_SPECIALIZATION(double, double, dgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(float, float, sgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(dcomplex, double, zgemv_)
+EIGEN_BLAS_GEMV_SPECIALIZATION(scomplex, float, cgemv_)
+#endif
} // end namespase internal
diff --git a/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h b/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
index a45238d..9a53185 100644
--- a/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
+++ b/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h
@@ -40,7 +40,7 @@ namespace internal {
/* Optimized selfadjoint matrix * matrix (?SYMM/?HEMM) product */
-#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_SYMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -81,13 +81,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _rhs; \
\
- BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+ BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
-#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_HEMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -144,20 +144,26 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \
\
- BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+ BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
-EIGEN_BLAS_SYMM_L(double, double, d, d)
-EIGEN_BLAS_SYMM_L(float, float, f, s)
-EIGEN_BLAS_HEMM_L(dcomplex, double, cd, z)
-EIGEN_BLAS_HEMM_L(scomplex, float, cf, c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMM_L(double, double, d, dsymm)
+EIGEN_BLAS_SYMM_L(float, float, f, ssymm)
+EIGEN_BLAS_HEMM_L(dcomplex, MKL_Complex16, cd, zhemm)
+EIGEN_BLAS_HEMM_L(scomplex, MKL_Complex8, cf, chemm)
+#else
+EIGEN_BLAS_SYMM_L(double, double, d, dsymm_)
+EIGEN_BLAS_SYMM_L(float, float, f, ssymm_)
+EIGEN_BLAS_HEMM_L(dcomplex, double, cd, zhemm_)
+EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
+#endif
/* Optimized matrix * selfadjoint matrix (?SYMM/?HEMM) product */
-#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_SYMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -197,13 +203,13 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} else b = _lhs; \
\
- BLASPREFIX##symm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+ BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
\
} \
};
-#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_HEMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -259,15 +265,21 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
ldb = convert_index<BlasIndex>(b_tmp.outerStride()); \
} \
\
- BLASPREFIX##hemm_(&side, &uplo, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, &numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
+ BLASFUNC(&side, &uplo, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)b, &ldb, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &ldc); \
} \
};
-EIGEN_BLAS_SYMM_R(double, double, d, d)
-EIGEN_BLAS_SYMM_R(float, float, f, s)
-EIGEN_BLAS_HEMM_R(dcomplex, double, cd, z)
-EIGEN_BLAS_HEMM_R(scomplex, float, cf, c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMM_R(double, double, d, dsymm)
+EIGEN_BLAS_SYMM_R(float, float, f, ssymm)
+EIGEN_BLAS_HEMM_R(dcomplex, MKL_Complex16, cd, zhemm)
+EIGEN_BLAS_HEMM_R(scomplex, MKL_Complex8, cf, chemm)
+#else
+EIGEN_BLAS_SYMM_R(double, double, d, dsymm_)
+EIGEN_BLAS_SYMM_R(float, float, f, ssymm_)
+EIGEN_BLAS_HEMM_R(dcomplex, double, cd, zhemm_)
+EIGEN_BLAS_HEMM_R(scomplex, float, cf, chemm_)
+#endif
} // end namespace internal
} // end namespace Eigen
diff --git a/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h b/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
index 38f23ac..1238345 100644
--- a/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
+++ b/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h
@@ -95,14 +95,21 @@ const EIGTYPE* _rhs, EIGTYPE* res, EIGTYPE alpha) \
x_tmp=map_x.conjugate(); \
x_ptr=x_tmp.data(); \
} else x_ptr=_rhs; \
- BLASFUNC(&uplo, &n, &numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, &numext::real_ref(beta), (BLASTYPE*)res, &incy); \
+ BLASFUNC(&uplo, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)lhs, &lda, (const BLASTYPE*)x_ptr, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)res, &incy); \
}\
};
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, MKL_Complex16, zhemv)
+EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, MKL_Complex8, chemv)
+#else
EIGEN_BLAS_SYMV_SPECIALIZATION(double, double, dsymv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(float, float, ssymv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(dcomplex, double, zhemv_)
EIGEN_BLAS_SYMV_SPECIALIZATION(scomplex, float, chemv_)
+#endif
} // end namespace internal
diff --git a/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h b/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h
index 6ec5a8a..f784507 100644
--- a/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h
@@ -137,7 +137,13 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
- Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert()));
+ // To work around an "error: member reference base type 'Matrix<...>
+ // (Eigen::internal::constructor_without_unaligned_array_assert (*)())' is
+ // not a structure or union" compilation error in nvcc (tested V8.0.61),
+ // create a dummy internal::constructor_without_unaligned_array_assert
+ // object to pass to the Matrix constructor.
+ internal::constructor_without_unaligned_array_assert a;
+ Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer(a);
triangularBuffer.setZero();
if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero();
@@ -284,7 +290,8 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
- Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer((internal::constructor_without_unaligned_array_assert()));
+ internal::constructor_without_unaligned_array_assert a;
+ Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,RhsStorageOrder> triangularBuffer(a);
triangularBuffer.setZero();
if((Mode&ZeroDiag)==ZeroDiag)
triangularBuffer.diagonal().setZero();
@@ -393,7 +400,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
{
template<typename Dest> static void run(Dest& dst, const Lhs &a_lhs, const Rhs &a_rhs, const typename Dest::Scalar& alpha)
{
- typedef typename Dest::Scalar Scalar;
+ typedef typename Lhs::Scalar LhsScalar;
+ typedef typename Rhs::Scalar RhsScalar;
+ typedef typename Dest::Scalar Scalar;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
@@ -405,8 +414,9 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(a_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(a_rhs);
- Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
- * RhsBlasTraits::extractScalarFactor(a_rhs);
+ LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(a_lhs);
+ RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(a_rhs);
+ Scalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,4> BlockingType;
@@ -431,6 +441,21 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
&dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha, blocking
);
+
+ // Apply correction if the diagonal is unit and a scalar factor was nested:
+ if ((Mode&UnitDiag)==UnitDiag)
+ {
+ if (LhsIsTriangular && lhs_alpha!=LhsScalar(1))
+ {
+ Index diagSize = (std::min)(lhs.rows(),lhs.cols());
+ dst.topRows(diagSize) -= ((lhs_alpha-LhsScalar(1))*a_rhs).topRows(diagSize);
+ }
+ else if ((!LhsIsTriangular) && rhs_alpha!=RhsScalar(1))
+ {
+ Index diagSize = (std::min)(rhs.rows(),rhs.cols());
+ dst.leftCols(diagSize) -= (rhs_alpha-RhsScalar(1))*a_lhs.leftCols(diagSize);
+ }
+ }
}
};
diff --git a/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h b/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
index aecded6..a25197a 100644
--- a/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
+++ b/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h
@@ -75,7 +75,7 @@ EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, true)
EIGEN_BLAS_TRMM_SPECIALIZE(scomplex, false)
// implements col-major += alpha * op(triangular) * op(general)
-#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMM_L(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -172,7 +172,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
} \
/*std::cout << "TRMM_L: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \
- BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
+ BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@@ -180,13 +180,20 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
} \
};
-EIGEN_BLAS_TRMM_L(double, double, d, d)
-EIGEN_BLAS_TRMM_L(dcomplex, double, cd, z)
-EIGEN_BLAS_TRMM_L(float, float, f, s)
-EIGEN_BLAS_TRMM_L(scomplex, float, cf, c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMM_L(double, double, d, dtrmm)
+EIGEN_BLAS_TRMM_L(dcomplex, MKL_Complex16, cd, ztrmm)
+EIGEN_BLAS_TRMM_L(float, float, f, strmm)
+EIGEN_BLAS_TRMM_L(scomplex, MKL_Complex8, cf, ctrmm)
+#else
+EIGEN_BLAS_TRMM_L(double, double, d, dtrmm_)
+EIGEN_BLAS_TRMM_L(dcomplex, double, cd, ztrmm_)
+EIGEN_BLAS_TRMM_L(float, float, f, strmm_)
+EIGEN_BLAS_TRMM_L(scomplex, float, cf, ctrmm_)
+#endif
// implements col-major += alpha * op(general) * op(triangular)
-#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMM_R(EIGTYPE, BLASTYPE, EIGPREFIX, BLASFUNC) \
template <typename Index, int Mode, \
int LhsStorageOrder, bool ConjugateLhs, \
int RhsStorageOrder, bool ConjugateRhs> \
@@ -282,7 +289,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
} \
/*std::cout << "TRMM_R: A is square! Go to BLAS TRMM implementation! \n";*/ \
/* call ?trmm*/ \
- BLASPREFIX##trmm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
+ BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)b, &ldb); \
\
/* Add op(a_triangular)*b into res*/ \
Map<MatrixX##EIGPREFIX, 0, OuterStride<> > res_tmp(res,rows,cols,OuterStride<>(resStride)); \
@@ -290,11 +297,17 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
} \
};
-EIGEN_BLAS_TRMM_R(double, double, d, d)
-EIGEN_BLAS_TRMM_R(dcomplex, double, cd, z)
-EIGEN_BLAS_TRMM_R(float, float, f, s)
-EIGEN_BLAS_TRMM_R(scomplex, float, cf, c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMM_R(double, double, d, dtrmm)
+EIGEN_BLAS_TRMM_R(dcomplex, MKL_Complex16, cd, ztrmm)
+EIGEN_BLAS_TRMM_R(float, float, f, strmm)
+EIGEN_BLAS_TRMM_R(scomplex, MKL_Complex8, cf, ctrmm)
+#else
+EIGEN_BLAS_TRMM_R(double, double, d, dtrmm_)
+EIGEN_BLAS_TRMM_R(dcomplex, double, cd, ztrmm_)
+EIGEN_BLAS_TRMM_R(float, float, f, strmm_)
+EIGEN_BLAS_TRMM_R(scomplex, float, cf, ctrmm_)
+#endif
} // end namespace internal
} // end namespace Eigen
diff --git a/eigen/Eigen/src/Core/products/TriangularMatrixVector.h b/eigen/Eigen/src/Core/products/TriangularMatrixVector.h
index 4b292e7..76bfa15 100644
--- a/eigen/Eigen/src/Core/products/TriangularMatrixVector.h
+++ b/eigen/Eigen/src/Core/products/TriangularMatrixVector.h
@@ -221,8 +221,9 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
- ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
- * RhsBlasTraits::extractScalarFactor(rhs);
+ LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
+ RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
+ ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
enum {
// FIXME find a way to allow an inner stride on the result if packet_traits<Scalar>::size==1
@@ -274,6 +275,12 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
else
dest = MappedDest(actualDestPtr, dest.size());
}
+
+ if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
+ {
+ Index diagSize = (std::min)(lhs.rows(),lhs.cols());
+ dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
+ }
}
};
@@ -295,8 +302,9 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
typename add_const<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);
typename add_const<ActualRhsType>::type actualRhs = RhsBlasTraits::extract(rhs);
- ResScalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(lhs)
- * RhsBlasTraits::extractScalarFactor(rhs);
+ LhsScalar lhs_alpha = LhsBlasTraits::extractScalarFactor(lhs);
+ RhsScalar rhs_alpha = RhsBlasTraits::extractScalarFactor(rhs);
+ ResScalar actualAlpha = alpha * lhs_alpha * rhs_alpha;
enum {
DirectlyUseRhs = ActualRhsTypeCleaned::InnerStrideAtCompileTime==1
@@ -326,6 +334,12 @@ template<int Mode> struct trmv_selector<Mode,RowMajor>
actualRhsPtr,1,
dest.data(),dest.innerStride(),
actualAlpha);
+
+ if ( ((Mode&UnitDiag)==UnitDiag) && (lhs_alpha!=LhsScalar(1)) )
+ {
+ Index diagSize = (std::min)(lhs.rows(),lhs.cols());
+ dest.head(diagSize) -= (lhs_alpha-LhsScalar(1))*rhs.head(diagSize);
+ }
}
};
diff --git a/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h b/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
index 07bf26c..3d47a2b 100644
--- a/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
+++ b/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h
@@ -71,7 +71,7 @@ EIGEN_BLAS_TRMV_SPECIALIZE(dcomplex)
EIGEN_BLAS_TRMV_SPECIALIZE(scomplex)
// implements col-major: res += alpha * op(triangular) * vector
-#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMV_CM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor> { \
enum { \
@@ -121,10 +121,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
- BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
+ BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\
/* Add op(a_tr)rhs into res*/ \
- BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+ BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@@ -142,18 +142,25 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
m = convert_index<BlasIndex>(size); \
n = convert_index<BlasIndex>(cols-size); \
} \
- BLASPREFIX##gemv_(&trans, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
+ BLASPREFIX##gemv##BLASPOSTFIX(&trans, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \
} \
};
-EIGEN_BLAS_TRMV_CM(double, double, d, d)
-EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z)
-EIGEN_BLAS_TRMV_CM(float, float, f, s)
-EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMV_CM(double, double, d, d,)
+EIGEN_BLAS_TRMV_CM(dcomplex, MKL_Complex16, cd, z,)
+EIGEN_BLAS_TRMV_CM(float, float, f, s,)
+EIGEN_BLAS_TRMV_CM(scomplex, MKL_Complex8, cf, c,)
+#else
+EIGEN_BLAS_TRMV_CM(double, double, d, d, _)
+EIGEN_BLAS_TRMV_CM(dcomplex, double, cd, z, _)
+EIGEN_BLAS_TRMV_CM(float, float, f, s, _)
+EIGEN_BLAS_TRMV_CM(scomplex, float, cf, c, _)
+#endif
// implements row-major: res += alpha * op(triangular) * vector
-#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX) \
+#define EIGEN_BLAS_TRMV_RM(EIGTYPE, BLASTYPE, EIGPREFIX, BLASPREFIX, BLASPOSTFIX) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor> { \
enum { \
@@ -203,10 +210,10 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
diag = IsUnitDiag ? 'U' : 'N'; \
\
/* call ?TRMV*/ \
- BLASPREFIX##trmv_(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
+ BLASPREFIX##trmv##BLASPOSTFIX(&uplo, &trans, &diag, &n, (const BLASTYPE*)_lhs, &lda, (BLASTYPE*)x, &incx); \
\
/* Add op(a_tr)rhs into res*/ \
- BLASPREFIX##axpy_(&n, &numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
+ BLASPREFIX##axpy##BLASPOSTFIX(&n, (const BLASTYPE*)&numext::real_ref(alpha),(const BLASTYPE*)x, &incx, (BLASTYPE*)_res, &incy); \
/* Non-square case - doesn't fit to BLAS ?TRMV. Fall to default triangular product*/ \
if (size<(std::max)(rows,cols)) { \
if (ConjRhs) x_tmp = rhs.conjugate(); else x_tmp = rhs; \
@@ -224,15 +231,22 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
m = convert_index<BlasIndex>(size); \
n = convert_index<BlasIndex>(cols-size); \
} \
- BLASPREFIX##gemv_(&trans, &n, &m, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, &numext::real_ref(beta), (BLASTYPE*)y, &incy); \
+ BLASPREFIX##gemv##BLASPOSTFIX(&trans, &n, &m, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (const BLASTYPE*)x, &incx, (const BLASTYPE*)&numext::real_ref(beta), (BLASTYPE*)y, &incy); \
} \
} \
};
-EIGEN_BLAS_TRMV_RM(double, double, d, d)
-EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z)
-EIGEN_BLAS_TRMV_RM(float, float, f, s)
-EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c)
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRMV_RM(double, double, d, d,)
+EIGEN_BLAS_TRMV_RM(dcomplex, MKL_Complex16, cd, z,)
+EIGEN_BLAS_TRMV_RM(float, float, f, s,)
+EIGEN_BLAS_TRMV_RM(scomplex, MKL_Complex8, cf, c,)
+#else
+EIGEN_BLAS_TRMV_RM(double, double, d, d,_)
+EIGEN_BLAS_TRMV_RM(dcomplex, double, cd, z,_)
+EIGEN_BLAS_TRMV_RM(float, float, f, s,_)
+EIGEN_BLAS_TRMV_RM(scomplex, float, cf, c,_)
+#endif
} // end namespase internal
diff --git a/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h b/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
index 88c0fb7..f077511 100644
--- a/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
+++ b/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h
@@ -38,7 +38,7 @@ namespace Eigen {
namespace internal {
// implements LeftSide op(triangular)^-1 * general
-#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASPREFIX) \
+#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
@@ -80,18 +80,24 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
- BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
+ BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
} \
};
-EIGEN_BLAS_TRSM_L(double, double, d)
-EIGEN_BLAS_TRSM_L(dcomplex, double, z)
-EIGEN_BLAS_TRSM_L(float, float, s)
-EIGEN_BLAS_TRSM_L(scomplex, float, c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRSM_L(double, double, dtrsm)
+EIGEN_BLAS_TRSM_L(dcomplex, MKL_Complex16, ztrsm)
+EIGEN_BLAS_TRSM_L(float, float, strsm)
+EIGEN_BLAS_TRSM_L(scomplex, MKL_Complex8, ctrsm)
+#else
+EIGEN_BLAS_TRSM_L(double, double, dtrsm_)
+EIGEN_BLAS_TRSM_L(dcomplex, double, ztrsm_)
+EIGEN_BLAS_TRSM_L(float, float, strsm_)
+EIGEN_BLAS_TRSM_L(scomplex, float, ctrsm_)
+#endif
// implements RightSide general * op(triangular)^-1
-#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASPREFIX) \
+#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
{ \
@@ -133,16 +139,22 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
} \
if (IsUnitDiag) diag='U'; \
/* call ?trsm*/ \
- BLASPREFIX##trsm_(&side, &uplo, &transa, &diag, &m, &n, &numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
+ BLASFUNC(&side, &uplo, &transa, &diag, &m, &n, (const BLASTYPE*)&numext::real_ref(alpha), (const BLASTYPE*)a, &lda, (BLASTYPE*)_other, &ldb); \
/*std::cout << "TRMS_L specialization!\n";*/ \
} \
};
-EIGEN_BLAS_TRSM_R(double, double, d)
-EIGEN_BLAS_TRSM_R(dcomplex, double, z)
-EIGEN_BLAS_TRSM_R(float, float, s)
-EIGEN_BLAS_TRSM_R(scomplex, float, c)
-
+#ifdef EIGEN_USE_MKL
+EIGEN_BLAS_TRSM_R(double, double, dtrsm)
+EIGEN_BLAS_TRSM_R(dcomplex, MKL_Complex16, ztrsm)
+EIGEN_BLAS_TRSM_R(float, float, strsm)
+EIGEN_BLAS_TRSM_R(scomplex, MKL_Complex8, ctrsm)
+#else
+EIGEN_BLAS_TRSM_R(double, double, dtrsm_)
+EIGEN_BLAS_TRSM_R(dcomplex, double, ztrsm_)
+EIGEN_BLAS_TRSM_R(float, float, strsm_)
+EIGEN_BLAS_TRSM_R(scomplex, float, ctrsm_)
+#endif
} // end namespace internal
diff --git a/eigen/Eigen/src/Core/util/MKL_support.h b/eigen/Eigen/src/Core/util/MKL_support.h
index 26b5966..b7d6ecc 100644
--- a/eigen/Eigen/src/Core/util/MKL_support.h
+++ b/eigen/Eigen/src/Core/util/MKL_support.h
@@ -49,10 +49,11 @@
#define EIGEN_USE_LAPACKE
#endif
-#if defined(EIGEN_USE_MKL_VML)
+#if defined(EIGEN_USE_MKL_VML) && !defined(EIGEN_USE_MKL)
#define EIGEN_USE_MKL
#endif
+
#if defined EIGEN_USE_MKL
# include <mkl.h>
/*Check IMKL version for compatibility: < 10.3 is not usable with Eigen*/
@@ -108,6 +109,10 @@
#endif
#endif
+#if defined(EIGEN_USE_BLAS) && !defined(EIGEN_USE_MKL)
+#include "../../misc/blas.h"
+#endif
+
namespace Eigen {
typedef std::complex<double> dcomplex;
@@ -121,8 +126,5 @@ typedef int BlasIndex;
} // end namespace Eigen
-#if defined(EIGEN_USE_BLAS)
-#include "../../misc/blas.h"
-#endif
#endif // EIGEN_MKL_SUPPORT_H
diff --git a/eigen/Eigen/src/Core/util/Macros.h b/eigen/Eigen/src/Core/util/Macros.h
index 38d6ddb..02d21d2 100644
--- a/eigen/Eigen/src/Core/util/Macros.h
+++ b/eigen/Eigen/src/Core/util/Macros.h
@@ -13,7 +13,7 @@
#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 3
-#define EIGEN_MINOR_VERSION 4
+#define EIGEN_MINOR_VERSION 5
#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \
@@ -399,7 +399,7 @@
// Does the compiler support variadic templates?
#ifndef EIGEN_HAS_VARIADIC_TEMPLATES
#if EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) \
- && ( !defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000) )
+ && (!defined(__NVCC__) || !EIGEN_ARCH_ARM_OR_ARM64 || (EIGEN_CUDACC_VER >= 80000) )
// ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices:
// this prevents nvcc from crashing when compiling Eigen on Tegra X1
#define EIGEN_HAS_VARIADIC_TEMPLATES 1
@@ -413,7 +413,7 @@
#ifdef __CUDACC__
// Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above
-#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && defined(__CUDACC_VER__) && (EIGEN_COMP_CLANG || __CUDACC_VER__ >= 70500))
+#if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500))
#define EIGEN_HAS_CONSTEXPR 1
#endif
#elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \
@@ -487,11 +487,13 @@
// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
// but GCC is still doing fine with just inline.
+#ifndef EIGEN_STRONG_INLINE
#if EIGEN_COMP_MSVC || EIGEN_COMP_ICC
#define EIGEN_STRONG_INLINE __forceinline
#else
#define EIGEN_STRONG_INLINE inline
#endif
+#endif
// EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible
// attribute to maximize inlining. This should only be used when really necessary: in particular,
@@ -812,7 +814,8 @@ namespace Eigen {
// just an empty macro !
#define EIGEN_EMPTY
-#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || defined(__CUDACC_VER__)) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0)
+ // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
using Base::operator =;
#elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)
@@ -986,7 +989,13 @@ namespace Eigen {
# define EIGEN_NOEXCEPT
# define EIGEN_NOEXCEPT_IF(x)
# define EIGEN_NO_THROW throw()
-# define EIGEN_EXCEPTION_SPEC(X) throw(X)
+# if EIGEN_COMP_MSVC
+ // MSVC does not support exception specifications (warning C4290),
+ // and they are deprecated in c++11 anyway.
+# define EIGEN_EXCEPTION_SPEC(X) throw()
+# else
+# define EIGEN_EXCEPTION_SPEC(X) throw(X)
+# endif
#endif
#endif // EIGEN_MACROS_H
diff --git a/eigen/Eigen/src/Core/util/Memory.h b/eigen/Eigen/src/Core/util/Memory.h
index c634d7e..66cdbd8 100644
--- a/eigen/Eigen/src/Core/util/Memory.h
+++ b/eigen/Eigen/src/Core/util/Memory.h
@@ -70,7 +70,7 @@ inline void throw_std_bad_alloc()
throw std::bad_alloc();
#else
std::size_t huge = static_cast<std::size_t>(-1);
- new int[huge];
+ ::operator new(huge);
#endif
}
@@ -493,7 +493,7 @@ template<typename T> struct smart_copy_helper<T,true> {
IntPtr size = IntPtr(end)-IntPtr(start);
if(size==0) return;
eigen_internal_assert(start!=0 && end!=0 && target!=0);
- memcpy(target, start, size);
+ std::memcpy(target, start, size);
}
};
@@ -696,7 +696,15 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
/** \class aligned_allocator
* \ingroup Core_Module
*
-* \brief STL compatible allocator to use with with 16 byte aligned types
+* \brief STL compatible allocator to use with types requiring a non standrad alignment.
+*
+* The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
+* By default, it will thus provide at least 16 bytes alignment and more in following cases:
+* - 32 bytes alignment if AVX is enabled.
+* - 64 bytes alignment if AVX512 is enabled.
+*
+* This can be controled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
+* \link TopicPreprocessorDirectivesPerformance there \endlink.
*
* Example:
* \code
diff --git a/eigen/Eigen/src/Core/util/Meta.h b/eigen/Eigen/src/Core/util/Meta.h
index 7f63707..1d73f05 100644
--- a/eigen/Eigen/src/Core/util/Meta.h
+++ b/eigen/Eigen/src/Core/util/Meta.h
@@ -485,6 +485,26 @@ T div_ceil(const T &a, const T &b)
return (a+b-1) / b;
}
+// The aim of the following functions is to bypass -Wfloat-equal warnings
+// when we really want a strict equality comparison on floating points.
+template<typename X, typename Y> EIGEN_STRONG_INLINE
+bool equal_strict(const X& x,const Y& y) { return x == y; }
+
+template<> EIGEN_STRONG_INLINE
+bool equal_strict(const float& x,const float& y) { return std::equal_to<float>()(x,y); }
+
+template<> EIGEN_STRONG_INLINE
+bool equal_strict(const double& x,const double& y) { return std::equal_to<double>()(x,y); }
+
+template<typename X, typename Y> EIGEN_STRONG_INLINE
+bool not_equal_strict(const X& x,const Y& y) { return x != y; }
+
+template<> EIGEN_STRONG_INLINE
+bool not_equal_strict(const float& x,const float& y) { return std::not_equal_to<float>()(x,y); }
+
+template<> EIGEN_STRONG_INLINE
+bool not_equal_strict(const double& x,const double& y) { return std::not_equal_to<double>()(x,y); }
+
} // end namespace numext
} // end namespace Eigen
diff --git a/eigen/Eigen/src/Core/util/StaticAssert.h b/eigen/Eigen/src/Core/util/StaticAssert.h
index 983361a..500e477 100644
--- a/eigen/Eigen/src/Core/util/StaticAssert.h
+++ b/eigen/Eigen/src/Core/util/StaticAssert.h
@@ -24,6 +24,7 @@
*
*/
+#ifndef EIGEN_STATIC_ASSERT
#ifndef EIGEN_NO_STATIC_ASSERT
#if EIGEN_MAX_CPP_VER>=11 && (__has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600))
@@ -44,64 +45,65 @@
struct static_assertion<true>
{
enum {
- YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX,
- YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES,
- YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES,
- THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE,
- THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE,
- THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE,
- OUT_OF_RANGE_ACCESS,
- YOU_MADE_A_PROGRAMMING_MISTAKE,
- EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT,
- EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE,
- YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR,
- YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR,
- UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC,
- THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES,
- FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED,
- NUMERIC_TYPE_MUST_BE_REAL,
- COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED,
- WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED,
- THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE,
- INVALID_MATRIX_PRODUCT,
- INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS,
- INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION,
- YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY,
- THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES,
- THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES,
- INVALID_MATRIX_TEMPLATE_PARAMETERS,
- INVALID_MATRIXBASE_TEMPLATE_PARAMETERS,
- BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER,
- THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX,
- THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE,
- THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES,
- YOU_ALREADY_SPECIFIED_THIS_STRIDE,
- INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION,
- THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD,
- PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1,
- THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS,
- YOU_CANNOT_MIX_ARRAYS_AND_MATRICES,
- YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION,
- THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY,
- YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT,
- THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS,
- THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS,
- THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL,
- THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES,
- YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED,
- YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED,
- THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE,
- THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH,
- OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG,
- IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY,
- STORAGE_LAYOUT_DOES_NOT_MATCH,
- EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE,
- THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS,
- MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY,
- THIS_TYPE_IS_NOT_SUPPORTED,
- STORAGE_KIND_MUST_MATCH,
- STORAGE_INDEX_MUST_MATCH,
- CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY
+ YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX=1,
+ YOU_MIXED_VECTORS_OF_DIFFERENT_SIZES=1,
+ YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES=1,
+ THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE=1,
+ THIS_METHOD_IS_ONLY_FOR_MATRICES_OF_A_SPECIFIC_SIZE=1,
+ THIS_METHOD_IS_ONLY_FOR_OBJECTS_OF_A_SPECIFIC_SIZE=1,
+ OUT_OF_RANGE_ACCESS=1,
+ YOU_MADE_A_PROGRAMMING_MISTAKE=1,
+ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT=1,
+ EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE=1,
+ YOU_CALLED_A_FIXED_SIZE_METHOD_ON_A_DYNAMIC_SIZE_MATRIX_OR_VECTOR=1,
+ YOU_CALLED_A_DYNAMIC_SIZE_METHOD_ON_A_FIXED_SIZE_MATRIX_OR_VECTOR=1,
+ UNALIGNED_LOAD_AND_STORE_OPERATIONS_UNIMPLEMENTED_ON_ALTIVEC=1,
+ THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES=1,
+ FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED=1,
+ NUMERIC_TYPE_MUST_BE_REAL=1,
+ COEFFICIENT_WRITE_ACCESS_TO_SELFADJOINT_NOT_SUPPORTED=1,
+ WRITING_TO_TRIANGULAR_PART_WITH_UNIT_DIAGONAL_IS_NOT_SUPPORTED=1,
+ THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE=1,
+ INVALID_MATRIX_PRODUCT=1,
+ INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS=1,
+ INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION=1,
+ YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY=1,
+ THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES=1,
+ THIS_METHOD_IS_ONLY_FOR_ROW_MAJOR_MATRICES=1,
+ INVALID_MATRIX_TEMPLATE_PARAMETERS=1,
+ INVALID_MATRIXBASE_TEMPLATE_PARAMETERS=1,
+ BOTH_MATRICES_MUST_HAVE_THE_SAME_STORAGE_ORDER=1,
+ THIS_METHOD_IS_ONLY_FOR_DIAGONAL_MATRIX=1,
+ THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE=1,
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES=1,
+ YOU_ALREADY_SPECIFIED_THIS_STRIDE=1,
+ INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION=1,
+ THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD=1,
+ PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1=1,
+ THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS=1,
+ YOU_CANNOT_MIX_ARRAYS_AND_MATRICES=1,
+ YOU_PERFORMED_AN_INVALID_TRANSFORMATION_CONVERSION=1,
+ THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY=1,
+ YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT=1,
+ THIS_METHOD_IS_ONLY_FOR_1x1_EXPRESSIONS=1,
+ THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS=1,
+ THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL=1,
+ THIS_METHOD_IS_ONLY_FOR_ARRAYS_NOT_MATRICES=1,
+ YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED=1,
+ YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED=1,
+ THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE=1,
+ THE_STORAGE_ORDER_OF_BOTH_SIDES_MUST_MATCH=1,
+ OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG=1,
+ IMPLICIT_CONVERSION_TO_SCALAR_IS_FOR_INNER_PRODUCT_ONLY=1,
+ STORAGE_LAYOUT_DOES_NOT_MATCH=1,
+ EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT__INVALID_COST_VALUE=1,
+ THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS=1,
+ MATRIX_FREE_CONJUGATE_GRADIENT_IS_COMPATIBLE_WITH_UPPER_UNION_LOWER_MODE_ONLY=1,
+ THIS_TYPE_IS_NOT_SUPPORTED=1,
+ STORAGE_KIND_MUST_MATCH=1,
+ STORAGE_INDEX_MUST_MATCH=1,
+ CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY=1,
+ SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY=1
};
};
@@ -131,7 +133,7 @@
#define EIGEN_STATIC_ASSERT(CONDITION,MSG) eigen_assert((CONDITION) && #MSG);
#endif // EIGEN_NO_STATIC_ASSERT
-
+#endif // EIGEN_STATIC_ASSERT
// static assertion failing if the type \a TYPE is not a vector type
#define EIGEN_STATIC_ASSERT_VECTOR_ONLY(TYPE) \