From 44861dcbfeee041223c4aac1ee075e92fa4daa01 Mon Sep 17 00:00:00 2001
From: Stanislaw Halik
Date: Sun, 18 Sep 2016 12:42:15 +0200
Subject: update

---
 eigen/Eigen/src/Core/Assign.h | 590 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 590 insertions(+)
 create mode 100644 eigen/Eigen/src/Core/Assign.h

diff --git a/eigen/Eigen/src/Core/Assign.h b/eigen/Eigen/src/Core/Assign.h
new file mode 100644
index 0000000..f481731
--- /dev/null
+++ b/eigen/Eigen/src/Core/Assign.h
@@ -0,0 +1,590 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2007 Michael Olbrich
+// Copyright (C) 2006-2010 Benoit Jacob
+// Copyright (C) 2008 Gael Guennebaud
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_ASSIGN_H
+#define EIGEN_ASSIGN_H
+
+namespace Eigen {
+
+namespace internal {
+
+/***************************************************************************
+* Part 1 : the logic deciding a strategy for traversal and unrolling       *
+***************************************************************************/
+
+template <typename Derived, typename OtherDerived>
+struct assign_traits
+{
+public:
+  enum {
+    DstIsAligned = Derived::Flags & AlignedBit,
+    DstHasDirectAccess = Derived::Flags & DirectAccessBit,
+    SrcIsAligned = OtherDerived::Flags & AlignedBit,
+    JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned
+  };
+
+private:
+  enum {
+    InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime)
+              : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime)
+              : int(Derived::RowsAtCompileTime),
+    InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime)
+              : int(Derived::Flags)&RowMajorBit ? int(Derived::MaxColsAtCompileTime)
+              : int(Derived::MaxRowsAtCompileTime),
+    MaxSizeAtCompileTime = Derived::SizeAtCompileTime,
+    PacketSize = packet_traits<typename Derived::Scalar>::size
+  };
+
+  enum {
+    StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)),
+    MightVectorize = StorageOrdersAgree
+                  && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
+    MayInnerVectorize  = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
+                       && int(DstIsAligned) && int(SrcIsAligned),
+    MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
+    MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess
+                       && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
+      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
+         so it's only good for large enough sizes. */
+    MaySliceVectorize  = MightVectorize && DstHasDirectAccess
+                       && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize)
+      /* slice vectorization can be slow, so we only want it if the slices are big, which is
+         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
+         in a fixed-size matrix */
+  };
+
+public:
+  enum {
+    Traversal = int(MayInnerVectorize)  ? int(InnerVectorizedTraversal)
+              : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
+              : int(MaySliceVectorize)  ? int(SliceVectorizedTraversal)
+              : int(MayLinearize)       ? int(LinearTraversal)
+                                        : int(DefaultTraversal),
+    Vectorized = int(Traversal) == InnerVectorizedTraversal
+              || int(Traversal) == LinearVectorizedTraversal
+              || int(Traversal) == SliceVectorizedTraversal
+  };
+
+private:
+  enum {
+    UnrollingLimit      = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1),
+    MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic
+                       && int(OtherDerived::CoeffReadCost) != Dynamic
+                       && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit),
+    MayUnrollInner      = int(InnerSize) != Dynamic
+                       && int(OtherDerived::CoeffReadCost) != Dynamic
+                       && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit)
+  };
+
+public:
+  enum {
+    Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal))
+                ? (
+                    int(MayUnrollCompletely) ? int(CompleteUnrolling)
+                  : int(MayUnrollInner)      ? int(InnerUnrolling)
+                                             : int(NoUnrolling)
+                  )
+              : int(Traversal) == int(LinearVectorizedTraversal)
+                ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) )
+              : int(Traversal) == int(LinearTraversal)
+                ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) )
+              : int(NoUnrolling)
+  };
+
+#ifdef EIGEN_DEBUG_ASSIGN
+  static void debug()
+  {
+    EIGEN_DEBUG_VAR(DstIsAligned)
+    EIGEN_DEBUG_VAR(SrcIsAligned)
+    EIGEN_DEBUG_VAR(JointAlignment)
+    EIGEN_DEBUG_VAR(InnerSize)
+    EIGEN_DEBUG_VAR(InnerMaxSize)
+    EIGEN_DEBUG_VAR(PacketSize)
+    EIGEN_DEBUG_VAR(StorageOrdersAgree)
+    EIGEN_DEBUG_VAR(MightVectorize)
+    EIGEN_DEBUG_VAR(MayLinearize)
+    EIGEN_DEBUG_VAR(MayInnerVectorize)
+    EIGEN_DEBUG_VAR(MayLinearVectorize)
+    EIGEN_DEBUG_VAR(MaySliceVectorize)
+    EIGEN_DEBUG_VAR(Traversal)
+    EIGEN_DEBUG_VAR(UnrollingLimit)
+    EIGEN_DEBUG_VAR(MayUnrollCompletely)
+    EIGEN_DEBUG_VAR(MayUnrollInner)
+    EIGEN_DEBUG_VAR(Unrolling)
+  }
+#endif
+};
+
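// The traits above can be inspected directly: defining EIGEN_DEBUG_ASSIGN before
// including Eigen enables debug(), which prints every decision variable through
// EIGEN_DEBUG_VAR. A minimal sketch, assuming this Eigen version wires up the
// EIGEN_DEBUG_VAR machinery when the macro is set (the matrix types are arbitrary
// examples, not mandated by the patch):

#define EIGEN_DEBUG_ASSIGN
#include <Eigen/Core>

int main()
{
  // fixed-size aligned 4x4 floats: typically InnerVectorizedTraversal + CompleteUnrolling
  Eigen::internal::assign_traits<Eigen::Matrix4f, Eigen::Matrix4f>::debug();
  Eigen::Matrix4f dst;
  dst = Eigen::Matrix4f::Identity(); // the assignment whose strategy was just printed
  return 0;
}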
+/***************************************************************************
+* Part 2 : meta-unrollers
+***************************************************************************/
+
+/************************
+*** Default traversal ***
+************************/
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_DefaultTraversal_CompleteUnrolling
+{
+  enum {
+    outer = Index / Derived1::InnerSizeAtCompileTime,
+    inner = Index % Derived1::InnerSizeAtCompileTime
+  };
+
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    dst.copyCoeffByOuterInner(outer, inner, src);
+    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
+};
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_DefaultTraversal_InnerUnrolling
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
+  {
+    dst.copyCoeffByOuterInner(outer, Index, src);
+    assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
+};
+
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_LinearTraversal_CompleteUnrolling
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    dst.copyCoeff(Index, src);
+    assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
+};
+
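// The unrollers above are plain recursive templates: each instantiation copies
// one coefficient, then recurses with Index+1 until Index reaches Stop, where an
// empty partial specialization terminates the recursion. A self-contained sketch
// of the same idiom (hypothetical names, not Eigen code):

template<int Index, int Stop>
struct copy_unroller
{
  static void run(float* dst, const float* src)
  {
    dst[Index] = src[Index];                       // copy one coefficient...
    copy_unroller<Index + 1, Stop>::run(dst, src); // ...then recurse at compile time
  }
};

template<int Stop>
struct copy_unroller<Stop, Stop>
{
  static void run(float*, const float*) {}         // recursion anchor: nothing left
};

// usage: copy_unroller<0, 4>::run(a, b) expands to four straight-line assignments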
+/**************************
+*** Inner vectorization ***
+**************************/
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_innervec_CompleteUnrolling
+{
+  enum {
+    outer = Index / Derived1::InnerSizeAtCompileTime,
+    inner = Index % Derived1::InnerSizeAtCompileTime,
+    JointAlignment = assign_traits<Derived1,Derived2>::JointAlignment
+  };
+
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    dst.template copyPacketByOuterInner<Derived2, Aligned, JointAlignment>(outer, inner, src);
+    assign_innervec_CompleteUnrolling<Derived1, Derived2,
+      Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
+};
+
+template<typename Derived1, typename Derived2, int Index, int Stop>
+struct assign_innervec_InnerUnrolling
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
+  {
+    dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
+    assign_innervec_InnerUnrolling<Derived1, Derived2,
+      Index+packet_traits<typename Derived1::Scalar>::size, Stop>::run(dst, src, outer);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Stop>
+struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
+};
+
+/***************************************************************************
+* Part 3 : implementation of all cases
+***************************************************************************/
+
+template<typename Derived1, typename Derived2,
+         int Traversal = assign_traits<Derived1, Derived2>::Traversal,
+         int Unrolling = assign_traits<Derived1, Derived2>::Unrolling,
+         int Version = Specialized>
+struct assign_impl;
+
+/************************
+*** Default traversal ***
+************************/
+
+template<typename Derived1, typename Derived2, int Unrolling, int Version>
+struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
+{
+  static inline void run(Derived1 &, const Derived2 &) { }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1 &dst, const Derived2 &src)
+  {
+    const Index innerSize = dst.innerSize();
+    const Index outerSize = dst.outerSize();
+    for(Index outer = 0; outer < outerSize; ++outer)
+      for(Index inner = 0; inner < innerSize; ++inner)
+        dst.copyCoeffByOuterInner(outer, inner, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
+      ::run(dst, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    const Index outerSize = dst.outerSize();
+    for(Index outer = 0; outer < outerSize; ++outer)
+      assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
+        ::run(dst, src, outer);
+  }
+};
+
+/***********************
+*** Linear traversal ***
+***********************/
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1 &dst, const Derived2 &src)
+  {
+    const Index size = dst.size();
+    for(Index i = 0; i < size; ++i)
+      dst.copyCoeff(i, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
+      ::run(dst, src);
+  }
+};
+
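// LinearTraversal visits the expression through one flat index, which is valid
// only when both sides expose LinearAccessBit; DefaultTraversal walks (outer,
// inner) pairs instead. For a column-major matrix both orders touch the same
// memory in the same sequence, as this raw-array sketch shows (illustrative
// only; 'rows' plays the role of the inner size):

#include <cstddef>

void copy_linear(float* dst, const float* src, std::size_t size)
{
  for (std::size_t i = 0; i < size; ++i)   // one flat loop over all coefficients
    dst[i] = src[i];
}

void copy_outer_inner(float* dst, const float* src, std::size_t cols, std::size_t rows)
{
  for (std::size_t outer = 0; outer < cols; ++outer)    // outer = column index
    for (std::size_t inner = 0; inner < rows; ++inner)  // inner = row within column
      dst[outer * rows + inner] = src[outer * rows + inner];
}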
+/**************************
+*** Inner vectorization ***
+**************************/
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, NoUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1 &dst, const Derived2 &src)
+  {
+    const Index innerSize = dst.innerSize();
+    const Index outerSize = dst.outerSize();
+    const Index packetSize = packet_traits<typename Derived1::Scalar>::size;
+    for(Index outer = 0; outer < outerSize; ++outer)
+      for(Index inner = 0; inner < innerSize; inner+=packetSize)
+        dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, inner, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, CompleteUnrolling, Version>
+{
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
+      ::run(dst, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    const Index outerSize = dst.outerSize();
+    for(Index outer = 0; outer < outerSize; ++outer)
+      assign_innervec_InnerUnrolling<Derived1, Derived2, 0, Derived1::InnerSizeAtCompileTime>
+        ::run(dst, src, outer);
+  }
+};
+
+/***************************
+*** Linear vectorization ***
+***************************/
+
+template <bool IsAligned = false>
+struct unaligned_assign_impl
+{
+  template <typename Derived, typename OtherDerived>
+  static EIGEN_STRONG_INLINE void run(const Derived&, OtherDerived&, typename Derived::Index, typename Derived::Index) {}
+};
+
+template <>
+struct unaligned_assign_impl<false>
+{
+  // MSVC must not inline this function. If it does, it fails to optimize the
+  // packet access path.
+#ifdef _MSC_VER
+  template <typename Derived, typename OtherDerived>
+  static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
+#else
+  template <typename Derived, typename OtherDerived>
+  static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, typename Derived::Index start, typename Derived::Index end)
+#endif
+  {
+    for (typename Derived::Index index = start; index < end; ++index)
+      dst.copyCoeff(index, src);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    const Index size = dst.size();
+    typedef packet_traits<typename Derived1::Scalar> PacketTraits;
+    enum {
+      packetSize = PacketTraits::size,
+      dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : int(assign_traits<Derived1,Derived2>::DstIsAligned) ,
+      srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
+    };
+    const Index alignedStart = assign_traits<Derived1,Derived2>::DstIsAligned ? 0
+                             : internal::first_aligned(&dst.coeffRef(0), size);
+    const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
+
+    unaligned_assign_impl<assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
+
+    for(Index index = alignedStart; index < alignedEnd; index += packetSize)
+    {
+      dst.template copyPacket<Derived2, dstAlignment, srcAlignment>(index, src);
+    }
+
+    unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
+  }
+};
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, LinearVectorizedTraversal, CompleteUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
+  {
+    enum { size = Derived1::SizeAtCompileTime,
+           packetSize = packet_traits<typename Derived1::Scalar>::size,
+           alignedSize = (size/packetSize)*packetSize };
+
+    assign_innervec_CompleteUnrolling<Derived1, Derived2, 0, alignedSize>::run(dst, src);
+    assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, alignedSize, size>::run(dst, src);
+  }
+};
+
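// The linear-vectorized path peels the copy into three phases: a scalar head up
// to the first aligned destination element (unaligned_assign_impl), an aligned
// packet loop, and a scalar tail for the leftover coefficients. A minimal sketch
// of that structure, assuming a 4-float packet and 16-byte alignment (plain C++
// stands in for the packet store; the alignment math is illustrative):

#include <cstddef>
#include <cstdint>

void copy_peeled(float* dst, const float* src, std::size_t size)
{
  const std::size_t packetSize = 4;
  // index of the first 16-byte-aligned element of dst (cf. internal::first_aligned)
  std::size_t alignedStart =
      ((16 - reinterpret_cast<std::uintptr_t>(dst) % 16) % 16) / sizeof(float);
  if (alignedStart > size) alignedStart = size;
  const std::size_t alignedEnd =
      alignedStart + ((size - alignedStart) / packetSize) * packetSize;

  for (std::size_t i = 0; i < alignedStart; ++i)  // scalar head
    dst[i] = src[i];
  for (std::size_t i = alignedStart; i < alignedEnd; i += packetSize)
    for (std::size_t k = 0; k < packetSize; ++k)  // one "packet" store per group of 4
      dst[i + k] = src[i + k];
  for (std::size_t i = alignedEnd; i < size; ++i) // scalar tail
    dst[i] = src[i];
}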
+/**************************
+*** Slice vectorization ***
+***************************/
+
+template<typename Derived1, typename Derived2, int Version>
+struct assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling, Version>
+{
+  typedef typename Derived1::Index Index;
+  static inline void run(Derived1 &dst, const Derived2 &src)
+  {
+    typedef typename Derived1::Scalar Scalar;
+    typedef packet_traits<Scalar> PacketTraits;
+    enum {
+      packetSize = PacketTraits::size,
+      alignable = PacketTraits::AlignedOnScalar,
+      dstIsAligned = assign_traits<Derived1,Derived2>::DstIsAligned,
+      dstAlignment = alignable ? Aligned : int(dstIsAligned),
+      srcAlignment = assign_traits<Derived1,Derived2>::JointAlignment
+    };
+    const Scalar *dst_ptr = &dst.coeffRef(0,0);
+    if((!bool(dstIsAligned)) && (size_t(dst_ptr) % sizeof(Scalar))>0)
+    {
+      // the pointer is not aligned on scalar boundaries, so alignment is not possible
+      return assign_impl<Derived1,Derived2,DefaultTraversal,NoUnrolling>::run(dst, src);
+    }
+    const Index packetAlignedMask = packetSize - 1;
+    const Index innerSize = dst.innerSize();
+    const Index outerSize = dst.outerSize();
+    const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
+    Index alignedStart = ((!alignable) || bool(dstIsAligned)) ? 0 : internal::first_aligned(dst_ptr, innerSize);
+
+    for(Index outer = 0; outer < outerSize; ++outer)
+    {
+      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
+      // do the non-vectorizable part of the assignment
+      for(Index inner = 0; inner<alignedStart ; ++inner)
+        dst.copyCoeffByOuterInner(outer, inner, src);
+
+      // do the vectorizable part of the assignment
+      for(Index inner = alignedStart; inner<alignedEnd; inner+=packetSize)
+        dst.template copyPacketByOuterInner<Derived2, dstAlignment, Unaligned>(outer, inner, src);
+
+      // do the non-vectorizable part of the assignment
+      for(Index inner = alignedEnd; inner<innerSize ; ++inner)
+        dst.copyCoeffByOuterInner(outer, inner, src);
+
+      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
+    }
+  }
+};
+
+} // end namespace internal
+
+/***************************************************************************
+* Part 4 : implementation of DenseBase methods
+***************************************************************************/
+
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
+  ::lazyAssign(const DenseBase<OtherDerived>& other)
+{
+  enum{
+    SameType = internal::is_same<typename Derived::Scalar,typename OtherDerived::Scalar>::value
+  };
+
+  EIGEN_STATIC_ASSERT_LVALUE(Derived)
+  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
+  EIGEN_STATIC_ASSERT(SameType,YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+
+#ifdef EIGEN_DEBUG_ASSIGN
+  internal::assign_traits<Derived, OtherDerived>::debug();
+#endif
+  eigen_assert(rows() == other.rows() && cols() == other.cols());
+  internal::assign_impl<Derived, OtherDerived, int(SameType) ? int(internal::assign_traits<Derived, OtherDerived>::Traversal)
+                                                             : int(InvalidTraversal)>::run(derived(),other.derived());
+#ifndef EIGEN_NO_DEBUG
+  checkTransposeAliasing(other.derived());
+#endif
+  return derived();
+}
+
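// lazyAssign never materializes a temporary, so it is only safe when source and
// destination do not alias; checkTransposeAliasing above catches the most common
// violation in debug builds. A short usage sketch (illustrative):

#include <Eigen/Core>

void aliasing_demo()
{
  Eigen::MatrixXf m(3, 3), n(3, 3);
  m.setRandom();
  n.setRandom();

  m = n.transpose();          // fine: distinct objects, lazy copy is safe
  // m = m.transpose();       // aliasing bug: asserts via checkTransposeAliasing
  m = m.transpose().eval();   // fine: .eval() breaks the aliasing with a temporary
  m.transposeInPlace();       // the dedicated in-place alternative
}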
+namespace internal {
+
+template<typename Derived, typename OtherDerived,
+         bool EvalBeforeAssigning = (int(internal::traits<OtherDerived>::Flags) & EvalBeforeAssigningBit) != 0,
+         bool NeedToTranspose = ((int(Derived::RowsAtCompileTime) == 1 && int(OtherDerived::ColsAtCompileTime) == 1)
+                              |   // FIXME | instead of || to please GCC 4.4.0 stupid warning "suggest parentheses around &&".
+                                  // revert to || as soon as not needed anymore.
+                                 (int(Derived::ColsAtCompileTime) == 1 && int(OtherDerived::RowsAtCompileTime) == 1))
+                              && int(Derived::SizeAtCompileTime) != 1>
+struct assign_selector;
+
+template<typename Derived, typename OtherDerived>
+struct assign_selector<Derived,OtherDerived,false,false> {
+  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
+  template<typename ActualDerived, typename ActualOtherDerived>
+  static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; }
+};
+template<typename Derived, typename OtherDerived>
+struct assign_selector<Derived,OtherDerived,true,false> {
+  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
+};
+template<typename Derived, typename OtherDerived>
+struct assign_selector<Derived,OtherDerived,false,true> {
+  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
+  template<typename ActualDerived, typename ActualOtherDerived>
+  static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose<ActualDerived> dstTrans(dst); other.evalTo(dstTrans); return dst; }
+};
+template<typename Derived, typename OtherDerived>
+struct assign_selector<Derived,OtherDerived,true,true> {
+  static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
+};
+
+} // end namespace internal
+
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
+{
+  return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
+}
+
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
+{
+  return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+}
+
+template<typename Derived>
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
+{
+  return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
+}
+
+template<typename Derived>
+template <typename OtherDerived>
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
+{
+  return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
+}
+
+template<typename Derived>
+template <typename OtherDerived>
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
+{
+  return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
+}
+
+template<typename Derived>
+template<typename OtherDerived>
+EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
+{
+  return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_ASSIGN_H
-- 
cgit v1.2.3