diff options
Diffstat (limited to 'eigen/Eigen/src/Jacobi/Jacobi.h')
-rw-r--r-- | eigen/Eigen/src/Jacobi/Jacobi.h | 240 |
1 files changed, 131 insertions, 109 deletions
diff --git a/eigen/Eigen/src/Jacobi/Jacobi.h b/eigen/Eigen/src/Jacobi/Jacobi.h index c30326e..437e666 100644 --- a/eigen/Eigen/src/Jacobi/Jacobi.h +++ b/eigen/Eigen/src/Jacobi/Jacobi.h @@ -298,61 +298,119 @@ inline void MatrixBase<Derived>::applyOnTheRight(Index p, Index q, const JacobiR } namespace internal { -template<typename VectorX, typename VectorY, typename OtherScalar> -void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j) + +template<typename Scalar, typename OtherScalar, + int SizeAtCompileTime, int MinAlignment, bool Vectorizable> +struct apply_rotation_in_the_plane_selector { - typedef typename VectorX::Scalar Scalar; - enum { - PacketSize = packet_traits<Scalar>::size, - OtherPacketSize = packet_traits<OtherScalar>::size - }; - typedef typename packet_traits<Scalar>::type Packet; - typedef typename packet_traits<OtherScalar>::type OtherPacket; - eigen_assert(xpr_x.size() == xpr_y.size()); - Index size = xpr_x.size(); - Index incrx = xpr_x.derived().innerStride(); - Index incry = xpr_y.derived().innerStride(); + static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s) + { + for(Index i=0; i<size; ++i) + { + Scalar xi = *x; + Scalar yi = *y; + *x = c * xi + numext::conj(s) * yi; + *y = -s * xi + numext::conj(c) * yi; + x += incrx; + y += incry; + } + } +}; - Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0); - Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0); - - OtherScalar c = j.c(); - OtherScalar s = j.s(); - if (c==OtherScalar(1) && s==OtherScalar(0)) - return; +template<typename Scalar, typename OtherScalar, + int SizeAtCompileTime, int MinAlignment> +struct apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,true /* vectorizable */> +{ + static inline void run(Scalar *x, Index incrx, Scalar *y, Index incry, Index size, OtherScalar c, OtherScalar s) + { + enum { + PacketSize = packet_traits<Scalar>::size, + OtherPacketSize = packet_traits<OtherScalar>::size + }; + typedef typename packet_traits<Scalar>::type Packet; + typedef typename packet_traits<OtherScalar>::type OtherPacket; + + /*** dynamic-size vectorized paths ***/ + if(SizeAtCompileTime == Dynamic && ((incrx==1 && incry==1) || PacketSize == 1)) + { + // both vectors are sequentially stored in memory => vectorization + enum { Peeling = 2 }; - /*** dynamic-size vectorized paths ***/ + Index alignedStart = internal::first_default_aligned(y, size); + Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; - if(VectorX::SizeAtCompileTime == Dynamic && - (VectorX::Flags & VectorY::Flags & PacketAccessBit) && - (PacketSize == OtherPacketSize) && - ((incrx==1 && incry==1) || PacketSize == 1)) - { - // both vectors are sequentially stored in memory => vectorization - enum { Peeling = 2 }; + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; - Index alignedStart = internal::first_default_aligned(y, size); - Index alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize; + for(Index i=0; i<alignedStart; ++i) + { + Scalar xi = x[i]; + Scalar yi = y[i]; + x[i] = c * xi + numext::conj(s) * yi; + y[i] = -s * xi + numext::conj(c) * yi; + } - const OtherPacket pc = pset1<OtherPacket>(c); - const OtherPacket ps = pset1<OtherPacket>(s); - conj_helper<OtherPacket,Packet,NumTraits<OtherScalar>::IsComplex,false> pcj; - conj_helper<OtherPacket,Packet,false,false> pm; + Scalar* EIGEN_RESTRICT px = x + alignedStart; + Scalar* EIGEN_RESTRICT py = y + alignedStart; - for(Index i=0; i<alignedStart; ++i) - { - Scalar xi = x[i]; - Scalar yi = y[i]; - x[i] = c * xi + numext::conj(s) * yi; - y[i] = -s * xi + numext::conj(c) * yi; - } + if(internal::first_default_aligned(x, size)==alignedStart) + { + for(Index i=alignedStart; i<alignedEnd; i+=PacketSize) + { + Packet xi = pload<Packet>(px); + Packet yi = pload<Packet>(py); + pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + px += PacketSize; + py += PacketSize; + } + } + else + { + Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize); + for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize) + { + Packet xi = ploadu<Packet>(px); + Packet xi1 = ploadu<Packet>(px+PacketSize); + Packet yi = pload <Packet>(py); + Packet yi1 = pload <Packet>(py+PacketSize); + pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); + pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); + px += Peeling*PacketSize; + py += Peeling*PacketSize; + } + if(alignedEnd!=peelingEnd) + { + Packet xi = ploadu<Packet>(x+peelingEnd); + Packet yi = pload <Packet>(y+peelingEnd); + pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); + pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); + } + } - Scalar* EIGEN_RESTRICT px = x + alignedStart; - Scalar* EIGEN_RESTRICT py = y + alignedStart; + for(Index i=alignedEnd; i<size; ++i) + { + Scalar xi = x[i]; + Scalar yi = y[i]; + x[i] = c * xi + numext::conj(s) * yi; + y[i] = -s * xi + numext::conj(c) * yi; + } + } - if(internal::first_default_aligned(x, size)==alignedStart) + /*** fixed-size vectorized path ***/ + else if(SizeAtCompileTime != Dynamic && MinAlignment>0) // FIXME should be compared to the required alignment { - for(Index i=alignedStart; i<alignedEnd; i+=PacketSize) + const OtherPacket pc = pset1<OtherPacket>(c); + const OtherPacket ps = pset1<OtherPacket>(s); + conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj; + conj_helper<OtherPacket,Packet,false,false> pm; + Scalar* EIGEN_RESTRICT px = x; + Scalar* EIGEN_RESTRICT py = y; + for(Index i=0; i<size; i+=PacketSize) { Packet xi = pload<Packet>(px); Packet yi = pload<Packet>(py); @@ -362,76 +420,40 @@ void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x py += PacketSize; } } - else - { - Index peelingEnd = alignedStart + ((size-alignedStart)/(Peeling*PacketSize))*(Peeling*PacketSize); - for(Index i=alignedStart; i<peelingEnd; i+=Peeling*PacketSize) - { - Packet xi = ploadu<Packet>(px); - Packet xi1 = ploadu<Packet>(px+PacketSize); - Packet yi = pload <Packet>(py); - Packet yi1 = pload <Packet>(py+PacketSize); - pstoreu(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); - pstoreu(px+PacketSize, padd(pm.pmul(pc,xi1),pcj.pmul(ps,yi1))); - pstore (py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); - pstore (py+PacketSize, psub(pcj.pmul(pc,yi1),pm.pmul(ps,xi1))); - px += Peeling*PacketSize; - py += Peeling*PacketSize; - } - if(alignedEnd!=peelingEnd) - { - Packet xi = ploadu<Packet>(x+peelingEnd); - Packet yi = pload <Packet>(y+peelingEnd); - pstoreu(x+peelingEnd, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); - pstore (y+peelingEnd, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); - } - } - for(Index i=alignedEnd; i<size; ++i) + /*** non-vectorized path ***/ + else { - Scalar xi = x[i]; - Scalar yi = y[i]; - x[i] = c * xi + numext::conj(s) * yi; - y[i] = -s * xi + numext::conj(c) * yi; + apply_rotation_in_the_plane_selector<Scalar,OtherScalar,SizeAtCompileTime,MinAlignment,false>::run(x,incrx,y,incry,size,c,s); } } +}; - /*** fixed-size vectorized path ***/ - else if(VectorX::SizeAtCompileTime != Dynamic && - (VectorX::Flags & VectorY::Flags & PacketAccessBit) && - (PacketSize == OtherPacketSize) && - (EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment)>0)) // FIXME should be compared to the required alignment - { - const OtherPacket pc = pset1<OtherPacket>(c); - const OtherPacket ps = pset1<OtherPacket>(s); - conj_helper<OtherPacket,Packet,NumTraits<OtherPacket>::IsComplex,false> pcj; - conj_helper<OtherPacket,Packet,false,false> pm; - Scalar* EIGEN_RESTRICT px = x; - Scalar* EIGEN_RESTRICT py = y; - for(Index i=0; i<size; i+=PacketSize) - { - Packet xi = pload<Packet>(px); - Packet yi = pload<Packet>(py); - pstore(px, padd(pm.pmul(pc,xi),pcj.pmul(ps,yi))); - pstore(py, psub(pcj.pmul(pc,yi),pm.pmul(ps,xi))); - px += PacketSize; - py += PacketSize; - } - } +template<typename VectorX, typename VectorY, typename OtherScalar> +void /*EIGEN_DONT_INLINE*/ apply_rotation_in_the_plane(DenseBase<VectorX>& xpr_x, DenseBase<VectorY>& xpr_y, const JacobiRotation<OtherScalar>& j) +{ + typedef typename VectorX::Scalar Scalar; + const bool Vectorizable = (VectorX::Flags & VectorY::Flags & PacketAccessBit) + && (int(packet_traits<Scalar>::size) == int(packet_traits<OtherScalar>::size)); - /*** non-vectorized path ***/ - else - { - for(Index i=0; i<size; ++i) - { - Scalar xi = *x; - Scalar yi = *y; - *x = c * xi + numext::conj(s) * yi; - *y = -s * xi + numext::conj(c) * yi; - x += incrx; - y += incry; - } - } + eigen_assert(xpr_x.size() == xpr_y.size()); + Index size = xpr_x.size(); + Index incrx = xpr_x.derived().innerStride(); + Index incry = xpr_y.derived().innerStride(); + + Scalar* EIGEN_RESTRICT x = &xpr_x.derived().coeffRef(0); + Scalar* EIGEN_RESTRICT y = &xpr_y.derived().coeffRef(0); + + OtherScalar c = j.c(); + OtherScalar s = j.s(); + if (c==OtherScalar(1) && s==OtherScalar(0)) + return; + + apply_rotation_in_the_plane_selector< + Scalar,OtherScalar, + VectorX::SizeAtCompileTime, + EIGEN_PLAIN_ENUM_MIN(evaluator<VectorX>::Alignment, evaluator<VectorY>::Alignment), + Vectorizable>::run(x,incrx,y,incry,size,c,s); } } // end namespace internal |