diff options
author | Stanislaw Halik <sthalik@misaki.pl> | 2017-03-25 14:17:07 +0100 |
---|---|---|
committer | Stanislaw Halik <sthalik@misaki.pl> | 2017-03-25 14:17:07 +0100 |
commit | 35f7829af10c61e33dd2e2a7a015058e11a11ea0 (patch) | |
tree | 7135010dcf8fd0a49f3020d52112709bcb883bd6 /eigen/Eigen/src/LU | |
parent | 6e8724193e40a932faf9064b664b529e7301c578 (diff) |
update
Diffstat (limited to 'eigen/Eigen/src/LU')
-rw-r--r-- | eigen/Eigen/src/LU/CMakeLists.txt | 8 | ||||
-rw-r--r-- | eigen/Eigen/src/LU/Determinant.h | 2 | ||||
-rw-r--r-- | eigen/Eigen/src/LU/FullPivLU.h | 312 | ||||
-rw-r--r-- | eigen/Eigen/src/LU/InverseImpl.h (renamed from eigen/Eigen/src/LU/Inverse.h) | 79 | ||||
-rw-r--r-- | eigen/Eigen/src/LU/PartialPivLU.h | 252 | ||||
-rw-r--r-- | eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h (renamed from eigen/Eigen/src/LU/PartialPivLU_MKL.h) | 26 | ||||
-rw-r--r-- | eigen/Eigen/src/LU/arch/CMakeLists.txt | 6 | ||||
-rw-r--r-- | eigen/Eigen/src/LU/arch/Inverse_SSE.h | 47 |
8 files changed, 490 insertions, 242 deletions
diff --git a/eigen/Eigen/src/LU/CMakeLists.txt b/eigen/Eigen/src/LU/CMakeLists.txt deleted file mode 100644 index e0d8d78..0000000 --- a/eigen/Eigen/src/LU/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -FILE(GLOB Eigen_LU_SRCS "*.h") - -INSTALL(FILES - ${Eigen_LU_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU COMPONENT Devel - ) - -ADD_SUBDIRECTORY(arch) diff --git a/eigen/Eigen/src/LU/Determinant.h b/eigen/Eigen/src/LU/Determinant.h index bb8e78a..d6a3c1e 100644 --- a/eigen/Eigen/src/LU/Determinant.h +++ b/eigen/Eigen/src/LU/Determinant.h @@ -92,7 +92,7 @@ template<typename Derived> inline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const { eigen_assert(rows() == cols()); - typedef typename internal::nested<Derived,Base::RowsAtCompileTime>::type Nested; + typedef typename internal::nested_eval<Derived,Base::RowsAtCompileTime>::type Nested; return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived()); } diff --git a/eigen/Eigen/src/LU/FullPivLU.h b/eigen/Eigen/src/LU/FullPivLU.h index e384704..ec61086 100644 --- a/eigen/Eigen/src/LU/FullPivLU.h +++ b/eigen/Eigen/src/LU/FullPivLU.h @@ -10,7 +10,18 @@ #ifndef EIGEN_LU_H #define EIGEN_LU_H -namespace Eigen { +namespace Eigen { + +namespace internal { +template<typename _MatrixType> struct traits<FullPivLU<_MatrixType> > + : traits<_MatrixType> +{ + typedef MatrixXpr XprKind; + typedef SolverStorage StorageKind; + enum { Flags = 0 }; +}; + +} // end namespace internal /** \ingroup LU_Module * @@ -18,7 +29,7 @@ namespace Eigen { * * \brief LU decomposition of a matrix with complete pivoting, and related features * - * \param MatrixType the type of the matrix of which we are computing the LU decomposition + * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition * * This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is * decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is @@ -41,27 +52,28 @@ namespace Eigen { * \include class_FullPivLU.cpp * Output: \verbinclude class_FullPivLU.out * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * * \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse() */ template<typename _MatrixType> class FullPivLU + : public SolverBase<FullPivLU<_MatrixType> > { public: typedef _MatrixType MatrixType; + typedef SolverBase<FullPivLU> Base; + + EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU) + // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; - typedef typename internal::traits<MatrixType>::StorageKind StorageKind; - typedef typename MatrixType::Index Index; - typedef typename internal::plain_row_type<MatrixType, Index>::type IntRowVectorType; - typedef typename internal::plain_col_type<MatrixType, Index>::type IntColVectorType; + typedef typename internal::plain_row_type<MatrixType, StorageIndex>::type IntRowVectorType; + typedef typename internal::plain_col_type<MatrixType, StorageIndex>::type IntColVectorType; typedef PermutationMatrix<ColsAtCompileTime, MaxColsAtCompileTime> PermutationQType; typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationPType; + typedef typename MatrixType::PlainObject PlainObject; /** * \brief Default Constructor. @@ -84,7 +96,17 @@ template<typename _MatrixType> class FullPivLU * \param matrix the matrix of which to compute the LU decomposition. * It is required to be nonzero. */ - FullPivLU(const MatrixType& matrix); + template<typename InputType> + explicit FullPivLU(const EigenBase<InputType>& matrix); + + /** \brief Constructs a LU factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref. + * + * \sa FullPivLU(const EigenBase&) + */ + template<typename InputType> + explicit FullPivLU(EigenBase<InputType>& matrix); /** Computes the LU decomposition of the given matrix. * @@ -93,7 +115,12 @@ template<typename _MatrixType> class FullPivLU * * \returns a reference to *this */ - FullPivLU& compute(const MatrixType& matrix); + template<typename InputType> + FullPivLU& compute(const EigenBase<InputType>& matrix) { + m_lu = matrix.derived(); + computeInPlace(); + return *this; + } /** \returns the LU decomposition matrix: the upper-triangular part is U, the * unit-lower-triangular part is L (at least for square matrices; in the non-square @@ -129,7 +156,7 @@ template<typename _MatrixType> class FullPivLU * * \sa permutationQ() */ - inline const PermutationPType& permutationP() const + EIGEN_DEVICE_FUNC inline const PermutationPType& permutationP() const { eigen_assert(m_isInitialized && "LU is not initialized."); return m_p; @@ -166,7 +193,7 @@ template<typename _MatrixType> class FullPivLU } /** \returns the image of the matrix, also called its column-space. The columns of the returned matrix - * will form a basis of the kernel. + * will form a basis of the image (column-space). * * \param originalMatrix the original matrix, of which *this is the LU decomposition. * The reason why it is needed to pass it here, is that this allows @@ -210,12 +237,22 @@ template<typename _MatrixType> class FullPivLU * * \sa TriangularView::solve(), kernel(), inverse() */ + // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion. template<typename Rhs> - inline const internal::solve_retval<FullPivLU, Rhs> + inline const Solve<FullPivLU, Rhs> solve(const MatrixBase<Rhs>& b) const { eigen_assert(m_isInitialized && "LU is not initialized."); - return internal::solve_retval<FullPivLU, Rhs>(*this, b.derived()); + return Solve<FullPivLU, Rhs>(*this, b.derived()); + } + + /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is + the LU decomposition. + */ + inline RealScalar rcond() const + { + eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); + return internal::rcond_estimate_helper(m_l1_norm, *this); } /** \returns the determinant of the matrix of which @@ -360,33 +397,44 @@ template<typename _MatrixType> class FullPivLU * * \sa MatrixBase::inverse() */ - inline const internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType> inverse() const + inline const Inverse<FullPivLU> inverse() const { eigen_assert(m_isInitialized && "LU is not initialized."); eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the inverse of a non-square matrix!"); - return internal::solve_retval<FullPivLU,typename MatrixType::IdentityReturnType> - (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols())); + return Inverse<FullPivLU>(*this); } MatrixType reconstructedMatrix() const; - inline Index rows() const { return m_lu.rows(); } - inline Index cols() const { return m_lu.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lu.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_lu.cols(); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template<typename RhsType, typename DstType> + void _solve_impl(const RhsType &rhs, DstType &dst) const; + + template<bool Conjugate, typename RhsType, typename DstType> + void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const; + #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + + void computeInPlace(); + MatrixType m_lu; PermutationPType m_p; PermutationQType m_q; IntColVectorType m_rowsTranspositions; IntRowVectorType m_colsTranspositions; - Index m_det_pq, m_nonzero_pivots; + Index m_nonzero_pivots; + RealScalar m_l1_norm; RealScalar m_maxpivot, m_prescribedThreshold; + signed char m_det_pq; bool m_isInitialized, m_usePrescribedThreshold; }; @@ -409,7 +457,8 @@ FullPivLU<MatrixType>::FullPivLU(Index rows, Index cols) } template<typename MatrixType> -FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix) +template<typename InputType> +FullPivLU<MatrixType>::FullPivLU(const EigenBase<InputType>& matrix) : m_lu(matrix.rows(), matrix.cols()), m_p(matrix.rows()), m_q(matrix.cols()), @@ -418,28 +467,41 @@ FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix) m_isInitialized(false), m_usePrescribedThreshold(false) { - compute(matrix); + compute(matrix.derived()); +} + +template<typename MatrixType> +template<typename InputType> +FullPivLU<MatrixType>::FullPivLU(EigenBase<InputType>& matrix) + : m_lu(matrix.derived()), + m_p(matrix.rows()), + m_q(matrix.cols()), + m_rowsTranspositions(matrix.rows()), + m_colsTranspositions(matrix.cols()), + m_isInitialized(false), + m_usePrescribedThreshold(false) +{ + computeInPlace(); } template<typename MatrixType> -FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix) +void FullPivLU<MatrixType>::computeInPlace() { check_template_parameters(); - + // the permutations are stored as int indices, so just to be sure: - eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest()); - - m_isInitialized = true; - m_lu = matrix; + eigen_assert(m_lu.rows()<=NumTraits<int>::highest() && m_lu.cols()<=NumTraits<int>::highest()); + + m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff(); - const Index size = matrix.diagonalSize(); - const Index rows = matrix.rows(); - const Index cols = matrix.cols(); + const Index size = m_lu.diagonalSize(); + const Index rows = m_lu.rows(); + const Index cols = m_lu.cols(); // will store the transpositions, before we accumulate them at the end. // can't accumulate on-the-fly because that will be done in reverse order for the rows. - m_rowsTranspositions.resize(matrix.rows()); - m_colsTranspositions.resize(matrix.cols()); + m_rowsTranspositions.resize(m_lu.rows()); + m_colsTranspositions.resize(m_lu.cols()); Index number_of_transpositions = 0; // number of NONTRIVIAL transpositions, i.e. m_rowsTranspositions[i]!=i m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) @@ -451,14 +513,16 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix) // biggest coefficient in the remaining bottom-right corner (starting at row k, col k) Index row_of_biggest_in_corner, col_of_biggest_in_corner; - RealScalar biggest_in_corner; + typedef internal::scalar_score_coeff_op<Scalar> Scoring; + typedef typename Scoring::result_type Score; + Score biggest_in_corner; biggest_in_corner = m_lu.bottomRightCorner(rows-k, cols-k) - .cwiseAbs() + .unaryExpr(Scoring()) .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner); row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner, col_of_biggest_in_corner += k; // need to add k to them. - if(biggest_in_corner==RealScalar(0)) + if(biggest_in_corner==Score(0)) { // before exiting, make sure to initialize the still uninitialized transpositions // in a sane state without destroying what we already have. @@ -471,7 +535,8 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix) break; } - if(biggest_in_corner > m_maxpivot) m_maxpivot = biggest_in_corner; + RealScalar abs_pivot = internal::abs_knowing_score<Scalar>()(m_lu(row_of_biggest_in_corner, col_of_biggest_in_corner), biggest_in_corner); + if(abs_pivot > m_maxpivot) m_maxpivot = abs_pivot; // Now that we've found the pivot, we need to apply the row/col swaps to // bring it to the location (k,k). @@ -508,7 +573,8 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix) m_q.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k)); m_det_pq = (number_of_transpositions%2) ? -1 : 1; - return *this; + + m_isInitialized = true; } template<typename MatrixType> @@ -671,64 +737,136 @@ struct image_retval<FullPivLU<_MatrixType> > /***** Implementation of solve() *****************************************************/ -template<typename _MatrixType, typename Rhs> -struct solve_retval<FullPivLU<_MatrixType>, Rhs> - : solve_retval_base<FullPivLU<_MatrixType>, Rhs> +} // end namespace internal + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template<typename _MatrixType> +template<typename RhsType, typename DstType> +void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const { - EIGEN_MAKE_SOLVE_HELPERS(FullPivLU<_MatrixType>,Rhs) + /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}. + * So we proceed as follows: + * Step 1: compute c = P * rhs. + * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible. + * Step 3: replace c by the solution x to Ux = c. May or may not exist. + * Step 4: result = Q * c; + */ - template<typename Dest> void evalTo(Dest& dst) const + const Index rows = this->rows(), + cols = this->cols(), + nonzero_pivots = this->rank(); + eigen_assert(rhs.rows() == rows); + const Index smalldim = (std::min)(rows, cols); + + if(nonzero_pivots == 0) { - /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}. - * So we proceed as follows: - * Step 1: compute c = P * rhs. - * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible. - * Step 3: replace c by the solution x to Ux = c. May or may not exist. - * Step 4: result = Q * c; - */ - - const Index rows = dec().rows(), cols = dec().cols(), - nonzero_pivots = dec().rank(); - eigen_assert(rhs().rows() == rows); - const Index smalldim = (std::min)(rows, cols); - - if(nonzero_pivots == 0) - { - dst.setZero(); - return; - } + dst.setZero(); + return; + } + + typename RhsType::PlainObject c(rhs.rows(), rhs.cols()); + + // Step 1 + c = permutationP() * rhs; - typename Rhs::PlainObject c(rhs().rows(), rhs().cols()); + // Step 2 + m_lu.topLeftCorner(smalldim,smalldim) + .template triangularView<UnitLower>() + .solveInPlace(c.topRows(smalldim)); + if(rows>cols) + c.bottomRows(rows-cols) -= m_lu.bottomRows(rows-cols) * c.topRows(cols); - // Step 1 - c = dec().permutationP() * rhs(); + // Step 3 + m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots) + .template triangularView<Upper>() + .solveInPlace(c.topRows(nonzero_pivots)); + // Step 4 + for(Index i = 0; i < nonzero_pivots; ++i) + dst.row(permutationQ().indices().coeff(i)) = c.row(i); + for(Index i = nonzero_pivots; i < m_lu.cols(); ++i) + dst.row(permutationQ().indices().coeff(i)).setZero(); +} + +template<typename _MatrixType> +template<bool Conjugate, typename RhsType, typename DstType> +void FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const +{ + /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}, + * and since permutations are real and unitary, we can write this + * as A^T = Q U^T L^T P, + * So we proceed as follows: + * Step 1: compute c = Q^T rhs. + * Step 2: replace c by the solution x to U^T x = c. May or may not exist. + * Step 3: replace c by the solution x to L^T x = c. + * Step 4: result = P^T c. + * If Conjugate is true, replace "^T" by "^*" above. + */ + + const Index rows = this->rows(), cols = this->cols(), + nonzero_pivots = this->rank(); + eigen_assert(rhs.rows() == cols); + const Index smalldim = (std::min)(rows, cols); + + if(nonzero_pivots == 0) + { + dst.setZero(); + return; + } + + typename RhsType::PlainObject c(rhs.rows(), rhs.cols()); + + // Step 1 + c = permutationQ().inverse() * rhs; + + if (Conjugate) { // Step 2 - dec().matrixLU() - .topLeftCorner(smalldim,smalldim) + m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots) + .template triangularView<Upper>() + .adjoint() + .solveInPlace(c.topRows(nonzero_pivots)); + // Step 3 + m_lu.topLeftCorner(smalldim, smalldim) .template triangularView<UnitLower>() + .adjoint() .solveInPlace(c.topRows(smalldim)); - if(rows>cols) - { - c.bottomRows(rows-cols) - -= dec().matrixLU().bottomRows(rows-cols) - * c.topRows(cols); - } - - // Step 3 - dec().matrixLU() - .topLeftCorner(nonzero_pivots, nonzero_pivots) + } else { + // Step 2 + m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots) .template triangularView<Upper>() + .transpose() .solveInPlace(c.topRows(nonzero_pivots)); + // Step 3 + m_lu.topLeftCorner(smalldim, smalldim) + .template triangularView<UnitLower>() + .transpose() + .solveInPlace(c.topRows(smalldim)); + } + + // Step 4 + PermutationPType invp = permutationP().inverse().eval(); + for(Index i = 0; i < smalldim; ++i) + dst.row(invp.indices().coeff(i)) = c.row(i); + for(Index i = smalldim; i < rows; ++i) + dst.row(invp.indices().coeff(i)).setZero(); +} + +#endif + +namespace internal { + - // Step 4 - for(Index i = 0; i < nonzero_pivots; ++i) - dst.row(dec().permutationQ().indices().coeff(i)) = c.row(i); - for(Index i = nonzero_pivots; i < dec().matrixLU().cols(); ++i) - dst.row(dec().permutationQ().indices().coeff(i)).setZero(); +/***** Implementation of inverse() *****************************************************/ +template<typename DstXprType, typename MatrixType> +struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename FullPivLU<MatrixType>::Scalar>, Dense2Dense> +{ + typedef FullPivLU<MatrixType> LuType; + typedef Inverse<LuType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename MatrixType::Scalar> &) + { + dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; - } // end namespace internal /******* MatrixBase methods *****************************************************************/ diff --git a/eigen/Eigen/src/LU/Inverse.h b/eigen/Eigen/src/LU/InverseImpl.h index e836fd6..018f99b 100644 --- a/eigen/Eigen/src/LU/Inverse.h +++ b/eigen/Eigen/src/LU/InverseImpl.h @@ -2,13 +2,14 @@ // for linear algebra. // // Copyright (C) 2008-2010 Benoit Jacob <jacob.benoit.1@gmail.com> +// Copyright (C) 2014 Gael Guennebaud <gael.guennebaud@inria.fr> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_INVERSE_H -#define EIGEN_INVERSE_H +#ifndef EIGEN_INVERSE_IMPL_H +#define EIGEN_INVERSE_IMPL_H namespace Eigen { @@ -21,6 +22,7 @@ namespace internal { template<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime> struct compute_inverse { + EIGEN_DEVICE_FUNC static inline void run(const MatrixType& matrix, ResultType& result) { result = matrix.partialPivLu().inverse(); @@ -37,16 +39,19 @@ struct compute_inverse_and_det_with_check { /* nothing! general case not support template<typename MatrixType, typename ResultType> struct compute_inverse<MatrixType, ResultType, 1> { + EIGEN_DEVICE_FUNC static inline void run(const MatrixType& matrix, ResultType& result) { typedef typename MatrixType::Scalar Scalar; - result.coeffRef(0,0) = Scalar(1) / matrix.coeff(0,0); + internal::evaluator<MatrixType> matrixEval(matrix); + result.coeffRef(0,0) = Scalar(1) / matrixEval.coeff(0,0); } }; template<typename MatrixType, typename ResultType> struct compute_inverse_and_det_with_check<MatrixType, ResultType, 1> { + EIGEN_DEVICE_FUNC static inline void run( const MatrixType& matrix, const typename MatrixType::RealScalar& absDeterminantThreshold, @@ -67,19 +72,21 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 1> ****************************/ template<typename MatrixType, typename ResultType> +EIGEN_DEVICE_FUNC inline void compute_inverse_size2_helper( const MatrixType& matrix, const typename ResultType::Scalar& invdet, ResultType& result) { - result.coeffRef(0,0) = matrix.coeff(1,1) * invdet; + result.coeffRef(0,0) = matrix.coeff(1,1) * invdet; result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet; result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet; - result.coeffRef(1,1) = matrix.coeff(0,0) * invdet; + result.coeffRef(1,1) = matrix.coeff(0,0) * invdet; } template<typename MatrixType, typename ResultType> struct compute_inverse<MatrixType, ResultType, 2> { + EIGEN_DEVICE_FUNC static inline void run(const MatrixType& matrix, ResultType& result) { typedef typename ResultType::Scalar Scalar; @@ -91,6 +98,7 @@ struct compute_inverse<MatrixType, ResultType, 2> template<typename MatrixType, typename ResultType> struct compute_inverse_and_det_with_check<MatrixType, ResultType, 2> { + EIGEN_DEVICE_FUNC static inline void run( const MatrixType& matrix, const typename MatrixType::RealScalar& absDeterminantThreshold, @@ -114,6 +122,7 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 2> ****************************/ template<typename MatrixType, int i, int j> +EIGEN_DEVICE_FUNC inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m) { enum { @@ -127,6 +136,7 @@ inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m) } template<typename MatrixType, typename ResultType> +EIGEN_DEVICE_FUNC inline void compute_inverse_size3_helper( const MatrixType& matrix, const typename ResultType::Scalar& invdet, @@ -145,6 +155,7 @@ inline void compute_inverse_size3_helper( template<typename MatrixType, typename ResultType> struct compute_inverse<MatrixType, ResultType, 3> { + EIGEN_DEVICE_FUNC static inline void run(const MatrixType& matrix, ResultType& result) { typedef typename ResultType::Scalar Scalar; @@ -161,6 +172,7 @@ struct compute_inverse<MatrixType, ResultType, 3> template<typename MatrixType, typename ResultType> struct compute_inverse_and_det_with_check<MatrixType, ResultType, 3> { + EIGEN_DEVICE_FUNC static inline void run( const MatrixType& matrix, const typename MatrixType::RealScalar& absDeterminantThreshold, @@ -188,6 +200,7 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 3> ****************************/ template<typename Derived> +EIGEN_DEVICE_FUNC inline const typename Derived::Scalar general_det3_helper (const MatrixBase<Derived>& matrix, int i1, int i2, int i3, int j1, int j2, int j3) { @@ -196,6 +209,7 @@ inline const typename Derived::Scalar general_det3_helper } template<typename MatrixType, int i, int j> +EIGEN_DEVICE_FUNC inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix) { enum { @@ -214,6 +228,7 @@ inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix) template<int Arch, typename Scalar, typename MatrixType, typename ResultType> struct compute_inverse_size4 { + EIGEN_DEVICE_FUNC static void run(const MatrixType& matrix, ResultType& result) { result.coeffRef(0,0) = cofactor_4x4<MatrixType,0,0>(matrix); @@ -246,6 +261,7 @@ struct compute_inverse<MatrixType, ResultType, 4> template<typename MatrixType, typename ResultType> struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4> { + EIGEN_DEVICE_FUNC static inline void run( const MatrixType& matrix, const typename MatrixType::RealScalar& absDeterminantThreshold, @@ -265,38 +281,37 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4> *** MatrixBase methods *** *************************/ -template<typename MatrixType> -struct traits<inverse_impl<MatrixType> > -{ - typedef typename MatrixType::PlainObject ReturnType; -}; - -template<typename MatrixType> -struct inverse_impl : public ReturnByValue<inverse_impl<MatrixType> > -{ - typedef typename MatrixType::Index Index; - typedef typename internal::eval<MatrixType>::type MatrixTypeNested; - typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned; - MatrixTypeNested m_matrix; - - inverse_impl(const MatrixType& matrix) - : m_matrix(matrix) - {} +} // end namespace internal - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } +namespace internal { - template<typename Dest> inline void evalTo(Dest& dst) const +// Specialization for "dense = dense_xpr.inverse()" +template<typename DstXprType, typename XprType> +struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar>, Dense2Dense> +{ + typedef Inverse<XprType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar> &) { - const int Size = EIGEN_PLAIN_ENUM_MIN(MatrixType::ColsAtCompileTime,Dest::ColsAtCompileTime); + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime); EIGEN_ONLY_USED_FOR_DEBUG(Size); - eigen_assert(( (Size<=1) || (Size>4) || (extract_data(m_matrix)!=0 && extract_data(m_matrix)!=extract_data(dst))) + eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst))) && "Aliasing problem detected in inverse(), you need to do inverse().eval() here."); - compute_inverse<MatrixTypeNestedCleaned, Dest>::run(m_matrix, dst); + typedef typename internal::nested_eval<XprType,XprType::ColsAtCompileTime>::type ActualXprType; + typedef typename internal::remove_all<ActualXprType>::type ActualXprTypeCleanded; + + ActualXprType actual_xpr(src.nestedExpression()); + + compute_inverse<ActualXprTypeCleanded, DstXprType>::run(actual_xpr, dst); } }; + } // end namespace internal /** \lu_module @@ -317,11 +332,11 @@ struct inverse_impl : public ReturnByValue<inverse_impl<MatrixType> > * \sa computeInverseAndDetWithCheck() */ template<typename Derived> -inline const internal::inverse_impl<Derived> MatrixBase<Derived>::inverse() const +inline const Inverse<Derived> MatrixBase<Derived>::inverse() const { EIGEN_STATIC_ASSERT(!NumTraits<Scalar>::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES) eigen_assert(rows() == cols()); - return internal::inverse_impl<Derived>(derived()); + return Inverse<Derived>(derived()); } /** \lu_module @@ -357,7 +372,7 @@ inline void MatrixBase<Derived>::computeInverseAndDetWithCheck( // for larger sizes, evaluating has negligible cost and limits code size. typedef typename internal::conditional< RowsAtCompileTime == 2, - typename internal::remove_all<typename internal::nested<Derived, 2>::type>::type, + typename internal::remove_all<typename internal::nested_eval<Derived, 2>::type>::type, PlainObject >::type MatrixType; internal::compute_inverse_and_det_with_check<MatrixType, ResultType>::run @@ -397,4 +412,4 @@ inline void MatrixBase<Derived>::computeInverseWithCheck( } // end namespace Eigen -#endif // EIGEN_INVERSE_H +#endif // EIGEN_INVERSE_IMPL_H diff --git a/eigen/Eigen/src/LU/PartialPivLU.h b/eigen/Eigen/src/LU/PartialPivLU.h index 7d1db94..d439618 100644 --- a/eigen/Eigen/src/LU/PartialPivLU.h +++ b/eigen/Eigen/src/LU/PartialPivLU.h @@ -11,7 +11,33 @@ #ifndef EIGEN_PARTIALLU_H #define EIGEN_PARTIALLU_H -namespace Eigen { +namespace Eigen { + +namespace internal { +template<typename _MatrixType> struct traits<PartialPivLU<_MatrixType> > + : traits<_MatrixType> +{ + typedef MatrixXpr XprKind; + typedef SolverStorage StorageKind; + typedef traits<_MatrixType> BaseTraits; + enum { + Flags = BaseTraits::Flags & RowMajorBit, + CoeffReadCost = Dynamic + }; +}; + +template<typename T,typename Derived> +struct enable_if_ref; +// { +// typedef Derived type; +// }; + +template<typename T,typename Derived> +struct enable_if_ref<Ref<T>,Derived> { + typedef Derived type; +}; + +} // end namespace internal /** \ingroup LU_Module * @@ -19,7 +45,7 @@ namespace Eigen { * * \brief LU decomposition of a matrix with partial pivoting, and related features * - * \param MatrixType the type of the matrix of which we are computing the LU decomposition + * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition * * This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A * is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P @@ -42,34 +68,33 @@ namespace Eigen { * * The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP(). * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU */ template<typename _MatrixType> class PartialPivLU + : public SolverBase<PartialPivLU<_MatrixType> > { public: typedef _MatrixType MatrixType; + typedef SolverBase<PartialPivLU> Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU) + // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar; - typedef typename internal::traits<MatrixType>::StorageKind StorageKind; - typedef typename MatrixType::Index Index; typedef PermutationMatrix<RowsAtCompileTime, MaxRowsAtCompileTime> PermutationType; typedef Transpositions<RowsAtCompileTime, MaxRowsAtCompileTime> TranspositionType; - + typedef typename MatrixType::PlainObject PlainObject; /** - * \brief Default Constructor. - * - * The default constructor is useful in cases in which the user intends to - * perform decompositions via PartialPivLU::compute(const MatrixType&). - */ + * \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via PartialPivLU::compute(const MatrixType&). + */ PartialPivLU(); /** \brief Default Constructor with memory preallocation @@ -78,7 +103,7 @@ template<typename _MatrixType> class PartialPivLU * according to the specified problem \a size. * \sa PartialPivLU() */ - PartialPivLU(Index size); + explicit PartialPivLU(Index size); /** Constructor. * @@ -87,9 +112,25 @@ template<typename _MatrixType> class PartialPivLU * \warning The matrix should have full rank (e.g. if it's square, it should be invertible). * If you need to deal with non-full rank, use class FullPivLU instead. */ - PartialPivLU(const MatrixType& matrix); + template<typename InputType> + explicit PartialPivLU(const EigenBase<InputType>& matrix); - PartialPivLU& compute(const MatrixType& matrix); + /** Constructor for \link InplaceDecomposition inplace decomposition \endlink + * + * \param matrix the matrix of which to compute the LU decomposition. + * + * \warning The matrix should have full rank (e.g. if it's square, it should be invertible). + * If you need to deal with non-full rank, use class FullPivLU instead. + */ + template<typename InputType> + explicit PartialPivLU(EigenBase<InputType>& matrix); + + template<typename InputType> + PartialPivLU& compute(const EigenBase<InputType>& matrix) { + m_lu = matrix.derived(); + compute(); + return *this; + } /** \returns the LU decomposition matrix: the upper-triangular part is U, the * unit-lower-triangular part is L (at least for square matrices; in the non-square @@ -128,12 +169,22 @@ template<typename _MatrixType> class PartialPivLU * * \sa TriangularView::solve(), inverse(), computeInverse() */ + // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion. template<typename Rhs> - inline const internal::solve_retval<PartialPivLU, Rhs> + inline const Solve<PartialPivLU, Rhs> solve(const MatrixBase<Rhs>& b) const { eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); - return internal::solve_retval<PartialPivLU, Rhs>(*this, b.derived()); + return Solve<PartialPivLU, Rhs>(*this, b.derived()); + } + + /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is + the LU decomposition. + */ + inline RealScalar rcond() const + { + eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); + return internal::rcond_estimate_helper(m_l1_norm, *this); } /** \returns the inverse of the matrix of which *this is the LU decomposition. @@ -143,11 +194,10 @@ template<typename _MatrixType> class PartialPivLU * * \sa MatrixBase::inverse(), LU::inverse() */ - inline const internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType> inverse() const + inline const Inverse<PartialPivLU> inverse() const { eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); - return internal::solve_retval<PartialPivLU,typename MatrixType::IdentityReturnType> - (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols())); + return Inverse<PartialPivLU>(*this); } /** \returns the determinant of the matrix of which @@ -163,24 +213,78 @@ template<typename _MatrixType> class PartialPivLU * * \sa MatrixBase::determinant() */ - typename internal::traits<MatrixType>::Scalar determinant() const; + Scalar determinant() const; MatrixType reconstructedMatrix() const; inline Index rows() const { return m_lu.rows(); } inline Index cols() const { return m_lu.cols(); } + #ifndef EIGEN_PARSED_BY_DOXYGEN + template<typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const { + /* The decomposition PA = LU can be rewritten as A = P^{-1} L U. + * So we proceed as follows: + * Step 1: compute c = Pb. + * Step 2: replace c by the solution x to Lx = c. + * Step 3: replace c by the solution x to Ux = c. + */ + + eigen_assert(rhs.rows() == m_lu.rows()); + + // Step 1 + dst = permutationP() * rhs; + + // Step 2 + m_lu.template triangularView<UnitLower>().solveInPlace(dst); + + // Step 3 + m_lu.template triangularView<Upper>().solveInPlace(dst); + } + + template<bool Conjugate, typename RhsType, typename DstType> + EIGEN_DEVICE_FUNC + void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const { + /* The decomposition PA = LU can be rewritten as A = P^{-1} L U. + * So we proceed as follows: + * Step 1: compute c = Pb. + * Step 2: replace c by the solution x to Lx = c. + * Step 3: replace c by the solution x to Ux = c. + */ + + eigen_assert(rhs.rows() == m_lu.cols()); + + if (Conjugate) { + // Step 1 + dst = m_lu.template triangularView<Upper>().adjoint().solve(rhs); + // Step 2 + m_lu.template triangularView<UnitLower>().adjoint().solveInPlace(dst); + } else { + // Step 1 + dst = m_lu.template triangularView<Upper>().transpose().solve(rhs); + // Step 2 + m_lu.template triangularView<UnitLower>().transpose().solveInPlace(dst); + } + // Step 3 + dst = permutationP().transpose() * dst; + } + #endif + protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + + void compute(); + MatrixType m_lu; PermutationType m_p; TranspositionType m_rowsTranspositions; - Index m_det_p; + RealScalar m_l1_norm; + signed char m_det_p; bool m_isInitialized; }; @@ -189,6 +293,7 @@ PartialPivLU<MatrixType>::PartialPivLU() : m_lu(), m_p(), m_rowsTranspositions(), + m_l1_norm(0), m_det_p(0), m_isInitialized(false) { @@ -199,20 +304,36 @@ PartialPivLU<MatrixType>::PartialPivLU(Index size) : m_lu(size, size), m_p(size), m_rowsTranspositions(size), + m_l1_norm(0), m_det_p(0), m_isInitialized(false) { } template<typename MatrixType> -PartialPivLU<MatrixType>::PartialPivLU(const MatrixType& matrix) - : m_lu(matrix.rows(), matrix.rows()), +template<typename InputType> +PartialPivLU<MatrixType>::PartialPivLU(const EigenBase<InputType>& matrix) + : m_lu(matrix.rows(),matrix.cols()), m_p(matrix.rows()), m_rowsTranspositions(matrix.rows()), + m_l1_norm(0), m_det_p(0), m_isInitialized(false) { - compute(matrix); + compute(matrix.derived()); +} + +template<typename MatrixType> +template<typename InputType> +PartialPivLU<MatrixType>::PartialPivLU(EigenBase<InputType>& matrix) + : m_lu(matrix.derived()), + m_p(matrix.rows()), + m_rowsTranspositions(matrix.rows()), + m_l1_norm(0), + m_det_p(0), + m_isInitialized(false) +{ + compute(); } namespace internal { @@ -230,7 +351,6 @@ struct partial_lu_impl typedef Block<MapLU, Dynamic, Dynamic> MatrixType; typedef Block<MatrixType,Dynamic,Dynamic> BlockType; typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; /** \internal performs the LU decomposition in-place of the matrix \a lu * using an unblocked algorithm. @@ -244,6 +364,8 @@ struct partial_lu_impl */ static Index unblocked_lu(MatrixType& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions) { + typedef scalar_score_coeff_op<Scalar> Scoring; + typedef typename Scoring::result_type Score; const Index rows = lu.rows(); const Index cols = lu.cols(); const Index size = (std::min)(rows,cols); @@ -253,15 +375,15 @@ struct partial_lu_impl { Index rrows = rows-k-1; Index rcols = cols-k-1; - + Index row_of_biggest_in_col; - RealScalar biggest_in_corner - = lu.col(k).tail(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col); + Score biggest_in_corner + = lu.col(k).tail(rows-k).unaryExpr(Scoring()).maxCoeff(&row_of_biggest_in_col); row_of_biggest_in_col += k; row_transpositions[k] = PivIndex(row_of_biggest_in_col); - if(biggest_in_corner != RealScalar(0)) + if(biggest_in_corner != Score(0)) { if(k != row_of_biggest_in_col) { @@ -354,7 +476,7 @@ struct partial_lu_impl // update permutations and apply them to A_0 for(Index i=k; i<k+bs; ++i) { - Index piv = (row_transpositions[i] += k); + Index piv = (row_transpositions[i] += internal::convert_index<PivIndex>(k)); A_0.row(i).swap(A_0.row(piv)); } @@ -377,45 +499,44 @@ struct partial_lu_impl /** \internal performs the LU decomposition with partial pivoting in-place. */ template<typename MatrixType, typename TranspositionType> -void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::Index& nb_transpositions) +void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::StorageIndex& nb_transpositions) { eigen_assert(lu.cols() == row_transpositions.size()); eigen_assert((&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1); partial_lu_impl - <typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, typename TranspositionType::Index> + <typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, typename TranspositionType::StorageIndex> ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions); } } // end namespace internal template<typename MatrixType> -PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const MatrixType& matrix) +void PartialPivLU<MatrixType>::compute() { check_template_parameters(); - + // the row permutation is stored as int indices, so just to be sure: - eigen_assert(matrix.rows()<NumTraits<int>::highest()); - - m_lu = matrix; + eigen_assert(m_lu.rows()<NumTraits<int>::highest()); + + m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff(); - eigen_assert(matrix.rows() == matrix.cols() && "PartialPivLU is only for square (and moreover invertible) matrices"); - const Index size = matrix.rows(); + eigen_assert(m_lu.rows() == m_lu.cols() && "PartialPivLU is only for square (and moreover invertible) matrices"); + const Index size = m_lu.rows(); m_rowsTranspositions.resize(size); - typename TranspositionType::Index nb_transpositions; + typename TranspositionType::StorageIndex nb_transpositions; internal::partial_lu_inplace(m_lu, m_rowsTranspositions, nb_transpositions); m_det_p = (nb_transpositions%2) ? -1 : 1; m_p = m_rowsTranspositions; m_isInitialized = true; - return *this; } template<typename MatrixType> -typename internal::traits<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const +typename PartialPivLU<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() const { eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); return Scalar(m_det_p) * m_lu.diagonal().prod(); @@ -438,38 +559,21 @@ MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const return res; } -/***** Implementation of solve() *****************************************************/ +/***** Implementation details *****************************************************/ namespace internal { -template<typename _MatrixType, typename Rhs> -struct solve_retval<PartialPivLU<_MatrixType>, Rhs> - : solve_retval_base<PartialPivLU<_MatrixType>, Rhs> +/***** Implementation of inverse() *****************************************************/ +template<typename DstXprType, typename MatrixType> +struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename PartialPivLU<MatrixType>::Scalar>, Dense2Dense> { - EIGEN_MAKE_SOLVE_HELPERS(PartialPivLU<_MatrixType>,Rhs) - - template<typename Dest> void evalTo(Dest& dst) const + typedef PartialPivLU<MatrixType> LuType; + typedef Inverse<LuType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename LuType::Scalar> &) { - /* The decomposition PA = LU can be rewritten as A = P^{-1} L U. - * So we proceed as follows: - * Step 1: compute c = Pb. - * Step 2: replace c by the solution x to Lx = c. - * Step 3: replace c by the solution x to Ux = c. - */ - - eigen_assert(rhs().rows() == dec().matrixLU().rows()); - - // Step 1 - dst = dec().permutationP() * rhs(); - - // Step 2 - dec().matrixLU().template triangularView<UnitLower>().solveInPlace(dst); - - // Step 3 - dec().matrixLU().template triangularView<Upper>().solveInPlace(dst); + dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; - } // end namespace internal /******** MatrixBase methods *******/ @@ -487,7 +591,6 @@ MatrixBase<Derived>::partialPivLu() const return PartialPivLU<PlainObject>(eval()); } -#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS /** \lu_module * * Synonym of partialPivLu(). @@ -502,7 +605,6 @@ MatrixBase<Derived>::lu() const { return PartialPivLU<PlainObject>(eval()); } -#endif } // end namespace Eigen diff --git a/eigen/Eigen/src/LU/PartialPivLU_MKL.h b/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h index 9035953..755168a 100644 --- a/eigen/Eigen/src/LU/PartialPivLU_MKL.h +++ b/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h @@ -25,7 +25,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL + * Content : Eigen bindings to LAPACKe * LU decomposition with partial pivoting based on LAPACKE_?getrf function. ******************************************************************************** */ @@ -33,20 +33,18 @@ #ifndef EIGEN_PARTIALLU_LAPACK_H #define EIGEN_PARTIALLU_LAPACK_H -#include "Eigen/src/Core/util/MKL_support.h" - namespace Eigen { namespace internal { -/** \internal Specialization for the data types supported by MKL */ +/** \internal Specialization for the data types supported by LAPACKe */ -#define EIGEN_MKL_LU_PARTPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \ +#define EIGEN_LAPACKE_LU_PARTPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \ template<int StorageOrder> \ struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \ { \ /* \internal performs the LU decomposition in-place of the matrix represented */ \ - static lapack_int blocked_lu(lapack_int rows, lapack_int cols, EIGTYPE* lu_data, lapack_int luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ + static lapack_int blocked_lu(Index rows, Index cols, EIGTYPE* lu_data, Index luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ { \ EIGEN_UNUSED_VARIABLE(maxBlockSize);\ lapack_int matrix_order, first_zero_pivot; \ @@ -54,14 +52,14 @@ struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \ EIGTYPE* a; \ /* Set up parameters for ?getrf */ \ matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - lda = luStride; \ + lda = convert_index<lapack_int>(luStride); \ a = lu_data; \ ipiv = row_transpositions; \ - m = rows; \ - n = cols; \ + m = convert_index<lapack_int>(rows); \ + n = convert_index<lapack_int>(cols); \ nb_transpositions = 0; \ \ - info = LAPACKE_##MKLPREFIX##getrf( matrix_order, m, n, (MKLTYPE*)a, lda, ipiv ); \ + info = LAPACKE_##LAPACKE_PREFIX##getrf( matrix_order, m, n, (LAPACKE_TYPE*)a, lda, ipiv ); \ \ for(int i=0;i<m;i++) { ipiv[i]--; if (ipiv[i]!=i) nb_transpositions++; } \ \ @@ -73,10 +71,10 @@ struct partial_lu_impl<EIGTYPE, StorageOrder, lapack_int> \ } \ }; -EIGEN_MKL_LU_PARTPIV(double, double, d) -EIGEN_MKL_LU_PARTPIV(float, float, s) -EIGEN_MKL_LU_PARTPIV(dcomplex, MKL_Complex16, z) -EIGEN_MKL_LU_PARTPIV(scomplex, MKL_Complex8, c) +EIGEN_LAPACKE_LU_PARTPIV(double, double, d) +EIGEN_LAPACKE_LU_PARTPIV(float, float, s) +EIGEN_LAPACKE_LU_PARTPIV(dcomplex, lapack_complex_double, z) +EIGEN_LAPACKE_LU_PARTPIV(scomplex, lapack_complex_float, c) } // end namespace internal diff --git a/eigen/Eigen/src/LU/arch/CMakeLists.txt b/eigen/Eigen/src/LU/arch/CMakeLists.txt deleted file mode 100644 index f6b7ed9..0000000 --- a/eigen/Eigen/src/LU/arch/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_LU_arch_SRCS "*.h") - -INSTALL(FILES - ${Eigen_LU_arch_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU/arch COMPONENT Devel - ) diff --git a/eigen/Eigen/src/LU/arch/Inverse_SSE.h b/eigen/Eigen/src/LU/arch/Inverse_SSE.h index 60b7a23..ebb64a6 100644 --- a/eigen/Eigen/src/LU/arch/Inverse_SSE.h +++ b/eigen/Eigen/src/LU/arch/Inverse_SSE.h @@ -35,13 +35,15 @@ template<typename MatrixType, typename ResultType> struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType> { enum { - MatrixAlignment = bool(MatrixType::Flags&AlignedBit), - ResultAlignment = bool(ResultType::Flags&AlignedBit), + MatrixAlignment = traits<MatrixType>::Alignment, + ResultAlignment = traits<ResultType>::Alignment, StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) }; + typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType; - static void run(const MatrixType& matrix, ResultType& result) + static void run(const MatrixType& mat, ResultType& result) { + ActualMatrixType matrix(mat); EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 }; // Load the full matrix into registers @@ -151,10 +153,12 @@ struct compute_inverse_size4<Architecture::SSE, float, MatrixType, ResultType> iC = _mm_mul_ps(rd,iC); iD = _mm_mul_ps(rd,iD); - result.template writePacket<ResultAlignment>( 0, _mm_shuffle_ps(iA,iB,0x77)); - result.template writePacket<ResultAlignment>( 4, _mm_shuffle_ps(iA,iB,0x22)); - result.template writePacket<ResultAlignment>( 8, _mm_shuffle_ps(iC,iD,0x77)); - result.template writePacket<ResultAlignment>(12, _mm_shuffle_ps(iC,iD,0x22)); + Index res_stride = result.outerStride(); + float* res = result.data(); + pstoret<float, Packet4f, ResultAlignment>(res+0, _mm_shuffle_ps(iA,iB,0x77)); + pstoret<float, Packet4f, ResultAlignment>(res+res_stride, _mm_shuffle_ps(iA,iB,0x22)); + pstoret<float, Packet4f, ResultAlignment>(res+2*res_stride, _mm_shuffle_ps(iC,iD,0x77)); + pstoret<float, Packet4f, ResultAlignment>(res+3*res_stride, _mm_shuffle_ps(iC,iD,0x22)); } }; @@ -163,18 +167,21 @@ template<typename MatrixType, typename ResultType> struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType> { enum { - MatrixAlignment = bool(MatrixType::Flags&AlignedBit), - ResultAlignment = bool(ResultType::Flags&AlignedBit), + MatrixAlignment = traits<MatrixType>::Alignment, + ResultAlignment = traits<ResultType>::Alignment, StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) }; - static void run(const MatrixType& matrix, ResultType& result) + typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType; + + static void run(const MatrixType& mat, ResultType& result) { + ActualMatrixType matrix(mat); const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); // The inverse is calculated using "Divide and Conquer" technique. The // original matrix is divide into four 2x2 sub-matrices. Since each - // register of the matrix holds two element, the smaller matrices are + // register of the matrix holds two elements, the smaller matrices are // consisted of two registers. Hence we get a better locality of the // calculations. @@ -311,14 +318,16 @@ struct compute_inverse_size4<Architecture::SSE, double, MatrixType, ResultType> iC1 = _mm_sub_pd(_mm_mul_pd(B1, dC), iC1); iC2 = _mm_sub_pd(_mm_mul_pd(B2, dC), iC2); - result.template writePacket<ResultAlignment>( 0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1)); // iA# / det - result.template writePacket<ResultAlignment>( 4, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2)); - result.template writePacket<ResultAlignment>( 2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1)); // iB# / det - result.template writePacket<ResultAlignment>( 6, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2)); - result.template writePacket<ResultAlignment>( 8, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1)); // iC# / det - result.template writePacket<ResultAlignment>(12, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2)); - result.template writePacket<ResultAlignment>(10, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1)); // iD# / det - result.template writePacket<ResultAlignment>(14, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2)); + Index res_stride = result.outerStride(); + double* res = result.data(); + pstoret<double, Packet2d, ResultAlignment>(res+0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1)); + pstoret<double, Packet2d, ResultAlignment>(res+res_stride, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2)); + pstoret<double, Packet2d, ResultAlignment>(res+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1)); + pstoret<double, Packet2d, ResultAlignment>(res+res_stride+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2)); + pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1)); + pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2)); + pstoret<double, Packet2d, ResultAlignment>(res+2*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1)); + pstoret<double, Packet2d, ResultAlignment>(res+3*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2)); } }; |