From 35f7829af10c61e33dd2e2a7a015058e11a11ea0 Mon Sep 17 00:00:00 2001 From: Stanislaw Halik Date: Sat, 25 Mar 2017 14:17:07 +0100 Subject: update --- eigen/Eigen/src/LU/CMakeLists.txt | 8 - eigen/Eigen/src/LU/Determinant.h | 2 +- eigen/Eigen/src/LU/FullPivLU.h | 312 +++++++++++++++------- eigen/Eigen/src/LU/Inverse.h | 400 ---------------------------- eigen/Eigen/src/LU/InverseImpl.h | 415 ++++++++++++++++++++++++++++++ eigen/Eigen/src/LU/PartialPivLU.h | 252 ++++++++++++------ eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h | 83 ++++++ eigen/Eigen/src/LU/PartialPivLU_MKL.h | 85 ------ eigen/Eigen/src/LU/arch/CMakeLists.txt | 6 - eigen/Eigen/src/LU/arch/Inverse_SSE.h | 47 ++-- 10 files changed, 929 insertions(+), 681 deletions(-) delete mode 100644 eigen/Eigen/src/LU/CMakeLists.txt delete mode 100644 eigen/Eigen/src/LU/Inverse.h create mode 100644 eigen/Eigen/src/LU/InverseImpl.h create mode 100644 eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h delete mode 100644 eigen/Eigen/src/LU/PartialPivLU_MKL.h delete mode 100644 eigen/Eigen/src/LU/arch/CMakeLists.txt (limited to 'eigen/Eigen/src/LU') diff --git a/eigen/Eigen/src/LU/CMakeLists.txt b/eigen/Eigen/src/LU/CMakeLists.txt deleted file mode 100644 index e0d8d78..0000000 --- a/eigen/Eigen/src/LU/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -FILE(GLOB Eigen_LU_SRCS "*.h") - -INSTALL(FILES - ${Eigen_LU_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU COMPONENT Devel - ) - -ADD_SUBDIRECTORY(arch) diff --git a/eigen/Eigen/src/LU/Determinant.h b/eigen/Eigen/src/LU/Determinant.h index bb8e78a..d6a3c1e 100644 --- a/eigen/Eigen/src/LU/Determinant.h +++ b/eigen/Eigen/src/LU/Determinant.h @@ -92,7 +92,7 @@ template inline typename internal::traits::Scalar MatrixBase::determinant() const { eigen_assert(rows() == cols()); - typedef typename internal::nested::type Nested; + typedef typename internal::nested_eval::type Nested; return internal::determinant_impl::type>::run(derived()); } diff --git a/eigen/Eigen/src/LU/FullPivLU.h b/eigen/Eigen/src/LU/FullPivLU.h index e384704..ec61086 100644 --- a/eigen/Eigen/src/LU/FullPivLU.h +++ b/eigen/Eigen/src/LU/FullPivLU.h @@ -10,7 +10,18 @@ #ifndef EIGEN_LU_H #define EIGEN_LU_H -namespace Eigen { +namespace Eigen { + +namespace internal { +template struct traits > + : traits<_MatrixType> +{ + typedef MatrixXpr XprKind; + typedef SolverStorage StorageKind; + enum { Flags = 0 }; +}; + +} // end namespace internal /** \ingroup LU_Module * @@ -18,7 +29,7 @@ namespace Eigen { * * \brief LU decomposition of a matrix with complete pivoting, and related features * - * \param MatrixType the type of the matrix of which we are computing the LU decomposition + * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition * * This class represents a LU decomposition of any matrix, with complete pivoting: the matrix A is * decomposed as \f$ A = P^{-1} L U Q^{-1} \f$ where L is unit-lower-triangular, U is @@ -41,27 +52,28 @@ namespace Eigen { * \include class_FullPivLU.cpp * Output: \verbinclude class_FullPivLU.out * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * * \sa MatrixBase::fullPivLu(), MatrixBase::determinant(), MatrixBase::inverse() */ template class FullPivLU + : public SolverBase > { public: typedef _MatrixType MatrixType; + typedef SolverBase Base; + + EIGEN_GENERIC_PUBLIC_INTERFACE(FullPivLU) + // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef typename internal::traits::StorageKind StorageKind; - typedef typename MatrixType::Index Index; - typedef typename internal::plain_row_type::type IntRowVectorType; - typedef typename internal::plain_col_type::type IntColVectorType; + typedef typename internal::plain_row_type::type IntRowVectorType; + typedef typename internal::plain_col_type::type IntColVectorType; typedef PermutationMatrix PermutationQType; typedef PermutationMatrix PermutationPType; + typedef typename MatrixType::PlainObject PlainObject; /** * \brief Default Constructor. @@ -84,7 +96,17 @@ template class FullPivLU * \param matrix the matrix of which to compute the LU decomposition. * It is required to be nonzero. */ - FullPivLU(const MatrixType& matrix); + template + explicit FullPivLU(const EigenBase& matrix); + + /** \brief Constructs a LU factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c MatrixType is a Eigen::Ref. + * + * \sa FullPivLU(const EigenBase&) + */ + template + explicit FullPivLU(EigenBase& matrix); /** Computes the LU decomposition of the given matrix. * @@ -93,7 +115,12 @@ template class FullPivLU * * \returns a reference to *this */ - FullPivLU& compute(const MatrixType& matrix); + template + FullPivLU& compute(const EigenBase& matrix) { + m_lu = matrix.derived(); + computeInPlace(); + return *this; + } /** \returns the LU decomposition matrix: the upper-triangular part is U, the * unit-lower-triangular part is L (at least for square matrices; in the non-square @@ -129,7 +156,7 @@ template class FullPivLU * * \sa permutationQ() */ - inline const PermutationPType& permutationP() const + EIGEN_DEVICE_FUNC inline const PermutationPType& permutationP() const { eigen_assert(m_isInitialized && "LU is not initialized."); return m_p; @@ -166,7 +193,7 @@ template class FullPivLU } /** \returns the image of the matrix, also called its column-space. The columns of the returned matrix - * will form a basis of the kernel. + * will form a basis of the image (column-space). * * \param originalMatrix the original matrix, of which *this is the LU decomposition. * The reason why it is needed to pass it here, is that this allows @@ -210,12 +237,22 @@ template class FullPivLU * * \sa TriangularView::solve(), kernel(), inverse() */ + // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion. template - inline const internal::solve_retval + inline const Solve solve(const MatrixBase& b) const { eigen_assert(m_isInitialized && "LU is not initialized."); - return internal::solve_retval(*this, b.derived()); + return Solve(*this, b.derived()); + } + + /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is + the LU decomposition. + */ + inline RealScalar rcond() const + { + eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); + return internal::rcond_estimate_helper(m_l1_norm, *this); } /** \returns the determinant of the matrix of which @@ -360,33 +397,44 @@ template class FullPivLU * * \sa MatrixBase::inverse() */ - inline const internal::solve_retval inverse() const + inline const Inverse inverse() const { eigen_assert(m_isInitialized && "LU is not initialized."); eigen_assert(m_lu.rows() == m_lu.cols() && "You can't take the inverse of a non-square matrix!"); - return internal::solve_retval - (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols())); + return Inverse(*this); } MatrixType reconstructedMatrix() const; - inline Index rows() const { return m_lu.rows(); } - inline Index cols() const { return m_lu.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lu.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_lu.cols(); } + + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + void _solve_impl(const RhsType &rhs, DstType &dst) const; + + template + void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const; + #endif protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + + void computeInPlace(); + MatrixType m_lu; PermutationPType m_p; PermutationQType m_q; IntColVectorType m_rowsTranspositions; IntRowVectorType m_colsTranspositions; - Index m_det_pq, m_nonzero_pivots; + Index m_nonzero_pivots; + RealScalar m_l1_norm; RealScalar m_maxpivot, m_prescribedThreshold; + signed char m_det_pq; bool m_isInitialized, m_usePrescribedThreshold; }; @@ -409,7 +457,8 @@ FullPivLU::FullPivLU(Index rows, Index cols) } template -FullPivLU::FullPivLU(const MatrixType& matrix) +template +FullPivLU::FullPivLU(const EigenBase& matrix) : m_lu(matrix.rows(), matrix.cols()), m_p(matrix.rows()), m_q(matrix.cols()), @@ -418,28 +467,41 @@ FullPivLU::FullPivLU(const MatrixType& matrix) m_isInitialized(false), m_usePrescribedThreshold(false) { - compute(matrix); + compute(matrix.derived()); +} + +template +template +FullPivLU::FullPivLU(EigenBase& matrix) + : m_lu(matrix.derived()), + m_p(matrix.rows()), + m_q(matrix.cols()), + m_rowsTranspositions(matrix.rows()), + m_colsTranspositions(matrix.cols()), + m_isInitialized(false), + m_usePrescribedThreshold(false) +{ + computeInPlace(); } template -FullPivLU& FullPivLU::compute(const MatrixType& matrix) +void FullPivLU::computeInPlace() { check_template_parameters(); - + // the permutations are stored as int indices, so just to be sure: - eigen_assert(matrix.rows()<=NumTraits::highest() && matrix.cols()<=NumTraits::highest()); - - m_isInitialized = true; - m_lu = matrix; + eigen_assert(m_lu.rows()<=NumTraits::highest() && m_lu.cols()<=NumTraits::highest()); + + m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff(); - const Index size = matrix.diagonalSize(); - const Index rows = matrix.rows(); - const Index cols = matrix.cols(); + const Index size = m_lu.diagonalSize(); + const Index rows = m_lu.rows(); + const Index cols = m_lu.cols(); // will store the transpositions, before we accumulate them at the end. // can't accumulate on-the-fly because that will be done in reverse order for the rows. - m_rowsTranspositions.resize(matrix.rows()); - m_colsTranspositions.resize(matrix.cols()); + m_rowsTranspositions.resize(m_lu.rows()); + m_colsTranspositions.resize(m_lu.cols()); Index number_of_transpositions = 0; // number of NONTRIVIAL transpositions, i.e. m_rowsTranspositions[i]!=i m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case) @@ -451,14 +513,16 @@ FullPivLU& FullPivLU::compute(const MatrixType& matrix) // biggest coefficient in the remaining bottom-right corner (starting at row k, col k) Index row_of_biggest_in_corner, col_of_biggest_in_corner; - RealScalar biggest_in_corner; + typedef internal::scalar_score_coeff_op Scoring; + typedef typename Scoring::result_type Score; + Score biggest_in_corner; biggest_in_corner = m_lu.bottomRightCorner(rows-k, cols-k) - .cwiseAbs() + .unaryExpr(Scoring()) .maxCoeff(&row_of_biggest_in_corner, &col_of_biggest_in_corner); row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner, col_of_biggest_in_corner += k; // need to add k to them. - if(biggest_in_corner==RealScalar(0)) + if(biggest_in_corner==Score(0)) { // before exiting, make sure to initialize the still uninitialized transpositions // in a sane state without destroying what we already have. @@ -471,7 +535,8 @@ FullPivLU& FullPivLU::compute(const MatrixType& matrix) break; } - if(biggest_in_corner > m_maxpivot) m_maxpivot = biggest_in_corner; + RealScalar abs_pivot = internal::abs_knowing_score()(m_lu(row_of_biggest_in_corner, col_of_biggest_in_corner), biggest_in_corner); + if(abs_pivot > m_maxpivot) m_maxpivot = abs_pivot; // Now that we've found the pivot, we need to apply the row/col swaps to // bring it to the location (k,k). @@ -508,7 +573,8 @@ FullPivLU& FullPivLU::compute(const MatrixType& matrix) m_q.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k)); m_det_pq = (number_of_transpositions%2) ? -1 : 1; - return *this; + + m_isInitialized = true; } template @@ -671,64 +737,136 @@ struct image_retval > /***** Implementation of solve() *****************************************************/ -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> +} // end namespace internal + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void FullPivLU<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &dst) const { - EIGEN_MAKE_SOLVE_HELPERS(FullPivLU<_MatrixType>,Rhs) + /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}. + * So we proceed as follows: + * Step 1: compute c = P * rhs. + * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible. + * Step 3: replace c by the solution x to Ux = c. May or may not exist. + * Step 4: result = Q * c; + */ - template void evalTo(Dest& dst) const + const Index rows = this->rows(), + cols = this->cols(), + nonzero_pivots = this->rank(); + eigen_assert(rhs.rows() == rows); + const Index smalldim = (std::min)(rows, cols); + + if(nonzero_pivots == 0) { - /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}. - * So we proceed as follows: - * Step 1: compute c = P * rhs. - * Step 2: replace c by the solution x to Lx = c. Exists because L is invertible. - * Step 3: replace c by the solution x to Ux = c. May or may not exist. - * Step 4: result = Q * c; - */ - - const Index rows = dec().rows(), cols = dec().cols(), - nonzero_pivots = dec().rank(); - eigen_assert(rhs().rows() == rows); - const Index smalldim = (std::min)(rows, cols); - - if(nonzero_pivots == 0) - { - dst.setZero(); - return; - } + dst.setZero(); + return; + } + + typename RhsType::PlainObject c(rhs.rows(), rhs.cols()); + + // Step 1 + c = permutationP() * rhs; - typename Rhs::PlainObject c(rhs().rows(), rhs().cols()); + // Step 2 + m_lu.topLeftCorner(smalldim,smalldim) + .template triangularView() + .solveInPlace(c.topRows(smalldim)); + if(rows>cols) + c.bottomRows(rows-cols) -= m_lu.bottomRows(rows-cols) * c.topRows(cols); - // Step 1 - c = dec().permutationP() * rhs(); + // Step 3 + m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots) + .template triangularView() + .solveInPlace(c.topRows(nonzero_pivots)); + // Step 4 + for(Index i = 0; i < nonzero_pivots; ++i) + dst.row(permutationQ().indices().coeff(i)) = c.row(i); + for(Index i = nonzero_pivots; i < m_lu.cols(); ++i) + dst.row(permutationQ().indices().coeff(i)).setZero(); +} + +template +template +void FullPivLU<_MatrixType>::_solve_impl_transposed(const RhsType &rhs, DstType &dst) const +{ + /* The decomposition PAQ = LU can be rewritten as A = P^{-1} L U Q^{-1}, + * and since permutations are real and unitary, we can write this + * as A^T = Q U^T L^T P, + * So we proceed as follows: + * Step 1: compute c = Q^T rhs. + * Step 2: replace c by the solution x to U^T x = c. May or may not exist. + * Step 3: replace c by the solution x to L^T x = c. + * Step 4: result = P^T c. + * If Conjugate is true, replace "^T" by "^*" above. + */ + + const Index rows = this->rows(), cols = this->cols(), + nonzero_pivots = this->rank(); + eigen_assert(rhs.rows() == cols); + const Index smalldim = (std::min)(rows, cols); + + if(nonzero_pivots == 0) + { + dst.setZero(); + return; + } + + typename RhsType::PlainObject c(rhs.rows(), rhs.cols()); + + // Step 1 + c = permutationQ().inverse() * rhs; + + if (Conjugate) { // Step 2 - dec().matrixLU() - .topLeftCorner(smalldim,smalldim) + m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots) + .template triangularView() + .adjoint() + .solveInPlace(c.topRows(nonzero_pivots)); + // Step 3 + m_lu.topLeftCorner(smalldim, smalldim) .template triangularView() + .adjoint() .solveInPlace(c.topRows(smalldim)); - if(rows>cols) - { - c.bottomRows(rows-cols) - -= dec().matrixLU().bottomRows(rows-cols) - * c.topRows(cols); - } - - // Step 3 - dec().matrixLU() - .topLeftCorner(nonzero_pivots, nonzero_pivots) + } else { + // Step 2 + m_lu.topLeftCorner(nonzero_pivots, nonzero_pivots) .template triangularView() + .transpose() .solveInPlace(c.topRows(nonzero_pivots)); + // Step 3 + m_lu.topLeftCorner(smalldim, smalldim) + .template triangularView() + .transpose() + .solveInPlace(c.topRows(smalldim)); + } + + // Step 4 + PermutationPType invp = permutationP().inverse().eval(); + for(Index i = 0; i < smalldim; ++i) + dst.row(invp.indices().coeff(i)) = c.row(i); + for(Index i = smalldim; i < rows; ++i) + dst.row(invp.indices().coeff(i)).setZero(); +} + +#endif + +namespace internal { + - // Step 4 - for(Index i = 0; i < nonzero_pivots; ++i) - dst.row(dec().permutationQ().indices().coeff(i)) = c.row(i); - for(Index i = nonzero_pivots; i < dec().matrixLU().cols(); ++i) - dst.row(dec().permutationQ().indices().coeff(i)).setZero(); +/***** Implementation of inverse() *****************************************************/ +template +struct Assignment >, internal::assign_op::Scalar>, Dense2Dense> +{ + typedef FullPivLU LuType; + typedef Inverse SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; - } // end namespace internal /******* MatrixBase methods *****************************************************************/ diff --git a/eigen/Eigen/src/LU/Inverse.h b/eigen/Eigen/src/LU/Inverse.h deleted file mode 100644 index e836fd6..0000000 --- a/eigen/Eigen/src/LU/Inverse.h +++ /dev/null @@ -1,400 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2008-2010 Benoit Jacob -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_INVERSE_H -#define EIGEN_INVERSE_H - -namespace Eigen { - -namespace internal { - -/********************************** -*** General case implementation *** -**********************************/ - -template -struct compute_inverse -{ - static inline void run(const MatrixType& matrix, ResultType& result) - { - result = matrix.partialPivLu().inverse(); - } -}; - -template -struct compute_inverse_and_det_with_check { /* nothing! general case not supported. */ }; - -/**************************** -*** Size 1 implementation *** -****************************/ - -template -struct compute_inverse -{ - static inline void run(const MatrixType& matrix, ResultType& result) - { - typedef typename MatrixType::Scalar Scalar; - result.coeffRef(0,0) = Scalar(1) / matrix.coeff(0,0); - } -}; - -template -struct compute_inverse_and_det_with_check -{ - static inline void run( - const MatrixType& matrix, - const typename MatrixType::RealScalar& absDeterminantThreshold, - ResultType& result, - typename ResultType::Scalar& determinant, - bool& invertible - ) - { - using std::abs; - determinant = matrix.coeff(0,0); - invertible = abs(determinant) > absDeterminantThreshold; - if(invertible) result.coeffRef(0,0) = typename ResultType::Scalar(1) / determinant; - } -}; - -/**************************** -*** Size 2 implementation *** -****************************/ - -template -inline void compute_inverse_size2_helper( - const MatrixType& matrix, const typename ResultType::Scalar& invdet, - ResultType& result) -{ - result.coeffRef(0,0) = matrix.coeff(1,1) * invdet; - result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet; - result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet; - result.coeffRef(1,1) = matrix.coeff(0,0) * invdet; -} - -template -struct compute_inverse -{ - static inline void run(const MatrixType& matrix, ResultType& result) - { - typedef typename ResultType::Scalar Scalar; - const Scalar invdet = typename MatrixType::Scalar(1) / matrix.determinant(); - compute_inverse_size2_helper(matrix, invdet, result); - } -}; - -template -struct compute_inverse_and_det_with_check -{ - static inline void run( - const MatrixType& matrix, - const typename MatrixType::RealScalar& absDeterminantThreshold, - ResultType& inverse, - typename ResultType::Scalar& determinant, - bool& invertible - ) - { - using std::abs; - typedef typename ResultType::Scalar Scalar; - determinant = matrix.determinant(); - invertible = abs(determinant) > absDeterminantThreshold; - if(!invertible) return; - const Scalar invdet = Scalar(1) / determinant; - compute_inverse_size2_helper(matrix, invdet, inverse); - } -}; - -/**************************** -*** Size 3 implementation *** -****************************/ - -template -inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m) -{ - enum { - i1 = (i+1) % 3, - i2 = (i+2) % 3, - j1 = (j+1) % 3, - j2 = (j+2) % 3 - }; - return m.coeff(i1, j1) * m.coeff(i2, j2) - - m.coeff(i1, j2) * m.coeff(i2, j1); -} - -template -inline void compute_inverse_size3_helper( - const MatrixType& matrix, - const typename ResultType::Scalar& invdet, - const Matrix& cofactors_col0, - ResultType& result) -{ - result.row(0) = cofactors_col0 * invdet; - result.coeffRef(1,0) = cofactor_3x3(matrix) * invdet; - result.coeffRef(1,1) = cofactor_3x3(matrix) * invdet; - result.coeffRef(1,2) = cofactor_3x3(matrix) * invdet; - result.coeffRef(2,0) = cofactor_3x3(matrix) * invdet; - result.coeffRef(2,1) = cofactor_3x3(matrix) * invdet; - result.coeffRef(2,2) = cofactor_3x3(matrix) * invdet; -} - -template -struct compute_inverse -{ - static inline void run(const MatrixType& matrix, ResultType& result) - { - typedef typename ResultType::Scalar Scalar; - Matrix cofactors_col0; - cofactors_col0.coeffRef(0) = cofactor_3x3(matrix); - cofactors_col0.coeffRef(1) = cofactor_3x3(matrix); - cofactors_col0.coeffRef(2) = cofactor_3x3(matrix); - const Scalar det = (cofactors_col0.cwiseProduct(matrix.col(0))).sum(); - const Scalar invdet = Scalar(1) / det; - compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result); - } -}; - -template -struct compute_inverse_and_det_with_check -{ - static inline void run( - const MatrixType& matrix, - const typename MatrixType::RealScalar& absDeterminantThreshold, - ResultType& inverse, - typename ResultType::Scalar& determinant, - bool& invertible - ) - { - using std::abs; - typedef typename ResultType::Scalar Scalar; - Matrix cofactors_col0; - cofactors_col0.coeffRef(0) = cofactor_3x3(matrix); - cofactors_col0.coeffRef(1) = cofactor_3x3(matrix); - cofactors_col0.coeffRef(2) = cofactor_3x3(matrix); - determinant = (cofactors_col0.cwiseProduct(matrix.col(0))).sum(); - invertible = abs(determinant) > absDeterminantThreshold; - if(!invertible) return; - const Scalar invdet = Scalar(1) / determinant; - compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse); - } -}; - -/**************************** -*** Size 4 implementation *** -****************************/ - -template -inline const typename Derived::Scalar general_det3_helper -(const MatrixBase& matrix, int i1, int i2, int i3, int j1, int j2, int j3) -{ - return matrix.coeff(i1,j1) - * (matrix.coeff(i2,j2) * matrix.coeff(i3,j3) - matrix.coeff(i2,j3) * matrix.coeff(i3,j2)); -} - -template -inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix) -{ - enum { - i1 = (i+1) % 4, - i2 = (i+2) % 4, - i3 = (i+3) % 4, - j1 = (j+1) % 4, - j2 = (j+2) % 4, - j3 = (j+3) % 4 - }; - return general_det3_helper(matrix, i1, i2, i3, j1, j2, j3) - + general_det3_helper(matrix, i2, i3, i1, j1, j2, j3) - + general_det3_helper(matrix, i3, i1, i2, j1, j2, j3); -} - -template -struct compute_inverse_size4 -{ - static void run(const MatrixType& matrix, ResultType& result) - { - result.coeffRef(0,0) = cofactor_4x4(matrix); - result.coeffRef(1,0) = -cofactor_4x4(matrix); - result.coeffRef(2,0) = cofactor_4x4(matrix); - result.coeffRef(3,0) = -cofactor_4x4(matrix); - result.coeffRef(0,2) = cofactor_4x4(matrix); - result.coeffRef(1,2) = -cofactor_4x4(matrix); - result.coeffRef(2,2) = cofactor_4x4(matrix); - result.coeffRef(3,2) = -cofactor_4x4(matrix); - result.coeffRef(0,1) = -cofactor_4x4(matrix); - result.coeffRef(1,1) = cofactor_4x4(matrix); - result.coeffRef(2,1) = -cofactor_4x4(matrix); - result.coeffRef(3,1) = cofactor_4x4(matrix); - result.coeffRef(0,3) = -cofactor_4x4(matrix); - result.coeffRef(1,3) = cofactor_4x4(matrix); - result.coeffRef(2,3) = -cofactor_4x4(matrix); - result.coeffRef(3,3) = cofactor_4x4(matrix); - result /= (matrix.col(0).cwiseProduct(result.row(0).transpose())).sum(); - } -}; - -template -struct compute_inverse - : compute_inverse_size4 -{ -}; - -template -struct compute_inverse_and_det_with_check -{ - static inline void run( - const MatrixType& matrix, - const typename MatrixType::RealScalar& absDeterminantThreshold, - ResultType& inverse, - typename ResultType::Scalar& determinant, - bool& invertible - ) - { - using std::abs; - determinant = matrix.determinant(); - invertible = abs(determinant) > absDeterminantThreshold; - if(invertible) compute_inverse::run(matrix, inverse); - } -}; - -/************************* -*** MatrixBase methods *** -*************************/ - -template -struct traits > -{ - typedef typename MatrixType::PlainObject ReturnType; -}; - -template -struct inverse_impl : public ReturnByValue > -{ - typedef typename MatrixType::Index Index; - typedef typename internal::eval::type MatrixTypeNested; - typedef typename remove_all::type MatrixTypeNestedCleaned; - MatrixTypeNested m_matrix; - - inverse_impl(const MatrixType& matrix) - : m_matrix(matrix) - {} - - inline Index rows() const { return m_matrix.rows(); } - inline Index cols() const { return m_matrix.cols(); } - - template inline void evalTo(Dest& dst) const - { - const int Size = EIGEN_PLAIN_ENUM_MIN(MatrixType::ColsAtCompileTime,Dest::ColsAtCompileTime); - EIGEN_ONLY_USED_FOR_DEBUG(Size); - eigen_assert(( (Size<=1) || (Size>4) || (extract_data(m_matrix)!=0 && extract_data(m_matrix)!=extract_data(dst))) - && "Aliasing problem detected in inverse(), you need to do inverse().eval() here."); - - compute_inverse::run(m_matrix, dst); - } -}; - -} // end namespace internal - -/** \lu_module - * - * \returns the matrix inverse of this matrix. - * - * For small fixed sizes up to 4x4, this method uses cofactors. - * In the general case, this method uses class PartialPivLU. - * - * \note This matrix must be invertible, otherwise the result is undefined. If you need an - * invertibility check, do the following: - * \li for fixed sizes up to 4x4, use computeInverseAndDetWithCheck(). - * \li for the general case, use class FullPivLU. - * - * Example: \include MatrixBase_inverse.cpp - * Output: \verbinclude MatrixBase_inverse.out - * - * \sa computeInverseAndDetWithCheck() - */ -template -inline const internal::inverse_impl MatrixBase::inverse() const -{ - EIGEN_STATIC_ASSERT(!NumTraits::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES) - eigen_assert(rows() == cols()); - return internal::inverse_impl(derived()); -} - -/** \lu_module - * - * Computation of matrix inverse and determinant, with invertibility check. - * - * This is only for fixed-size square matrices of size up to 4x4. - * - * \param inverse Reference to the matrix in which to store the inverse. - * \param determinant Reference to the variable in which to store the determinant. - * \param invertible Reference to the bool variable in which to store whether the matrix is invertible. - * \param absDeterminantThreshold Optional parameter controlling the invertibility check. - * The matrix will be declared invertible if the absolute value of its - * determinant is greater than this threshold. - * - * Example: \include MatrixBase_computeInverseAndDetWithCheck.cpp - * Output: \verbinclude MatrixBase_computeInverseAndDetWithCheck.out - * - * \sa inverse(), computeInverseWithCheck() - */ -template -template -inline void MatrixBase::computeInverseAndDetWithCheck( - ResultType& inverse, - typename ResultType::Scalar& determinant, - bool& invertible, - const RealScalar& absDeterminantThreshold - ) const -{ - // i'd love to put some static assertions there, but SFINAE means that they have no effect... - eigen_assert(rows() == cols()); - // for 2x2, it's worth giving a chance to avoid evaluating. - // for larger sizes, evaluating has negligible cost and limits code size. - typedef typename internal::conditional< - RowsAtCompileTime == 2, - typename internal::remove_all::type>::type, - PlainObject - >::type MatrixType; - internal::compute_inverse_and_det_with_check::run - (derived(), absDeterminantThreshold, inverse, determinant, invertible); -} - -/** \lu_module - * - * Computation of matrix inverse, with invertibility check. - * - * This is only for fixed-size square matrices of size up to 4x4. - * - * \param inverse Reference to the matrix in which to store the inverse. - * \param invertible Reference to the bool variable in which to store whether the matrix is invertible. - * \param absDeterminantThreshold Optional parameter controlling the invertibility check. - * The matrix will be declared invertible if the absolute value of its - * determinant is greater than this threshold. - * - * Example: \include MatrixBase_computeInverseWithCheck.cpp - * Output: \verbinclude MatrixBase_computeInverseWithCheck.out - * - * \sa inverse(), computeInverseAndDetWithCheck() - */ -template -template -inline void MatrixBase::computeInverseWithCheck( - ResultType& inverse, - bool& invertible, - const RealScalar& absDeterminantThreshold - ) const -{ - RealScalar determinant; - // i'd love to put some static assertions there, but SFINAE means that they have no effect... - eigen_assert(rows() == cols()); - computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold); -} - -} // end namespace Eigen - -#endif // EIGEN_INVERSE_H diff --git a/eigen/Eigen/src/LU/InverseImpl.h b/eigen/Eigen/src/LU/InverseImpl.h new file mode 100644 index 0000000..018f99b --- /dev/null +++ b/eigen/Eigen/src/LU/InverseImpl.h @@ -0,0 +1,415 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Benoit Jacob +// Copyright (C) 2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_INVERSE_IMPL_H +#define EIGEN_INVERSE_IMPL_H + +namespace Eigen { + +namespace internal { + +/********************************** +*** General case implementation *** +**********************************/ + +template +struct compute_inverse +{ + EIGEN_DEVICE_FUNC + static inline void run(const MatrixType& matrix, ResultType& result) + { + result = matrix.partialPivLu().inverse(); + } +}; + +template +struct compute_inverse_and_det_with_check { /* nothing! general case not supported. */ }; + +/**************************** +*** Size 1 implementation *** +****************************/ + +template +struct compute_inverse +{ + EIGEN_DEVICE_FUNC + static inline void run(const MatrixType& matrix, ResultType& result) + { + typedef typename MatrixType::Scalar Scalar; + internal::evaluator matrixEval(matrix); + result.coeffRef(0,0) = Scalar(1) / matrixEval.coeff(0,0); + } +}; + +template +struct compute_inverse_and_det_with_check +{ + EIGEN_DEVICE_FUNC + static inline void run( + const MatrixType& matrix, + const typename MatrixType::RealScalar& absDeterminantThreshold, + ResultType& result, + typename ResultType::Scalar& determinant, + bool& invertible + ) + { + using std::abs; + determinant = matrix.coeff(0,0); + invertible = abs(determinant) > absDeterminantThreshold; + if(invertible) result.coeffRef(0,0) = typename ResultType::Scalar(1) / determinant; + } +}; + +/**************************** +*** Size 2 implementation *** +****************************/ + +template +EIGEN_DEVICE_FUNC +inline void compute_inverse_size2_helper( + const MatrixType& matrix, const typename ResultType::Scalar& invdet, + ResultType& result) +{ + result.coeffRef(0,0) = matrix.coeff(1,1) * invdet; + result.coeffRef(1,0) = -matrix.coeff(1,0) * invdet; + result.coeffRef(0,1) = -matrix.coeff(0,1) * invdet; + result.coeffRef(1,1) = matrix.coeff(0,0) * invdet; +} + +template +struct compute_inverse +{ + EIGEN_DEVICE_FUNC + static inline void run(const MatrixType& matrix, ResultType& result) + { + typedef typename ResultType::Scalar Scalar; + const Scalar invdet = typename MatrixType::Scalar(1) / matrix.determinant(); + compute_inverse_size2_helper(matrix, invdet, result); + } +}; + +template +struct compute_inverse_and_det_with_check +{ + EIGEN_DEVICE_FUNC + static inline void run( + const MatrixType& matrix, + const typename MatrixType::RealScalar& absDeterminantThreshold, + ResultType& inverse, + typename ResultType::Scalar& determinant, + bool& invertible + ) + { + using std::abs; + typedef typename ResultType::Scalar Scalar; + determinant = matrix.determinant(); + invertible = abs(determinant) > absDeterminantThreshold; + if(!invertible) return; + const Scalar invdet = Scalar(1) / determinant; + compute_inverse_size2_helper(matrix, invdet, inverse); + } +}; + +/**************************** +*** Size 3 implementation *** +****************************/ + +template +EIGEN_DEVICE_FUNC +inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m) +{ + enum { + i1 = (i+1) % 3, + i2 = (i+2) % 3, + j1 = (j+1) % 3, + j2 = (j+2) % 3 + }; + return m.coeff(i1, j1) * m.coeff(i2, j2) + - m.coeff(i1, j2) * m.coeff(i2, j1); +} + +template +EIGEN_DEVICE_FUNC +inline void compute_inverse_size3_helper( + const MatrixType& matrix, + const typename ResultType::Scalar& invdet, + const Matrix& cofactors_col0, + ResultType& result) +{ + result.row(0) = cofactors_col0 * invdet; + result.coeffRef(1,0) = cofactor_3x3(matrix) * invdet; + result.coeffRef(1,1) = cofactor_3x3(matrix) * invdet; + result.coeffRef(1,2) = cofactor_3x3(matrix) * invdet; + result.coeffRef(2,0) = cofactor_3x3(matrix) * invdet; + result.coeffRef(2,1) = cofactor_3x3(matrix) * invdet; + result.coeffRef(2,2) = cofactor_3x3(matrix) * invdet; +} + +template +struct compute_inverse +{ + EIGEN_DEVICE_FUNC + static inline void run(const MatrixType& matrix, ResultType& result) + { + typedef typename ResultType::Scalar Scalar; + Matrix cofactors_col0; + cofactors_col0.coeffRef(0) = cofactor_3x3(matrix); + cofactors_col0.coeffRef(1) = cofactor_3x3(matrix); + cofactors_col0.coeffRef(2) = cofactor_3x3(matrix); + const Scalar det = (cofactors_col0.cwiseProduct(matrix.col(0))).sum(); + const Scalar invdet = Scalar(1) / det; + compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result); + } +}; + +template +struct compute_inverse_and_det_with_check +{ + EIGEN_DEVICE_FUNC + static inline void run( + const MatrixType& matrix, + const typename MatrixType::RealScalar& absDeterminantThreshold, + ResultType& inverse, + typename ResultType::Scalar& determinant, + bool& invertible + ) + { + using std::abs; + typedef typename ResultType::Scalar Scalar; + Matrix cofactors_col0; + cofactors_col0.coeffRef(0) = cofactor_3x3(matrix); + cofactors_col0.coeffRef(1) = cofactor_3x3(matrix); + cofactors_col0.coeffRef(2) = cofactor_3x3(matrix); + determinant = (cofactors_col0.cwiseProduct(matrix.col(0))).sum(); + invertible = abs(determinant) > absDeterminantThreshold; + if(!invertible) return; + const Scalar invdet = Scalar(1) / determinant; + compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse); + } +}; + +/**************************** +*** Size 4 implementation *** +****************************/ + +template +EIGEN_DEVICE_FUNC +inline const typename Derived::Scalar general_det3_helper +(const MatrixBase& matrix, int i1, int i2, int i3, int j1, int j2, int j3) +{ + return matrix.coeff(i1,j1) + * (matrix.coeff(i2,j2) * matrix.coeff(i3,j3) - matrix.coeff(i2,j3) * matrix.coeff(i3,j2)); +} + +template +EIGEN_DEVICE_FUNC +inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix) +{ + enum { + i1 = (i+1) % 4, + i2 = (i+2) % 4, + i3 = (i+3) % 4, + j1 = (j+1) % 4, + j2 = (j+2) % 4, + j3 = (j+3) % 4 + }; + return general_det3_helper(matrix, i1, i2, i3, j1, j2, j3) + + general_det3_helper(matrix, i2, i3, i1, j1, j2, j3) + + general_det3_helper(matrix, i3, i1, i2, j1, j2, j3); +} + +template +struct compute_inverse_size4 +{ + EIGEN_DEVICE_FUNC + static void run(const MatrixType& matrix, ResultType& result) + { + result.coeffRef(0,0) = cofactor_4x4(matrix); + result.coeffRef(1,0) = -cofactor_4x4(matrix); + result.coeffRef(2,0) = cofactor_4x4(matrix); + result.coeffRef(3,0) = -cofactor_4x4(matrix); + result.coeffRef(0,2) = cofactor_4x4(matrix); + result.coeffRef(1,2) = -cofactor_4x4(matrix); + result.coeffRef(2,2) = cofactor_4x4(matrix); + result.coeffRef(3,2) = -cofactor_4x4(matrix); + result.coeffRef(0,1) = -cofactor_4x4(matrix); + result.coeffRef(1,1) = cofactor_4x4(matrix); + result.coeffRef(2,1) = -cofactor_4x4(matrix); + result.coeffRef(3,1) = cofactor_4x4(matrix); + result.coeffRef(0,3) = -cofactor_4x4(matrix); + result.coeffRef(1,3) = cofactor_4x4(matrix); + result.coeffRef(2,3) = -cofactor_4x4(matrix); + result.coeffRef(3,3) = cofactor_4x4(matrix); + result /= (matrix.col(0).cwiseProduct(result.row(0).transpose())).sum(); + } +}; + +template +struct compute_inverse + : compute_inverse_size4 +{ +}; + +template +struct compute_inverse_and_det_with_check +{ + EIGEN_DEVICE_FUNC + static inline void run( + const MatrixType& matrix, + const typename MatrixType::RealScalar& absDeterminantThreshold, + ResultType& inverse, + typename ResultType::Scalar& determinant, + bool& invertible + ) + { + using std::abs; + determinant = matrix.determinant(); + invertible = abs(determinant) > absDeterminantThreshold; + if(invertible) compute_inverse::run(matrix, inverse); + } +}; + +/************************* +*** MatrixBase methods *** +*************************/ + +} // end namespace internal + +namespace internal { + +// Specialization for "dense = dense_xpr.inverse()" +template +struct Assignment, internal::assign_op, Dense2Dense> +{ + typedef Inverse SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if((dst.rows()!=dstRows) || (dst.cols()!=dstCols)) + dst.resize(dstRows, dstCols); + + const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime); + EIGEN_ONLY_USED_FOR_DEBUG(Size); + eigen_assert(( (Size<=1) || (Size>4) || (extract_data(src.nestedExpression())!=extract_data(dst))) + && "Aliasing problem detected in inverse(), you need to do inverse().eval() here."); + + typedef typename internal::nested_eval::type ActualXprType; + typedef typename internal::remove_all::type ActualXprTypeCleanded; + + ActualXprType actual_xpr(src.nestedExpression()); + + compute_inverse::run(actual_xpr, dst); + } +}; + + +} // end namespace internal + +/** \lu_module + * + * \returns the matrix inverse of this matrix. + * + * For small fixed sizes up to 4x4, this method uses cofactors. + * In the general case, this method uses class PartialPivLU. + * + * \note This matrix must be invertible, otherwise the result is undefined. If you need an + * invertibility check, do the following: + * \li for fixed sizes up to 4x4, use computeInverseAndDetWithCheck(). + * \li for the general case, use class FullPivLU. + * + * Example: \include MatrixBase_inverse.cpp + * Output: \verbinclude MatrixBase_inverse.out + * + * \sa computeInverseAndDetWithCheck() + */ +template +inline const Inverse MatrixBase::inverse() const +{ + EIGEN_STATIC_ASSERT(!NumTraits::IsInteger,THIS_FUNCTION_IS_NOT_FOR_INTEGER_NUMERIC_TYPES) + eigen_assert(rows() == cols()); + return Inverse(derived()); +} + +/** \lu_module + * + * Computation of matrix inverse and determinant, with invertibility check. + * + * This is only for fixed-size square matrices of size up to 4x4. + * + * \param inverse Reference to the matrix in which to store the inverse. + * \param determinant Reference to the variable in which to store the determinant. + * \param invertible Reference to the bool variable in which to store whether the matrix is invertible. + * \param absDeterminantThreshold Optional parameter controlling the invertibility check. + * The matrix will be declared invertible if the absolute value of its + * determinant is greater than this threshold. + * + * Example: \include MatrixBase_computeInverseAndDetWithCheck.cpp + * Output: \verbinclude MatrixBase_computeInverseAndDetWithCheck.out + * + * \sa inverse(), computeInverseWithCheck() + */ +template +template +inline void MatrixBase::computeInverseAndDetWithCheck( + ResultType& inverse, + typename ResultType::Scalar& determinant, + bool& invertible, + const RealScalar& absDeterminantThreshold + ) const +{ + // i'd love to put some static assertions there, but SFINAE means that they have no effect... + eigen_assert(rows() == cols()); + // for 2x2, it's worth giving a chance to avoid evaluating. + // for larger sizes, evaluating has negligible cost and limits code size. + typedef typename internal::conditional< + RowsAtCompileTime == 2, + typename internal::remove_all::type>::type, + PlainObject + >::type MatrixType; + internal::compute_inverse_and_det_with_check::run + (derived(), absDeterminantThreshold, inverse, determinant, invertible); +} + +/** \lu_module + * + * Computation of matrix inverse, with invertibility check. + * + * This is only for fixed-size square matrices of size up to 4x4. + * + * \param inverse Reference to the matrix in which to store the inverse. + * \param invertible Reference to the bool variable in which to store whether the matrix is invertible. + * \param absDeterminantThreshold Optional parameter controlling the invertibility check. + * The matrix will be declared invertible if the absolute value of its + * determinant is greater than this threshold. + * + * Example: \include MatrixBase_computeInverseWithCheck.cpp + * Output: \verbinclude MatrixBase_computeInverseWithCheck.out + * + * \sa inverse(), computeInverseAndDetWithCheck() + */ +template +template +inline void MatrixBase::computeInverseWithCheck( + ResultType& inverse, + bool& invertible, + const RealScalar& absDeterminantThreshold + ) const +{ + RealScalar determinant; + // i'd love to put some static assertions there, but SFINAE means that they have no effect... + eigen_assert(rows() == cols()); + computeInverseAndDetWithCheck(inverse,determinant,invertible,absDeterminantThreshold); +} + +} // end namespace Eigen + +#endif // EIGEN_INVERSE_IMPL_H diff --git a/eigen/Eigen/src/LU/PartialPivLU.h b/eigen/Eigen/src/LU/PartialPivLU.h index 7d1db94..d439618 100644 --- a/eigen/Eigen/src/LU/PartialPivLU.h +++ b/eigen/Eigen/src/LU/PartialPivLU.h @@ -11,7 +11,33 @@ #ifndef EIGEN_PARTIALLU_H #define EIGEN_PARTIALLU_H -namespace Eigen { +namespace Eigen { + +namespace internal { +template struct traits > + : traits<_MatrixType> +{ + typedef MatrixXpr XprKind; + typedef SolverStorage StorageKind; + typedef traits<_MatrixType> BaseTraits; + enum { + Flags = BaseTraits::Flags & RowMajorBit, + CoeffReadCost = Dynamic + }; +}; + +template +struct enable_if_ref; +// { +// typedef Derived type; +// }; + +template +struct enable_if_ref,Derived> { + typedef Derived type; +}; + +} // end namespace internal /** \ingroup LU_Module * @@ -19,7 +45,7 @@ namespace Eigen { * * \brief LU decomposition of a matrix with partial pivoting, and related features * - * \param MatrixType the type of the matrix of which we are computing the LU decomposition + * \tparam _MatrixType the type of the matrix of which we are computing the LU decomposition * * This class represents a LU decomposition of a \b square \b invertible matrix, with partial pivoting: the matrix A * is decomposed as A = PLU where L is unit-lower-triangular, U is upper-triangular, and P @@ -42,34 +68,33 @@ namespace Eigen { * * The data of the LU decomposition can be directly accessed through the methods matrixLU(), permutationP(). * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * * \sa MatrixBase::partialPivLu(), MatrixBase::determinant(), MatrixBase::inverse(), MatrixBase::computeInverse(), class FullPivLU */ template class PartialPivLU + : public SolverBase > { public: typedef _MatrixType MatrixType; + typedef SolverBase Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(PartialPivLU) + // FIXME StorageIndex defined in EIGEN_GENERIC_PUBLIC_INTERFACE should be int enum { - RowsAtCompileTime = MatrixType::RowsAtCompileTime, - ColsAtCompileTime = MatrixType::ColsAtCompileTime, - Options = MatrixType::Options, MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; - typedef typename MatrixType::Scalar Scalar; - typedef typename NumTraits::Real RealScalar; - typedef typename internal::traits::StorageKind StorageKind; - typedef typename MatrixType::Index Index; typedef PermutationMatrix PermutationType; typedef Transpositions TranspositionType; - + typedef typename MatrixType::PlainObject PlainObject; /** - * \brief Default Constructor. - * - * The default constructor is useful in cases in which the user intends to - * perform decompositions via PartialPivLU::compute(const MatrixType&). - */ + * \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via PartialPivLU::compute(const MatrixType&). + */ PartialPivLU(); /** \brief Default Constructor with memory preallocation @@ -78,7 +103,7 @@ template class PartialPivLU * according to the specified problem \a size. * \sa PartialPivLU() */ - PartialPivLU(Index size); + explicit PartialPivLU(Index size); /** Constructor. * @@ -87,9 +112,25 @@ template class PartialPivLU * \warning The matrix should have full rank (e.g. if it's square, it should be invertible). * If you need to deal with non-full rank, use class FullPivLU instead. */ - PartialPivLU(const MatrixType& matrix); + template + explicit PartialPivLU(const EigenBase& matrix); - PartialPivLU& compute(const MatrixType& matrix); + /** Constructor for \link InplaceDecomposition inplace decomposition \endlink + * + * \param matrix the matrix of which to compute the LU decomposition. + * + * \warning The matrix should have full rank (e.g. if it's square, it should be invertible). + * If you need to deal with non-full rank, use class FullPivLU instead. + */ + template + explicit PartialPivLU(EigenBase& matrix); + + template + PartialPivLU& compute(const EigenBase& matrix) { + m_lu = matrix.derived(); + compute(); + return *this; + } /** \returns the LU decomposition matrix: the upper-triangular part is U, the * unit-lower-triangular part is L (at least for square matrices; in the non-square @@ -128,12 +169,22 @@ template class PartialPivLU * * \sa TriangularView::solve(), inverse(), computeInverse() */ + // FIXME this is a copy-paste of the base-class member to add the isInitialized assertion. template - inline const internal::solve_retval + inline const Solve solve(const MatrixBase& b) const { eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); - return internal::solve_retval(*this, b.derived()); + return Solve(*this, b.derived()); + } + + /** \returns an estimate of the reciprocal condition number of the matrix of which \c *this is + the LU decomposition. + */ + inline RealScalar rcond() const + { + eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); + return internal::rcond_estimate_helper(m_l1_norm, *this); } /** \returns the inverse of the matrix of which *this is the LU decomposition. @@ -143,11 +194,10 @@ template class PartialPivLU * * \sa MatrixBase::inverse(), LU::inverse() */ - inline const internal::solve_retval inverse() const + inline const Inverse inverse() const { eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); - return internal::solve_retval - (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols())); + return Inverse(*this); } /** \returns the determinant of the matrix of which @@ -163,24 +213,78 @@ template class PartialPivLU * * \sa MatrixBase::determinant() */ - typename internal::traits::Scalar determinant() const; + Scalar determinant() const; MatrixType reconstructedMatrix() const; inline Index rows() const { return m_lu.rows(); } inline Index cols() const { return m_lu.cols(); } + #ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC + void _solve_impl(const RhsType &rhs, DstType &dst) const { + /* The decomposition PA = LU can be rewritten as A = P^{-1} L U. + * So we proceed as follows: + * Step 1: compute c = Pb. + * Step 2: replace c by the solution x to Lx = c. + * Step 3: replace c by the solution x to Ux = c. + */ + + eigen_assert(rhs.rows() == m_lu.rows()); + + // Step 1 + dst = permutationP() * rhs; + + // Step 2 + m_lu.template triangularView().solveInPlace(dst); + + // Step 3 + m_lu.template triangularView().solveInPlace(dst); + } + + template + EIGEN_DEVICE_FUNC + void _solve_impl_transposed(const RhsType &rhs, DstType &dst) const { + /* The decomposition PA = LU can be rewritten as A = P^{-1} L U. + * So we proceed as follows: + * Step 1: compute c = Pb. + * Step 2: replace c by the solution x to Lx = c. + * Step 3: replace c by the solution x to Ux = c. + */ + + eigen_assert(rhs.rows() == m_lu.cols()); + + if (Conjugate) { + // Step 1 + dst = m_lu.template triangularView().adjoint().solve(rhs); + // Step 2 + m_lu.template triangularView().adjoint().solveInPlace(dst); + } else { + // Step 1 + dst = m_lu.template triangularView().transpose().solve(rhs); + // Step 2 + m_lu.template triangularView().transpose().solveInPlace(dst); + } + // Step 3 + dst = permutationP().transpose() * dst; + } + #endif + protected: - + static void check_template_parameters() { EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar); } - + + void compute(); + MatrixType m_lu; PermutationType m_p; TranspositionType m_rowsTranspositions; - Index m_det_p; + RealScalar m_l1_norm; + signed char m_det_p; bool m_isInitialized; }; @@ -189,6 +293,7 @@ PartialPivLU::PartialPivLU() : m_lu(), m_p(), m_rowsTranspositions(), + m_l1_norm(0), m_det_p(0), m_isInitialized(false) { @@ -199,20 +304,36 @@ PartialPivLU::PartialPivLU(Index size) : m_lu(size, size), m_p(size), m_rowsTranspositions(size), + m_l1_norm(0), m_det_p(0), m_isInitialized(false) { } template -PartialPivLU::PartialPivLU(const MatrixType& matrix) - : m_lu(matrix.rows(), matrix.rows()), +template +PartialPivLU::PartialPivLU(const EigenBase& matrix) + : m_lu(matrix.rows(),matrix.cols()), m_p(matrix.rows()), m_rowsTranspositions(matrix.rows()), + m_l1_norm(0), m_det_p(0), m_isInitialized(false) { - compute(matrix); + compute(matrix.derived()); +} + +template +template +PartialPivLU::PartialPivLU(EigenBase& matrix) + : m_lu(matrix.derived()), + m_p(matrix.rows()), + m_rowsTranspositions(matrix.rows()), + m_l1_norm(0), + m_det_p(0), + m_isInitialized(false) +{ + compute(); } namespace internal { @@ -230,7 +351,6 @@ struct partial_lu_impl typedef Block MatrixType; typedef Block BlockType; typedef typename MatrixType::RealScalar RealScalar; - typedef typename MatrixType::Index Index; /** \internal performs the LU decomposition in-place of the matrix \a lu * using an unblocked algorithm. @@ -244,6 +364,8 @@ struct partial_lu_impl */ static Index unblocked_lu(MatrixType& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions) { + typedef scalar_score_coeff_op Scoring; + typedef typename Scoring::result_type Score; const Index rows = lu.rows(); const Index cols = lu.cols(); const Index size = (std::min)(rows,cols); @@ -253,15 +375,15 @@ struct partial_lu_impl { Index rrows = rows-k-1; Index rcols = cols-k-1; - + Index row_of_biggest_in_col; - RealScalar biggest_in_corner - = lu.col(k).tail(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col); + Score biggest_in_corner + = lu.col(k).tail(rows-k).unaryExpr(Scoring()).maxCoeff(&row_of_biggest_in_col); row_of_biggest_in_col += k; row_transpositions[k] = PivIndex(row_of_biggest_in_col); - if(biggest_in_corner != RealScalar(0)) + if(biggest_in_corner != Score(0)) { if(k != row_of_biggest_in_col) { @@ -354,7 +476,7 @@ struct partial_lu_impl // update permutations and apply them to A_0 for(Index i=k; i(k)); A_0.row(i).swap(A_0.row(piv)); } @@ -377,45 +499,44 @@ struct partial_lu_impl /** \internal performs the LU decomposition with partial pivoting in-place. */ template -void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::Index& nb_transpositions) +void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, typename TranspositionType::StorageIndex& nb_transpositions) { eigen_assert(lu.cols() == row_transpositions.size()); eigen_assert((&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1); partial_lu_impl - + ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions); } } // end namespace internal template -PartialPivLU& PartialPivLU::compute(const MatrixType& matrix) +void PartialPivLU::compute() { check_template_parameters(); - + // the row permutation is stored as int indices, so just to be sure: - eigen_assert(matrix.rows()::highest()); - - m_lu = matrix; + eigen_assert(m_lu.rows()::highest()); + + m_l1_norm = m_lu.cwiseAbs().colwise().sum().maxCoeff(); - eigen_assert(matrix.rows() == matrix.cols() && "PartialPivLU is only for square (and moreover invertible) matrices"); - const Index size = matrix.rows(); + eigen_assert(m_lu.rows() == m_lu.cols() && "PartialPivLU is only for square (and moreover invertible) matrices"); + const Index size = m_lu.rows(); m_rowsTranspositions.resize(size); - typename TranspositionType::Index nb_transpositions; + typename TranspositionType::StorageIndex nb_transpositions; internal::partial_lu_inplace(m_lu, m_rowsTranspositions, nb_transpositions); m_det_p = (nb_transpositions%2) ? -1 : 1; m_p = m_rowsTranspositions; m_isInitialized = true; - return *this; } template -typename internal::traits::Scalar PartialPivLU::determinant() const +typename PartialPivLU::Scalar PartialPivLU::determinant() const { eigen_assert(m_isInitialized && "PartialPivLU is not initialized."); return Scalar(m_det_p) * m_lu.diagonal().prod(); @@ -438,38 +559,21 @@ MatrixType PartialPivLU::reconstructedMatrix() const return res; } -/***** Implementation of solve() *****************************************************/ +/***** Implementation details *****************************************************/ namespace internal { -template -struct solve_retval, Rhs> - : solve_retval_base, Rhs> +/***** Implementation of inverse() *****************************************************/ +template +struct Assignment >, internal::assign_op::Scalar>, Dense2Dense> { - EIGEN_MAKE_SOLVE_HELPERS(PartialPivLU<_MatrixType>,Rhs) - - template void evalTo(Dest& dst) const + typedef PartialPivLU LuType; + typedef Inverse SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { - /* The decomposition PA = LU can be rewritten as A = P^{-1} L U. - * So we proceed as follows: - * Step 1: compute c = Pb. - * Step 2: replace c by the solution x to Lx = c. - * Step 3: replace c by the solution x to Ux = c. - */ - - eigen_assert(rhs().rows() == dec().matrixLU().rows()); - - // Step 1 - dst = dec().permutationP() * rhs(); - - // Step 2 - dec().matrixLU().template triangularView().solveInPlace(dst); - - // Step 3 - dec().matrixLU().template triangularView().solveInPlace(dst); + dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols())); } }; - } // end namespace internal /******** MatrixBase methods *******/ @@ -487,7 +591,6 @@ MatrixBase::partialPivLu() const return PartialPivLU(eval()); } -#if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS /** \lu_module * * Synonym of partialPivLu(). @@ -502,7 +605,6 @@ MatrixBase::lu() const { return PartialPivLU(eval()); } -#endif } // end namespace Eigen diff --git a/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h b/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h new file mode 100644 index 0000000..755168a --- /dev/null +++ b/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h @@ -0,0 +1,83 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to LAPACKe + * LU decomposition with partial pivoting based on LAPACKE_?getrf function. + ******************************************************************************** +*/ + +#ifndef EIGEN_PARTIALLU_LAPACK_H +#define EIGEN_PARTIALLU_LAPACK_H + +namespace Eigen { + +namespace internal { + +/** \internal Specialization for the data types supported by LAPACKe */ + +#define EIGEN_LAPACKE_LU_PARTPIV(EIGTYPE, LAPACKE_TYPE, LAPACKE_PREFIX) \ +template \ +struct partial_lu_impl \ +{ \ + /* \internal performs the LU decomposition in-place of the matrix represented */ \ + static lapack_int blocked_lu(Index rows, Index cols, EIGTYPE* lu_data, Index luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ + { \ + EIGEN_UNUSED_VARIABLE(maxBlockSize);\ + lapack_int matrix_order, first_zero_pivot; \ + lapack_int m, n, lda, *ipiv, info; \ + EIGTYPE* a; \ +/* Set up parameters for ?getrf */ \ + matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ + lda = convert_index(luStride); \ + a = lu_data; \ + ipiv = row_transpositions; \ + m = convert_index(rows); \ + n = convert_index(cols); \ + nb_transpositions = 0; \ +\ + info = LAPACKE_##LAPACKE_PREFIX##getrf( matrix_order, m, n, (LAPACKE_TYPE*)a, lda, ipiv ); \ +\ + for(int i=0;i= 0); \ +/* something should be done with nb_transpositions */ \ +\ + first_zero_pivot = info; \ + return first_zero_pivot; \ + } \ +}; + +EIGEN_LAPACKE_LU_PARTPIV(double, double, d) +EIGEN_LAPACKE_LU_PARTPIV(float, float, s) +EIGEN_LAPACKE_LU_PARTPIV(dcomplex, lapack_complex_double, z) +EIGEN_LAPACKE_LU_PARTPIV(scomplex, lapack_complex_float, c) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PARTIALLU_LAPACK_H diff --git a/eigen/Eigen/src/LU/PartialPivLU_MKL.h b/eigen/Eigen/src/LU/PartialPivLU_MKL.h deleted file mode 100644 index 9035953..0000000 --- a/eigen/Eigen/src/LU/PartialPivLU_MKL.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - Copyright (c) 2011, Intel Corporation. All rights reserved. - - Redistribution and use in source and binary forms, with or without modification, - are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of Intel Corporation nor the names of its contributors may - be used to endorse or promote products derived from this software without - specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR - ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ******************************************************************************** - * Content : Eigen bindings to Intel(R) MKL - * LU decomposition with partial pivoting based on LAPACKE_?getrf function. - ******************************************************************************** -*/ - -#ifndef EIGEN_PARTIALLU_LAPACK_H -#define EIGEN_PARTIALLU_LAPACK_H - -#include "Eigen/src/Core/util/MKL_support.h" - -namespace Eigen { - -namespace internal { - -/** \internal Specialization for the data types supported by MKL */ - -#define EIGEN_MKL_LU_PARTPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \ -template \ -struct partial_lu_impl \ -{ \ - /* \internal performs the LU decomposition in-place of the matrix represented */ \ - static lapack_int blocked_lu(lapack_int rows, lapack_int cols, EIGTYPE* lu_data, lapack_int luStride, lapack_int* row_transpositions, lapack_int& nb_transpositions, lapack_int maxBlockSize=256) \ - { \ - EIGEN_UNUSED_VARIABLE(maxBlockSize);\ - lapack_int matrix_order, first_zero_pivot; \ - lapack_int m, n, lda, *ipiv, info; \ - EIGTYPE* a; \ -/* Set up parameters for ?getrf */ \ - matrix_order = StorageOrder==RowMajor ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \ - lda = luStride; \ - a = lu_data; \ - ipiv = row_transpositions; \ - m = rows; \ - n = cols; \ - nb_transpositions = 0; \ -\ - info = LAPACKE_##MKLPREFIX##getrf( matrix_order, m, n, (MKLTYPE*)a, lda, ipiv ); \ -\ - for(int i=0;i= 0); \ -/* something should be done with nb_transpositions */ \ -\ - first_zero_pivot = info; \ - return first_zero_pivot; \ - } \ -}; - -EIGEN_MKL_LU_PARTPIV(double, double, d) -EIGEN_MKL_LU_PARTPIV(float, float, s) -EIGEN_MKL_LU_PARTPIV(dcomplex, MKL_Complex16, z) -EIGEN_MKL_LU_PARTPIV(scomplex, MKL_Complex8, c) - -} // end namespace internal - -} // end namespace Eigen - -#endif // EIGEN_PARTIALLU_LAPACK_H diff --git a/eigen/Eigen/src/LU/arch/CMakeLists.txt b/eigen/Eigen/src/LU/arch/CMakeLists.txt deleted file mode 100644 index f6b7ed9..0000000 --- a/eigen/Eigen/src/LU/arch/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_LU_arch_SRCS "*.h") - -INSTALL(FILES - ${Eigen_LU_arch_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/LU/arch COMPONENT Devel - ) diff --git a/eigen/Eigen/src/LU/arch/Inverse_SSE.h b/eigen/Eigen/src/LU/arch/Inverse_SSE.h index 60b7a23..ebb64a6 100644 --- a/eigen/Eigen/src/LU/arch/Inverse_SSE.h +++ b/eigen/Eigen/src/LU/arch/Inverse_SSE.h @@ -35,13 +35,15 @@ template struct compute_inverse_size4 { enum { - MatrixAlignment = bool(MatrixType::Flags&AlignedBit), - ResultAlignment = bool(ResultType::Flags&AlignedBit), + MatrixAlignment = traits::Alignment, + ResultAlignment = traits::Alignment, StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) }; + typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType; - static void run(const MatrixType& matrix, ResultType& result) + static void run(const MatrixType& mat, ResultType& result) { + ActualMatrixType matrix(mat); EIGEN_ALIGN16 const unsigned int _Sign_PNNP[4] = { 0x00000000, 0x80000000, 0x80000000, 0x00000000 }; // Load the full matrix into registers @@ -151,10 +153,12 @@ struct compute_inverse_size4 iC = _mm_mul_ps(rd,iC); iD = _mm_mul_ps(rd,iD); - result.template writePacket( 0, _mm_shuffle_ps(iA,iB,0x77)); - result.template writePacket( 4, _mm_shuffle_ps(iA,iB,0x22)); - result.template writePacket( 8, _mm_shuffle_ps(iC,iD,0x77)); - result.template writePacket(12, _mm_shuffle_ps(iC,iD,0x22)); + Index res_stride = result.outerStride(); + float* res = result.data(); + pstoret(res+0, _mm_shuffle_ps(iA,iB,0x77)); + pstoret(res+res_stride, _mm_shuffle_ps(iA,iB,0x22)); + pstoret(res+2*res_stride, _mm_shuffle_ps(iC,iD,0x77)); + pstoret(res+3*res_stride, _mm_shuffle_ps(iC,iD,0x22)); } }; @@ -163,18 +167,21 @@ template struct compute_inverse_size4 { enum { - MatrixAlignment = bool(MatrixType::Flags&AlignedBit), - ResultAlignment = bool(ResultType::Flags&AlignedBit), + MatrixAlignment = traits::Alignment, + ResultAlignment = traits::Alignment, StorageOrdersMatch = (MatrixType::Flags&RowMajorBit) == (ResultType::Flags&RowMajorBit) }; - static void run(const MatrixType& matrix, ResultType& result) + typedef typename conditional<(MatrixType::Flags&LinearAccessBit),MatrixType const &,typename MatrixType::PlainObject>::type ActualMatrixType; + + static void run(const MatrixType& mat, ResultType& result) { + ActualMatrixType matrix(mat); const __m128d _Sign_NP = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0)); const __m128d _Sign_PN = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0)); // The inverse is calculated using "Divide and Conquer" technique. The // original matrix is divide into four 2x2 sub-matrices. Since each - // register of the matrix holds two element, the smaller matrices are + // register of the matrix holds two elements, the smaller matrices are // consisted of two registers. Hence we get a better locality of the // calculations. @@ -311,14 +318,16 @@ struct compute_inverse_size4 iC1 = _mm_sub_pd(_mm_mul_pd(B1, dC), iC1); iC2 = _mm_sub_pd(_mm_mul_pd(B2, dC), iC2); - result.template writePacket( 0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1)); // iA# / det - result.template writePacket( 4, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2)); - result.template writePacket( 2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1)); // iB# / det - result.template writePacket( 6, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2)); - result.template writePacket( 8, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1)); // iC# / det - result.template writePacket(12, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2)); - result.template writePacket(10, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1)); // iD# / det - result.template writePacket(14, _mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2)); + Index res_stride = result.outerStride(); + double* res = result.data(); + pstoret(res+0, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 3), d1)); + pstoret(res+res_stride, _mm_mul_pd(_mm_shuffle_pd(iA2, iA1, 0), d2)); + pstoret(res+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 3), d1)); + pstoret(res+res_stride+2, _mm_mul_pd(_mm_shuffle_pd(iB2, iB1, 0), d2)); + pstoret(res+2*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 3), d1)); + pstoret(res+3*res_stride, _mm_mul_pd(_mm_shuffle_pd(iC2, iC1, 0), d2)); + pstoret(res+2*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 3), d1)); + pstoret(res+3*res_stride+2,_mm_mul_pd(_mm_shuffle_pd(iD2, iD1, 0), d2)); } }; -- cgit v1.2.3