diff options
Diffstat (limited to 'eigen/Eigen/src/SparseLU')
18 files changed, 3429 insertions, 0 deletions
diff --git a/eigen/Eigen/src/SparseLU/CMakeLists.txt b/eigen/Eigen/src/SparseLU/CMakeLists.txt new file mode 100644 index 0000000..69729ee --- /dev/null +++ b/eigen/Eigen/src/SparseLU/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_SparseLU_SRCS "*.h") + +INSTALL(FILES + ${Eigen_SparseLU_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseLU COMPONENT Devel + ) diff --git a/eigen/Eigen/src/SparseLU/SparseLU.h b/eigen/Eigen/src/SparseLU/SparseLU.h new file mode 100644 index 0000000..bdc4f19 --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU.h @@ -0,0 +1,806 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_SPARSE_LU_H +#define EIGEN_SPARSE_LU_H + +namespace Eigen { + +template <typename _MatrixType, typename _OrderingType = COLAMDOrdering<typename _MatrixType::Index> > class SparseLU; +template <typename MappedSparseMatrixType> struct SparseLUMatrixLReturnType; +template <typename MatrixLType, typename MatrixUType> struct SparseLUMatrixUReturnType; + +/** \ingroup SparseLU_Module + * \class SparseLU + * + * \brief Sparse supernodal LU factorization for general matrices + * + * This class implements the supernodal LU factorization for general matrices. + * It uses the main techniques from the sequential SuperLU package + * (http://crd-legacy.lbl.gov/~xiaoye/SuperLU/). It handles transparently real + * and complex arithmetics with single and double precision, depending on the + * scalar type of your input matrix. + * The code has been optimized to provide BLAS-3 operations during supernode-panel updates. + * It benefits directly from the built-in high-performant Eigen BLAS routines. + * Moreover, when the size of a supernode is very small, the BLAS calls are avoided to + * enable a better optimization from the compiler. For best performance, + * you should compile it with NDEBUG flag to avoid the numerous bounds checking on vectors. + * + * An important parameter of this class is the ordering method. It is used to reorder the columns + * (and eventually the rows) of the matrix to reduce the number of new elements that are created during + * numerical factorization. The cheapest method available is COLAMD. + * See \link OrderingMethods_Module the OrderingMethods module \endlink for the list of + * built-in and external ordering methods. + * + * Simple example with key steps + * \code + * VectorXd x(n), b(n); + * SparseMatrix<double, ColMajor> A; + * SparseLU<SparseMatrix<scalar, ColMajor>, COLAMDOrdering<Index> > solver; + * // fill A and b; + * // Compute the ordering permutation vector from the structural pattern of A + * solver.analyzePattern(A); + * // Compute the numerical factorization + * solver.factorize(A); + * //Use the factors to solve the linear system + * x = solver.solve(b); + * \endcode + * + * \warning The input matrix A should be in a \b compressed and \b column-major form. + * Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix. + * + * \note Unlike the initial SuperLU implementation, there is no step to equilibrate the matrix. + * For badly scaled matrices, this step can be useful to reduce the pivoting during factorization. + * If this is the case for your matrices, you can try the basic scaling method at + * "unsupported/Eigen/src/IterativeSolvers/Scaling.h" + * + * \tparam _MatrixType The type of the sparse matrix. It must be a column-major SparseMatrix<> + * \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS. Default is COLMAD + * + * + * \sa \ref TutorialSparseDirectSolvers + * \sa \ref OrderingMethods_Module + */ +template <typename _MatrixType, typename _OrderingType> +class SparseLU : public internal::SparseLUImpl<typename _MatrixType::Scalar, typename _MatrixType::Index> +{ + public: + typedef _MatrixType MatrixType; + typedef _OrderingType OrderingType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::Index Index; + typedef SparseMatrix<Scalar,ColMajor,Index> NCMatrix; + typedef internal::MappedSuperNodalMatrix<Scalar, Index> SCMatrix; + typedef Matrix<Scalar,Dynamic,1> ScalarVector; + typedef Matrix<Index,Dynamic,1> IndexVector; + typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType; + typedef internal::SparseLUImpl<Scalar, Index> Base; + + public: + SparseLU():m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) + { + initperfvalues(); + } + SparseLU(const MatrixType& matrix):m_isInitialized(true),m_lastError(""),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0),m_detPermR(1) + { + initperfvalues(); + compute(matrix); + } + + ~SparseLU() + { + // Free all explicit dynamic pointers + } + + void analyzePattern (const MatrixType& matrix); + void factorize (const MatrixType& matrix); + void simplicialfactorize(const MatrixType& matrix); + + /** + * Compute the symbolic and numeric factorization of the input sparse matrix. + * The input matrix should be in column-major storage. + */ + void compute (const MatrixType& matrix) + { + // Analyze + analyzePattern(matrix); + //Factorize + factorize(matrix); + } + + inline Index rows() const { return m_mat.rows(); } + inline Index cols() const { return m_mat.cols(); } + /** Indicate that the pattern of the input matrix is symmetric */ + void isSymmetric(bool sym) + { + m_symmetricmode = sym; + } + + /** \returns an expression of the matrix L, internally stored as supernodes + * The only operation available with this expression is the triangular solve + * \code + * y = b; matrixL().solveInPlace(y); + * \endcode + */ + SparseLUMatrixLReturnType<SCMatrix> matrixL() const + { + return SparseLUMatrixLReturnType<SCMatrix>(m_Lstore); + } + /** \returns an expression of the matrix U, + * The only operation available with this expression is the triangular solve + * \code + * y = b; matrixU().solveInPlace(y); + * \endcode + */ + SparseLUMatrixUReturnType<SCMatrix,MappedSparseMatrix<Scalar,ColMajor,Index> > matrixU() const + { + return SparseLUMatrixUReturnType<SCMatrix, MappedSparseMatrix<Scalar,ColMajor,Index> >(m_Lstore, m_Ustore); + } + + /** + * \returns a reference to the row matrix permutation \f$ P_r \f$ such that \f$P_r A P_c^T = L U\f$ + * \sa colsPermutation() + */ + inline const PermutationType& rowsPermutation() const + { + return m_perm_r; + } + /** + * \returns a reference to the column matrix permutation\f$ P_c^T \f$ such that \f$P_r A P_c^T = L U\f$ + * \sa rowsPermutation() + */ + inline const PermutationType& colsPermutation() const + { + return m_perm_c; + } + /** Set the threshold used for a diagonal entry to be an acceptable pivot. */ + void setPivotThreshold(const RealScalar& thresh) + { + m_diagpivotthresh = thresh; + } + + /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. + * + * \warning the destination matrix X in X = this->solve(B) must be colmun-major. + * + * \sa compute() + */ + template<typename Rhs> + inline const internal::solve_retval<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const + { + eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); + eigen_assert(rows()==B.rows() + && "SparseLU::solve(): invalid number of rows of the right hand side matrix B"); + return internal::solve_retval<SparseLU, Rhs>(*this, B.derived()); + } + + /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. + * + * \sa compute() + */ + template<typename Rhs> + inline const internal::sparse_solve_retval<SparseLU, Rhs> solve(const SparseMatrixBase<Rhs>& B) const + { + eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); + eigen_assert(rows()==B.rows() + && "SparseLU::solve(): invalid number of rows of the right hand side matrix B"); + return internal::sparse_solve_retval<SparseLU, Rhs>(*this, B.derived()); + } + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the LU factorization reports a problem, zero diagonal for instance + * \c InvalidInput if the input matrix is invalid + * + * \sa iparm() + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "Decomposition is not initialized."); + return m_info; + } + + /** + * \returns A string describing the type of error + */ + std::string lastErrorMessage() const + { + return m_lastError; + } + + template<typename Rhs, typename Dest> + bool _solve(const MatrixBase<Rhs> &B, MatrixBase<Dest> &X_base) const + { + Dest& X(X_base.derived()); + eigen_assert(m_factorizationIsOk && "The matrix should be factorized first"); + EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + + // Permute the right hand side to form X = Pr*B + // on return, X is overwritten by the computed solution + X.resize(B.rows(),B.cols()); + + // this ugly const_cast_derived() helps to detect aliasing when applying the permutations + for(Index j = 0; j < B.cols(); ++j) + X.col(j) = rowsPermutation() * B.const_cast_derived().col(j); + + //Forward substitution with L + this->matrixL().solveInPlace(X); + this->matrixU().solveInPlace(X); + + // Permute back the solution + for (Index j = 0; j < B.cols(); ++j) + X.col(j) = colsPermutation().inverse() * X.col(j); + + return true; + } + + /** + * \returns the absolute value of the determinant of the matrix of which + * *this is the QR decomposition. + * + * \warning a determinant can be very big or small, so for matrices + * of large enough dimension, there is a risk of overflow/underflow. + * One way to work around that is to use logAbsDeterminant() instead. + * + * \sa logAbsDeterminant(), signDeterminant() + */ + Scalar absDeterminant() + { + eigen_assert(m_factorizationIsOk && "The matrix should be factorized first."); + // Initialize with the determinant of the row matrix + Scalar det = Scalar(1.); + // Note that the diagonal blocks of U are stored in supernodes, + // which are available in the L part :) + for (Index j = 0; j < this->cols(); ++j) + { + for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it) + { + if(it.index() == j) + { + using std::abs; + det *= abs(it.value()); + break; + } + } + } + return det; + } + + /** \returns the natural log of the absolute value of the determinant of the matrix + * of which **this is the QR decomposition + * + * \note This method is useful to work around the risk of overflow/underflow that's + * inherent to the determinant computation. + * + * \sa absDeterminant(), signDeterminant() + */ + Scalar logAbsDeterminant() const + { + eigen_assert(m_factorizationIsOk && "The matrix should be factorized first."); + Scalar det = Scalar(0.); + for (Index j = 0; j < this->cols(); ++j) + { + for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it) + { + if(it.row() < j) continue; + if(it.row() == j) + { + using std::log; using std::abs; + det += log(abs(it.value())); + break; + } + } + } + return det; + } + + /** \returns A number representing the sign of the determinant + * + * \sa absDeterminant(), logAbsDeterminant() + */ + Scalar signDeterminant() + { + eigen_assert(m_factorizationIsOk && "The matrix should be factorized first."); + // Initialize with the determinant of the row matrix + Index det = 1; + // Note that the diagonal blocks of U are stored in supernodes, + // which are available in the L part :) + for (Index j = 0; j < this->cols(); ++j) + { + for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it) + { + if(it.index() == j) + { + if(it.value()<0) + det = -det; + else if(it.value()==0) + return 0; + break; + } + } + } + return det * m_detPermR * m_detPermC; + } + + /** \returns The determinant of the matrix. + * + * \sa absDeterminant(), logAbsDeterminant() + */ + Scalar determinant() + { + eigen_assert(m_factorizationIsOk && "The matrix should be factorized first."); + // Initialize with the determinant of the row matrix + Scalar det = Scalar(1.); + // Note that the diagonal blocks of U are stored in supernodes, + // which are available in the L part :) + for (Index j = 0; j < this->cols(); ++j) + { + for (typename SCMatrix::InnerIterator it(m_Lstore, j); it; ++it) + { + if(it.index() == j) + { + det *= it.value(); + break; + } + } + } + return det * Scalar(m_detPermR * m_detPermC); + } + + protected: + // Functions + void initperfvalues() + { + m_perfv.panel_size = 16; + m_perfv.relax = 1; + m_perfv.maxsuper = 128; + m_perfv.rowblk = 16; + m_perfv.colblk = 8; + m_perfv.fillfactor = 20; + } + + // Variables + mutable ComputationInfo m_info; + bool m_isInitialized; + bool m_factorizationIsOk; + bool m_analysisIsOk; + std::string m_lastError; + NCMatrix m_mat; // The input (permuted ) matrix + SCMatrix m_Lstore; // The lower triangular matrix (supernodal) + MappedSparseMatrix<Scalar,ColMajor,Index> m_Ustore; // The upper triangular matrix + PermutationType m_perm_c; // Column permutation + PermutationType m_perm_r ; // Row permutation + IndexVector m_etree; // Column elimination tree + + typename Base::GlobalLU_t m_glu; + + // SparseLU options + bool m_symmetricmode; + // values for performance + internal::perfvalues<Index> m_perfv; + RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot + Index m_nnzL, m_nnzU; // Nonzeros in L and U factors + Index m_detPermR, m_detPermC; // Determinants of the permutation matrices + private: + // Disable copy constructor + SparseLU (const SparseLU& ); + +}; // End class SparseLU + + + +// Functions needed by the anaysis phase +/** + * Compute the column permutation to minimize the fill-in + * + * - Apply this permutation to the input matrix - + * + * - Compute the column elimination tree on the permuted matrix + * + * - Postorder the elimination tree and the column permutation + * + */ +template <typename MatrixType, typename OrderingType> +void SparseLU<MatrixType, OrderingType>::analyzePattern(const MatrixType& mat) +{ + + //TODO It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat. + + OrderingType ord; + ord(mat,m_perm_c); + + // Apply the permutation to the column of the input matrix + //First copy the whole input matrix. + m_mat = mat; + if (m_perm_c.size()) { + m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used. + //Then, permute only the column pointers + const Index * outerIndexPtr; + if (mat.isCompressed()) outerIndexPtr = mat.outerIndexPtr(); + else + { + Index *outerIndexPtr_t = new Index[mat.cols()+1]; + for(Index i = 0; i <= mat.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i]; + outerIndexPtr = outerIndexPtr_t; + } + for (Index i = 0; i < mat.cols(); i++) + { + m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i]; + m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i]; + } + if(!mat.isCompressed()) delete[] outerIndexPtr; + } + // Compute the column elimination tree of the permuted matrix + IndexVector firstRowElt; + internal::coletree(m_mat, m_etree,firstRowElt); + + // In symmetric mode, do not do postorder here + if (!m_symmetricmode) { + IndexVector post, iwork; + // Post order etree + internal::treePostorder(m_mat.cols(), m_etree, post); + + + // Renumber etree in postorder + Index m = m_mat.cols(); + iwork.resize(m+1); + for (Index i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i)); + m_etree = iwork; + + // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree + PermutationType post_perm(m); + for (Index i = 0; i < m; i++) + post_perm.indices()(i) = post(i); + + // Combine the two permutations : postorder the permutation for future use + if(m_perm_c.size()) { + m_perm_c = post_perm * m_perm_c; + } + + } // end postordering + + m_analysisIsOk = true; +} + +// Functions needed by the numerical factorization phase + + +/** + * - Numerical factorization + * - Interleaved with the symbolic factorization + * On exit, info is + * + * = 0: successful factorization + * + * > 0: if info = i, and i is + * + * <= A->ncol: U(i,i) is exactly zero. The factorization has + * been completed, but the factor U is exactly singular, + * and division by zero will occur if it is used to solve a + * system of equations. + * + * > A->ncol: number of bytes allocated when memory allocation + * failure occurred, plus A->ncol. If lwork = -1, it is + * the estimated amount of space needed, plus A->ncol. + */ +template <typename MatrixType, typename OrderingType> +void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix) +{ + using internal::emptyIdxLU; + eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); + eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices"); + + typedef typename IndexVector::Scalar Index; + + + // Apply the column permutation computed in analyzepattern() + // m_mat = matrix * m_perm_c.inverse(); + m_mat = matrix; + if (m_perm_c.size()) + { + m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. + //Then, permute only the column pointers + const Index * outerIndexPtr; + if (matrix.isCompressed()) outerIndexPtr = matrix.outerIndexPtr(); + else + { + Index* outerIndexPtr_t = new Index[matrix.cols()+1]; + for(Index i = 0; i <= matrix.cols(); i++) outerIndexPtr_t[i] = m_mat.outerIndexPtr()[i]; + outerIndexPtr = outerIndexPtr_t; + } + for (Index i = 0; i < matrix.cols(); i++) + { + m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i]; + m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = outerIndexPtr[i+1] - outerIndexPtr[i]; + } + if(!matrix.isCompressed()) delete[] outerIndexPtr; + } + else + { //FIXME This should not be needed if the empty permutation is handled transparently + m_perm_c.resize(matrix.cols()); + for(Index i = 0; i < matrix.cols(); ++i) m_perm_c.indices()(i) = i; + } + + Index m = m_mat.rows(); + Index n = m_mat.cols(); + Index nnz = m_mat.nonZeros(); + Index maxpanel = m_perfv.panel_size * m; + // Allocate working storage common to the factor routines + Index lwork = 0; + Index info = Base::memInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu); + if (info) + { + m_lastError = "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ; + m_factorizationIsOk = false; + return ; + } + + // Set up pointers for integer working arrays + IndexVector segrep(m); segrep.setZero(); + IndexVector parent(m); parent.setZero(); + IndexVector xplore(m); xplore.setZero(); + IndexVector repfnz(maxpanel); + IndexVector panel_lsub(maxpanel); + IndexVector xprune(n); xprune.setZero(); + IndexVector marker(m*internal::LUNoMarker); marker.setZero(); + + repfnz.setConstant(-1); + panel_lsub.setConstant(-1); + + // Set up pointers for scalar working arrays + ScalarVector dense; + dense.setZero(maxpanel); + ScalarVector tempv; + tempv.setZero(internal::LUnumTempV(m, m_perfv.panel_size, m_perfv.maxsuper, /*m_perfv.rowblk*/m) ); + + // Compute the inverse of perm_c + PermutationType iperm_c(m_perm_c.inverse()); + + // Identify initial relaxed snodes + IndexVector relax_end(n); + if ( m_symmetricmode == true ) + Base::heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); + else + Base::relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); + + + m_perm_r.resize(m); + m_perm_r.indices().setConstant(-1); + marker.setConstant(-1); + m_detPermR = 1; // Record the determinant of the row permutation + + m_glu.supno(0) = emptyIdxLU; m_glu.xsup.setConstant(0); + m_glu.xsup(0) = m_glu.xlsub(0) = m_glu.xusub(0) = m_glu.xlusup(0) = Index(0); + + // Work on one 'panel' at a time. A panel is one of the following : + // (a) a relaxed supernode at the bottom of the etree, or + // (b) panel_size contiguous columns, <panel_size> defined by the user + Index jcol; + IndexVector panel_histo(n); + Index pivrow; // Pivotal row number in the original row matrix + Index nseg1; // Number of segments in U-column above panel row jcol + Index nseg; // Number of segments in each U-column + Index irep; + Index i, k, jj; + for (jcol = 0; jcol < n; ) + { + // Adjust panel size so that a panel won't overlap with the next relaxed snode. + Index panel_size = m_perfv.panel_size; // upper bound on panel width + for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++) + { + if (relax_end(k) != emptyIdxLU) + { + panel_size = k - jcol; + break; + } + } + if (k == n) + panel_size = n - jcol; + + // Symbolic outer factorization on a panel of columns + Base::panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); + + // Numeric sup-panel updates in topological order + Base::panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu); + + // Sparse LU within the panel, and below the panel diagonal + for ( jj = jcol; jj< jcol + panel_size; jj++) + { + k = (jj - jcol) * m; // Column index for w-wide arrays + + nseg = nseg1; // begin after all the panel segments + //Depth-first-search for the current column + VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m); + VectorBlock<IndexVector> repfnz_k(repfnz, k, m); + info = Base::column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); + if ( info ) + { + m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() "; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + // Numeric updates to this column + VectorBlock<ScalarVector> dense_k(dense, k, m); + VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1); + info = Base::column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); + if ( info ) + { + m_lastError = "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() "; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + + // Copy the U-segments to ucol(*) + info = Base::copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); + if ( info ) + { + m_lastError = "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() "; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + + // Form the L-segment + info = Base::pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); + if ( info ) + { + m_lastError = "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT "; + std::ostringstream returnInfo; + returnInfo << info; + m_lastError += returnInfo.str(); + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + + // Update the determinant of the row permutation matrix + // FIXME: the following test is not correct, we should probably take iperm_c into account and pivrow is not directly the row pivot. + if (pivrow != jj) m_detPermR = -m_detPermR; + + // Prune columns (0:jj-1) using column jj + Base::pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); + + // Reset repfnz for this column + for (i = 0; i < nseg; i++) + { + irep = segrep(i); + repfnz_k(irep) = emptyIdxLU; + } + } // end SparseLU within the panel + jcol += panel_size; // Move to the next panel + } // end for -- end elimination + + m_detPermR = m_perm_r.determinant(); + m_detPermC = m_perm_c.determinant(); + + // Count the number of nonzeros in factors + Base::countnz(n, m_nnzL, m_nnzU, m_glu); + // Apply permutation to the L subscripts + Base::fixupL(n, m_perm_r.indices(), m_glu); + + // Create supernode matrix L + m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); + // Create the column major upper sparse matrix U; + new (&m_Ustore) MappedSparseMatrix<Scalar, ColMajor, Index> ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); + + m_info = Success; + m_factorizationIsOk = true; +} + +template<typename MappedSupernodalType> +struct SparseLUMatrixLReturnType : internal::no_assignment_operator +{ + typedef typename MappedSupernodalType::Index Index; + typedef typename MappedSupernodalType::Scalar Scalar; + SparseLUMatrixLReturnType(const MappedSupernodalType& mapL) : m_mapL(mapL) + { } + Index rows() { return m_mapL.rows(); } + Index cols() { return m_mapL.cols(); } + template<typename Dest> + void solveInPlace( MatrixBase<Dest> &X) const + { + m_mapL.solveInPlace(X); + } + const MappedSupernodalType& m_mapL; +}; + +template<typename MatrixLType, typename MatrixUType> +struct SparseLUMatrixUReturnType : internal::no_assignment_operator +{ + typedef typename MatrixLType::Index Index; + typedef typename MatrixLType::Scalar Scalar; + SparseLUMatrixUReturnType(const MatrixLType& mapL, const MatrixUType& mapU) + : m_mapL(mapL),m_mapU(mapU) + { } + Index rows() { return m_mapL.rows(); } + Index cols() { return m_mapL.cols(); } + + template<typename Dest> void solveInPlace(MatrixBase<Dest> &X) const + { + Index nrhs = X.cols(); + Index n = X.rows(); + // Backward solve with U + for (Index k = m_mapL.nsuper(); k >= 0; k--) + { + Index fsupc = m_mapL.supToCol()[k]; + Index lda = m_mapL.colIndexPtr()[fsupc+1] - m_mapL.colIndexPtr()[fsupc]; // leading dimension + Index nsupc = m_mapL.supToCol()[k+1] - fsupc; + Index luptr = m_mapL.colIndexPtr()[fsupc]; + + if (nsupc == 1) + { + for (Index j = 0; j < nrhs; j++) + { + X(fsupc, j) /= m_mapL.valuePtr()[luptr]; + } + } + else + { + Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(m_mapL.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(lda) ); + Map< Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); + U = A.template triangularView<Upper>().solve(U); + } + + for (Index j = 0; j < nrhs; ++j) + { + for (Index jcol = fsupc; jcol < fsupc + nsupc; jcol++) + { + typename MatrixUType::InnerIterator it(m_mapU, jcol); + for ( ; it; ++it) + { + Index irow = it.index(); + X(irow, j) -= X(jcol, j) * it.value(); + } + } + } + } // End For U-solve + } + const MatrixLType& m_mapL; + const MatrixUType& m_mapU; +}; + +namespace internal { + +template<typename _MatrixType, typename Derived, typename Rhs> +struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs> + : solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs> +{ + typedef SparseLU<_MatrixType,Derived> Dec; + EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) + + template<typename Dest> void evalTo(Dest& dst) const + { + dec()._solve(rhs(),dst); + } +}; + +template<typename _MatrixType, typename Derived, typename Rhs> +struct sparse_solve_retval<SparseLU<_MatrixType,Derived>, Rhs> + : sparse_solve_retval_base<SparseLU<_MatrixType,Derived>, Rhs> +{ + typedef SparseLU<_MatrixType,Derived> Dec; + EIGEN_MAKE_SPARSE_SOLVE_HELPERS(Dec,Rhs) + + template<typename Dest> void evalTo(Dest& dst) const + { + this->defaultEvalTo(dst); + } +}; +} // end namespace internal + +} // End namespace Eigen + +#endif diff --git a/eigen/Eigen/src/SparseLU/SparseLUImpl.h b/eigen/Eigen/src/SparseLU/SparseLUImpl.h new file mode 100644 index 0000000..99d651e --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLUImpl.h @@ -0,0 +1,66 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#ifndef SPARSELU_IMPL_H +#define SPARSELU_IMPL_H + +namespace Eigen { +namespace internal { + +/** \ingroup SparseLU_Module + * \class SparseLUImpl + * Base class for sparseLU + */ +template <typename Scalar, typename Index> +class SparseLUImpl +{ + public: + typedef Matrix<Scalar,Dynamic,1> ScalarVector; + typedef Matrix<Scalar,Dynamic,Dynamic,ColMajor> ScalarMatrix; + typedef Map<ScalarMatrix, 0, OuterStride<> > MappedMatrixBlock; + typedef Matrix<Index,Dynamic,1> IndexVector; + typedef typename ScalarVector::RealScalar RealScalar; + typedef Ref<Matrix<Scalar,Dynamic,1> > BlockScalarVector; + typedef Ref<Matrix<Index,Dynamic,1> > BlockIndexVector; + typedef LU_GlobalLU_t<IndexVector, ScalarVector> GlobalLU_t; + typedef SparseMatrix<Scalar,ColMajor,Index> MatrixType; + + protected: + template <typename VectorType> + Index expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions); + Index memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu); + template <typename VectorType> + Index memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions); + void heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end); + void relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end); + Index snode_dfs(const Index jcol, const Index kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu); + Index snode_bmod (const Index jcol, const Index fsupc, ScalarVector& dense, GlobalLU_t& glu); + Index pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu); + template <typename Traits> + void dfs_kernel(const Index jj, IndexVector& perm_r, + Index& nseg, IndexVector& panel_lsub, IndexVector& segrep, + Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent, + IndexVector& xplore, GlobalLU_t& glu, Index& nextl_col, Index krow, Traits& traits); + void panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu); + + void panel_bmod(const Index m, const Index w, const Index jcol, const Index nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu); + Index column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu); + Index column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu); + Index copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu); + void pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu); + void countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu); + void fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu); + + template<typename , typename > + friend struct column_dfs_traits; +}; + +} // end namespace internal +} // namespace Eigen + +#endif diff --git a/eigen/Eigen/src/SparseLU/SparseLU_Memory.h b/eigen/Eigen/src/SparseLU/SparseLU_Memory.h new file mode 100644 index 0000000..45f96d1 --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_Memory.h @@ -0,0 +1,227 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]memory.c files in SuperLU + + * -- SuperLU routine (version 3.1) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * August 1, 2008 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ + +#ifndef EIGEN_SPARSELU_MEMORY +#define EIGEN_SPARSELU_MEMORY + +namespace Eigen { +namespace internal { + +enum { LUNoMarker = 3 }; +enum {emptyIdxLU = -1}; +template<typename Index> +inline Index LUnumTempV(Index& m, Index& w, Index& t, Index& b) +{ + return (std::max)(m, (t+b)*w); +} + +template< typename Scalar, typename Index> +inline Index LUTempSpace(Index&m, Index& w) +{ + return (2*w + 4 + LUNoMarker) * m * sizeof(Index) + (w + 1) * m * sizeof(Scalar); +} + + + + +/** + * Expand the existing storage to accomodate more fill-ins + * \param vec Valid pointer to the vector to allocate or expand + * \param[in,out] length At input, contain the current length of the vector that is to be increased. At output, length of the newly allocated vector + * \param[in] nbElts Current number of elements in the factors + * \param keep_prev 1: use length and do not expand the vector; 0: compute new_len and expand + * \param[in,out] num_expansions Number of times the memory has been expanded + */ +template <typename Scalar, typename Index> +template <typename VectorType> +Index SparseLUImpl<Scalar,Index>::expand(VectorType& vec, Index& length, Index nbElts, Index keep_prev, Index& num_expansions) +{ + + float alpha = 1.5; // Ratio of the memory increase + Index new_len; // New size of the allocated memory + + if(num_expansions == 0 || keep_prev) + new_len = length ; // First time allocate requested + else + new_len = (std::max)(length+1,Index(alpha * length)); + + VectorType old_vec; // Temporary vector to hold the previous values + if (nbElts > 0 ) + old_vec = vec.segment(0,nbElts); + + //Allocate or expand the current vector +#ifdef EIGEN_EXCEPTIONS + try +#endif + { + vec.resize(new_len); + } +#ifdef EIGEN_EXCEPTIONS + catch(std::bad_alloc& ) +#else + if(!vec.size()) +#endif + { + if (!num_expansions) + { + // First time to allocate from LUMemInit() + // Let LUMemInit() deals with it. + return -1; + } + if (keep_prev) + { + // In this case, the memory length should not not be reduced + return new_len; + } + else + { + // Reduce the size and increase again + Index tries = 0; // Number of attempts + do + { + alpha = (alpha + 1)/2; + new_len = (std::max)(length+1,Index(alpha * length)); +#ifdef EIGEN_EXCEPTIONS + try +#endif + { + vec.resize(new_len); + } +#ifdef EIGEN_EXCEPTIONS + catch(std::bad_alloc& ) +#else + if (!vec.size()) +#endif + { + tries += 1; + if ( tries > 10) return new_len; + } + } while (!vec.size()); + } + } + //Copy the previous values to the newly allocated space + if (nbElts > 0) + vec.segment(0, nbElts) = old_vec; + + + length = new_len; + if(num_expansions) ++num_expansions; + return 0; +} + +/** + * \brief Allocate various working space for the numerical factorization phase. + * \param m number of rows of the input matrix + * \param n number of columns + * \param annz number of initial nonzeros in the matrix + * \param lwork if lwork=-1, this routine returns an estimated size of the required memory + * \param glu persistent data to facilitate multiple factors : will be deleted later ?? + * \param fillratio estimated ratio of fill in the factors + * \param panel_size Size of a panel + * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated memory when allocation failed, and 0 on success + * \note Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation + */ +template <typename Scalar, typename Index> +Index SparseLUImpl<Scalar,Index>::memInit(Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t& glu) +{ + Index& num_expansions = glu.num_expansions; //No memory expansions so far + num_expansions = 0; + glu.nzumax = glu.nzlumax = (std::min)(fillratio * (annz+1) / n, m) * n; // estimated number of nonzeros in U + glu.nzlmax = (std::max)(Index(4), fillratio) * (annz+1) / 4; // estimated nnz in L factor + // Return the estimated size to the user if necessary + Index tempSpace; + tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar); + if (lwork == emptyIdxLU) + { + Index estimated_size; + estimated_size = (5 * n + 5) * sizeof(Index) + tempSpace + + (glu.nzlmax + glu.nzumax) * sizeof(Index) + (glu.nzlumax+glu.nzumax) * sizeof(Scalar) + n; + return estimated_size; + } + + // Setup the required space + + // First allocate Integer pointers for L\U factors + glu.xsup.resize(n+1); + glu.supno.resize(n+1); + glu.xlsub.resize(n+1); + glu.xlusup.resize(n+1); + glu.xusub.resize(n+1); + + // Reserve memory for L/U factors + do + { + if( (expand<ScalarVector>(glu.lusup, glu.nzlumax, 0, 0, num_expansions)<0) + || (expand<ScalarVector>(glu.ucol, glu.nzumax, 0, 0, num_expansions)<0) + || (expand<IndexVector> (glu.lsub, glu.nzlmax, 0, 0, num_expansions)<0) + || (expand<IndexVector> (glu.usub, glu.nzumax, 0, 1, num_expansions)<0) ) + { + //Reduce the estimated size and retry + glu.nzlumax /= 2; + glu.nzumax /= 2; + glu.nzlmax /= 2; + if (glu.nzlumax < annz ) return glu.nzlumax; + } + } while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size()); + + ++num_expansions; + return 0; + +} // end LuMemInit + +/** + * \brief Expand the existing storage + * \param vec vector to expand + * \param[in,out] maxlen On input, previous size of vec (Number of elements to copy ). on output, new size + * \param nbElts current number of elements in the vector. + * \param memtype Type of the element to expand + * \param num_expansions Number of expansions + * \return 0 on success, > 0 size of the memory allocated so far + */ +template <typename Scalar, typename Index> +template <typename VectorType> +Index SparseLUImpl<Scalar,Index>::memXpand(VectorType& vec, Index& maxlen, Index nbElts, MemType memtype, Index& num_expansions) +{ + Index failed_size; + if (memtype == USUB) + failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 1, num_expansions); + else + failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 0, num_expansions); + + if (failed_size) + return failed_size; + + return 0 ; +} + +} // end namespace internal + +} // end namespace Eigen +#endif // EIGEN_SPARSELU_MEMORY diff --git a/eigen/Eigen/src/SparseLU/SparseLU_Structs.h b/eigen/Eigen/src/SparseLU/SparseLU_Structs.h new file mode 100644 index 0000000..24d6bf1 --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_Structs.h @@ -0,0 +1,111 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + * NOTE: This file comes from a partly modified version of files slu_[s,d,c,z]defs.h + * -- SuperLU routine (version 4.1) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November, 2010 + * + * Global data structures used in LU factorization - + * + * nsuper: #supernodes = nsuper + 1, numbered [0, nsuper]. + * (xsup,supno): supno[i] is the supernode no to which i belongs; + * xsup(s) points to the beginning of the s-th supernode. + * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12) + * xsup 0 1 2 4 7 12 + * Note: dfs will be performed on supernode rep. relative to the new + * row pivoting ordering + * + * (xlsub,lsub): lsub[*] contains the compressed subscript of + * rectangular supernodes; xlsub[j] points to the starting + * location of the j-th column in lsub[*]. Note that xlsub + * is indexed by column. + * Storage: original row subscripts + * + * During the course of sparse LU factorization, we also use + * (xlsub,lsub) for the purpose of symmetric pruning. For each + * supernode {s,s+1,...,t=s+r} with first column s and last + * column t, the subscript set + * lsub[j], j=xlsub[s], .., xlsub[s+1]-1 + * is the structure of column s (i.e. structure of this supernode). + * It is used for the storage of numerical values. + * Furthermore, + * lsub[j], j=xlsub[t], .., xlsub[t+1]-1 + * is the structure of the last column t of this supernode. + * It is for the purpose of symmetric pruning. Therefore, the + * structural subscripts can be rearranged without making physical + * interchanges among the numerical values. + * + * However, if the supernode has only one column, then we + * only keep one set of subscripts. For any subscript interchange + * performed, similar interchange must be done on the numerical + * values. + * + * The last column structures (for pruning) will be removed + * after the numercial LU factorization phase. + * + * (xlusup,lusup): lusup[*] contains the numerical values of the + * rectangular supernodes; xlusup[j] points to the starting + * location of the j-th column in storage vector lusup[*] + * Note: xlusup is indexed by column. + * Each rectangular supernode is stored by column-major + * scheme, consistent with Fortran 2-dim array storage. + * + * (xusub,ucol,usub): ucol[*] stores the numerical values of + * U-columns outside the rectangular supernodes. The row + * subscript of nonzero ucol[k] is stored in usub[k]. + * xusub[i] points to the starting location of column i in ucol. + * Storage: new row subscripts; that is subscripts of PA. + */ + +#ifndef EIGEN_LU_STRUCTS +#define EIGEN_LU_STRUCTS +namespace Eigen { +namespace internal { + +typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType; + +template <typename IndexVector, typename ScalarVector> +struct LU_GlobalLU_t { + typedef typename IndexVector::Scalar Index; + IndexVector xsup; //First supernode column ... xsup(s) points to the beginning of the s-th supernode + IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping) + ScalarVector lusup; // nonzero values of L ordered by columns + IndexVector lsub; // Compressed row indices of L rectangular supernodes. + IndexVector xlusup; // pointers to the beginning of each column in lusup + IndexVector xlsub; // pointers to the beginning of each column in lsub + Index nzlmax; // Current max size of lsub + Index nzlumax; // Current max size of lusup + ScalarVector ucol; // nonzero values of U ordered by columns + IndexVector usub; // row indices of U columns in ucol + IndexVector xusub; // Pointers to the beginning of each column of U in ucol + Index nzumax; // Current max size of ucol + Index n; // Number of columns in the matrix + Index num_expansions; +}; + +// Values to set for performance +template <typename Index> +struct perfvalues { + Index panel_size; // a panel consists of at most <panel_size> consecutive columns + Index relax; // To control degree of relaxing supernodes. If the number of nodes (columns) + // in a subtree of the elimination tree is less than relax, this subtree is considered + // as one supernode regardless of the row structures of those columns + Index maxsuper; // The maximum size for a supernode in complete LU + Index rowblk; // The minimum row dimension for 2-D blocking to be used; + Index colblk; // The minimum column dimension for 2-D blocking to be used; + Index fillfactor; // The estimated fills factors for L and U, compared with A +}; + +} // end namespace internal + +} // end namespace Eigen +#endif // EIGEN_LU_STRUCTS diff --git a/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h b/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h new file mode 100644 index 0000000..54a5694 --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h @@ -0,0 +1,298 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSELU_SUPERNODAL_MATRIX_H +#define EIGEN_SPARSELU_SUPERNODAL_MATRIX_H + +namespace Eigen { +namespace internal { + +/** \ingroup SparseLU_Module + * \brief a class to manipulate the L supernodal factor from the SparseLU factorization + * + * This class contain the data to easily store + * and manipulate the supernodes during the factorization and solution phase of Sparse LU. + * Only the lower triangular matrix has supernodes. + * + * NOTE : This class corresponds to the SCformat structure in SuperLU + * + */ +/* TODO + * InnerIterator as for sparsematrix + * SuperInnerIterator to iterate through all supernodes + * Function for triangular solve + */ +template <typename _Scalar, typename _Index> +class MappedSuperNodalMatrix +{ + public: + typedef _Scalar Scalar; + typedef _Index Index; + typedef Matrix<Index,Dynamic,1> IndexVector; + typedef Matrix<Scalar,Dynamic,1> ScalarVector; + public: + MappedSuperNodalMatrix() + { + + } + MappedSuperNodalMatrix(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, + IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col ) + { + setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col); + } + + ~MappedSuperNodalMatrix() + { + + } + /** + * Set appropriate pointers for the lower triangular supernodal matrix + * These infos are available at the end of the numerical factorization + * FIXME This class will be modified such that it can be use in the course + * of the factorization. + */ + void setInfos(Index m, Index n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, + IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col ) + { + m_row = m; + m_col = n; + m_nzval = nzval.data(); + m_nzval_colptr = nzval_colptr.data(); + m_rowind = rowind.data(); + m_rowind_colptr = rowind_colptr.data(); + m_nsuper = col_to_sup(n); + m_col_to_sup = col_to_sup.data(); + m_sup_to_col = sup_to_col.data(); + } + + /** + * Number of rows + */ + Index rows() { return m_row; } + + /** + * Number of columns + */ + Index cols() { return m_col; } + + /** + * Return the array of nonzero values packed by column + * + * The size is nnz + */ + Scalar* valuePtr() { return m_nzval; } + + const Scalar* valuePtr() const + { + return m_nzval; + } + /** + * Return the pointers to the beginning of each column in \ref valuePtr() + */ + Index* colIndexPtr() + { + return m_nzval_colptr; + } + + const Index* colIndexPtr() const + { + return m_nzval_colptr; + } + + /** + * Return the array of compressed row indices of all supernodes + */ + Index* rowIndex() { return m_rowind; } + + const Index* rowIndex() const + { + return m_rowind; + } + + /** + * Return the location in \em rowvaluePtr() which starts each column + */ + Index* rowIndexPtr() { return m_rowind_colptr; } + + const Index* rowIndexPtr() const + { + return m_rowind_colptr; + } + + /** + * Return the array of column-to-supernode mapping + */ + Index* colToSup() { return m_col_to_sup; } + + const Index* colToSup() const + { + return m_col_to_sup; + } + /** + * Return the array of supernode-to-column mapping + */ + Index* supToCol() { return m_sup_to_col; } + + const Index* supToCol() const + { + return m_sup_to_col; + } + + /** + * Return the number of supernodes + */ + Index nsuper() const + { + return m_nsuper; + } + + class InnerIterator; + template<typename Dest> + void solveInPlace( MatrixBase<Dest>&X) const; + + + + + protected: + Index m_row; // Number of rows + Index m_col; // Number of columns + Index m_nsuper; // Number of supernodes + Scalar* m_nzval; //array of nonzero values packed by column + Index* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j + Index* m_rowind; // Array of compressed row indices of rectangular supernodes + Index* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j + Index* m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs + Index* m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode + + private : +}; + +/** + * \brief InnerIterator class to iterate over nonzero values of the current column in the supernodal matrix L + * + */ +template<typename Scalar, typename Index> +class MappedSuperNodalMatrix<Scalar,Index>::InnerIterator +{ + public: + InnerIterator(const MappedSuperNodalMatrix& mat, Index outer) + : m_matrix(mat), + m_outer(outer), + m_supno(mat.colToSup()[outer]), + m_idval(mat.colIndexPtr()[outer]), + m_startidval(m_idval), + m_endidval(mat.colIndexPtr()[outer+1]), + m_idrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]]), + m_endidrow(mat.rowIndexPtr()[mat.supToCol()[mat.colToSup()[outer]]+1]) + {} + inline InnerIterator& operator++() + { + m_idval++; + m_idrow++; + return *this; + } + inline Scalar value() const { return m_matrix.valuePtr()[m_idval]; } + + inline Scalar& valueRef() { return const_cast<Scalar&>(m_matrix.valuePtr()[m_idval]); } + + inline Index index() const { return m_matrix.rowIndex()[m_idrow]; } + inline Index row() const { return index(); } + inline Index col() const { return m_outer; } + + inline Index supIndex() const { return m_supno; } + + inline operator bool() const + { + return ( (m_idval < m_endidval) && (m_idval >= m_startidval) + && (m_idrow < m_endidrow) ); + } + + protected: + const MappedSuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix + const Index m_outer; // Current column + const Index m_supno; // Current SuperNode number + Index m_idval; // Index to browse the values in the current column + const Index m_startidval; // Start of the column value + const Index m_endidval; // End of the column value + Index m_idrow; // Index to browse the row indices + Index m_endidrow; // End index of row indices of the current column +}; + +/** + * \brief Solve with the supernode triangular matrix + * + */ +template<typename Scalar, typename Index> +template<typename Dest> +void MappedSuperNodalMatrix<Scalar,Index>::solveInPlace( MatrixBase<Dest>&X) const +{ + Index n = X.rows(); + Index nrhs = X.cols(); + const Scalar * Lval = valuePtr(); // Nonzero values + Matrix<Scalar,Dynamic,Dynamic, ColMajor> work(n, nrhs); // working vector + work.setZero(); + for (Index k = 0; k <= nsuper(); k ++) + { + Index fsupc = supToCol()[k]; // First column of the current supernode + Index istart = rowIndexPtr()[fsupc]; // Pointer index to the subscript of the current column + Index nsupr = rowIndexPtr()[fsupc+1] - istart; // Number of rows in the current supernode + Index nsupc = supToCol()[k+1] - fsupc; // Number of columns in the current supernode + Index nrow = nsupr - nsupc; // Number of rows in the non-diagonal part of the supernode + Index irow; //Current index row + + if (nsupc == 1 ) + { + for (Index j = 0; j < nrhs; j++) + { + InnerIterator it(*this, fsupc); + ++it; // Skip the diagonal element + for (; it; ++it) + { + irow = it.row(); + X(irow, j) -= X(fsupc, j) * it.value(); + } + } + } + else + { + // The supernode has more than one column + Index luptr = colIndexPtr()[fsupc]; + Index lda = colIndexPtr()[fsupc+1] - luptr; + + // Triangular solve + Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(lda) ); + Map< Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); + U = A.template triangularView<UnitLower>().solve(U); + + // Matrix-vector product + new (&A) Map<const Matrix<Scalar,Dynamic,Dynamic, ColMajor>, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) ); + work.block(0, 0, nrow, nrhs) = A * U; + + //Begin Scatter + for (Index j = 0; j < nrhs; j++) + { + Index iptr = istart + nsupc; + for (Index i = 0; i < nrow; i++) + { + irow = rowIndex()[iptr]; + X(irow, j) -= work(i, j); // Scatter operation + work(i, j) = Scalar(0); + iptr++; + } + } + } + } +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SPARSELU_MATRIX_H diff --git a/eigen/Eigen/src/SparseLU/SparseLU_Utils.h b/eigen/Eigen/src/SparseLU/SparseLU_Utils.h new file mode 100644 index 0000000..15352ac --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_Utils.h @@ -0,0 +1,80 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#ifndef EIGEN_SPARSELU_UTILS_H +#define EIGEN_SPARSELU_UTILS_H + +namespace Eigen { +namespace internal { + +/** + * \brief Count Nonzero elements in the factors + */ +template <typename Scalar, typename Index> +void SparseLUImpl<Scalar,Index>::countnz(const Index n, Index& nnzL, Index& nnzU, GlobalLU_t& glu) +{ + nnzL = 0; + nnzU = (glu.xusub)(n); + Index nsuper = (glu.supno)(n); + Index jlen; + Index i, j, fsupc; + if (n <= 0 ) return; + // For each supernode + for (i = 0; i <= nsuper; i++) + { + fsupc = glu.xsup(i); + jlen = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); + + for (j = fsupc; j < glu.xsup(i+1); j++) + { + nnzL += jlen; + nnzU += j - fsupc + 1; + jlen--; + } + } +} + +/** + * \brief Fix up the data storage lsub for L-subscripts. + * + * It removes the subscripts sets for structural pruning, + * and applies permutation to the remaining subscripts + * + */ +template <typename Scalar, typename Index> +void SparseLUImpl<Scalar,Index>::fixupL(const Index n, const IndexVector& perm_r, GlobalLU_t& glu) +{ + Index fsupc, i, j, k, jstart; + + Index nextl = 0; + Index nsuper = (glu.supno)(n); + + // For each supernode + for (i = 0; i <= nsuper; i++) + { + fsupc = glu.xsup(i); + jstart = glu.xlsub(fsupc); + glu.xlsub(fsupc) = nextl; + for (j = jstart; j < glu.xlsub(fsupc + 1); j++) + { + glu.lsub(nextl) = perm_r(glu.lsub(j)); // Now indexed into P*A + nextl++; + } + for (k = fsupc+1; k < glu.xsup(i+1); k++) + glu.xlsub(k) = nextl; // other columns in supernode i + } + + glu.xlsub(n) = nextl; +} + +} // end namespace internal + +} // end namespace Eigen +#endif // EIGEN_SPARSELU_UTILS_H diff --git a/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h b/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h new file mode 100644 index 0000000..cacc7e9 --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -0,0 +1,180 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of xcolumn_bmod.c file in SuperLU + + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COLUMN_BMOD_H +#define SPARSELU_COLUMN_BMOD_H + +namespace Eigen { + +namespace internal { +/** + * \brief Performs numeric block updates (sup-col) in topological order + * + * \param jcol current column to update + * \param nseg Number of segments in the U part + * \param dense Store the full representation of the column + * \param tempv working array + * \param segrep segment representative ... + * \param repfnz ??? First nonzero column in each row ??? ... + * \param fpanelc First column in the current panel + * \param glu Global LU data. + * \return 0 - successful return + * > 0 - number of bytes allocated when run out of space + * + */ +template <typename Scalar, typename Index> +Index SparseLUImpl<Scalar,Index>::column_bmod(const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector& tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t& glu) +{ + Index jsupno, k, ksub, krep, ksupno; + Index lptr, nrow, isub, irow, nextlu, new_next, ufirst; + Index fsupc, nsupc, nsupr, luptr, kfnz, no_zeros; + /* krep = representative of current k-th supernode + * fsupc = first supernodal column + * nsupc = number of columns in a supernode + * nsupr = number of rows in a supernode + * luptr = location of supernodal LU-block in storage + * kfnz = first nonz in the k-th supernodal segment + * no_zeros = no lf leading zeros in a supernodal U-segment + */ + + jsupno = glu.supno(jcol); + // For each nonzero supernode segment of U[*,j] in topological order + k = nseg - 1; + Index d_fsupc; // distance between the first column of the current panel and the + // first column of the current snode + Index fst_col; // First column within small LU update + Index segsize; + for (ksub = 0; ksub < nseg; ksub++) + { + krep = segrep(k); k--; + ksupno = glu.supno(krep); + if (jsupno != ksupno ) + { + // outside the rectangular supernode + fsupc = glu.xsup(ksupno); + fst_col = (std::max)(fsupc, fpanelc); + + // Distance from the current supernode to the current panel; + // d_fsupc = 0 if fsupc > fpanelc + d_fsupc = fst_col - fsupc; + + luptr = glu.xlusup(fst_col) + d_fsupc; + lptr = glu.xlsub(fsupc) + d_fsupc; + + kfnz = repfnz(krep); + kfnz = (std::max)(kfnz, fpanelc); + + segsize = krep - kfnz + 1; + nsupc = krep - fst_col + 1; + nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); + nrow = nsupr - d_fsupc - nsupc; + Index lda = glu.xlusup(fst_col+1) - glu.xlusup(fst_col); + + + // Perform a triangular solver and block update, + // then scatter the result of sup-col update to dense + no_zeros = kfnz - fst_col; + if(segsize==1) + LU_kernel_bmod<1>::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros); + else + LU_kernel_bmod<Dynamic>::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros); + } // end if jsupno + } // end for each segment + + // Process the supernodal portion of L\U[*,j] + nextlu = glu.xlusup(jcol); + fsupc = glu.xsup(jsupno); + + // copy the SPA dense into L\U[*,j] + Index mem; + new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc); + Index offset = internal::first_multiple<Index>(new_next, internal::packet_traits<Scalar>::size) - new_next; + if(offset) + new_next += offset; + while (new_next > glu.nzlumax ) + { + mem = memXpand<ScalarVector>(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions); + if (mem) return mem; + } + + for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++) + { + irow = glu.lsub(isub); + glu.lusup(nextlu) = dense(irow); + dense(irow) = Scalar(0.0); + ++nextlu; + } + + if(offset) + { + glu.lusup.segment(nextlu,offset).setZero(); + nextlu += offset; + } + glu.xlusup(jcol + 1) = nextlu; // close L\U(*,jcol); + + /* For more updates within the panel (also within the current supernode), + * should start from the first column of the panel, or the first column + * of the supernode, whichever is bigger. There are two cases: + * 1) fsupc < fpanelc, then fst_col <-- fpanelc + * 2) fsupc >= fpanelc, then fst_col <-- fsupc + */ + fst_col = (std::max)(fsupc, fpanelc); + + if (fst_col < jcol) + { + // Distance between the current supernode and the current panel + // d_fsupc = 0 if fsupc >= fpanelc + d_fsupc = fst_col - fsupc; + + lptr = glu.xlsub(fsupc) + d_fsupc; + luptr = glu.xlusup(fst_col) + d_fsupc; + nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); // leading dimension + nsupc = jcol - fst_col; // excluding jcol + nrow = nsupr - d_fsupc - nsupc; + + // points to the beginning of jcol in snode L\U(jsupno) + ufirst = glu.xlusup(jcol) + d_fsupc; + Index lda = glu.xlusup(jcol+1) - glu.xlusup(jcol); + MappedMatrixBlock A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) ); + VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc); + u = A.template triangularView<UnitLower>().solve(u); + + new (&A) MappedMatrixBlock ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) ); + VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow); + l.noalias() -= A * u; + + } // End if fst_col + return 0; +} + +} // end namespace internal +} // end namespace Eigen + +#endif // SPARSELU_COLUMN_BMOD_H diff --git a/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h b/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h new file mode 100644 index 0000000..4c04b0e --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -0,0 +1,177 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]column_dfs.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COLUMN_DFS_H +#define SPARSELU_COLUMN_DFS_H + +template <typename Scalar, typename Index> class SparseLUImpl; +namespace Eigen { + +namespace internal { + +template<typename IndexVector, typename ScalarVector> +struct column_dfs_traits : no_assignment_operator +{ + typedef typename ScalarVector::Scalar Scalar; + typedef typename IndexVector::Scalar Index; + column_dfs_traits(Index jcol, Index& jsuper, typename SparseLUImpl<Scalar, Index>::GlobalLU_t& glu, SparseLUImpl<Scalar, Index>& luImpl) + : m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu), m_luImpl(luImpl) + {} + bool update_segrep(Index /*krep*/, Index /*jj*/) + { + return true; + } + void mem_expand(IndexVector& lsub, Index& nextl, Index chmark) + { + if (nextl >= m_glu.nzlmax) + m_luImpl.memXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions); + if (chmark != (m_jcol-1)) m_jsuper_ref = emptyIdxLU; + } + enum { ExpandMem = true }; + + Index m_jcol; + Index& m_jsuper_ref; + typename SparseLUImpl<Scalar, Index>::GlobalLU_t& m_glu; + SparseLUImpl<Scalar, Index>& m_luImpl; +}; + + +/** + * \brief Performs a symbolic factorization on column jcol and decide the supernode boundary + * + * A supernode representative is the last column of a supernode. + * The nonzeros in U[*,j] are segments that end at supernodes representatives. + * The routine returns a list of the supernodal representatives + * in topological order of the dfs that generates them. + * The location of the first nonzero in each supernodal segment + * (supernodal entry location) is also returned. + * + * \param m number of rows in the matrix + * \param jcol Current column + * \param perm_r Row permutation + * \param maxsuper Maximum number of column allowed in a supernode + * \param [in,out] nseg Number of segments in current U[*,j] - new segments appended + * \param lsub_col defines the rhs vector to start the dfs + * \param [in,out] segrep Segment representatives - new segments appended + * \param repfnz First nonzero location in each row + * \param xprune + * \param marker marker[i] == jj, if i was visited during dfs of current column jj; + * \param parent + * \param xplore working array + * \param glu global LU data + * \return 0 success + * > 0 number of bytes allocated when run out of space + * + */ +template <typename Scalar, typename Index> +Index SparseLUImpl<Scalar,Index>::column_dfs(const Index m, const Index jcol, IndexVector& perm_r, Index maxsuper, Index& nseg, BlockIndexVector lsub_col, IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu) +{ + + Index jsuper = glu.supno(jcol); + Index nextl = glu.xlsub(jcol); + VectorBlock<IndexVector> marker2(marker, 2*m, m); + + + column_dfs_traits<IndexVector, ScalarVector> traits(jcol, jsuper, glu, *this); + + // For each nonzero in A(*,jcol) do dfs + for (Index k = 0; ((k < m) ? lsub_col[k] != emptyIdxLU : false) ; k++) + { + Index krow = lsub_col(k); + lsub_col(k) = emptyIdxLU; + Index kmark = marker2(krow); + + // krow was visited before, go to the next nonz; + if (kmark == jcol) continue; + + dfs_kernel(jcol, perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent, + xplore, glu, nextl, krow, traits); + } // for each nonzero ... + + Index fsupc, jptr, jm1ptr, ito, ifrom, istop; + Index nsuper = glu.supno(jcol); + Index jcolp1 = jcol + 1; + Index jcolm1 = jcol - 1; + + // check to see if j belongs in the same supernode as j-1 + if ( jcol == 0 ) + { // Do nothing for column 0 + nsuper = glu.supno(0) = 0 ; + } + else + { + fsupc = glu.xsup(nsuper); + jptr = glu.xlsub(jcol); // Not yet compressed + jm1ptr = glu.xlsub(jcolm1); + + // Use supernodes of type T2 : see SuperLU paper + if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = emptyIdxLU; + + // Make sure the number of columns in a supernode doesn't + // exceed threshold + if ( (jcol - fsupc) >= maxsuper) jsuper = emptyIdxLU; + + /* If jcol starts a new supernode, reclaim storage space in + * glu.lsub from previous supernode. Note we only store + * the subscript set of the first and last columns of + * a supernode. (first for num values, last for pruning) + */ + if (jsuper == emptyIdxLU) + { // starts a new supernode + if ( (fsupc < jcolm1-1) ) + { // >= 3 columns in nsuper + ito = glu.xlsub(fsupc+1); + glu.xlsub(jcolm1) = ito; + istop = ito + jptr - jm1ptr; + xprune(jcolm1) = istop; // intialize xprune(jcol-1) + glu.xlsub(jcol) = istop; + + for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito) + glu.lsub(ito) = glu.lsub(ifrom); + nextl = ito; // = istop + length(jcol) + } + nsuper++; + glu.supno(jcol) = nsuper; + } // if a new supernode + } // end else: jcol > 0 + + // Tidy up the pointers before exit + glu.xsup(nsuper+1) = jcolp1; + glu.supno(jcolp1) = nsuper; + xprune(jcol) = nextl; // Intialize upper bound for pruning + glu.xlsub(jcolp1) = nextl; + + return 0; +} + +} // end namespace internal + +} // end namespace Eigen + +#endif diff --git a/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h new file mode 100644 index 0000000..170610d --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -0,0 +1,106 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +/* + + * NOTE: This file is the modified version of [s,d,c,z]copy_to_ucol.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COPY_TO_UCOL_H +#define SPARSELU_COPY_TO_UCOL_H + +namespace Eigen { +namespace internal { + +/** + * \brief Performs numeric block updates (sup-col) in topological order + * + * \param jcol current column to update + * \param nseg Number of segments in the U part + * \param segrep segment representative ... + * \param repfnz First nonzero column in each row ... + * \param perm_r Row permutation + * \param dense Store the full representation of the column + * \param glu Global LU data. + * \return 0 - successful return + * > 0 - number of bytes allocated when run out of space + * + */ +template <typename Scalar, typename Index> +Index SparseLUImpl<Scalar,Index>::copy_to_ucol(const Index jcol, const Index nseg, IndexVector& segrep, BlockIndexVector repfnz ,IndexVector& perm_r, BlockScalarVector dense, GlobalLU_t& glu) +{ + Index ksub, krep, ksupno; + + Index jsupno = glu.supno(jcol); + + // For each nonzero supernode segment of U[*,j] in topological order + Index k = nseg - 1, i; + Index nextu = glu.xusub(jcol); + Index kfnz, isub, segsize; + Index new_next,irow; + Index fsupc, mem; + for (ksub = 0; ksub < nseg; ksub++) + { + krep = segrep(k); k--; + ksupno = glu.supno(krep); + if (jsupno != ksupno ) // should go into ucol(); + { + kfnz = repfnz(krep); + if (kfnz != emptyIdxLU) + { // Nonzero U-segment + fsupc = glu.xsup(ksupno); + isub = glu.xlsub(fsupc) + kfnz - fsupc; + segsize = krep - kfnz + 1; + new_next = nextu + segsize; + while (new_next > glu.nzumax) + { + mem = memXpand<ScalarVector>(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions); + if (mem) return mem; + mem = memXpand<IndexVector>(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions); + if (mem) return mem; + + } + + for (i = 0; i < segsize; i++) + { + irow = glu.lsub(isub); + glu.usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order + glu.ucol(nextu) = dense(irow); + dense(irow) = Scalar(0.0); + nextu++; + isub++; + } + + } // end nonzero U-segment + + } // end if jsupno + + } // end for each segment + glu.xusub(jcol + 1) = nextu; // close U(*,jcol) + return 0; +} + +} // namespace internal +} // end namespace Eigen + +#endif // SPARSELU_COPY_TO_UCOL_H diff --git a/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h b/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h new file mode 100644 index 0000000..9e4e3e7 --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h @@ -0,0 +1,279 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSELU_GEMM_KERNEL_H +#define EIGEN_SPARSELU_GEMM_KERNEL_H + +namespace Eigen { + +namespace internal { + + +/** \internal + * A general matrix-matrix product kernel optimized for the SparseLU factorization. + * - A, B, and C must be column major + * - lda and ldc must be multiples of the respective packet size + * - C must have the same alignment as A + */ +template<typename Scalar,typename Index> +EIGEN_DONT_INLINE +void sparselu_gemm(Index m, Index n, Index d, const Scalar* A, Index lda, const Scalar* B, Index ldb, Scalar* C, Index ldc) +{ + using namespace Eigen::internal; + + typedef typename packet_traits<Scalar>::type Packet; + enum { + NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, + PacketSize = packet_traits<Scalar>::size, + PM = 8, // peeling in M + RN = 2, // register blocking + RK = NumberOfRegisters>=16 ? 4 : 2, // register blocking + BM = 4096/sizeof(Scalar), // number of rows of A-C per chunk + SM = PM*PacketSize // step along M + }; + Index d_end = (d/RK)*RK; // number of columns of A (rows of B) suitable for full register blocking + Index n_end = (n/RN)*RN; // number of columns of B-C suitable for processing RN columns at once + Index i0 = internal::first_aligned(A,m); + + eigen_internal_assert(((lda%PacketSize)==0) && ((ldc%PacketSize)==0) && (i0==internal::first_aligned(C,m))); + + // handle the non aligned rows of A and C without any optimization: + for(Index i=0; i<i0; ++i) + { + for(Index j=0; j<n; ++j) + { + Scalar c = C[i+j*ldc]; + for(Index k=0; k<d; ++k) + c += B[k+j*ldb] * A[i+k*lda]; + C[i+j*ldc] = c; + } + } + // process the remaining rows per chunk of BM rows + for(Index ib=i0; ib<m; ib+=BM) + { + Index actual_b = std::min<Index>(BM, m-ib); // actual number of rows + Index actual_b_end1 = (actual_b/SM)*SM; // actual number of rows suitable for peeling + Index actual_b_end2 = (actual_b/PacketSize)*PacketSize; // actual number of rows suitable for vectorization + + // Let's process two columns of B-C at once + for(Index j=0; j<n_end; j+=RN) + { + const Scalar* Bc0 = B+(j+0)*ldb; + const Scalar* Bc1 = B+(j+1)*ldb; + + for(Index k=0; k<d_end; k+=RK) + { + + // load and expand a RN x RK block of B + Packet b00, b10, b20, b30, b01, b11, b21, b31; + b00 = pset1<Packet>(Bc0[0]); + b10 = pset1<Packet>(Bc0[1]); + if(RK==4) b20 = pset1<Packet>(Bc0[2]); + if(RK==4) b30 = pset1<Packet>(Bc0[3]); + b01 = pset1<Packet>(Bc1[0]); + b11 = pset1<Packet>(Bc1[1]); + if(RK==4) b21 = pset1<Packet>(Bc1[2]); + if(RK==4) b31 = pset1<Packet>(Bc1[3]); + + Packet a0, a1, a2, a3, c0, c1, t0, t1; + + const Scalar* A0 = A+ib+(k+0)*lda; + const Scalar* A1 = A+ib+(k+1)*lda; + const Scalar* A2 = A+ib+(k+2)*lda; + const Scalar* A3 = A+ib+(k+3)*lda; + + Scalar* C0 = C+ib+(j+0)*ldc; + Scalar* C1 = C+ib+(j+1)*ldc; + + a0 = pload<Packet>(A0); + a1 = pload<Packet>(A1); + if(RK==4) + { + a2 = pload<Packet>(A2); + a3 = pload<Packet>(A3); + } + else + { + // workaround "may be used uninitialized in this function" warning + a2 = a3 = a0; + } + +#define KMADD(c, a, b, tmp) {tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);} +#define WORK(I) \ + c0 = pload<Packet>(C0+i+(I)*PacketSize); \ + c1 = pload<Packet>(C1+i+(I)*PacketSize); \ + KMADD(c0, a0, b00, t0) \ + KMADD(c1, a0, b01, t1) \ + a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \ + KMADD(c0, a1, b10, t0) \ + KMADD(c1, a1, b11, t1) \ + a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \ + if(RK==4) KMADD(c0, a2, b20, t0) \ + if(RK==4) KMADD(c1, a2, b21, t1) \ + if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \ + if(RK==4) KMADD(c0, a3, b30, t0) \ + if(RK==4) KMADD(c1, a3, b31, t1) \ + if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \ + pstore(C0+i+(I)*PacketSize, c0); \ + pstore(C1+i+(I)*PacketSize, c1) + + // process rows of A' - C' with aggressive vectorization and peeling + for(Index i=0; i<actual_b_end1; i+=PacketSize*8) + { + EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL1"); + prefetch((A0+i+(5)*PacketSize)); + prefetch((A1+i+(5)*PacketSize)); + if(RK==4) prefetch((A2+i+(5)*PacketSize)); + if(RK==4) prefetch((A3+i+(5)*PacketSize)); + WORK(0); + WORK(1); + WORK(2); + WORK(3); + WORK(4); + WORK(5); + WORK(6); + WORK(7); + } + // process the remaining rows with vectorization only + for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize) + { + WORK(0); + } +#undef WORK + // process the remaining rows without vectorization + for(Index i=actual_b_end2; i<actual_b; ++i) + { + if(RK==4) + { + C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3]; + C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1]+A2[i]*Bc1[2]+A3[i]*Bc1[3]; + } + else + { + C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]; + C1[i] += A0[i]*Bc1[0]+A1[i]*Bc1[1]; + } + } + + Bc0 += RK; + Bc1 += RK; + } // peeled loop on k + } // peeled loop on the columns j + // process the last column (we now perform a matrux-vector product) + if((n-n_end)>0) + { + const Scalar* Bc0 = B+(n-1)*ldb; + + for(Index k=0; k<d_end; k+=RK) + { + + // load and expand a 1 x RK block of B + Packet b00, b10, b20, b30; + b00 = pset1<Packet>(Bc0[0]); + b10 = pset1<Packet>(Bc0[1]); + if(RK==4) b20 = pset1<Packet>(Bc0[2]); + if(RK==4) b30 = pset1<Packet>(Bc0[3]); + + Packet a0, a1, a2, a3, c0, t0/*, t1*/; + + const Scalar* A0 = A+ib+(k+0)*lda; + const Scalar* A1 = A+ib+(k+1)*lda; + const Scalar* A2 = A+ib+(k+2)*lda; + const Scalar* A3 = A+ib+(k+3)*lda; + + Scalar* C0 = C+ib+(n_end)*ldc; + + a0 = pload<Packet>(A0); + a1 = pload<Packet>(A1); + if(RK==4) + { + a2 = pload<Packet>(A2); + a3 = pload<Packet>(A3); + } + else + { + // workaround "may be used uninitialized in this function" warning + a2 = a3 = a0; + } + +#define WORK(I) \ + c0 = pload<Packet>(C0+i+(I)*PacketSize); \ + KMADD(c0, a0, b00, t0) \ + a0 = pload<Packet>(A0+i+(I+1)*PacketSize); \ + KMADD(c0, a1, b10, t0) \ + a1 = pload<Packet>(A1+i+(I+1)*PacketSize); \ + if(RK==4) KMADD(c0, a2, b20, t0) \ + if(RK==4) a2 = pload<Packet>(A2+i+(I+1)*PacketSize); \ + if(RK==4) KMADD(c0, a3, b30, t0) \ + if(RK==4) a3 = pload<Packet>(A3+i+(I+1)*PacketSize); \ + pstore(C0+i+(I)*PacketSize, c0); + + // agressive vectorization and peeling + for(Index i=0; i<actual_b_end1; i+=PacketSize*8) + { + EIGEN_ASM_COMMENT("SPARSELU_GEMML_KERNEL2"); + WORK(0); + WORK(1); + WORK(2); + WORK(3); + WORK(4); + WORK(5); + WORK(6); + WORK(7); + } + // vectorization only + for(Index i=actual_b_end1; i<actual_b_end2; i+=PacketSize) + { + WORK(0); + } + // remaining scalars + for(Index i=actual_b_end2; i<actual_b; ++i) + { + if(RK==4) + C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]+A2[i]*Bc0[2]+A3[i]*Bc0[3]; + else + C0[i] += A0[i]*Bc0[0]+A1[i]*Bc0[1]; + } + + Bc0 += RK; +#undef WORK + } + } + + // process the last columns of A, corresponding to the last rows of B + Index rd = d-d_end; + if(rd>0) + { + for(Index j=0; j<n; ++j) + { + enum { + Alignment = PacketSize>1 ? Aligned : 0 + }; + typedef Map<Matrix<Scalar,Dynamic,1>, Alignment > MapVector; + typedef Map<const Matrix<Scalar,Dynamic,1>, Alignment > ConstMapVector; + if(rd==1) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b); + + else if(rd==2) MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b) + + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b); + + else MapVector(C+j*ldc+ib,actual_b) += B[0+d_end+j*ldb] * ConstMapVector(A+(d_end+0)*lda+ib, actual_b) + + B[1+d_end+j*ldb] * ConstMapVector(A+(d_end+1)*lda+ib, actual_b) + + B[2+d_end+j*ldb] * ConstMapVector(A+(d_end+2)*lda+ib, actual_b); + } + } + + } // blocking on the rows of A and C +} +#undef KMADD + +} // namespace internal + +} // namespace Eigen + +#endif // EIGEN_SPARSELU_GEMM_KERNEL_H diff --git a/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h new file mode 100644 index 0000000..7a4e430 --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -0,0 +1,127 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* This file is a modified version of heap_relax_snode.c file in SuperLU + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ + +#ifndef SPARSELU_HEAP_RELAX_SNODE_H +#define SPARSELU_HEAP_RELAX_SNODE_H + +namespace Eigen { +namespace internal { + +/** + * \brief Identify the initial relaxed supernodes + * + * This routine applied to a symmetric elimination tree. + * It assumes that the matrix has been reordered according to the postorder of the etree + * \param n The number of columns + * \param et elimination tree + * \param relax_columns Maximum number of columns allowed in a relaxed snode + * \param descendants Number of descendants of each node in the etree + * \param relax_end last column in a supernode + */ +template <typename Scalar, typename Index> +void SparseLUImpl<Scalar,Index>::heap_relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end) +{ + + // The etree may not be postordered, but its heap ordered + IndexVector post; + internal::treePostorder(n, et, post); // Post order etree + IndexVector inv_post(n+1); + Index i; + for (i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()??? + + // Renumber etree in postorder + IndexVector iwork(n); + IndexVector et_save(n+1); + for (i = 0; i < n; ++i) + { + iwork(post(i)) = post(et(i)); + } + et_save = et; // Save the original etree + et = iwork; + + // compute the number of descendants of each node in the etree + relax_end.setConstant(emptyIdxLU); + Index j, parent; + descendants.setZero(); + for (j = 0; j < n; j++) + { + parent = et(j); + if (parent != n) // not the dummy root + descendants(parent) += descendants(j) + 1; + } + // Identify the relaxed supernodes by postorder traversal of the etree + Index snode_start; // beginning of a snode + Index k; + Index nsuper_et_post = 0; // Number of relaxed snodes in postordered etree + Index nsuper_et = 0; // Number of relaxed snodes in the original etree + Index l; + for (j = 0; j < n; ) + { + parent = et(j); + snode_start = j; + while ( parent != n && descendants(parent) < relax_columns ) + { + j = parent; + parent = et(j); + } + // Found a supernode in postordered etree, j is the last column + ++nsuper_et_post; + k = n; + for (i = snode_start; i <= j; ++i) + k = (std::min)(k, inv_post(i)); + l = inv_post(j); + if ( (l - k) == (j - snode_start) ) // Same number of columns in the snode + { + // This is also a supernode in the original etree + relax_end(k) = l; // Record last column + ++nsuper_et; + } + else + { + for (i = snode_start; i <= j; ++i) + { + l = inv_post(i); + if (descendants(i) == 0) + { + relax_end(l) = l; + ++nsuper_et; + } + } + } + j++; + // Search for a new leaf + while (descendants(j) != 0 && j < n) j++; + } // End postorder traversal of the etree + + // Recover the original etree + et = et_save; +} + +} // end namespace internal + +} // end namespace Eigen +#endif // SPARSELU_HEAP_RELAX_SNODE_H diff --git a/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h new file mode 100644 index 0000000..6af0267 --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -0,0 +1,130 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef SPARSELU_KERNEL_BMOD_H +#define SPARSELU_KERNEL_BMOD_H + +namespace Eigen { +namespace internal { + +/** + * \brief Performs numeric block updates from a given supernode to a single column + * + * \param segsize Size of the segment (and blocks ) to use for updates + * \param[in,out] dense Packed values of the original matrix + * \param tempv temporary vector to use for updates + * \param lusup array containing the supernodes + * \param lda Leading dimension in the supernode + * \param nrow Number of rows in the rectangular part of the supernode + * \param lsub compressed row subscripts of supernodes + * \param lptr pointer to the first column of the current supernode in lsub + * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode + * \return 0 on success + */ +template <int SegSizeAtCompileTime> struct LU_kernel_bmod +{ + template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index> + static EIGEN_DONT_INLINE void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda, + const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros); +}; + +template <int SegSizeAtCompileTime> +template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index> +EIGEN_DONT_INLINE void LU_kernel_bmod<SegSizeAtCompileTime>::run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, Index& luptr, const Index lda, + const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros) +{ + typedef typename ScalarVector::Scalar Scalar; + // First, copy U[*,j] segment from dense(*) to tempv(*) + // The result of triangular solve is in tempv[*]; + // The result of matric-vector update is in dense[*] + Index isub = lptr + no_zeros; + int i; + Index irow; + for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++) + { + irow = lsub(isub); + tempv(i) = dense(irow); + ++isub; + } + // Dense triangular solve -- start effective triangle + luptr += lda * no_zeros + no_zeros; + // Form Eigen matrix and vector + Map<Matrix<Scalar,SegSizeAtCompileTime,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(lda) ); + Map<Matrix<Scalar,SegSizeAtCompileTime,1> > u(tempv.data(), segsize); + + u = A.template triangularView<UnitLower>().solve(u); + + // Dense matrix-vector product y <-- B*x + luptr += segsize; + const Index PacketSize = internal::packet_traits<Scalar>::size; + Index ldl = internal::first_multiple(nrow, PacketSize); + Map<Matrix<Scalar,Dynamic,SegSizeAtCompileTime, ColMajor>, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(lda) ); + Index aligned_offset = internal::first_aligned(tempv.data()+segsize, PacketSize); + Index aligned_with_B_offset = (PacketSize-internal::first_aligned(B.data(), PacketSize))%PacketSize; + Map<Matrix<Scalar,Dynamic,1>, 0, OuterStride<> > l(tempv.data()+segsize+aligned_offset+aligned_with_B_offset, nrow, OuterStride<>(ldl) ); + + l.setZero(); + internal::sparselu_gemm<Scalar>(l.rows(), l.cols(), B.cols(), B.data(), B.outerStride(), u.data(), u.outerStride(), l.data(), l.outerStride()); + + // Scatter tempv[] into SPA dense[] as a temporary storage + isub = lptr + no_zeros; + for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++) + { + irow = lsub(isub++); + dense(irow) = tempv(i); + } + + // Scatter l into SPA dense[] + for (i = 0; i < nrow; i++) + { + irow = lsub(isub++); + dense(irow) -= l(i); + } +} + +template <> struct LU_kernel_bmod<1> +{ + template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index> + static EIGEN_DONT_INLINE void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr, + const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros); +}; + + +template <typename BlockScalarVector, typename ScalarVector, typename IndexVector, typename Index> +EIGEN_DONT_INLINE void LU_kernel_bmod<1>::run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, Index& luptr, + const Index lda, const Index nrow, IndexVector& lsub, const Index lptr, const Index no_zeros) +{ + typedef typename ScalarVector::Scalar Scalar; + Scalar f = dense(lsub(lptr + no_zeros)); + luptr += lda * no_zeros + no_zeros + 1; + const Scalar* a(lusup.data() + luptr); + const /*typename IndexVector::Scalar*/Index* irow(lsub.data()+lptr + no_zeros + 1); + Index i = 0; + for (; i+1 < nrow; i+=2) + { + Index i0 = *(irow++); + Index i1 = *(irow++); + Scalar a0 = *(a++); + Scalar a1 = *(a++); + Scalar d0 = dense.coeff(i0); + Scalar d1 = dense.coeff(i1); + d0 -= f*a0; + d1 -= f*a1; + dense.coeffRef(i0) = d0; + dense.coeffRef(i1) = d1; + } + if(i<nrow) + dense.coeffRef(*(irow++)) -= f * *(a++); +} + +} // end namespace internal + +} // end namespace Eigen +#endif // SPARSELU_KERNEL_BMOD_H diff --git a/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h new file mode 100644 index 0000000..9d2ff29 --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -0,0 +1,223 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]panel_bmod.c file in SuperLU + + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PANEL_BMOD_H +#define SPARSELU_PANEL_BMOD_H + +namespace Eigen { +namespace internal { + +/** + * \brief Performs numeric block updates (sup-panel) in topological order. + * + * Before entering this routine, the original nonzeros in the panel + * were already copied i nto the spa[m,w] + * + * \param m number of rows in the matrix + * \param w Panel size + * \param jcol Starting column of the panel + * \param nseg Number of segments in the U part + * \param dense Store the full representation of the panel + * \param tempv working array + * \param segrep segment representative... first row in the segment + * \param repfnz First nonzero rows + * \param glu Global LU data. + * + * + */ +template <typename Scalar, typename Index> +void SparseLUImpl<Scalar,Index>::panel_bmod(const Index m, const Index w, const Index jcol, + const Index nseg, ScalarVector& dense, ScalarVector& tempv, + IndexVector& segrep, IndexVector& repfnz, GlobalLU_t& glu) +{ + + Index ksub,jj,nextl_col; + Index fsupc, nsupc, nsupr, nrow; + Index krep, kfnz; + Index lptr; // points to the row subscripts of a supernode + Index luptr; // ... + Index segsize,no_zeros ; + // For each nonz supernode segment of U[*,j] in topological order + Index k = nseg - 1; + const Index PacketSize = internal::packet_traits<Scalar>::size; + + for (ksub = 0; ksub < nseg; ksub++) + { // For each updating supernode + /* krep = representative of current k-th supernode + * fsupc = first supernodal column + * nsupc = number of columns in a supernode + * nsupr = number of rows in a supernode + */ + krep = segrep(k); k--; + fsupc = glu.xsup(glu.supno(krep)); + nsupc = krep - fsupc + 1; + nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); + nrow = nsupr - nsupc; + lptr = glu.xlsub(fsupc); + + // loop over the panel columns to detect the actual number of columns and rows + Index u_rows = 0; + Index u_cols = 0; + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + + kfnz = repfnz_col(krep); + if ( kfnz == emptyIdxLU ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + u_cols++; + u_rows = (std::max)(segsize,u_rows); + } + + if(nsupc >= 2) + { + Index ldu = internal::first_multiple<Index>(u_rows, PacketSize); + Map<ScalarMatrix, Aligned, OuterStride<> > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu)); + + // gather U + Index u_col = 0; + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here + + kfnz = repfnz_col(krep); + if ( kfnz == emptyIdxLU ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + luptr = glu.xlusup(fsupc); + no_zeros = kfnz - fsupc; + + Index isub = lptr + no_zeros; + Index off = u_rows-segsize; + for (Index i = 0; i < off; i++) U(i,u_col) = 0; + for (Index i = 0; i < segsize; i++) + { + Index irow = glu.lsub(isub); + U(i+off,u_col) = dense_col(irow); + ++isub; + } + u_col++; + } + // solve U = A^-1 U + luptr = glu.xlusup(fsupc); + Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc); + no_zeros = (krep - u_rows + 1) - fsupc; + luptr += lda * no_zeros + no_zeros; + MappedMatrixBlock A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) ); + U = A.template triangularView<UnitLower>().solve(U); + + // update + luptr += u_rows; + MappedMatrixBlock B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) ); + eigen_assert(tempv.size()>w*ldu + nrow*w + 1); + + Index ldl = internal::first_multiple<Index>(nrow, PacketSize); + Index offset = (PacketSize-internal::first_aligned(B.data(), PacketSize)) % PacketSize; + MappedMatrixBlock L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl)); + + L.setZero(); + internal::sparselu_gemm<Scalar>(L.rows(), L.cols(), B.cols(), B.data(), B.outerStride(), U.data(), U.outerStride(), L.data(), L.outerStride()); + + // scatter U and L + u_col = 0; + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here + + kfnz = repfnz_col(krep); + if ( kfnz == emptyIdxLU ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + no_zeros = kfnz - fsupc; + Index isub = lptr + no_zeros; + + Index off = u_rows-segsize; + for (Index i = 0; i < segsize; i++) + { + Index irow = glu.lsub(isub++); + dense_col(irow) = U.coeff(i+off,u_col); + U.coeffRef(i+off,u_col) = 0; + } + + // Scatter l into SPA dense[] + for (Index i = 0; i < nrow; i++) + { + Index irow = glu.lsub(isub++); + dense_col(irow) -= L.coeff(i,u_col); + L.coeffRef(i,u_col) = 0; + } + u_col++; + } + } + else // level 2 only + { + // Sequence through each column in the panel + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here + + kfnz = repfnz_col(krep); + if ( kfnz == emptyIdxLU ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + luptr = glu.xlusup(fsupc); + + Index lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);// nsupr + + // Perform a trianglar solve and block update, + // then scatter the result of sup-col update to dense[] + no_zeros = kfnz - fsupc; + if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros); + else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros); + else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros); + else LU_kernel_bmod<Dynamic>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros); + } // End for each column in the panel + } + + } // End for each updating supernode +} // end panel bmod + +} // end namespace internal + +} // end namespace Eigen + +#endif // SPARSELU_PANEL_BMOD_H diff --git a/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h new file mode 100644 index 0000000..dc0054e --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -0,0 +1,258 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]panel_dfs.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PANEL_DFS_H +#define SPARSELU_PANEL_DFS_H + +namespace Eigen { + +namespace internal { + +template<typename IndexVector> +struct panel_dfs_traits +{ + typedef typename IndexVector::Scalar Index; + panel_dfs_traits(Index jcol, Index* marker) + : m_jcol(jcol), m_marker(marker) + {} + bool update_segrep(Index krep, Index jj) + { + if(m_marker[krep]<m_jcol) + { + m_marker[krep] = jj; + return true; + } + return false; + } + void mem_expand(IndexVector& /*glu.lsub*/, Index /*nextl*/, Index /*chmark*/) {} + enum { ExpandMem = false }; + Index m_jcol; + Index* m_marker; +}; + + +template <typename Scalar, typename Index> +template <typename Traits> +void SparseLUImpl<Scalar,Index>::dfs_kernel(const Index jj, IndexVector& perm_r, + Index& nseg, IndexVector& panel_lsub, IndexVector& segrep, + Ref<IndexVector> repfnz_col, IndexVector& xprune, Ref<IndexVector> marker, IndexVector& parent, + IndexVector& xplore, GlobalLU_t& glu, + Index& nextl_col, Index krow, Traits& traits + ) +{ + + Index kmark = marker(krow); + + // For each unmarked krow of jj + marker(krow) = jj; + Index kperm = perm_r(krow); + if (kperm == emptyIdxLU ) { + // krow is in L : place it in structure of L(*, jj) + panel_lsub(nextl_col++) = krow; // krow is indexed into A + + traits.mem_expand(panel_lsub, nextl_col, kmark); + } + else + { + // krow is in U : if its supernode-representative krep + // has been explored, update repfnz(*) + // krep = supernode representative of the current row + Index krep = glu.xsup(glu.supno(kperm)+1) - 1; + // First nonzero element in the current column: + Index myfnz = repfnz_col(krep); + + if (myfnz != emptyIdxLU ) + { + // Representative visited before + if (myfnz > kperm ) repfnz_col(krep) = kperm; + + } + else + { + // Otherwise, perform dfs starting at krep + Index oldrep = emptyIdxLU; + parent(krep) = oldrep; + repfnz_col(krep) = kperm; + Index xdfs = glu.xlsub(krep); + Index maxdfs = xprune(krep); + + Index kpar; + do + { + // For each unmarked kchild of krep + while (xdfs < maxdfs) + { + Index kchild = glu.lsub(xdfs); + xdfs++; + Index chmark = marker(kchild); + + if (chmark != jj ) + { + marker(kchild) = jj; + Index chperm = perm_r(kchild); + + if (chperm == emptyIdxLU) + { + // case kchild is in L: place it in L(*, j) + panel_lsub(nextl_col++) = kchild; + traits.mem_expand(panel_lsub, nextl_col, chmark); + } + else + { + // case kchild is in U : + // chrep = its supernode-rep. If its rep has been explored, + // update its repfnz(*) + Index chrep = glu.xsup(glu.supno(chperm)+1) - 1; + myfnz = repfnz_col(chrep); + + if (myfnz != emptyIdxLU) + { // Visited before + if (myfnz > chperm) + repfnz_col(chrep) = chperm; + } + else + { // Cont. dfs at snode-rep of kchild + xplore(krep) = xdfs; + oldrep = krep; + krep = chrep; // Go deeper down G(L) + parent(krep) = oldrep; + repfnz_col(krep) = chperm; + xdfs = glu.xlsub(krep); + maxdfs = xprune(krep); + + } // end if myfnz != -1 + } // end if chperm == -1 + + } // end if chmark !=jj + } // end while xdfs < maxdfs + + // krow has no more unexplored nbrs : + // Place snode-rep krep in postorder DFS, if this + // segment is seen for the first time. (Note that + // "repfnz(krep)" may change later.) + // Baktrack dfs to its parent + if(traits.update_segrep(krep,jj)) + //if (marker1(krep) < jcol ) + { + segrep(nseg) = krep; + ++nseg; + //marker1(krep) = jj; + } + + kpar = parent(krep); // Pop recursion, mimic recursion + if (kpar == emptyIdxLU) + break; // dfs done + krep = kpar; + xdfs = xplore(krep); + maxdfs = xprune(krep); + + } while (kpar != emptyIdxLU); // Do until empty stack + + } // end if (myfnz = -1) + + } // end if (kperm == -1) +} + +/** + * \brief Performs a symbolic factorization on a panel of columns [jcol, jcol+w) + * + * A supernode representative is the last column of a supernode. + * The nonzeros in U[*,j] are segments that end at supernodes representatives + * + * The routine returns a list of the supernodal representatives + * in topological order of the dfs that generates them. This list is + * a superset of the topological order of each individual column within + * the panel. + * The location of the first nonzero in each supernodal segment + * (supernodal entry location) is also returned. Each column has + * a separate list for this purpose. + * + * Two markers arrays are used for dfs : + * marker[i] == jj, if i was visited during dfs of current column jj; + * marker1[i] >= jcol, if i was visited by earlier columns in this panel; + * + * \param[in] m number of rows in the matrix + * \param[in] w Panel size + * \param[in] jcol Starting column of the panel + * \param[in] A Input matrix in column-major storage + * \param[in] perm_r Row permutation + * \param[out] nseg Number of U segments + * \param[out] dense Accumulate the column vectors of the panel + * \param[out] panel_lsub Subscripts of the row in the panel + * \param[out] segrep Segment representative i.e first nonzero row of each segment + * \param[out] repfnz First nonzero location in each row + * \param[out] xprune The pruned elimination tree + * \param[out] marker work vector + * \param parent The elimination tree + * \param xplore work vector + * \param glu The global data structure + * + */ + +template <typename Scalar, typename Index> +void SparseLUImpl<Scalar,Index>::panel_dfs(const Index m, const Index w, const Index jcol, MatrixType& A, IndexVector& perm_r, Index& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu) +{ + Index nextl_col; // Next available position in panel_lsub[*,jj] + + // Initialize pointers + VectorBlock<IndexVector> marker1(marker, m, m); + nseg = 0; + + panel_dfs_traits<IndexVector> traits(jcol, marker1.data()); + + // For each column in the panel + for (Index jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj - jcol) * m; + + VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero location in each row + VectorBlock<ScalarVector> dense_col(dense,nextl_col, m); // Accumulate a column vector here + + + // For each nnz in A[*, jj] do depth first search + for (typename MatrixType::InnerIterator it(A, jj); it; ++it) + { + Index krow = it.row(); + dense_col(krow) = it.value(); + + Index kmark = marker(krow); + if (kmark == jj) + continue; // krow visited before, go to the next nonzero + + dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent, + xplore, glu, nextl_col, krow, traits); + }// end for nonzeros in column jj + + } // end for column jj +} + +} // end namespace internal +} // end namespace Eigen + +#endif // SPARSELU_PANEL_DFS_H diff --git a/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h b/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h new file mode 100644 index 0000000..2e49ef6 --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -0,0 +1,137 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of xpivotL.c file in SuperLU + + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PIVOTL_H +#define SPARSELU_PIVOTL_H + +namespace Eigen { +namespace internal { + +/** + * \brief Performs the numerical pivotin on the current column of L, and the CDIV operation. + * + * Pivot policy : + * (1) Compute thresh = u * max_(i>=j) abs(A_ij); + * (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN + * pivot row = k; + * ELSE IF abs(A_jj) >= thresh THEN + * pivot row = j; + * ELSE + * pivot row = m; + * + * Note: If you absolutely want to use a given pivot order, then set u=0.0. + * + * \param jcol The current column of L + * \param diagpivotthresh diagonal pivoting threshold + * \param[in,out] perm_r Row permutation (threshold pivoting) + * \param[in] iperm_c column permutation - used to finf diagonal of Pc*A*Pc' + * \param[out] pivrow The pivot row + * \param glu Global LU data + * \return 0 if success, i > 0 if U(i,i) is exactly zero + * + */ +template <typename Scalar, typename Index> +Index SparseLUImpl<Scalar,Index>::pivotL(const Index jcol, const RealScalar& diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, Index& pivrow, GlobalLU_t& glu) +{ + + Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol + Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0 + Index lptr = glu.xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion + Index nsupr = glu.xlsub(fsupc+1) - lptr; // Number of rows in the supernode + Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc); // leading dimension + Scalar* lu_sup_ptr = &(glu.lusup.data()[glu.xlusup(fsupc)]); // Start of the current supernode + Scalar* lu_col_ptr = &(glu.lusup.data()[glu.xlusup(jcol)]); // Start of jcol in the supernode + Index* lsub_ptr = &(glu.lsub.data()[lptr]); // Start of row indices of the supernode + + // Determine the largest abs numerical value for partial pivoting + Index diagind = iperm_c(jcol); // diagonal index + RealScalar pivmax(-1.0); + Index pivptr = nsupc; + Index diag = emptyIdxLU; + RealScalar rtemp; + Index isub, icol, itemp, k; + for (isub = nsupc; isub < nsupr; ++isub) { + using std::abs; + rtemp = abs(lu_col_ptr[isub]); + if (rtemp > pivmax) { + pivmax = rtemp; + pivptr = isub; + } + if (lsub_ptr[isub] == diagind) diag = isub; + } + + // Test for singularity + if ( pivmax <= RealScalar(0.0) ) { + // if pivmax == -1, the column is structurally empty, otherwise it is only numerically zero + pivrow = pivmax < RealScalar(0.0) ? diagind : lsub_ptr[pivptr]; + perm_r(pivrow) = jcol; + return (jcol+1); + } + + RealScalar thresh = diagpivotthresh * pivmax; + + // Choose appropriate pivotal element + + { + // Test if the diagonal element can be used as a pivot (given the threshold value) + if (diag >= 0 ) + { + // Diagonal element exists + using std::abs; + rtemp = abs(lu_col_ptr[diag]); + if (rtemp != 0.0 && rtemp >= thresh) pivptr = diag; + } + pivrow = lsub_ptr[pivptr]; + } + + // Record pivot row + perm_r(pivrow) = jcol; + // Interchange row subscripts + if (pivptr != nsupc ) + { + std::swap( lsub_ptr[pivptr], lsub_ptr[nsupc] ); + // Interchange numerical values as well, for the two rows in the whole snode + // such that L is indexed the same way as A + for (icol = 0; icol <= nsupc; icol++) + { + itemp = pivptr + icol * lda; + std::swap(lu_sup_ptr[itemp], lu_sup_ptr[nsupc + icol * lda]); + } + } + // cdiv operations + Scalar temp = Scalar(1.0) / lu_col_ptr[nsupc]; + for (k = nsupc+1; k < nsupr; k++) + lu_col_ptr[k] *= temp; + return 0; +} + +} // end namespace internal +} // end namespace Eigen + +#endif // SPARSELU_PIVOTL_H diff --git a/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h b/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h new file mode 100644 index 0000000..66460d1 --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -0,0 +1,135 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]pruneL.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PRUNEL_H +#define SPARSELU_PRUNEL_H + +namespace Eigen { +namespace internal { + +/** + * \brief Prunes the L-structure. + * + * It prunes the L-structure of supernodes whose L-structure contains the current pivot row "pivrow" + * + * + * \param jcol The current column of L + * \param[in] perm_r Row permutation + * \param[out] pivrow The pivot row + * \param nseg Number of segments + * \param segrep + * \param repfnz + * \param[out] xprune + * \param glu Global LU data + * + */ +template <typename Scalar, typename Index> +void SparseLUImpl<Scalar,Index>::pruneL(const Index jcol, const IndexVector& perm_r, const Index pivrow, const Index nseg, const IndexVector& segrep, BlockIndexVector repfnz, IndexVector& xprune, GlobalLU_t& glu) +{ + // For each supernode-rep irep in U(*,j] + Index jsupno = glu.supno(jcol); + Index i,irep,irep1; + bool movnum, do_prune = false; + Index kmin = 0, kmax = 0, minloc, maxloc,krow; + for (i = 0; i < nseg; i++) + { + irep = segrep(i); + irep1 = irep + 1; + do_prune = false; + + // Don't prune with a zero U-segment + if (repfnz(irep) == emptyIdxLU) continue; + + // If a snode overlaps with the next panel, then the U-segment + // is fragmented into two parts -- irep and irep1. We should let + // pruning occur at the rep-column in irep1s snode. + if (glu.supno(irep) == glu.supno(irep1) ) continue; // don't prune + + // If it has not been pruned & it has a nonz in row L(pivrow,i) + if (glu.supno(irep) != jsupno ) + { + if ( xprune (irep) >= glu.xlsub(irep1) ) + { + kmin = glu.xlsub(irep); + kmax = glu.xlsub(irep1) - 1; + for (krow = kmin; krow <= kmax; krow++) + { + if (glu.lsub(krow) == pivrow) + { + do_prune = true; + break; + } + } + } + + if (do_prune) + { + // do a quicksort-type partition + // movnum=true means that the num values have to be exchanged + movnum = false; + if (irep == glu.xsup(glu.supno(irep)) ) // Snode of size 1 + movnum = true; + + while (kmin <= kmax) + { + if (perm_r(glu.lsub(kmax)) == emptyIdxLU) + kmax--; + else if ( perm_r(glu.lsub(kmin)) != emptyIdxLU) + kmin++; + else + { + // kmin below pivrow (not yet pivoted), and kmax + // above pivrow: interchange the two suscripts + std::swap(glu.lsub(kmin), glu.lsub(kmax)); + + // If the supernode has only one column, then we + // only keep one set of subscripts. For any subscript + // intercnahge performed, similar interchange must be + // done on the numerical values. + if (movnum) + { + minloc = glu.xlusup(irep) + ( kmin - glu.xlsub(irep) ); + maxloc = glu.xlusup(irep) + ( kmax - glu.xlsub(irep) ); + std::swap(glu.lusup(minloc), glu.lusup(maxloc)); + } + kmin++; + kmax--; + } + } // end while + + xprune(irep) = kmin; //Pruning + } // end if do_prune + } // end pruning + } // End for each U-segment +} + +} // end namespace internal +} // end namespace Eigen + +#endif // SPARSELU_PRUNEL_H diff --git a/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h b/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h new file mode 100644 index 0000000..58ec32e --- /dev/null +++ b/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h @@ -0,0 +1,83 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/* This file is a modified version of heap_relax_snode.c file in SuperLU + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ + +#ifndef SPARSELU_RELAX_SNODE_H +#define SPARSELU_RELAX_SNODE_H + +namespace Eigen { + +namespace internal { + +/** + * \brief Identify the initial relaxed supernodes + * + * This routine is applied to a column elimination tree. + * It assumes that the matrix has been reordered according to the postorder of the etree + * \param n the number of columns + * \param et elimination tree + * \param relax_columns Maximum number of columns allowed in a relaxed snode + * \param descendants Number of descendants of each node in the etree + * \param relax_end last column in a supernode + */ +template <typename Scalar, typename Index> +void SparseLUImpl<Scalar,Index>::relax_snode (const Index n, IndexVector& et, const Index relax_columns, IndexVector& descendants, IndexVector& relax_end) +{ + + // compute the number of descendants of each node in the etree + Index j, parent; + relax_end.setConstant(emptyIdxLU); + descendants.setZero(); + for (j = 0; j < n; j++) + { + parent = et(j); + if (parent != n) // not the dummy root + descendants(parent) += descendants(j) + 1; + } + // Identify the relaxed supernodes by postorder traversal of the etree + Index snode_start; // beginning of a snode + for (j = 0; j < n; ) + { + parent = et(j); + snode_start = j; + while ( parent != n && descendants(parent) < relax_columns ) + { + j = parent; + parent = et(j); + } + // Found a supernode in postordered etree, j is the last column + relax_end(snode_start) = j; // Record last column + j++; + // Search for a new leaf + while (descendants(j) != 0 && j < n) j++; + } // End postorder traversal of the etree + +} + +} // end namespace internal + +} // end namespace Eigen +#endif |