iSAM
Cholesky.cpp
Go to the documentation of this file.
00001 
00028 #include <string.h>
00029 
00030 #include "isam/util.h"
00031 #include "isam/SparseMatrix.h"
00032 #include "isam/SparseSystem.h"
00033 
00034 #include "isam/Cholesky.h"
00035 
00036 #include "cs.h"
00037 #include "cholmod.h"
00038 
00039 // use CSparse instead of CHOLMOD (default)
00040 const bool USE_CSPARSE = false;
00041 
00042 // use CSparse-QR instead of CSparse-Cholesky, much slower, only for testing
00043 const bool USE_CSPARSE_QR = false;
00044 
00045 using namespace std;
00046 using namespace Eigen;
00047 
00048 namespace isam {
00049 
00050 class CholeskyImpl : public Cholesky {
00051   cholmod_sparse* _L;
00052   cholmod_dense* _rhs;
00053   int* _order;
00054 
00055   cholmod_common Common;
00056 
00057 public:
00058 
00059   CholeskyImpl() : _L(NULL), _rhs(NULL), _order(NULL) {
00060     cholmod_start(&Common);
00061   }
00062 
00063   virtual ~CholeskyImpl() {
00064     reset();
00065     cholmod_finish(&Common);
00066   }
00067 
00068   void factorize(const SparseSystem& Ab, VectorXd* delta = NULL, double lambda = 0) {
00069     tic("Cholesky");
00070 
00071     reset(); // make sure _L, _rhs, _order are empty
00072 
00073     cholmod_sparse* At = to_cholmod_transp(Ab);
00074     int nrow = At->ncol;
00075     int ncol = At->nrow;
00076 
00077     // Cholesky factorization
00078     // cholmod factors AA' instead of A'A - so we need to pass in At!
00079     cholmod_factor *L_factor;
00080 #if 0
00081     // restrict ordering (default is trying several)
00082     Common.nmethods = 1;
00083     Common.method[0].ordering = CHOLMOD_AMD; //CHOLMOD_NATURAL; //CHOLMOD_COLAMD;
00084     Common.postorder = false;
00085 #endif
00086     if (lambda>0) { // for Levenberg-Marquardt
00087       cholmod_sparse* A = cholmod_transpose(At, 1, &Common);
00088       // make symmetric matrix (only upper part saved)
00089       cholmod_sparse* AtA = cholmod_ssmult(At, A, 1, 1, 1, &Common); 
00090       // modify diagonal
00091       int* AtAp = (int*)AtA->p;
00092       //      int* AtAi = (int*)AtA->i;
00093       double* AtAx = (double*)AtA->x;
00094       for (int i=0; i<ncol; i++) {
00095         int p = AtAp[i+1]-1;
00096         AtAx[p] *= (1+lambda);
00097       }
00098       L_factor = cholmod_analyze(AtA, &Common);
00099       tic("cholmod_factorize");
00100       cholmod_factorize(AtA, L_factor, &Common);
00101       toc("cholmod_factorize");
00102       cholmod_free_sparse(&AtA, &Common);
00103       cholmod_free_sparse(&A, &Common);
00104     } else {
00105       L_factor = cholmod_analyze(At, &Common);
00106       tic("cholmod_factorize");
00107       cholmod_factorize(At, L_factor, &Common);
00108       toc("cholmod_factorize");
00109     }
00110     // make sure factorization is in correct format (LL, simplicial, packed, ordered)
00111     cholmod_change_factor(CHOLMOD_REAL, true, false, true, true, L_factor, &Common);
00112 
00113     // calculate new rhs by forward substitution (y in R'y = A'b)
00114     // note: original rhs is size nrow, Atb and new rhs are size ncol
00115     cholmod_dense* A_rhs = cholmod_zeros(nrow, 1, CHOLMOD_REAL, &Common);
00116     memcpy(A_rhs->x, Ab.rhs().data(), nrow*sizeof(double));
00117     cholmod_dense* Atb = cholmod_zeros(ncol, 1, CHOLMOD_REAL, &Common);
00118     double alpha[2] = {1., 0.}; // Atb = 1 * (At*A_rhs)
00119     double beta[2] = {0., 0.}; // + 0 * Atb
00120     cholmod_sdmult(At, 0, alpha, beta, A_rhs, Atb, &Common);
00121     // permute Atb according to L_factor
00122     cholmod_dense* Atb_perm = cholmod_solve(CHOLMOD_P, L_factor, Atb, &Common);
00123     // forward sub to obtain new (modified) rhs
00124     _rhs = cholmod_solve(CHOLMOD_L, L_factor, Atb_perm, &Common);
00125 
00126     // optionally solve the triangular system by backsubstitution
00127     if (delta) {
00128       cholmod_dense* delta_ = cholmod_solve(CHOLMOD_Lt, L_factor, _rhs, &Common);
00129       *delta = VectorXd(_rhs->nrow);
00130       memcpy(delta->data(), (double*)delta_->x, _rhs->nrow*sizeof(double));
00131       cholmod_free_dense(&delta_, &Common);
00132     }
00133 
00134     // create R/L with ordering and rhs
00135     // WARNING: L_factor becomes symbolic!! (numeric L is literally pulled out for efficiency)
00136     _order = new int[ncol];
00137     memcpy(_order, (int*)L_factor->Perm, ncol*sizeof(int));
00138     _L = cholmod_factor_to_sparse(L_factor, &Common);
00139 
00140     cholmod_free_dense(&Atb_perm, &Common);
00141     cholmod_free_dense(&Atb, &Common);
00142     cholmod_free_dense(&A_rhs, &Common);
00143     cholmod_free_factor(&L_factor, &Common);
00144     cholmod_free_sparse(&At, &Common);
00145 
00146     toc("Cholesky");
00147   }
00148 
00149   void get_R(SparseSystem& R) {
00150     // we need R but have L, so the transpose works out fine
00151     of_cholmod_transp(_L, R, _order);
00152     VectorXd tmp(_L->nrow);
00153     memcpy(tmp.data(), (double*)_rhs->x, _L->nrow*sizeof(double));
00154     R.set_rhs(tmp);
00155   }
00156 
00157   int* get_order() {
00158     return _order;
00159   }
00160 
00161 private:
00162 
00163   void reset() {
00164     if (_L) cholmod_free_sparse(&_L, &Common);
00165     if (_rhs) cholmod_free_dense(&_rhs, &Common);
00166     if (_order) delete[] _order;
00167   }
00168 
00169   // internal, allocates new cholmod matrix and copies transpose of SparseSystem over
00170   // (SparseSystem is row-based, cholmod column-based)
00171   cholmod_sparse* to_cholmod_transp(const SparseSystem& A) {
00172     // note: num_cols/num_rows swapped for transpose
00173     cholmod_sparse* T = cholmod_allocate_sparse(A.num_cols(), A.num_rows(), A.nnz(),
00174                                                 true, true, 0, CHOLMOD_REAL, &Common);
00175 
00176     int* p = (int*)T->p;
00177     int* i = (int*)T->i;
00178     double* x = (double*)T->x;
00179     int n = 0;
00180     *p = n;
00181     for (int row=0; row<A.num_rows(); row++) {
00182       const SparseVector& r = A.get_row(row);
00183       int nnz = r.nnz();
00184       // easy: CSparse and SparseVector indices are both 0-based
00185       r.copy_raw(i, x);
00186       i += nnz;
00187       x += nnz;
00188       n += nnz;
00189       p++;
00190       *p = n;
00191     }
00192 
00193     return T;
00194   }
00195 
00196   // internal, returns SparseSystem containing a copy of cholmod matrix transposed for efficiency
00197   void of_cholmod_transp(const cholmod_sparse* T, SparseSystem& A, int* order) {
00198     int nrow = T->ncol; // swapped for transpose
00199     int ncol = T->nrow;
00200     SparseVector_p* rows = new SparseVector_p[nrow];
00201     int *p = (int*)T->p;
00202     int *i = (int*)T->i;
00203     double* x = (double*)T->x;
00204     for (int row = 0; row < nrow; row++) {
00205       int nnz = *(p+1) - *p;
00206       rows[row] = new SparseVector(i, x, nnz);
00207       i += nnz;
00208       x += nnz;
00209       p++;
00210     }
00211     A.import_rows_ordered(nrow, ncol, rows, order);
00212     delete [] rows;
00213   }
00214 
00215 };
00216 
00217 class CholeskyImplCSparse : public Cholesky {
00218   cs* _L;
00219   double* _rhs;
00220   int* _order;
00221 
00222 public:
00223 
00224   CholeskyImplCSparse() : _L(NULL), _rhs(NULL), _order(NULL) {
00225   }
00226 
00227   virtual ~CholeskyImplCSparse() {
00228     reset();
00229   }
00230  
00231   // QR factorization, slower than Cholesky below, does not deal with lambda!
00232   int* qr(cs* csA, int n, css* S, csn* N) {
00233     // symbolic QR with reordering
00234     S = cs_sqr(3, csA, 1); // first argument: 0=no reoder, 3=reorder
00235     // numerical QR based on symbolic factorization
00236     N = cs_qr(csA, S);
00237     // note: R factor in U for QR, variable ordering in S->q
00238     // note that L (N->U) is not square! taking subset below
00239     cs* csTemp = cs_spalloc(n, n, N->U->nzmax, 1, 0);
00240     memcpy(csTemp->p, N->U->p, (n+1) * sizeof(int));
00241     memcpy(csTemp->i, N->U->i, N->U->nzmax * sizeof(int));
00242     memcpy(csTemp->x, N->U->x, N->U->nzmax * sizeof(double));
00243     _L = cs_transpose(csTemp, 1);
00244     cs_spfree(csTemp);
00245     return S->q;
00246   }
00247 
00248   int* cholesky(cs* csA, cs* csAt, int n, double lambda, css* S, csn* N) {
00249     // Cholesky factorization
00250     cs* csAtA = cs_multiply(csAt, csA);
00251     if (lambda>0.) {
00252       // modify diagonal
00253       for (int i=0; i<n; i++) {
00254         int p = csAtA->p[i];
00255         while (csAtA->i[p]!=i) {p++;} // find diagonal entry
00256         csAtA->x[p] *= (1.+lambda);
00257       }
00258     }
00259     S = cs_schol(1, csAtA);
00260     N = cs_chol(csAtA, S);
00261     // straight copy
00262     _L = cs_spalloc(n, n, N->L->nzmax, 1, 0);
00263     memcpy(_L->p, N->L->p, (n+1) * sizeof(int));
00264     memcpy(_L->i, N->L->i, N->L->nzmax * sizeof(int));
00265     memcpy(_L->x, N->L->x, N->L->nzmax * sizeof(double));
00266     int* p = cs_pinv(S->pinv, csAtA->n);
00267     cs_spfree(csAtA);
00268     return p;
00269   }
00270 
00271   void factorize(const SparseSystem& Ab, VectorXd* delta = NULL, double lambda = 0) {
00272     tic("Cholesky");
00273 
00274     reset(); // make sure _L, _rhs, _order are empty
00275 
00276     // QR with correct rhs
00277     cs* csAt = to_csparse_transp(Ab);
00278     cs* csA = cs_transpose(csAt, 1);
00279     int m = csA->m;
00280     int n = csA->n;
00281     css* S = NULL;
00282     csn* N = NULL;
00283     int* p = NULL;
00284     if (USE_CSPARSE_QR) {
00285       cout << "WARNING: Using slow QR factorization without LM support" << endl;
00286       p = qr(csA, n, S, N);
00287     } else {
00288       p = cholesky(csA, csAt, n, lambda, S, N);
00289     }
00290     _order = new int[n];
00291     memcpy(_order, p, n*sizeof(int));
00292     // reorder rhs
00293     const double* Ab_rhs = Ab.rhs().data();
00294     // result Atb needs to be initialized - gets added!
00295     double *Atb = new double[m];
00296     for (int i=0; i<n; i++) {
00297       Atb[i] = 0.;
00298     }
00299     cs_gaxpy(csAt, Ab_rhs, Atb);
00300 
00301     // note that rhs is size n, Atb and B_rhs are longer (size m)
00302     _rhs = new double[n];
00303     for (int i=0; i<n; i++) {
00304       _rhs[i] = Atb[p[i]]; // permute Atb
00305     }
00306 
00307     // forward sub to include R
00308     cs_lsolve(_L, _rhs);
00309 
00310     // optionally solve the triangular system
00311     if (delta) {
00312       double* delta_ = new double[n];
00313       memcpy(delta_, _rhs, n*sizeof(double));
00314       cs_ltsolve(_L, delta_);
00315       *delta = VectorXd(n);
00316       memcpy(delta->data(), delta_, n*sizeof(double));
00317       delete[] delta_;
00318     }
00319 
00320     // clean up
00321     if (USE_CSPARSE_QR) {
00322       cs_free(p);
00323     }
00324     cs_spfree(csA);
00325     cs_spfree(csAt);
00326     cs_sfree(S);
00327     cs_nfree(N);
00328     delete[] Atb;
00329   }
00330 
00331   void get_R(SparseSystem& R) {
00332     of_csparse_transp(_L, R, _order);
00333     VectorXd tmp(_L->n);
00334     memcpy(tmp.data(), _rhs, _L->n);
00335     R.set_rhs(tmp);
00336   }
00337 
00338   int* get_order() {
00339     return _order;
00340   }
00341 
00342 private:
00343 
00344   void reset() {
00345     if (_L) cs_spfree(_L);
00346     if (_rhs) delete[] _rhs;
00347     if (_order) delete[] _order;
00348   }
00349 
00350   cs* to_csparse_transp(const SparseMatrix& A) const {
00351     // note: num_cols/num_rows swapped for transpose
00352     cs* T = cs_spalloc(A.num_cols(), A.num_rows(), A.nnz(), 1, 0);
00353     int* p = (int*)T->p;
00354     int* i = (int*)T->i;
00355     double* x = (double*)T->x;
00356     int n = 0;
00357     *p = n;
00358     for (int row=0; row<A.num_rows(); row++) {
00359       const SparseVector& r = A.get_row(row);
00360       int nnz = r.nnz();
00361       // easy: CSparse and SparseVector indices are both 0-based
00362       r.copy_raw(i, x);
00363       i += nnz;
00364       x += nnz;
00365       n += nnz;
00366       p++;
00367       *p = n;
00368     }
00369 
00370     return T;
00371   }
00372 
00373   void of_csparse_transp(const cs* T, SparseSystem& A, int* order) {
00374     int nrow = T->n; // swapped for transpose
00375     int ncol = T->m;
00376     SparseVector_p rows[nrow];
00377     int *p = (int*)T->p;
00378     int *i = (int*)T->i;
00379     double* x = (double*)T->x;
00380     for (int row = 0; row < nrow; row++) {
00381       int nnz = *(p+1) - *p;
00382       rows[row] = new SparseVector(i, x, nnz);
00383       i += nnz;
00384       x += nnz;
00385       p++;
00386     }
00387     A.import_rows_ordered(nrow, ncol, rows, order);
00388   }
00389 
00390 };
00391 
00392 
00393 Cholesky* Cholesky::Create() {
00394   if (USE_CSPARSE) {
00395     return new CholeskyImplCSparse();
00396   } else {
00397     return new CholeskyImpl();
00398   }
00399 }
00400 
00401 }
 All Classes Files Functions Variables