Actual source code: mumps.c

  1: #define PETSCMAT_DLL

  3: /* 
  4:     Provides an interface to the MUMPS sparse solver
  5: */
 6:  #include "src/mat/impls/aij/seq/aij.h"
 7:  #include "src/mat/impls/aij/mpi/mpiaij.h"
 8:  #include "src/mat/impls/sbaij/seq/sbaij.h"
 9:  #include "src/mat/impls/sbaij/mpi/mpisbaij.h"

 12: #if defined(PETSC_USE_COMPLEX)
 13: #include "zmumps_c.h"
 14: #else
 15: #include "dmumps_c.h" 
 16: #endif
 18: #define JOB_INIT -1
 19: #define JOB_END -2
 20: /* macros s.t. indices match MUMPS documentation */
 21: #define ICNTL(I) icntl[(I)-1] 
 22: #define CNTL(I) cntl[(I)-1] 
 23: #define INFOG(I) infog[(I)-1]
 24: #define INFO(I) info[(I)-1]
 25: #define RINFOG(I) rinfog[(I)-1]
 26: #define RINFO(I) rinfo[(I)-1]
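/* For example, lu->id.ICNTL(7) expands to lu->id.icntl[6], so the 1-based parameter
   numbers used in the MUMPS User's Guide can be used directly throughout this file. */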

 28: typedef struct {
 29: #if defined(PETSC_USE_COMPLEX)
 30:   ZMUMPS_STRUC_C id;
 31: #else
 32:   DMUMPS_STRUC_C id;
 33: #endif
 34:   MatStructure   matstruc;
 35:   PetscMPIInt    myid,size;
 36:   PetscInt       *irn,*jcn,sym,nSolve;
 37:   PetscScalar    *val;
 38:   MPI_Comm       comm_mumps;
 39:   VecScatter     scat_rhs, scat_sol;
 40:   PetscTruth     isAIJ,CleanUpMUMPS;
 41:   Vec            b_seq,x_seq;
 42:   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
 43:   PetscErrorCode (*MatView)(Mat,PetscViewer);
 44:   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
 45:   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
 46:   PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
 47:   PetscErrorCode (*MatDestroy)(Mat);
 48:   PetscErrorCode (*specialdestroy)(Mat);
 49:   PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);
 50: } Mat_MUMPS;

 52: EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
 54: PetscErrorCode  MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*);
 56: /* Convert a PETSc MPIAIJ (or MPISBAIJ) matrix to triples: row[nz], col[nz], val[nz] */
 57: /*
 58:   input: 
 59:     A       - matrix in mpiaij or mpisbaij (bs=1) format
 60:     shift   - 0: C style output triples; 1: Fortran style output triples
 61:     valOnly - FALSE: space is allocated and values are set for the triples
 62:               TRUE:  only the values in the v array are updated
 63:   output:     
 64:     nnz     - dim of r, c, and v (number of local nonzero entries of A)
 65:     r, c, v - row and column indices and matrix values (the triples)
 66:  */
 67: PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
 68:   PetscInt       *ai, *aj, *bi, *bj, rstart,nz, *garray;
 70:   PetscInt       i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol;
 71:   PetscInt       *row,*col;
 72:   PetscScalar    *av, *bv,*val;
 73:   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;

 76:   if (mumps->isAIJ){
 77:     Mat_MPIAIJ    *mat =  (Mat_MPIAIJ*)A->data;
 78:     Mat_SeqAIJ    *aa=(Mat_SeqAIJ*)(mat->A)->data;
 79:     Mat_SeqAIJ    *bb=(Mat_SeqAIJ*)(mat->B)->data;
 80:     nz = aa->nz + bb->nz;
 81:     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
 82:     garray = mat->garray;
 83:     av=aa->a; bv=bb->a;
 84: 
 85:   } else {
 86:     Mat_MPISBAIJ  *mat =  (Mat_MPISBAIJ*)A->data;
 87:     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
 88:     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
 89:     if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs);
 90:     nz = aa->nz + bb->nz;
 91:     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
 92:     garray = mat->garray;
 93:     av=aa->a; bv=bb->a;
 94:   }

 96:   if (!valOnly){
 97:     PetscMalloc(nz*sizeof(PetscInt) ,&row);
 98:     PetscMalloc(nz*sizeof(PetscInt),&col);
 99:     PetscMalloc(nz*sizeof(PetscScalar),&val);
100:     *r = row; *c = col; *v = val;
101:   } else {
102:     row = *r; col = *c; val = *v;
103:   }
104:   *nnz = nz;

106:   jj = 0; irow = rstart;
107:   for ( i=0; i<m; i++ ) {
108:     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
109:     countA = ai[i+1] - ai[i];
110:     countB = bi[i+1] - bi[i];
111:     bjj = bj + bi[i];

113:     /* get jB, the starting local col index for the 2nd B-part */
114:     colA_start = rstart + ajj[0]; /* the smallest col index for A */
115:     j=-1;
116:     do {
117:       j++;
118:       if (j == countB) break;
119:       jcol = garray[bjj[j]];
120:     } while (jcol < colA_start);
121:     jB = j;
122: 
123:     /* B-part, smaller col index */
124:     colA_start = rstart + ajj[0]; /* the smallest col index for A */
125:     for (j=0; j<jB; j++){
126:       jcol = garray[bjj[j]];
127:       if (!valOnly){
128:         row[jj] = irow + shift; col[jj] = jcol + shift;

130:       }
131:       val[jj++] = *bv++;
132:     }
133:     /* A-part */
134:     for (j=0; j<countA; j++){
135:       if (!valOnly){
136:         row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
137:       }
138:       val[jj++] = *av++;
139:     }
140:     /* B-part, larger col index */
141:     for (j=jB; j<countB; j++){
142:       if (!valOnly){
143:         row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
144:       }
145:       val[jj++] = *bv++;
146:     }
147:     irow++;
148:   }
149: 
150:   return(0);
151: }
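/* Illustrative call (a sketch of how this routine is used below): each process obtains
   its local part of A as Fortran-indexed triples on the first factorization,

     PetscInt    nz,*row,*col;
     PetscScalar *val;
     MatConvertToTriples(A,1,PETSC_FALSE,&nz,&row,&col,&val);

   and later calls with valOnly=PETSC_TRUE reuse row/col and only refresh val. */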

156: PetscErrorCode  MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
157: {
159:   Mat            B=*newmat;
160:   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
161:   void           (*f)(void);

164:   if (reuse == MAT_INITIAL_MATRIX) {
165:     MatDuplicate(A,MAT_COPY_VALUES,&B);
166:   }
167:   B->ops->duplicate              = mumps->MatDuplicate;
168:   B->ops->view                   = mumps->MatView;
169:   B->ops->assemblyend            = mumps->MatAssemblyEnd;
170:   B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
171:   B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
172:   B->ops->destroy                = mumps->MatDestroy;

174:   /* put back original composed preallocation function */
175:   PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);
176:   if (f) {
177:     PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);
178:   }
179:   PetscFree(mumps);
180:   A->spptr = PETSC_NULL;

182:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);
183:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);
184:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);
185:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);
186:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);
187:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);
188:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);
189:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);

191:   PetscObjectChangeTypeName((PetscObject)B,type);
192:   *newmat = B;
193:   return(0);
194: }

199: PetscErrorCode MatDestroy_MUMPS(Mat A)
200: {
201:   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
203:   PetscMPIInt    size=lu->size;
204:   PetscErrorCode (*specialdestroy)(Mat);
206:   if (lu->CleanUpMUMPS) {
 207:     /* Terminate the MUMPS instance and deallocate memory */
208:     if (size > 1){
209:       PetscFree(lu->id.sol_loc);
210:       VecScatterDestroy(lu->scat_rhs);
211:       VecDestroy(lu->b_seq);
212:       VecScatterDestroy(lu->scat_sol);
213:       VecDestroy(lu->x_seq);
214:       PetscFree(lu->val);
215:     }
216:     lu->id.job=JOB_END;
217: #if defined(PETSC_USE_COMPLEX)
218:     zmumps_c(&lu->id);
219: #else
220:     dmumps_c(&lu->id);
221: #endif
222:     PetscFree(lu->irn);
223:     PetscFree(lu->jcn);
224:     MPI_Comm_free(&(lu->comm_mumps));
225:   }
226:   specialdestroy = lu->specialdestroy;
227:   (*specialdestroy)(A);
228:   (*A->ops->destroy)(A);
229:   return(0);
230: }

234: PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
235: {
237:   PetscMPIInt    size;

240:   MPI_Comm_size(A->comm,&size);
241:   if (size==1) {
242:     MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);
243:   } else {
244:     MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);
245:   }
246:   return(0);
247: }

251: PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
252: {
254:   PetscMPIInt    size;

257:   MPI_Comm_size(A->comm,&size);
258:   if (size==1) {
259:     MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);
260:   } else {
261:     MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);
262:   }
263:   return(0);
264: }

268: PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) {
269:   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
270:   PetscScalar    *array;
271:   Vec            x_seq;
272:   IS             is_iden,is_petsc;
273:   VecScatter     scat_rhs=lu->scat_rhs,scat_sol=lu->scat_sol;
275:   PetscInt       i;

278:   lu->id.nrhs = 1;
279:   x_seq = lu->b_seq;
280:   if (lu->size > 1){
 281:     /* MUMPS only supports a centralized rhs. Scatter b into a sequential rhs vector */
282:     VecScatterBegin(scat_rhs,b,x_seq,INSERT_VALUES,SCATTER_FORWARD);
283:     VecScatterEnd(scat_rhs,b,x_seq,INSERT_VALUES,SCATTER_FORWARD);
284:     if (!lu->myid) {VecGetArray(x_seq,&array);}
285:   } else {  /* size == 1 */
286:     VecCopy(b,x);
287:     VecGetArray(x,&array);
288:   }
289:   if (!lu->myid) { /* define rhs on the host */
290: #if defined(PETSC_USE_COMPLEX)
291:     lu->id.rhs = (mumps_double_complex*)array;
292: #else
293:     lu->id.rhs = array;
294: #endif
295:   }
296:   if (lu->size == 1){
297:     VecRestoreArray(x,&array);
298:   } else if (!lu->myid){
299:     VecRestoreArray(x_seq,&array);
300:   }

302:   if (lu->size > 1){
303:     /* distributed solution */
304:     lu->id.ICNTL(21) = 1;
305:     if (!lu->nSolve){
306:       /* Create x_seq=sol_loc for repeated use */
307:       PetscInt    lsol_loc;
308:       PetscScalar *sol_loc;
309:       lsol_loc = lu->id.INFO(23); /* length of sol_loc */
310:       PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);
311:       lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc);
312:       lu->id.lsol_loc = lsol_loc;
313: #if defined(PETSC_USE_COMPLEX)
314:       lu->id.sol_loc  = (ZMUMPS_DOUBLE *)sol_loc;
315: #else
316:       lu->id.sol_loc  = (DMUMPS_DOUBLE *)sol_loc;
317: #endif
318:       VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);
319:     }
320:   }

322:   /* solve phase */
323:   /*-------------*/
324:   lu->id.job = 3;
325: #if defined(PETSC_USE_COMPLEX)
326:   zmumps_c(&lu->id);
327: #else
328:   dmumps_c(&lu->id);
329: #endif
330:   if (lu->id.INFOG(1) < 0) {
331:     SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
332:   }

334:   if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */
335:     if (!lu->nSolve){ /* create scatter scat_sol */
336:       ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden); /* from */
337:       for (i=0; i<lu->id.lsol_loc; i++){
338:         lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */
339:       }
340:       ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);  /* to */
341:       VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);
342:       ISDestroy(is_iden);
343:       ISDestroy(is_petsc);
344:     }
345:     VecScatterBegin(lu->scat_sol,lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
346:     VecScatterEnd(lu->scat_sol,lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
347:   }
348:   lu->nSolve++;
349:   return(0);
350: }

352: /* 
353:   input:
354:    F:        numeric factor
355:   output:
356:    nneg:     total number of negative pivots
357:    nzero:    0
358:    npos:     (global dimension of F) - nneg
359: */

363: PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
364: {
365:   Mat_MUMPS      *lu =(Mat_MUMPS*)F->spptr;
367:   PetscMPIInt    size;

370:   MPI_Comm_size(F->comm,&size);
 371:   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (the default), which cannot compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
372:   if (size > 1 && lu->id.ICNTL(13) != 1){
 373:     SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for a correct global matrix inertia\n",lu->id.ICNTL(13));
374:   }
375:   if (nneg){
376:     if (!lu->myid){
377:       *nneg = lu->id.INFOG(12);
378:     }
379:     MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);
380:   }
381:   if (nzero) *nzero = 0;
382:   if (npos)  *npos  = F->rmap.N - (*nneg);
383:   return(0);
384: }
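/* Usage sketch (assumed, not part of this file): after a Cholesky factorization with a
   MATSBAIJMUMPS matrix, the inertia is available through the generic PETSc interface,

     PetscInt nneg,nzero,npos;
     MatGetInertia(F,&nneg,&nzero,&npos);

   On more than one process, run with -mat_mumps_icntl_13 1 so that MUMPS, rather than
   ScaLAPACK, handles the root node and the reported inertia is correct. */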

388: PetscErrorCode MatFactorNumeric_MUMPS(Mat A,MatFactorInfo *info,Mat *F)
389: {
390:   Mat_MUMPS      *lu =(Mat_MUMPS*)(*F)->spptr;
391:   Mat_MUMPS      *lua=(Mat_MUMPS*)(A)->spptr;
393:   PetscInt       rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl;
394:   PetscTruth     valOnly,flg;
395:   Mat            F_diag;

398:   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
399:     (*F)->ops->solve    = MatSolve_MUMPS;

401:     /* Initialize a MUMPS instance */
402:     MPI_Comm_rank(A->comm, &lu->myid);
403:     MPI_Comm_size(A->comm,&lu->size);
404:     lua->myid = lu->myid; lua->size = lu->size;
405:     lu->id.job = JOB_INIT;
406:     MPI_Comm_dup(A->comm,&(lu->comm_mumps));
407:     MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));

409:     /* Set mumps options */
410:     PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");
 411:     lu->id.par=1;  /* host participates in factorization and solve */
412:     lu->id.sym=lu->sym;
413:     if (lu->sym == 2){
414:       PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);
415:       if (flg && icntl == 1) lu->id.sym=icntl;  /* matrix is spd */
416:     }
417: #if defined(PETSC_USE_COMPLEX)
418:     zmumps_c(&lu->id);
419: #else
420:     dmumps_c(&lu->id);
421: #endif
422: 
423:     if (lu->size == 1){
424:       lu->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
425:     } else {
426:       lu->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
427:     }

429:     icntl=-1;
 430:     lu->id.ICNTL(4) = 0;  /* level of printing; override the MUMPS default ICNTL(4)=2 */
431:     PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);
432:     if ((flg && icntl > 0) || PetscLogPrintInfo) {
433:       lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */
434:     } else { /* no output */
435:       lu->id.ICNTL(1) = 0;  /* error message, default= 6 */
436:       lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */
437:       lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
438:     }
439:     PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);
440:     icntl=-1;
441:     PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);
442:     if (flg) {
443:       if (icntl== 1){
 444:         SETERRQ(PETSC_ERR_SUP,"A user-supplied pivot order in PERM_IN (ICNTL(7)=1) is not supported by the PETSc/MUMPS interface\n");
445:       } else {
446:         lu->id.ICNTL(7) = icntl;
447:       }
448:     }
449:     PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);
450:     PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);
451:     PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);
452:     PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);
453:     PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);
454:     PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);
455:     PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);

457:     PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);
458:     PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);
459:     PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);
460:     PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",lu->id.CNTL(4),&lu->id.CNTL(4),PETSC_NULL);
461:     PetscOptionsEnd();
462:   }

464:   /* define matrix A */
465:   switch (lu->id.ICNTL(18)){
466:   case 0:  /* centralized assembled matrix input (size=1) */
467:     if (!lu->myid) {
468:       if (lua->isAIJ){
469:         Mat_SeqAIJ   *aa = (Mat_SeqAIJ*)A->data;
470:         nz               = aa->nz;
471:         ai = aa->i; aj = aa->j; lu->val = aa->a;
472:       } else {
473:         Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
474:         nz                  =  aa->nz;
475:         ai = aa->i; aj = aa->j; lu->val = aa->a;
476:       }
477:       if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
478:         PetscMalloc(nz*sizeof(PetscInt),&lu->irn);
479:         PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);
480:         nz = 0;
481:         for (i=0; i<M; i++){
482:           rnz = ai[i+1] - ai[i];
483:           while (rnz--) {  /* Fortran row/col index! */
484:             lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
485:           }
486:         }
487:       }
488:     }
489:     break;
490:   case 3:  /* distributed assembled matrix input (size>1) */
491:     if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
492:       valOnly = PETSC_FALSE;
493:     } else {
494:       valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
495:     }
496:     MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);
497:     break;
498:   default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
499:   }

501:   /* analysis phase */
502:   /*----------------*/
503:   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
504:     lu->id.job = 1;

506:     lu->id.n = M;
507:     switch (lu->id.ICNTL(18)){
508:     case 0:  /* centralized assembled matrix input */
509:       if (!lu->myid) {
510:         lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
511:         if (lu->id.ICNTL(6)>1){
512: #if defined(PETSC_USE_COMPLEX)
513:           lu->id.a = (mumps_double_complex*)lu->val;
514: #else
515:           lu->id.a = lu->val;
516: #endif
517:         }
518:       }
519:       break;
520:     case 3:  /* distributed assembled matrix input (size>1) */
521:       lu->id.nz_loc = nnz;
522:       lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
523:       if (lu->id.ICNTL(6)>1) {
524: #if defined(PETSC_USE_COMPLEX)
525:         lu->id.a_loc = (mumps_double_complex*)lu->val;
526: #else
527:         lu->id.a_loc = lu->val;
528: #endif
529:       }
530:       /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
531:       IS  is_iden;
532:       Vec b;
533:       if (!lu->myid){
534:         VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);
535:         ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);
536:       } else {
537:         VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);
538:         ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);
539:       }
540:       VecCreate(A->comm,&b);
541:       VecSetSizes(b,A->rmap.n,PETSC_DECIDE);
542:       VecSetFromOptions(b);

544:       VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);
545:       ISDestroy(is_iden);
546:       VecDestroy(b);
547:       break;
548:     }
549: #if defined(PETSC_USE_COMPLEX)
550:     zmumps_c(&lu->id);
551: #else
552:     dmumps_c(&lu->id);
553: #endif
554:     if (lu->id.INFOG(1) < 0) {
555:       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
556:     }
557:   }

559:   /* numerical factorization phase */
560:   /*-------------------------------*/
561:   lu->id.job = 2;
562:   if(!lu->id.ICNTL(18)) {
563:     if (!lu->myid) {
564: #if defined(PETSC_USE_COMPLEX)
565:       lu->id.a = (mumps_double_complex*)lu->val;
566: #else
567:       lu->id.a = lu->val;
568: #endif
569:     }
570:   } else {
571: #if defined(PETSC_USE_COMPLEX)
572:     lu->id.a_loc = (mumps_double_complex*)lu->val;
573: #else
574:     lu->id.a_loc = lu->val;
575: #endif
576:   }
577: #if defined(PETSC_USE_COMPLEX)
578:   zmumps_c(&lu->id);
579: #else
580:   dmumps_c(&lu->id);
581: #endif
582:   if (lu->id.INFOG(1) < 0) {
583:     if (lu->id.INFO(1) == -13) {
584:       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
585:     } else {
586:       SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
587:     }
588:   }

590:   if (!lu->myid && lu->id.ICNTL(16) > 0){
591:     SETERRQ1(PETSC_ERR_LIB,"  lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16));
592:   }

594:   if (lu->size > 1){
595:     if ((*F)->factor == FACTOR_LU){
596:       F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
597:     } else {
598:       F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
599:     }
600:     F_diag->assembled = PETSC_TRUE;
601:     if (lu->nSolve){
602:       VecScatterDestroy(lu->scat_sol);
603:       PetscFree(lu->id.sol_loc);
604:       VecDestroy(lu->x_seq);
605:     }
606:   }
607:   (*F)->assembled   = PETSC_TRUE;
608:   lu->matstruc      = SAME_NONZERO_PATTERN;
609:   lu->CleanUpMUMPS  = PETSC_TRUE;
610:   lu->nSolve        = 0;
611:   return(0);
612: }

 614: /* Note the PETSc r and c permutations are ignored */
617: PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
618:   Mat            B;
619:   Mat_MUMPS      *lu;

623:   /* Create the factorization matrix */
624:   MatCreate(A->comm,&B);
625:   MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);
626:   MatSetType(B,A->type_name);
627:   MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
628:   MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);

630:   B->ops->lufactornumeric = MatFactorNumeric_MUMPS;
631:   B->factor               = FACTOR_LU;
632:   lu                      = (Mat_MUMPS*)B->spptr;
633:   lu->sym                 = 0;
634:   lu->matstruc            = DIFFERENT_NONZERO_PATTERN;

636:   *F = B;
637:   return(0);
638: }

 640: /* Note the PETSc r permutation is ignored */
643: PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
644:   Mat            B;
645:   Mat_MUMPS      *lu;

649:   /* Create the factorization matrix */
650:   MatCreate(A->comm,&B);
651:   MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);
652:   MatSetType(B,A->type_name);
653:   MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);
654:   MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);

656:   B->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
657:   B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
658:   B->factor                     = FACTOR_CHOLESKY;
659:   lu                            = (Mat_MUMPS*)B->spptr;
660:   lu->sym                       = 2;
661:   lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;

663:   *F = B;
664:   return(0);
665: }

669: PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
670:   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;

674:   /* check if matrix is mumps type */
675:   if (A->ops->solve != MatSolve_MUMPS) return(0);

677:   PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");
678:   PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                  %d \n",lu->id.sym);
679:   PetscViewerASCIIPrintf(viewer,"  PAR (host participation):           %d \n",lu->id.par);
680:   PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):        %d \n",lu->id.ICNTL(1));
681:   PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg):%d \n",lu->id.ICNTL(2));
682:   PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):  %d \n",lu->id.ICNTL(3));
683:   PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):       %d \n",lu->id.ICNTL(4));
684:   PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):        %d \n",lu->id.ICNTL(5));
685:   PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):       %d \n",lu->id.ICNTL(6));
686:   PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (matrix ordering):         %d \n",lu->id.ICNTL(7));
 687:   PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d \n",lu->id.ICNTL(8));
688:   PetscViewerASCIIPrintf(viewer,"  ICNTL(9) (A/A^T x=b is solved):     %d \n",lu->id.ICNTL(9));
689:   PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));
690:   PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):         %d \n",lu->id.ICNTL(11));
691:   if (!lu->myid && lu->id.ICNTL(11)>0) {
692:     PetscPrintf(PETSC_COMM_SELF,"        RINFOG(4) (inf norm of input mat):        %g\n",lu->id.RINFOG(4));
693:     PetscPrintf(PETSC_COMM_SELF,"        RINFOG(5) (inf norm of solution):         %g\n",lu->id.RINFOG(5));
694:     PetscPrintf(PETSC_COMM_SELF,"        RINFOG(6) (inf norm of residual):         %g\n",lu->id.RINFOG(6));
695:     PetscPrintf(PETSC_COMM_SELF,"        RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));
696:     PetscPrintf(PETSC_COMM_SELF,"        RINFOG(9) (error estimate):               %g \n",lu->id.RINFOG(9));
697:     PetscPrintf(PETSC_COMM_SELF,"        RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));
698: 
699:   }
700:   PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",lu->id.ICNTL(12));
701:   PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",lu->id.ICNTL(13));
702:   PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));
703:   /* ICNTL(15-17) not used */
704:   PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",lu->id.ICNTL(18));
 705:   PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d \n",lu->id.ICNTL(19));
706:   PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (rhs sparse pattern):                         %d \n",lu->id.ICNTL(20));
707:   PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d \n",lu->id.ICNTL(21));

709:   PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",lu->id.CNTL(1));
710:   PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));
711:   PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",lu->id.CNTL(3));
712:   PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",lu->id.CNTL(4));

 714:   /* information local to each processor */
715:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      RINFO(1) (local estimated flops for the elimination after analysis): \n");}
716:   PetscSynchronizedPrintf(A->comm,"             [%d] %g \n",lu->myid,lu->id.RINFO(1));
717:   PetscSynchronizedFlush(A->comm);
718:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      RINFO(2) (local estimated flops for the assembly after factorization): \n");}
719:   PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(2));
720:   PetscSynchronizedFlush(A->comm);
721:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      RINFO(3) (local estimated flops for the elimination after factorization): \n");}
722:   PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(3));
723:   PetscSynchronizedFlush(A->comm);
724:   /*
725:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      INFO(2) (info about error or warning ): \n");}
726:   PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(2));
727:   PetscSynchronizedFlush(A->comm);
728:   */

 730:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");}
731:   PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(15));
732:   PetscSynchronizedFlush(A->comm);

 734:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");}
735:   PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(16));
736:   PetscSynchronizedFlush(A->comm);

738:   if (!lu->myid) {PetscPrintf(PETSC_COMM_SELF, "      INFO(23) (num of pivots eliminated on this processor after factorization): \n");}
739:   PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(23));
740:   PetscSynchronizedFlush(A->comm);

742:   if (!lu->myid){ /* information from the host */
743:     PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));
744:     PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));
745:     PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));

747:     PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));
748:     PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));
749:     PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));
750:     PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));
 751:     PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d \n",lu->id.INFOG(7));
752:     PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));
753:     PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));
 754:     PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d \n",lu->id.INFOG(10));
755:     PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));
756:     PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));
757:     PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));
 758:     PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compresses after factorization): %d \n",lu->id.INFOG(14));
759:     PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));
760:     PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));
761:     PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));
762:     PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));
763:     PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));
 764:     PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));
 765:     PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));
 766:     PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));
 767:     PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));
 768:     PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));
 769:     PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));
770:   }

772:   return(0);
773: }

777: PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) {
778:   PetscErrorCode    ierr;
779:   PetscTruth        iascii;
780:   PetscViewerFormat format;
781:   Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);

784:   (*mumps->MatView)(A,viewer);

786:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
787:   if (iascii) {
788:     PetscViewerGetFormat(viewer,&format);
789:     if (format == PETSC_VIEWER_ASCII_INFO){
790:       MatFactorInfo_MUMPS(A,viewer);
791:     }
792:   }
793:   return(0);
794: }

798: PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
800:   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;

803:   (*mumps->MatAssemblyEnd)(A,mode);

805:   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
806:   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
807:   A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
808:   return(0);
809: }

814: PetscErrorCode  MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
815: {
817:   PetscMPIInt    size;
818:   MPI_Comm       comm;
819:   Mat            B=*newmat;
820:   Mat_MUMPS      *mumps;

823:   PetscObjectGetComm((PetscObject)A,&comm);
824:   PetscNew(Mat_MUMPS,&mumps);

826:   if (reuse == MAT_INITIAL_MATRIX) {
827:     MatDuplicate(A,MAT_COPY_VALUES,&B);
 828:     /* A may have a special container that is not duplicated, 
 829:        e.g., A is obtained from MatMatMult(...,&A). Save B->ops instead */
830:     mumps->MatDuplicate              = B->ops->duplicate;
831:     mumps->MatView                   = B->ops->view;
832:     mumps->MatAssemblyEnd            = B->ops->assemblyend;
833:     mumps->MatLUFactorSymbolic       = B->ops->lufactorsymbolic;
834:     mumps->MatCholeskyFactorSymbolic = B->ops->choleskyfactorsymbolic;
835:     mumps->MatDestroy                = B->ops->destroy;
836:   } else {
837:     mumps->MatDuplicate              = A->ops->duplicate;
838:     mumps->MatView                   = A->ops->view;
839:     mumps->MatAssemblyEnd            = A->ops->assemblyend;
840:     mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
841:     mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
842:     mumps->MatDestroy                = A->ops->destroy;
843:   }
844:   mumps->specialdestroy            = MatDestroy_AIJMUMPS;
845:   mumps->CleanUpMUMPS              = PETSC_FALSE;
846:   mumps->isAIJ                     = PETSC_TRUE;

848:   B->spptr                         = (void*)mumps;
849:   B->ops->duplicate                = MatDuplicate_MUMPS;
850:   B->ops->view                     = MatView_MUMPS;
851:   B->ops->assemblyend              = MatAssemblyEnd_AIJMUMPS;
852:   B->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
853:   B->ops->destroy                  = MatDestroy_MUMPS;

855:   MPI_Comm_size(comm,&size);
856:   if (size == 1) {
857:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
858:                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);
859:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
860:                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);
861:   } else {
862:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
863:                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);
864:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
865:                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);
866:   }

868:   PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");
869:   PetscObjectChangeTypeName((PetscObject)B,newtype);
870:   *newmat = B;
871:   return(0);
872: }

875: /*MC
876:   MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
877:   and sequential matrices via the external package MUMPS.

879:   If MUMPS is installed (see the manual for instructions
880:   on how to declare the existence of external packages),
881:   a matrix type can be constructed which invokes MUMPS solvers.
882:   After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).

884:   If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
885:   Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
886:   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 
887:   for communicators controlling multiple processes.  It is recommended that you call both of
 888:   the above preallocation routines for simplicity.  One can also call MatConvert() for an in-place
889:   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
890:   without data copy.

892:   Options Database Keys:
893: + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
894: . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
895: . -mat_mumps_icntl_4 <0,1,2,3,4> - print level
896: . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
897: . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
898: . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
899: . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
900: . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
901: . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
902: . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
903: . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
904: . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
905: . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
906: . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
907: - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold

909:   Level: beginner

911: .seealso: MATSBAIJMUMPS
912: M*/
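/* A minimal usage sketch (illustrative only; n and nz below are placeholders):

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
     MatSetType(A,MATAIJMUMPS);
     MatSeqAIJSetPreallocation(A,nz,PETSC_NULL);
     MatMPIAIJSetPreallocation(A,nz,PETSC_NULL,nz,PETSC_NULL);
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...

   The factorization is then triggered by a direct solve, e.g. with the runtime options
   -ksp_type preonly -pc_type lu, possibly combined with the -mat_mumps_* options above. */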

917: PetscErrorCode  MatCreate_AIJMUMPS(Mat A)
918: {
920:   PetscMPIInt    size;
921: 
923:   MPI_Comm_size(A->comm,&size);
924:   if (size == 1) {
925:     MatSetType(A,MATSEQAIJ);
926:   } else {
927:     MatSetType(A,MATMPIAIJ);
928:     /*
929:     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
930:     MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);
931:     */
932:   }
933:   MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);
934:   return(0);
935: }

940: PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode)
941: {
943:   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;

946:   (*mumps->MatAssemblyEnd)(A,mode);
947:   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
948:   A->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
949:   return(0);
950: }

955: PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
956: {
957:   Mat       A;
958:   Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr;

962:   /*
963:     After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix
 964:     into the MUMPS type so that the block Jacobi preconditioner (for example) can use MUMPS.  I would
965:     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
966:     block size info so that PETSc can determine the local size properly.  The block size info is set
967:     in the preallocation routine.
968:   */
969:   (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
970:   A    = ((Mat_MPISBAIJ *)B->data)->A;
971:   MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);
972:   return(0);
973: }

979: PetscErrorCode  MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
980: {
982:   PetscMPIInt    size;
983:   MPI_Comm       comm;
984:   Mat            B=*newmat;
985:   Mat_MUMPS      *mumps;
986:   void           (*f)(void);

989:   if (reuse == MAT_INITIAL_MATRIX) {
990:     MatDuplicate(A,MAT_COPY_VALUES,&B);
991:   }

993:   PetscObjectGetComm((PetscObject)A,&comm);
994:   PetscNew(Mat_MUMPS,&mumps);

996:   mumps->MatDuplicate              = A->ops->duplicate;
997:   mumps->MatView                   = A->ops->view;
998:   mumps->MatAssemblyEnd            = A->ops->assemblyend;
999:   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
1000:   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
1001:   mumps->MatDestroy                = A->ops->destroy;
1002:   mumps->specialdestroy            = MatDestroy_SBAIJMUMPS;
1003:   mumps->CleanUpMUMPS              = PETSC_FALSE;
1004:   mumps->isAIJ                     = PETSC_FALSE;
1005: 
1006:   B->spptr                         = (void*)mumps;
1007:   B->ops->duplicate                = MatDuplicate_MUMPS;
1008:   B->ops->view                     = MatView_MUMPS;
1009:   B->ops->assemblyend              = MatAssemblyEnd_SBAIJMUMPS;
1010:   B->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
1011:   B->ops->destroy                  = MatDestroy_MUMPS;

1013:   MPI_Comm_size(comm,&size);
1014:   if (size == 1) {
1015:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C",
1016:                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);
1017:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C",
1018:                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);
1019:   } else {
1020:     /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */
1021:     PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);
1022:     if (f) { /* This case should always be true when this routine is called */
1023:       mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f;
1024:       PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1025:                                                "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS",
1026:                                                MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);
1027:     }
1028:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C",
1029:                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);
1030:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C",
1031:                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);
1032:   }

1034:   PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");
1035:   PetscObjectChangeTypeName((PetscObject)B,newtype);
1036:   *newmat = B;
1037:   return(0);
1038: }

1043: PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) {
1045:   Mat_MUMPS   *lu=(Mat_MUMPS *)A->spptr;

1048:   (*lu->MatDuplicate)(A,op,M);
1049:   PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));
1050:   return(0);
1051: }

1053: /*MC
1054:   MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
1055:   distributed and sequential matrices via the external package MUMPS.

1057:   If MUMPS is installed (see the manual for instructions
1058:   on how to declare the existence of external packages),
1059:   a matrix type can be constructed which invokes MUMPS solvers.
1060:   After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).

1062:   If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
1063:   Otherwise, this matrix type inherits from MATMPISBAIJ.  Hence for single process communicators,
1064:   MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported 
1065:   for communicators controlling multiple processes.  It is recommended that you call both of
1066:   the above preallocation routines for simplicity.  One can also call MatConvert() for an in-place
1067:   conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
1068:   without data copy.

1070:   Options Database Keys:
1071: + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
1072: . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
1073: . -mat_mumps_icntl_4 <0,...,4> - print level
1074: . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
1075: . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
1076: . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
1077: . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
1078: . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
1079: . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
1080: . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
1081: . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
1082: . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
1083: . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
1084: . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
1085: - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold

1087:   Level: beginner

1089: .seealso: MATAIJMUMPS
1090: M*/
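/* Usage is analogous to the MATAIJMUMPS sketch earlier in this file (illustrative only):
   create the matrix, call MatSetType(A,MATSBAIJMUMPS) or run with -mat_type sbaijmumps,
   preallocate with MatSeqSBAIJSetPreallocation()/MatMPISBAIJSetPreallocation(), assemble,
   and solve with e.g. -ksp_type preonly -pc_type cholesky. */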

1095: PetscErrorCode  MatCreate_SBAIJMUMPS(Mat A)
1096: {
1098:   PetscMPIInt    size;

1101:   MPI_Comm_size(A->comm,&size);
1102:   if (size == 1) {
1103:     MatSetType(A,MATSEQSBAIJ);
1104:   } else {
1105:     MatSetType(A,MATMPISBAIJ);
1106:   }
1107:   MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);
1108:   return(0);
1109: }