Actual source code: superlu_dist.c

  1: #define PETSCMAT_DLL

  3: /* 
  4:         Provides an interface to the SuperLU_DIST_2.0 sparse solver
  5: */

  7: #include "src/mat/impls/aij/seq/aij.h"
  8: #include "src/mat/impls/aij/mpi/mpiaij.h"
  9: #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around a weird problem with SuperLU on Cray */
 10: #include "stdlib.h"
 11: #endif

 14: #if defined(PETSC_USE_COMPLEX)
 15: #include "superlu_zdefs.h"
 16: #else
 17: #include "superlu_ddefs.h"
 18: #endif

 21: typedef enum { GLOBAL,DISTRIBUTED
 22: } SuperLU_MatInputMode;
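     /* GLOBAL: the whole matrix is gathered onto every process and passed to the
        p?gssvx_ABglobal() routines; DISTRIBUTED: each process hands SuperLU_DIST only
        its local rows (SLU_NR_loc format) and the p?gssvx() routines are used. */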

 24: typedef struct {
 25:   int_t                   nprow,npcol,*row,*col;
 26:   gridinfo_t              grid;
 27:   superlu_options_t       options;
 28:   SuperMatrix             A_sup;
 29:   ScalePermstruct_t       ScalePermstruct;
 30:   LUstruct_t              LUstruct;
 31:   int                     StatPrint;
 32:   int                     MatInputMode;
 33:   SOLVEstruct_t           SOLVEstruct;
 34:   fact_t                  FactPattern;
 35:   MPI_Comm                comm_superlu;
 36: #if defined(PETSC_USE_COMPLEX)
 37:   doublecomplex           *val;
 38: #else
 39:   double                  *val;
 40: #endif

 42:   /* A few function pointers for inheritance */
 43:   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
 44:   PetscErrorCode (*MatView)(Mat,PetscViewer);
 45:   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
 46:   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
 47:   PetscErrorCode (*MatDestroy)(Mat);

 49:   /* Flag to clean up (non-global) SuperLU objects during Destroy */
 50:   PetscTruth CleanUpSuperLU_Dist;
 51: } Mat_SuperLU_DIST;

 53: EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);

 66: PetscErrorCode  MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
 67: {
 68:   PetscErrorCode   ierr;
 69:   Mat              B=*newmat;
 70:   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

 73:   if (reuse == MAT_INITIAL_MATRIX) {
 74:     MatDuplicate(A,MAT_COPY_VALUES,&B);
 75:   }
 76:   /* Reset the original function pointers */
 77:   B->ops->duplicate        = lu->MatDuplicate;
 78:   B->ops->view             = lu->MatView;
 79:   B->ops->assemblyend      = lu->MatAssemblyEnd;
 80:   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
 81:   B->ops->destroy          = lu->MatDestroy;
 82:   PetscFree(lu);
 83:   A->spptr = PETSC_NULL;

 85:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);
 86:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);
 87:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);
 88:   PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);

 90:   PetscObjectChangeTypeName((PetscObject)B,type);
 91:   *newmat = B;
 92:   return(0);
 93: }

 98: PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
 99: {
100:   PetscErrorCode   ierr;
101:   PetscMPIInt      size;
102:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
103: 
105:   if (lu->CleanUpSuperLU_Dist) {
106:     /* Deallocate SuperLU_DIST storage */
107:     if (lu->MatInputMode == GLOBAL) {
108:       Destroy_CompCol_Matrix_dist(&lu->A_sup);
109:     } else {
110:       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
111:       if ( lu->options.SolveInitialized ) {
112: #if defined(PETSC_USE_COMPLEX)
113:         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
114: #else
115:         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
116: #endif
117:       }
118:     }
119:     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
120:     ScalePermstructFree(&lu->ScalePermstruct);
121:     LUstructFree(&lu->LUstruct);
122: 
123:     /* Release the SuperLU_DIST process grid. */
124:     superlu_gridexit(&lu->grid);
125: 
126:     MPI_Comm_free(&(lu->comm_superlu));
127:   }

129:   MPI_Comm_size(A->comm,&size);
130:   if (size == 1) {
131:     MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);
132:   } else {
133:     MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);
134:   }
135:   (*A->ops->destroy)(A);
136:   return(0);
137: }

141: PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
142: {
143:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
144:   PetscErrorCode   ierr;
145:   PetscMPIInt      size;
146:   PetscInt         m=A->rmap.N, N=A->cmap.N;
147:   SuperLUStat_t    stat;
148:   double           berr[1];
149:   PetscScalar      *bptr;
150:   PetscInt         info, nrhs=1;
151:   Vec              x_seq;
152:   IS               iden;
153:   VecScatter       scat;
154: 
156:   MPI_Comm_size(A->comm,&size);
157:   if (size > 1) {
158:     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
159:       VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);
160:       ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);
161:       VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);
162:       ISDestroy(iden);

164:       VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
165:       VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);
166:       VecGetArray(x_seq,&bptr);
167:     } else { /* distributed mat input */
168:       VecCopy(b_mpi,x);
169:       VecGetArray(x,&bptr);
170:     }
171:   } else { /* size == 1 */
172:     VecCopy(b_mpi,x);
173:     VecGetArray(x,&bptr);
174:   }
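       /* SuperLU_DIST overwrites the right-hand-side array with the solution, which is why
          b has been copied into x (or gathered into x_seq) and bptr points at that copy. */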
175: 
176:   if (lu->options.Fact != FACTORED)
177:       SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");

179:   PStatInit(&stat);        /* Initialize the statistics variables. */
180:   if (lu->MatInputMode == GLOBAL) {
181: #if defined(PETSC_USE_COMPLEX)
182:     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
183:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
184: #else
185:     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
186:                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
187: #endif 
188:   } else { /* distributed mat input */
189: #if defined(PETSC_USE_COMPLEX)
190:     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
191:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
192:     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
193: #else
194:     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
195:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
196:     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
197: #endif
198:   }
199:   if (lu->options.PrintStat) {
200:      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
201:   }
202:   PStatFree(&stat);
203: 
204:   if (size > 1) {
205:     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
206:       VecRestoreArray(x_seq,&bptr);
207:       VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
208:       VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);
209:       VecScatterDestroy(scat);
210:       VecDestroy(x_seq);
211:     } else {
212:       VecRestoreArray(x,&bptr);
213:     }
214:   } else {
215:     VecRestoreArray(x,&bptr);
216:   }
217:   return(0);
218: }

222: PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
223: {
224:   Mat              *tseq,A_seq = PETSC_NULL;
225:   Mat_SeqAIJ       *aa,*bb;
226:   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
227:   PetscErrorCode   ierr;
228:   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
229:                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
230:   PetscMPIInt      size,rank;
231:   SuperLUStat_t    stat;
232:   double           *berr=0;
233:   IS               isrow;
234:   PetscLogDouble   time0,time,time_min,time_max;
235:   Mat              F_diag=PETSC_NULL;
236: #if defined(PETSC_USE_COMPLEX)
237:   doublecomplex    *av, *bv;
238: #else
239:   double           *av, *bv;
240: #endif

243:   MPI_Comm_size(A->comm,&size);
244:   MPI_Comm_rank(A->comm,&rank);
245: 
246:   if (lu->options.PrintStat) { /* collect time for mat conversion */
247:     MPI_Barrier(A->comm);
248:     PetscGetTime(&time0);
249:   }

251:   if (lu->MatInputMode == GLOBAL) { /* global mat input */
252:     if (size > 1) { /* convert mpi A to seq mat A */
253:       ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);
254:       MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);
255:       ISDestroy(isrow);
256: 
257:       A_seq = *tseq;
258:       PetscFree(tseq);
259:       aa =  (Mat_SeqAIJ*)A_seq->data;
260:     } else {
261:       aa =  (Mat_SeqAIJ*)A->data;
262:     }

264:     /* Convert the PETSc compressed-row (NR) matrix to the SuperLU_DIST compressed-column (NC) format.
265:        Note: lu->val, lu->col and lu->row are allocated by CompRow_to_CompCol_dist()! */
266:     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
267:       if (lu->FactPattern == SamePattern_SameRowPerm){
268:         Destroy_CompCol_Matrix_dist(&lu->A_sup);
269:         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crashes! Commenting it out does not lead to a memory leak. */
270:         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
271:       } else {
272:         Destroy_CompCol_Matrix_dist(&lu->A_sup);
273:         Destroy_LU(N, &lu->grid, &lu->LUstruct);
274:         lu->options.Fact = SamePattern;
275:       }
276:     }
277: #if defined(PETSC_USE_COMPLEX)
278:     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
279: #else
280:     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
281: #endif

283:     /* Create compressed column matrix A_sup. */
284: #if defined(PETSC_USE_COMPLEX)
285:     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
286: #else
287:     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
288: #endif
289:   } else { /* distributed mat input */
290:     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
291:     aa=(Mat_SeqAIJ*)(mat->A)->data;
292:     bb=(Mat_SeqAIJ*)(mat->B)->data;
293:     ai=aa->i; aj=aa->j;
294:     bi=bb->i; bj=bb->j;
295: #if defined(PETSC_USE_COMPLEX)
296:     av=(doublecomplex*)aa->a;
297:     bv=(doublecomplex*)bb->a;
298: #else
299:     av=aa->a;
300:     bv=bb->a;
301: #endif
302:     rstart = A->rmap.rstart;
303:     nz     = aa->nz + bb->nz;
304:     garray = mat->garray;
305: 
306:     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
307: #if defined(PETSC_USE_COMPLEX)
308:       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
309: #else
310:       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
311: #endif
312:     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
313:       if (lu->FactPattern == SamePattern_SameRowPerm){
314:         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crashes! Commenting it out does not lead to a memory leak. */
315:         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
316:       } else {
317:         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
318:         lu->options.Fact = SamePattern;
319:       }
320:     }
321:     nz = 0; irow = rstart;
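         /* The loop below merges, row by row, the MPIAIJ "diagonal" block A (local column
            indices shifted by rstart to global indices) and the "off-diagonal" block B
            (compressed column indices mapped to global ones through garray[]) into one
            local CSR row in ascending global column order: B entries to the left of the
            diagonal block, then the A entries, then the remaining B entries.  E.g.
            (hypothetical numbers) with rstart=4, A columns {4,6} and B global columns
            {1,9}, the merged row has columns {1,4,6,9}. */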
322:     for ( i=0; i<m; i++ ) {
323:       lu->row[i] = nz;
324:       countA = ai[i+1] - ai[i];
325:       countB = bi[i+1] - bi[i];
326:       ajj = aj + ai[i];  /* ptr to the beginning of this row */
327:       bjj = bj + bi[i];

329:       /* B part, smaller col index */
330:       colA_start = rstart + ajj[0]; /* the smallest global col index of this row's diagonal (A) part */
331:       jB = 0;
332:       for (j=0; j<countB; j++){
333:         jcol = garray[bjj[j]];
334:         if (jcol > colA_start) {
335:           jB = j;
336:           break;
337:         }
338:         lu->col[nz] = jcol;
339:         lu->val[nz++] = *bv++;
340:         if (j==countB-1) jB = countB;
341:       }

343:       /* A part */
344:       for (j=0; j<countA; j++){
345:         lu->col[nz] = rstart + ajj[j];
346:         lu->val[nz++] = *av++;
347:       }

349:       /* B part, larger col index */
350:       for (j=jB; j<countB; j++){
351:         lu->col[nz] = garray[bjj[j]];
352:         lu->val[nz++] = *bv++;
353:       }
354:     }
355:     lu->row[m] = nz;
356: #if defined(PETSC_USE_COMPLEX)
357:     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
358:                                    lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
359: #else
360:     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
361:                                    lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
362: #endif
363:   }
364:   if (lu->options.PrintStat) {
365:     PetscGetTime(&time);
366:     time0 = time - time0;
367:   }

369:   /* Factor the matrix. */
370:   PStatInit(&stat);   /* Initialize the statistics variables. */
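     /* The calls below pass a NULL right-hand side and nrhs = 0, so SuperLU_DIST only
        performs the LU factorization here; the triangular solves are done later in
        MatSolve_SuperLU_DIST(). */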

372:   if (lu->MatInputMode == GLOBAL) { /* global mat input */
373: #if defined(PETSC_USE_COMPLEX)
374:     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
375:                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
376: #else
377:     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
378:                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
379: #endif 
380:   } else { /* distributed mat input */
381: #if defined(PETSC_USE_COMPLEX)
382:     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
383:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
384:     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
385: #else
386:     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
387:             &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
388:     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
389: #endif
390:   }

392:   if (lu->MatInputMode == GLOBAL && size > 1){
393:     MatDestroy(A_seq);
394:   }

396:   if (lu->options.PrintStat) {
397:     if (size > 1){
398:       MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
399:       MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
400:       MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
401:       time = time/size; /* average time */
402:       if (!rank)
403:         PetscPrintf(PETSC_COMM_SELF, "        Mat conversion (PETSc->SuperLU_DIST) time (max/min/avg): \n \
404:                               %g / %g / %g\n",time_max,time_min,time);
405:     } else {
406:       PetscPrintf(PETSC_COMM_SELF, "        Mat conversion (PETSc->SuperLU_DIST) time: \n \
407:                               %g\n",time0);
408:     }
409: 
410:     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
411:   }
412:   PStatFree(&stat);
413:   if (size > 1){
414:     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
415:     F_diag->assembled = PETSC_TRUE;
416:   }
417:   (*F)->assembled  = PETSC_TRUE;
418:   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this function only */
419:   return(0);
420: }

422: /* Note: the PETSc r and c permutations are ignored */
425: PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
426: {
427:   Mat               B;
428:   Mat_SuperLU_DIST  *lu;
429:   PetscErrorCode    ierr;
430:   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
431:   PetscMPIInt       size;
432:   superlu_options_t options;
433:   PetscTruth        flg;
434:   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
435:   const char        *prtype[] = {"LargeDiag","NATURAL"};
436:   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};

439:   /* Create the factorization matrix */
440:   MatCreate(A->comm,&B);
441:   MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);
442:   MatSetType(B,A->type_name);
443:   MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
444:   MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);

446:   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
447:   B->ops->solve            = MatSolve_SuperLU_DIST;
448:   B->factor                = FACTOR_LU;

450:   lu = (Mat_SuperLU_DIST*)(B->spptr);

452:   /*   Set the default input options:
453:         options.Fact = DOFACT;
454:         options.Equil = YES;
455:         options.ColPerm = MMD_AT_PLUS_A;
456:         options.RowPerm = LargeDiag;
457:         options.ReplaceTinyPivot = YES;
458:         options.Trans = NOTRANS;
459:         options.IterRefine = DOUBLE;
460:         options.SolveInitialized = NO;
461:         options.RefineInitialized = NO;
462:         options.PrintStat = YES;
463:   */
464:   set_default_options_dist(&options);

466:   MPI_Comm_dup(A->comm,&(lu->comm_superlu));
467:   MPI_Comm_size(A->comm,&size);
468:   /* Default number of process rows and columns */
469:   lu->npcol = (PetscMPIInt)(0.5 + sqrt((PetscReal)size));
470:   if (!lu->npcol) lu->npcol = 1;
471:   while (lu->npcol > 0) {
472:     lu->nprow = (PetscMPIInt)(size/lu->npcol);
473:     if (size == lu->nprow * lu->npcol) break;
474:     lu->npcol --;
475:   }
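     /* This picks the most nearly square grid whose dimensions divide size exactly,
        e.g. size=6 gives nprow=3, npcol=2; for a prime size=7 it falls back to
        nprow=7, npcol=1.  Both values can be overridden with the options below. */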
476: 
477:   PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");
478:     PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);
479:     PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);
480:     if (size != lu->nprow * lu->npcol)
481:       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal nprow %d * npcol %d",size,lu->nprow,lu->npcol);
482: 
483:     lu->MatInputMode = DISTRIBUTED;
484:     PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);
485:     if (lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;

487:     PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);
488:     if (!flg) {
489:       options.Equil = NO;
490:     }

492:     PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);
493:     if (flg) {
494:       switch (indx) {
495:       case 0:
496:         options.RowPerm = LargeDiag;
497:         break;
498:       case 1:
499:         options.RowPerm = NOROWPERM;
500:         break;
501:       }
502:     }

504:     PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);
505:     if (flg) {
506:       switch (indx) {
507:       case 0:
508:         options.ColPerm = MMD_AT_PLUS_A;
509:         break;
510:       case 1:
511:         options.ColPerm = NATURAL;
512:         break;
513:       case 2:
514:         options.ColPerm = MMD_ATA;
515:         break;
516:       }
517:     }

519:     PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);
520:     if (!flg) {
521:       options.ReplaceTinyPivot = NO;
522:     }

524:     lu->FactPattern = SamePattern;
525:     PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);
526:     if (flg) {
527:       switch (indx) {
528:       case 0:
529:         lu->FactPattern = SamePattern;
530:         break;
531:       case 1:
532:         lu->FactPattern = SamePattern_SameRowPerm;
533:         break;
534:       }
535:     }
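         /* Roughly: SamePattern reuses the column permutation and symbolic structure on
            repeated factorizations, while SamePattern_SameRowPerm additionally reuses the
            row permutation and scalings from the previous factorization (see the
            SuperLU_DIST users' guide for the precise semantics). */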
536: 
537:     options.IterRefine = NOREFINE;
538:     PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);
539:     if (flg) {
540:       options.IterRefine = DOUBLE;
541:     }

543:     if (PetscLogPrintInfo) {
544:       options.PrintStat = YES;
545:     } else {
546:       options.PrintStat = NO;
547:     }
548:     PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
549:                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);
550:   PetscOptionsEnd();

552:   /* Initialize the SuperLU process grid. */
553:   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);

555:   /* Initialize ScalePermstruct and LUstruct. */
556:   ScalePermstructInit(M, N, &lu->ScalePermstruct);
557:   LUstructInit(M, N, &lu->LUstruct);

559:   lu->options             = options;
560:   lu->options.Fact        = DOFACT;
561:   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
562:   *F = B;
563:   return(0);
564: }

568: PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
569:   PetscErrorCode   ierr;
570:   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);

573:   (*lu->MatAssemblyEnd)(A,mode);
574:   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
575:   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
576:   return(0);
577: }

581: PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
582: {
583:   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
584:   superlu_options_t options;
585:   PetscErrorCode    ierr;

588:   /* check if matrix is superlu_dist type */
589:   if (A->ops->solve != MatSolve_SuperLU_DIST) return(0);

591:   options = lu->options;
592:   PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");
593:   PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);
594:   PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);
595:   PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);
596:   PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);
597:   PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);
598:   PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);
599:   PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");
600:   if (options.ColPerm == NATURAL) {
601:     PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");
602:   } else if (options.ColPerm == MMD_AT_PLUS_A) {
603:     PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");
604:   } else if (options.ColPerm == MMD_ATA) {
605:     PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");
606:   } else {
607:     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
608:   }
609: 
610:   if (lu->FactPattern == SamePattern){
611:     PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");
612:   } else {
613:     PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");
614:   }
615:   return(0);
616: }

620: PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
621: {
622:   PetscErrorCode    ierr;
623:   PetscTruth        iascii;
624:   PetscViewerFormat format;
625:   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);

628:   (*lu->MatView)(A,viewer);

630:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
631:   if (iascii) {
632:     PetscViewerGetFormat(viewer,&format);
633:     if (format == PETSC_VIEWER_ASCII_INFO) {
634:       MatFactorInfo_SuperLU_DIST(A,viewer);
635:     }
636:   }
637:   return(0);
638: }


644: PetscErrorCode  MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
645: {
646:   /* This routine is only called to convert to MATSUPERLU_DIST */
647:   /* from MATSEQAIJ if A has a single process communicator */
648:   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
649:   PetscErrorCode   ierr;
650:   PetscMPIInt      size;
651:   MPI_Comm         comm;
652:   Mat              B=*newmat;
653:   Mat_SuperLU_DIST *lu;

656:   PetscNew(Mat_SuperLU_DIST,&lu);
657:   if (reuse == MAT_INITIAL_MATRIX) {
658:     MatDuplicate(A,MAT_COPY_VALUES,&B);
659:     lu->MatDuplicate         = B->ops->duplicate;
660:     lu->MatView              = B->ops->view;
661:     lu->MatAssemblyEnd       = B->ops->assemblyend;
662:     lu->MatLUFactorSymbolic  = B->ops->lufactorsymbolic;
663:     lu->MatDestroy           = B->ops->destroy;
664:   } else {
665:     lu->MatDuplicate         = A->ops->duplicate;
666:     lu->MatView              = A->ops->view;
667:     lu->MatAssemblyEnd       = A->ops->assemblyend;
668:     lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
669:     lu->MatDestroy           = A->ops->destroy;
670:   }
671:   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;

673:   B->spptr                 = (void*)lu;
674:   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
675:   B->ops->view             = MatView_SuperLU_DIST;
676:   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
677:   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
678:   B->ops->destroy          = MatDestroy_SuperLU_DIST;
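     /* The AIJ implementations saved in lu above are restored by
        MatConvert_SuperLU_DIST_AIJ() (and hence by MatDestroy_SuperLU_DIST()), so the
        matrix can later be converted back to its base AIJ type. */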

680:   PetscObjectGetComm((PetscObject)A,&comm);
681:   MPI_Comm_size(comm,&size);
682:   if (size == 1) {
683:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
684:     "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);
685:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
686:     "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);
687:   } else {
688:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
689:                                              "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);
690:     PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
691:                                              "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);
692:   }
693:   PetscInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");
694:   PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);
695:   *newmat = B;
696:   return(0);
697: }

702: PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
703:   PetscErrorCode   ierr;
704:   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

707:   (*lu->MatDuplicate)(A,op,M);
708:   PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));
709:   return(0);
710: }

712: /*MC
713:   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices 
714:   via the external package SuperLU_DIST.

716:   If SuperLU_DIST is installed (see the manual for
717:   instructions on how to declare the existence of external packages),
718:   a matrix type can be constructed which invokes SuperLU_DIST solvers.
719:   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).

721:   This matrix inherits from MATSEQAIJ when constructed with a single-process communicator,
722:   and from MATMPIAIJ otherwise.  As a result, MatSeqAIJSetPreallocation() is supported for
723:   single-process communicators, and MatMPIAIJSetPreallocation() is supported for communicators
724:   controlling multiple processes.  It is recommended that you call both of these preallocation
725:   routines for simplicity.  One can also call MatConvert() for an in-place conversion to or
726:   from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size) without copying
727:   data.

729:   Options Database Keys:
730: + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
731: . -mat_superlu_dist_r <n> - number of rows in processor partition
732: . -mat_superlu_dist_c <n> - number of columns in processor partition
733: . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
734: . -mat_superlu_dist_equil - equilibrate the matrix
735: . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
736: . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
737: . -mat_superlu_dist_replacetinypivot - replace tiny pivots
738: . -mat_superlu_dist_fact <SamePattern,SamePattern_SameRowPerm> - sparsity pattern for repeated matrix factorization
739: . -mat_superlu_dist_iterrefine - use iterative refinement
740: - -mat_superlu_dist_statprint - print factorization information

742:    Level: beginner

744: .seealso: PCLU
745: M*/
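/*
   A minimal usage sketch (not part of this file); the matrix size n and the
   preallocation counts nz, d_nz and o_nz below are illustrative placeholders:

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
     MatSetType(A,MATSUPERLU_DIST);
     MatSeqAIJSetPreallocation(A,nz,PETSC_NULL);
     MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
     ... insert values and assemble A ...

   The factorization is then typically triggered through KSP/PC with -pc_type lu, or by
   calling MatLUFactorSymbolic(), MatLUFactorNumeric() and MatSolve() directly.
*/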

750: PetscErrorCode  MatCreate_SuperLU_DIST(Mat A)
751: {
753:   PetscMPIInt    size;

756:   MPI_Comm_size(A->comm,&size);
757:   if (size == 1) {
758:     MatSetType(A,MATSEQAIJ);
759:   } else {
760:     MatSetType(A,MATMPIAIJ);
761:     /*  A_diag = 0x0 ???  -- do we need it?
762:     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
763:     MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);
764:     */
765:   }
766:   MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);
767:   return(0);
768: }