Actual source code: aijfact.c

  1: #define PETSCMAT_DLL

 3:  #include src/mat/impls/aij/seq/aij.h
 4:  #include src/inline/dot.h
 5:  #include src/inline/spops.h
 6:  #include petscbt.h
 7:  #include src/mat/utils/freespace.h

 11: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
 12: {

 15:   SETERRQ(PETSC_ERR_SUP,"Code not written");
 16: #if !defined(PETSC_USE_DEBUG)
 17:   return(0);
 18: #endif
 19: }


 22: #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
 23: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
 24: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
 25: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
 26: #endif

 30:   /* ------------------------------------------------------------

 32:           This interface was contribed by Tony Caola

 34:      This routine is an interface to the pivoting drop-tolerance 
 35:      ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 
 36:      SPARSEKIT2.

 38:      The SPARSEKIT2 routines used here are covered by the GNU 
 39:      copyright; see the file gnu in this directory.

 41:      Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
 42:      help in getting this routine ironed out.

 44:      The major drawback to this routine is that if info->fill is 
 45:      not large enough it fails rather than allocating more space;
 46:      this can be fixed by hacking/improving the f2c version of 
 47:      Yousef Saad's code.

 49:      ------------------------------------------------------------
 50: */
 51: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
 52: {
 53: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
 55:   SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
 56:   You can obtain the drop tolerance routines by installing PETSc from\n\
 57:   www.mcs.anl.gov/petsc\n");
 58: #else
 59:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b;
 60:   IS             iscolf,isicol,isirow;
 61:   PetscTruth     reorder;
 62:   PetscErrorCode ierr,sierr;
 63:   PetscInt       *c,*r,*ic,i,n = A->rmap.n;
 64:   PetscInt       *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
 65:   PetscInt       *ordcol,*iwk,*iperm,*jw;
 66:   PetscInt       jmax,lfill,job,*o_i,*o_j;
 67:   PetscScalar    *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
 68:   PetscReal      af;


 72:   if (info->dt == PETSC_DEFAULT)      info->dt      = .005;
 73:   if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
 74:   if (info->dtcol == PETSC_DEFAULT)   info->dtcol   = .01;
 75:   if (info->fill == PETSC_DEFAULT)    info->fill    = ((double)(n*(info->dtcount+1)))/a->nz;
 76:   lfill   = (PetscInt)(info->dtcount/2.0);
 77:   jmax    = (PetscInt)(info->fill*a->nz);


 80:   /* ------------------------------------------------------------
 81:      If reorder=.TRUE., then the original matrix has to be 
 82:      reordered to reflect the user selected ordering scheme, and
 83:      then de-reordered so it is in it's original format.  
 84:      Because Saad's dperm() is NOT in place, we have to copy 
 85:      the original matrix and allocate more storage. . . 
 86:      ------------------------------------------------------------
 87:   */

 89:   /* set reorder to true if either isrow or iscol is not identity */
 90:   ISIdentity(isrow,&reorder);
 91:   if (reorder) {ISIdentity(iscol,&reorder);}
 92:   reorder = PetscNot(reorder);

 94: 
 95:   /* storage for ilu factor */
 96:   PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
 97:   PetscMalloc(jmax*sizeof(PetscInt),&new_j);
 98:   PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
 99:   PetscMalloc(n*sizeof(PetscInt),&ordcol);

101:   /* ------------------------------------------------------------
102:      Make sure that everything is Fortran formatted (1-Based)
103:      ------------------------------------------------------------
104:   */
105:   for (i=old_i[0];i<old_i[n];i++) {
106:     old_j[i]++;
107:   }
108:   for(i=0;i<n+1;i++) {
109:     old_i[i]++;
110:   };
111: 

113:   if (reorder) {
114:     ISGetIndices(iscol,&c);
115:     ISGetIndices(isrow,&r);
116:     for(i=0;i<n;i++) {
117:       r[i]  = r[i]+1;
118:       c[i]  = c[i]+1;
119:     }
120:     PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
121:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
122:     PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
123:     job  = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124:     for (i=0;i<n;i++) {
125:       r[i]  = r[i]-1;
126:       c[i]  = c[i]-1;
127:     }
128:     ISRestoreIndices(iscol,&c);
129:     ISRestoreIndices(isrow,&r);
130:     o_a = old_a2;
131:     o_j = old_j2;
132:     o_i = old_i2;
133:   } else {
134:     o_a = old_a;
135:     o_j = old_j;
136:     o_i = old_i;
137:   }

139:   /* ------------------------------------------------------------
140:      Call Saad's ilutp() routine to generate the factorization
141:      ------------------------------------------------------------
142:   */

144:   PetscMalloc(2*n*sizeof(PetscInt),&iperm);
145:   PetscMalloc(2*n*sizeof(PetscInt),&jw);
146:   PetscMalloc(n*sizeof(PetscScalar),&w);

148:   SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149:   if (sierr) {
150:     switch (sierr) {
151:       case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
152:       case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
153:       case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154:       case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155:       case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156:       default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157:     }
158:   }

160:   PetscFree(w);
161:   PetscFree(jw);

163:   /* ------------------------------------------------------------
164:      Saad's routine gives the result in Modified Sparse Row (msr)
165:      Convert to Compressed Sparse Row format (csr) 
166:      ------------------------------------------------------------
167:   */

169:   PetscMalloc(n*sizeof(PetscScalar),&wk);
170:   PetscMalloc((n+1)*sizeof(PetscInt),&iwk);

172:   SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);

174:   PetscFree(iwk);
175:   PetscFree(wk);

177:   if (reorder) {
178:     PetscFree(old_a2);
179:     PetscFree(old_j2);
180:     PetscFree(old_i2);
181:   } else {
182:     /* fix permutation of old_j that the factorization introduced */
183:     for (i=old_i[0]; i<old_i[n]; i++) {
184:       old_j[i-1] = iperm[old_j[i-1]-1];
185:     }
186:   }

188:   /* get rid of the shift to indices starting at 1 */
189:   for (i=0; i<n+1; i++) {
190:     old_i[i]--;
191:   }
192:   for (i=old_i[0];i<old_i[n];i++) {
193:     old_j[i]--;
194:   }
195: 
196:   /* Make the factored matrix 0-based */
197:   for (i=0; i<n+1; i++) {
198:     new_i[i]--;
199:   }
200:   for (i=new_i[0];i<new_i[n];i++) {
201:     new_j[i]--;
202:   }

204:   /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205:   /*-- permute the right-hand-side and solution vectors           --*/
206:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
207:   ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
208:   ISGetIndices(isicol,&ic);
209:   for(i=0; i<n; i++) {
210:     ordcol[i] = ic[iperm[i]-1];
211:   };
212:   ISRestoreIndices(isicol,&ic);
213:   ISDestroy(isicol);

215:   PetscFree(iperm);

217:   ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
218:   PetscFree(ordcol);

220:   /*----- put together the new matrix -----*/

222:   MatCreate(A->comm,fact);
223:   MatSetSizes(*fact,n,n,n,n);
224:   MatSetType(*fact,A->type_name);
225:   MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
226:   (*fact)->factor    = FACTOR_LU;
227:   (*fact)->assembled = PETSC_TRUE;

229:   b = (Mat_SeqAIJ*)(*fact)->data;
230:   b->free_a        = PETSC_TRUE;
231:   b->free_ij       = PETSC_TRUE;
232:   b->sorted        = PETSC_FALSE;
233:   b->singlemalloc  = PETSC_FALSE;
234:   b->a             = new_a;
235:   b->j             = new_j;
236:   b->i             = new_i;
237:   b->ilen          = 0;
238:   b->imax          = 0;
239:   /*  I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
240:   b->row           = isirow;
241:   b->col           = iscolf;
242:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
243:   b->maxnz = b->nz = new_i[n];
244:   MatMarkDiagonal_SeqAIJ(*fact);
245:   (*fact)->info.factor_mallocs = 0;

247:   af = ((double)b->nz)/((double)a->nz) + .001;
248:   PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);
249:   PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
250:   PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
251:   PetscInfo(A,"for best performance.\n");

253:   MatILUDTFactor_Inode(A,isrow,iscol,info,fact);

255:   return(0);
256: #endif
257: }

261: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
262: {
263:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
264:   IS                 isicol;
265:   PetscErrorCode     ierr;
266:   PetscInt           *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
267:   PetscInt           *bi,*bj,*ajtmp;
268:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
269:   PetscReal          f;
270:   PetscInt           nlnk,*lnk,k,**bi_ptr;
271:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
272:   PetscBT            lnkbt;

275:   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
276:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
277:   ISGetIndices(isrow,&r);
278:   ISGetIndices(isicol,&ic);

280:   /* get new row pointers */
281:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
282:   bi[0] = 0;

284:   /* bdiag is location of diagonal in factor */
285:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
286:   bdiag[0] = 0;

288:   /* linked list for storing column indices of the active row */
289:   nlnk = n + 1;
290:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

292:   PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);

294:   /* initial FreeSpace size is f*(ai[n]+1) */
295:   f = info->fill;
296:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
297:   current_space = free_space;

299:   for (i=0; i<n; i++) {
300:     /* copy previous fill into linked list */
301:     nzi = 0;
302:     nnz = ai[r[i]+1] - ai[r[i]];
303:     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
304:     ajtmp = aj + ai[r[i]];
305:     PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
306:     nzi += nlnk;

308:     /* add pivot rows into linked list */
309:     row = lnk[n];
310:     while (row < i) {
311:       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
312:       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
313:       PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
314:       nzi += nlnk;
315:       row  = lnk[row];
316:     }
317:     bi[i+1] = bi[i] + nzi;
318:     im[i]   = nzi;

320:     /* mark bdiag */
321:     nzbd = 0;
322:     nnz  = nzi;
323:     k    = lnk[n];
324:     while (nnz-- && k < i){
325:       nzbd++;
326:       k = lnk[k];
327:     }
328:     bdiag[i] = bi[i] + nzbd;

330:     /* if free space is not available, make more free space */
331:     if (current_space->local_remaining<nzi) {
332:       nnz = (n - i)*nzi; /* estimated and max additional space needed */
333:       PetscFreeSpaceGet(nnz,&current_space);
334:       reallocs++;
335:     }

337:     /* copy data into free space, then initialize lnk */
338:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
339:     bi_ptr[i] = current_space->array;
340:     current_space->array           += nzi;
341:     current_space->local_used      += nzi;
342:     current_space->local_remaining -= nzi;
343:   }
344: #if defined(PETSC_USE_INFO)
345:   if (ai[n] != 0) {
346:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
347:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
348:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
349:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
350:     PetscInfo(A,"for best performance.\n");
351:   } else {
352:     PetscInfo(A,"Empty matrix\n");
353:   }
354: #endif

356:   ISRestoreIndices(isrow,&r);
357:   ISRestoreIndices(isicol,&ic);

359:   /* destroy list of free space and other temporary array(s) */
360:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
361:   PetscFreeSpaceContiguous(&free_space,bj);
362:   PetscLLDestroy(lnk,lnkbt);
363:   PetscFree2(bi_ptr,im);

365:   /* put together the new matrix */
366:   MatCreate(A->comm,B);
367:   MatSetSizes(*B,n,n,n,n);
368:   MatSetType(*B,A->type_name);
369:   MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);
370:   PetscLogObjectParent(*B,isicol);
371:   b    = (Mat_SeqAIJ*)(*B)->data;
372:   b->free_a       = PETSC_TRUE;
373:   b->free_ij      = PETSC_TRUE;
374:   b->singlemalloc = PETSC_FALSE;
375:   PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
376:   b->j          = bj;
377:   b->i          = bi;
378:   b->diag       = bdiag;
379:   b->ilen       = 0;
380:   b->imax       = 0;
381:   b->row        = isrow;
382:   b->col        = iscol;
383:   PetscObjectReference((PetscObject)isrow);
384:   PetscObjectReference((PetscObject)iscol);
385:   b->icol       = isicol;
386:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);

388:   /* In b structure:  Free imax, ilen, old a, old j.  Allocate solve_work, new a, new j */
389:   PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
390:   b->maxnz = b->nz = bi[n] ;

392:   (*B)->factor                 =  FACTOR_LU;
393:   (*B)->info.factor_mallocs    = reallocs;
394:   (*B)->info.fill_ratio_given  = f;

396:   if (ai[n] != 0) {
397:     (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
398:   } else {
399:     (*B)->info.fill_ratio_needed = 0.0;
400:   }
401:   MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);
402:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
403:   return(0);
404: }

406: /*
407:     Trouble in factorization, should we dump the original matrix?
408: */
411: PetscErrorCode MatFactorDumpMatrix(Mat A)
412: {
414:   PetscTruth     flg;

417:   PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);
418:   if (flg) {
419:     PetscViewer viewer;
420:     char        filename[PETSC_MAX_PATH_LEN];

422:     PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
423:     PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);
424:     MatView(A,viewer);
425:     PetscViewerDestroy(viewer);
426:   }
427:   return(0);
428: }

430: /* ----------------------------------------------------------- */
433: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
434: {
435:   Mat            C=*B;
436:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
437:   IS             isrow = b->row,isicol = b->icol;
439:   PetscInt       *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
440:   PetscInt       *ajtmp,*bjtmp,nz,row,*ics;
441:   PetscInt       *diag_offset = b->diag,diag,*pj;
442:   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,*rtmps;
443:   PetscScalar    d;
444:   PetscReal      rs;
445:   LUShift_Ctx    sctx;
446:   PetscInt       newshift,*ddiag;

449:   ISGetIndices(isrow,&r);
450:   ISGetIndices(isicol,&ic);
451:   PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
452:   PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
453:   rtmps = rtmp; ics = ic;

455:   sctx.shift_top  = 0;
456:   sctx.nshift_max = 0;
457:   sctx.shift_lo   = 0;
458:   sctx.shift_hi   = 0;

460:   /* if both shift schemes are chosen by user, only use info->shiftpd */
461:   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
462:   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
463:     PetscInt *aai = a->i;
464:     ddiag          = a->diag;
465:     sctx.shift_top = 0;
466:     for (i=0; i<n; i++) {
467:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
468:       d  = (a->a)[ddiag[i]];
469:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
470:       v  = a->a+aai[i];
471:       nz = aai[i+1] - aai[i];
472:       for (j=0; j<nz; j++)
473:         rs += PetscAbsScalar(v[j]);
474:       if (rs>sctx.shift_top) sctx.shift_top = rs;
475:     }
476:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
477:     sctx.shift_top    *= 1.1;
478:     sctx.nshift_max   = 5;
479:     sctx.shift_lo     = 0.;
480:     sctx.shift_hi     = 1.;
481:   }

483:   sctx.shift_amount = 0;
484:   sctx.nshift       = 0;
485:   do {
486:     sctx.lushift = PETSC_FALSE;
487:     for (i=0; i<n; i++){
488:       nz    = bi[i+1] - bi[i];
489:       bjtmp = bj + bi[i];
490:       for  (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;

492:       /* load in initial (unfactored row) */
493:       nz    = a->i[r[i]+1] - a->i[r[i]];
494:       ajtmp = a->j + a->i[r[i]];
495:       v     = a->a + a->i[r[i]];
496:       for (j=0; j<nz; j++) {
497:         rtmp[ics[ajtmp[j]]] = v[j];
498:       }
499:       rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */

501:       row = *bjtmp++;
502:       while  (row < i) {
503:         pc = rtmp + row;
504:         if (*pc != 0.0) {
505:           pv         = b->a + diag_offset[row];
506:           pj         = b->j + diag_offset[row] + 1;
507:           multiplier = *pc / *pv++;
508:           *pc        = multiplier;
509:           nz         = bi[row+1] - diag_offset[row] - 1;
510:           for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
511:           PetscLogFlops(2*nz);
512:         }
513:         row = *bjtmp++;
514:       }
515:       /* finished row so stick it into b->a */
516:       pv   = b->a + bi[i] ;
517:       pj   = b->j + bi[i] ;
518:       nz   = bi[i+1] - bi[i];
519:       diag = diag_offset[i] - bi[i];
520:       rs   = 0.0;
521:       for (j=0; j<nz; j++) {
522:         pv[j] = rtmps[pj[j]];
523:         if (j != diag) rs += PetscAbsScalar(pv[j]);
524:       }

526:       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
527:       sctx.rs  = rs;
528:       sctx.pv  = pv[diag];
529:       MatLUCheckShift_inline(info,sctx,i,newshift);
530:       if (newshift == 1) break;
531:     }

533:     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
534:       /*
535:        * if no shift in this attempt & shifting & started shifting & can refine,
536:        * then try lower shift
537:        */
538:       sctx.shift_hi        = info->shift_fraction;
539:       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
540:       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
541:       sctx.lushift         = PETSC_TRUE;
542:       sctx.nshift++;
543:     }
544:   } while (sctx.lushift);

546:   /* invert diagonal entries for simplier triangular solves */
547:   for (i=0; i<n; i++) {
548:     b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
549:   }

551:   PetscFree(rtmp);
552:   ISRestoreIndices(isicol,&ic);
553:   ISRestoreIndices(isrow,&r);
554:   C->factor = FACTOR_LU;
555:   (*B)->ops->lufactornumeric   =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
556:   C->assembled = PETSC_TRUE;
557:   PetscLogFlops(C->cmap.n);
558:   if (sctx.nshift){
559:     if (info->shiftnz) {
560:       PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
561:     } else if (info->shiftpd) {
562:       PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
563:     }
564:   }
565:   return(0);
566: }

568: /* 
569:    This routine implements inplace ILU(0) with row or/and column permutations. 
570:    Input: 
571:      A - original matrix
572:    Output;
573:      A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 
574:          a->j (col index) is permuted by the inverse of colperm, then sorted
575:          a->a reordered accordingly with a->j
576:          a->diag (ptr to diagonal elements) is updated.
577: */
580: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B)
581: {
582:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
583:   IS             isrow = a->row,isicol = a->icol;
585:   PetscInt       *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j;
586:   PetscInt       *ajtmp,nz,row,*ics;
587:   PetscInt       *diag = a->diag,nbdiag,*pj;
588:   PetscScalar    *rtmp,*v,*pc,multiplier,*pv,d;
589:   PetscReal      rs;
590:   LUShift_Ctx    sctx;
591:   PetscInt       newshift;

594:   if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
595:   ISGetIndices(isrow,&r);
596:   ISGetIndices(isicol,&ic);
597:   PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
598:   PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
599:   ics = ic;

601:   sctx.shift_top  = 0;
602:   sctx.nshift_max = 0;
603:   sctx.shift_lo   = 0;
604:   sctx.shift_hi   = 0;

606:   /* if both shift schemes are chosen by user, only use info->shiftpd */
607:   if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
608:   if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
609:     sctx.shift_top = 0;
610:     for (i=0; i<n; i++) {
611:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
612:       d  = (a->a)[diag[i]];
613:       rs = -PetscAbsScalar(d) - PetscRealPart(d);
614:       v  = a->a+ai[i];
615:       nz = ai[i+1] - ai[i];
616:       for (j=0; j<nz; j++)
617:         rs += PetscAbsScalar(v[j]);
618:       if (rs>sctx.shift_top) sctx.shift_top = rs;
619:     }
620:     if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
621:     sctx.shift_top    *= 1.1;
622:     sctx.nshift_max   = 5;
623:     sctx.shift_lo     = 0.;
624:     sctx.shift_hi     = 1.;
625:   }

627:   sctx.shift_amount = 0;
628:   sctx.nshift       = 0;
629:   do {
630:     sctx.lushift = PETSC_FALSE;
631:     for (i=0; i<n; i++){
632:       /* load in initial unfactored row */
633:       nz    = ai[r[i]+1] - ai[r[i]];
634:       ajtmp = aj + ai[r[i]];
635:       v     = a->a + ai[r[i]];
636:       /* sort permuted ajtmp and values v accordingly */
637:       for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
638:       PetscSortIntWithScalarArray(nz,ajtmp,v);

640:       diag[r[i]] = ai[r[i]];
641:       for (j=0; j<nz; j++) {
642:         rtmp[ajtmp[j]] = v[j];
643:         if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
644:       }
645:       rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */

647:       row = *ajtmp++;
648:       while  (row < i) {
649:         pc = rtmp + row;
650:         if (*pc != 0.0) {
651:           pv         = a->a + diag[r[row]];
652:           pj         = aj + diag[r[row]] + 1;

654:           multiplier = *pc / *pv++;
655:           *pc        = multiplier;
656:           nz         = ai[r[row]+1] - diag[r[row]] - 1;
657:           for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
658:           PetscLogFlops(2*nz);
659:         }
660:         row = *ajtmp++;
661:       }
662:       /* finished row so overwrite it onto a->a */
663:       pv   = a->a + ai[r[i]] ;
664:       pj   = aj + ai[r[i]] ;
665:       nz   = ai[r[i]+1] - ai[r[i]];
666:       nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
667: 
668:       rs   = 0.0;
669:       for (j=0; j<nz; j++) {
670:         pv[j] = rtmp[pj[j]];
671:         if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
672:       }

674:       /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
675:       sctx.rs  = rs;
676:       sctx.pv  = pv[nbdiag];
677:       MatLUCheckShift_inline(info,sctx,i,newshift);
678:       if (newshift == 1) break;
679:     }

681:     if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
682:       /*
683:        * if no shift in this attempt & shifting & started shifting & can refine,
684:        * then try lower shift
685:        */
686:       sctx.shift_hi        = info->shift_fraction;
687:       info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
688:       sctx.shift_amount    = info->shift_fraction * sctx.shift_top;
689:       sctx.lushift         = PETSC_TRUE;
690:       sctx.nshift++;
691:     }
692:   } while (sctx.lushift);

694:   /* invert diagonal entries for simplier triangular solves */
695:   for (i=0; i<n; i++) {
696:     a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
697:   }

699:   PetscFree(rtmp);
700:   ISRestoreIndices(isicol,&ic);
701:   ISRestoreIndices(isrow,&r);
702:   A->factor     = FACTOR_LU;
703:   A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
704:   A->assembled = PETSC_TRUE;
705:   PetscLogFlops(A->cmap.n);
706:   if (sctx.nshift){
707:     if (info->shiftnz) {
708:       PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
709:     } else if (info->shiftpd) {
710:       PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
711:     }
712:   }
713:   return(0);
714: }

718: PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
719: {
721:   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
722:   A->ops->lufactornumeric  = MatLUFactorNumeric_SeqAIJ;
723:   return(0);
724: }


727: /* ----------------------------------------------------------- */
730: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
731: {
733:   Mat            C;

736:   MatLUFactorSymbolic(A,row,col,info,&C);
737:   MatLUFactorNumeric(A,info,&C);
738:   MatHeaderCopy(A,C);
739:   PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
740:   return(0);
741: }
742: /* ----------------------------------------------------------- */
745: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
746: {
747:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
748:   IS             iscol = a->col,isrow = a->row;
750:   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
751:   PetscInt       nz,*rout,*cout;
752:   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;

755:   if (!n) return(0);

757:   VecGetArray(bb,&b);
758:   VecGetArray(xx,&x);
759:   tmp  = a->solve_work;

761:   ISGetIndices(isrow,&rout); r = rout;
762:   ISGetIndices(iscol,&cout); c = cout + (n-1);

764:   /* forward solve the lower triangular */
765:   tmp[0] = b[*r++];
766:   tmps   = tmp;
767:   for (i=1; i<n; i++) {
768:     v   = aa + ai[i] ;
769:     vi  = aj + ai[i] ;
770:     nz  = a->diag[i] - ai[i];
771:     sum = b[*r++];
772:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
773:     tmp[i] = sum;
774:   }

776:   /* backward solve the upper triangular */
777:   for (i=n-1; i>=0; i--){
778:     v   = aa + a->diag[i] + 1;
779:     vi  = aj + a->diag[i] + 1;
780:     nz  = ai[i+1] - a->diag[i] - 1;
781:     sum = tmp[i];
782:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
783:     x[*c--] = tmp[i] = sum*aa[a->diag[i]];
784:   }

786:   ISRestoreIndices(isrow,&rout);
787:   ISRestoreIndices(iscol,&cout);
788:   VecRestoreArray(bb,&b);
789:   VecRestoreArray(xx,&x);
790:   PetscLogFlops(2*a->nz - A->cmap.n);
791:   return(0);
792: }

796: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
797: {
798:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
799:   IS             iscol = a->col,isrow = a->row;
801:   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
802:   PetscInt       nz,*rout,*cout,neq;
803:   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;

806:   if (!n) return(0);

808:   MatGetArray(B,&b);
809:   MatGetArray(X,&x);
810: 
811:   tmp  = a->solve_work;
812:   ISGetIndices(isrow,&rout); r = rout;
813:   ISGetIndices(iscol,&cout); c = cout;

815:   for (neq=0; neq<n; neq++){
816:     /* forward solve the lower triangular */
817:     tmp[0] = b[r[0]];
818:     tmps   = tmp;
819:     for (i=1; i<n; i++) {
820:       v   = aa + ai[i] ;
821:       vi  = aj + ai[i] ;
822:       nz  = a->diag[i] - ai[i];
823:       sum = b[r[i]];
824:       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
825:       tmp[i] = sum;
826:     }
827:     /* backward solve the upper triangular */
828:     for (i=n-1; i>=0; i--){
829:       v   = aa + a->diag[i] + 1;
830:       vi  = aj + a->diag[i] + 1;
831:       nz  = ai[i+1] - a->diag[i] - 1;
832:       sum = tmp[i];
833:       SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
834:       x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
835:     }

837:     b += n;
838:     x += n;
839:   }
840:   ISRestoreIndices(isrow,&rout);
841:   ISRestoreIndices(iscol,&cout);
842:   MatRestoreArray(B,&b);
843:   MatRestoreArray(X,&x);
844:   PetscLogFlops(n*(2*a->nz - n));
845:   return(0);
846: }

850: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
851: {
852:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
853:   IS             iscol = a->col,isrow = a->row;
855:   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
856:   PetscInt       nz,*rout,*cout,row;
857:   PetscScalar    *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;

860:   if (!n) return(0);

862:   VecGetArray(bb,&b);
863:   VecGetArray(xx,&x);
864:   tmp  = a->solve_work;

866:   ISGetIndices(isrow,&rout); r = rout;
867:   ISGetIndices(iscol,&cout); c = cout + (n-1);

869:   /* forward solve the lower triangular */
870:   tmp[0] = b[*r++];
871:   tmps   = tmp;
872:   for (row=1; row<n; row++) {
873:     i   = rout[row]; /* permuted row */
874:     v   = aa + ai[i] ;
875:     vi  = aj + ai[i] ;
876:     nz  = a->diag[i] - ai[i];
877:     sum = b[*r++];
878:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
879:     tmp[row] = sum;
880:   }

882:   /* backward solve the upper triangular */
883:   for (row=n-1; row>=0; row--){
884:     i   = rout[row]; /* permuted row */
885:     v   = aa + a->diag[i] + 1;
886:     vi  = aj + a->diag[i] + 1;
887:     nz  = ai[i+1] - a->diag[i] - 1;
888:     sum = tmp[row];
889:     SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
890:     x[*c--] = tmp[row] = sum*aa[a->diag[i]];
891:   }

893:   ISRestoreIndices(isrow,&rout);
894:   ISRestoreIndices(iscol,&cout);
895:   VecRestoreArray(bb,&b);
896:   VecRestoreArray(xx,&x);
897:   PetscLogFlops(2*a->nz - A->cmap.n);
898:   return(0);
899: }

901: /* ----------------------------------------------------------- */
904: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
905: {
906:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
908:   PetscInt       n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
909:   PetscScalar    *x,*b,*aa = a->a;
910: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
911:   PetscInt       adiag_i,i,*vi,nz,ai_i;
912:   PetscScalar    *v,sum;
913: #endif

916:   if (!n) return(0);

918:   VecGetArray(bb,&b);
919:   VecGetArray(xx,&x);

921: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
922:   fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
923: #else
924:   /* forward solve the lower triangular */
925:   x[0] = b[0];
926:   for (i=1; i<n; i++) {
927:     ai_i = ai[i];
928:     v    = aa + ai_i;
929:     vi   = aj + ai_i;
930:     nz   = adiag[i] - ai_i;
931:     sum  = b[i];
932:     while (nz--) sum -= *v++ * x[*vi++];
933:     x[i] = sum;
934:   }

936:   /* backward solve the upper triangular */
937:   for (i=n-1; i>=0; i--){
938:     adiag_i = adiag[i];
939:     v       = aa + adiag_i + 1;
940:     vi      = aj + adiag_i + 1;
941:     nz      = ai[i+1] - adiag_i - 1;
942:     sum     = x[i];
943:     while (nz--) sum -= *v++ * x[*vi++];
944:     x[i]    = sum*aa[adiag_i];
945:   }
946: #endif
947:   PetscLogFlops(2*a->nz - A->cmap.n);
948:   VecRestoreArray(bb,&b);
949:   VecRestoreArray(xx,&x);
950:   return(0);
951: }

955: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
956: {
957:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
958:   IS             iscol = a->col,isrow = a->row;
960:   PetscInt       *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
961:   PetscInt       nz,*rout,*cout;
962:   PetscScalar    *x,*b,*tmp,*aa = a->a,sum,*v;

965:   if (yy != xx) {VecCopy(yy,xx);}

967:   VecGetArray(bb,&b);
968:   VecGetArray(xx,&x);
969:   tmp  = a->solve_work;

971:   ISGetIndices(isrow,&rout); r = rout;
972:   ISGetIndices(iscol,&cout); c = cout + (n-1);

974:   /* forward solve the lower triangular */
975:   tmp[0] = b[*r++];
976:   for (i=1; i<n; i++) {
977:     v   = aa + ai[i] ;
978:     vi  = aj + ai[i] ;
979:     nz  = a->diag[i] - ai[i];
980:     sum = b[*r++];
981:     while (nz--) sum -= *v++ * tmp[*vi++ ];
982:     tmp[i] = sum;
983:   }

985:   /* backward solve the upper triangular */
986:   for (i=n-1; i>=0; i--){
987:     v   = aa + a->diag[i] + 1;
988:     vi  = aj + a->diag[i] + 1;
989:     nz  = ai[i+1] - a->diag[i] - 1;
990:     sum = tmp[i];
991:     while (nz--) sum -= *v++ * tmp[*vi++ ];
992:     tmp[i] = sum*aa[a->diag[i]];
993:     x[*c--] += tmp[i];
994:   }

996:   ISRestoreIndices(isrow,&rout);
997:   ISRestoreIndices(iscol,&cout);
998:   VecRestoreArray(bb,&b);
999:   VecRestoreArray(xx,&x);
1000:   PetscLogFlops(2*a->nz);

1002:   return(0);
1003: }
1004: /* -------------------------------------------------------------------*/
1007: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1008: {
1009:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1010:   IS             iscol = a->col,isrow = a->row;
1012:   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1013:   PetscInt       nz,*rout,*cout,*diag = a->diag;
1014:   PetscScalar    *x,*b,*tmp,*aa = a->a,*v,s1;

1017:   VecGetArray(bb,&b);
1018:   VecGetArray(xx,&x);
1019:   tmp  = a->solve_work;

1021:   ISGetIndices(isrow,&rout); r = rout;
1022:   ISGetIndices(iscol,&cout); c = cout;

1024:   /* copy the b into temp work space according to permutation */
1025:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1027:   /* forward solve the U^T */
1028:   for (i=0; i<n; i++) {
1029:     v   = aa + diag[i] ;
1030:     vi  = aj + diag[i] + 1;
1031:     nz  = ai[i+1] - diag[i] - 1;
1032:     s1  = tmp[i];
1033:     s1 *= (*v++);  /* multiply by inverse of diagonal entry */
1034:     while (nz--) {
1035:       tmp[*vi++ ] -= (*v++)*s1;
1036:     }
1037:     tmp[i] = s1;
1038:   }

1040:   /* backward solve the L^T */
1041:   for (i=n-1; i>=0; i--){
1042:     v   = aa + diag[i] - 1 ;
1043:     vi  = aj + diag[i] - 1 ;
1044:     nz  = diag[i] - ai[i];
1045:     s1  = tmp[i];
1046:     while (nz--) {
1047:       tmp[*vi-- ] -= (*v--)*s1;
1048:     }
1049:   }

1051:   /* copy tmp into x according to permutation */
1052:   for (i=0; i<n; i++) x[r[i]] = tmp[i];

1054:   ISRestoreIndices(isrow,&rout);
1055:   ISRestoreIndices(iscol,&cout);
1056:   VecRestoreArray(bb,&b);
1057:   VecRestoreArray(xx,&x);

1059:   PetscLogFlops(2*a->nz-A->cmap.n);
1060:   return(0);
1061: }

1065: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1066: {
1067:   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
1068:   IS             iscol = a->col,isrow = a->row;
1070:   PetscInt       *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1071:   PetscInt       nz,*rout,*cout,*diag = a->diag;
1072:   PetscScalar    *x,*b,*tmp,*aa = a->a,*v;

1075:   if (zz != xx) {VecCopy(zz,xx);}

1077:   VecGetArray(bb,&b);
1078:   VecGetArray(xx,&x);
1079:   tmp = a->solve_work;

1081:   ISGetIndices(isrow,&rout); r = rout;
1082:   ISGetIndices(iscol,&cout); c = cout;

1084:   /* copy the b into temp work space according to permutation */
1085:   for (i=0; i<n; i++) tmp[i] = b[c[i]];

1087:   /* forward solve the U^T */
1088:   for (i=0; i<n; i++) {
1089:     v   = aa + diag[i] ;
1090:     vi  = aj + diag[i] + 1;
1091:     nz  = ai[i+1] - diag[i] - 1;
1092:     tmp[i] *= *v++;
1093:     while (nz--) {
1094:       tmp[*vi++ ] -= (*v++)*tmp[i];
1095:     }
1096:   }

1098:   /* backward solve the L^T */
1099:   for (i=n-1; i>=0; i--){
1100:     v   = aa + diag[i] - 1 ;
1101:     vi  = aj + diag[i] - 1 ;
1102:     nz  = diag[i] - ai[i];
1103:     while (nz--) {
1104:       tmp[*vi-- ] -= (*v--)*tmp[i];
1105:     }
1106:   }

1108:   /* copy tmp into x according to permutation */
1109:   for (i=0; i<n; i++) x[r[i]] += tmp[i];

1111:   ISRestoreIndices(isrow,&rout);
1112:   ISRestoreIndices(iscol,&cout);
1113:   VecRestoreArray(bb,&b);
1114:   VecRestoreArray(xx,&x);

1116:   PetscLogFlops(2*a->nz);
1117:   return(0);
1118: }
1119: /* ----------------------------------------------------------------*/
1120: EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat,PetscTruth*,PetscInt*);

1124: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
1125: {
1126:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data,*b;
1127:   IS                 isicol;
1128:   PetscErrorCode     ierr;
1129:   PetscInt           *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d;
1130:   PetscInt           *bi,*cols,nnz,*cols_lvl;
1131:   PetscInt           *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1132:   PetscInt           i,levels,diagonal_fill;
1133:   PetscTruth         col_identity,row_identity;
1134:   PetscReal          f;
1135:   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1136:   PetscBT            lnkbt;
1137:   PetscInt           nzi,*bj,**bj_ptr,**bjlvl_ptr;
1138:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1139:   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1140:   PetscTruth         missing;

1143:   f             = info->fill;
1144:   levels        = (PetscInt)info->levels;
1145:   diagonal_fill = (PetscInt)info->diagonal_fill;
1146:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);

1148:   /* special case that simply copies fill pattern */
1149:   ISIdentity(isrow,&row_identity);
1150:   ISIdentity(iscol,&col_identity);
1151:   if (!levels && row_identity && col_identity) {
1152:     MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
1153:     (*fact)->factor                 = FACTOR_LU;
1154:     (*fact)->info.factor_mallocs    = 0;
1155:     (*fact)->info.fill_ratio_given  = info->fill;
1156:     (*fact)->info.fill_ratio_needed = 1.0;
1157:     b               = (Mat_SeqAIJ*)(*fact)->data;
1158:     MatMissingDiagonal_SeqAIJ(*fact,&missing,&d);
1159:     if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1160:     b->row              = isrow;
1161:     b->col              = iscol;
1162:     b->icol             = isicol;
1163:     PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);
1164:     (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
1165:     PetscObjectReference((PetscObject)isrow);
1166:     PetscObjectReference((PetscObject)iscol);
1167:     return(0);
1168:   }

1170:   ISGetIndices(isrow,&r);
1171:   ISGetIndices(isicol,&ic);

1173:   /* get new row pointers */
1174:   PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1175:   bi[0] = 0;
1176:   /* bdiag is location of diagonal in factor */
1177:   PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1178:   bdiag[0]  = 0;

1180:   PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);
1181:   bjlvl_ptr = (PetscInt**)(bj_ptr + n);

1183:   /* create a linked list for storing column indices of the active row */
1184:   nlnk = n + 1;
1185:   PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);

1187:   /* initial FreeSpace size is f*(ai[n]+1) */
1188:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1189:   current_space = free_space;
1190:   PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1191:   current_space_lvl = free_space_lvl;
1192: 
1193:   for (i=0; i<n; i++) {
1194:     nzi = 0;
1195:     /* copy current row into linked list */
1196:     nnz  = ai[r[i]+1] - ai[r[i]];
1197:     if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1198:     cols = aj + ai[r[i]];
1199:     lnk[i] = -1; /* marker to indicate if diagonal exists */
1200:     PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1201:     nzi += nlnk;

1203:     /* make sure diagonal entry is included */
1204:     if (diagonal_fill && lnk[i] == -1) {
1205:       fm = n;
1206:       while (lnk[fm] < i) fm = lnk[fm];
1207:       lnk[i]     = lnk[fm]; /* insert diagonal into linked list */
1208:       lnk[fm]    = i;
1209:       lnk_lvl[i] = 0;
1210:       nzi++; dcount++;
1211:     }

1213:     /* add pivot rows into the active row */
1214:     nzbd = 0;
1215:     prow = lnk[n];
1216:     while (prow < i) {
1217:       nnz      = bdiag[prow];
1218:       cols     = bj_ptr[prow] + nnz + 1;
1219:       cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1220:       nnz      = bi[prow+1] - bi[prow] - nnz - 1;
1221:       PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1222:       nzi += nlnk;
1223:       prow = lnk[prow];
1224:       nzbd++;
1225:     }
1226:     bdiag[i] = nzbd;
1227:     bi[i+1]  = bi[i] + nzi;

1229:     /* if free space is not available, make more free space */
1230:     if (current_space->local_remaining<nzi) {
1231:       nnz = nzi*(n - i); /* estimated and max additional space needed */
1232:       PetscFreeSpaceGet(nnz,&current_space);
1233:       PetscFreeSpaceGet(nnz,&current_space_lvl);
1234:       reallocs++;
1235:     }

1237:     /* copy data into free_space and free_space_lvl, then initialize lnk */
1238:     PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1239:     bj_ptr[i]    = current_space->array;
1240:     bjlvl_ptr[i] = current_space_lvl->array;

1242:     /* make sure the active row i has diagonal entry */
1243:     if (*(bj_ptr[i]+bdiag[i]) != i) {
1244:       SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1245:     try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1246:     }

1248:     current_space->array           += nzi;
1249:     current_space->local_used      += nzi;
1250:     current_space->local_remaining -= nzi;
1251:     current_space_lvl->array           += nzi;
1252:     current_space_lvl->local_used      += nzi;
1253:     current_space_lvl->local_remaining -= nzi;
1254:   }

1256:   ISRestoreIndices(isrow,&r);
1257:   ISRestoreIndices(isicol,&ic);

1259:   /* destroy list of free space and other temporary arrays */
1260:   PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1261:   PetscFreeSpaceContiguous(&free_space,bj);
1262:   PetscIncompleteLLDestroy(lnk,lnkbt);
1263:   PetscFreeSpaceDestroy(free_space_lvl);
1264:   PetscFree(bj_ptr);

1266: #if defined(PETSC_USE_INFO)
1267:   {
1268:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1269:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1270:     PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1271:     PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1272:     PetscInfo(A,"for best performance.\n");
1273:     if (diagonal_fill) {
1274:       PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1275:     }
1276:   }
1277: #endif

1279:   /* put together the new matrix */
1280:   MatCreate(A->comm,fact);
1281:   MatSetSizes(*fact,n,n,n,n);
1282:   MatSetType(*fact,A->type_name);
1283:   MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1284:   PetscLogObjectParent(*fact,isicol);
1285:   b = (Mat_SeqAIJ*)(*fact)->data;
1286:   b->free_a       = PETSC_TRUE;
1287:   b->free_ij      = PETSC_TRUE;
1288:   b->singlemalloc = PETSC_FALSE;
1289:   len = (bi[n] )*sizeof(PetscScalar);
1290:   PetscMalloc(len+1,&b->a);
1291:   b->j          = bj;
1292:   b->i          = bi;
1293:   for (i=0; i<n; i++) bdiag[i] += bi[i];
1294:   b->diag       = bdiag;
1295:   b->ilen       = 0;
1296:   b->imax       = 0;
1297:   b->row        = isrow;
1298:   b->col        = iscol;
1299:   PetscObjectReference((PetscObject)isrow);
1300:   PetscObjectReference((PetscObject)iscol);
1301:   b->icol       = isicol;
1302:   PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1303:   /* In b structure:  Free imax, ilen, old a, old j.  
1304:      Allocate bdiag, solve_work, new a, new j */
1305:   PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1306:   b->maxnz             = b->nz = bi[n] ;
1307:   (*fact)->factor = FACTOR_LU;
1308:   (*fact)->info.factor_mallocs    = reallocs;
1309:   (*fact)->info.fill_ratio_given  = f;
1310:   (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);

1312:   MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);
1313:   (*fact)->ops->lufactornumeric =  A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */

1315:   return(0);
1316: }

1318:  #include src/mat/impls/sbaij/seq/sbaij.h
1321: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1322: {
1323:   Mat            C = *B;
1324:   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
1325:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)C->data;
1326:   IS             ip=b->row,iip = b->icol;
1328:   PetscInt       *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1329:   PetscInt       *ai=a->i,*aj=a->j;
1330:   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1331:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1332:   PetscReal      zeropivot,rs,shiftnz;
1333:   PetscReal      shiftpd;
1334:   ChShift_Ctx    sctx;
1335:   PetscInt       newshift;

1338:   shiftnz   = info->shiftnz;
1339:   shiftpd   = info->shiftpd;
1340:   zeropivot = info->zeropivot;

1342:   ISGetIndices(ip,&rip);
1343:   ISGetIndices(iip,&riip);
1344: 
1345:   /* initialization */
1346:   nz   = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1347:   PetscMalloc(nz,&il);
1348:   jl   = il + mbs;
1349:   rtmp = (MatScalar*)(jl + mbs);

1351:   sctx.shift_amount = 0;
1352:   sctx.nshift       = 0;
1353:   do {
1354:     sctx.chshift = PETSC_FALSE;
1355:     for (i=0; i<mbs; i++) {
1356:       rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1357:     }
1358: 
1359:     for (k = 0; k<mbs; k++){
1360:       bval = ba + bi[k];
1361:       /* initialize k-th row by the perm[k]-th row of A */
1362:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1363:       for (j = jmin; j < jmax; j++){
1364:         col = riip[aj[j]];
1365:         if (col >= k){ /* only take upper triangular entry */
1366:           rtmp[col] = aa[j];
1367:           *bval++  = 0.0; /* for in-place factorization */
1368:         }
1369:       }
1370:       /* shift the diagonal of the matrix */
1371:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1373:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1374:       dk = rtmp[k];
1375:       i = jl[k]; /* first row to be added to k_th row  */

1377:       while (i < k){
1378:         nexti = jl[i]; /* next row to be added to k_th row */

1380:         /* compute multiplier, update diag(k) and U(i,k) */
1381:         ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */
1382:         uikdi = - ba[ili]*ba[bi[i]];  /* diagonal(k) */
1383:         dk += uikdi*ba[ili];
1384:         ba[ili] = uikdi; /* -U(i,k) */

1386:         /* add multiple of row i to k-th row */
1387:         jmin = ili + 1; jmax = bi[i+1];
1388:         if (jmin < jmax){
1389:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1390:           /* update il and jl for row i */
1391:           il[i] = jmin;
1392:           j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1393:         }
1394:         i = nexti;
1395:       }

1397:       /* shift the diagonals when zero pivot is detected */
1398:       /* compute rs=sum of abs(off-diagonal) */
1399:       rs   = 0.0;
1400:       jmin = bi[k]+1;
1401:       nz   = bi[k+1] - jmin;
1402:       bcol = bj + jmin;
1403:       while (nz--){
1404:         rs += PetscAbsScalar(rtmp[*bcol]);
1405:         bcol++;
1406:       }

1408:       sctx.rs = rs;
1409:       sctx.pv = dk;
1410:       MatCholeskyCheckShift_inline(info,sctx,k,newshift);
1411:       if (newshift == 1) break;
1412: 
1413:       /* copy data into U(k,:) */
1414:       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1415:       jmin = bi[k]+1; jmax = bi[k+1];
1416:       if (jmin < jmax) {
1417:         for (j=jmin; j<jmax; j++){
1418:           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1419:         }
1420:         /* add the k-th row into il and jl */
1421:         il[k] = jmin;
1422:         i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1423:       }
1424:     }
1425:   } while (sctx.chshift);
1426:   PetscFree(il);

1428:   ISRestoreIndices(ip,&rip);
1429:   ISRestoreIndices(iip,&riip);
1430:   C->factor       = FACTOR_CHOLESKY;
1431:   C->assembled    = PETSC_TRUE;
1432:   C->preallocated = PETSC_TRUE;
1433:   PetscLogFlops(C->rmap.n);
1434:   if (sctx.nshift){
1435:     if (shiftnz) {
1436:       PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1437:     } else if (shiftpd) {
1438:       PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1439:     }
1440:   }
1441:   return(0);
1442: }

1446: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1447: {
1448:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1449:   Mat_SeqSBAIJ       *b;
1450:   Mat                B;
1451:   PetscErrorCode     ierr;
1452:   PetscTruth         perm_identity;
1453:   PetscInt           reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1454:   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1455:   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1456:   PetscInt           ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1457:   PetscReal          fill=info->fill,levels=info->levels;
1458:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1459:   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1460:   PetscBT            lnkbt;
1461:   IS                 iperm;
1462: 
1464:   ISIdentity(perm,&perm_identity);
1465:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);

1467:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1468:   ui[0] = 0;

1470:   /* ICC(0) without matrix ordering: simply copies fill pattern */
1471:   if (!levels && perm_identity) {
1472:     for (i=0; i<am; i++) {
1473:       ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1474:     }
1475:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1476:     cols = uj;
1477:     for (i=0; i<am; i++) {
1478:       aj    = a->j + a->diag[i];
1479:       ncols = ui[i+1] - ui[i];
1480:       for (j=0; j<ncols; j++) *cols++ = *aj++;
1481:     }
1482:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1483:     ISGetIndices(iperm,&riip);
1484:     ISGetIndices(perm,&rip);

1486:     /* initialization */
1487:     PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);

1489:     /* jl: linked list for storing indices of the pivot rows 
1490:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1491:     PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);
1492:     il         = jl + am;
1493:     uj_ptr     = (PetscInt**)(il + am);
1494:     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1495:     for (i=0; i<am; i++){
1496:       jl[i] = am; il[i] = 0;
1497:     }

1499:     /* create and initialize a linked list for storing column indices of the active row k */
1500:     nlnk = am + 1;
1501:     PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);

1503:     /* initial FreeSpace size is fill*(ai[am]+1) */
1504:     PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1505:     current_space = free_space;
1506:     PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
1507:     current_space_lvl = free_space_lvl;

1509:     for (k=0; k<am; k++){  /* for each active row k */
1510:       /* initialize lnk by the column indices of row rip[k] of A */
1511:       nzk   = 0;
1512:       ncols = ai[rip[k]+1] - ai[rip[k]];
1513:       if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1514:       ncols_upper = 0;
1515:       for (j=0; j<ncols; j++){
1516:         i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1517:         if (riip[i] >= k){ /* only take upper triangular entry */
1518:           ajtmp[ncols_upper] = i;
1519:           ncols_upper++;
1520:         }
1521:       }
1522:       PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
1523:       nzk += nlnk;

1525:       /* update lnk by computing fill-in for each pivot row to be merged in */
1526:       prow = jl[k]; /* 1st pivot row */
1527: 
1528:       while (prow < k){
1529:         nextprow = jl[prow];
1530: 
1531:         /* merge prow into k-th row */
1532:         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1533:         jmax = ui[prow+1];
1534:         ncols = jmax-jmin;
1535:         i     = jmin - ui[prow];
1536:         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1537:         uj    = uj_lvl_ptr[prow] + i; /* levels of cols */
1538:         j     = *(uj - 1);
1539:         PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
1540:         nzk += nlnk;

1542:         /* update il and jl for prow */
1543:         if (jmin < jmax){
1544:           il[prow] = jmin;
1545:           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1546:         }
1547:         prow = nextprow;
1548:       }

1550:       /* if free space is not available, make more free space */
1551:       if (current_space->local_remaining<nzk) {
1552:         i = am - k + 1; /* num of unfactored rows */
1553:         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1554:         PetscFreeSpaceGet(i,&current_space);
1555:         PetscFreeSpaceGet(i,&current_space_lvl);
1556:         reallocs++;
1557:       }

1559:       /* copy data into free_space and free_space_lvl, then initialize lnk */
1560:       if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1561:       PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);

1563:       /* add the k-th row into il and jl */
1564:       if (nzk > 1){
1565:         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1566:         jl[k] = jl[i]; jl[i] = k;
1567:         il[k] = ui[k] + 1;
1568:       }
1569:       uj_ptr[k]     = current_space->array;
1570:       uj_lvl_ptr[k] = current_space_lvl->array;

1572:       current_space->array           += nzk;
1573:       current_space->local_used      += nzk;
1574:       current_space->local_remaining -= nzk;

1576:       current_space_lvl->array           += nzk;
1577:       current_space_lvl->local_used      += nzk;
1578:       current_space_lvl->local_remaining -= nzk;

1580:       ui[k+1] = ui[k] + nzk;
1581:     }

1583: #if defined(PETSC_USE_INFO)
1584:     if (ai[am] != 0) {
1585:       PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1586:       PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1587:       PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1588:       PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1589:     } else {
1590:       PetscInfo(A,"Empty matrix.\n");
1591:     }
1592: #endif

1594:     ISRestoreIndices(perm,&rip);
1595:     ISRestoreIndices(iperm,&riip);
1596:     PetscFree(jl);
1597:     PetscFree(ajtmp);

1599:     /* destroy list of free space and other temporary array(s) */
1600:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1601:     PetscFreeSpaceContiguous(&free_space,uj);
1602:     PetscIncompleteLLDestroy(lnk,lnkbt);
1603:     PetscFreeSpaceDestroy(free_space_lvl);

1605:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

1607:   /* put together the new matrix in MATSEQSBAIJ format */
1608:   MatCreate(PETSC_COMM_SELF,fact);
1609:   MatSetSizes(*fact,am,am,am,am);
1610:   B = *fact;
1611:   MatSetType(B,MATSEQSBAIJ);
1612:   MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);

1614:   b    = (Mat_SeqSBAIJ*)B->data;
1615:   b->singlemalloc = PETSC_FALSE;
1616:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1617:   b->j    = uj;
1618:   b->i    = ui;
1619:   b->diag = 0;
1620:   b->ilen = 0;
1621:   b->imax = 0;
1622:   b->row  = perm;
1623:   b->col  = perm;
1624:   PetscObjectReference((PetscObject)perm);
1625:   PetscObjectReference((PetscObject)perm);
1626:   b->icol = iperm;
1627:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1628:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1629:   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1630:   b->maxnz   = b->nz = ui[am];
1631:   b->free_a  = PETSC_TRUE;
1632:   b->free_ij = PETSC_TRUE;
1633: 
1634:   B->factor                 = FACTOR_CHOLESKY;
1635:   B->info.factor_mallocs    = reallocs;
1636:   B->info.fill_ratio_given  = fill;
1637:   if (ai[am] != 0) {
1638:     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1639:   } else {
1640:     B->info.fill_ratio_needed = 0.0;
1641:   }
1642:   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1643:   if (perm_identity){
1644:     B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1645:     B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1646:   }
1647:   return(0);
1648: }

1652: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1653: {
1654:   Mat_SeqAIJ         *a = (Mat_SeqAIJ*)A->data;
1655:   Mat_SeqSBAIJ       *b;
1656:   Mat                B;
1657:   PetscErrorCode     ierr;
1658:   PetscTruth         perm_identity;
1659:   PetscReal          fill = info->fill;
1660:   PetscInt           *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1661:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1662:   PetscInt           nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1663:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1664:   PetscBT            lnkbt;
1665:   IS                 iperm;

1668:   /* check whether perm is the identity mapping */
1669:   ISIdentity(perm,&perm_identity);
1670:   ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1671:   ISGetIndices(iperm,&riip);
1672:   ISGetIndices(perm,&rip);

1674:   /* initialization */
1675:   PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1676:   ui[0] = 0;

1678:   /* jl: linked list for storing indices of the pivot rows 
1679:      il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1680:   PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);
1681:   il     = jl + am;
1682:   cols   = il + am;
1683:   ui_ptr = (PetscInt**)(cols + am);
1684:   for (i=0; i<am; i++){
1685:     jl[i] = am; il[i] = 0;
1686:   }

1688:   /* create and initialize a linked list for storing column indices of the active row k */
1689:   nlnk = am + 1;
1690:   PetscLLCreate(am,am,nlnk,lnk,lnkbt);

1692:   /* initial FreeSpace size is fill*(ai[am]+1) */
1693:   PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1694:   current_space = free_space;

1696:   for (k=0; k<am; k++){  /* for each active row k */
1697:     /* initialize lnk by the column indices of row rip[k] of A */
1698:     nzk   = 0;
1699:     ncols = ai[rip[k]+1] - ai[rip[k]];
1700:     if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1701:     ncols_upper = 0;
1702:     for (j=0; j<ncols; j++){
1703:       i = riip[*(aj + ai[rip[k]] + j)];
1704:       if (i >= k){ /* only take upper triangular entry */
1705:         cols[ncols_upper] = i;
1706:         ncols_upper++;
1707:       }
1708:     }
1709:     PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
1710:     nzk += nlnk;

1712:     /* update lnk by computing fill-in for each pivot row to be merged in */
1713:     prow = jl[k]; /* 1st pivot row */
1714: 
1715:     while (prow < k){
1716:       nextprow = jl[prow];
1717:       /* merge prow into k-th row */
1718:       jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
1719:       jmax = ui[prow+1];
1720:       ncols = jmax-jmin;
1721:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1722:       PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
1723:       nzk += nlnk;

1725:       /* update il and jl for prow */
1726:       if (jmin < jmax){
1727:         il[prow] = jmin;
1728:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1729:       }
1730:       prow = nextprow;
1731:     }

1733:     /* if free space is not available, make more free space */
1734:     if (current_space->local_remaining<nzk) {
1735:       i = am - k + 1; /* num of unfactored rows */
1736:       i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1737:       PetscFreeSpaceGet(i,&current_space);
1738:       reallocs++;
1739:     }

1741:     /* copy data into free space, then initialize lnk */
1742:     PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);

1744:     /* add the k-th row into il and jl */
1745:     if (nzk-1 > 0){
1746:       i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1747:       jl[k] = jl[i]; jl[i] = k;
1748:       il[k] = ui[k] + 1;
1749:     }
1750:     ui_ptr[k] = current_space->array;
1751:     current_space->array           += nzk;
1752:     current_space->local_used      += nzk;
1753:     current_space->local_remaining -= nzk;

1755:     ui[k+1] = ui[k] + nzk;
1756:   }

1758: #if defined(PETSC_USE_INFO)
1759:   if (ai[am] != 0) {
1760:     PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1761:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1762:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1763:     PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1764:   } else {
1765:      PetscInfo(A,"Empty matrix.\n");
1766:   }
1767: #endif

1769:   ISRestoreIndices(perm,&rip);
1770:   ISRestoreIndices(iperm,&riip);
1771:   PetscFree(jl);

1773:   /* destroy list of free space and other temporary array(s) */
1774:   PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1775:   PetscFreeSpaceContiguous(&free_space,uj);
1776:   PetscLLDestroy(lnk,lnkbt);

1778:   /* put together the new matrix in MATSEQSBAIJ format */
1779:   MatCreate(PETSC_COMM_SELF,fact);
1780:   MatSetSizes(*fact,am,am,am,am);
1781:   B    = *fact;
1782:   MatSetType(B,MATSEQSBAIJ);
1783:   MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);

1785:   b = (Mat_SeqSBAIJ*)B->data;
1786:   b->singlemalloc = PETSC_FALSE;
1787:   b->free_a       = PETSC_TRUE;
1788:   b->free_ij      = PETSC_TRUE;
1789:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1790:   b->j    = uj;
1791:   b->i    = ui;
1792:   b->diag = 0;
1793:   b->ilen = 0;
1794:   b->imax = 0;
1795:   b->row  = perm;
1796:   b->col  = perm;
1797:   PetscObjectReference((PetscObject)perm);
1798:   PetscObjectReference((PetscObject)perm);
1799:   b->icol = iperm;
1800:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1801:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1802:   PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1803:   b->maxnz = b->nz = ui[am];
1804: 
1805:   B->factor                 = FACTOR_CHOLESKY;
1806:   B->info.factor_mallocs    = reallocs;
1807:   B->info.fill_ratio_given  = fill;
1808:   if (ai[am] != 0) {
1809:     B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1810:   } else {
1811:     B->info.fill_ratio_needed = 0.0;
1812:   }
1813:   (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1814:   if (perm_identity){
1815:     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1816:     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1817:     (*fact)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1818:     (*fact)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1819:   } else {
1820:     (*fact)->ops->solve           = MatSolve_SeqSBAIJ_1;
1821:     (*fact)->ops->solvetranspose  = MatSolve_SeqSBAIJ_1;
1822:     (*fact)->ops->forwardsolve    = MatForwardSolve_SeqSBAIJ_1;
1823:     (*fact)->ops->backwardsolve   = MatBackwardSolve_SeqSBAIJ_1;
1824:   }
1825:   return(0);
1826: }