Actual source code: aijfact.c
1: #define PETSCMAT_DLL
3: #include src/mat/impls/aij/seq/aij.h
4: #include src/inline/dot.h
5: #include src/inline/spops.h
6: #include petscbt.h
7: #include src/mat/utils/freespace.h
11: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12: {
15: SETERRQ(PETSC_ERR_SUP,"Code not written");
16: #if !defined(PETSC_USE_DEBUG)
17: return(0);
18: #endif
19: }
22: #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
23: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
24: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
25: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
26: #endif
30: /* ------------------------------------------------------------
32: This interface was contribed by Tony Caola
34: This routine is an interface to the pivoting drop-tolerance
35: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
36: SPARSEKIT2.
38: The SPARSEKIT2 routines used here are covered by the GNU
39: copyright; see the file gnu in this directory.
41: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
42: help in getting this routine ironed out.
44: The major drawback to this routine is that if info->fill is
45: not large enough it fails rather than allocating more space;
46: this can be fixed by hacking/improving the f2c version of
47: Yousef Saad's code.
49: ------------------------------------------------------------
50: */
51: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
52: {
53: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
55: SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
56: You can obtain the drop tolerance routines by installing PETSc from\n\
57: www.mcs.anl.gov/petsc\n");
58: #else
59: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
60: IS iscolf,isicol,isirow;
61: PetscTruth reorder;
62: PetscErrorCode ierr,sierr;
63: PetscInt *c,*r,*ic,i,n = A->rmap.n;
64: PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
65: PetscInt *ordcol,*iwk,*iperm,*jw;
66: PetscInt jmax,lfill,job,*o_i,*o_j;
67: PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
68: PetscReal af;
72: if (info->dt == PETSC_DEFAULT) info->dt = .005;
73: if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
74: if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01;
75: if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz;
76: lfill = (PetscInt)(info->dtcount/2.0);
77: jmax = (PetscInt)(info->fill*a->nz);
80: /* ------------------------------------------------------------
81: If reorder=.TRUE., then the original matrix has to be
82: reordered to reflect the user selected ordering scheme, and
83: then de-reordered so it is in it's original format.
84: Because Saad's dperm() is NOT in place, we have to copy
85: the original matrix and allocate more storage. . .
86: ------------------------------------------------------------
87: */
89: /* set reorder to true if either isrow or iscol is not identity */
90: ISIdentity(isrow,&reorder);
91: if (reorder) {ISIdentity(iscol,&reorder);}
92: reorder = PetscNot(reorder);
94:
95: /* storage for ilu factor */
96: PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
97: PetscMalloc(jmax*sizeof(PetscInt),&new_j);
98: PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
99: PetscMalloc(n*sizeof(PetscInt),&ordcol);
101: /* ------------------------------------------------------------
102: Make sure that everything is Fortran formatted (1-Based)
103: ------------------------------------------------------------
104: */
105: for (i=old_i[0];i<old_i[n];i++) {
106: old_j[i]++;
107: }
108: for(i=0;i<n+1;i++) {
109: old_i[i]++;
110: };
111:
113: if (reorder) {
114: ISGetIndices(iscol,&c);
115: ISGetIndices(isrow,&r);
116: for(i=0;i<n;i++) {
117: r[i] = r[i]+1;
118: c[i] = c[i]+1;
119: }
120: PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
121: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
122: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
123: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
124: for (i=0;i<n;i++) {
125: r[i] = r[i]-1;
126: c[i] = c[i]-1;
127: }
128: ISRestoreIndices(iscol,&c);
129: ISRestoreIndices(isrow,&r);
130: o_a = old_a2;
131: o_j = old_j2;
132: o_i = old_i2;
133: } else {
134: o_a = old_a;
135: o_j = old_j;
136: o_i = old_i;
137: }
139: /* ------------------------------------------------------------
140: Call Saad's ilutp() routine to generate the factorization
141: ------------------------------------------------------------
142: */
144: PetscMalloc(2*n*sizeof(PetscInt),&iperm);
145: PetscMalloc(2*n*sizeof(PetscInt),&jw);
146: PetscMalloc(n*sizeof(PetscScalar),&w);
148: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
149: if (sierr) {
150: switch (sierr) {
151: case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
152: case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
153: case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
154: case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
155: case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
156: default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
157: }
158: }
160: PetscFree(w);
161: PetscFree(jw);
163: /* ------------------------------------------------------------
164: Saad's routine gives the result in Modified Sparse Row (msr)
165: Convert to Compressed Sparse Row format (csr)
166: ------------------------------------------------------------
167: */
169: PetscMalloc(n*sizeof(PetscScalar),&wk);
170: PetscMalloc((n+1)*sizeof(PetscInt),&iwk);
172: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
174: PetscFree(iwk);
175: PetscFree(wk);
177: if (reorder) {
178: PetscFree(old_a2);
179: PetscFree(old_j2);
180: PetscFree(old_i2);
181: } else {
182: /* fix permutation of old_j that the factorization introduced */
183: for (i=old_i[0]; i<old_i[n]; i++) {
184: old_j[i-1] = iperm[old_j[i-1]-1];
185: }
186: }
188: /* get rid of the shift to indices starting at 1 */
189: for (i=0; i<n+1; i++) {
190: old_i[i]--;
191: }
192: for (i=old_i[0];i<old_i[n];i++) {
193: old_j[i]--;
194: }
195:
196: /* Make the factored matrix 0-based */
197: for (i=0; i<n+1; i++) {
198: new_i[i]--;
199: }
200: for (i=new_i[0];i<new_i[n];i++) {
201: new_j[i]--;
202: }
204: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
205: /*-- permute the right-hand-side and solution vectors --*/
206: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
207: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
208: ISGetIndices(isicol,&ic);
209: for(i=0; i<n; i++) {
210: ordcol[i] = ic[iperm[i]-1];
211: };
212: ISRestoreIndices(isicol,&ic);
213: ISDestroy(isicol);
215: PetscFree(iperm);
217: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
218: PetscFree(ordcol);
220: /*----- put together the new matrix -----*/
222: MatCreate(A->comm,fact);
223: MatSetSizes(*fact,n,n,n,n);
224: MatSetType(*fact,A->type_name);
225: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
226: (*fact)->factor = FACTOR_LU;
227: (*fact)->assembled = PETSC_TRUE;
229: b = (Mat_SeqAIJ*)(*fact)->data;
230: b->free_a = PETSC_TRUE;
231: b->free_ij = PETSC_TRUE;
232: b->sorted = PETSC_FALSE;
233: b->singlemalloc = PETSC_FALSE;
234: b->a = new_a;
235: b->j = new_j;
236: b->i = new_i;
237: b->ilen = 0;
238: b->imax = 0;
239: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
240: b->row = isirow;
241: b->col = iscolf;
242: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
243: b->maxnz = b->nz = new_i[n];
244: MatMarkDiagonal_SeqAIJ(*fact);
245: (*fact)->info.factor_mallocs = 0;
247: af = ((double)b->nz)/((double)a->nz) + .001;
248: PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);
249: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
250: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
251: PetscInfo(A,"for best performance.\n");
253: MatILUDTFactor_Inode(A,isrow,iscol,info,fact);
255: return(0);
256: #endif
257: }
261: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
262: {
263: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
264: IS isicol;
265: PetscErrorCode ierr;
266: PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
267: PetscInt *bi,*bj,*ajtmp;
268: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
269: PetscReal f;
270: PetscInt nlnk,*lnk,k,**bi_ptr;
271: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
272: PetscBT lnkbt;
275: if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
276: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
277: ISGetIndices(isrow,&r);
278: ISGetIndices(isicol,&ic);
280: /* get new row pointers */
281: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
282: bi[0] = 0;
284: /* bdiag is location of diagonal in factor */
285: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
286: bdiag[0] = 0;
288: /* linked list for storing column indices of the active row */
289: nlnk = n + 1;
290: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
292: PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);
294: /* initial FreeSpace size is f*(ai[n]+1) */
295: f = info->fill;
296: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
297: current_space = free_space;
299: for (i=0; i<n; i++) {
300: /* copy previous fill into linked list */
301: nzi = 0;
302: nnz = ai[r[i]+1] - ai[r[i]];
303: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
304: ajtmp = aj + ai[r[i]];
305: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
306: nzi += nlnk;
308: /* add pivot rows into linked list */
309: row = lnk[n];
310: while (row < i) {
311: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
312: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
313: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
314: nzi += nlnk;
315: row = lnk[row];
316: }
317: bi[i+1] = bi[i] + nzi;
318: im[i] = nzi;
320: /* mark bdiag */
321: nzbd = 0;
322: nnz = nzi;
323: k = lnk[n];
324: while (nnz-- && k < i){
325: nzbd++;
326: k = lnk[k];
327: }
328: bdiag[i] = bi[i] + nzbd;
330: /* if free space is not available, make more free space */
331: if (current_space->local_remaining<nzi) {
332: nnz = (n - i)*nzi; /* estimated and max additional space needed */
333: PetscFreeSpaceGet(nnz,¤t_space);
334: reallocs++;
335: }
337: /* copy data into free space, then initialize lnk */
338: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
339: bi_ptr[i] = current_space->array;
340: current_space->array += nzi;
341: current_space->local_used += nzi;
342: current_space->local_remaining -= nzi;
343: }
344: #if defined(PETSC_USE_INFO)
345: if (ai[n] != 0) {
346: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
347: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
348: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
349: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
350: PetscInfo(A,"for best performance.\n");
351: } else {
352: PetscInfo(A,"Empty matrix\n");
353: }
354: #endif
356: ISRestoreIndices(isrow,&r);
357: ISRestoreIndices(isicol,&ic);
359: /* destroy list of free space and other temporary array(s) */
360: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
361: PetscFreeSpaceContiguous(&free_space,bj);
362: PetscLLDestroy(lnk,lnkbt);
363: PetscFree2(bi_ptr,im);
365: /* put together the new matrix */
366: MatCreate(A->comm,B);
367: MatSetSizes(*B,n,n,n,n);
368: MatSetType(*B,A->type_name);
369: MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);
370: PetscLogObjectParent(*B,isicol);
371: b = (Mat_SeqAIJ*)(*B)->data;
372: b->free_a = PETSC_TRUE;
373: b->free_ij = PETSC_TRUE;
374: b->singlemalloc = PETSC_FALSE;
375: PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
376: b->j = bj;
377: b->i = bi;
378: b->diag = bdiag;
379: b->ilen = 0;
380: b->imax = 0;
381: b->row = isrow;
382: b->col = iscol;
383: PetscObjectReference((PetscObject)isrow);
384: PetscObjectReference((PetscObject)iscol);
385: b->icol = isicol;
386: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
388: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
389: PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
390: b->maxnz = b->nz = bi[n] ;
392: (*B)->factor = FACTOR_LU;
393: (*B)->info.factor_mallocs = reallocs;
394: (*B)->info.fill_ratio_given = f;
396: if (ai[n] != 0) {
397: (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
398: } else {
399: (*B)->info.fill_ratio_needed = 0.0;
400: }
401: MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);
402: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
403: return(0);
404: }
406: /*
407: Trouble in factorization, should we dump the original matrix?
408: */
411: PetscErrorCode MatFactorDumpMatrix(Mat A)
412: {
414: PetscTruth flg;
417: PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);
418: if (flg) {
419: PetscViewer viewer;
420: char filename[PETSC_MAX_PATH_LEN];
422: PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);
423: PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);
424: MatView(A,viewer);
425: PetscViewerDestroy(viewer);
426: }
427: return(0);
428: }
430: /* ----------------------------------------------------------- */
433: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
434: {
435: Mat C=*B;
436: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
437: IS isrow = b->row,isicol = b->icol;
439: PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
440: PetscInt *ajtmp,*bjtmp,nz,row,*ics;
441: PetscInt *diag_offset = b->diag,diag,*pj;
442: PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps;
443: PetscScalar d;
444: PetscReal rs;
445: LUShift_Ctx sctx;
446: PetscInt newshift,*ddiag;
449: ISGetIndices(isrow,&r);
450: ISGetIndices(isicol,&ic);
451: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
452: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
453: rtmps = rtmp; ics = ic;
455: sctx.shift_top = 0;
456: sctx.nshift_max = 0;
457: sctx.shift_lo = 0;
458: sctx.shift_hi = 0;
460: /* if both shift schemes are chosen by user, only use info->shiftpd */
461: if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
462: if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
463: PetscInt *aai = a->i;
464: ddiag = a->diag;
465: sctx.shift_top = 0;
466: for (i=0; i<n; i++) {
467: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
468: d = (a->a)[ddiag[i]];
469: rs = -PetscAbsScalar(d) - PetscRealPart(d);
470: v = a->a+aai[i];
471: nz = aai[i+1] - aai[i];
472: for (j=0; j<nz; j++)
473: rs += PetscAbsScalar(v[j]);
474: if (rs>sctx.shift_top) sctx.shift_top = rs;
475: }
476: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
477: sctx.shift_top *= 1.1;
478: sctx.nshift_max = 5;
479: sctx.shift_lo = 0.;
480: sctx.shift_hi = 1.;
481: }
483: sctx.shift_amount = 0;
484: sctx.nshift = 0;
485: do {
486: sctx.lushift = PETSC_FALSE;
487: for (i=0; i<n; i++){
488: nz = bi[i+1] - bi[i];
489: bjtmp = bj + bi[i];
490: for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
492: /* load in initial (unfactored row) */
493: nz = a->i[r[i]+1] - a->i[r[i]];
494: ajtmp = a->j + a->i[r[i]];
495: v = a->a + a->i[r[i]];
496: for (j=0; j<nz; j++) {
497: rtmp[ics[ajtmp[j]]] = v[j];
498: }
499: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
501: row = *bjtmp++;
502: while (row < i) {
503: pc = rtmp + row;
504: if (*pc != 0.0) {
505: pv = b->a + diag_offset[row];
506: pj = b->j + diag_offset[row] + 1;
507: multiplier = *pc / *pv++;
508: *pc = multiplier;
509: nz = bi[row+1] - diag_offset[row] - 1;
510: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
511: PetscLogFlops(2*nz);
512: }
513: row = *bjtmp++;
514: }
515: /* finished row so stick it into b->a */
516: pv = b->a + bi[i] ;
517: pj = b->j + bi[i] ;
518: nz = bi[i+1] - bi[i];
519: diag = diag_offset[i] - bi[i];
520: rs = 0.0;
521: for (j=0; j<nz; j++) {
522: pv[j] = rtmps[pj[j]];
523: if (j != diag) rs += PetscAbsScalar(pv[j]);
524: }
526: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
527: sctx.rs = rs;
528: sctx.pv = pv[diag];
529: MatLUCheckShift_inline(info,sctx,i,newshift);
530: if (newshift == 1) break;
531: }
533: if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
534: /*
535: * if no shift in this attempt & shifting & started shifting & can refine,
536: * then try lower shift
537: */
538: sctx.shift_hi = info->shift_fraction;
539: info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
540: sctx.shift_amount = info->shift_fraction * sctx.shift_top;
541: sctx.lushift = PETSC_TRUE;
542: sctx.nshift++;
543: }
544: } while (sctx.lushift);
546: /* invert diagonal entries for simplier triangular solves */
547: for (i=0; i<n; i++) {
548: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
549: }
551: PetscFree(rtmp);
552: ISRestoreIndices(isicol,&ic);
553: ISRestoreIndices(isrow,&r);
554: C->factor = FACTOR_LU;
555: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
556: C->assembled = PETSC_TRUE;
557: PetscLogFlops(C->cmap.n);
558: if (sctx.nshift){
559: if (info->shiftnz) {
560: PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
561: } else if (info->shiftpd) {
562: PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
563: }
564: }
565: return(0);
566: }
568: /*
569: This routine implements inplace ILU(0) with row or/and column permutations.
570: Input:
571: A - original matrix
572: Output;
573: A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i]
574: a->j (col index) is permuted by the inverse of colperm, then sorted
575: a->a reordered accordingly with a->j
576: a->diag (ptr to diagonal elements) is updated.
577: */
580: PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B)
581: {
582: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
583: IS isrow = a->row,isicol = a->icol;
585: PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j;
586: PetscInt *ajtmp,nz,row,*ics;
587: PetscInt *diag = a->diag,nbdiag,*pj;
588: PetscScalar *rtmp,*v,*pc,multiplier,*pv,d;
589: PetscReal rs;
590: LUShift_Ctx sctx;
591: PetscInt newshift;
594: if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address");
595: ISGetIndices(isrow,&r);
596: ISGetIndices(isicol,&ic);
597: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
598: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
599: ics = ic;
601: sctx.shift_top = 0;
602: sctx.nshift_max = 0;
603: sctx.shift_lo = 0;
604: sctx.shift_hi = 0;
606: /* if both shift schemes are chosen by user, only use info->shiftpd */
607: if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
608: if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
609: sctx.shift_top = 0;
610: for (i=0; i<n; i++) {
611: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
612: d = (a->a)[diag[i]];
613: rs = -PetscAbsScalar(d) - PetscRealPart(d);
614: v = a->a+ai[i];
615: nz = ai[i+1] - ai[i];
616: for (j=0; j<nz; j++)
617: rs += PetscAbsScalar(v[j]);
618: if (rs>sctx.shift_top) sctx.shift_top = rs;
619: }
620: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
621: sctx.shift_top *= 1.1;
622: sctx.nshift_max = 5;
623: sctx.shift_lo = 0.;
624: sctx.shift_hi = 1.;
625: }
627: sctx.shift_amount = 0;
628: sctx.nshift = 0;
629: do {
630: sctx.lushift = PETSC_FALSE;
631: for (i=0; i<n; i++){
632: /* load in initial unfactored row */
633: nz = ai[r[i]+1] - ai[r[i]];
634: ajtmp = aj + ai[r[i]];
635: v = a->a + ai[r[i]];
636: /* sort permuted ajtmp and values v accordingly */
637: for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]];
638: PetscSortIntWithScalarArray(nz,ajtmp,v);
640: diag[r[i]] = ai[r[i]];
641: for (j=0; j<nz; j++) {
642: rtmp[ajtmp[j]] = v[j];
643: if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */
644: }
645: rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */
647: row = *ajtmp++;
648: while (row < i) {
649: pc = rtmp + row;
650: if (*pc != 0.0) {
651: pv = a->a + diag[r[row]];
652: pj = aj + diag[r[row]] + 1;
654: multiplier = *pc / *pv++;
655: *pc = multiplier;
656: nz = ai[r[row]+1] - diag[r[row]] - 1;
657: for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j];
658: PetscLogFlops(2*nz);
659: }
660: row = *ajtmp++;
661: }
662: /* finished row so overwrite it onto a->a */
663: pv = a->a + ai[r[i]] ;
664: pj = aj + ai[r[i]] ;
665: nz = ai[r[i]+1] - ai[r[i]];
666: nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */
667:
668: rs = 0.0;
669: for (j=0; j<nz; j++) {
670: pv[j] = rtmp[pj[j]];
671: if (j != nbdiag) rs += PetscAbsScalar(pv[j]);
672: }
674: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
675: sctx.rs = rs;
676: sctx.pv = pv[nbdiag];
677: MatLUCheckShift_inline(info,sctx,i,newshift);
678: if (newshift == 1) break;
679: }
681: if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
682: /*
683: * if no shift in this attempt & shifting & started shifting & can refine,
684: * then try lower shift
685: */
686: sctx.shift_hi = info->shift_fraction;
687: info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
688: sctx.shift_amount = info->shift_fraction * sctx.shift_top;
689: sctx.lushift = PETSC_TRUE;
690: sctx.nshift++;
691: }
692: } while (sctx.lushift);
694: /* invert diagonal entries for simplier triangular solves */
695: for (i=0; i<n; i++) {
696: a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]];
697: }
699: PetscFree(rtmp);
700: ISRestoreIndices(isicol,&ic);
701: ISRestoreIndices(isrow,&r);
702: A->factor = FACTOR_LU;
703: A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm;
704: A->assembled = PETSC_TRUE;
705: PetscLogFlops(A->cmap.n);
706: if (sctx.nshift){
707: if (info->shiftnz) {
708: PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
709: } else if (info->shiftpd) {
710: PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
711: }
712: }
713: return(0);
714: }
718: PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
719: {
721: A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
722: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
723: return(0);
724: }
727: /* ----------------------------------------------------------- */
730: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
731: {
733: Mat C;
736: MatLUFactorSymbolic(A,row,col,info,&C);
737: MatLUFactorNumeric(A,info,&C);
738: MatHeaderCopy(A,C);
739: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
740: return(0);
741: }
742: /* ----------------------------------------------------------- */
745: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
746: {
747: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
748: IS iscol = a->col,isrow = a->row;
750: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
751: PetscInt nz,*rout,*cout;
752: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
755: if (!n) return(0);
757: VecGetArray(bb,&b);
758: VecGetArray(xx,&x);
759: tmp = a->solve_work;
761: ISGetIndices(isrow,&rout); r = rout;
762: ISGetIndices(iscol,&cout); c = cout + (n-1);
764: /* forward solve the lower triangular */
765: tmp[0] = b[*r++];
766: tmps = tmp;
767: for (i=1; i<n; i++) {
768: v = aa + ai[i] ;
769: vi = aj + ai[i] ;
770: nz = a->diag[i] - ai[i];
771: sum = b[*r++];
772: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
773: tmp[i] = sum;
774: }
776: /* backward solve the upper triangular */
777: for (i=n-1; i>=0; i--){
778: v = aa + a->diag[i] + 1;
779: vi = aj + a->diag[i] + 1;
780: nz = ai[i+1] - a->diag[i] - 1;
781: sum = tmp[i];
782: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
783: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
784: }
786: ISRestoreIndices(isrow,&rout);
787: ISRestoreIndices(iscol,&cout);
788: VecRestoreArray(bb,&b);
789: VecRestoreArray(xx,&x);
790: PetscLogFlops(2*a->nz - A->cmap.n);
791: return(0);
792: }
796: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
797: {
798: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
799: IS iscol = a->col,isrow = a->row;
801: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
802: PetscInt nz,*rout,*cout,neq;
803: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
806: if (!n) return(0);
808: MatGetArray(B,&b);
809: MatGetArray(X,&x);
810:
811: tmp = a->solve_work;
812: ISGetIndices(isrow,&rout); r = rout;
813: ISGetIndices(iscol,&cout); c = cout;
815: for (neq=0; neq<n; neq++){
816: /* forward solve the lower triangular */
817: tmp[0] = b[r[0]];
818: tmps = tmp;
819: for (i=1; i<n; i++) {
820: v = aa + ai[i] ;
821: vi = aj + ai[i] ;
822: nz = a->diag[i] - ai[i];
823: sum = b[r[i]];
824: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
825: tmp[i] = sum;
826: }
827: /* backward solve the upper triangular */
828: for (i=n-1; i>=0; i--){
829: v = aa + a->diag[i] + 1;
830: vi = aj + a->diag[i] + 1;
831: nz = ai[i+1] - a->diag[i] - 1;
832: sum = tmp[i];
833: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
834: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
835: }
837: b += n;
838: x += n;
839: }
840: ISRestoreIndices(isrow,&rout);
841: ISRestoreIndices(iscol,&cout);
842: MatRestoreArray(B,&b);
843: MatRestoreArray(X,&x);
844: PetscLogFlops(n*(2*a->nz - n));
845: return(0);
846: }
850: PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx)
851: {
852: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
853: IS iscol = a->col,isrow = a->row;
855: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
856: PetscInt nz,*rout,*cout,row;
857: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
860: if (!n) return(0);
862: VecGetArray(bb,&b);
863: VecGetArray(xx,&x);
864: tmp = a->solve_work;
866: ISGetIndices(isrow,&rout); r = rout;
867: ISGetIndices(iscol,&cout); c = cout + (n-1);
869: /* forward solve the lower triangular */
870: tmp[0] = b[*r++];
871: tmps = tmp;
872: for (row=1; row<n; row++) {
873: i = rout[row]; /* permuted row */
874: v = aa + ai[i] ;
875: vi = aj + ai[i] ;
876: nz = a->diag[i] - ai[i];
877: sum = b[*r++];
878: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
879: tmp[row] = sum;
880: }
882: /* backward solve the upper triangular */
883: for (row=n-1; row>=0; row--){
884: i = rout[row]; /* permuted row */
885: v = aa + a->diag[i] + 1;
886: vi = aj + a->diag[i] + 1;
887: nz = ai[i+1] - a->diag[i] - 1;
888: sum = tmp[row];
889: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
890: x[*c--] = tmp[row] = sum*aa[a->diag[i]];
891: }
893: ISRestoreIndices(isrow,&rout);
894: ISRestoreIndices(iscol,&cout);
895: VecRestoreArray(bb,&b);
896: VecRestoreArray(xx,&x);
897: PetscLogFlops(2*a->nz - A->cmap.n);
898: return(0);
899: }
901: /* ----------------------------------------------------------- */
904: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
905: {
906: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
908: PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
909: PetscScalar *x,*b,*aa = a->a;
910: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
911: PetscInt adiag_i,i,*vi,nz,ai_i;
912: PetscScalar *v,sum;
913: #endif
916: if (!n) return(0);
918: VecGetArray(bb,&b);
919: VecGetArray(xx,&x);
921: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
922: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
923: #else
924: /* forward solve the lower triangular */
925: x[0] = b[0];
926: for (i=1; i<n; i++) {
927: ai_i = ai[i];
928: v = aa + ai_i;
929: vi = aj + ai_i;
930: nz = adiag[i] - ai_i;
931: sum = b[i];
932: while (nz--) sum -= *v++ * x[*vi++];
933: x[i] = sum;
934: }
936: /* backward solve the upper triangular */
937: for (i=n-1; i>=0; i--){
938: adiag_i = adiag[i];
939: v = aa + adiag_i + 1;
940: vi = aj + adiag_i + 1;
941: nz = ai[i+1] - adiag_i - 1;
942: sum = x[i];
943: while (nz--) sum -= *v++ * x[*vi++];
944: x[i] = sum*aa[adiag_i];
945: }
946: #endif
947: PetscLogFlops(2*a->nz - A->cmap.n);
948: VecRestoreArray(bb,&b);
949: VecRestoreArray(xx,&x);
950: return(0);
951: }
955: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
956: {
957: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
958: IS iscol = a->col,isrow = a->row;
960: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
961: PetscInt nz,*rout,*cout;
962: PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v;
965: if (yy != xx) {VecCopy(yy,xx);}
967: VecGetArray(bb,&b);
968: VecGetArray(xx,&x);
969: tmp = a->solve_work;
971: ISGetIndices(isrow,&rout); r = rout;
972: ISGetIndices(iscol,&cout); c = cout + (n-1);
974: /* forward solve the lower triangular */
975: tmp[0] = b[*r++];
976: for (i=1; i<n; i++) {
977: v = aa + ai[i] ;
978: vi = aj + ai[i] ;
979: nz = a->diag[i] - ai[i];
980: sum = b[*r++];
981: while (nz--) sum -= *v++ * tmp[*vi++ ];
982: tmp[i] = sum;
983: }
985: /* backward solve the upper triangular */
986: for (i=n-1; i>=0; i--){
987: v = aa + a->diag[i] + 1;
988: vi = aj + a->diag[i] + 1;
989: nz = ai[i+1] - a->diag[i] - 1;
990: sum = tmp[i];
991: while (nz--) sum -= *v++ * tmp[*vi++ ];
992: tmp[i] = sum*aa[a->diag[i]];
993: x[*c--] += tmp[i];
994: }
996: ISRestoreIndices(isrow,&rout);
997: ISRestoreIndices(iscol,&cout);
998: VecRestoreArray(bb,&b);
999: VecRestoreArray(xx,&x);
1000: PetscLogFlops(2*a->nz);
1002: return(0);
1003: }
1004: /* -------------------------------------------------------------------*/
1007: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
1008: {
1009: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1010: IS iscol = a->col,isrow = a->row;
1012: PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1013: PetscInt nz,*rout,*cout,*diag = a->diag;
1014: PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1;
1017: VecGetArray(bb,&b);
1018: VecGetArray(xx,&x);
1019: tmp = a->solve_work;
1021: ISGetIndices(isrow,&rout); r = rout;
1022: ISGetIndices(iscol,&cout); c = cout;
1024: /* copy the b into temp work space according to permutation */
1025: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1027: /* forward solve the U^T */
1028: for (i=0; i<n; i++) {
1029: v = aa + diag[i] ;
1030: vi = aj + diag[i] + 1;
1031: nz = ai[i+1] - diag[i] - 1;
1032: s1 = tmp[i];
1033: s1 *= (*v++); /* multiply by inverse of diagonal entry */
1034: while (nz--) {
1035: tmp[*vi++ ] -= (*v++)*s1;
1036: }
1037: tmp[i] = s1;
1038: }
1040: /* backward solve the L^T */
1041: for (i=n-1; i>=0; i--){
1042: v = aa + diag[i] - 1 ;
1043: vi = aj + diag[i] - 1 ;
1044: nz = diag[i] - ai[i];
1045: s1 = tmp[i];
1046: while (nz--) {
1047: tmp[*vi-- ] -= (*v--)*s1;
1048: }
1049: }
1051: /* copy tmp into x according to permutation */
1052: for (i=0; i<n; i++) x[r[i]] = tmp[i];
1054: ISRestoreIndices(isrow,&rout);
1055: ISRestoreIndices(iscol,&cout);
1056: VecRestoreArray(bb,&b);
1057: VecRestoreArray(xx,&x);
1059: PetscLogFlops(2*a->nz-A->cmap.n);
1060: return(0);
1061: }
1065: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
1066: {
1067: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1068: IS iscol = a->col,isrow = a->row;
1070: PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
1071: PetscInt nz,*rout,*cout,*diag = a->diag;
1072: PetscScalar *x,*b,*tmp,*aa = a->a,*v;
1075: if (zz != xx) {VecCopy(zz,xx);}
1077: VecGetArray(bb,&b);
1078: VecGetArray(xx,&x);
1079: tmp = a->solve_work;
1081: ISGetIndices(isrow,&rout); r = rout;
1082: ISGetIndices(iscol,&cout); c = cout;
1084: /* copy the b into temp work space according to permutation */
1085: for (i=0; i<n; i++) tmp[i] = b[c[i]];
1087: /* forward solve the U^T */
1088: for (i=0; i<n; i++) {
1089: v = aa + diag[i] ;
1090: vi = aj + diag[i] + 1;
1091: nz = ai[i+1] - diag[i] - 1;
1092: tmp[i] *= *v++;
1093: while (nz--) {
1094: tmp[*vi++ ] -= (*v++)*tmp[i];
1095: }
1096: }
1098: /* backward solve the L^T */
1099: for (i=n-1; i>=0; i--){
1100: v = aa + diag[i] - 1 ;
1101: vi = aj + diag[i] - 1 ;
1102: nz = diag[i] - ai[i];
1103: while (nz--) {
1104: tmp[*vi-- ] -= (*v--)*tmp[i];
1105: }
1106: }
1108: /* copy tmp into x according to permutation */
1109: for (i=0; i<n; i++) x[r[i]] += tmp[i];
1111: ISRestoreIndices(isrow,&rout);
1112: ISRestoreIndices(iscol,&cout);
1113: VecRestoreArray(bb,&b);
1114: VecRestoreArray(xx,&x);
1116: PetscLogFlops(2*a->nz);
1117: return(0);
1118: }
1119: /* ----------------------------------------------------------------*/
1120: EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat,PetscTruth*,PetscInt*);
1124: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
1125: {
1126: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
1127: IS isicol;
1128: PetscErrorCode ierr;
1129: PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d;
1130: PetscInt *bi,*cols,nnz,*cols_lvl;
1131: PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
1132: PetscInt i,levels,diagonal_fill;
1133: PetscTruth col_identity,row_identity;
1134: PetscReal f;
1135: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1136: PetscBT lnkbt;
1137: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
1138: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1139: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1140: PetscTruth missing;
1143: f = info->fill;
1144: levels = (PetscInt)info->levels;
1145: diagonal_fill = (PetscInt)info->diagonal_fill;
1146: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
1148: /* special case that simply copies fill pattern */
1149: ISIdentity(isrow,&row_identity);
1150: ISIdentity(iscol,&col_identity);
1151: if (!levels && row_identity && col_identity) {
1152: MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
1153: (*fact)->factor = FACTOR_LU;
1154: (*fact)->info.factor_mallocs = 0;
1155: (*fact)->info.fill_ratio_given = info->fill;
1156: (*fact)->info.fill_ratio_needed = 1.0;
1157: b = (Mat_SeqAIJ*)(*fact)->data;
1158: MatMissingDiagonal_SeqAIJ(*fact,&missing,&d);
1159: if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);
1160: b->row = isrow;
1161: b->col = iscol;
1162: b->icol = isicol;
1163: PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);
1164: (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
1165: PetscObjectReference((PetscObject)isrow);
1166: PetscObjectReference((PetscObject)iscol);
1167: return(0);
1168: }
1170: ISGetIndices(isrow,&r);
1171: ISGetIndices(isicol,&ic);
1173: /* get new row pointers */
1174: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
1175: bi[0] = 0;
1176: /* bdiag is location of diagonal in factor */
1177: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
1178: bdiag[0] = 0;
1180: PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);
1181: bjlvl_ptr = (PetscInt**)(bj_ptr + n);
1183: /* create a linked list for storing column indices of the active row */
1184: nlnk = n + 1;
1185: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
1187: /* initial FreeSpace size is f*(ai[n]+1) */
1188: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
1189: current_space = free_space;
1190: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
1191: current_space_lvl = free_space_lvl;
1192:
1193: for (i=0; i<n; i++) {
1194: nzi = 0;
1195: /* copy current row into linked list */
1196: nnz = ai[r[i]+1] - ai[r[i]];
1197: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
1198: cols = aj + ai[r[i]];
1199: lnk[i] = -1; /* marker to indicate if diagonal exists */
1200: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
1201: nzi += nlnk;
1203: /* make sure diagonal entry is included */
1204: if (diagonal_fill && lnk[i] == -1) {
1205: fm = n;
1206: while (lnk[fm] < i) fm = lnk[fm];
1207: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
1208: lnk[fm] = i;
1209: lnk_lvl[i] = 0;
1210: nzi++; dcount++;
1211: }
1213: /* add pivot rows into the active row */
1214: nzbd = 0;
1215: prow = lnk[n];
1216: while (prow < i) {
1217: nnz = bdiag[prow];
1218: cols = bj_ptr[prow] + nnz + 1;
1219: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1220: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1221: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1222: nzi += nlnk;
1223: prow = lnk[prow];
1224: nzbd++;
1225: }
1226: bdiag[i] = nzbd;
1227: bi[i+1] = bi[i] + nzi;
1229: /* if free space is not available, make more free space */
1230: if (current_space->local_remaining<nzi) {
1231: nnz = nzi*(n - i); /* estimated and max additional space needed */
1232: PetscFreeSpaceGet(nnz,¤t_space);
1233: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1234: reallocs++;
1235: }
1237: /* copy data into free_space and free_space_lvl, then initialize lnk */
1238: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1239: bj_ptr[i] = current_space->array;
1240: bjlvl_ptr[i] = current_space_lvl->array;
1242: /* make sure the active row i has diagonal entry */
1243: if (*(bj_ptr[i]+bdiag[i]) != i) {
1244: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1245: try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i);
1246: }
1248: current_space->array += nzi;
1249: current_space->local_used += nzi;
1250: current_space->local_remaining -= nzi;
1251: current_space_lvl->array += nzi;
1252: current_space_lvl->local_used += nzi;
1253: current_space_lvl->local_remaining -= nzi;
1254: }
1256: ISRestoreIndices(isrow,&r);
1257: ISRestoreIndices(isicol,&ic);
1259: /* destroy list of free space and other temporary arrays */
1260: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1261: PetscFreeSpaceContiguous(&free_space,bj);
1262: PetscIncompleteLLDestroy(lnk,lnkbt);
1263: PetscFreeSpaceDestroy(free_space_lvl);
1264: PetscFree(bj_ptr);
1266: #if defined(PETSC_USE_INFO)
1267: {
1268: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1269: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1270: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1271: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1272: PetscInfo(A,"for best performance.\n");
1273: if (diagonal_fill) {
1274: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1275: }
1276: }
1277: #endif
1279: /* put together the new matrix */
1280: MatCreate(A->comm,fact);
1281: MatSetSizes(*fact,n,n,n,n);
1282: MatSetType(*fact,A->type_name);
1283: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1284: PetscLogObjectParent(*fact,isicol);
1285: b = (Mat_SeqAIJ*)(*fact)->data;
1286: b->free_a = PETSC_TRUE;
1287: b->free_ij = PETSC_TRUE;
1288: b->singlemalloc = PETSC_FALSE;
1289: len = (bi[n] )*sizeof(PetscScalar);
1290: PetscMalloc(len+1,&b->a);
1291: b->j = bj;
1292: b->i = bi;
1293: for (i=0; i<n; i++) bdiag[i] += bi[i];
1294: b->diag = bdiag;
1295: b->ilen = 0;
1296: b->imax = 0;
1297: b->row = isrow;
1298: b->col = iscol;
1299: PetscObjectReference((PetscObject)isrow);
1300: PetscObjectReference((PetscObject)iscol);
1301: b->icol = isicol;
1302: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1303: /* In b structure: Free imax, ilen, old a, old j.
1304: Allocate bdiag, solve_work, new a, new j */
1305: PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1306: b->maxnz = b->nz = bi[n] ;
1307: (*fact)->factor = FACTOR_LU;
1308: (*fact)->info.factor_mallocs = reallocs;
1309: (*fact)->info.fill_ratio_given = f;
1310: (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1312: MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);
1313: (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1315: return(0);
1316: }
1318: #include src/mat/impls/sbaij/seq/sbaij.h
1321: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1322: {
1323: Mat C = *B;
1324: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
1325: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
1326: IS ip=b->row,iip = b->icol;
1328: PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1329: PetscInt *ai=a->i,*aj=a->j;
1330: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1331: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1332: PetscReal zeropivot,rs,shiftnz;
1333: PetscReal shiftpd;
1334: ChShift_Ctx sctx;
1335: PetscInt newshift;
1338: shiftnz = info->shiftnz;
1339: shiftpd = info->shiftpd;
1340: zeropivot = info->zeropivot;
1342: ISGetIndices(ip,&rip);
1343: ISGetIndices(iip,&riip);
1344:
1345: /* initialization */
1346: nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1347: PetscMalloc(nz,&il);
1348: jl = il + mbs;
1349: rtmp = (MatScalar*)(jl + mbs);
1351: sctx.shift_amount = 0;
1352: sctx.nshift = 0;
1353: do {
1354: sctx.chshift = PETSC_FALSE;
1355: for (i=0; i<mbs; i++) {
1356: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1357: }
1358:
1359: for (k = 0; k<mbs; k++){
1360: bval = ba + bi[k];
1361: /* initialize k-th row by the perm[k]-th row of A */
1362: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1363: for (j = jmin; j < jmax; j++){
1364: col = riip[aj[j]];
1365: if (col >= k){ /* only take upper triangular entry */
1366: rtmp[col] = aa[j];
1367: *bval++ = 0.0; /* for in-place factorization */
1368: }
1369: }
1370: /* shift the diagonal of the matrix */
1371: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1373: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1374: dk = rtmp[k];
1375: i = jl[k]; /* first row to be added to k_th row */
1377: while (i < k){
1378: nexti = jl[i]; /* next row to be added to k_th row */
1380: /* compute multiplier, update diag(k) and U(i,k) */
1381: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1382: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
1383: dk += uikdi*ba[ili];
1384: ba[ili] = uikdi; /* -U(i,k) */
1386: /* add multiple of row i to k-th row */
1387: jmin = ili + 1; jmax = bi[i+1];
1388: if (jmin < jmax){
1389: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1390: /* update il and jl for row i */
1391: il[i] = jmin;
1392: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1393: }
1394: i = nexti;
1395: }
1397: /* shift the diagonals when zero pivot is detected */
1398: /* compute rs=sum of abs(off-diagonal) */
1399: rs = 0.0;
1400: jmin = bi[k]+1;
1401: nz = bi[k+1] - jmin;
1402: bcol = bj + jmin;
1403: while (nz--){
1404: rs += PetscAbsScalar(rtmp[*bcol]);
1405: bcol++;
1406: }
1408: sctx.rs = rs;
1409: sctx.pv = dk;
1410: MatCholeskyCheckShift_inline(info,sctx,k,newshift);
1411: if (newshift == 1) break;
1412:
1413: /* copy data into U(k,:) */
1414: ba[bi[k]] = 1.0/dk; /* U(k,k) */
1415: jmin = bi[k]+1; jmax = bi[k+1];
1416: if (jmin < jmax) {
1417: for (j=jmin; j<jmax; j++){
1418: col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1419: }
1420: /* add the k-th row into il and jl */
1421: il[k] = jmin;
1422: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1423: }
1424: }
1425: } while (sctx.chshift);
1426: PetscFree(il);
1428: ISRestoreIndices(ip,&rip);
1429: ISRestoreIndices(iip,&riip);
1430: C->factor = FACTOR_CHOLESKY;
1431: C->assembled = PETSC_TRUE;
1432: C->preallocated = PETSC_TRUE;
1433: PetscLogFlops(C->rmap.n);
1434: if (sctx.nshift){
1435: if (shiftnz) {
1436: PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1437: } else if (shiftpd) {
1438: PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1439: }
1440: }
1441: return(0);
1442: }
1446: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1447: {
1448: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1449: Mat_SeqSBAIJ *b;
1450: Mat B;
1451: PetscErrorCode ierr;
1452: PetscTruth perm_identity;
1453: PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1454: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1455: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1456: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1457: PetscReal fill=info->fill,levels=info->levels;
1458: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1459: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1460: PetscBT lnkbt;
1461: IS iperm;
1462:
1464: ISIdentity(perm,&perm_identity);
1465: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1467: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1468: ui[0] = 0;
1470: /* ICC(0) without matrix ordering: simply copies fill pattern */
1471: if (!levels && perm_identity) {
1472: for (i=0; i<am; i++) {
1473: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1474: }
1475: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1476: cols = uj;
1477: for (i=0; i<am; i++) {
1478: aj = a->j + a->diag[i];
1479: ncols = ui[i+1] - ui[i];
1480: for (j=0; j<ncols; j++) *cols++ = *aj++;
1481: }
1482: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1483: ISGetIndices(iperm,&riip);
1484: ISGetIndices(perm,&rip);
1486: /* initialization */
1487: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
1489: /* jl: linked list for storing indices of the pivot rows
1490: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1491: PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);
1492: il = jl + am;
1493: uj_ptr = (PetscInt**)(il + am);
1494: uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1495: for (i=0; i<am; i++){
1496: jl[i] = am; il[i] = 0;
1497: }
1499: /* create and initialize a linked list for storing column indices of the active row k */
1500: nlnk = am + 1;
1501: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
1503: /* initial FreeSpace size is fill*(ai[am]+1) */
1504: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1505: current_space = free_space;
1506: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
1507: current_space_lvl = free_space_lvl;
1509: for (k=0; k<am; k++){ /* for each active row k */
1510: /* initialize lnk by the column indices of row rip[k] of A */
1511: nzk = 0;
1512: ncols = ai[rip[k]+1] - ai[rip[k]];
1513: if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1514: ncols_upper = 0;
1515: for (j=0; j<ncols; j++){
1516: i = *(aj + ai[rip[k]] + j); /* unpermuted column index */
1517: if (riip[i] >= k){ /* only take upper triangular entry */
1518: ajtmp[ncols_upper] = i;
1519: ncols_upper++;
1520: }
1521: }
1522: PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);
1523: nzk += nlnk;
1525: /* update lnk by computing fill-in for each pivot row to be merged in */
1526: prow = jl[k]; /* 1st pivot row */
1527:
1528: while (prow < k){
1529: nextprow = jl[prow];
1530:
1531: /* merge prow into k-th row */
1532: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1533: jmax = ui[prow+1];
1534: ncols = jmax-jmin;
1535: i = jmin - ui[prow];
1536: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1537: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
1538: j = *(uj - 1);
1539: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
1540: nzk += nlnk;
1542: /* update il and jl for prow */
1543: if (jmin < jmax){
1544: il[prow] = jmin;
1545: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1546: }
1547: prow = nextprow;
1548: }
1550: /* if free space is not available, make more free space */
1551: if (current_space->local_remaining<nzk) {
1552: i = am - k + 1; /* num of unfactored rows */
1553: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1554: PetscFreeSpaceGet(i,¤t_space);
1555: PetscFreeSpaceGet(i,¤t_space_lvl);
1556: reallocs++;
1557: }
1559: /* copy data into free_space and free_space_lvl, then initialize lnk */
1560: if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k);
1561: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1563: /* add the k-th row into il and jl */
1564: if (nzk > 1){
1565: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1566: jl[k] = jl[i]; jl[i] = k;
1567: il[k] = ui[k] + 1;
1568: }
1569: uj_ptr[k] = current_space->array;
1570: uj_lvl_ptr[k] = current_space_lvl->array;
1572: current_space->array += nzk;
1573: current_space->local_used += nzk;
1574: current_space->local_remaining -= nzk;
1576: current_space_lvl->array += nzk;
1577: current_space_lvl->local_used += nzk;
1578: current_space_lvl->local_remaining -= nzk;
1580: ui[k+1] = ui[k] + nzk;
1581: }
1583: #if defined(PETSC_USE_INFO)
1584: if (ai[am] != 0) {
1585: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1586: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1587: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1588: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1589: } else {
1590: PetscInfo(A,"Empty matrix.\n");
1591: }
1592: #endif
1594: ISRestoreIndices(perm,&rip);
1595: ISRestoreIndices(iperm,&riip);
1596: PetscFree(jl);
1597: PetscFree(ajtmp);
1599: /* destroy list of free space and other temporary array(s) */
1600: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1601: PetscFreeSpaceContiguous(&free_space,uj);
1602: PetscIncompleteLLDestroy(lnk,lnkbt);
1603: PetscFreeSpaceDestroy(free_space_lvl);
1605: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1607: /* put together the new matrix in MATSEQSBAIJ format */
1608: MatCreate(PETSC_COMM_SELF,fact);
1609: MatSetSizes(*fact,am,am,am,am);
1610: B = *fact;
1611: MatSetType(B,MATSEQSBAIJ);
1612: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1614: b = (Mat_SeqSBAIJ*)B->data;
1615: b->singlemalloc = PETSC_FALSE;
1616: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1617: b->j = uj;
1618: b->i = ui;
1619: b->diag = 0;
1620: b->ilen = 0;
1621: b->imax = 0;
1622: b->row = perm;
1623: b->col = perm;
1624: PetscObjectReference((PetscObject)perm);
1625: PetscObjectReference((PetscObject)perm);
1626: b->icol = iperm;
1627: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1628: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1629: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1630: b->maxnz = b->nz = ui[am];
1631: b->free_a = PETSC_TRUE;
1632: b->free_ij = PETSC_TRUE;
1633:
1634: B->factor = FACTOR_CHOLESKY;
1635: B->info.factor_mallocs = reallocs;
1636: B->info.fill_ratio_given = fill;
1637: if (ai[am] != 0) {
1638: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1639: } else {
1640: B->info.fill_ratio_needed = 0.0;
1641: }
1642: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1643: if (perm_identity){
1644: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1645: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1646: }
1647: return(0);
1648: }
1652: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1653: {
1654: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1655: Mat_SeqSBAIJ *b;
1656: Mat B;
1657: PetscErrorCode ierr;
1658: PetscTruth perm_identity;
1659: PetscReal fill = info->fill;
1660: PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1661: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1662: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1663: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1664: PetscBT lnkbt;
1665: IS iperm;
1668: /* check whether perm is the identity mapping */
1669: ISIdentity(perm,&perm_identity);
1670: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1671: ISGetIndices(iperm,&riip);
1672: ISGetIndices(perm,&rip);
1674: /* initialization */
1675: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1676: ui[0] = 0;
1678: /* jl: linked list for storing indices of the pivot rows
1679: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1680: PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);
1681: il = jl + am;
1682: cols = il + am;
1683: ui_ptr = (PetscInt**)(cols + am);
1684: for (i=0; i<am; i++){
1685: jl[i] = am; il[i] = 0;
1686: }
1688: /* create and initialize a linked list for storing column indices of the active row k */
1689: nlnk = am + 1;
1690: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
1692: /* initial FreeSpace size is fill*(ai[am]+1) */
1693: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1694: current_space = free_space;
1696: for (k=0; k<am; k++){ /* for each active row k */
1697: /* initialize lnk by the column indices of row rip[k] of A */
1698: nzk = 0;
1699: ncols = ai[rip[k]+1] - ai[rip[k]];
1700: if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix");
1701: ncols_upper = 0;
1702: for (j=0; j<ncols; j++){
1703: i = riip[*(aj + ai[rip[k]] + j)];
1704: if (i >= k){ /* only take upper triangular entry */
1705: cols[ncols_upper] = i;
1706: ncols_upper++;
1707: }
1708: }
1709: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
1710: nzk += nlnk;
1712: /* update lnk by computing fill-in for each pivot row to be merged in */
1713: prow = jl[k]; /* 1st pivot row */
1714:
1715: while (prow < k){
1716: nextprow = jl[prow];
1717: /* merge prow into k-th row */
1718: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1719: jmax = ui[prow+1];
1720: ncols = jmax-jmin;
1721: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1722: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
1723: nzk += nlnk;
1725: /* update il and jl for prow */
1726: if (jmin < jmax){
1727: il[prow] = jmin;
1728: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1729: }
1730: prow = nextprow;
1731: }
1733: /* if free space is not available, make more free space */
1734: if (current_space->local_remaining<nzk) {
1735: i = am - k + 1; /* num of unfactored rows */
1736: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1737: PetscFreeSpaceGet(i,¤t_space);
1738: reallocs++;
1739: }
1741: /* copy data into free space, then initialize lnk */
1742: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
1744: /* add the k-th row into il and jl */
1745: if (nzk-1 > 0){
1746: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1747: jl[k] = jl[i]; jl[i] = k;
1748: il[k] = ui[k] + 1;
1749: }
1750: ui_ptr[k] = current_space->array;
1751: current_space->array += nzk;
1752: current_space->local_used += nzk;
1753: current_space->local_remaining -= nzk;
1755: ui[k+1] = ui[k] + nzk;
1756: }
1758: #if defined(PETSC_USE_INFO)
1759: if (ai[am] != 0) {
1760: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1761: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1762: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1763: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1764: } else {
1765: PetscInfo(A,"Empty matrix.\n");
1766: }
1767: #endif
1769: ISRestoreIndices(perm,&rip);
1770: ISRestoreIndices(iperm,&riip);
1771: PetscFree(jl);
1773: /* destroy list of free space and other temporary array(s) */
1774: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1775: PetscFreeSpaceContiguous(&free_space,uj);
1776: PetscLLDestroy(lnk,lnkbt);
1778: /* put together the new matrix in MATSEQSBAIJ format */
1779: MatCreate(PETSC_COMM_SELF,fact);
1780: MatSetSizes(*fact,am,am,am,am);
1781: B = *fact;
1782: MatSetType(B,MATSEQSBAIJ);
1783: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1785: b = (Mat_SeqSBAIJ*)B->data;
1786: b->singlemalloc = PETSC_FALSE;
1787: b->free_a = PETSC_TRUE;
1788: b->free_ij = PETSC_TRUE;
1789: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1790: b->j = uj;
1791: b->i = ui;
1792: b->diag = 0;
1793: b->ilen = 0;
1794: b->imax = 0;
1795: b->row = perm;
1796: b->col = perm;
1797: PetscObjectReference((PetscObject)perm);
1798: PetscObjectReference((PetscObject)perm);
1799: b->icol = iperm;
1800: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1801: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1802: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1803: b->maxnz = b->nz = ui[am];
1804:
1805: B->factor = FACTOR_CHOLESKY;
1806: B->info.factor_mallocs = reallocs;
1807: B->info.fill_ratio_given = fill;
1808: if (ai[am] != 0) {
1809: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1810: } else {
1811: B->info.fill_ratio_needed = 0.0;
1812: }
1813: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1814: if (perm_identity){
1815: (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1816: (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1817: (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1818: (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;
1819: } else {
1820: (*fact)->ops->solve = MatSolve_SeqSBAIJ_1;
1821: (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1;
1822: (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1;
1823: (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1;
1824: }
1825: return(0);
1826: }