Actual source code: bdfact.c

  1: #define PETSCMAT_DLL

  3: /* Block diagonal matrix format - factorization and triangular solves */

 5:  #include src/mat/impls/bdiag/seq/bdiag.h
 6:  #include src/inline/ilu.h

 10: PetscErrorCode MatILUFactorSymbolic_SeqBDiag(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
 11: {
 12:   PetscTruth     idn;

 16:   if (A->rmap.N != A->cmap.n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
 17:   if (isrow) {
 18:     ISIdentity(isrow,&idn);
 19:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
 20:   }
 21:   if (iscol) {
 22:     ISIdentity(iscol,&idn);
 23:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
 24:   }
 25:   if (info->levels != 0) {
 26:     SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
 27:   }
 28:   MatConvert(A,MATSAME,MAT_INITIAL_MATRIX,B);

 30:   /* Must set to zero for repeated calls with different nonzero structure */
 31:   (*B)->factor = 0;
 32:   return(0);
 33: }

 37: PetscErrorCode MatILUFactor_SeqBDiag(Mat A,IS isrow,IS iscol,MatFactorInfo *info)
 38: {
 39:   PetscTruth     idn;

 43:   /* For now, no fill is allocated in symbolic factorization phase, so we
 44:      directly use the input matrix for numeric factorization. */
 45:   if (A->rmap.N != A->cmap.n) SETERRQ(PETSC_ERR_SUP,"Matrix must be square");
 46:   if (isrow) {
 47:     ISIdentity(isrow,&idn);
 48:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
 49:   }
 50:   if (iscol) {
 51:     ISIdentity(iscol,&idn);
 52:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
 53:   }
 54:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only ILU(0) is supported");
 55:   MatLUFactorNumeric(A,info,&A);
 56:   return(0);
 57: }

 59: /* --------------------------------------------------------------------------*/
 62: PetscErrorCode MatLUFactorNumeric_SeqBDiag_N(Mat A,MatFactorInfo *info,Mat *B)
 63: {
 64:   Mat            C = *B;
 65:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
 66:   PetscInt       k,d,d2,dgk,elim_row,elim_col,bs = A->rmap.bs,knb,knb2,bs2 = bs*bs;
 68:   PetscInt       dnum,nd = a->nd,mblock = a->mblock,nblock = a->nblock;
 69:   PetscInt       *diag = a->diag, m = A->rmap.N,mainbd = a->mainbd,*dgptr,len,i;
 70:   PetscScalar    **dv = a->diagv,*dd = dv[mainbd],*v_work;
 71:   PetscScalar    *multiplier;

 74:   /* Copy input matrix to factored matrix if we've already factored the
 75:      matrix before AND the nonzero structure remains the same.  This is done
 76:      in symbolic factorization the first time through, but there's no symbolic
 77:      factorization for successive calls with same matrix sparsity structure. */
 78:   if (C->factor == FACTOR_LU) {
 79:     for (i=0; i<a->nd; i++) {
 80:       len = a->bdlen[i] * bs2 * sizeof(PetscScalar);
 81:       d   = diag[i];
 82:       if (d > 0) {
 83:         PetscMemcpy(dv[i]+bs2*d,a1->diagv[i]+bs2*d,len);
 84:       } else {
 85:         PetscMemcpy(dv[i],a1->diagv[i],len);
 86:       }
 87:     }
 88:   }

 90:   if (!a->pivot) {
 91:     PetscMalloc((m+1)*sizeof(PetscInt),&a->pivot);
 92:     PetscLogObjectMemory(C,m*sizeof(PetscInt));
 93:   }
 94:   PetscMalloc((bs2+bs+1)*sizeof(PetscScalar),&v_work);
 95:   multiplier = v_work + bs;
 96:   PetscMalloc((mblock+nblock+1)*sizeof(PetscInt),&dgptr);
 97:   PetscMemzero(dgptr,(mblock+nblock)*sizeof(PetscInt));
 98:   for (k=0; k<nd; k++) dgptr[diag[k]+mblock] = k+1;
 99:   for (k=0; k<mblock; k++) { /* k = block pivot_row */
100:     knb = k*bs; knb2 = knb*bs;
101:     /* invert the diagonal block */
102:     Kernel_A_gets_inverse_A(bs,dd+knb2,a->pivot+knb,v_work);
103:     for (d=mainbd-1; d>=0; d--) {
104:       elim_row = k + diag[d];
105:       if (elim_row < mblock) { /* sweep down */
106:         /* dv[d][knb2]: test if entire block is zero? */
107:         Kernel_A_gets_A_times_B(bs,&dv[d][elim_row*bs2],dd+knb2,multiplier);
108:         for (d2=d+1; d2<nd; d2++) {
109:           elim_col = elim_row - diag[d2];
110:           if (elim_col >=0 && elim_col < nblock) {
111:             dgk = k - elim_col;
112:             if ((dnum = dgptr[dgk+mblock])) {
113:               Kernel_A_gets_A_minus_B_times_C(bs,&dv[d2][elim_row*bs2],
114:                              &dv[d][elim_row*bs2],&dv[dnum-1][knb2]);
115:             }
116:           }
117:         }
118:       }
119:     }
120:   }
121:   PetscFree(dgptr);
122:   PetscFree(v_work);
123:   if (!a->solvework) {
124:     PetscMalloc(bs*sizeof(PetscScalar),&a->solvework);
125:   }
126:   C->factor = FACTOR_LU;
127:   return(0);
128: }

132: PetscErrorCode MatLUFactorNumeric_SeqBDiag_1(Mat A,MatFactorInfo *info,Mat *B)
133: {
134:   Mat            C = *B;
135:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)C->data,*a1 = (Mat_SeqBDiag*)A->data;
137:   PetscInt       k,d,d2,dgk,elim_row,elim_col,dnum,nd = a->nd,i,len;
138:   PetscInt       *diag = a->diag,n = A->cmap.n,m = A->rmap.N,mainbd = a->mainbd,*dgptr;
139:   PetscScalar    **dv = a->diagv,*dd = dv[mainbd],mult;

142:   /* Copy input matrix to factored matrix if we've already factored the
143:      matrix before AND the nonzero structure remains the same.  This is done
144:      in symbolic factorization the first time through, but there's no symbolic
145:      factorization for successive calls with same matrix sparsity structure. */
146:   if (C->factor == FACTOR_LU) {
147:     for (i=0; i<nd; i++) {
148:       len = a->bdlen[i] * sizeof(PetscScalar);
149:       d   = diag[i];
150:       if (d > 0) {
151:         PetscMemcpy(dv[i]+d,a1->diagv[i]+d,len);
152:       } else {
153:         PetscMemcpy(dv[i],a1->diagv[i],len);
154:       }
155:     }
156:   }

158:   PetscMalloc((m+n+1)*sizeof(PetscInt),&dgptr);
159:   PetscMemzero(dgptr,(m+n)*sizeof(PetscInt));
160:   for (k=0; k<nd; k++) dgptr[diag[k]+m] = k+1;
161:   for (k=0; k<m; k++) { /* k = pivot_row */
162:     dd[k] = 1.0/dd[k];
163:     for (d=mainbd-1; d>=0; d--) {
164:       elim_row = k + diag[d];
165:       if (elim_row < m) { /* sweep down */
166:         if (dv[d][elim_row] != 0.0) {
167:           dv[d][elim_row] *= dd[k];
168:           mult = dv[d][elim_row];
169:           for (d2=d+1; d2<nd; d2++) {
170:             elim_col = elim_row - diag[d2];
171:             dgk = k - elim_col;
172:             if (elim_col >=0 && elim_col < n) {
173:               if ((dnum = dgptr[dgk+m])) {
174:                 dv[d2][elim_row] -= mult * dv[dnum-1][k];
175:               }
176:             }
177:           }
178:         }
179:       }
180:     }
181:   }
182:   PetscFree(dgptr);
183:   C->factor = FACTOR_LU;
184:   return(0);
185: }

187: /* -----------------------------------------------------------------*/

191: PetscErrorCode MatSolve_SeqBDiag_1(Mat A,Vec xx,Vec yy)
192: {
193:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
195:   PetscInt       i,d,loc,mainbd = a->mainbd;
196:   PetscInt       n = A->cmap.n,m = A->rmap.N,*diag = a->diag,col;
197:   PetscScalar    *x,*y,*dd = a->diagv[mainbd],sum,**dv = a->diagv;

200:   VecGetArray(xx,&x);
201:   VecGetArray(yy,&y);
202:   /* forward solve the lower triangular part */
203:   for (i=0; i<m; i++) {
204:     sum = x[i];
205:     for (d=0; d<mainbd; d++) {
206:       loc = i - diag[d];
207:       if (loc >= 0) sum -= dv[d][i] * y[loc];
208:     }
209:     y[i] = sum;
210:   }
211:   /* backward solve the upper triangular part */
212:   for (i=m-1; i>=0; i--) {
213:     sum = y[i];
214:     for (d=mainbd+1; d<a->nd; d++) {
215:       col = i - diag[d];
216:       if (col < n) sum -= dv[d][i] * y[col];
217:     }
218:     y[i] = sum*dd[i];
219:   }
220:   VecRestoreArray(xx,&x);
221:   VecRestoreArray(yy,&y);
222:   PetscLogFlops(2*a->nz - A->cmap.n);
223:   return(0);
224: }

228: PetscErrorCode MatSolve_SeqBDiag_2(Mat A,Vec xx,Vec yy)
229: {
230:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
231:   PetscInt       i,d,loc,mainbd = a->mainbd;
232:   PetscInt       mblock = a->mblock,nblock = a->nblock,inb,inb2;
234:   PetscInt       m = A->rmap.N,*diag = a->diag,col;
235:   PetscScalar    *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
236:   PetscScalar    w0,w1,sum0,sum1;

239:   VecGetArray(xx,&x);
240:   VecGetArray(yy,&y);
241:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

243:   /* forward solve the lower triangular part */
244:   if (mainbd != 0) {
245:     inb = 0;
246:     for (i=0; i<mblock; i++) {
247:       sum0 = sum1 = 0.0;
248:       for (d=0; d<mainbd; d++) {
249:         loc = 2*(i - diag[d]);
250:         if (loc >= 0) {
251:           dvt = &dv[d][4*i];
252:           w0 = y[loc]; w1 = y[loc+1];
253:           sum0 += dvt[0]*w0 + dvt[2]*w1;
254:           sum1 += dvt[1]*w0 + dvt[3]*w1;
255:         }
256:       }
257:       y[inb] -= sum0; y[inb+1] -= sum1;

259:       inb += 2;
260:     }
261:   }
262:   /* backward solve the upper triangular part */
263:   inb = 2*(mblock-1); inb2 = 2*inb;
264:   for (i=mblock-1; i>=0; i--) {
265:     sum0 = y[inb]; sum1 = y[inb+1];
266:     for (d=mainbd+1; d<a->nd; d++) {
267:       col = 2*(i - diag[d]);
268:       if (col < 2*nblock) {
269:         dvt = &dv[d][4*i];
270:         w0 = y[col]; w1 = y[col+1];
271:         sum0 -= dvt[0]*w0 + dvt[2]*w1;
272:         sum1 -= dvt[1]*w0 + dvt[3]*w1;
273:       }
274:     }
275:     dvt = dd+inb2;
276:     y[inb]   = dvt[0]*sum0 + dvt[2]*sum1;
277:     y[inb+1] = dvt[1]*sum0 + dvt[3]*sum1;
278:     inb -= 2; inb2 -= 4;
279:   }
280:   VecRestoreArray(xx,&x);
281:   VecRestoreArray(yy,&y);
282:   PetscLogFlops(2*a->nz - A->cmap.n);
283:   return(0);
284: }

288: PetscErrorCode MatSolve_SeqBDiag_3(Mat A,Vec xx,Vec yy)
289: {
290:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
291:   PetscInt       i,d,loc,mainbd = a->mainbd;
292:   PetscInt       mblock = a->mblock,nblock = a->nblock,inb,inb2;
294:   PetscInt       m = A->rmap.N,*diag = a->diag,col;
295:   PetscScalar    *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
296:   PetscScalar    w0,w1,w2,sum0,sum1,sum2;

299:   VecGetArray(xx,&x);
300:   VecGetArray(yy,&y);
301:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

303:   /* forward solve the lower triangular part */
304:   if (mainbd != 0) {
305:     inb = 0;
306:     for (i=0; i<mblock; i++) {
307:       sum0 = sum1 = sum2 = 0.0;
308:       for (d=0; d<mainbd; d++) {
309:         loc = 3*(i - diag[d]);
310:         if (loc >= 0) {
311:           dvt = &dv[d][9*i];
312:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];
313:           sum0 += dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
314:           sum1 += dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
315:           sum2 += dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
316:         }
317:       }
318:       y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;
319:       inb += 3;
320:     }
321:   }
322:   /* backward solve the upper triangular part */
323:   inb = 3*(mblock-1); inb2 = 3*inb;
324:   for (i=mblock-1; i>=0; i--) {
325:     sum0 = y[inb]; sum1 = y[inb+1]; sum2 =  y[inb+2];
326:     for (d=mainbd+1; d<a->nd; d++) {
327:       col = 3*(i - diag[d]);
328:       if (col < 3*nblock) {
329:         dvt = &dv[d][9*i];
330:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];
331:         sum0 -= dvt[0]*w0 + dvt[3]*w1 + dvt[6]*w2;
332:         sum1 -= dvt[1]*w0 + dvt[4]*w1 + dvt[7]*w2;
333:         sum2 -= dvt[2]*w0 + dvt[5]*w1 + dvt[8]*w2;
334:       }
335:     }
336:     dvt = dd+inb2;
337:     y[inb]   = dvt[0]*sum0 + dvt[3]*sum1 + dvt[6]*sum2;
338:     y[inb+1] = dvt[1]*sum0 + dvt[4]*sum1 + dvt[7]*sum2;
339:     y[inb+2] = dvt[2]*sum0 + dvt[5]*sum1 + dvt[8]*sum2;
340:     inb -= 3; inb2 -= 9;
341:   }
342:   VecRestoreArray(xx,&x);
343:   VecRestoreArray(yy,&y);
344:   PetscLogFlops(2*a->nz - A->cmap.n);
345:   return(0);
346: }

350: PetscErrorCode MatSolve_SeqBDiag_4(Mat A,Vec xx,Vec yy)
351: {
352:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
353:   PetscInt       i,d,loc,mainbd = a->mainbd;
354:   PetscInt       mblock = a->mblock,nblock = a->nblock,inb,inb2;
356:   PetscInt       m = A->rmap.N,*diag = a->diag,col;
357:   PetscScalar    *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
358:   PetscScalar    w0,w1,w2,w3,sum0,sum1,sum2,sum3;

361:   VecGetArray(xx,&x);
362:   VecGetArray(yy,&y);
363:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

365:   /* forward solve the lower triangular part */
366:   if (mainbd != 0) {
367:     inb = 0;
368:     for (i=0; i<mblock; i++) {
369:       sum0 = sum1 = sum2 = sum3 = 0.0;
370:       for (d=0; d<mainbd; d++) {
371:         loc = 4*(i - diag[d]);
372:         if (loc >= 0) {
373:           dvt = &dv[d][16*i];
374:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];
375:           sum0 += dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2  + dvt[12]*w3;
376:           sum1 += dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2  + dvt[13]*w3;
377:           sum2 += dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
378:           sum3 += dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
379:         }
380:       }
381:       y[inb] -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
382:       inb += 4;
383:     }
384:   }
385:   /* backward solve the upper triangular part */
386:   inb = 4*(mblock-1); inb2 = 4*inb;
387:   for (i=mblock-1; i>=0; i--) {
388:     sum0 = y[inb]; sum1 = y[inb+1]; sum2 =  y[inb+2]; sum3 =  y[inb+3];
389:     for (d=mainbd+1; d<a->nd; d++) {
390:       col = 4*(i - diag[d]);
391:       if (col < 4*nblock) {
392:         dvt = &dv[d][16*i];
393:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];
394:         sum0 -= dvt[0]*w0 + dvt[4]*w1 + dvt[8]*w2  + dvt[12]*w3;
395:         sum1 -= dvt[1]*w0 + dvt[5]*w1 + dvt[9]*w2  + dvt[13]*w3;
396:         sum2 -= dvt[2]*w0 + dvt[6]*w1 + dvt[10]*w2 + dvt[14]*w3;
397:         sum3 -= dvt[3]*w0 + dvt[7]*w1 + dvt[11]*w2 + dvt[15]*w3;
398:       }
399:     }
400:     dvt = dd+inb2;
401:     y[inb]   = dvt[0]*sum0 + dvt[4]*sum1 + dvt[8]*sum2 + dvt[12]*sum3;
402:     y[inb+1] = dvt[1]*sum0 + dvt[5]*sum1 + dvt[9]*sum2 + dvt[13]*sum3;
403:     y[inb+2] = dvt[2]*sum0 + dvt[6]*sum1 + dvt[10]*sum2 + dvt[14]*sum3;
404:     y[inb+3] = dvt[3]*sum0 + dvt[7]*sum1 + dvt[11]*sum2 + dvt[15]*sum3;
405:     inb -= 4; inb2 -= 16;
406:   }
407:   VecRestoreArray(xx,&x);
408:   VecRestoreArray(yy,&y);
409:   PetscLogFlops(2*a->nz - A->cmap.n);
410:   return(0);
411: }

415: PetscErrorCode MatSolve_SeqBDiag_5(Mat A,Vec xx,Vec yy)
416: {
417:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
418:   PetscInt       i,d,loc,mainbd = a->mainbd;
419:   PetscInt       mblock = a->mblock,nblock = a->nblock,inb,inb2;
421:   PetscInt       m = A->rmap.N,*diag = a->diag,col;
422:   PetscScalar    *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv,*dvt;
423:   PetscScalar    w0,w1,w2,w3,w4,sum0,sum1,sum2,sum3,sum4;

426:   VecGetArray(xx,&x);
427:   VecGetArray(yy,&y);
428:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

430:   /* forward solve the lower triangular part */
431:   if (mainbd != 0) {
432:     inb = 0;
433:     for (i=0; i<mblock; i++) {
434:       sum0 = sum1 = sum2 = sum3 = sum4 = 0.0;
435:       for (d=0; d<mainbd; d++) {
436:         loc = 5*(i - diag[d]);
437:         if (loc >= 0) {
438:           dvt = &dv[d][25*i];
439:           w0 = y[loc]; w1 = y[loc+1]; w2 = y[loc+2];w3 = y[loc+3];w4 = y[loc+4];
440:           sum0 += dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
441:           sum1 += dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
442:           sum2 += dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
443:           sum3 += dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
444:           sum4 += dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
445:         }
446:       }
447:       y[inb]   -= sum0; y[inb+1] -= sum1; y[inb+2] -= sum2;y[inb+3] -= sum3;
448:       y[inb+4] -= sum4;
449:       inb += 5;
450:     }
451:   }
452:   /* backward solve the upper triangular part */
453:   inb = 5*(mblock-1); inb2 = 5*inb;
454:   for (i=mblock-1; i>=0; i--) {
455:     sum0 = y[inb];sum1 = y[inb+1];sum2 = y[inb+2];sum3 = y[inb+3];sum4 = y[inb+4];
456:     for (d=mainbd+1; d<a->nd; d++) {
457:       col = 5*(i - diag[d]);
458:       if (col < 5*nblock) {
459:         dvt = &dv[d][25*i];
460:         w0 = y[col]; w1 = y[col+1];w2 = y[col+2];w3 = y[col+3];w4 = y[col+4];
461:         sum0 -= dvt[0]*w0 + dvt[5]*w1 + dvt[10]*w2 + dvt[15]*w3 + dvt[20]*w4;
462:         sum1 -= dvt[1]*w0 + dvt[6]*w1 + dvt[11]*w2 + dvt[16]*w3 + dvt[21]*w4;
463:         sum2 -= dvt[2]*w0 + dvt[7]*w1 + dvt[12]*w2 + dvt[17]*w3 + dvt[22]*w4;
464:         sum3 -= dvt[3]*w0 + dvt[8]*w1 + dvt[13]*w2 + dvt[18]*w3 + dvt[23]*w4;
465:         sum4 -= dvt[4]*w0 + dvt[9]*w1 + dvt[14]*w2 + dvt[19]*w3 + dvt[24]*w4;
466:       }
467:     }
468:     dvt = dd+inb2;
469:     y[inb]   = dvt[0]*sum0 + dvt[5]*sum1 + dvt[10]*sum2 + dvt[15]*sum3
470:                + dvt[20]*sum4;
471:     y[inb+1] = dvt[1]*sum0 + dvt[6]*sum1 + dvt[11]*sum2 + dvt[16]*sum3
472:                + dvt[21]*sum4;
473:     y[inb+2] = dvt[2]*sum0 + dvt[7]*sum1 + dvt[12]*sum2 + dvt[17]*sum3
474:                + dvt[22]*sum4;
475:     y[inb+3] = dvt[3]*sum0 + dvt[8]*sum1 + dvt[13]*sum2 + dvt[18]*sum3
476:                + dvt[23]*sum4;
477:     y[inb+4] = dvt[4]*sum0 + dvt[9]*sum1 + dvt[14]*sum2 + dvt[19]*sum3
478:                + dvt[24]*sum4;
479:     inb -= 5; inb2 -= 25;
480:   }
481:   VecRestoreArray(xx,&x);
482:   VecRestoreArray(yy,&y);
483:   PetscLogFlops(2*a->nz - A->cmap.n);
484:   return(0);
485: }

489: PetscErrorCode MatSolve_SeqBDiag_N(Mat A,Vec xx,Vec yy)
490: {
491:   Mat_SeqBDiag   *a = (Mat_SeqBDiag*)A->data;
492:   PetscInt       i,d,loc,mainbd = a->mainbd;
493:   PetscInt       mblock = a->mblock,nblock = a->nblock,inb,inb2;
495:   PetscInt       bs = A->rmap.bs,m = A->rmap.N,*diag = a->diag,col,bs2 = bs*bs;
496:   PetscScalar    *x,*y,*dd = a->diagv[mainbd],**dv = a->diagv;
497:   PetscScalar    *work = a->solvework;

500:   VecGetArray(xx,&x);
501:   VecGetArray(yy,&y);
502:   PetscMemcpy(y,x,m*sizeof(PetscScalar));

504:   /* forward solve the lower triangular part */
505:   if (mainbd != 0) {
506:     inb = 0;
507:     for (i=0; i<mblock; i++) {
508:       for (d=0; d<mainbd; d++) {
509:         loc = i - diag[d];
510:         if (loc >= 0) {
511:           Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][i*bs2],y+loc*bs);
512:         }
513:       }
514:       inb += bs;
515:     }
516:   }
517:   /* backward solve the upper triangular part */
518:   inb = bs*(mblock-1); inb2 = inb*bs;
519:   for (i=mblock-1; i>=0; i--) {
520:     for (d=mainbd+1; d<a->nd; d++) {
521:       col = i - diag[d];
522:       if (col < nblock) {
523:         Kernel_v_gets_v_minus_A_times_w(bs,y+inb,&dv[d][inb2],y+col*bs);
524:       }
525:     }
526:     Kernel_w_gets_A_times_v(bs,y+inb,dd+inb2,work);
527:     PetscMemcpy(y+inb,work,bs*sizeof(PetscScalar));
528:     inb -= bs; inb2 -= bs2;
529:   }
530:   VecRestoreArray(xx,&x);
531:   VecRestoreArray(yy,&y);
532:   PetscLogFlops(2*a->nz - A->cmap.n);
533:   return(0);
534: }