Actual source code: sbaijfact2.c

  1: #define PETSCMAT_DLL

  3: /*
  4:     Factorization code for SBAIJ format. 
  5: */

 7:  #include src/mat/impls/sbaij/seq/sbaij.h
 8:  #include src/mat/impls/baij/seq/baij.h
 9:  #include src/inline/ilu.h
 10:  #include src/inline/dot.h

 14: PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
 15: {
 16:   Mat_SeqSBAIJ    *a=(Mat_SeqSBAIJ*)A->data;
 17:   IS              isrow=a->row;
 18:   PetscInt        mbs=a->mbs,*ai=a->i,*aj=a->j;
 19:   PetscErrorCode  ierr;
 20:   PetscInt        nz,*vj,k,*r,idx,k1;
 21:   PetscInt        bs=A->rmap.bs,bs2 = a->bs2;
 22:   MatScalar       *aa=a->a,*v,*diag;
 23:   PetscScalar     *x,*xk,*xj,*b,*xk_tmp,*t;

 26:   VecGetArray(bb,&b);
 27:   VecGetArray(xx,&x);
 28:   t  = a->solve_work;
 29:   ISGetIndices(isrow,&r);
 30:   PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);

 32:   /* solve U^T * D * y = b by forward substitution */
 33:   xk = t;
 34:   for (k=0; k<mbs; k++) { /* t <- perm(b) */
 35:     idx   = bs*r[k];
 36:     for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
 37:   }
 38:   for (k=0; k<mbs; k++){
 39:     v  = aa + bs2*ai[k];
 40:     xk = t + k*bs;      /* Dk*xk = k-th block of x */
 41:     PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar)); /* xk_tmp <- xk */
 42:     nz = ai[k+1] - ai[k];
 43:     vj = aj + ai[k];
 44:     xj = t + (*vj)*bs;  /* *vj-th block of x, *vj>k */
 45:     while (nz--) {
 46:       /* x(:) += U(k,:)^T*(Dk*xk) */
 47:       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
 48:       vj++; xj = t + (*vj)*bs;
 49:       v += bs2;
 50:     }
 51:     /* xk = inv(Dk)*(Dk*xk) */
 52:     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
 53:     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
 54:   }

 56:   /* solve U*x = y by back substitution */
 57:   for (k=mbs-1; k>=0; k--){
 58:     v  = aa + bs2*ai[k];
 59:     xk = t + k*bs;        /* xk */
 60:     nz = ai[k+1] - ai[k];
 61:     vj = aj + ai[k];
 62:     xj = t + (*vj)*bs;
 63:     while (nz--) {
 64:       /* xk += U(k,:)*x(:) */
 65:       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
 66:       vj++;
 67:       v += bs2; xj = t + (*vj)*bs;
 68:     }
 69:     idx   = bs*r[k];
 70:     for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
 71:   }

 73:   PetscFree(xk_tmp);
 74:   ISRestoreIndices(isrow,&r);
 75:   VecRestoreArray(bb,&b);
 76:   VecRestoreArray(xx,&x);
 77:   PetscLogFlops(bs2*(2*a->nz + mbs));
 78:   return(0);
 79: }

 83: PetscErrorCode MatForwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
 84: {
 86:   SETERRQ(1,"not implemented yet");
 87:   return(0);
 88: }

 92: PetscErrorCode MatBackwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
 93: {
 95:   SETERRQ(1,"not implemented yet");
 96:   return(0);
 97: }

101: PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
102: {
104:   PetscInt       nz,*vj,k;
105:   PetscInt       bs2 = bs*bs;
106:   MatScalar      *v,*diag;
107:   PetscScalar    *xk,*xj,*xk_tmp;
108: 
110:   PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);
111:   for (k=0; k<mbs; k++){
112:     v  = aa + bs2*ai[k];
113:     xk = x + k*bs;      /* Dk*xk = k-th block of x */
114:     PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar)); /* xk_tmp <- xk */
115:     nz = ai[k+1] - ai[k];
116:     vj = aj + ai[k];
117:     xj = x + (*vj)*bs;  /* *vj-th block of x, *vj>k */
118:     while (nz--) {
119:       /* x(:) += U(k,:)^T*(Dk*xk) */
120:       Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
121:       vj++; xj = x + (*vj)*bs;
122:       v += bs2;
123:     }
124:     /* xk = inv(Dk)*(Dk*xk) */
125:     diag = aa+k*bs2;                            /* ptr to inv(Dk) */
126:     Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
127:   }
128:   PetscFree(xk_tmp);
129:   return(0);
130: }

134: PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
135: {
136:   PetscInt       nz,*vj,k;
137:   PetscInt       bs2 = bs*bs;
138:   MatScalar      *v;
139:   PetscScalar    *xk,*xj;

142:   for (k=mbs-1; k>=0; k--){
143:     v  = aa + bs2*ai[k];
144:     xk = x + k*bs;        /* xk */
145:     nz = ai[k+1] - ai[k];
146:     vj = aj + ai[k];
147:     xj = x + (*vj)*bs;
148:     while (nz--) {
149:       /* xk += U(k,:)*x(:) */
150:       Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
151:       vj++;
152:       v += bs2; xj = x + (*vj)*bs;
153:     }
154:   }
155:   return(0);
156: }

160: PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
161: {
162:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
164:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
165:   PetscInt       bs=A->rmap.bs;
166:   MatScalar      *aa=a->a;
167:   PetscScalar    *x,*b;
168: #if defined(PETSC_USE_LOG)
169:   PetscInt       bs2 = a->bs2;
170: #endif

173:   VecGetArray(bb,&b);
174:   VecGetArray(xx,&x);

176:   /* solve U^T * D * y = b by forward substitution */
177:   PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar)); /* x <- b */
178:   ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);

180:   /* solve U*x = y by back substitution */
181:   BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);

183:   VecRestoreArray(bb,&b);
184:   VecRestoreArray(xx,&x);
185:   PetscLogFlops(bs2*(2*a->nz + mbs));
186:   return(0);
187: }

191: PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
192: {
193:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
195:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
196:   PetscInt       bs=A->rmap.bs;
197:   MatScalar      *aa=a->a;
198:   PetscScalar    *x,*b;
199: #if defined(PETSC_USE_LOG)
200:   PetscInt       bs2 = a->bs2;
201: #endif

204:   VecGetArray(bb,&b);
205:   VecGetArray(xx,&x);
206:   PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar)); /* x <- b */
207:   ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);
208:   VecRestoreArray(bb,&b);
209:   VecRestoreArray(xx,&x);
210:   PetscLogFlops(bs2*a->nz + A->rmap.N);
211:   return(0);
212: }

216: PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
217: {
218:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
220:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
221:   PetscInt       bs=A->rmap.bs;
222:   MatScalar      *aa=a->a;
223:   PetscScalar    *x,*b;
224: #if defined(PETSC_USE_LOG)
225:   PetscInt       bs2 = a->bs2;
226: #endif

229:   VecGetArray(bb,&b);
230:   VecGetArray(xx,&x);
231:   PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));
232:   BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);
233:   VecRestoreArray(bb,&b);
234:   VecRestoreArray(xx,&x);
235:   PetscLogFlops(bs2*a->nz);
236:   return(0);
237: }

241: PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
242: {
243:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
244:   IS             isrow=a->row;
245:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
247:   PetscInt       nz,*vj,k,*r,idx;
248:   MatScalar      *aa=a->a,*v,*d;
249:   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;

252:   VecGetArray(bb,&b);
253:   VecGetArray(xx,&x);
254:   t  = a->solve_work;
255:   ISGetIndices(isrow,&r);

257:   /* solve U^T * D * y = b by forward substitution */
258:   tp = t;
259:   for (k=0; k<mbs; k++) { /* t <- perm(b) */
260:     idx   = 7*r[k];
261:     tp[0] = b[idx];
262:     tp[1] = b[idx+1];
263:     tp[2] = b[idx+2];
264:     tp[3] = b[idx+3];
265:     tp[4] = b[idx+4];
266:     tp[5] = b[idx+5];
267:     tp[6] = b[idx+6];
268:     tp += 7;
269:   }
270: 
271:   for (k=0; k<mbs; k++){
272:     v  = aa + 49*ai[k];
273:     vj = aj + ai[k];
274:     tp = t + k*7;
275:     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
276:     nz = ai[k+1] - ai[k];
277:     tp = t + (*vj)*7;
278:     while (nz--) {
279:       tp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
280:       tp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
281:       tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
282:       tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
283:       tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
284:       tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
285:       tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
286:       vj++; tp = t + (*vj)*7;
287:       v += 49;
288:     }

290:     /* xk = inv(Dk)*(Dk*xk) */
291:     d  = aa+k*49;          /* ptr to inv(Dk) */
292:     tp    = t + k*7;
293:     tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
294:     tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
295:     tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
296:     tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
297:     tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
298:     tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
299:     tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
300:   }

302:   /* solve U*x = y by back substitution */
303:   for (k=mbs-1; k>=0; k--){
304:     v  = aa + 49*ai[k];
305:     vj = aj + ai[k];
306:     tp    = t + k*7;
307:     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  x6=tp[6]; /* xk */
308:     nz = ai[k+1] - ai[k];
309: 
310:     tp = t + (*vj)*7;
311:     while (nz--) {
312:       /* xk += U(k,:)*x(:) */
313:       x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
314:       x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
315:       x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
316:       x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
317:       x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
318:       x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
319:       x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
320:       vj++; tp = t + (*vj)*7;
321:       v += 49;
322:     }
323:     tp    = t + k*7;
324:     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
325:     idx   = 7*r[k];
326:     x[idx]     = x0;
327:     x[idx+1]   = x1;
328:     x[idx+2]   = x2;
329:     x[idx+3]   = x3;
330:     x[idx+4]   = x4;
331:     x[idx+5]   = x5;
332:     x[idx+6]   = x6;
333:   }

335:   ISRestoreIndices(isrow,&r);
336:   VecRestoreArray(bb,&b);
337:   VecRestoreArray(xx,&x);
338:   PetscLogFlops(49*(2*a->nz + mbs));
339:   return(0);
340: }

344: PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
345: {
346:   MatScalar      *v,*d;
347:   PetscScalar    *xp,x0,x1,x2,x3,x4,x5,x6;
348:   PetscInt       nz,*vj,k;

351:   for (k=0; k<mbs; k++){
352:     v  = aa + 49*ai[k];
353:     xp = x + k*7;
354:     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
355:     nz = ai[k+1] - ai[k];
356:     vj = aj + ai[k];
357:     xp = x + (*vj)*7;
358:     while (nz--) {
359:       /* x(:) += U(k,:)^T*(Dk*xk) */
360:       xp[0]+=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
361:       xp[1]+=  v[7]*x0 +  v[8]*x1 +  v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
362:       xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
363:       xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
364:       xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
365:       xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
366:       xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
367:       vj++; xp = x + (*vj)*7;
368:       v += 49;
369:     }
370:     /* xk = inv(Dk)*(Dk*xk) */
371:     d  = aa+k*49;          /* ptr to inv(Dk) */
372:     xp = x + k*7;
373:     xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
374:     xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
375:     xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
376:     xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
377:     xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
378:     xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
379:     xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
380:   }
381:   return(0);
382: }

386: PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
387: {
388:   MatScalar      *v;
389:   PetscScalar    *xp,x0,x1,x2,x3,x4,x5,x6;
390:   PetscInt       nz,*vj,k;

393:   for (k=mbs-1; k>=0; k--){
394:     v  = aa + 49*ai[k];
395:     xp = x + k*7;
396:     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
397:     nz = ai[k+1] - ai[k];
398:     vj = aj + ai[k];
399:     xp = x + (*vj)*7;
400:     while (nz--) {
401:       /* xk += U(k,:)*x(:) */
402:       x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
403:       x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
404:       x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
405:       x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
406:       x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
407:       x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
408:       x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
409:       vj++;
410:       v += 49; xp = x + (*vj)*7;
411:     }
412:     xp = x + k*7;
413:     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
414:   }
415:   return(0);
416: }

420: PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
421: {
422:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
424:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
425:   MatScalar      *aa=a->a;
426:   PetscScalar    *x,*b;

429:   VecGetArray(bb,&b);
430:   VecGetArray(xx,&x);
431: 
432:   /* solve U^T * D * y = b by forward substitution */
433:   PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar)); /* x <- b */
434:   ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);

436:   /* solve U*x = y by back substitution */
437:   BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);

439:   VecRestoreArray(bb,&b);
440:   VecRestoreArray(xx,&x);
441:   PetscLogFlops(49*(2*a->nz + mbs));
442:   return(0);
443: }

447: PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
448: {
449:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
451:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
452:   MatScalar      *aa=a->a;
453:   PetscScalar    *x,*b;

456:   VecGetArray(bb,&b);
457:   VecGetArray(xx,&x);
458:   PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));
459:   ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);
460:   VecRestoreArray(bb,&b);
461:   VecRestoreArray(xx,&x);
462:   PetscLogFlops(49*a->nz + mbs);
463:   return(0);
464: }

468: PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
469: {
470:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
472:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
473:   MatScalar      *aa=a->a;
474:   PetscScalar    *x,*b;

477:   VecGetArray(bb,&b);
478:   VecGetArray(xx,&x);
479:   PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));
480:   BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);
481:   VecRestoreArray(bb,&b);
482:   VecRestoreArray(xx,&x);
483:   PetscLogFlops(49*a->nz);
484:   return(0);
485: }

489: PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
490: {
491:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
492:   IS             isrow=a->row;
493:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
495:   PetscInt       nz,*vj,k,*r,idx;
496:   MatScalar      *aa=a->a,*v,*d;
497:   PetscScalar    *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;

500:   VecGetArray(bb,&b);
501:   VecGetArray(xx,&x);
502:   t  = a->solve_work;
503:   ISGetIndices(isrow,&r);

505:   /* solve U^T * D * y = b by forward substitution */
506:   tp = t;
507:   for (k=0; k<mbs; k++) { /* t <- perm(b) */
508:     idx   = 6*r[k];
509:     tp[0] = b[idx];
510:     tp[1] = b[idx+1];
511:     tp[2] = b[idx+2];
512:     tp[3] = b[idx+3];
513:     tp[4] = b[idx+4];
514:     tp[5] = b[idx+5];
515:     tp += 6;
516:   }
517: 
518:   for (k=0; k<mbs; k++){
519:     v  = aa + 36*ai[k];
520:     vj = aj + ai[k];
521:     tp = t + k*6;
522:     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
523:     nz = ai[k+1] - ai[k];
524:     tp = t + (*vj)*6;
525:     while (nz--) {
526:       tp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
527:       tp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
528:       tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
529:       tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
530:       tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
531:       tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
532:       vj++; tp = t + (*vj)*6;
533:       v += 36;
534:     }

536:     /* xk = inv(Dk)*(Dk*xk) */
537:     d  = aa+k*36;          /* ptr to inv(Dk) */
538:     tp    = t + k*6;
539:     tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
540:     tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
541:     tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
542:     tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
543:     tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
544:     tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
545:   }

547:   /* solve U*x = y by back substitution */
548:   for (k=mbs-1; k>=0; k--){
549:     v  = aa + 36*ai[k];
550:     vj = aj + ai[k];
551:     tp    = t + k*6;
552:     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];  /* xk */
553:     nz = ai[k+1] - ai[k];
554: 
555:     tp = t + (*vj)*6;
556:     while (nz--) {
557:       /* xk += U(k,:)*x(:) */
558:       x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
559:       x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
560:       x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
561:       x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
562:       x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
563:       x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
564:       vj++; tp = t + (*vj)*6;
565:       v += 36;
566:     }
567:     tp    = t + k*6;
568:     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
569:     idx   = 6*r[k];
570:     x[idx]     = x0;
571:     x[idx+1]   = x1;
572:     x[idx+2]   = x2;
573:     x[idx+3]   = x3;
574:     x[idx+4]   = x4;
575:     x[idx+5]   = x5;
576:   }

578:   ISRestoreIndices(isrow,&r);
579:   VecRestoreArray(bb,&b);
580:   VecRestoreArray(xx,&x);
581:   PetscLogFlops(36*(2*a->nz + mbs));
582:   return(0);
583: }

587: PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
588: {
589:   MatScalar      *v,*d;
590:   PetscScalar    *xp,x0,x1,x2,x3,x4,x5;
591:   PetscInt       nz,*vj,k;

594:   for (k=0; k<mbs; k++){
595:     v  = aa + 36*ai[k];
596:     xp = x + k*6;
597:     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
598:     nz = ai[k+1] - ai[k];
599:     vj = aj + ai[k];
600:     xp = x + (*vj)*6;
601:     while (nz--) {
602:       /* x(:) += U(k,:)^T*(Dk*xk) */
603:       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
604:       xp[1] +=  v[6]*x0 +  v[7]*x1 +  v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
605:       xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
606:       xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
607:       xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
608:       xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
609:       vj++; xp = x + (*vj)*6;
610:       v += 36;
611:     }
612:     /* xk = inv(Dk)*(Dk*xk) */
613:     d  = aa+k*36;          /* ptr to inv(Dk) */
614:     xp = x + k*6;
615:     xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
616:     xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
617:     xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
618:     xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
619:     xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
620:     xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
621:   }
622:   return(0);
623: }
626: PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
627: {
628:   MatScalar      *v;
629:   PetscScalar    *xp,x0,x1,x2,x3,x4,x5;
630:   PetscInt       nz,*vj,k;

633:   for (k=mbs-1; k>=0; k--){
634:     v  = aa + 36*ai[k];
635:     xp = x + k*6;
636:     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
637:     nz = ai[k+1] - ai[k];
638:     vj = aj + ai[k];
639:     xp = x + (*vj)*6;
640:     while (nz--) {
641:       /* xk += U(k,:)*x(:) */
642:       x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
643:       x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
644:       x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
645:       x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
646:       x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
647:       x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
648:       vj++;
649:       v += 36; xp = x + (*vj)*6;
650:     }
651:     xp = x + k*6;
652:     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
653:   }
654:   return(0);
655: }


660: PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
661: {
662:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
663:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
664:   MatScalar      *aa=a->a;
665:   PetscScalar    *x,*b;

669:   VecGetArray(bb,&b);
670:   VecGetArray(xx,&x);
671: 
672:   /* solve U^T * D * y = b by forward substitution */
673:   PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar)); /* x <- b */
674:   ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);

676:   /* solve U*x = y by back substitution */
677:   BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);

679:   VecRestoreArray(bb,&b);
680:   VecRestoreArray(xx,&x);
681:   PetscLogFlops(36*(2*a->nz + mbs));
682:   return(0);
683: }

687: PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
688: {
689:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
690:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
691:   MatScalar      *aa=a->a;
692:   PetscScalar    *x,*b;

696:   VecGetArray(bb,&b);
697:   VecGetArray(xx,&x);
698:   PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar)); /* x <- b */
699:   ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);
700:   VecRestoreArray(bb,&b);
701:   VecRestoreArray(xx,&x);
702:   PetscLogFlops(36*a->nz + mbs);
703:   return(0);
704: }

708: PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
709: {
710:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
711:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
712:   MatScalar      *aa=a->a;
713:   PetscScalar    *x,*b;

717:   VecGetArray(bb,&b);
718:   VecGetArray(xx,&x);
719:   PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar)); /* x <- b */
720:   BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);
721:   VecRestoreArray(bb,&b);
722:   VecRestoreArray(xx,&x);
723:   PetscLogFlops(36*a->nz);
724:   return(0);
725: }

729: PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
730: {
731:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
732:   IS             isrow=a->row;
733:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
735:   PetscInt       nz,*vj,k,*r,idx;
736:   MatScalar      *aa=a->a,*v,*diag;
737:   PetscScalar    *x,*b,x0,x1,x2,x3,x4,*t,*tp;

740:   VecGetArray(bb,&b);
741:   VecGetArray(xx,&x);
742:   t  = a->solve_work;
743:   ISGetIndices(isrow,&r);

745:   /* solve U^T * D * y = b by forward substitution */
746:   tp = t;
747:   for (k=0; k<mbs; k++) { /* t <- perm(b) */
748:     idx   = 5*r[k];
749:     tp[0] = b[idx];
750:     tp[1] = b[idx+1];
751:     tp[2] = b[idx+2];
752:     tp[3] = b[idx+3];
753:     tp[4] = b[idx+4];
754:     tp += 5;
755:   }
756: 
757:   for (k=0; k<mbs; k++){
758:     v  = aa + 25*ai[k];
759:     vj = aj + ai[k];
760:     tp = t + k*5;
761:     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
762:     nz = ai[k+1] - ai[k];

764:     tp = t + (*vj)*5;
765:     while (nz--) {
766:       tp[0] +=  v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
767:       tp[1] +=  v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
768:       tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
769:       tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
770:       tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
771:       vj++; tp = t + (*vj)*5;
772:       v += 25;
773:     }

775:     /* xk = inv(Dk)*(Dk*xk) */
776:     diag  = aa+k*25;          /* ptr to inv(Dk) */
777:     tp    = t + k*5;
778:       tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
779:       tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
780:       tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
781:       tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
782:       tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
783:   }

785:   /* solve U*x = y by back substitution */
786:   for (k=mbs-1; k>=0; k--){
787:     v  = aa + 25*ai[k];
788:     vj = aj + ai[k];
789:     tp    = t + k*5;
790:     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
791:     nz = ai[k+1] - ai[k];
792: 
793:     tp = t + (*vj)*5;
794:     while (nz--) {
795:       /* xk += U(k,:)*x(:) */
796:       x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
797:       x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
798:       x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
799:       x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
800:       x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
801:       vj++; tp = t + (*vj)*5;
802:       v += 25;
803:     }
804:     tp    = t + k*5;
805:     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
806:     idx   = 5*r[k];
807:     x[idx]     = x0;
808:     x[idx+1]   = x1;
809:     x[idx+2]   = x2;
810:     x[idx+3]   = x3;
811:     x[idx+4]   = x4;
812:   }

814:   ISRestoreIndices(isrow,&r);
815:   VecRestoreArray(bb,&b);
816:   VecRestoreArray(xx,&x);
817:   PetscLogFlops(25*(2*a->nz + mbs));
818:   return(0);
819: }

823: PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
824: {
825:   MatScalar      *v,*diag;
826:   PetscScalar    *xp,x0,x1,x2,x3,x4;
827:   PetscInt       nz,*vj,k;

830:   for (k=0; k<mbs; k++){
831:     v  = aa + 25*ai[k];
832:     xp = x + k*5;
833:     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
834:     nz = ai[k+1] - ai[k];
835:     vj = aj + ai[k];
836:     xp = x + (*vj)*5;
837:     while (nz--) {
838:       /* x(:) += U(k,:)^T*(Dk*xk) */
839:       xp[0] +=  v[0]*x0 +  v[1]*x1 +  v[2]*x2 + v[3]*x3 + v[4]*x4;
840:       xp[1] +=  v[5]*x0 +  v[6]*x1 +  v[7]*x2 + v[8]*x3 + v[9]*x4;
841:       xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
842:       xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
843:       xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
844:       vj++; xp = x + (*vj)*5;
845:       v += 25;
846:     }
847:     /* xk = inv(Dk)*(Dk*xk) */
848:     diag = aa+k*25;          /* ptr to inv(Dk) */
849:     xp   = x + k*5;
850:     xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
851:     xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
852:     xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
853:     xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
854:     xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
855:   }
856:   return(0);
857: }

861: PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
862: {
863:   MatScalar      *v;
864:   PetscScalar    *xp,x0,x1,x2,x3,x4;
865:   PetscInt       nz,*vj,k;

868:   for (k=mbs-1; k>=0; k--){
869:     v  = aa + 25*ai[k];
870:     xp = x + k*5;
871:     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
872:     nz = ai[k+1] - ai[k];
873:     vj = aj + ai[k];
874:     xp = x + (*vj)*5;
875:     while (nz--) {
876:       /* xk += U(k,:)*x(:) */
877:       x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
878:       x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
879:       x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
880:       x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
881:       x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
882:       vj++;
883:       v += 25; xp = x + (*vj)*5;
884:     }
885:     xp = x + k*5;
886:     xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
887:   }
888:   return(0);
889: }

893: PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
894: {
895:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
896:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
897:   MatScalar      *aa=a->a;
898:   PetscScalar    *x,*b;

902:   VecGetArray(bb,&b);
903:   VecGetArray(xx,&x);

905:   /* solve U^T * D * y = b by forward substitution */
906:   PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar)); /* x <- b */
907:   ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);

909:   /* solve U*x = y by back substitution */
910:   BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);

912:   VecRestoreArray(bb,&b);
913:   VecRestoreArray(xx,&x);
914:   PetscLogFlops(25*(2*a->nz + mbs));
915:   return(0);
916: }

920: PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
921: {
922:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
923:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
924:   MatScalar      *aa=a->a;
925:   PetscScalar    *x,*b;

929:   VecGetArray(bb,&b);
930:   VecGetArray(xx,&x);
931:   PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar)); /* x <- b */
932:   ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);
933:   VecRestoreArray(bb,&b);
934:   VecRestoreArray(xx,&x);
935:   PetscLogFlops(25*(a->nz + mbs));
936:   return(0);
937: }

941: PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
942: {
943:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
944:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
945:   MatScalar      *aa=a->a;
946:   PetscScalar    *x,*b;

950:   VecGetArray(bb,&b);
951:   VecGetArray(xx,&x);
952:   PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));
953:   BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);
954:   VecRestoreArray(bb,&b);
955:   VecRestoreArray(xx,&x);
956:   PetscLogFlops(25*a->nz);
957:   return(0);
958: }

962: PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
963: {
964:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
965:   IS             isrow=a->row;
966:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
968:   PetscInt       nz,*vj,k,*r,idx;
969:   MatScalar      *aa=a->a,*v,*diag;
970:   PetscScalar    *x,*b,x0,x1,x2,x3,*t,*tp;

973:   VecGetArray(bb,&b);
974:   VecGetArray(xx,&x);
975:   t  = a->solve_work;
976:   ISGetIndices(isrow,&r);

978:   /* solve U^T * D * y = b by forward substitution */
979:   tp = t;
980:   for (k=0; k<mbs; k++) { /* t <- perm(b) */
981:     idx   = 4*r[k];
982:     tp[0] = b[idx];
983:     tp[1] = b[idx+1];
984:     tp[2] = b[idx+2];
985:     tp[3] = b[idx+3];
986:     tp += 4;
987:   }
988: 
989:   for (k=0; k<mbs; k++){
990:     v  = aa + 16*ai[k];
991:     vj = aj + ai[k];
992:     tp = t + k*4;
993:     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
994:     nz = ai[k+1] - ai[k];

996:     tp = t + (*vj)*4;
997:     while (nz--) {
998:       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
999:       tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
1000:       tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
1001:       tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
1002:       vj++; tp = t + (*vj)*4;
1003:       v += 16;
1004:     }

1006:     /* xk = inv(Dk)*(Dk*xk) */
1007:     diag  = aa+k*16;          /* ptr to inv(Dk) */
1008:     tp    = t + k*4;
1009:     tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
1010:     tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
1011:     tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
1012:     tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
1013:   }

1015:   /* solve U*x = y by back substitution */
1016:   for (k=mbs-1; k>=0; k--){
1017:     v  = aa + 16*ai[k];
1018:     vj = aj + ai[k];
1019:     tp    = t + k*4;
1020:     x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
1021:     nz = ai[k+1] - ai[k];
1022: 
1023:     tp = t + (*vj)*4;
1024:     while (nz--) {
1025:       /* xk += U(k,:)*x(:) */
1026:       x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
1027:       x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
1028:       x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
1029:       x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
1030:       vj++; tp = t + (*vj)*4;
1031:       v += 16;
1032:     }
1033:     tp    = t + k*4;
1034:     tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
1035:     idx        = 4*r[k];
1036:     x[idx]     = x0;
1037:     x[idx+1]   = x1;
1038:     x[idx+2]   = x2;
1039:     x[idx+3]   = x3;
1040:   }

1042:   ISRestoreIndices(isrow,&r);
1043:   VecRestoreArray(bb,&b);
1044:   VecRestoreArray(xx,&x);
1045:   PetscLogFlops(16*(2*a->nz + mbs));
1046:   return(0);
1047: }

1051: PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1052: {
1053:   MatScalar      *v,*diag;
1054:   PetscScalar    *xp,x0,x1,x2,x3;
1055:   PetscInt       nz,*vj,k;

1058:   for (k=0; k<mbs; k++){
1059:     v  = aa + 16*ai[k];
1060:     xp = x + k*4;
1061:     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
1062:     nz = ai[k+1] - ai[k];
1063:     vj = aj + ai[k];
1064:     xp = x + (*vj)*4;
1065:     while (nz--) {
1066:       /* x(:) += U(k,:)^T*(Dk*xk) */
1067:       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
1068:       xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
1069:       xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
1070:       xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
1071:       vj++; xp = x + (*vj)*4;
1072:       v += 16;
1073:     }
1074:     /* xk = inv(Dk)*(Dk*xk) */
1075:     diag = aa+k*16;          /* ptr to inv(Dk) */
1076:     xp   = x + k*4;
1077:     xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
1078:     xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
1079:     xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
1080:     xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
1081:   }
1082:   return(0);
1083: }

1087: PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1088: {
1089:   MatScalar      *v;
1090:   PetscScalar    *xp,x0,x1,x2,x3;
1091:   PetscInt       nz,*vj,k;

1094:   for (k=mbs-1; k>=0; k--){
1095:     v  = aa + 16*ai[k];
1096:     xp = x + k*4;
1097:     x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
1098:     nz = ai[k+1] - ai[k];
1099:     vj = aj + ai[k];
1100:     xp = x + (*vj)*4;
1101:     while (nz--) {
1102:       /* xk += U(k,:)*x(:) */
1103:       x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
1104:       x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
1105:       x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
1106:       x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
1107:       vj++;
1108:       v += 16; xp = x + (*vj)*4;
1109:     }
1110:     xp = x + k*4;
1111:     xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
1112:   }
1113:   return(0);
1114: }

1118: PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1119: {
1120:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1121:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1122:   MatScalar      *aa=a->a;
1123:   PetscScalar    *x,*b;

1127:   VecGetArray(bb,&b);
1128:   VecGetArray(xx,&x);

1130:   /* solve U^T * D * y = b by forward substitution */
1131:   PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar)); /* x <- b */
1132:   ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);

1134:   /* solve U*x = y by back substitution */
1135:   BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);
1136:   VecRestoreArray(bb,&b);
1137:   VecRestoreArray(xx,&x);
1138:   PetscLogFlops(16*(2*a->nz + mbs));
1139:   return(0);
1140: }

1144: PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1145: {
1146:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1147:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1148:   MatScalar      *aa=a->a;
1149:   PetscScalar    *x,*b;

1153:   VecGetArray(bb,&b);
1154:   VecGetArray(xx,&x);
1155:   PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar)); /* x <- b */
1156:   ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);
1157:   VecRestoreArray(bb,&b);
1158:   VecRestoreArray(xx,&x);
1159:   PetscLogFlops(16*a->nz + mbs);
1160:   return(0);
1161: }

1165: PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1166: {
1167:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1168:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1169:   MatScalar      *aa=a->a;
1170:   PetscScalar    *x,*b;

1174:   VecGetArray(bb,&b);
1175:   VecGetArray(xx,&x);
1176:   PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));
1177:   BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);
1178:   VecRestoreArray(bb,&b);
1179:   VecRestoreArray(xx,&x);
1180:   PetscLogFlops(16*a->nz);
1181:   return(0);
1182: }

1186: PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
1187: {
1188:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1189:   IS             isrow=a->row;
1190:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1192:   PetscInt       nz,*vj,k,*r,idx;
1193:   MatScalar      *aa=a->a,*v,*diag;
1194:   PetscScalar    *x,*b,x0,x1,x2,*t,*tp;

1197:   VecGetArray(bb,&b);
1198:   VecGetArray(xx,&x);
1199:   t  = a->solve_work;
1200:   ISGetIndices(isrow,&r);

1202:   /* solve U^T * D * y = b by forward substitution */
1203:   tp = t;
1204:   for (k=0; k<mbs; k++) { /* t <- perm(b) */
1205:     idx   = 3*r[k];
1206:     tp[0] = b[idx];
1207:     tp[1] = b[idx+1];
1208:     tp[2] = b[idx+2];
1209:     tp += 3;
1210:   }
1211: 
1212:   for (k=0; k<mbs; k++){
1213:     v  = aa + 9*ai[k];
1214:     vj = aj + ai[k];
1215:     tp = t + k*3;
1216:     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
1217:     nz = ai[k+1] - ai[k];

1219:     tp = t + (*vj)*3;
1220:     while (nz--) {
1221:       tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1222:       tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1223:       tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1224:       vj++; tp = t + (*vj)*3;
1225:       v += 9;
1226:     }

1228:     /* xk = inv(Dk)*(Dk*xk) */
1229:     diag  = aa+k*9;          /* ptr to inv(Dk) */
1230:     tp    = t + k*3;
1231:     tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1232:     tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1233:     tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1234:   }

1236:   /* solve U*x = y by back substitution */
1237:   for (k=mbs-1; k>=0; k--){
1238:     v  = aa + 9*ai[k];
1239:     vj = aj + ai[k];
1240:     tp    = t + k*3;
1241:     x0 = tp[0]; x1 = tp[1]; x2 = tp[2];  /* xk */
1242:     nz = ai[k+1] - ai[k];
1243: 
1244:     tp = t + (*vj)*3;
1245:     while (nz--) {
1246:       /* xk += U(k,:)*x(:) */
1247:       x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
1248:       x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
1249:       x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
1250:       vj++; tp = t + (*vj)*3;
1251:       v += 9;
1252:     }
1253:     tp    = t + k*3;
1254:     tp[0] = x0; tp[1] = x1; tp[2] = x2;
1255:     idx      = 3*r[k];
1256:     x[idx]   = x0;
1257:     x[idx+1] = x1;
1258:     x[idx+2] = x2;
1259:   }

1261:   ISRestoreIndices(isrow,&r);
1262:   VecRestoreArray(bb,&b);
1263:   VecRestoreArray(xx,&x);
1264:   PetscLogFlops(9*(2*a->nz + mbs));
1265:   return(0);
1266: }

1270: PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1271: {
1272:   MatScalar      *v,*diag;
1273:   PetscScalar    *xp,x0,x1,x2;
1274:   PetscInt       nz,*vj,k;

1277:   for (k=0; k<mbs; k++){
1278:     v  = aa + 9*ai[k];
1279:     xp = x + k*3;
1280:     x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
1281:     nz = ai[k+1] - ai[k];
1282:     vj = aj + ai[k];
1283:     xp = x + (*vj)*3;
1284:     while (nz--) {
1285:       /* x(:) += U(k,:)^T*(Dk*xk) */
1286:       xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1287:       xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1288:       xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1289:       vj++; xp = x + (*vj)*3;
1290:       v += 9;
1291:     }
1292:     /* xk = inv(Dk)*(Dk*xk) */
1293:     diag = aa+k*9;          /* ptr to inv(Dk) */
1294:     xp   = x + k*3;
1295:     xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1296:     xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1297:     xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1298:   }
1299:   return(0);
1300: }

1304: PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1305: {
1306:   MatScalar      *v;
1307:   PetscScalar    *xp,x0,x1,x2;
1308:   PetscInt       nz,*vj,k;

1311:   for (k=mbs-1; k>=0; k--){
1312:     v  = aa + 9*ai[k];
1313:     xp = x + k*3;
1314:     x0 = xp[0]; x1 = xp[1]; x2 = xp[2];  /* xk */
1315:     nz = ai[k+1] - ai[k];
1316:     vj = aj + ai[k];
1317:     xp = x + (*vj)*3;
1318:     while (nz--) {
1319:       /* xk += U(k,:)*x(:) */
1320:       x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
1321:       x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
1322:       x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
1323:       vj++;
1324:       v += 9; xp = x + (*vj)*3;
1325:     }
1326:     xp = x + k*3;
1327:     xp[0] = x0; xp[1] = x1; xp[2] = x2;
1328:   }
1329:   return(0);
1330: }

1334: PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1335: {
1336:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1337:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1338:   MatScalar      *aa=a->a;
1339:   PetscScalar    *x,*b;
1341: 
1343:   VecGetArray(bb,&b);
1344:   VecGetArray(xx,&x);

1346:   /* solve U^T * D * y = b by forward substitution */
1347:   PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));
1348:   ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);

1350:   /* solve U*x = y by back substitution */
1351:   BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);

1353:   VecRestoreArray(bb,&b);
1354:   VecRestoreArray(xx,&x);
1355:   PetscLogFlops(9*(2*a->nz + mbs));
1356:   return(0);
1357: }

1361: PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1362: {
1363:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1364:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1365:   MatScalar      *aa=a->a;
1366:   PetscScalar    *x,*b;

1370:   VecGetArray(bb,&b);
1371:   VecGetArray(xx,&x);
1372:   PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));
1373:   ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);
1374:   VecRestoreArray(bb,&b);
1375:   VecRestoreArray(xx,&x);
1376:   PetscLogFlops(9*(a->nz + mbs));
1377:   return(0);
1378: }

1382: PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1383: {
1384:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1385:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1386:   MatScalar      *aa=a->a;
1387:   PetscScalar    *x,*b;

1391:   VecGetArray(bb,&b);
1392:   VecGetArray(xx,&x);
1393:   PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));
1394:   BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);
1395:   VecRestoreArray(bb,&b);
1396:   VecRestoreArray(xx,&x);
1397:   PetscLogFlops(9*a->nz);
1398:   return(0);
1399: }

1403: PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
1404: {
1405:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ *)A->data;
1406:   IS             isrow=a->row;
1407:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1409:   PetscInt       nz,*vj,k,k2,*r,idx;
1410:   MatScalar      *aa=a->a,*v,*diag;
1411:   PetscScalar    *x,*b,x0,x1,*t;

1414:   VecGetArray(bb,&b);
1415:   VecGetArray(xx,&x);
1416:   t  = a->solve_work;
1417:   ISGetIndices(isrow,&r);

1419:   /* solve U^T * D * y = perm(b) by forward substitution */
1420:   for (k=0; k<mbs; k++) {  /* t <- perm(b) */
1421:     idx = 2*r[k];
1422:     t[k*2]   = b[idx];
1423:     t[k*2+1] = b[idx+1];
1424:   }
1425:   for (k=0; k<mbs; k++){
1426:     v  = aa + 4*ai[k];
1427:     vj = aj + ai[k];
1428:     k2 = k*2;
1429:     x0 = t[k2]; x1 = t[k2+1];
1430:     nz = ai[k+1] - ai[k];
1431:     while (nz--) {
1432:       t[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1433:       t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1434:       vj++; v += 4;
1435:     }
1436:     diag = aa+k*4;  /* ptr to inv(Dk) */
1437:     t[k2]   = diag[0]*x0 + diag[2]*x1;
1438:     t[k2+1] = diag[1]*x0 + diag[3]*x1;
1439:   }

1441:   /* solve U*x = y by back substitution */
1442:   for (k=mbs-1; k>=0; k--){
1443:     v  = aa + 4*ai[k];
1444:     vj = aj + ai[k];
1445:     k2 = k*2;
1446:     x0 = t[k2]; x1 = t[k2+1];
1447:     nz = ai[k+1] - ai[k];
1448:     while (nz--) {
1449:       x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1450:       x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1451:       vj++; v += 4;
1452:     }
1453:     t[k2]    = x0;
1454:     t[k2+1]  = x1;
1455:     idx      = 2*r[k];
1456:     x[idx]   = x0;
1457:     x[idx+1] = x1;
1458:   }

1460:   ISRestoreIndices(isrow,&r);
1461:   VecRestoreArray(bb,&b);
1462:   VecRestoreArray(xx,&x);
1463:   PetscLogFlops(4*(2*a->nz + mbs));
1464:   return(0);
1465: }

1469: PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1470: {
1471:   MatScalar      *v,*diag;
1472:   PetscScalar    x0,x1;
1473:   PetscInt       nz,*vj,k,k2;

1476:   for (k=0; k<mbs; k++){
1477:     v  = aa + 4*ai[k];
1478:     vj = aj + ai[k];
1479:     k2 = k*2;
1480:     x0 = x[k2]; x1 = x[k2+1];  /* Dk*xk = k-th block of x */
1481:     nz = ai[k+1] - ai[k];
1482: 
1483:     while (nz--) {
1484:       /* x(:) += U(k,:)^T*(Dk*xk) */
1485:       x[(*vj)*2]   += v[0]*x0 + v[1]*x1;
1486:       x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1487:       vj++; v += 4;
1488:     }
1489:     /* xk = inv(Dk)*(Dk*xk) */
1490:     diag = aa+k*4;          /* ptr to inv(Dk) */
1491:     x[k2]   = diag[0]*x0 + diag[2]*x1;
1492:     x[k2+1] = diag[1]*x0 + diag[3]*x1;
1493:   }
1494:   return(0);
1495: }

1499: PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1500: {
1501:   MatScalar      *v;
1502:   PetscScalar    x0,x1;
1503:   PetscInt       nz,*vj,k,k2;

1506:   for (k=mbs-1; k>=0; k--){
1507:     v  = aa + 4*ai[k];
1508:     vj = aj + ai[k];
1509:     k2 = k*2;
1510:     x0 = x[k2]; x1 = x[k2+1];  /* xk */
1511:     nz = ai[k+1] - ai[k];
1512:     while (nz--) {
1513:       /* xk += U(k,:)*x(:) */
1514:       x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1515:       x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1516:       vj++; v += 4;
1517:     }
1518:     x[k2]     = x0;
1519:     x[k2+1]   = x1;
1520:   }
1521:   return(0);
1522: }

1526: PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1527: {
1528:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1529:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1530:   MatScalar      *aa=a->a;
1531:   PetscScalar    *x,*b;

1535:   VecGetArray(bb,&b);
1536:   VecGetArray(xx,&x);

1538:   /* solve U^T * D * y = b by forward substitution */
1539:   PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));
1540:   ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);

1542:   /* solve U*x = y by back substitution */
1543:   BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);

1545:   VecRestoreArray(bb,&b);
1546:   VecRestoreArray(xx,&x);
1547:   PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */
1548:   return(0);
1549: }

1553: PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1554: {
1555:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1556:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1557:   MatScalar      *aa=a->a;
1558:   PetscScalar    *x,*b;

1562:   VecGetArray(bb,&b);
1563:   VecGetArray(xx,&x);
1564:   PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));
1565:   ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);
1566:   VecRestoreArray(bb,&b);
1567:   VecRestoreArray(xx,&x);
1568:   PetscLogFlops(4*(a->nz + mbs));
1569:   return(0);
1570: }

1574: PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1575: {
1576:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1577:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1578:   MatScalar      *aa=a->a;
1579:   PetscScalar    *x,*b;

1583:   VecGetArray(bb,&b);
1584:   VecGetArray(xx,&x);
1585:   PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));
1586:   BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);
1587:   VecRestoreArray(bb,&b);
1588:   VecRestoreArray(xx,&x);
1589:   PetscLogFlops(4*a->nz);
1590:   return(0);
1591: }

1595: PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1596: {
1597:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1598:   IS             isrow=a->row;
1600:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1601:   MatScalar      *aa=a->a,*v;
1602:   PetscScalar    *x,*b,xk,*t;
1603:   PetscInt       nz,*vj,k;

1606:   VecGetArray(bb,&b);
1607:   VecGetArray(xx,&x);
1608:   t    = a->solve_work;
1609:   ISGetIndices(isrow,&rp);
1610: 
1611:   /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */
1612:   for (k=0; k<mbs; k++) t[k] = b[rp[k]];
1613:   for (k=0; k<mbs; k++){
1614:     v  = aa + ai[k] + 1;
1615:     vj = aj + ai[k] + 1;
1616:     xk = t[k];
1617:     nz = ai[k+1] - ai[k] - 1;
1618:     while (nz--) t[*vj++] += (*v++) * xk;
1619:     t[k] = xk*aa[ai[k]];  /* aa[k] = 1/D(k) */
1620:   }

1622:   /* solve U*perm(x) = y by back substitution */
1623:   for (k=mbs-1; k>=0; k--){
1624:     v  = aa + ai[k] + 1;
1625:     vj = aj + ai[k] + 1;
1626:     nz = ai[k+1] - ai[k] - 1;
1627:     while (nz--) t[k] += (*v++) * t[*vj++];
1628:     x[rp[k]] = t[k];
1629:   }

1631:   ISRestoreIndices(isrow,&rp);
1632:   VecRestoreArray(bb,&b);
1633:   VecRestoreArray(xx,&x);
1634:   PetscLogFlops(4*a->nz);
1635:   return(0);
1636: }

1640: PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1641: {
1642:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1643:   IS             isrow=a->row;
1645:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1646:   MatScalar      *aa=a->a,*v;
1647:   PetscReal      diagk;
1648:   PetscScalar    *x,*b,xk;
1649:   PetscInt       nz,*vj,k;

1652:   /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */
1653:   VecGetArray(bb,&b);
1654:   VecGetArray(xx,&x);
1655:   ISGetIndices(isrow,&rp);
1656: 
1657:   for (k=0; k<mbs; k++) x[k] = b[rp[k]];
1658:   for (k=0; k<mbs; k++){
1659:     v  = aa + ai[k] + 1;
1660:     vj = aj + ai[k] + 1;
1661:     xk = x[k];
1662:     nz = ai[k+1] - ai[k] - 1;
1663:     while (nz--) x[*vj++] += (*v++) * xk;

1665:     diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1666:     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1667:     x[k] = xk*sqrt(diagk);
1668:   }
1669:   ISRestoreIndices(isrow,&rp);
1670:   VecRestoreArray(bb,&b);
1671:   VecRestoreArray(xx,&x);
1672:   PetscLogFlops(2*a->nz);
1673:   return(0);
1674: }

1678: PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1679: {
1680:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1681:   IS             isrow=a->row;
1683:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1684:   MatScalar      *aa=a->a,*v;
1685:   PetscReal      diagk;
1686:   PetscScalar    *x,*b,*t;
1687:   PetscInt       nz,*vj,k;

1690:   /* solve D^(1/2)*U*perm(x) = b by back substitution */
1691:   VecGetArray(bb,&b);
1692:   VecGetArray(xx,&x);
1693:   t    = a->solve_work;
1694:   ISGetIndices(isrow,&rp);

1696:   for (k=mbs-1; k>=0; k--){
1697:     v  = aa + ai[k] + 1;
1698:     vj = aj + ai[k] + 1;
1699:     diagk = PetscRealPart(aa[ai[k]]);
1700:     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1701:     t[k] = b[k] * sqrt(diagk);
1702:     nz = ai[k+1] - ai[k] - 1;
1703:     while (nz--) t[k] += (*v++) * t[*vj++];
1704:     x[rp[k]] = t[k];
1705:   }
1706:   ISRestoreIndices(isrow,&rp);
1707:   VecRestoreArray(bb,&b);
1708:   VecRestoreArray(xx,&x);
1709:   PetscLogFlops(2*a->nz);
1710:   return(0);
1711: }

1715: PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1716: {
1717:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;

1721:   if (A->rmap.bs == 1) {
1722:     MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);
1723:   } else {
1724:     IS              isrow=a->row;
1725:     PetscInt             mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,i;
1726:     MatScalar       *aa=a->a,*v;
1727:     PetscScalar     *x,*b,*t;
1728:     PetscInt             nz,*vj,k,n;
1729:     if (bb->n > a->solves_work_n) {
1730:       PetscFree(a->solves_work);
1731:       PetscMalloc(bb->n*A->rmap.N*sizeof(PetscScalar),&a->solves_work);
1732:       a->solves_work_n = bb->n;
1733:     }
1734:     n    = bb->n;
1735:     VecGetArray(bb->v,&b);
1736:     VecGetArray(xx->v,&x);
1737:     t    = a->solves_work;

1739:     ISGetIndices(isrow,&rp);
1740: 
1741:     /* solve U^T*D*y = perm(b) by forward substitution */
1742:     for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */
1743:     for (k=0; k<mbs; k++){
1744:       v  = aa + ai[k];
1745:       vj = aj + ai[k];
1746:       nz = ai[k+1] - ai[k];
1747:       while (nz--) {
1748:         for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1749:         v++;vj++;
1750:       }
1751:       for (i=0; i<n; i++) t[n*k+i] *= aa[k];  /* note: aa[k] = 1/D(k) */
1752:     }
1753: 
1754:     /* solve U*perm(x) = y by back substitution */
1755:     for (k=mbs-1; k>=0; k--){
1756:       v  = aa + ai[k];
1757:       vj = aj + ai[k];
1758:       nz = ai[k+1] - ai[k];
1759:       while (nz--) {
1760:         for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1761:         v++;vj++;
1762:       }
1763:       for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i];
1764:     }

1766:     ISRestoreIndices(isrow,&rp);
1767:     VecRestoreArray(bb->v,&b);
1768:     VecRestoreArray(xx->v,&x);
1769:     PetscLogFlops(bb->n*(4*a->nz + A->rmap.N));
1770:   }
1771:   return(0);
1772: }

1774: /*
1775:       Special case where the matrix was ILU(0) factored in the natural
1776:    ordering. This eliminates the need for the column and row permutation.
1777: */
1780: PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1781: {
1782:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1784:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1785:   MatScalar      *aa=a->a,*v;
1786:   PetscScalar    *x,*b,xk;
1787:   PetscInt       nz,*vj,k;

1790:   VecGetArray(bb,&b);
1791:   VecGetArray(xx,&x);
1792: 
1793:   /* solve U^T*D*y = b by forward substitution */
1794:   PetscMemcpy(x,b,mbs*sizeof(PetscScalar));
1795:   for (k=0; k<mbs; k++){
1796:     v  = aa + ai[k] + 1;
1797:     vj = aj + ai[k] + 1;
1798:     xk = x[k];
1799:     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1800:     while (nz--) x[*vj++] += (*v++) * xk;
1801:     x[k] = xk*aa[ai[k]];  /* note: aa[diag[k]] = 1/D(k) */
1802:   }

1804:   /* solve U*x = y by back substitution */
1805:   for (k=mbs-2; k>=0; k--){
1806:     v  = aa + ai[k] + 1;
1807:     vj = aj + ai[k] + 1;
1808:     xk = x[k];
1809:     nz = ai[k+1] - ai[k] - 1;
1810:     while (nz--) xk += (*v++) * x[*vj++];
1811:     x[k] = xk;
1812:   }

1814:   VecRestoreArray(bb,&b);
1815:   VecRestoreArray(xx,&x);
1816:   PetscLogFlops(4*a->nz);
1817:   return(0);
1818: }

1822: PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1823: {
1824:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1826:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1827:   MatScalar      *aa=a->a,*v;
1828:   PetscReal      diagk;
1829:   PetscScalar    *x,*b;
1830:   PetscInt       nz,*vj,k;

1833:   /* solve U^T*D^(1/2)*x = b by forward substitution */
1834:   VecGetArray(bb,&b);
1835:   VecGetArray(xx,&x);
1836:   PetscMemcpy(x,b,mbs*sizeof(PetscScalar));
1837:   for (k=0; k<mbs; k++){
1838:     v  = aa + ai[k] + 1;
1839:     vj = aj + ai[k] + 1;
1840:     nz = ai[k+1] - ai[k] - 1;     /* exclude diag[k] */
1841:     while (nz--) x[*vj++] += (*v++) * x[k];
1842:     diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1843:     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]]));
1844:     x[k] *= sqrt(diagk);
1845:   }
1846:   VecRestoreArray(bb,&b);
1847:   VecRestoreArray(xx,&x);
1848:   PetscLogFlops(2*a->nz);
1849:   return(0);
1850: }

1854: PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1855: {
1856:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data;
1858:   PetscInt       mbs=a->mbs,*ai=a->i,*aj=a->j;
1859:   MatScalar      *aa=a->a,*v;
1860:   PetscReal      diagk;
1861:   PetscScalar    *x,*b;
1862:   PetscInt       nz,*vj,k;

1865:   /* solve D^(1/2)*U*x = b by back substitution */
1866:   VecGetArray(bb,&b);
1867:   VecGetArray(xx,&x);

1869:   for (k=mbs-1; k>=0; k--){
1870:     v  = aa + ai[k] + 1;
1871:     vj = aj + ai[k] + 1;
1872:     diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1873:     if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1874:     x[k] = sqrt(diagk)*b[k];
1875:     nz = ai[k+1] - ai[k] - 1;
1876:     while (nz--) x[k] += (*v++) * x[*vj++];
1877:   }
1878:   VecRestoreArray(bb,&b);
1879:   VecRestoreArray(xx,&x);
1880:   PetscLogFlops(2*a->nz);
1881:   return(0);
1882: }

1884: /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1887: PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1888: {
1889:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
1891:   PetscInt       *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1892:   PetscInt       *jutmp,bs = A->rmap.bs,bs2=a->bs2;
1893:   PetscInt       m,reallocs = 0,*levtmp;
1894:   PetscInt       *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
1895:   PetscInt       incrlev,*lev,shift,prow,nz;
1896:   PetscReal      f = info->fill,levels = info->levels;
1897:   PetscTruth     perm_identity;

1900:   /* check whether perm is the identity mapping */
1901:   ISIdentity(perm,&perm_identity);

1903:   if (perm_identity){
1904:     a->permute = PETSC_FALSE;
1905:     ai = a->i; aj = a->j;
1906:   } else { /*  non-trivial permutation */
1907:     a->permute = PETSC_TRUE;
1908:     MatReorderingSeqSBAIJ(A, perm);
1909:     ai = a->inew; aj = a->jnew;
1910:   }
1911: 
1912:   /* initialization */
1913:   ISGetIndices(perm,&rip);
1914:   umax  = (PetscInt)(f*ai[mbs] + 1);
1915:   PetscMalloc(umax*sizeof(PetscInt),&lev);
1916:   umax += mbs + 1;
1917:   shift = mbs + 1;
1918:   PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);
1919:   PetscMalloc(umax*sizeof(PetscInt),&ju);
1920:   iu[0] = mbs + 1;
1921:   juidx = mbs + 1;
1922:   /* prowl: linked list for pivot row */
1923:   PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);
1924:   /* q: linked list for col index */
1925:   q       = prowl + mbs;
1926:   levtmp  = q     + mbs;
1927: 
1928:   for (i=0; i<mbs; i++){
1929:     prowl[i] = mbs;
1930:     q[i] = 0;
1931:   }

1933:   /* for each row k */
1934:   for (k=0; k<mbs; k++){
1935:     nzk  = 0;
1936:     q[k] = mbs;
1937:     /* copy current row into linked list */
1938:     nz = ai[rip[k]+1] - ai[rip[k]];
1939:     j = ai[rip[k]];
1940:     while (nz--){
1941:       vj = rip[aj[j++]];
1942:       if (vj > k){
1943:         qm = k;
1944:         do {
1945:           m  = qm; qm = q[m];
1946:         } while(qm < vj);
1947:         if (qm == vj) {
1948:           SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
1949:         }
1950:         nzk++;
1951:         q[m]       = vj;
1952:         q[vj]      = qm;
1953:         levtmp[vj] = 0;   /* initialize lev for nonzero element */
1954:       }
1955:     }

1957:     /* modify nonzero structure of k-th row by computing fill-in
1958:        for each row prow to be merged in */
1959:     prow = k;
1960:     prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1961: 
1962:     while (prow < k){
1963:       /* merge row prow into k-th row */
1964:       jmin = iu[prow] + 1;
1965:       jmax = iu[prow+1];
1966:       qm = k;
1967:       for (j=jmin; j<jmax; j++){
1968:         incrlev = lev[j-shift] + 1;
1969:         if (incrlev > levels) continue;
1970:         vj      = ju[j];
1971:         do {
1972:           m = qm; qm = q[m];
1973:         } while (qm < vj);
1974:         if (qm != vj){      /* a fill */
1975:           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1976:           levtmp[vj] = incrlev;
1977:         } else {
1978:           if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1979:         }
1980:       }
1981:       prow = prowl[prow]; /* next pivot row */
1982:     }
1983: 
1984:     /* add k to row list for first nonzero element in k-th row */
1985:     if (nzk > 1){
1986:       i = q[k]; /* col value of first nonzero element in k_th row of U */
1987:       prowl[k] = prowl[i]; prowl[i] = k;
1988:     }
1989:     iu[k+1] = iu[k] + nzk;

1991:     /* allocate more space to ju and lev if needed */
1992:     if (iu[k+1] > umax) {
1993:       /* estimate how much additional space we will need */
1994:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1995:       /* just double the memory each time */
1996:       maxadd = umax;
1997:       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1998:       umax += maxadd;

2000:       /* allocate a longer ju */
2001:       PetscMalloc(umax*sizeof(PetscInt),&jutmp);
2002:       PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));
2003:       PetscFree(ju);
2004:       ju       = jutmp;

2006:       PetscMalloc(umax*sizeof(PetscInt),&jutmp);
2007:       PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));
2008:       PetscFree(lev);
2009:       lev      = jutmp;
2010:       reallocs += 2; /* count how many times we realloc */
2011:     }

2013:     /* save nonzero structure of k-th row in ju */
2014:     i=k;
2015:     while (nzk --) {
2016:       i                = q[i];
2017:       ju[juidx]        = i;
2018:       lev[juidx-shift] = levtmp[i];
2019:       juidx++;
2020:     }
2021:   }
2022: 
2023: #if defined(PETSC_USE_INFO)
2024:   if (ai[mbs] != 0) {
2025:     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
2026:     PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
2027:     PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2028:     PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
2029:     PetscInfo(A,"for best performance.\n");
2030:   } else {
2031:     PetscInfo(A,"Empty matrix.\n");
2032:   }
2033: #endif

2035:   ISRestoreIndices(perm,&rip);
2036:   PetscFree(prowl);
2037:   PetscFree(lev);

2039:   /* put together the new matrix */
2040:   MatCreate(A->comm,B);
2041:   MatSetSizes(*B,bs*mbs,bs*mbs,bs*mbs,bs*mbs);
2042:   MatSetType(*B,A->type_name);
2043:   MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,0,PETSC_NULL);

2045:   /* PetscLogObjectParent(*B,iperm); */
2046:   b    = (Mat_SeqSBAIJ*)(*B)->data;
2047:   PetscFree2(b->imax,b->ilen);
2048:   b->singlemalloc = PETSC_FALSE;
2049:   b->free_a       = PETSC_TRUE;
2050:   b->free_ij      = PETSC_TRUE;
2051:   /* the next line frees the default space generated by the Create() */
2052:   PetscFree3(b->a,b->j,b->i);
2053:   PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
2054:   b->j    = ju;
2055:   b->i    = iu;
2056:   b->diag = 0;
2057:   b->ilen = 0;
2058:   b->imax = 0;
2059: 
2060:   if (b->row) {
2061:     ISDestroy(b->row);
2062:   }
2063:   if (b->icol) {
2064:     ISDestroy(b->icol);
2065:   }
2066:   b->row  = perm;
2067:   b->icol = perm;
2068:   PetscObjectReference((PetscObject)perm);
2069:   PetscObjectReference((PetscObject)perm);
2070:   PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
2071:   /* In b structure:  Free imax, ilen, old a, old j.  
2072:      Allocate idnew, solve_work, new a, new j */
2073:   PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
2074:   b->maxnz = b->nz = iu[mbs];
2075: 
2076:   (*B)->factor                 = FACTOR_CHOLESKY;
2077:   (*B)->info.factor_mallocs    = reallocs;
2078:   (*B)->info.fill_ratio_given  = f;
2079:   if (ai[mbs] != 0) {
2080:     (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
2081:   } else {
2082:     (*B)->info.fill_ratio_needed = 0.0;
2083:   }

2085:   if (perm_identity){
2086:     switch (bs) {
2087:       case 1:
2088:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
2089:         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2090:         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2091:         (*B)->ops->solves                = MatSolves_SeqSBAIJ_1;
2092:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");
2093:         break;
2094:       case 2:
2095:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
2096:         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2097:         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2098:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");
2099:         break;
2100:       case 3:
2101:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
2102:         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2103:         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2104:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");
2105:         break;
2106:       case 4:
2107:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
2108:         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2109:         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2110:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");
2111:         break;
2112:       case 5:
2113:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
2114:         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2115:         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2116:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");
2117:         break;
2118:       case 6:
2119:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
2120:         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2121:         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2122:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");
2123:         break;
2124:       case 7:
2125:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
2126:         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2127:         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2128:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");
2129:       break;
2130:       default:
2131:         (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
2132:         (*B)->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2133:         (*B)->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2134:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");
2135:       break;
2136:     }
2137:   }

2139:   return(0);
2140: }

2142:  #include petscbt.h
2143:  #include src/mat/utils/freespace.h
2146: PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
2147: {
2148:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
2149:   Mat_SeqSBAIJ       *b;
2150:   Mat                B;
2151:   PetscErrorCode     ierr;
2152:   PetscTruth         perm_identity,free_ij = PETSC_TRUE;
2153:   PetscInt           bs=A->rmap.bs,am=a->mbs;
2154:   PetscInt           reallocs=0,*rip,i,*ai,*aj,*ui;
2155:   PetscInt           jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2156:   PetscInt           nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
2157:   PetscReal          fill=info->fill,levels=info->levels,ratio_needed;
2158:   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2159:   PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2160:   PetscBT            lnkbt;
2161:   PetscScalar        *ua;

2164:   /*  
2165:    This code originally uses Modified Sparse Row (MSR) storage
2166:    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
2167:    Then it is rewritten so the factor B takes seqsbaij format. However the associated 
2168:    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1, 
2169:    thus the original code in MSR format is still used for these cases. 
2170:    The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever 
2171:    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
2172:   */
2173:   if (bs > 1){
2174:     MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);
2175:     return(0);
2176:   }

2178:   /* check whether perm is the identity mapping */
2179:   ISIdentity(perm,&perm_identity);
2180: 
2181:   /* special case that simply copies fill pattern */
2182:   if (!levels && perm_identity) {
2183:     a->permute = PETSC_FALSE;
2184:     /* reuse the column pointers and row offsets for memory savings */
2185:     ui           = a->i;
2186:     uj           = a->j;
2187:     free_ij      = PETSC_FALSE;
2188:     ratio_needed = 1.0;
2189:   } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2190:     if (perm_identity){
2191:       a->permute = PETSC_FALSE;
2192:       ai = a->i; aj = a->j;
2193:     } else { /*  non-trivial permutation */
2194:       a->permute = PETSC_TRUE;
2195:       MatReorderingSeqSBAIJ(A, perm);
2196:       ai   = a->inew; aj = a->jnew;
2197:     }
2198:     ISGetIndices(perm,&rip);

2200:     /* initialization */
2201:     PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2202:     ui[0] = 0;

2204:     /* jl: linked list for storing indices of the pivot rows 
2205:        il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2206:     PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);
2207:     il         = jl + am;
2208:     uj_ptr     = (PetscInt**)(il + am);
2209:     uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
2210:     for (i=0; i<am; i++){
2211:       jl[i] = am; il[i] = 0;
2212:     }

2214:     /* create and initialize a linked list for storing column indices of the active row k */
2215:     nlnk = am + 1;
2216:     PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);

2218:     /* initial FreeSpace size is fill*(ai[am]+1) */
2219:     PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2220:     current_space = free_space;
2221:     PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
2222:     current_space_lvl = free_space_lvl;

2224:     for (k=0; k<am; k++){  /* for each active row k */
2225:       /* initialize lnk by the column indices of row rip[k] */
2226:       nzk   = 0;
2227:       ncols = ai[rip[k]+1] - ai[rip[k]];
2228:       cols  = aj+ai[rip[k]];
2229:       PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);
2230:       nzk += nlnk;

2232:       /* update lnk by computing fill-in for each pivot row to be merged in */
2233:       prow = jl[k]; /* 1st pivot row */
2234: 
2235:       while (prow < k){
2236:         nextprow = jl[prow];
2237: 
2238:         /* merge prow into k-th row */
2239:         jmin = il[prow] + 1;  /* index of the 2nd nzero entry in U(prow,k:am-1) */
2240:         jmax = ui[prow+1];
2241:         ncols = jmax-jmin;
2242:         i     = jmin - ui[prow];
2243:         cols  = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2244:         j     = *(uj_lvl_ptr[prow] + i - 1);
2245:         cols_lvl = uj_lvl_ptr[prow]+i;
2246:         PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2247:         nzk += nlnk;

2249:         /* update il and jl for prow */
2250:         if (jmin < jmax){
2251:           il[prow] = jmin;
2252:           j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2253:         }
2254:         prow = nextprow;
2255:       }

2257:       /* if free space is not available, make more free space */
2258:       if (current_space->local_remaining<nzk) {
2259:         i = am - k + 1; /* num of unfactored rows */
2260:         i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2261:         PetscFreeSpaceGet(i,&current_space);
2262:         PetscFreeSpaceGet(i,&current_space_lvl);
2263:         reallocs++;
2264:       }

2266:       /* copy data into free_space and free_space_lvl, then initialize lnk */
2267:       PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);

2269:       /* add the k-th row into il and jl */
2270:       if (nzk-1 > 0){
2271:         i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2272:         jl[k] = jl[i]; jl[i] = k;
2273:         il[k] = ui[k] + 1;
2274:       }
2275:       uj_ptr[k]     = current_space->array;
2276:       uj_lvl_ptr[k] = current_space_lvl->array;

2278:       current_space->array           += nzk;
2279:       current_space->local_used      += nzk;
2280:       current_space->local_remaining -= nzk;
2281:       current_space_lvl->array           += nzk;
2282:       current_space_lvl->local_used      += nzk;
2283:       current_space_lvl->local_remaining -= nzk;

2285:       ui[k+1] = ui[k] + nzk;
2286:     }

2288: #if defined(PETSC_USE_INFO)
2289:     if (ai[am] != 0) {
2290:       PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2291:       PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2292:       PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2293:       PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2294:     } else {
2295:       PetscInfo(A,"Empty matrix.\n");
2296:     }
2297: #endif

2299:     ISRestoreIndices(perm,&rip);
2300:     PetscFree(jl);

2302:     /* destroy list of free space and other temporary array(s) */
2303:     PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2304:     PetscFreeSpaceContiguous(&free_space,uj);
2305:     PetscIncompleteLLDestroy(lnk,lnkbt);
2306:     PetscFreeSpaceDestroy(free_space_lvl);
2307:     if (ai[am] != 0) {
2308:       ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2309:     } else {
2310:       ratio_needed = 0.0;
2311:     }
2312:   } /* end of case: levels>0 || (levels=0 && !perm_identity) */

2314:   /* put together the new matrix in MATSEQSBAIJ format */
2315:   PetscMalloc((ui[am]+1)*sizeof(MatScalar),&ua);
2316:   MatCreateSeqSBAIJWithArrays(PETSC_COMM_SELF,1,am,am,ui,uj,ua,fact);
2317:   B = *fact;
2318:   b = (Mat_SeqSBAIJ*)B->data;
2319:   PetscFree2(b->imax,b->ilen);
2320:   b->singlemalloc = PETSC_FALSE;
2321:   b->free_a  = PETSC_TRUE;
2322:   b->free_ij = free_ij;
2323:   b->diag    = 0;
2324:   b->ilen    = 0;
2325:   b->imax    = 0;
2326:   b->row     = perm;
2327:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2328:   PetscObjectReference((PetscObject)perm);
2329:   b->icol = perm;
2330:   PetscObjectReference((PetscObject)perm);
2331:   PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2332:   b->maxnz = b->nz = ui[am];
2333: 
2334:   B->factor                 = FACTOR_CHOLESKY;
2335:   B->info.factor_mallocs    = reallocs;
2336:   B->info.fill_ratio_given  = fill;
2337:   B->info.fill_ratio_needed = ratio_needed;

2339:   if (perm_identity){
2340:     switch (bs) {
2341:       case 1:
2342:         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
2343:         B->ops->solve                 = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2344:         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2345:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");
2346:         break;
2347:       case 2:
2348:         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
2349:         B->ops->solve                 = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2350:         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2351:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");
2352:         break;
2353:       case 3:
2354:         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
2355:         B->ops->solve                 = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2356:         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2357:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");
2358:         break;
2359:       case 4:
2360:         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
2361:         B->ops->solve                 = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2362:         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2363:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");
2364:         break;
2365:       case 5:
2366:         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
2367:         B->ops->solve                 = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2368:         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2369:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");
2370:         break;
2371:       case 6:
2372:         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
2373:         B->ops->solve                 = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2374:         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2375:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");
2376:         break;
2377:       case 7:
2378:         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
2379:         B->ops->solve                 = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2380:         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2381:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");
2382:       break;
2383:       default:
2384:         B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
2385:         B->ops->solve                 = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2386:         B->ops->solvetranspose        = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2387:         PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");
2388:       break;
2389:     }
2390:   }
2391:   return(0);
2392: }