Actual source code: sbaij2.c

  1: #define PETSCMAT_DLL

 3:  #include src/mat/impls/baij/seq/baij.h
 4:  #include src/inline/spops.h
 5:  #include src/inline/ilu.h
 6:  #include petscbt.h
 7:  #include src/mat/impls/sbaij/seq/sbaij.h

 11: PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat A,PetscInt is_max,IS is[],PetscInt ov)
 12: {
 13:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
 15:   PetscInt       brow,i,j,k,l,mbs,n,*idx,*nidx,isz,bcol,bcol_max,start,end,*ai,*aj,bs,*nidx2;
 16:   PetscBT        table,table0;

 19:   if (ov < 0)  SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified");
 20:   mbs = a->mbs;
 21:   ai  = a->i;
 22:   aj  = a->j;
 23:   bs  = A->rmap.bs;
 24:   PetscBTCreate(mbs,table);
 25:   PetscMalloc((mbs+1)*sizeof(PetscInt),&nidx);
 26:   PetscMalloc((A->rmap.N+1)*sizeof(PetscInt),&nidx2);
 27:   PetscBTCreate(mbs,table0);

 29:   for (i=0; i<is_max; i++) { /* for each is */
 30:     isz  = 0;
 31:     PetscBTMemzero(mbs,table);
 32: 
 33:     /* Extract the indices, assume there can be duplicate entries */
 34:     ISGetIndices(is[i],&idx);
 35:     ISGetLocalSize(is[i],&n);

 37:     /* Enter these into the temp arrays i.e mark table[brow], enter brow into new index */
 38:     bcol_max = 0;
 39:     for (j=0; j<n ; ++j){
 40:       brow = idx[j]/bs; /* convert the indices into block indices */
 41:       if (brow >= mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
 42:       if(!PetscBTLookupSet(table,brow)) {
 43:         nidx[isz++] = brow;
 44:         if (bcol_max < brow) bcol_max = brow;
 45:       }
 46:     }
 47:     ISRestoreIndices(is[i],&idx);
 48:     ISDestroy(is[i]);
 49: 
 50:     k = 0;
 51:     for (j=0; j<ov; j++){ /* for each overlap */
 52:       /* set table0 for lookup - only mark entries that are added onto nidx in (j-1)-th overlap */
 53:       PetscBTMemzero(mbs,table0);
 54:       for (l=k; l<isz; l++) { PetscBTSet(table0,nidx[l]); }

 56:       n = isz;  /* length of the updated is[i] */
 57:       for (brow=0; brow<mbs; brow++){
 58:         start = ai[brow]; end   = ai[brow+1];
 59:         if (PetscBTLookup(table0,brow)){ /* brow is on nidx - row search: collect all bcol in this brow */
 60:           for (l = start; l<end ; l++){
 61:             bcol = aj[l];
 62:             if (!PetscBTLookupSet(table,bcol)) {nidx[isz++] = bcol;}
 63:           }
 64:           k++;
 65:           if (k >= n) break; /* for (brow=0; brow<mbs; brow++) */
 66:         } else { /* brow is not on nidx - col serach: add brow onto nidx if there is a bcol in nidx */
 67:           for (l = start; l<end ; l++){
 68:             bcol = aj[l];
 69:             if (bcol > bcol_max) break;
 70:             if (PetscBTLookup(table0,bcol)){
 71:               if (!PetscBTLookupSet(table,brow)) {nidx[isz++] = brow;}
 72:               break; /* for l = start; l<end ; l++) */
 73:             }
 74:           }
 75:         }
 76:       }
 77:     } /* for each overlap */

 79:     /* expand the Index Set */
 80:     for (j=0; j<isz; j++) {
 81:       for (k=0; k<bs; k++)
 82:         nidx2[j*bs+k] = nidx[j]*bs+k;
 83:     }
 84:     ISCreateGeneral(PETSC_COMM_SELF,isz*bs,nidx2,is+i);
 85:   }
 86:   PetscBTDestroy(table);
 87:   PetscFree(nidx);
 88:   PetscFree(nidx2);
 89:   PetscBTDestroy(table0);
 90:   return(0);
 91: }

 95: PetscErrorCode MatGetSubMatrix_SeqSBAIJ_Private(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
 96: {
 97:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ*)A->data,*c;
 99:   PetscInt       *smap,i,k,kstart,kend,oldcols = a->mbs,*lens;
100:   PetscInt       row,mat_i,*mat_j,tcol,*mat_ilen;
101:   PetscInt       *irow,nrows,*ssmap,bs=A->rmap.bs,bs2=a->bs2;
102:   PetscInt       *aj = a->j,*ai = a->i;
103:   MatScalar      *mat_a;
104:   Mat            C;
105:   PetscTruth     flag;

108:   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
109:   ISSorted(iscol,(PetscTruth*)&i);
110:   if (!i) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"IS is not sorted");

112:   ISGetIndices(isrow,&irow);
113:   ISGetSize(isrow,&nrows);
114: 
115:   PetscMalloc((1+oldcols)*sizeof(PetscInt),&smap);
116:   ssmap = smap;
117:   PetscMalloc((1+nrows)*sizeof(PetscInt),&lens);
118:   PetscMemzero(smap,oldcols*sizeof(PetscInt));
119:   for (i=0; i<nrows; i++) smap[irow[i]] = i+1; /* nrows = ncols */
120:   /* determine lens of each row */
121:   for (i=0; i<nrows; i++) {
122:     kstart  = ai[irow[i]];
123:     kend    = kstart + a->ilen[irow[i]];
124:     lens[i] = 0;
125:       for (k=kstart; k<kend; k++) {
126:         if (ssmap[aj[k]]) {
127:           lens[i]++;
128:         }
129:       }
130:     }
131:   /* Create and fill new matrix */
132:   if (scall == MAT_REUSE_MATRIX) {
133:     c = (Mat_SeqSBAIJ *)((*B)->data);

135:     if (c->mbs!=nrows || (*B)->rmap.bs!=bs) SETERRQ(PETSC_ERR_ARG_SIZ,"Submatrix wrong size");
136:     PetscMemcmp(c->ilen,lens,c->mbs *sizeof(PetscInt),&flag);
137:     if (!flag) {
138:       SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong no of nonzeros");
139:     }
140:     PetscMemzero(c->ilen,c->mbs*sizeof(PetscInt));
141:     C = *B;
142:   } else {
143:     MatCreate(A->comm,&C);
144:     MatSetSizes(C,nrows*bs,nrows*bs,PETSC_DETERMINE,PETSC_DETERMINE);
145:     MatSetType(C,A->type_name);
146:     MatSeqSBAIJSetPreallocation_SeqSBAIJ(C,bs,0,lens);
147:   }
148:   c = (Mat_SeqSBAIJ *)(C->data);
149:   for (i=0; i<nrows; i++) {
150:     row    = irow[i];
151:     kstart = ai[row];
152:     kend   = kstart + a->ilen[row];
153:     mat_i  = c->i[i];
154:     mat_j  = c->j + mat_i;
155:     mat_a  = c->a + mat_i*bs2;
156:     mat_ilen = c->ilen + i;
157:     for (k=kstart; k<kend; k++) {
158:       if ((tcol=ssmap[a->j[k]])) {
159:         *mat_j++ = tcol - 1;
160:         PetscMemcpy(mat_a,a->a+k*bs2,bs2*sizeof(MatScalar));
161:         mat_a   += bs2;
162:         (*mat_ilen)++;
163:       }
164:     }
165:   }
166: 
167:   /* Free work space */
168:   PetscFree(smap);
169:   PetscFree(lens);
170:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
171:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
172: 
173:   ISRestoreIndices(isrow,&irow);
174:   *B = C;
175:   return(0);
176: }

180: PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
181: {
182:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
183:   IS             is1;
185:   PetscInt       *vary,*iary,*irow,nrows,i,bs=A->rmap.bs,count;

188:   if (isrow != iscol) SETERRQ(PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isro");
189: 
190:   ISGetIndices(isrow,&irow);
191:   ISGetSize(isrow,&nrows);
192: 
193:   /* Verify if the indices corespond to each element in a block 
194:    and form the IS with compressed IS */
195:   PetscMalloc(2*(a->mbs+1)*sizeof(PetscInt),&vary);
196:   iary = vary + a->mbs;
197:   PetscMemzero(vary,(a->mbs)*sizeof(PetscInt));
198:   for (i=0; i<nrows; i++) vary[irow[i]/bs]++;
199: 
200:   count = 0;
201:   for (i=0; i<a->mbs; i++) {
202:     if (vary[i]!=0 && vary[i]!=bs) SETERRQ(PETSC_ERR_ARG_INCOMP,"Index set does not match blocks");
203:     if (vary[i]==bs) iary[count++] = i;
204:   }
205:   ISCreateGeneral(PETSC_COMM_SELF,count,iary,&is1);
206: 
207:   ISRestoreIndices(isrow,&irow);
208:   PetscFree(vary);

210:   MatGetSubMatrix_SeqSBAIJ_Private(A,is1,is1,cs,scall,B);
211:   ISDestroy(is1);
212:   return(0);
213: }

217: PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
218: {
220:   PetscInt       i;

223:   if (scall == MAT_INITIAL_MATRIX) {
224:     PetscMalloc((n+1)*sizeof(Mat),B);
225:   }

227:   for (i=0; i<n; i++) {
228:     MatGetSubMatrix_SeqSBAIJ(A,irow[i],icol[i],PETSC_DECIDE,scall,&(*B)[i]);
229:   }
230:   return(0);
231: }

233: /* -------------------------------------------------------*/
234: /* Should check that shapes of vectors and matrices match */
235: /* -------------------------------------------------------*/
236:  #include petscblaslapack.h

240: PetscErrorCode MatMult_SeqSBAIJ_1(Mat A,Vec xx,Vec zz)
241: {
242:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
243:   PetscScalar    *x,*z,*xb,x1,zero=0.0;
244:   MatScalar      *v;
246:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

249:   VecSet(zz,zero);
250:   VecGetArray(xx,&x);
251:   VecGetArray(zz,&z);

253:   v  = a->a;
254:   xb = x;
255: 
256:   for (i=0; i<mbs; i++) {
257:     n    = ai[1] - ai[0];  /* length of i_th row of A */
258:     x1   = *xb++;
259:     ib   = aj + *ai++;
260:     jmin = 0;
261:     /* if we ALWAYS required a diagonal entry then could remove this if test */
262:     /* should we use a tmp to hold the accumulated z[i] */
263:     if (*ib == i) {      /* (diag of A)*x */
264:       z[i] += *v++ * x[*ib++];
265:       jmin++;
266:     }
267:     for (j=jmin; j<n; j++) {
268:       cval    = *ib;
269:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
270:       z[i]    += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
271:     }
272:   }

274:   VecRestoreArray(xx,&x);
275:   VecRestoreArray(zz,&z);
276:   PetscLogFlops(2*(a->nz*2 - A->rmap.N) - A->rmap.N);  /* nz = (nz+m)/2 */
277:   return(0);
278: }

282: PetscErrorCode MatMult_SeqSBAIJ_2(Mat A,Vec xx,Vec zz)
283: {
284:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
285:   PetscScalar    *x,*z,*xb,x1,x2,zero=0.0;
286:   MatScalar      *v;
288:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

291:   VecSet(zz,zero);
292:   VecGetArray(xx,&x);
293:   VecGetArray(zz,&z);
294: 
295:   v     = a->a;
296:   xb = x;

298:   for (i=0; i<mbs; i++) {
299:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
300:     x1 = xb[0]; x2 = xb[1];
301:     ib = aj + *ai;
302:     jmin = 0;
303:     if (*ib == i){     /* (diag of A)*x */
304:       z[2*i]   += v[0]*x1 + v[2]*x2;
305:       z[2*i+1] += v[2]*x1 + v[3]*x2;
306:       v += 4; jmin++;
307:     }
308:     for (j=jmin; j<n; j++) {
309:       /* (strict lower triangular part of A)*x  */
310:       cval       = ib[j]*2;
311:       z[cval]     += v[0]*x1 + v[1]*x2;
312:       z[cval+1]   += v[2]*x1 + v[3]*x2;
313:       /* (strict upper triangular part of A)*x  */
314:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
315:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
316:       v  += 4;
317:     }
318:     xb +=2; ai++;
319:   }

321:   VecRestoreArray(xx,&x);
322:   VecRestoreArray(zz,&z);
323:   PetscLogFlops(8*(a->nz*2 - A->rmap.N) - A->rmap.N);
324:   return(0);
325: }

329: PetscErrorCode MatMult_SeqSBAIJ_3(Mat A,Vec xx,Vec zz)
330: {
331:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
332:   PetscScalar    *x,*z,*xb,x1,x2,x3,zero=0.0;
333:   MatScalar      *v;
335:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

338:   VecSet(zz,zero);
339:   VecGetArray(xx,&x);
340:   VecGetArray(zz,&z);
341: 
342:   v    = a->a;
343:   xb   = x;

345:   for (i=0; i<mbs; i++) {
346:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
347:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
348:     ib = aj + *ai;
349:     jmin = 0;
350:     if (*ib == i){     /* (diag of A)*x */
351:       z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
352:       z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
353:       z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
354:       v += 9; jmin++;
355:     }
356:     for (j=jmin; j<n; j++) {
357:       /* (strict lower triangular part of A)*x  */
358:       cval       = ib[j]*3;
359:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
360:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
361:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
362:       /* (strict upper triangular part of A)*x  */
363:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
364:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
365:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
366:       v  += 9;
367:     }
368:     xb +=3; ai++;
369:   }

371:   VecRestoreArray(xx,&x);
372:   VecRestoreArray(zz,&z);
373:   PetscLogFlops(18*(a->nz*2 - A->rmap.N) - A->rmap.N);
374:   return(0);
375: }

379: PetscErrorCode MatMult_SeqSBAIJ_4(Mat A,Vec xx,Vec zz)
380: {
381:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
382:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,zero=0.0;
383:   MatScalar      *v;
385:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

388:   VecSet(zz,zero);
389:   VecGetArray(xx,&x);
390:   VecGetArray(zz,&z);
391: 
392:   v     = a->a;
393:   xb = x;

395:   for (i=0; i<mbs; i++) {
396:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
397:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
398:     ib = aj + *ai;
399:     jmin = 0;
400:     if (*ib == i){     /* (diag of A)*x */
401:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
402:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
403:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
404:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
405:       v += 16; jmin++;
406:     }
407:     for (j=jmin; j<n; j++) {
408:       /* (strict lower triangular part of A)*x  */
409:       cval       = ib[j]*4;
410:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
411:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
412:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
413:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
414:       /* (strict upper triangular part of A)*x  */
415:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
416:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
417:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
418:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
419:       v  += 16;
420:     }
421:     xb +=4; ai++;
422:   }

424:   VecRestoreArray(xx,&x);
425:   VecRestoreArray(zz,&z);
426:   PetscLogFlops(32*(a->nz*2 - A->rmap.N) - A->rmap.N);
427:   return(0);
428: }

432: PetscErrorCode MatMult_SeqSBAIJ_5(Mat A,Vec xx,Vec zz)
433: {
434:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
435:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,zero=0.0;
436:   MatScalar      *v;
438:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

441:   VecSet(zz,zero);
442:   VecGetArray(xx,&x);
443:   VecGetArray(zz,&z);
444: 
445:   v     = a->a;
446:   xb = x;

448:   for (i=0; i<mbs; i++) {
449:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
450:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
451:     ib = aj + *ai;
452:     jmin = 0;
453:     if (*ib == i){      /* (diag of A)*x */
454:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
455:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
456:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
457:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
458:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
459:       v += 25; jmin++;
460:     }
461:     for (j=jmin; j<n; j++) {
462:       /* (strict lower triangular part of A)*x  */
463:       cval       = ib[j]*5;
464:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
465:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
466:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
467:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
468:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
469:       /* (strict upper triangular part of A)*x  */
470:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
471:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
472:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
473:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
474:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
475:       v  += 25;
476:     }
477:     xb +=5; ai++;
478:   }

480:   VecRestoreArray(xx,&x);
481:   VecRestoreArray(zz,&z);
482:   PetscLogFlops(50*(a->nz*2 - A->rmap.N) - A->rmap.N);
483:   return(0);
484: }


489: PetscErrorCode MatMult_SeqSBAIJ_6(Mat A,Vec xx,Vec zz)
490: {
491:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
492:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,zero=0.0;
493:   MatScalar      *v;
495:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

498:   VecSet(zz,zero);
499:   VecGetArray(xx,&x);
500:   VecGetArray(zz,&z);
501: 
502:   v     = a->a;
503:   xb = x;

505:   for (i=0; i<mbs; i++) {
506:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
507:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
508:     ib = aj + *ai;
509:     jmin = 0;
510:     if (*ib == i){      /* (diag of A)*x */
511:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
512:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
513:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
514:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
515:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
516:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
517:       v += 36; jmin++;
518:     }
519:     for (j=jmin; j<n; j++) {
520:       /* (strict lower triangular part of A)*x  */
521:       cval       = ib[j]*6;
522:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
523:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
524:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
525:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
526:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
527:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
528:       /* (strict upper triangular part of A)*x  */
529:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
530:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
531:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
532:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
533:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
534:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
535:       v  += 36;
536:     }
537:     xb +=6; ai++;
538:   }

540:   VecRestoreArray(xx,&x);
541:   VecRestoreArray(zz,&z);
542:   PetscLogFlops(72*(a->nz*2 - A->rmap.N) - A->rmap.N);
543:   return(0);
544: }
547: PetscErrorCode MatMult_SeqSBAIJ_7(Mat A,Vec xx,Vec zz)
548: {
549:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
550:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7,zero=0.0;
551:   MatScalar      *v;
553:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

556:   VecSet(zz,zero);
557:   VecGetArray(xx,&x);
558:   VecGetArray(zz,&z);
559: 
560:   v     = a->a;
561:   xb = x;

563:   for (i=0; i<mbs; i++) {
564:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
565:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
566:     ib = aj + *ai;
567:     jmin = 0;
568:     if (*ib == i){      /* (diag of A)*x */
569:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
570:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
571:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
572:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
573:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
574:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
575:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
576:       v += 49; jmin++;
577:     }
578:     for (j=jmin; j<n; j++) {
579:       /* (strict lower triangular part of A)*x  */
580:       cval       = ib[j]*7;
581:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
582:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
583:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
584:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
585:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
586:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
587:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
588:       /* (strict upper triangular part of A)*x  */
589:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
590:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
591:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
592:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
593:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
594:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
595:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
596:       v  += 49;
597:     }
598:     xb +=7; ai++;
599:   }
600:   VecRestoreArray(xx,&x);
601:   VecRestoreArray(zz,&z);
602:   PetscLogFlops(98*(a->nz*2 - A->rmap.N) - A->rmap.N);
603:   return(0);
604: }

606: /*
607:     This will not work with MatScalar == float because it calls the BLAS
608: */
611: PetscErrorCode MatMult_SeqSBAIJ_N(Mat A,Vec xx,Vec zz)
612: {
613:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
614:   PetscScalar    *x,*x_ptr,*z,*z_ptr,*xb,*zb,*work,*workt,zero=0.0;
615:   MatScalar      *v;
617:   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k;

620:   VecSet(zz,zero);
621:   VecGetArray(xx,&x); x_ptr=x;
622:   VecGetArray(zz,&z); z_ptr=z;

624:   aj   = a->j;
625:   v    = a->a;
626:   ii   = a->i;

628:   if (!a->mult_work) {
629:     PetscMalloc((A->rmap.N+1)*sizeof(PetscScalar),&a->mult_work);
630:   }
631:   work = a->mult_work;
632: 
633:   for (i=0; i<mbs; i++) {
634:     n     = ii[1] - ii[0]; ncols = n*bs;
635:     workt = work; idx=aj+ii[0];

637:     /* upper triangular part */
638:     for (j=0; j<n; j++) {
639:       xb = x_ptr + bs*(*idx++);
640:       for (k=0; k<bs; k++) workt[k] = xb[k];
641:       workt += bs;
642:     }
643:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
644:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);
645: 
646:     /* strict lower triangular part */
647:     idx = aj+ii[0];
648:     if (*idx == i){
649:       ncols -= bs; v += bs2; idx++; n--;
650:     }
651: 
652:     if (ncols > 0){
653:       workt = work;
654:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
655:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
656:       for (j=0; j<n; j++) {
657:         zb = z_ptr + bs*(*idx++);
658:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
659:         workt += bs;
660:       }
661:     }
662:     x += bs; v += n*bs2; z += bs; ii++;
663:   }
664: 
665:   VecRestoreArray(xx,&x);
666:   VecRestoreArray(zz,&z);
667:   PetscLogFlops(2*(a->nz*2 - A->rmap.N)*bs2 - A->rmap.N);
668:   return(0);
669: }

674: PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz)
675: {
676:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
677:   PetscScalar    *x,*z,*xb,x1;
678:   MatScalar      *v;
680:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

683:   VecCopy_Seq(yy,zz);
684:   VecGetArray(xx,&x);
685:   VecGetArray(zz,&z);
686:   v  = a->a;
687:   xb = x;

689:   for (i=0; i<mbs; i++) {
690:     n  = ai[1] - ai[0];  /* length of i_th row of A */
691:     x1 = xb[0];
692:     ib = aj + *ai;
693:     jmin = 0;
694:     if (*ib == i) {            /* (diag of A)*x */
695:       z[i] += *v++ * x[*ib++]; jmin++;
696:     }
697:     for (j=jmin; j<n; j++) {
698:       cval    = *ib;
699:       z[cval] += *v * x1;      /* (strict lower triangular part of A)*x  */
700:       z[i] += *v++ * x[*ib++]; /* (strict upper triangular part of A)*x  */
701:     }
702:     xb++; ai++;
703:   }

705:   VecRestoreArray(xx,&x);
706:   VecRestoreArray(zz,&z);
707: 
708:   PetscLogFlops(2*(a->nz*2 - A->rmap.n));
709:   return(0);
710: }

714: PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
715: {
716:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
717:   PetscScalar    *x,*z,*xb,x1,x2;
718:   MatScalar      *v;
720:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

723:   VecCopy_Seq(yy,zz);
724:   VecGetArray(xx,&x);
725:   VecGetArray(zz,&z);

727:   v  = a->a;
728:   xb = x;

730:   for (i=0; i<mbs; i++) {
731:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
732:     x1 = xb[0]; x2 = xb[1];
733:     ib = aj + *ai;
734:     jmin = 0;
735:     if (*ib == i){      /* (diag of A)*x */
736:       z[2*i]   += v[0]*x1 + v[2]*x2;
737:       z[2*i+1] += v[2]*x1 + v[3]*x2;
738:       v += 4; jmin++;
739:     }
740:     for (j=jmin; j<n; j++) {
741:       /* (strict lower triangular part of A)*x  */
742:       cval       = ib[j]*2;
743:       z[cval]     += v[0]*x1 + v[1]*x2;
744:       z[cval+1]   += v[2]*x1 + v[3]*x2;
745:       /* (strict upper triangular part of A)*x  */
746:       z[2*i]   += v[0]*x[cval] + v[2]*x[cval+1];
747:       z[2*i+1] += v[1]*x[cval] + v[3]*x[cval+1];
748:       v  += 4;
749:     }
750:     xb +=2; ai++;
751:   }
752:   VecRestoreArray(xx,&x);
753:   VecRestoreArray(zz,&z);

755:   PetscLogFlops(4*(a->nz*2 - A->rmap.n));
756:   return(0);
757: }

761: PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
762: {
763:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
764:   PetscScalar    *x,*z,*xb,x1,x2,x3;
765:   MatScalar      *v;
767:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

770:   VecCopy_Seq(yy,zz);
771:   VecGetArray(xx,&x);
772:   VecGetArray(zz,&z);

774:   v     = a->a;
775:   xb = x;

777:   for (i=0; i<mbs; i++) {
778:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
779:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2];
780:     ib = aj + *ai;
781:     jmin = 0;
782:     if (*ib == i){     /* (diag of A)*x */
783:      z[3*i]   += v[0]*x1 + v[3]*x2 + v[6]*x3;
784:      z[3*i+1] += v[3]*x1 + v[4]*x2 + v[7]*x3;
785:      z[3*i+2] += v[6]*x1 + v[7]*x2 + v[8]*x3;
786:      v += 9; jmin++;
787:     }
788:     for (j=jmin; j<n; j++) {
789:       /* (strict lower triangular part of A)*x  */
790:       cval       = ib[j]*3;
791:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3;
792:       z[cval+1]   += v[3]*x1 + v[4]*x2 + v[5]*x3;
793:       z[cval+2]   += v[6]*x1 + v[7]*x2 + v[8]*x3;
794:       /* (strict upper triangular part of A)*x  */
795:       z[3*i]   += v[0]*x[cval] + v[3]*x[cval+1]+ v[6]*x[cval+2];
796:       z[3*i+1] += v[1]*x[cval] + v[4]*x[cval+1]+ v[7]*x[cval+2];
797:       z[3*i+2] += v[2]*x[cval] + v[5]*x[cval+1]+ v[8]*x[cval+2];
798:       v  += 9;
799:     }
800:     xb +=3; ai++;
801:   }

803:   VecRestoreArray(xx,&x);
804:   VecRestoreArray(zz,&z);

806:   PetscLogFlops(18*(a->nz*2 - A->rmap.n));
807:   return(0);
808: }

812: PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
813: {
814:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
815:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4;
816:   MatScalar      *v;
818:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

821:   VecCopy_Seq(yy,zz);
822:   VecGetArray(xx,&x);
823:   VecGetArray(zz,&z);

825:   v     = a->a;
826:   xb = x;

828:   for (i=0; i<mbs; i++) {
829:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
830:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
831:     ib = aj + *ai;
832:     jmin = 0;
833:     if (*ib == i){      /* (diag of A)*x */
834:       z[4*i]   += v[0]*x1 + v[4]*x2 +  v[8]*x3 + v[12]*x4;
835:       z[4*i+1] += v[4]*x1 + v[5]*x2 +  v[9]*x3 + v[13]*x4;
836:       z[4*i+2] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[14]*x4;
837:       z[4*i+3] += v[12]*x1+ v[13]*x2+ v[14]*x3 + v[15]*x4;
838:       v += 16; jmin++;
839:     }
840:     for (j=jmin; j<n; j++) {
841:       /* (strict lower triangular part of A)*x  */
842:       cval       = ib[j]*4;
843:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4;
844:       z[cval+1]   += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4;
845:       z[cval+2]   += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4;
846:       z[cval+3]   += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4;
847:       /* (strict upper triangular part of A)*x  */
848:       z[4*i]   += v[0]*x[cval] + v[4]*x[cval+1]+ v[8]*x[cval+2] + v[12]*x[cval+3];
849:       z[4*i+1] += v[1]*x[cval] + v[5]*x[cval+1]+ v[9]*x[cval+2] + v[13]*x[cval+3];
850:       z[4*i+2] += v[2]*x[cval] + v[6]*x[cval+1]+ v[10]*x[cval+2]+ v[14]*x[cval+3];
851:       z[4*i+3] += v[3]*x[cval] + v[7]*x[cval+1]+ v[11]*x[cval+2]+ v[15]*x[cval+3];
852:       v  += 16;
853:     }
854:     xb +=4; ai++;
855:   }

857:   VecRestoreArray(xx,&x);
858:   VecRestoreArray(zz,&z);

860:   PetscLogFlops(32*(a->nz*2 - A->rmap.n));
861:   return(0);
862: }

866: PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
867: {
868:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
869:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5;
870:   MatScalar      *v;
872:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

875:   VecCopy_Seq(yy,zz);
876:   VecGetArray(xx,&x);
877:   VecGetArray(zz,&z);

879:   v     = a->a;
880:   xb = x;

882:   for (i=0; i<mbs; i++) {
883:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
884:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4];
885:     ib = aj + *ai;
886:     jmin = 0;
887:     if (*ib == i){      /* (diag of A)*x */
888:       z[5*i]   += v[0]*x1  + v[5]*x2 + v[10]*x3 + v[15]*x4+ v[20]*x5;
889:       z[5*i+1] += v[5]*x1  + v[6]*x2 + v[11]*x3 + v[16]*x4+ v[21]*x5;
890:       z[5*i+2] += v[10]*x1 +v[11]*x2 + v[12]*x3 + v[17]*x4+ v[22]*x5;
891:       z[5*i+3] += v[15]*x1 +v[16]*x2 + v[17]*x3 + v[18]*x4+ v[23]*x5;
892:       z[5*i+4] += v[20]*x1 +v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
893:       v += 25; jmin++;
894:     }
895:     for (j=jmin; j<n; j++) {
896:       /* (strict lower triangular part of A)*x  */
897:       cval       = ib[j]*5;
898:       z[cval]     += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5;
899:       z[cval+1]   += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5;
900:       z[cval+2]   += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4+ v[14]*x5;
901:       z[cval+3]   += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4+ v[19]*x5;
902:       z[cval+4]   += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4+ v[24]*x5;
903:       /* (strict upper triangular part of A)*x  */
904:       z[5*i]   +=v[0]*x[cval]+v[5]*x[cval+1]+v[10]*x[cval+2]+v[15]*x[cval+3]+v[20]*x[cval+4];
905:       z[5*i+1] +=v[1]*x[cval]+v[6]*x[cval+1]+v[11]*x[cval+2]+v[16]*x[cval+3]+v[21]*x[cval+4];
906:       z[5*i+2] +=v[2]*x[cval]+v[7]*x[cval+1]+v[12]*x[cval+2]+v[17]*x[cval+3]+v[22]*x[cval+4];
907:       z[5*i+3] +=v[3]*x[cval]+v[8]*x[cval+1]+v[13]*x[cval+2]+v[18]*x[cval+3]+v[23]*x[cval+4];
908:       z[5*i+4] +=v[4]*x[cval]+v[9]*x[cval+1]+v[14]*x[cval+2]+v[19]*x[cval+3]+v[24]*x[cval+4];
909:       v  += 25;
910:     }
911:     xb +=5; ai++;
912:   }

914:   VecRestoreArray(xx,&x);
915:   VecRestoreArray(zz,&z);

917:   PetscLogFlops(50*(a->nz*2 - A->rmap.n));
918:   return(0);
919: }
922: PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
923: {
924:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
925:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6;
926:   MatScalar      *v;
928:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

931:   VecCopy_Seq(yy,zz);
932:   VecGetArray(xx,&x);
933:   VecGetArray(zz,&z);

935:   v     = a->a;
936:   xb = x;

938:   for (i=0; i<mbs; i++) {
939:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
940:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5];
941:     ib = aj + *ai;
942:     jmin = 0;
943:     if (*ib == i){     /* (diag of A)*x */
944:       z[6*i]   += v[0]*x1  + v[6]*x2 + v[12]*x3 + v[18]*x4+ v[24]*x5 + v[30]*x6;
945:       z[6*i+1] += v[6]*x1  + v[7]*x2 + v[13]*x3 + v[19]*x4+ v[25]*x5 + v[31]*x6;
946:       z[6*i+2] += v[12]*x1 +v[13]*x2 + v[14]*x3 + v[20]*x4+ v[26]*x5 + v[32]*x6;
947:       z[6*i+3] += v[18]*x1 +v[19]*x2 + v[20]*x3 + v[21]*x4+ v[27]*x5 + v[33]*x6;
948:       z[6*i+4] += v[24]*x1 +v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[34]*x6;
949:       z[6*i+5] += v[30]*x1 +v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
950:       v += 36; jmin++;
951:     }
952:     for (j=jmin; j<n; j++) {
953:       /* (strict lower triangular part of A)*x  */
954:       cval       = ib[j]*6;
955:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6;
956:       z[cval+1] += v[6]*x1  + v[7]*x2 + v[8]*x3 + v[9]*x4+ v[10]*x5 + v[11]*x6;
957:       z[cval+2] += v[12]*x1  + v[13]*x2 + v[14]*x3 + v[15]*x4+ v[16]*x5 + v[17]*x6;
958:       z[cval+3] += v[18]*x1  + v[19]*x2 + v[20]*x3 + v[21]*x4+ v[22]*x5 + v[23]*x6;
959:       z[cval+4] += v[24]*x1  + v[25]*x2 + v[26]*x3 + v[27]*x4+ v[28]*x5 + v[29]*x6;
960:       z[cval+5] += v[30]*x1  + v[31]*x2 + v[32]*x3 + v[33]*x4+ v[34]*x5 + v[35]*x6;
961:       /* (strict upper triangular part of A)*x  */
962:       z[6*i]   +=v[0]*x[cval]+v[6]*x[cval+1]+v[12]*x[cval+2]+v[18]*x[cval+3]+v[24]*x[cval+4]+v[30]*x[cval+5];
963:       z[6*i+1] +=v[1]*x[cval]+v[7]*x[cval+1]+v[13]*x[cval+2]+v[19]*x[cval+3]+v[25]*x[cval+4]+v[31]*x[cval+5];
964:       z[6*i+2] +=v[2]*x[cval]+v[8]*x[cval+1]+v[14]*x[cval+2]+v[20]*x[cval+3]+v[26]*x[cval+4]+v[32]*x[cval+5];
965:       z[6*i+3] +=v[3]*x[cval]+v[9]*x[cval+1]+v[15]*x[cval+2]+v[21]*x[cval+3]+v[27]*x[cval+4]+v[33]*x[cval+5];
966:       z[6*i+4] +=v[4]*x[cval]+v[10]*x[cval+1]+v[16]*x[cval+2]+v[22]*x[cval+3]+v[28]*x[cval+4]+v[34]*x[cval+5];
967:       z[6*i+5] +=v[5]*x[cval]+v[11]*x[cval+1]+v[17]*x[cval+2]+v[23]*x[cval+3]+v[29]*x[cval+4]+v[35]*x[cval+5];
968:       v  += 36;
969:     }
970:     xb +=6; ai++;
971:   }

973:   VecRestoreArray(xx,&x);
974:   VecRestoreArray(zz,&z);

976:   PetscLogFlops(72*(a->nz*2 - A->rmap.n));
977:   return(0);
978: }

982: PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
983: {
984:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
985:   PetscScalar    *x,*z,*xb,x1,x2,x3,x4,x5,x6,x7;
986:   MatScalar      *v;
988:   PetscInt       mbs=a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j,jmin;

991:   VecCopy_Seq(yy,zz);
992:   VecGetArray(xx,&x);
993:   VecGetArray(zz,&z);

995:   v     = a->a;
996:   xb = x;

998:   for (i=0; i<mbs; i++) {
999:     n  = ai[1] - ai[0]; /* length of i_th block row of A */
1000:     x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5=xb[4]; x6=xb[5]; x7=xb[6];
1001:     ib = aj + *ai;
1002:     jmin = 0;
1003:     if (*ib == i){     /* (diag of A)*x */
1004:       z[7*i]   += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4+ v[28]*x5 + v[35]*x6+ v[42]*x7;
1005:       z[7*i+1] += v[7]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4+ v[29]*x5 + v[36]*x6+ v[43]*x7;
1006:       z[7*i+2] += v[14]*x1+ v[15]*x2 +v[16]*x3 + v[23]*x4+ v[30]*x5 + v[37]*x6+ v[44]*x7;
1007:       z[7*i+3] += v[21]*x1+ v[22]*x2 +v[23]*x3 + v[24]*x4+ v[31]*x5 + v[38]*x6+ v[45]*x7;
1008:       z[7*i+4] += v[28]*x1+ v[29]*x2 +v[30]*x3 + v[31]*x4+ v[32]*x5 + v[39]*x6+ v[46]*x7;
1009:       z[7*i+5] += v[35]*x1+ v[36]*x2 +v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[47]*x7;
1010:       z[7*i+6] += v[42]*x1+ v[43]*x2 +v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1011:       v += 49; jmin++;
1012:     }
1013:     for (j=jmin; j<n; j++) {
1014:       /* (strict lower triangular part of A)*x  */
1015:       cval       = ib[j]*7;
1016:       z[cval]   += v[0]*x1  + v[1]*x2 + v[2]*x3 + v[3]*x4+ v[4]*x5 + v[5]*x6+ v[6]*x7;
1017:       z[cval+1] += v[7]*x1  + v[8]*x2 + v[9]*x3 + v[10]*x4+ v[11]*x5 + v[12]*x6+ v[13]*x7;
1018:       z[cval+2] += v[14]*x1  + v[15]*x2 + v[16]*x3 + v[17]*x4+ v[18]*x5 + v[19]*x6+ v[20]*x7;
1019:       z[cval+3] += v[21]*x1  + v[22]*x2 + v[23]*x3 + v[24]*x4+ v[25]*x5 + v[26]*x6+ v[27]*x7;
1020:       z[cval+4] += v[28]*x1  + v[29]*x2 + v[30]*x3 + v[31]*x4+ v[32]*x5 + v[33]*x6+ v[34]*x7;
1021:       z[cval+5] += v[35]*x1  + v[36]*x2 + v[37]*x3 + v[38]*x4+ v[39]*x5 + v[40]*x6+ v[41]*x7;
1022:       z[cval+6] += v[42]*x1  + v[43]*x2 + v[44]*x3 + v[45]*x4+ v[46]*x5 + v[47]*x6+ v[48]*x7;
1023:       /* (strict upper triangular part of A)*x  */
1024:       z[7*i]  +=v[0]*x[cval]+v[7]*x[cval+1]+v[14]*x[cval+2]+v[21]*x[cval+3]+v[28]*x[cval+4]+v[35]*x[cval+5]+v[42]*x[cval+6];
1025:       z[7*i+1]+=v[1]*x[cval]+v[8]*x[cval+1]+v[15]*x[cval+2]+v[22]*x[cval+3]+v[29]*x[cval+4]+v[36]*x[cval+5]+v[43]*x[cval+6];
1026:       z[7*i+2]+=v[2]*x[cval]+v[9]*x[cval+1]+v[16]*x[cval+2]+v[23]*x[cval+3]+v[30]*x[cval+4]+v[37]*x[cval+5]+v[44]*x[cval+6];
1027:       z[7*i+3]+=v[3]*x[cval]+v[10]*x[cval+1]+v[17]*x[cval+2]+v[24]*x[cval+3]+v[31]*x[cval+4]+v[38]*x[cval+5]+v[45]*x[cval+6];
1028:       z[7*i+4]+=v[4]*x[cval]+v[11]*x[cval+1]+v[18]*x[cval+2]+v[25]*x[cval+3]+v[32]*x[cval+4]+v[39]*x[cval+5]+v[46]*x[cval+6];
1029:       z[7*i+5]+=v[5]*x[cval]+v[12]*x[cval+1]+v[19]*x[cval+2]+v[26]*x[cval+3]+v[33]*x[cval+4]+v[40]*x[cval+5]+v[47]*x[cval+6];
1030:       z[7*i+6]+=v[6]*x[cval]+v[13]*x[cval+1]+v[20]*x[cval+2]+v[27]*x[cval+3]+v[34]*x[cval+4]+v[41]*x[cval+5]+v[48]*x[cval+6];
1031:       v  += 49;
1032:     }
1033:     xb +=7; ai++;
1034:   }

1036:   VecRestoreArray(xx,&x);
1037:   VecRestoreArray(zz,&z);

1039:   PetscLogFlops(98*(a->nz*2 - A->rmap.n));
1040:   return(0);
1041: }

1045: PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
1046: {
1047:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1048:   PetscScalar    *x,*x_ptr,*z,*z_ptr=0,*xb,*zb,*work,*workt;
1049:   MatScalar      *v;
1051:   PetscInt       mbs=a->mbs,i,*idx,*aj,*ii,bs=A->rmap.bs,j,n,bs2=a->bs2,ncols,k;

1054:   VecCopy_Seq(yy,zz);
1055:   VecGetArray(xx,&x); x_ptr=x;
1056:   VecGetArray(zz,&z); z_ptr=z;

1058:   aj   = a->j;
1059:   v    = a->a;
1060:   ii   = a->i;

1062:   if (!a->mult_work) {
1063:     PetscMalloc((A->rmap.n+1)*sizeof(PetscScalar),&a->mult_work);
1064:   }
1065:   work = a->mult_work;
1066: 
1067: 
1068:   for (i=0; i<mbs; i++) {
1069:     n     = ii[1] - ii[0]; ncols = n*bs;
1070:     workt = work; idx=aj+ii[0];

1072:     /* upper triangular part */
1073:     for (j=0; j<n; j++) {
1074:       xb = x_ptr + bs*(*idx++);
1075:       for (k=0; k<bs; k++) workt[k] = xb[k];
1076:       workt += bs;
1077:     }
1078:     /* z(i*bs:(i+1)*bs-1) += A(i,:)*x */
1079:     Kernel_w_gets_w_plus_Ar_times_v(bs,ncols,work,v,z);

1081:     /* strict lower triangular part */
1082:     idx = aj+ii[0];
1083:     if (*idx == i){
1084:       ncols -= bs; v += bs2; idx++; n--;
1085:     }
1086:     if (ncols > 0){
1087:       workt = work;
1088:       PetscMemzero(workt,ncols*sizeof(PetscScalar));
1089:       Kernel_w_gets_w_plus_trans_Ar_times_v(bs,ncols,x,v,workt);
1090:       for (j=0; j<n; j++) {
1091:         zb = z_ptr + bs*(*idx++);
1092:         for (k=0; k<bs; k++) zb[k] += workt[k] ;
1093:         workt += bs;
1094:       }
1095:     }

1097:     x += bs; v += n*bs2; z += bs; ii++;
1098:   }

1100:   VecRestoreArray(xx,&x);
1101:   VecRestoreArray(zz,&z);

1103:   PetscLogFlops(2*(a->nz*2 - A->rmap.n));
1104:   return(0);
1105: }

1109: PetscErrorCode MatScale_SeqSBAIJ(Mat inA,PetscScalar alpha)
1110: {
1111:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data;
1112:   PetscScalar oalpha = alpha;
1113:   PetscBLASInt one = 1,totalnz = (PetscBLASInt)a->bs2*a->nz;

1117:   BLASscal_(&totalnz,&oalpha,a->a,&one);
1118:   PetscLogFlops(totalnz);
1119:   return(0);
1120: }

1124: PetscErrorCode MatNorm_SeqSBAIJ(Mat A,NormType type,PetscReal *norm)
1125: {
1126:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1127:   MatScalar      *v = a->a;
1128:   PetscReal      sum_diag = 0.0, sum_off = 0.0, *sum;
1129:   PetscInt       i,j,k,bs = A->rmap.bs,bs2=a->bs2,k1,mbs=a->mbs,*aj=a->j;
1131:   PetscInt       *jl,*il,jmin,jmax,nexti,ik,*col;
1132: 
1134:   if (type == NORM_FROBENIUS) {
1135:     for (k=0; k<mbs; k++){
1136:       jmin = a->i[k]; jmax = a->i[k+1];
1137:       col  = aj + jmin;
1138:       if (*col == k){         /* diagonal block */
1139:         for (i=0; i<bs2; i++){
1140: #if defined(PETSC_USE_COMPLEX)
1141:           sum_diag += PetscRealPart(PetscConj(*v)*(*v)); v++;
1142: #else
1143:           sum_diag += (*v)*(*v); v++;
1144: #endif
1145:         }
1146:         jmin++;
1147:       }
1148:       for (j=jmin; j<jmax; j++){  /* off-diagonal blocks */
1149:         for (i=0; i<bs2; i++){
1150: #if defined(PETSC_USE_COMPLEX)
1151:           sum_off += PetscRealPart(PetscConj(*v)*(*v)); v++;
1152: #else
1153:           sum_off += (*v)*(*v); v++;
1154: #endif  
1155:         }
1156:       }
1157:     }
1158:     *norm = sqrt(sum_diag + 2*sum_off);
1159:   }  else if (type == NORM_INFINITY || type == NORM_1) { /* maximum row/column sum */
1160:     PetscMalloc((2*mbs+1)*sizeof(PetscInt)+bs*sizeof(PetscReal),&il);
1161:     jl   = il + mbs;
1162:     sum  = (PetscReal*)(jl + mbs);
1163:     for (i=0; i<mbs; i++) jl[i] = mbs;
1164:     il[0] = 0;

1166:     *norm = 0.0;
1167:     for (k=0; k<mbs; k++) { /* k_th block row */
1168:       for (j=0; j<bs; j++) sum[j]=0.0;
1169:       /*-- col sum --*/
1170:       i = jl[k]; /* first |A(i,k)| to be added */
1171:       /* jl[k]=i: first nozero element in row i for submatrix A(1:k,k:n) (active window)
1172:                   at step k */
1173:       while (i<mbs){
1174:         nexti = jl[i];  /* next block row to be added */
1175:         ik    = il[i];  /* block index of A(i,k) in the array a */
1176:         for (j=0; j<bs; j++){
1177:           v = a->a + ik*bs2 + j*bs;
1178:           for (k1=0; k1<bs; k1++) {
1179:             sum[j] += PetscAbsScalar(*v); v++;
1180:           }
1181:         }
1182:         /* update il, jl */
1183:         jmin = ik + 1; /* block index of array a: points to the next nonzero of A in row i */
1184:         jmax = a->i[i+1];
1185:         if (jmin < jmax){
1186:           il[i] = jmin;
1187:           j   = a->j[jmin];
1188:           jl[i] = jl[j]; jl[j]=i;
1189:         }
1190:         i = nexti;
1191:       }
1192:       /*-- row sum --*/
1193:       jmin = a->i[k]; jmax = a->i[k+1];
1194:       for (i=jmin; i<jmax; i++) {
1195:         for (j=0; j<bs; j++){
1196:           v = a->a + i*bs2 + j;
1197:           for (k1=0; k1<bs; k1++){
1198:             sum[j] += PetscAbsScalar(*v); v += bs;
1199:           }
1200:         }
1201:       }
1202:       /* add k_th block row to il, jl */
1203:       col = aj+jmin;
1204:       if (*col == k) jmin++;
1205:       if (jmin < jmax){
1206:         il[k] = jmin;
1207:         j = a->j[jmin]; jl[k] = jl[j]; jl[j] = k;
1208:       }
1209:       for (j=0; j<bs; j++){
1210:         if (sum[j] > *norm) *norm = sum[j];
1211:       }
1212:     }
1213:     PetscFree(il);
1214:   } else {
1215:     SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
1216:   }
1217:   return(0);
1218: }

1222: PetscErrorCode MatEqual_SeqSBAIJ(Mat A,Mat B,PetscTruth* flg)
1223: {
1224:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data,*b = (Mat_SeqSBAIJ *)B->data;


1229:   /* If the  matrix/block dimensions are not equal, or no of nonzeros or shift */
1230:   if ((A->rmap.N != B->rmap.N) || (A->cmap.n != B->cmap.n) || (A->rmap.bs != B->rmap.bs)|| (a->nz != b->nz)) {
1231:     *flg = PETSC_FALSE;
1232:     return(0);
1233:   }
1234: 
1235:   /* if the a->i are the same */
1236:   PetscMemcmp(a->i,b->i,(a->mbs+1)*sizeof(PetscInt),flg);
1237:   if (!*flg) {
1238:     return(0);
1239:   }
1240: 
1241:   /* if a->j are the same */
1242:   PetscMemcmp(a->j,b->j,(a->nz)*sizeof(PetscInt),flg);
1243:   if (!*flg) {
1244:     return(0);
1245:   }
1246:   /* if a->a are the same */
1247:   PetscMemcmp(a->a,b->a,(a->nz)*(A->rmap.bs)*(A->rmap.bs)*sizeof(PetscScalar),flg);
1248:   return(0);
1249: }

1253: PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat A,Vec v)
1254: {
1255:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1257:   PetscInt       i,j,k,n,row,bs,*ai,*aj,ambs,bs2;
1258:   PetscScalar    *x,zero = 0.0;
1259:   MatScalar      *aa,*aa_j;

1262:   bs   = A->rmap.bs;
1263:   if (A->factor && bs>1) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix with bs>1");
1264: 
1265:   aa   = a->a;
1266:   ai   = a->i;
1267:   aj   = a->j;
1268:   ambs = a->mbs;
1269:   bs2  = a->bs2;

1271:   VecSet(v,zero);
1272:   VecGetArray(v,&x);
1273:   VecGetLocalSize(v,&n);
1274:   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1275:   for (i=0; i<ambs; i++) {
1276:     j=ai[i];
1277:     if (aj[j] == i) {             /* if this is a diagonal element */
1278:       row  = i*bs;
1279:       aa_j = aa + j*bs2;
1280:       if (A->factor && bs==1){
1281:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = 1.0/aa_j[k];
1282:       } else {
1283:         for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k];
1284:       }
1285:     }
1286:   }
1287: 
1288:   VecRestoreArray(v,&x);
1289:   return(0);
1290: }

1294: PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat A,Vec ll,Vec rr)
1295: {
1296:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1297:   PetscScalar    *l,x,*li,*ri;
1298:   MatScalar      *aa,*v;
1300:   PetscInt       i,j,k,lm,M,m,*ai,*aj,mbs,tmp,bs,bs2;
1301:   PetscTruth     flg;

1304:   if (ll != rr){
1305:     VecEqual(ll,rr,&flg);
1306:     if (!flg)
1307:       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1308:   }
1309:   if (!ll) return(0);
1310:   ai  = a->i;
1311:   aj  = a->j;
1312:   aa  = a->a;
1313:   m   = A->rmap.N;
1314:   bs  = A->rmap.bs;
1315:   mbs = a->mbs;
1316:   bs2 = a->bs2;

1318:   VecGetArray(ll,&l);
1319:   VecGetLocalSize(ll,&lm);
1320:   if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector wrong length");
1321:   for (i=0; i<mbs; i++) { /* for each block row */
1322:     M  = ai[i+1] - ai[i];
1323:     li = l + i*bs;
1324:     v  = aa + bs2*ai[i];
1325:     for (j=0; j<M; j++) { /* for each block */
1326:       ri = l + bs*aj[ai[i]+j];
1327:       for (k=0; k<bs; k++) {
1328:         x = ri[k];
1329:         for (tmp=0; tmp<bs; tmp++) (*v++) *= li[tmp]*x;
1330:       }
1331:     }
1332:   }
1333:   VecRestoreArray(ll,&l);
1334:   PetscLogFlops(2*a->nz);
1335:   return(0);
1336: }

1340: PetscErrorCode MatGetInfo_SeqSBAIJ(Mat A,MatInfoType flag,MatInfo *info)
1341: {
1342:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1345:   info->rows_global    = (double)A->rmap.N;
1346:   info->columns_global = (double)A->rmap.N;
1347:   info->rows_local     = (double)A->rmap.N;
1348:   info->columns_local  = (double)A->rmap.N;
1349:   info->block_size     = a->bs2;
1350:   info->nz_allocated   = a->maxnz; /*num. of nonzeros in upper triangular part */
1351:   info->nz_used        = a->bs2*a->nz; /*num. of nonzeros in upper triangular part */
1352:   info->nz_unneeded    = (double)(info->nz_allocated - info->nz_used);
1353:   info->assemblies   = A->num_ass;
1354:   info->mallocs      = a->reallocs;
1355:   info->memory       = A->mem;
1356:   if (A->factor) {
1357:     info->fill_ratio_given  = A->info.fill_ratio_given;
1358:     info->fill_ratio_needed = A->info.fill_ratio_needed;
1359:     info->factor_mallocs    = A->info.factor_mallocs;
1360:   } else {
1361:     info->fill_ratio_given  = 0;
1362:     info->fill_ratio_needed = 0;
1363:     info->factor_mallocs    = 0;
1364:   }
1365:   return(0);
1366: }


1371: PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat A)
1372: {
1373:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;

1377:   PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(MatScalar));
1378:   return(0);
1379: }

1383: /* 
1384:    This code does not work since it only checks the upper triangular part of
1385:   the matrix. Hence it is not listed in the function table.
1386: */
1387: PetscErrorCode MatGetRowMaxAbs_SeqSBAIJ(Mat A,Vec v,PetscInt idx[])
1388: {
1389:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data;
1391:   PetscInt       i,j,n,row,col,bs,*ai,*aj,mbs;
1392:   PetscReal      atmp;
1393:   MatScalar      *aa;
1394:   PetscScalar    *x;
1395:   PetscInt       ncols,brow,bcol,krow,kcol;

1398:   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
1399:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1400:   bs   = A->rmap.bs;
1401:   aa   = a->a;
1402:   ai   = a->i;
1403:   aj   = a->j;
1404:   mbs = a->mbs;

1406:   VecSet(v,0.0);
1407:   VecGetArray(v,&x);
1408:   VecGetLocalSize(v,&n);
1409:   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1410:   for (i=0; i<mbs; i++) {
1411:     ncols = ai[1] - ai[0]; ai++;
1412:     brow  = bs*i;
1413:     for (j=0; j<ncols; j++){
1414:       bcol = bs*(*aj);
1415:       for (kcol=0; kcol<bs; kcol++){
1416:         col = bcol + kcol;      /* col index */
1417:         for (krow=0; krow<bs; krow++){
1418:           atmp = PetscAbsScalar(*aa); aa++;
1419:           row = brow + krow;    /* row index */
1420:           if (PetscRealPart(x[row]) < atmp) x[row] = atmp;
1421:           if (*aj > i && PetscRealPart(x[col]) < atmp) x[col] = atmp;
1422:         }
1423:       }
1424:       aj++;
1425:     }
1426:   }
1427:   VecRestoreArray(v,&x);
1428:   return(0);
1429: }