Actual source code: sbaijfact2.c
1: #define PETSCMAT_DLL
3: /*
4: Factorization code for SBAIJ format.
5: */
7: #include src/mat/impls/sbaij/seq/sbaij.h
8: #include src/mat/impls/baij/seq/baij.h
9: #include src/inline/ilu.h
10: #include src/inline/dot.h
14: PetscErrorCode MatSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
15: {
16: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
17: IS isrow=a->row;
18: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
19: PetscErrorCode ierr;
20: PetscInt nz,*vj,k,*r,idx,k1;
21: PetscInt bs=A->rmap.bs,bs2 = a->bs2;
22: MatScalar *aa=a->a,*v,*diag;
23: PetscScalar *x,*xk,*xj,*b,*xk_tmp,*t;
26: VecGetArray(bb,&b);
27: VecGetArray(xx,&x);
28: t = a->solve_work;
29: ISGetIndices(isrow,&r);
30: PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);
32: /* solve U^T * D * y = b by forward substitution */
33: xk = t;
34: for (k=0; k<mbs; k++) { /* t <- perm(b) */
35: idx = bs*r[k];
36: for (k1=0; k1<bs; k1++) *xk++ = b[idx+k1];
37: }
38: for (k=0; k<mbs; k++){
39: v = aa + bs2*ai[k];
40: xk = t + k*bs; /* Dk*xk = k-th block of x */
41: PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar)); /* xk_tmp <- xk */
42: nz = ai[k+1] - ai[k];
43: vj = aj + ai[k];
44: xj = t + (*vj)*bs; /* *vj-th block of x, *vj>k */
45: while (nz--) {
46: /* x(:) += U(k,:)^T*(Dk*xk) */
47: Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
48: vj++; xj = t + (*vj)*bs;
49: v += bs2;
50: }
51: /* xk = inv(Dk)*(Dk*xk) */
52: diag = aa+k*bs2; /* ptr to inv(Dk) */
53: Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
54: }
56: /* solve U*x = y by back substitution */
57: for (k=mbs-1; k>=0; k--){
58: v = aa + bs2*ai[k];
59: xk = t + k*bs; /* xk */
60: nz = ai[k+1] - ai[k];
61: vj = aj + ai[k];
62: xj = t + (*vj)*bs;
63: while (nz--) {
64: /* xk += U(k,:)*x(:) */
65: Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
66: vj++;
67: v += bs2; xj = t + (*vj)*bs;
68: }
69: idx = bs*r[k];
70: for (k1=0; k1<bs; k1++) x[idx+k1] = *xk++;
71: }
73: PetscFree(xk_tmp);
74: ISRestoreIndices(isrow,&r);
75: VecRestoreArray(bb,&b);
76: VecRestoreArray(xx,&x);
77: PetscLogFlops(bs2*(2*a->nz + mbs));
78: return(0);
79: }
83: PetscErrorCode MatForwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
84: {
86: SETERRQ(1,"not implemented yet");
87: return(0);
88: }
92: PetscErrorCode MatBackwardSolve_SeqSBAIJ_N(Mat A,Vec bb,Vec xx)
93: {
95: SETERRQ(1,"not implemented yet");
96: return(0);
97: }
101: PetscErrorCode ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
102: {
104: PetscInt nz,*vj,k;
105: PetscInt bs2 = bs*bs;
106: MatScalar *v,*diag;
107: PetscScalar *xk,*xj,*xk_tmp;
108:
110: PetscMalloc(bs*sizeof(PetscScalar),&xk_tmp);
111: for (k=0; k<mbs; k++){
112: v = aa + bs2*ai[k];
113: xk = x + k*bs; /* Dk*xk = k-th block of x */
114: PetscMemcpy(xk_tmp,xk,bs*sizeof(PetscScalar)); /* xk_tmp <- xk */
115: nz = ai[k+1] - ai[k];
116: vj = aj + ai[k];
117: xj = x + (*vj)*bs; /* *vj-th block of x, *vj>k */
118: while (nz--) {
119: /* x(:) += U(k,:)^T*(Dk*xk) */
120: Kernel_v_gets_v_plus_Atranspose_times_w(bs,xj,v,xk_tmp); /* xj <- xj + v^t * xk */
121: vj++; xj = x + (*vj)*bs;
122: v += bs2;
123: }
124: /* xk = inv(Dk)*(Dk*xk) */
125: diag = aa+k*bs2; /* ptr to inv(Dk) */
126: Kernel_w_gets_A_times_v(bs,xk_tmp,diag,xk); /* xk <- diag * xk */
127: }
128: PetscFree(xk_tmp);
129: return(0);
130: }
134: PetscErrorCode BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscInt bs,PetscScalar *x)
135: {
136: PetscInt nz,*vj,k;
137: PetscInt bs2 = bs*bs;
138: MatScalar *v;
139: PetscScalar *xk,*xj;
142: for (k=mbs-1; k>=0; k--){
143: v = aa + bs2*ai[k];
144: xk = x + k*bs; /* xk */
145: nz = ai[k+1] - ai[k];
146: vj = aj + ai[k];
147: xj = x + (*vj)*bs;
148: while (nz--) {
149: /* xk += U(k,:)*x(:) */
150: Kernel_v_gets_v_plus_A_times_w(bs,xk,v,xj); /* xk <- xk + v*xj */
151: vj++;
152: v += bs2; xj = x + (*vj)*bs;
153: }
154: }
155: return(0);
156: }
160: PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
161: {
162: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
164: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
165: PetscInt bs=A->rmap.bs;
166: MatScalar *aa=a->a;
167: PetscScalar *x,*b;
168: #if defined(PETSC_USE_LOG)
169: PetscInt bs2 = a->bs2;
170: #endif
173: VecGetArray(bb,&b);
174: VecGetArray(xx,&x);
176: /* solve U^T * D * y = b by forward substitution */
177: PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar)); /* x <- b */
178: ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);
180: /* solve U*x = y by back substitution */
181: BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);
183: VecRestoreArray(bb,&b);
184: VecRestoreArray(xx,&x);
185: PetscLogFlops(bs2*(2*a->nz + mbs));
186: return(0);
187: }
191: PetscErrorCode MatForwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
192: {
193: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
195: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
196: PetscInt bs=A->rmap.bs;
197: MatScalar *aa=a->a;
198: PetscScalar *x,*b;
199: #if defined(PETSC_USE_LOG)
200: PetscInt bs2 = a->bs2;
201: #endif
204: VecGetArray(bb,&b);
205: VecGetArray(xx,&x);
206: PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar)); /* x <- b */
207: ForwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);
208: VecRestoreArray(bb,&b);
209: VecRestoreArray(xx,&x);
210: PetscLogFlops(bs2*a->nz + A->rmap.N);
211: return(0);
212: }
216: PetscErrorCode MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering(Mat A,Vec bb,Vec xx)
217: {
218: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
220: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
221: PetscInt bs=A->rmap.bs;
222: MatScalar *aa=a->a;
223: PetscScalar *x,*b;
224: #if defined(PETSC_USE_LOG)
225: PetscInt bs2 = a->bs2;
226: #endif
229: VecGetArray(bb,&b);
230: VecGetArray(xx,&x);
231: PetscMemcpy(x,b,bs*mbs*sizeof(PetscScalar));
232: BackwardSolve_SeqSBAIJ_N_NaturalOrdering_private(ai,aj,aa,mbs,bs,x);
233: VecRestoreArray(bb,&b);
234: VecRestoreArray(xx,&x);
235: PetscLogFlops(bs2*a->nz);
236: return(0);
237: }
241: PetscErrorCode MatSolve_SeqSBAIJ_7(Mat A,Vec bb,Vec xx)
242: {
243: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
244: IS isrow=a->row;
245: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
247: PetscInt nz,*vj,k,*r,idx;
248: MatScalar *aa=a->a,*v,*d;
249: PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,x6,*t,*tp;
252: VecGetArray(bb,&b);
253: VecGetArray(xx,&x);
254: t = a->solve_work;
255: ISGetIndices(isrow,&r);
257: /* solve U^T * D * y = b by forward substitution */
258: tp = t;
259: for (k=0; k<mbs; k++) { /* t <- perm(b) */
260: idx = 7*r[k];
261: tp[0] = b[idx];
262: tp[1] = b[idx+1];
263: tp[2] = b[idx+2];
264: tp[3] = b[idx+3];
265: tp[4] = b[idx+4];
266: tp[5] = b[idx+5];
267: tp[6] = b[idx+6];
268: tp += 7;
269: }
270:
271: for (k=0; k<mbs; k++){
272: v = aa + 49*ai[k];
273: vj = aj + ai[k];
274: tp = t + k*7;
275: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6];
276: nz = ai[k+1] - ai[k];
277: tp = t + (*vj)*7;
278: while (nz--) {
279: tp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
280: tp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
281: tp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
282: tp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
283: tp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
284: tp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
285: tp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
286: vj++; tp = t + (*vj)*7;
287: v += 49;
288: }
290: /* xk = inv(Dk)*(Dk*xk) */
291: d = aa+k*49; /* ptr to inv(Dk) */
292: tp = t + k*7;
293: tp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
294: tp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
295: tp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
296: tp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
297: tp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
298: tp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
299: tp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
300: }
302: /* solve U*x = y by back substitution */
303: for (k=mbs-1; k>=0; k--){
304: v = aa + 49*ai[k];
305: vj = aj + ai[k];
306: tp = t + k*7;
307: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; x6=tp[6]; /* xk */
308: nz = ai[k+1] - ai[k];
309:
310: tp = t + (*vj)*7;
311: while (nz--) {
312: /* xk += U(k,:)*x(:) */
313: x0 += v[0]*tp[0] + v[7]*tp[1] + v[14]*tp[2] + v[21]*tp[3] + v[28]*tp[4] + v[35]*tp[5] + v[42]*tp[6];
314: x1 += v[1]*tp[0] + v[8]*tp[1] + v[15]*tp[2] + v[22]*tp[3] + v[29]*tp[4] + v[36]*tp[5] + v[43]*tp[6];
315: x2 += v[2]*tp[0] + v[9]*tp[1] + v[16]*tp[2] + v[23]*tp[3] + v[30]*tp[4] + v[37]*tp[5] + v[44]*tp[6];
316: x3 += v[3]*tp[0]+ v[10]*tp[1] + v[17]*tp[2] + v[24]*tp[3] + v[31]*tp[4] + v[38]*tp[5] + v[45]*tp[6];
317: x4 += v[4]*tp[0]+ v[11]*tp[1] + v[18]*tp[2] + v[25]*tp[3] + v[32]*tp[4] + v[39]*tp[5] + v[46]*tp[6];
318: x5 += v[5]*tp[0]+ v[12]*tp[1] + v[19]*tp[2] + v[26]*tp[3] + v[33]*tp[4] + v[40]*tp[5] + v[47]*tp[6];
319: x6 += v[6]*tp[0]+ v[13]*tp[1] + v[20]*tp[2] + v[27]*tp[3] + v[34]*tp[4] + v[41]*tp[5] + v[48]*tp[6];
320: vj++; tp = t + (*vj)*7;
321: v += 49;
322: }
323: tp = t + k*7;
324: tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5; tp[6]=x6;
325: idx = 7*r[k];
326: x[idx] = x0;
327: x[idx+1] = x1;
328: x[idx+2] = x2;
329: x[idx+3] = x3;
330: x[idx+4] = x4;
331: x[idx+5] = x5;
332: x[idx+6] = x6;
333: }
335: ISRestoreIndices(isrow,&r);
336: VecRestoreArray(bb,&b);
337: VecRestoreArray(xx,&x);
338: PetscLogFlops(49*(2*a->nz + mbs));
339: return(0);
340: }
344: PetscErrorCode ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
345: {
346: MatScalar *v,*d;
347: PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6;
348: PetscInt nz,*vj,k;
351: for (k=0; k<mbs; k++){
352: v = aa + 49*ai[k];
353: xp = x + k*7;
354: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* Dk*xk = k-th block of x */
355: nz = ai[k+1] - ai[k];
356: vj = aj + ai[k];
357: xp = x + (*vj)*7;
358: while (nz--) {
359: /* x(:) += U(k,:)^T*(Dk*xk) */
360: xp[0]+= v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5 + v[6]*x6;
361: xp[1]+= v[7]*x0 + v[8]*x1 + v[9]*x2+ v[10]*x3+ v[11]*x4+ v[12]*x5+ v[13]*x6;
362: xp[2]+= v[14]*x0 + v[15]*x1 + v[16]*x2+ v[17]*x3+ v[18]*x4+ v[19]*x5+ v[20]*x6;
363: xp[3]+= v[21]*x0 + v[22]*x1 + v[23]*x2+ v[24]*x3+ v[25]*x4+ v[26]*x5+ v[27]*x6;
364: xp[4]+= v[28]*x0 + v[29]*x1 + v[30]*x2+ v[31]*x3+ v[32]*x4+ v[33]*x5+ v[34]*x6;
365: xp[5]+= v[35]*x0 + v[36]*x1 + v[37]*x2+ v[38]*x3+ v[39]*x4+ v[40]*x5+ v[41]*x6;
366: xp[6]+= v[42]*x0 + v[43]*x1 + v[44]*x2+ v[45]*x3+ v[46]*x4+ v[47]*x5+ v[48]*x6;
367: vj++; xp = x + (*vj)*7;
368: v += 49;
369: }
370: /* xk = inv(Dk)*(Dk*xk) */
371: d = aa+k*49; /* ptr to inv(Dk) */
372: xp = x + k*7;
373: xp[0] = d[0]*x0 + d[7]*x1 + d[14]*x2 + d[21]*x3 + d[28]*x4 + d[35]*x5 + d[42]*x6;
374: xp[1] = d[1]*x0 + d[8]*x1 + d[15]*x2 + d[22]*x3 + d[29]*x4 + d[36]*x5 + d[43]*x6;
375: xp[2] = d[2]*x0 + d[9]*x1 + d[16]*x2 + d[23]*x3 + d[30]*x4 + d[37]*x5 + d[44]*x6;
376: xp[3] = d[3]*x0+ d[10]*x1 + d[17]*x2 + d[24]*x3 + d[31]*x4 + d[38]*x5 + d[45]*x6;
377: xp[4] = d[4]*x0+ d[11]*x1 + d[18]*x2 + d[25]*x3 + d[32]*x4 + d[39]*x5 + d[46]*x6;
378: xp[5] = d[5]*x0+ d[12]*x1 + d[19]*x2 + d[26]*x3 + d[33]*x4 + d[40]*x5 + d[47]*x6;
379: xp[6] = d[6]*x0+ d[13]*x1 + d[20]*x2 + d[27]*x3 + d[34]*x4 + d[41]*x5 + d[48]*x6;
380: }
381: return(0);
382: }
386: PetscErrorCode BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
387: {
388: MatScalar *v;
389: PetscScalar *xp,x0,x1,x2,x3,x4,x5,x6;
390: PetscInt nz,*vj,k;
393: for (k=mbs-1; k>=0; k--){
394: v = aa + 49*ai[k];
395: xp = x + k*7;
396: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; x6=xp[6]; /* xk */
397: nz = ai[k+1] - ai[k];
398: vj = aj + ai[k];
399: xp = x + (*vj)*7;
400: while (nz--) {
401: /* xk += U(k,:)*x(:) */
402: x0 += v[0]*xp[0] + v[7]*xp[1] + v[14]*xp[2] + v[21]*xp[3] + v[28]*xp[4] + v[35]*xp[5] + v[42]*xp[6];
403: x1 += v[1]*xp[0] + v[8]*xp[1] + v[15]*xp[2] + v[22]*xp[3] + v[29]*xp[4] + v[36]*xp[5] + v[43]*xp[6];
404: x2 += v[2]*xp[0] + v[9]*xp[1] + v[16]*xp[2] + v[23]*xp[3] + v[30]*xp[4] + v[37]*xp[5] + v[44]*xp[6];
405: x3 += v[3]*xp[0]+ v[10]*xp[1] + v[17]*xp[2] + v[24]*xp[3] + v[31]*xp[4] + v[38]*xp[5] + v[45]*xp[6];
406: x4 += v[4]*xp[0]+ v[11]*xp[1] + v[18]*xp[2] + v[25]*xp[3] + v[32]*xp[4] + v[39]*xp[5] + v[46]*xp[6];
407: x5 += v[5]*xp[0]+ v[12]*xp[1] + v[19]*xp[2] + v[26]*xp[3] + v[33]*xp[4] + v[40]*xp[5] + v[47]*xp[6];
408: x6 += v[6]*xp[0]+ v[13]*xp[1] + v[20]*xp[2] + v[27]*xp[3] + v[34]*xp[4] + v[41]*xp[5] + v[48]*xp[6];
409: vj++;
410: v += 49; xp = x + (*vj)*7;
411: }
412: xp = x + k*7;
413: xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5; xp[6]=x6;
414: }
415: return(0);
416: }
420: PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
421: {
422: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
424: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
425: MatScalar *aa=a->a;
426: PetscScalar *x,*b;
429: VecGetArray(bb,&b);
430: VecGetArray(xx,&x);
431:
432: /* solve U^T * D * y = b by forward substitution */
433: PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar)); /* x <- b */
434: ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);
436: /* solve U*x = y by back substitution */
437: BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);
439: VecRestoreArray(bb,&b);
440: VecRestoreArray(xx,&x);
441: PetscLogFlops(49*(2*a->nz + mbs));
442: return(0);
443: }
447: PetscErrorCode MatForwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
448: {
449: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
451: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
452: MatScalar *aa=a->a;
453: PetscScalar *x,*b;
456: VecGetArray(bb,&b);
457: VecGetArray(xx,&x);
458: PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));
459: ForwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);
460: VecRestoreArray(bb,&b);
461: VecRestoreArray(xx,&x);
462: PetscLogFlops(49*a->nz + mbs);
463: return(0);
464: }
468: PetscErrorCode MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering(Mat A,Vec bb,Vec xx)
469: {
470: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
472: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
473: MatScalar *aa=a->a;
474: PetscScalar *x,*b;
477: VecGetArray(bb,&b);
478: VecGetArray(xx,&x);
479: PetscMemcpy(x,b,7*mbs*sizeof(PetscScalar));
480: BackwardSolve_SeqSBAIJ_7_NaturalOrdering_private(ai,aj,aa,mbs,x);
481: VecRestoreArray(bb,&b);
482: VecRestoreArray(xx,&x);
483: PetscLogFlops(49*a->nz);
484: return(0);
485: }
489: PetscErrorCode MatSolve_SeqSBAIJ_6(Mat A,Vec bb,Vec xx)
490: {
491: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
492: IS isrow=a->row;
493: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
495: PetscInt nz,*vj,k,*r,idx;
496: MatScalar *aa=a->a,*v,*d;
497: PetscScalar *x,*b,x0,x1,x2,x3,x4,x5,*t,*tp;
500: VecGetArray(bb,&b);
501: VecGetArray(xx,&x);
502: t = a->solve_work;
503: ISGetIndices(isrow,&r);
505: /* solve U^T * D * y = b by forward substitution */
506: tp = t;
507: for (k=0; k<mbs; k++) { /* t <- perm(b) */
508: idx = 6*r[k];
509: tp[0] = b[idx];
510: tp[1] = b[idx+1];
511: tp[2] = b[idx+2];
512: tp[3] = b[idx+3];
513: tp[4] = b[idx+4];
514: tp[5] = b[idx+5];
515: tp += 6;
516: }
517:
518: for (k=0; k<mbs; k++){
519: v = aa + 36*ai[k];
520: vj = aj + ai[k];
521: tp = t + k*6;
522: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5];
523: nz = ai[k+1] - ai[k];
524: tp = t + (*vj)*6;
525: while (nz--) {
526: tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
527: tp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
528: tp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
529: tp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
530: tp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
531: tp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
532: vj++; tp = t + (*vj)*6;
533: v += 36;
534: }
536: /* xk = inv(Dk)*(Dk*xk) */
537: d = aa+k*36; /* ptr to inv(Dk) */
538: tp = t + k*6;
539: tp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
540: tp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
541: tp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
542: tp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
543: tp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
544: tp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
545: }
547: /* solve U*x = y by back substitution */
548: for (k=mbs-1; k>=0; k--){
549: v = aa + 36*ai[k];
550: vj = aj + ai[k];
551: tp = t + k*6;
552: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4]; x5=tp[5]; /* xk */
553: nz = ai[k+1] - ai[k];
554:
555: tp = t + (*vj)*6;
556: while (nz--) {
557: /* xk += U(k,:)*x(:) */
558: x0 += v[0]*tp[0] + v[6]*tp[1] + v[12]*tp[2] + v[18]*tp[3] + v[24]*tp[4] + v[30]*tp[5];
559: x1 += v[1]*tp[0] + v[7]*tp[1] + v[13]*tp[2] + v[19]*tp[3] + v[25]*tp[4] + v[31]*tp[5];
560: x2 += v[2]*tp[0] + v[8]*tp[1] + v[14]*tp[2] + v[20]*tp[3] + v[26]*tp[4] + v[32]*tp[5];
561: x3 += v[3]*tp[0] + v[9]*tp[1] + v[15]*tp[2] + v[21]*tp[3] + v[27]*tp[4] + v[33]*tp[5];
562: x4 += v[4]*tp[0]+ v[10]*tp[1] + v[16]*tp[2] + v[22]*tp[3] + v[28]*tp[4] + v[34]*tp[5];
563: x5 += v[5]*tp[0]+ v[11]*tp[1] + v[17]*tp[2] + v[23]*tp[3] + v[29]*tp[4] + v[35]*tp[5];
564: vj++; tp = t + (*vj)*6;
565: v += 36;
566: }
567: tp = t + k*6;
568: tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4; tp[5]=x5;
569: idx = 6*r[k];
570: x[idx] = x0;
571: x[idx+1] = x1;
572: x[idx+2] = x2;
573: x[idx+3] = x3;
574: x[idx+4] = x4;
575: x[idx+5] = x5;
576: }
578: ISRestoreIndices(isrow,&r);
579: VecRestoreArray(bb,&b);
580: VecRestoreArray(xx,&x);
581: PetscLogFlops(36*(2*a->nz + mbs));
582: return(0);
583: }
587: PetscErrorCode ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
588: {
589: MatScalar *v,*d;
590: PetscScalar *xp,x0,x1,x2,x3,x4,x5;
591: PetscInt nz,*vj,k;
594: for (k=0; k<mbs; k++){
595: v = aa + 36*ai[k];
596: xp = x + k*6;
597: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* Dk*xk = k-th block of x */
598: nz = ai[k+1] - ai[k];
599: vj = aj + ai[k];
600: xp = x + (*vj)*6;
601: while (nz--) {
602: /* x(:) += U(k,:)^T*(Dk*xk) */
603: xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4 + v[5]*x5;
604: xp[1] += v[6]*x0 + v[7]*x1 + v[8]*x2 + v[9]*x3+ v[10]*x4+ v[11]*x5;
605: xp[2] += v[12]*x0 + v[13]*x1 + v[14]*x2+ v[15]*x3+ v[16]*x4+ v[17]*x5;
606: xp[3] += v[18]*x0 + v[19]*x1 + v[20]*x2+ v[21]*x3+ v[22]*x4+ v[23]*x5;
607: xp[4] += v[24]*x0 + v[25]*x1 + v[26]*x2+ v[27]*x3+ v[28]*x4+ v[29]*x5;
608: xp[5] += v[30]*x0 + v[31]*x1 + v[32]*x2+ v[33]*x3+ v[34]*x4+ v[35]*x5;
609: vj++; xp = x + (*vj)*6;
610: v += 36;
611: }
612: /* xk = inv(Dk)*(Dk*xk) */
613: d = aa+k*36; /* ptr to inv(Dk) */
614: xp = x + k*6;
615: xp[0] = d[0]*x0 + d[6]*x1 + d[12]*x2 + d[18]*x3 + d[24]*x4 + d[30]*x5;
616: xp[1] = d[1]*x0 + d[7]*x1 + d[13]*x2 + d[19]*x3 + d[25]*x4 + d[31]*x5;
617: xp[2] = d[2]*x0 + d[8]*x1 + d[14]*x2 + d[20]*x3 + d[26]*x4 + d[32]*x5;
618: xp[3] = d[3]*x0 + d[9]*x1 + d[15]*x2 + d[21]*x3 + d[27]*x4 + d[33]*x5;
619: xp[4] = d[4]*x0+ d[10]*x1 + d[16]*x2 + d[22]*x3 + d[28]*x4 + d[34]*x5;
620: xp[5] = d[5]*x0+ d[11]*x1 + d[17]*x2 + d[23]*x3 + d[29]*x4 + d[35]*x5;
621: }
622: return(0);
623: }
626: PetscErrorCode BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
627: {
628: MatScalar *v;
629: PetscScalar *xp,x0,x1,x2,x3,x4,x5;
630: PetscInt nz,*vj,k;
633: for (k=mbs-1; k>=0; k--){
634: v = aa + 36*ai[k];
635: xp = x + k*6;
636: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4]; x5=xp[5]; /* xk */
637: nz = ai[k+1] - ai[k];
638: vj = aj + ai[k];
639: xp = x + (*vj)*6;
640: while (nz--) {
641: /* xk += U(k,:)*x(:) */
642: x0 += v[0]*xp[0] + v[6]*xp[1] + v[12]*xp[2] + v[18]*xp[3] + v[24]*xp[4] + v[30]*xp[5];
643: x1 += v[1]*xp[0] + v[7]*xp[1] + v[13]*xp[2] + v[19]*xp[3] + v[25]*xp[4] + v[31]*xp[5];
644: x2 += v[2]*xp[0] + v[8]*xp[1] + v[14]*xp[2] + v[20]*xp[3] + v[26]*xp[4] + v[32]*xp[5];
645: x3 += v[3]*xp[0] + v[9]*xp[1] + v[15]*xp[2] + v[21]*xp[3] + v[27]*xp[4] + v[33]*xp[5];
646: x4 += v[4]*xp[0]+ v[10]*xp[1] + v[16]*xp[2] + v[22]*xp[3] + v[28]*xp[4] + v[34]*xp[5];
647: x5 += v[5]*xp[0]+ v[11]*xp[1] + v[17]*xp[2] + v[23]*xp[3] + v[29]*xp[4] + v[35]*xp[5];
648: vj++;
649: v += 36; xp = x + (*vj)*6;
650: }
651: xp = x + k*6;
652: xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4; xp[5]=x5;
653: }
654: return(0);
655: }
660: PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
661: {
662: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
663: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
664: MatScalar *aa=a->a;
665: PetscScalar *x,*b;
669: VecGetArray(bb,&b);
670: VecGetArray(xx,&x);
671:
672: /* solve U^T * D * y = b by forward substitution */
673: PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar)); /* x <- b */
674: ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);
676: /* solve U*x = y by back substitution */
677: BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);
679: VecRestoreArray(bb,&b);
680: VecRestoreArray(xx,&x);
681: PetscLogFlops(36*(2*a->nz + mbs));
682: return(0);
683: }
687: PetscErrorCode MatForwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
688: {
689: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
690: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
691: MatScalar *aa=a->a;
692: PetscScalar *x,*b;
696: VecGetArray(bb,&b);
697: VecGetArray(xx,&x);
698: PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar)); /* x <- b */
699: ForwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);
700: VecRestoreArray(bb,&b);
701: VecRestoreArray(xx,&x);
702: PetscLogFlops(36*a->nz + mbs);
703: return(0);
704: }
708: PetscErrorCode MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering(Mat A,Vec bb,Vec xx)
709: {
710: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
711: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
712: MatScalar *aa=a->a;
713: PetscScalar *x,*b;
717: VecGetArray(bb,&b);
718: VecGetArray(xx,&x);
719: PetscMemcpy(x,b,6*mbs*sizeof(PetscScalar)); /* x <- b */
720: BackwardSolve_SeqSBAIJ_6_NaturalOrdering_private(ai,aj,aa,mbs,x);
721: VecRestoreArray(bb,&b);
722: VecRestoreArray(xx,&x);
723: PetscLogFlops(36*a->nz);
724: return(0);
725: }
729: PetscErrorCode MatSolve_SeqSBAIJ_5(Mat A,Vec bb,Vec xx)
730: {
731: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
732: IS isrow=a->row;
733: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
735: PetscInt nz,*vj,k,*r,idx;
736: MatScalar *aa=a->a,*v,*diag;
737: PetscScalar *x,*b,x0,x1,x2,x3,x4,*t,*tp;
740: VecGetArray(bb,&b);
741: VecGetArray(xx,&x);
742: t = a->solve_work;
743: ISGetIndices(isrow,&r);
745: /* solve U^T * D * y = b by forward substitution */
746: tp = t;
747: for (k=0; k<mbs; k++) { /* t <- perm(b) */
748: idx = 5*r[k];
749: tp[0] = b[idx];
750: tp[1] = b[idx+1];
751: tp[2] = b[idx+2];
752: tp[3] = b[idx+3];
753: tp[4] = b[idx+4];
754: tp += 5;
755: }
756:
757: for (k=0; k<mbs; k++){
758: v = aa + 25*ai[k];
759: vj = aj + ai[k];
760: tp = t + k*5;
761: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];
762: nz = ai[k+1] - ai[k];
764: tp = t + (*vj)*5;
765: while (nz--) {
766: tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
767: tp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
768: tp[2] += v[10]*x0+ v[11]*x1+ v[12]*x2+ v[13]*x3+ v[14]*x4;
769: tp[3] += v[15]*x0+ v[16]*x1+ v[17]*x2+ v[18]*x3+ v[19]*x4;
770: tp[4] += v[20]*x0+ v[21]*x1+ v[22]*x2+ v[23]*x3+ v[24]*x4;
771: vj++; tp = t + (*vj)*5;
772: v += 25;
773: }
775: /* xk = inv(Dk)*(Dk*xk) */
776: diag = aa+k*25; /* ptr to inv(Dk) */
777: tp = t + k*5;
778: tp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
779: tp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
780: tp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
781: tp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
782: tp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
783: }
785: /* solve U*x = y by back substitution */
786: for (k=mbs-1; k>=0; k--){
787: v = aa + 25*ai[k];
788: vj = aj + ai[k];
789: tp = t + k*5;
790: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; x4=tp[4];/* xk */
791: nz = ai[k+1] - ai[k];
792:
793: tp = t + (*vj)*5;
794: while (nz--) {
795: /* xk += U(k,:)*x(:) */
796: x0 += v[0]*tp[0] + v[5]*tp[1] + v[10]*tp[2] + v[15]*tp[3] + v[20]*tp[4];
797: x1 += v[1]*tp[0] + v[6]*tp[1] + v[11]*tp[2] + v[16]*tp[3] + v[21]*tp[4];
798: x2 += v[2]*tp[0] + v[7]*tp[1] + v[12]*tp[2] + v[17]*tp[3] + v[22]*tp[4];
799: x3 += v[3]*tp[0] + v[8]*tp[1] + v[13]*tp[2] + v[18]*tp[3] + v[23]*tp[4];
800: x4 += v[4]*tp[0] + v[9]*tp[1] + v[14]*tp[2] + v[19]*tp[3] + v[24]*tp[4];
801: vj++; tp = t + (*vj)*5;
802: v += 25;
803: }
804: tp = t + k*5;
805: tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3; tp[4]=x4;
806: idx = 5*r[k];
807: x[idx] = x0;
808: x[idx+1] = x1;
809: x[idx+2] = x2;
810: x[idx+3] = x3;
811: x[idx+4] = x4;
812: }
814: ISRestoreIndices(isrow,&r);
815: VecRestoreArray(bb,&b);
816: VecRestoreArray(xx,&x);
817: PetscLogFlops(25*(2*a->nz + mbs));
818: return(0);
819: }
823: PetscErrorCode ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
824: {
825: MatScalar *v,*diag;
826: PetscScalar *xp,x0,x1,x2,x3,x4;
827: PetscInt nz,*vj,k;
830: for (k=0; k<mbs; k++){
831: v = aa + 25*ai[k];
832: xp = x + k*5;
833: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* Dk*xk = k-th block of x */
834: nz = ai[k+1] - ai[k];
835: vj = aj + ai[k];
836: xp = x + (*vj)*5;
837: while (nz--) {
838: /* x(:) += U(k,:)^T*(Dk*xk) */
839: xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3 + v[4]*x4;
840: xp[1] += v[5]*x0 + v[6]*x1 + v[7]*x2 + v[8]*x3 + v[9]*x4;
841: xp[2] += v[10]*x0 + v[11]*x1 + v[12]*x2+ v[13]*x3+ v[14]*x4;
842: xp[3] += v[15]*x0 + v[16]*x1 + v[17]*x2+ v[18]*x3+ v[19]*x4;
843: xp[4] += v[20]*x0 + v[21]*x1 + v[22]*x2+ v[23]*x3+ v[24]*x4;
844: vj++; xp = x + (*vj)*5;
845: v += 25;
846: }
847: /* xk = inv(Dk)*(Dk*xk) */
848: diag = aa+k*25; /* ptr to inv(Dk) */
849: xp = x + k*5;
850: xp[0] = diag[0]*x0 + diag[5]*x1 + diag[10]*x2 + diag[15]*x3 + diag[20]*x4;
851: xp[1] = diag[1]*x0 + diag[6]*x1 + diag[11]*x2 + diag[16]*x3 + diag[21]*x4;
852: xp[2] = diag[2]*x0 + diag[7]*x1 + diag[12]*x2 + diag[17]*x3 + diag[22]*x4;
853: xp[3] = diag[3]*x0 + diag[8]*x1 + diag[13]*x2 + diag[18]*x3 + diag[23]*x4;
854: xp[4] = diag[4]*x0 + diag[9]*x1 + diag[14]*x2 + diag[19]*x3 + diag[24]*x4;
855: }
856: return(0);
857: }
861: PetscErrorCode BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
862: {
863: MatScalar *v;
864: PetscScalar *xp,x0,x1,x2,x3,x4;
865: PetscInt nz,*vj,k;
868: for (k=mbs-1; k>=0; k--){
869: v = aa + 25*ai[k];
870: xp = x + k*5;
871: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; x4=xp[4];/* xk */
872: nz = ai[k+1] - ai[k];
873: vj = aj + ai[k];
874: xp = x + (*vj)*5;
875: while (nz--) {
876: /* xk += U(k,:)*x(:) */
877: x0 += v[0]*xp[0] + v[5]*xp[1] + v[10]*xp[2] + v[15]*xp[3] + v[20]*xp[4];
878: x1 += v[1]*xp[0] + v[6]*xp[1] + v[11]*xp[2] + v[16]*xp[3] + v[21]*xp[4];
879: x2 += v[2]*xp[0] + v[7]*xp[1] + v[12]*xp[2] + v[17]*xp[3] + v[22]*xp[4];
880: x3 += v[3]*xp[0] + v[8]*xp[1] + v[13]*xp[2] + v[18]*xp[3] + v[23]*xp[4];
881: x4 += v[4]*xp[0] + v[9]*xp[1] + v[14]*xp[2] + v[19]*xp[3] + v[24]*xp[4];
882: vj++;
883: v += 25; xp = x + (*vj)*5;
884: }
885: xp = x + k*5;
886: xp[0]=x0; xp[1]=x1; xp[2]=x2; xp[3]=x3; xp[4]=x4;
887: }
888: return(0);
889: }
893: PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
894: {
895: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
896: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
897: MatScalar *aa=a->a;
898: PetscScalar *x,*b;
902: VecGetArray(bb,&b);
903: VecGetArray(xx,&x);
905: /* solve U^T * D * y = b by forward substitution */
906: PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar)); /* x <- b */
907: ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);
909: /* solve U*x = y by back substitution */
910: BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);
912: VecRestoreArray(bb,&b);
913: VecRestoreArray(xx,&x);
914: PetscLogFlops(25*(2*a->nz + mbs));
915: return(0);
916: }
920: PetscErrorCode MatForwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
921: {
922: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
923: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
924: MatScalar *aa=a->a;
925: PetscScalar *x,*b;
929: VecGetArray(bb,&b);
930: VecGetArray(xx,&x);
931: PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar)); /* x <- b */
932: ForwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);
933: VecRestoreArray(bb,&b);
934: VecRestoreArray(xx,&x);
935: PetscLogFlops(25*(a->nz + mbs));
936: return(0);
937: }
941: PetscErrorCode MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering(Mat A,Vec bb,Vec xx)
942: {
943: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
944: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
945: MatScalar *aa=a->a;
946: PetscScalar *x,*b;
950: VecGetArray(bb,&b);
951: VecGetArray(xx,&x);
952: PetscMemcpy(x,b,5*mbs*sizeof(PetscScalar));
953: BackwardSolve_SeqSBAIJ_5_NaturalOrdering_private(ai,aj,aa,mbs,x);
954: VecRestoreArray(bb,&b);
955: VecRestoreArray(xx,&x);
956: PetscLogFlops(25*a->nz);
957: return(0);
958: }
962: PetscErrorCode MatSolve_SeqSBAIJ_4(Mat A,Vec bb,Vec xx)
963: {
964: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
965: IS isrow=a->row;
966: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
968: PetscInt nz,*vj,k,*r,idx;
969: MatScalar *aa=a->a,*v,*diag;
970: PetscScalar *x,*b,x0,x1,x2,x3,*t,*tp;
973: VecGetArray(bb,&b);
974: VecGetArray(xx,&x);
975: t = a->solve_work;
976: ISGetIndices(isrow,&r);
978: /* solve U^T * D * y = b by forward substitution */
979: tp = t;
980: for (k=0; k<mbs; k++) { /* t <- perm(b) */
981: idx = 4*r[k];
982: tp[0] = b[idx];
983: tp[1] = b[idx+1];
984: tp[2] = b[idx+2];
985: tp[3] = b[idx+3];
986: tp += 4;
987: }
988:
989: for (k=0; k<mbs; k++){
990: v = aa + 16*ai[k];
991: vj = aj + ai[k];
992: tp = t + k*4;
993: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3];
994: nz = ai[k+1] - ai[k];
996: tp = t + (*vj)*4;
997: while (nz--) {
998: tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
999: tp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
1000: tp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
1001: tp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
1002: vj++; tp = t + (*vj)*4;
1003: v += 16;
1004: }
1006: /* xk = inv(Dk)*(Dk*xk) */
1007: diag = aa+k*16; /* ptr to inv(Dk) */
1008: tp = t + k*4;
1009: tp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
1010: tp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
1011: tp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
1012: tp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
1013: }
1015: /* solve U*x = y by back substitution */
1016: for (k=mbs-1; k>=0; k--){
1017: v = aa + 16*ai[k];
1018: vj = aj + ai[k];
1019: tp = t + k*4;
1020: x0=tp[0]; x1=tp[1]; x2=tp[2]; x3=tp[3]; /* xk */
1021: nz = ai[k+1] - ai[k];
1022:
1023: tp = t + (*vj)*4;
1024: while (nz--) {
1025: /* xk += U(k,:)*x(:) */
1026: x0 += v[0]*tp[0] + v[4]*tp[1] + v[8]*tp[2] + v[12]*tp[3];
1027: x1 += v[1]*tp[0] + v[5]*tp[1] + v[9]*tp[2] + v[13]*tp[3];
1028: x2 += v[2]*tp[0] + v[6]*tp[1]+ v[10]*tp[2] + v[14]*tp[3];
1029: x3 += v[3]*tp[0] + v[7]*tp[1]+ v[11]*tp[2] + v[15]*tp[3];
1030: vj++; tp = t + (*vj)*4;
1031: v += 16;
1032: }
1033: tp = t + k*4;
1034: tp[0]=x0; tp[1]=x1; tp[2]=x2; tp[3]=x3;
1035: idx = 4*r[k];
1036: x[idx] = x0;
1037: x[idx+1] = x1;
1038: x[idx+2] = x2;
1039: x[idx+3] = x3;
1040: }
1042: ISRestoreIndices(isrow,&r);
1043: VecRestoreArray(bb,&b);
1044: VecRestoreArray(xx,&x);
1045: PetscLogFlops(16*(2*a->nz + mbs));
1046: return(0);
1047: }
1051: PetscErrorCode ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1052: {
1053: MatScalar *v,*diag;
1054: PetscScalar *xp,x0,x1,x2,x3;
1055: PetscInt nz,*vj,k;
1058: for (k=0; k<mbs; k++){
1059: v = aa + 16*ai[k];
1060: xp = x + k*4;
1061: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* Dk*xk = k-th block of x */
1062: nz = ai[k+1] - ai[k];
1063: vj = aj + ai[k];
1064: xp = x + (*vj)*4;
1065: while (nz--) {
1066: /* x(:) += U(k,:)^T*(Dk*xk) */
1067: xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2 + v[3]*x3;
1068: xp[1] += v[4]*x0 + v[5]*x1 + v[6]*x2 + v[7]*x3;
1069: xp[2] += v[8]*x0 + v[9]*x1 + v[10]*x2+ v[11]*x3;
1070: xp[3] += v[12]*x0+ v[13]*x1+ v[14]*x2+ v[15]*x3;
1071: vj++; xp = x + (*vj)*4;
1072: v += 16;
1073: }
1074: /* xk = inv(Dk)*(Dk*xk) */
1075: diag = aa+k*16; /* ptr to inv(Dk) */
1076: xp = x + k*4;
1077: xp[0] = diag[0]*x0 + diag[4]*x1 + diag[8]*x2 + diag[12]*x3;
1078: xp[1] = diag[1]*x0 + diag[5]*x1 + diag[9]*x2 + diag[13]*x3;
1079: xp[2] = diag[2]*x0 + diag[6]*x1 + diag[10]*x2+ diag[14]*x3;
1080: xp[3] = diag[3]*x0 + diag[7]*x1 + diag[11]*x2+ diag[15]*x3;
1081: }
1082: return(0);
1083: }
1087: PetscErrorCode BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1088: {
1089: MatScalar *v;
1090: PetscScalar *xp,x0,x1,x2,x3;
1091: PetscInt nz,*vj,k;
1094: for (k=mbs-1; k>=0; k--){
1095: v = aa + 16*ai[k];
1096: xp = x + k*4;
1097: x0=xp[0]; x1=xp[1]; x2=xp[2]; x3=xp[3]; /* xk */
1098: nz = ai[k+1] - ai[k];
1099: vj = aj + ai[k];
1100: xp = x + (*vj)*4;
1101: while (nz--) {
1102: /* xk += U(k,:)*x(:) */
1103: x0 += v[0]*xp[0] + v[4]*xp[1] + v[8]*xp[2] + v[12]*xp[3];
1104: x1 += v[1]*xp[0] + v[5]*xp[1] + v[9]*xp[2] + v[13]*xp[3];
1105: x2 += v[2]*xp[0] + v[6]*xp[1]+ v[10]*xp[2] + v[14]*xp[3];
1106: x3 += v[3]*xp[0] + v[7]*xp[1]+ v[11]*xp[2] + v[15]*xp[3];
1107: vj++;
1108: v += 16; xp = x + (*vj)*4;
1109: }
1110: xp = x + k*4;
1111: xp[0] = x0; xp[1] = x1; xp[2] = x2; xp[3] = x3;
1112: }
1113: return(0);
1114: }
1118: PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1119: {
1120: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
1121: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1122: MatScalar *aa=a->a;
1123: PetscScalar *x,*b;
1127: VecGetArray(bb,&b);
1128: VecGetArray(xx,&x);
1130: /* solve U^T * D * y = b by forward substitution */
1131: PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar)); /* x <- b */
1132: ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);
1134: /* solve U*x = y by back substitution */
1135: BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);
1136: VecRestoreArray(bb,&b);
1137: VecRestoreArray(xx,&x);
1138: PetscLogFlops(16*(2*a->nz + mbs));
1139: return(0);
1140: }
1144: PetscErrorCode MatForwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1145: {
1146: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
1147: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1148: MatScalar *aa=a->a;
1149: PetscScalar *x,*b;
1153: VecGetArray(bb,&b);
1154: VecGetArray(xx,&x);
1155: PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar)); /* x <- b */
1156: ForwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);
1157: VecRestoreArray(bb,&b);
1158: VecRestoreArray(xx,&x);
1159: PetscLogFlops(16*a->nz + mbs);
1160: return(0);
1161: }
1165: PetscErrorCode MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering(Mat A,Vec bb,Vec xx)
1166: {
1167: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
1168: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1169: MatScalar *aa=a->a;
1170: PetscScalar *x,*b;
1174: VecGetArray(bb,&b);
1175: VecGetArray(xx,&x);
1176: PetscMemcpy(x,b,4*mbs*sizeof(PetscScalar));
1177: BackwardSolve_SeqSBAIJ_4_NaturalOrdering_private(ai,aj,aa,mbs,x);
1178: VecRestoreArray(bb,&b);
1179: VecRestoreArray(xx,&x);
1180: PetscLogFlops(16*a->nz);
1181: return(0);
1182: }
1186: PetscErrorCode MatSolve_SeqSBAIJ_3(Mat A,Vec bb,Vec xx)
1187: {
1188: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
1189: IS isrow=a->row;
1190: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1192: PetscInt nz,*vj,k,*r,idx;
1193: MatScalar *aa=a->a,*v,*diag;
1194: PetscScalar *x,*b,x0,x1,x2,*t,*tp;
1197: VecGetArray(bb,&b);
1198: VecGetArray(xx,&x);
1199: t = a->solve_work;
1200: ISGetIndices(isrow,&r);
1202: /* solve U^T * D * y = b by forward substitution */
1203: tp = t;
1204: for (k=0; k<mbs; k++) { /* t <- perm(b) */
1205: idx = 3*r[k];
1206: tp[0] = b[idx];
1207: tp[1] = b[idx+1];
1208: tp[2] = b[idx+2];
1209: tp += 3;
1210: }
1211:
1212: for (k=0; k<mbs; k++){
1213: v = aa + 9*ai[k];
1214: vj = aj + ai[k];
1215: tp = t + k*3;
1216: x0 = tp[0]; x1 = tp[1]; x2 = tp[2];
1217: nz = ai[k+1] - ai[k];
1219: tp = t + (*vj)*3;
1220: while (nz--) {
1221: tp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1222: tp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1223: tp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1224: vj++; tp = t + (*vj)*3;
1225: v += 9;
1226: }
1228: /* xk = inv(Dk)*(Dk*xk) */
1229: diag = aa+k*9; /* ptr to inv(Dk) */
1230: tp = t + k*3;
1231: tp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1232: tp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1233: tp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1234: }
1236: /* solve U*x = y by back substitution */
1237: for (k=mbs-1; k>=0; k--){
1238: v = aa + 9*ai[k];
1239: vj = aj + ai[k];
1240: tp = t + k*3;
1241: x0 = tp[0]; x1 = tp[1]; x2 = tp[2]; /* xk */
1242: nz = ai[k+1] - ai[k];
1243:
1244: tp = t + (*vj)*3;
1245: while (nz--) {
1246: /* xk += U(k,:)*x(:) */
1247: x0 += v[0]*tp[0] + v[3]*tp[1] + v[6]*tp[2];
1248: x1 += v[1]*tp[0] + v[4]*tp[1] + v[7]*tp[2];
1249: x2 += v[2]*tp[0] + v[5]*tp[1] + v[8]*tp[2];
1250: vj++; tp = t + (*vj)*3;
1251: v += 9;
1252: }
1253: tp = t + k*3;
1254: tp[0] = x0; tp[1] = x1; tp[2] = x2;
1255: idx = 3*r[k];
1256: x[idx] = x0;
1257: x[idx+1] = x1;
1258: x[idx+2] = x2;
1259: }
1261: ISRestoreIndices(isrow,&r);
1262: VecRestoreArray(bb,&b);
1263: VecRestoreArray(xx,&x);
1264: PetscLogFlops(9*(2*a->nz + mbs));
1265: return(0);
1266: }
1270: PetscErrorCode ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1271: {
1272: MatScalar *v,*diag;
1273: PetscScalar *xp,x0,x1,x2;
1274: PetscInt nz,*vj,k;
1277: for (k=0; k<mbs; k++){
1278: v = aa + 9*ai[k];
1279: xp = x + k*3;
1280: x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* Dk*xk = k-th block of x */
1281: nz = ai[k+1] - ai[k];
1282: vj = aj + ai[k];
1283: xp = x + (*vj)*3;
1284: while (nz--) {
1285: /* x(:) += U(k,:)^T*(Dk*xk) */
1286: xp[0] += v[0]*x0 + v[1]*x1 + v[2]*x2;
1287: xp[1] += v[3]*x0 + v[4]*x1 + v[5]*x2;
1288: xp[2] += v[6]*x0 + v[7]*x1 + v[8]*x2;
1289: vj++; xp = x + (*vj)*3;
1290: v += 9;
1291: }
1292: /* xk = inv(Dk)*(Dk*xk) */
1293: diag = aa+k*9; /* ptr to inv(Dk) */
1294: xp = x + k*3;
1295: xp[0] = diag[0]*x0 + diag[3]*x1 + diag[6]*x2;
1296: xp[1] = diag[1]*x0 + diag[4]*x1 + diag[7]*x2;
1297: xp[2] = diag[2]*x0 + diag[5]*x1 + diag[8]*x2;
1298: }
1299: return(0);
1300: }
1304: PetscErrorCode BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1305: {
1306: MatScalar *v;
1307: PetscScalar *xp,x0,x1,x2;
1308: PetscInt nz,*vj,k;
1311: for (k=mbs-1; k>=0; k--){
1312: v = aa + 9*ai[k];
1313: xp = x + k*3;
1314: x0 = xp[0]; x1 = xp[1]; x2 = xp[2]; /* xk */
1315: nz = ai[k+1] - ai[k];
1316: vj = aj + ai[k];
1317: xp = x + (*vj)*3;
1318: while (nz--) {
1319: /* xk += U(k,:)*x(:) */
1320: x0 += v[0]*xp[0] + v[3]*xp[1] + v[6]*xp[2];
1321: x1 += v[1]*xp[0] + v[4]*xp[1] + v[7]*xp[2];
1322: x2 += v[2]*xp[0] + v[5]*xp[1] + v[8]*xp[2];
1323: vj++;
1324: v += 9; xp = x + (*vj)*3;
1325: }
1326: xp = x + k*3;
1327: xp[0] = x0; xp[1] = x1; xp[2] = x2;
1328: }
1329: return(0);
1330: }
1334: PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1335: {
1336: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
1337: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1338: MatScalar *aa=a->a;
1339: PetscScalar *x,*b;
1341:
1343: VecGetArray(bb,&b);
1344: VecGetArray(xx,&x);
1346: /* solve U^T * D * y = b by forward substitution */
1347: PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));
1348: ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);
1350: /* solve U*x = y by back substitution */
1351: BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);
1353: VecRestoreArray(bb,&b);
1354: VecRestoreArray(xx,&x);
1355: PetscLogFlops(9*(2*a->nz + mbs));
1356: return(0);
1357: }
1361: PetscErrorCode MatForwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1362: {
1363: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
1364: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1365: MatScalar *aa=a->a;
1366: PetscScalar *x,*b;
1370: VecGetArray(bb,&b);
1371: VecGetArray(xx,&x);
1372: PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));
1373: ForwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);
1374: VecRestoreArray(bb,&b);
1375: VecRestoreArray(xx,&x);
1376: PetscLogFlops(9*(a->nz + mbs));
1377: return(0);
1378: }
1382: PetscErrorCode MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
1383: {
1384: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
1385: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1386: MatScalar *aa=a->a;
1387: PetscScalar *x,*b;
1391: VecGetArray(bb,&b);
1392: VecGetArray(xx,&x);
1393: PetscMemcpy(x,b,3*mbs*sizeof(PetscScalar));
1394: BackwardSolve_SeqSBAIJ_3_NaturalOrdering_private(ai,aj,aa,mbs,x);
1395: VecRestoreArray(bb,&b);
1396: VecRestoreArray(xx,&x);
1397: PetscLogFlops(9*a->nz);
1398: return(0);
1399: }
1403: PetscErrorCode MatSolve_SeqSBAIJ_2(Mat A,Vec bb,Vec xx)
1404: {
1405: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ *)A->data;
1406: IS isrow=a->row;
1407: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1409: PetscInt nz,*vj,k,k2,*r,idx;
1410: MatScalar *aa=a->a,*v,*diag;
1411: PetscScalar *x,*b,x0,x1,*t;
1414: VecGetArray(bb,&b);
1415: VecGetArray(xx,&x);
1416: t = a->solve_work;
1417: ISGetIndices(isrow,&r);
1419: /* solve U^T * D * y = perm(b) by forward substitution */
1420: for (k=0; k<mbs; k++) { /* t <- perm(b) */
1421: idx = 2*r[k];
1422: t[k*2] = b[idx];
1423: t[k*2+1] = b[idx+1];
1424: }
1425: for (k=0; k<mbs; k++){
1426: v = aa + 4*ai[k];
1427: vj = aj + ai[k];
1428: k2 = k*2;
1429: x0 = t[k2]; x1 = t[k2+1];
1430: nz = ai[k+1] - ai[k];
1431: while (nz--) {
1432: t[(*vj)*2] += v[0]*x0 + v[1]*x1;
1433: t[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1434: vj++; v += 4;
1435: }
1436: diag = aa+k*4; /* ptr to inv(Dk) */
1437: t[k2] = diag[0]*x0 + diag[2]*x1;
1438: t[k2+1] = diag[1]*x0 + diag[3]*x1;
1439: }
1441: /* solve U*x = y by back substitution */
1442: for (k=mbs-1; k>=0; k--){
1443: v = aa + 4*ai[k];
1444: vj = aj + ai[k];
1445: k2 = k*2;
1446: x0 = t[k2]; x1 = t[k2+1];
1447: nz = ai[k+1] - ai[k];
1448: while (nz--) {
1449: x0 += v[0]*t[(*vj)*2] + v[2]*t[(*vj)*2+1];
1450: x1 += v[1]*t[(*vj)*2] + v[3]*t[(*vj)*2+1];
1451: vj++; v += 4;
1452: }
1453: t[k2] = x0;
1454: t[k2+1] = x1;
1455: idx = 2*r[k];
1456: x[idx] = x0;
1457: x[idx+1] = x1;
1458: }
1460: ISRestoreIndices(isrow,&r);
1461: VecRestoreArray(bb,&b);
1462: VecRestoreArray(xx,&x);
1463: PetscLogFlops(4*(2*a->nz + mbs));
1464: return(0);
1465: }
1469: PetscErrorCode ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1470: {
1471: MatScalar *v,*diag;
1472: PetscScalar x0,x1;
1473: PetscInt nz,*vj,k,k2;
1476: for (k=0; k<mbs; k++){
1477: v = aa + 4*ai[k];
1478: vj = aj + ai[k];
1479: k2 = k*2;
1480: x0 = x[k2]; x1 = x[k2+1]; /* Dk*xk = k-th block of x */
1481: nz = ai[k+1] - ai[k];
1482:
1483: while (nz--) {
1484: /* x(:) += U(k,:)^T*(Dk*xk) */
1485: x[(*vj)*2] += v[0]*x0 + v[1]*x1;
1486: x[(*vj)*2+1] += v[2]*x0 + v[3]*x1;
1487: vj++; v += 4;
1488: }
1489: /* xk = inv(Dk)*(Dk*xk) */
1490: diag = aa+k*4; /* ptr to inv(Dk) */
1491: x[k2] = diag[0]*x0 + diag[2]*x1;
1492: x[k2+1] = diag[1]*x0 + diag[3]*x1;
1493: }
1494: return(0);
1495: }
1499: PetscErrorCode BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(PetscInt *ai,PetscInt *aj,MatScalar *aa,PetscInt mbs,PetscScalar *x)
1500: {
1501: MatScalar *v;
1502: PetscScalar x0,x1;
1503: PetscInt nz,*vj,k,k2;
1506: for (k=mbs-1; k>=0; k--){
1507: v = aa + 4*ai[k];
1508: vj = aj + ai[k];
1509: k2 = k*2;
1510: x0 = x[k2]; x1 = x[k2+1]; /* xk */
1511: nz = ai[k+1] - ai[k];
1512: while (nz--) {
1513: /* xk += U(k,:)*x(:) */
1514: x0 += v[0]*x[(*vj)*2] + v[2]*x[(*vj)*2+1];
1515: x1 += v[1]*x[(*vj)*2] + v[3]*x[(*vj)*2+1];
1516: vj++; v += 4;
1517: }
1518: x[k2] = x0;
1519: x[k2+1] = x1;
1520: }
1521: return(0);
1522: }
1526: PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1527: {
1528: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
1529: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1530: MatScalar *aa=a->a;
1531: PetscScalar *x,*b;
1535: VecGetArray(bb,&b);
1536: VecGetArray(xx,&x);
1538: /* solve U^T * D * y = b by forward substitution */
1539: PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));
1540: ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);
1542: /* solve U*x = y by back substitution */
1543: BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);
1545: VecRestoreArray(bb,&b);
1546: VecRestoreArray(xx,&x);
1547: PetscLogFlops(4*(2*a->nz + mbs)); /* bs2*(2*a->nz + mbs) */
1548: return(0);
1549: }
1553: PetscErrorCode MatForwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1554: {
1555: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
1556: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1557: MatScalar *aa=a->a;
1558: PetscScalar *x,*b;
1562: VecGetArray(bb,&b);
1563: VecGetArray(xx,&x);
1564: PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));
1565: ForwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);
1566: VecRestoreArray(bb,&b);
1567: VecRestoreArray(xx,&x);
1568: PetscLogFlops(4*(a->nz + mbs));
1569: return(0);
1570: }
1574: PetscErrorCode MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering(Mat A,Vec bb,Vec xx)
1575: {
1576: Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data;
1577: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1578: MatScalar *aa=a->a;
1579: PetscScalar *x,*b;
1583: VecGetArray(bb,&b);
1584: VecGetArray(xx,&x);
1585: PetscMemcpy(x,b,2*mbs*sizeof(PetscScalar));
1586: BackwardSolve_SeqSBAIJ_2_NaturalOrdering_private(ai,aj,aa,mbs,x);
1587: VecRestoreArray(bb,&b);
1588: VecRestoreArray(xx,&x);
1589: PetscLogFlops(4*a->nz);
1590: return(0);
1591: }
1595: PetscErrorCode MatSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1596: {
1597: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1598: IS isrow=a->row;
1600: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1601: MatScalar *aa=a->a,*v;
1602: PetscScalar *x,*b,xk,*t;
1603: PetscInt nz,*vj,k;
1606: VecGetArray(bb,&b);
1607: VecGetArray(xx,&x);
1608: t = a->solve_work;
1609: ISGetIndices(isrow,&rp);
1610:
1611: /* solve U^T*D^(1/2)*y = perm(b) by forward substitution */
1612: for (k=0; k<mbs; k++) t[k] = b[rp[k]];
1613: for (k=0; k<mbs; k++){
1614: v = aa + ai[k] + 1;
1615: vj = aj + ai[k] + 1;
1616: xk = t[k];
1617: nz = ai[k+1] - ai[k] - 1;
1618: while (nz--) t[*vj++] += (*v++) * xk;
1619: t[k] = xk*aa[ai[k]]; /* aa[k] = 1/D(k) */
1620: }
1622: /* solve U*perm(x) = y by back substitution */
1623: for (k=mbs-1; k>=0; k--){
1624: v = aa + ai[k] + 1;
1625: vj = aj + ai[k] + 1;
1626: nz = ai[k+1] - ai[k] - 1;
1627: while (nz--) t[k] += (*v++) * t[*vj++];
1628: x[rp[k]] = t[k];
1629: }
1631: ISRestoreIndices(isrow,&rp);
1632: VecRestoreArray(bb,&b);
1633: VecRestoreArray(xx,&x);
1634: PetscLogFlops(4*a->nz);
1635: return(0);
1636: }
1640: PetscErrorCode MatForwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1641: {
1642: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1643: IS isrow=a->row;
1645: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1646: MatScalar *aa=a->a,*v;
1647: PetscReal diagk;
1648: PetscScalar *x,*b,xk;
1649: PetscInt nz,*vj,k;
1652: /* solve U^T*D^(1/2)*x = perm(b) by forward substitution */
1653: VecGetArray(bb,&b);
1654: VecGetArray(xx,&x);
1655: ISGetIndices(isrow,&rp);
1656:
1657: for (k=0; k<mbs; k++) x[k] = b[rp[k]];
1658: for (k=0; k<mbs; k++){
1659: v = aa + ai[k] + 1;
1660: vj = aj + ai[k] + 1;
1661: xk = x[k];
1662: nz = ai[k+1] - ai[k] - 1;
1663: while (nz--) x[*vj++] += (*v++) * xk;
1665: diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1666: if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1667: x[k] = xk*sqrt(diagk);
1668: }
1669: ISRestoreIndices(isrow,&rp);
1670: VecRestoreArray(bb,&b);
1671: VecRestoreArray(xx,&x);
1672: PetscLogFlops(2*a->nz);
1673: return(0);
1674: }
1678: PetscErrorCode MatBackwardSolve_SeqSBAIJ_1(Mat A,Vec bb,Vec xx)
1679: {
1680: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1681: IS isrow=a->row;
1683: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp;
1684: MatScalar *aa=a->a,*v;
1685: PetscReal diagk;
1686: PetscScalar *x,*b,*t;
1687: PetscInt nz,*vj,k;
1690: /* solve D^(1/2)*U*perm(x) = b by back substitution */
1691: VecGetArray(bb,&b);
1692: VecGetArray(xx,&x);
1693: t = a->solve_work;
1694: ISGetIndices(isrow,&rp);
1696: for (k=mbs-1; k>=0; k--){
1697: v = aa + ai[k] + 1;
1698: vj = aj + ai[k] + 1;
1699: diagk = PetscRealPart(aa[ai[k]]);
1700: if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1701: t[k] = b[k] * sqrt(diagk);
1702: nz = ai[k+1] - ai[k] - 1;
1703: while (nz--) t[k] += (*v++) * t[*vj++];
1704: x[rp[k]] = t[k];
1705: }
1706: ISRestoreIndices(isrow,&rp);
1707: VecRestoreArray(bb,&b);
1708: VecRestoreArray(xx,&x);
1709: PetscLogFlops(2*a->nz);
1710: return(0);
1711: }
1715: PetscErrorCode MatSolves_SeqSBAIJ_1(Mat A,Vecs bb,Vecs xx)
1716: {
1717: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1721: if (A->rmap.bs == 1) {
1722: MatSolve_SeqSBAIJ_1(A,bb->v,xx->v);
1723: } else {
1724: IS isrow=a->row;
1725: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j,*rp,i;
1726: MatScalar *aa=a->a,*v;
1727: PetscScalar *x,*b,*t;
1728: PetscInt nz,*vj,k,n;
1729: if (bb->n > a->solves_work_n) {
1730: PetscFree(a->solves_work);
1731: PetscMalloc(bb->n*A->rmap.N*sizeof(PetscScalar),&a->solves_work);
1732: a->solves_work_n = bb->n;
1733: }
1734: n = bb->n;
1735: VecGetArray(bb->v,&b);
1736: VecGetArray(xx->v,&x);
1737: t = a->solves_work;
1739: ISGetIndices(isrow,&rp);
1740:
1741: /* solve U^T*D*y = perm(b) by forward substitution */
1742: for (k=0; k<mbs; k++) {for (i=0; i<n; i++) t[n*k+i] = b[rp[k]+i*mbs];} /* values are stored interlaced in t */
1743: for (k=0; k<mbs; k++){
1744: v = aa + ai[k];
1745: vj = aj + ai[k];
1746: nz = ai[k+1] - ai[k];
1747: while (nz--) {
1748: for (i=0; i<n; i++) t[n*(*vj)+i] += (*v) * t[n*k+i];
1749: v++;vj++;
1750: }
1751: for (i=0; i<n; i++) t[n*k+i] *= aa[k]; /* note: aa[k] = 1/D(k) */
1752: }
1753:
1754: /* solve U*perm(x) = y by back substitution */
1755: for (k=mbs-1; k>=0; k--){
1756: v = aa + ai[k];
1757: vj = aj + ai[k];
1758: nz = ai[k+1] - ai[k];
1759: while (nz--) {
1760: for (i=0; i<n; i++) t[n*k+i] += (*v) * t[n*(*vj)+i];
1761: v++;vj++;
1762: }
1763: for (i=0; i<n; i++) x[rp[k]+i*mbs] = t[n*k+i];
1764: }
1766: ISRestoreIndices(isrow,&rp);
1767: VecRestoreArray(bb->v,&b);
1768: VecRestoreArray(xx->v,&x);
1769: PetscLogFlops(bb->n*(4*a->nz + A->rmap.N));
1770: }
1771: return(0);
1772: }
1774: /*
1775: Special case where the matrix was ILU(0) factored in the natural
1776: ordering. This eliminates the need for the column and row permutation.
1777: */
1780: PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1781: {
1782: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1784: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1785: MatScalar *aa=a->a,*v;
1786: PetscScalar *x,*b,xk;
1787: PetscInt nz,*vj,k;
1790: VecGetArray(bb,&b);
1791: VecGetArray(xx,&x);
1792:
1793: /* solve U^T*D*y = b by forward substitution */
1794: PetscMemcpy(x,b,mbs*sizeof(PetscScalar));
1795: for (k=0; k<mbs; k++){
1796: v = aa + ai[k] + 1;
1797: vj = aj + ai[k] + 1;
1798: xk = x[k];
1799: nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */
1800: while (nz--) x[*vj++] += (*v++) * xk;
1801: x[k] = xk*aa[ai[k]]; /* note: aa[diag[k]] = 1/D(k) */
1802: }
1804: /* solve U*x = y by back substitution */
1805: for (k=mbs-2; k>=0; k--){
1806: v = aa + ai[k] + 1;
1807: vj = aj + ai[k] + 1;
1808: xk = x[k];
1809: nz = ai[k+1] - ai[k] - 1;
1810: while (nz--) xk += (*v++) * x[*vj++];
1811: x[k] = xk;
1812: }
1814: VecRestoreArray(bb,&b);
1815: VecRestoreArray(xx,&x);
1816: PetscLogFlops(4*a->nz);
1817: return(0);
1818: }
1822: PetscErrorCode MatForwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1823: {
1824: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1826: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1827: MatScalar *aa=a->a,*v;
1828: PetscReal diagk;
1829: PetscScalar *x,*b;
1830: PetscInt nz,*vj,k;
1833: /* solve U^T*D^(1/2)*x = b by forward substitution */
1834: VecGetArray(bb,&b);
1835: VecGetArray(xx,&x);
1836: PetscMemcpy(x,b,mbs*sizeof(PetscScalar));
1837: for (k=0; k<mbs; k++){
1838: v = aa + ai[k] + 1;
1839: vj = aj + ai[k] + 1;
1840: nz = ai[k+1] - ai[k] - 1; /* exclude diag[k] */
1841: while (nz--) x[*vj++] += (*v++) * x[k];
1842: diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1843: if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ2(PETSC_ERR_SUP,"Diagonal (%g,%g) must be real and nonnegative",PetscRealPart(aa[ai[k]]),PetscImaginaryPart(aa[ai[k]]));
1844: x[k] *= sqrt(diagk);
1845: }
1846: VecRestoreArray(bb,&b);
1847: VecRestoreArray(xx,&x);
1848: PetscLogFlops(2*a->nz);
1849: return(0);
1850: }
1854: PetscErrorCode MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
1855: {
1856: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
1858: PetscInt mbs=a->mbs,*ai=a->i,*aj=a->j;
1859: MatScalar *aa=a->a,*v;
1860: PetscReal diagk;
1861: PetscScalar *x,*b;
1862: PetscInt nz,*vj,k;
1865: /* solve D^(1/2)*U*x = b by back substitution */
1866: VecGetArray(bb,&b);
1867: VecGetArray(xx,&x);
1869: for (k=mbs-1; k>=0; k--){
1870: v = aa + ai[k] + 1;
1871: vj = aj + ai[k] + 1;
1872: diagk = PetscRealPart(aa[ai[k]]); /* note: aa[diag[k]] = 1/D(k) */
1873: if (PetscImaginaryPart(aa[ai[k]]) || diagk < 0) SETERRQ(PETSC_ERR_SUP,"Diagonal must be real and nonnegative");
1874: x[k] = sqrt(diagk)*b[k];
1875: nz = ai[k+1] - ai[k] - 1;
1876: while (nz--) x[k] += (*v++) * x[*vj++];
1877: }
1878: VecRestoreArray(bb,&b);
1879: VecRestoreArray(xx,&x);
1880: PetscLogFlops(2*a->nz);
1881: return(0);
1882: }
1884: /* Use Modified Sparse Row storage for u and ju, see Saad pp.85 */
1887: PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ_MSR(Mat A,IS perm,MatFactorInfo *info,Mat *B)
1888: {
1889: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data,*b;
1891: PetscInt *rip,i,mbs = a->mbs,*ai = a->i,*aj = a->j;
1892: PetscInt *jutmp,bs = A->rmap.bs,bs2=a->bs2;
1893: PetscInt m,reallocs = 0,*levtmp;
1894: PetscInt *prowl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
1895: PetscInt incrlev,*lev,shift,prow,nz;
1896: PetscReal f = info->fill,levels = info->levels;
1897: PetscTruth perm_identity;
1900: /* check whether perm is the identity mapping */
1901: ISIdentity(perm,&perm_identity);
1903: if (perm_identity){
1904: a->permute = PETSC_FALSE;
1905: ai = a->i; aj = a->j;
1906: } else { /* non-trivial permutation */
1907: a->permute = PETSC_TRUE;
1908: MatReorderingSeqSBAIJ(A, perm);
1909: ai = a->inew; aj = a->jnew;
1910: }
1911:
1912: /* initialization */
1913: ISGetIndices(perm,&rip);
1914: umax = (PetscInt)(f*ai[mbs] + 1);
1915: PetscMalloc(umax*sizeof(PetscInt),&lev);
1916: umax += mbs + 1;
1917: shift = mbs + 1;
1918: PetscMalloc((mbs+1)*sizeof(PetscInt),&iu);
1919: PetscMalloc(umax*sizeof(PetscInt),&ju);
1920: iu[0] = mbs + 1;
1921: juidx = mbs + 1;
1922: /* prowl: linked list for pivot row */
1923: PetscMalloc((3*mbs+1)*sizeof(PetscInt),&prowl);
1924: /* q: linked list for col index */
1925: q = prowl + mbs;
1926: levtmp = q + mbs;
1927:
1928: for (i=0; i<mbs; i++){
1929: prowl[i] = mbs;
1930: q[i] = 0;
1931: }
1933: /* for each row k */
1934: for (k=0; k<mbs; k++){
1935: nzk = 0;
1936: q[k] = mbs;
1937: /* copy current row into linked list */
1938: nz = ai[rip[k]+1] - ai[rip[k]];
1939: j = ai[rip[k]];
1940: while (nz--){
1941: vj = rip[aj[j++]];
1942: if (vj > k){
1943: qm = k;
1944: do {
1945: m = qm; qm = q[m];
1946: } while(qm < vj);
1947: if (qm == vj) {
1948: SETERRQ(PETSC_ERR_PLIB,"Duplicate entry in A\n");
1949: }
1950: nzk++;
1951: q[m] = vj;
1952: q[vj] = qm;
1953: levtmp[vj] = 0; /* initialize lev for nonzero element */
1954: }
1955: }
1957: /* modify nonzero structure of k-th row by computing fill-in
1958: for each row prow to be merged in */
1959: prow = k;
1960: prow = prowl[prow]; /* next pivot row (== 0 for symbolic factorization) */
1961:
1962: while (prow < k){
1963: /* merge row prow into k-th row */
1964: jmin = iu[prow] + 1;
1965: jmax = iu[prow+1];
1966: qm = k;
1967: for (j=jmin; j<jmax; j++){
1968: incrlev = lev[j-shift] + 1;
1969: if (incrlev > levels) continue;
1970: vj = ju[j];
1971: do {
1972: m = qm; qm = q[m];
1973: } while (qm < vj);
1974: if (qm != vj){ /* a fill */
1975: nzk++; q[m] = vj; q[vj] = qm; qm = vj;
1976: levtmp[vj] = incrlev;
1977: } else {
1978: if (levtmp[vj] > incrlev) levtmp[vj] = incrlev;
1979: }
1980: }
1981: prow = prowl[prow]; /* next pivot row */
1982: }
1983:
1984: /* add k to row list for first nonzero element in k-th row */
1985: if (nzk > 1){
1986: i = q[k]; /* col value of first nonzero element in k_th row of U */
1987: prowl[k] = prowl[i]; prowl[i] = k;
1988: }
1989: iu[k+1] = iu[k] + nzk;
1991: /* allocate more space to ju and lev if needed */
1992: if (iu[k+1] > umax) {
1993: /* estimate how much additional space we will need */
1994: /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
1995: /* just double the memory each time */
1996: maxadd = umax;
1997: if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
1998: umax += maxadd;
2000: /* allocate a longer ju */
2001: PetscMalloc(umax*sizeof(PetscInt),&jutmp);
2002: PetscMemcpy(jutmp,ju,iu[k]*sizeof(PetscInt));
2003: PetscFree(ju);
2004: ju = jutmp;
2006: PetscMalloc(umax*sizeof(PetscInt),&jutmp);
2007: PetscMemcpy(jutmp,lev,(iu[k]-shift)*sizeof(PetscInt));
2008: PetscFree(lev);
2009: lev = jutmp;
2010: reallocs += 2; /* count how many times we realloc */
2011: }
2013: /* save nonzero structure of k-th row in ju */
2014: i=k;
2015: while (nzk --) {
2016: i = q[i];
2017: ju[juidx] = i;
2018: lev[juidx-shift] = levtmp[i];
2019: juidx++;
2020: }
2021: }
2022:
2023: #if defined(PETSC_USE_INFO)
2024: if (ai[mbs] != 0) {
2025: PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
2026: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
2027: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2028: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
2029: PetscInfo(A,"for best performance.\n");
2030: } else {
2031: PetscInfo(A,"Empty matrix.\n");
2032: }
2033: #endif
2035: ISRestoreIndices(perm,&rip);
2036: PetscFree(prowl);
2037: PetscFree(lev);
2039: /* put together the new matrix */
2040: MatCreate(A->comm,B);
2041: MatSetSizes(*B,bs*mbs,bs*mbs,bs*mbs,bs*mbs);
2042: MatSetType(*B,A->type_name);
2043: MatSeqSBAIJSetPreallocation_SeqSBAIJ(*B,bs,0,PETSC_NULL);
2045: /* PetscLogObjectParent(*B,iperm); */
2046: b = (Mat_SeqSBAIJ*)(*B)->data;
2047: PetscFree2(b->imax,b->ilen);
2048: b->singlemalloc = PETSC_FALSE;
2049: b->free_a = PETSC_TRUE;
2050: b->free_ij = PETSC_TRUE;
2051: /* the next line frees the default space generated by the Create() */
2052: PetscFree3(b->a,b->j,b->i);
2053: PetscMalloc((iu[mbs]+1)*sizeof(MatScalar)*bs2,&b->a);
2054: b->j = ju;
2055: b->i = iu;
2056: b->diag = 0;
2057: b->ilen = 0;
2058: b->imax = 0;
2059:
2060: if (b->row) {
2061: ISDestroy(b->row);
2062: }
2063: if (b->icol) {
2064: ISDestroy(b->icol);
2065: }
2066: b->row = perm;
2067: b->icol = perm;
2068: PetscObjectReference((PetscObject)perm);
2069: PetscObjectReference((PetscObject)perm);
2070: PetscMalloc((bs*mbs+bs)*sizeof(PetscScalar),&b->solve_work);
2071: /* In b structure: Free imax, ilen, old a, old j.
2072: Allocate idnew, solve_work, new a, new j */
2073: PetscLogObjectMemory(*B,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
2074: b->maxnz = b->nz = iu[mbs];
2075:
2076: (*B)->factor = FACTOR_CHOLESKY;
2077: (*B)->info.factor_mallocs = reallocs;
2078: (*B)->info.fill_ratio_given = f;
2079: if (ai[mbs] != 0) {
2080: (*B)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
2081: } else {
2082: (*B)->info.fill_ratio_needed = 0.0;
2083: }
2085: if (perm_identity){
2086: switch (bs) {
2087: case 1:
2088: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
2089: (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2090: (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2091: (*B)->ops->solves = MatSolves_SeqSBAIJ_1;
2092: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");
2093: break;
2094: case 2:
2095: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
2096: (*B)->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2097: (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2098: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");
2099: break;
2100: case 3:
2101: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
2102: (*B)->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2103: (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2104: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");
2105: break;
2106: case 4:
2107: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
2108: (*B)->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2109: (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2110: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");
2111: break;
2112: case 5:
2113: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
2114: (*B)->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2115: (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2116: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");
2117: break;
2118: case 6:
2119: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
2120: (*B)->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2121: (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2122: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");
2123: break;
2124: case 7:
2125: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
2126: (*B)->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2127: (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2128: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");
2129: break;
2130: default:
2131: (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
2132: (*B)->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2133: (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2134: PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");
2135: break;
2136: }
2137: }
2139: return(0);
2140: }
2142: #include petscbt.h
2143: #include src/mat/utils/freespace.h
2146: PetscErrorCode MatICCFactorSymbolic_SeqSBAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
2147: {
2148: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
2149: Mat_SeqSBAIJ *b;
2150: Mat B;
2151: PetscErrorCode ierr;
2152: PetscTruth perm_identity,free_ij = PETSC_TRUE;
2153: PetscInt bs=A->rmap.bs,am=a->mbs;
2154: PetscInt reallocs=0,*rip,i,*ai,*aj,*ui;
2155: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
2156: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,ncols,*cols,*cols_lvl,*uj,**uj_ptr,**uj_lvl_ptr;
2157: PetscReal fill=info->fill,levels=info->levels,ratio_needed;
2158: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2159: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
2160: PetscBT lnkbt;
2161: PetscScalar *ua;
2164: /*
2165: This code originally uses Modified Sparse Row (MSR) storage
2166: (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
2167: Then it is rewritten so the factor B takes seqsbaij format. However the associated
2168: MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1,
2169: thus the original code in MSR format is still used for these cases.
2170: The code below should replace MatICCFactorSymbolic_SeqSBAIJ_MSR() whenever
2171: MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
2172: */
2173: if (bs > 1){
2174: MatICCFactorSymbolic_SeqSBAIJ_MSR(A,perm,info,fact);
2175: return(0);
2176: }
2178: /* check whether perm is the identity mapping */
2179: ISIdentity(perm,&perm_identity);
2180:
2181: /* special case that simply copies fill pattern */
2182: if (!levels && perm_identity) {
2183: a->permute = PETSC_FALSE;
2184: /* reuse the column pointers and row offsets for memory savings */
2185: ui = a->i;
2186: uj = a->j;
2187: free_ij = PETSC_FALSE;
2188: ratio_needed = 1.0;
2189: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
2190: if (perm_identity){
2191: a->permute = PETSC_FALSE;
2192: ai = a->i; aj = a->j;
2193: } else { /* non-trivial permutation */
2194: a->permute = PETSC_TRUE;
2195: MatReorderingSeqSBAIJ(A, perm);
2196: ai = a->inew; aj = a->jnew;
2197: }
2198: ISGetIndices(perm,&rip);
2200: /* initialization */
2201: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
2202: ui[0] = 0;
2204: /* jl: linked list for storing indices of the pivot rows
2205: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
2206: PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt*),&jl);
2207: il = jl + am;
2208: uj_ptr = (PetscInt**)(il + am);
2209: uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
2210: for (i=0; i<am; i++){
2211: jl[i] = am; il[i] = 0;
2212: }
2214: /* create and initialize a linked list for storing column indices of the active row k */
2215: nlnk = am + 1;
2216: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
2218: /* initial FreeSpace size is fill*(ai[am]+1) */
2219: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
2220: current_space = free_space;
2221: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
2222: current_space_lvl = free_space_lvl;
2224: for (k=0; k<am; k++){ /* for each active row k */
2225: /* initialize lnk by the column indices of row rip[k] */
2226: nzk = 0;
2227: ncols = ai[rip[k]+1] - ai[rip[k]];
2228: cols = aj+ai[rip[k]];
2229: PetscIncompleteLLInit(ncols,cols,am,rip,nlnk,lnk,lnk_lvl,lnkbt);
2230: nzk += nlnk;
2232: /* update lnk by computing fill-in for each pivot row to be merged in */
2233: prow = jl[k]; /* 1st pivot row */
2234:
2235: while (prow < k){
2236: nextprow = jl[prow];
2237:
2238: /* merge prow into k-th row */
2239: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
2240: jmax = ui[prow+1];
2241: ncols = jmax-jmin;
2242: i = jmin - ui[prow];
2243: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
2244: j = *(uj_lvl_ptr[prow] + i - 1);
2245: cols_lvl = uj_lvl_ptr[prow]+i;
2246: PetscICCLLAddSorted(ncols,cols,levels,cols_lvl,am,nlnk,lnk,lnk_lvl,lnkbt,j);
2247: nzk += nlnk;
2249: /* update il and jl for prow */
2250: if (jmin < jmax){
2251: il[prow] = jmin;
2252: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
2253: }
2254: prow = nextprow;
2255: }
2257: /* if free space is not available, make more free space */
2258: if (current_space->local_remaining<nzk) {
2259: i = am - k + 1; /* num of unfactored rows */
2260: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
2261: PetscFreeSpaceGet(i,¤t_space);
2262: PetscFreeSpaceGet(i,¤t_space_lvl);
2263: reallocs++;
2264: }
2266: /* copy data into free_space and free_space_lvl, then initialize lnk */
2267: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
2269: /* add the k-th row into il and jl */
2270: if (nzk-1 > 0){
2271: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
2272: jl[k] = jl[i]; jl[i] = k;
2273: il[k] = ui[k] + 1;
2274: }
2275: uj_ptr[k] = current_space->array;
2276: uj_lvl_ptr[k] = current_space_lvl->array;
2278: current_space->array += nzk;
2279: current_space->local_used += nzk;
2280: current_space->local_remaining -= nzk;
2281: current_space_lvl->array += nzk;
2282: current_space_lvl->local_used += nzk;
2283: current_space_lvl->local_remaining -= nzk;
2285: ui[k+1] = ui[k] + nzk;
2286: }
2288: #if defined(PETSC_USE_INFO)
2289: if (ai[am] != 0) {
2290: PetscReal af = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2291: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
2292: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
2293: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
2294: } else {
2295: PetscInfo(A,"Empty matrix.\n");
2296: }
2297: #endif
2299: ISRestoreIndices(perm,&rip);
2300: PetscFree(jl);
2302: /* destroy list of free space and other temporary array(s) */
2303: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
2304: PetscFreeSpaceContiguous(&free_space,uj);
2305: PetscIncompleteLLDestroy(lnk,lnkbt);
2306: PetscFreeSpaceDestroy(free_space_lvl);
2307: if (ai[am] != 0) {
2308: ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
2309: } else {
2310: ratio_needed = 0.0;
2311: }
2312: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
2314: /* put together the new matrix in MATSEQSBAIJ format */
2315: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&ua);
2316: MatCreateSeqSBAIJWithArrays(PETSC_COMM_SELF,1,am,am,ui,uj,ua,fact);
2317: B = *fact;
2318: b = (Mat_SeqSBAIJ*)B->data;
2319: PetscFree2(b->imax,b->ilen);
2320: b->singlemalloc = PETSC_FALSE;
2321: b->free_a = PETSC_TRUE;
2322: b->free_ij = free_ij;
2323: b->diag = 0;
2324: b->ilen = 0;
2325: b->imax = 0;
2326: b->row = perm;
2327: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
2328: PetscObjectReference((PetscObject)perm);
2329: b->icol = perm;
2330: PetscObjectReference((PetscObject)perm);
2331: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
2332: b->maxnz = b->nz = ui[am];
2333:
2334: B->factor = FACTOR_CHOLESKY;
2335: B->info.factor_mallocs = reallocs;
2336: B->info.fill_ratio_given = fill;
2337: B->info.fill_ratio_needed = ratio_needed;
2339: if (perm_identity){
2340: switch (bs) {
2341: case 1:
2342: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
2343: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2344: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
2345: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=1\n");
2346: break;
2347: case 2:
2348: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering;
2349: B->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2350: B->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering;
2351: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=2\n");
2352: break;
2353: case 3:
2354: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering;
2355: B->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2356: B->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering;
2357: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=3\n");
2358: break;
2359: case 4:
2360: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering;
2361: B->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2362: B->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering;
2363: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=4\n");
2364: break;
2365: case 5:
2366: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering;
2367: B->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2368: B->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering;
2369: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=5\n");
2370: break;
2371: case 6:
2372: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering;
2373: B->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2374: B->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering;
2375: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=6\n");
2376: break;
2377: case 7:
2378: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering;
2379: B->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2380: B->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering;
2381: PetscInfo(A,"Using special in-place natural ordering factor and solve BS=7\n");
2382: break;
2383: default:
2384: B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering;
2385: B->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2386: B->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering;
2387: PetscInfo(A,"Using special in-place natural ordering factor and solve BS>7\n");
2388: break;
2389: }
2390: }
2391: return(0);
2392: }