Actual source code: matpapt.c
1: #define PETSCMAT_DLL
3: /*
4: Defines matrix-matrix product routines for pairs of SeqAIJ matrices
5: C = P * A * P^T
6: */
8: #include src/mat/impls/aij/seq/aij.h
9: #include src/mat/utils/freespace.h
11: static PetscEvent logkey_matapplypapt = 0;
12: static PetscEvent logkey_matapplypapt_symbolic = 0;
13: static PetscEvent logkey_matapplypapt_numeric = 0;
15: /*
16: MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ - Forms the symbolic product of two SeqAIJ matrices
17: C = P * A * P^T;
19: Note: C is assumed to be uncreated.
20: If this is not the case, Destroy C before calling this routine.
21: */
24: PetscErrorCode MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
25: {
26: /* Note: This code is virtually identical to that of MatApplyPtAP_SeqAIJ_Symbolic */
27: /* and MatMatMult_SeqAIJ_SeqAIJ_Symbolic. Perhaps they could be merged nicely. */
28: PetscErrorCode ierr;
29: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
30: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
31: PetscInt *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pti,*ptj,*ptjj;
32: PetscInt *ci,*cj,*paj,*padenserow,*pasparserow,*denserow,*sparserow;
33: PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N;
34: PetscInt i,j,k,pnzi,arow,anzj,panzi,ptrow,ptnzj,cnzi;
35: MatScalar *ca;
38: /* some error checking which could be moved into interface layer */
39: if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
40: if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);
42: /* Set up timers */
43: if (!logkey_matapplypapt_symbolic) {
45: }
48: /* Create ij structure of P^T */
49: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
51: /* Allocate ci array, arrays for fill computation and */
52: /* free space for accumulating nonzero column info */
53: PetscMalloc(((pm+1)*1)*sizeof(PetscInt),&ci);
54: ci[0] = 0;
56: PetscMalloc((2*an+2*pm+1)*sizeof(PetscInt),&padenserow);
57: PetscMemzero(padenserow,(2*an+2*pm+1)*sizeof(PetscInt));
58: pasparserow = padenserow + an;
59: denserow = pasparserow + an;
60: sparserow = denserow + pm;
62: /* Set initial free space to be nnz(A) scaled by aspect ratio of Pt. */
63: /* This should be reasonable if sparsity of PAPt is similar to that of A. */
64: PetscFreeSpaceGet((ai[am]/pn)*pm,&free_space);
65: current_space = free_space;
67: /* Determine fill for each row of C: */
68: for (i=0;i<pm;i++) {
69: pnzi = pi[i+1] - pi[i];
70: panzi = 0;
71: /* Get symbolic sparse row of PA: */
72: for (j=0;j<pnzi;j++) {
73: arow = *pj++;
74: anzj = ai[arow+1] - ai[arow];
75: ajj = aj + ai[arow];
76: for (k=0;k<anzj;k++) {
77: if (!padenserow[ajj[k]]) {
78: padenserow[ajj[k]] = -1;
79: pasparserow[panzi++] = ajj[k];
80: }
81: }
82: }
83: /* Using symbolic row of PA, determine symbolic row of C: */
84: paj = pasparserow;
85: cnzi = 0;
86: for (j=0;j<panzi;j++) {
87: ptrow = *paj++;
88: ptnzj = pti[ptrow+1] - pti[ptrow];
89: ptjj = ptj + pti[ptrow];
90: for (k=0;k<ptnzj;k++) {
91: if (!denserow[ptjj[k]]) {
92: denserow[ptjj[k]] = -1;
93: sparserow[cnzi++] = ptjj[k];
94: }
95: }
96: }
98: /* sort sparse representation */
99: PetscSortInt(cnzi,sparserow);
101: /* If free space is not available, make more free space */
102: /* Double the amount of total space in the list */
103: if (current_space->local_remaining<cnzi) {
104: PetscFreeSpaceGet(current_space->total_array_size,¤t_space);
105: }
107: /* Copy data into free space, and zero out dense row */
108: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
109: current_space->array += cnzi;
110: current_space->local_used += cnzi;
111: current_space->local_remaining -= cnzi;
113: for (j=0;j<panzi;j++) {
114: padenserow[pasparserow[j]] = 0;
115: }
116: for (j=0;j<cnzi;j++) {
117: denserow[sparserow[j]] = 0;
118: }
119: ci[i+1] = ci[i] + cnzi;
120: }
121: /* column indices are in the list of free space */
122: /* Allocate space for cj, initialize cj, and */
123: /* destroy list of free space and other temporary array(s) */
124: PetscMalloc((ci[pm]+1)*sizeof(PetscInt),&cj);
125: PetscFreeSpaceContiguous(&free_space,cj);
126: PetscFree(padenserow);
127:
128: /* Allocate space for ca */
129: PetscMalloc((ci[pm]+1)*sizeof(MatScalar),&ca);
130: PetscMemzero(ca,(ci[pm]+1)*sizeof(MatScalar));
131:
132: /* put together the new matrix */
133: MatCreateSeqAIJWithArrays(A->comm,pm,pm,ci,cj,ca,C);
135: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
136: /* Since these are PETSc arrays, change flags to free them as necessary. */
137: c = (Mat_SeqAIJ *)((*C)->data);
138: c->free_a = PETSC_TRUE;
139: c->free_ij = PETSC_TRUE;
140: c->nonew = 0;
142: /* Clean up. */
143: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
146: return(0);
147: }
149: /*
150: MatApplyPAPt_Numeric_SeqAIJ - Forms the numeric product of two SeqAIJ matrices
151: C = P * A * P^T;
152: Note: C must have been created by calling MatApplyPAPt_Symbolic_SeqAIJ.
153: */
156: PetscErrorCode MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat C)
157: {
159: PetscInt flops=0;
160: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
161: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
162: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
163: PetscInt *ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj=p->j,*paj,*pajdense,*ptj;
164: PetscInt *ci=c->i,*cj=c->j;
165: PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,cn=C->cmap.N,cm=C->rmap.N;
166: PetscInt i,j,k,k1,k2,pnzi,anzj,panzj,arow,ptcol,ptnzj,cnzi;
167: MatScalar *aa=a->a,*pa=p->a,*pta=p->a,*ptaj,*paa,*aaj,*ca=c->a,sum;
171: /* This error checking should be unnecessary if the symbolic was performed */
172: if (pm!=cm) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm,cm);
173: if (pn!=am) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pn,am);
174: if (am!=an) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix 'A' must be square, %D != %D",am, an);
175: if (pm!=cn) SETERRQ2(PETSC_ERR_ARG_SIZ,"Matrix dimensions are incompatible, %D != %D",pm, cn);
177: /* Set up timers */
178: if (!logkey_matapplypapt_numeric) {
180: }
183: PetscMalloc(an*(sizeof(MatScalar)+2*sizeof(PetscInt)),&paa);
184: PetscMemzero(paa,an*(sizeof(MatScalar)+2*sizeof(PetscInt)));
185: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
187: paj = (PetscInt*)(paa + an);
188: pajdense = paj + an;
190: for (i=0;i<pm;i++) {
191: /* Form sparse row of P*A */
192: pnzi = pi[i+1] - pi[i];
193: panzj = 0;
194: for (j=0;j<pnzi;j++) {
195: arow = *pj++;
196: anzj = ai[arow+1] - ai[arow];
197: ajj = aj + ai[arow];
198: aaj = aa + ai[arow];
199: for (k=0;k<anzj;k++) {
200: if (!pajdense[ajj[k]]) {
201: pajdense[ajj[k]] = -1;
202: paj[panzj++] = ajj[k];
203: }
204: paa[ajj[k]] += (*pa)*aaj[k];
205: }
206: flops += 2*anzj;
207: pa++;
208: }
210: /* Sort the j index array for quick sparse axpy. */
211: PetscSortInt(panzj,paj);
213: /* Compute P*A*P^T using sparse inner products. */
214: /* Take advantage of pre-computed (i,j) of C for locations of non-zeros. */
215: cnzi = ci[i+1] - ci[i];
216: for (j=0;j<cnzi;j++) {
217: /* Form sparse inner product of current row of P*A with (*cj++) col of P^T. */
218: ptcol = *cj++;
219: ptnzj = pi[ptcol+1] - pi[ptcol];
220: ptj = pjj + pi[ptcol];
221: ptaj = pta + pi[ptcol];
222: sum = 0.;
223: k1 = 0;
224: k2 = 0;
225: while ((k1<panzj) && (k2<ptnzj)) {
226: if (paj[k1]==ptj[k2]) {
227: sum += paa[paj[k1++]]*ptaj[k2++];
228: } else if (paj[k1] < ptj[k2]) {
229: k1++;
230: } else /* if (paj[k1] > ptj[k2]) */ {
231: k2++;
232: }
233: }
234: *ca++ = sum;
235: }
237: /* Zero the current row info for P*A */
238: for (j=0;j<panzj;j++) {
239: paa[paj[j]] = 0.;
240: pajdense[paj[j]] = 0;
241: }
242: }
244: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
245: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
246: PetscLogFlops(flops);
248: return(0);
249: }
250:
253: PetscErrorCode MatApplyPAPt_SeqAIJ_SeqAIJ(Mat A,Mat P,Mat *C)
254: {
258: if (!logkey_matapplypapt) {
260: }
262: MatApplyPAPt_Symbolic_SeqAIJ_SeqAIJ(A,P,C);
263: MatApplyPAPt_Numeric_SeqAIJ_SeqAIJ(A,P,*C);
265: return(0);
266: }