Actual source code: mpiaij.c
1: #define PETSCMAT_DLL
3: #include "src/mat/impls/aij/mpi/mpiaij.h"
4: #include "src/inline/spops.h"
6: /*
7: Local utility routine that creates a mapping from the global column
8: number to the local number in the off-diagonal part of the local
9: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable, at
10: a slightly higher hash-table cost; without it, it is not scalable (each processor
11: has an order-N integer array) but is fast to access.
12: */
15: PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16: {
17: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
19: PetscInt n = aij->B->cmap.n,i;
22: #if defined (PETSC_USE_CTABLE)
23: PetscTableCreate(n,&aij->colmap);
24: for (i=0; i<n; i++){
25: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
26: }
27: #else
28: PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);
29: PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));
30: PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));
31: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32: #endif
33: return(0);
34: }
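/*
   Editorial sketch (not part of the original source): how the colmap built above is
   consulted to translate a global column number gcol into the local column index of
   the off-diagonal block B. It mirrors the lookup performed in MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ() below; the helper name is illustrative only.
*/
static PetscInt ColmapLookupSketch(Mat_MPIAIJ *aij,PetscInt gcol)
{
  PetscInt col;
#if defined (PETSC_USE_CTABLE)
  PetscTableFind(aij->colmap,gcol+1,&col);  /* entries are stored shifted by one so 0 means "absent" */
  col--;
#else
  col = aij->colmap[gcol] - 1;              /* same shift-by-one convention for the integer array */
#endif
  return col;                               /* col < 0 means gcol has no local column in B */
}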
37: #define CHUNKSIZE 15
38: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39: { \
40: if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41: lastcol1 = col;\
42: while (high1-low1 > 5) { \
43: t = (low1+high1)/2; \
44: if (rp1[t] > col) high1 = t; \
45: else low1 = t; \
46: } \
47: for (_i=low1; _i<high1; _i++) { \
48: if (rp1[_i] > col) break; \
49: if (rp1[_i] == col) { \
50: if (addv == ADD_VALUES) ap1[_i] += value; \
51: else ap1[_i] = value; \
52: goto a_noinsert; \
53: } \
54: } \
55: if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56: if (nonew == 1) goto a_noinsert; \
57: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58: MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59: N = nrow1++ - 1; a->nz++; high1++; \
60: /* shift up all the later entries in this row */ \
61: for (ii=N; ii>=_i; ii--) { \
62: rp1[ii+1] = rp1[ii]; \
63: ap1[ii+1] = ap1[ii]; \
64: } \
65: rp1[_i] = col; \
66: ap1[_i] = value; \
67: a_noinsert: ; \
68: ailen[row] = nrow1; \
69: }
72: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73: { \
74: if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75: lastcol2 = col;\
76: while (high2-low2 > 5) { \
77: t = (low2+high2)/2; \
78: if (rp2[t] > col) high2 = t; \
79: else low2 = t; \
80: } \
81: for (_i=low2; _i<high2; _i++) { \
82: if (rp2[_i] > col) break; \
83: if (rp2[_i] == col) { \
84: if (addv == ADD_VALUES) ap2[_i] += value; \
85: else ap2[_i] = value; \
86: goto b_noinsert; \
87: } \
88: } \
89: if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90: if (nonew == 1) goto b_noinsert; \
91: if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92: MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93: N = nrow2++ - 1; b->nz++; high2++;\
94: /* shift up all the later entries in this row */ \
95: for (ii=N; ii>=_i; ii--) { \
96: rp2[ii+1] = rp2[ii]; \
97: ap2[ii+1] = ap2[ii]; \
98: } \
99: rp2[_i] = col; \
100: ap2[_i] = value; \
101: b_noinsert: ; \
102: bilen[row] = nrow2; \
103: }
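/*
   Editorial sketch (not part of the original source): the search-and-insert logic that
   both macros above expand to, written as a plain function for readability. rp/ap are
   the sorted column indices and values of one row and nrow its current length. The
   reallocation step (MatSeqXAIJReallocateAIJ) and the nonew/ignorezeroentries special
   cases are omitted; the helper name is illustrative only.
*/
static void RowInsertSketch(PetscInt *rp,PetscScalar *ap,PetscInt *nrow,PetscInt col,PetscScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i,ii;
  while (high-low > 5) {                 /* binary search down to a short interval */
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {             /* linear scan over the remaining interval */
    if (rp[i] > col) break;
    if (rp[i] == col) {                  /* column already present: add or overwrite */
      if (addv == ADD_VALUES) ap[i] += value;
      else                    ap[i]  = value;
      return;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {        /* shift the tail of the row up by one slot */
    rp[ii+1] = rp[ii];
    ap[ii+1] = ap[ii];
  }
  rp[i] = col;                           /* insert the new entry in sorted position */
  ap[i] = value;
  (*nrow)++;
}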
107: PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108: {
109: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
110: Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
112: PetscInt l,*garray = mat->garray,diag;
115: /* code only works for square matrices A */
117: /* find the length of the part of the row that lies to the left of the diagonal block */
118: MatGetOwnershipRange(A,&diag,0);
119: row = row - diag;
120: for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121: if (garray[b->j[b->i[row]+l]] > diag) break;
122: }
123: PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));
125: /* diagonal part */
126: PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));
128: /* right of diagonal part */
129: PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));
130: return(0);
131: }
135: PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136: {
137: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
138: PetscScalar value;
140: PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141: PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142: PetscTruth roworiented = aij->roworiented;
144: /* Some variables required by the macros */
145: Mat A = aij->A;
146: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
147: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148: PetscScalar *aa = a->a;
149: PetscTruth ignorezeroentries = a->ignorezeroentries;
150: Mat B = aij->B;
151: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
152: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153: PetscScalar *ba = b->a;
155: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156: PetscInt nonew = a->nonew;
157: PetscScalar *ap1,*ap2;
160: for (i=0; i<m; i++) {
161: if (im[i] < 0) continue;
162: #if defined(PETSC_USE_DEBUG)
163: if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164: #endif
165: if (im[i] >= rstart && im[i] < rend) {
166: row = im[i] - rstart;
167: lastcol1 = -1;
168: rp1 = aj + ai[row];
169: ap1 = aa + ai[row];
170: rmax1 = aimax[row];
171: nrow1 = ailen[row];
172: low1 = 0;
173: high1 = nrow1;
174: lastcol2 = -1;
175: rp2 = bj + bi[row];
176: ap2 = ba + bi[row];
177: rmax2 = bimax[row];
178: nrow2 = bilen[row];
179: low2 = 0;
180: high2 = nrow2;
182: for (j=0; j<n; j++) {
183: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185: if (in[j] >= cstart && in[j] < cend){
186: col = in[j] - cstart;
187: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188: } else if (in[j] < 0) continue;
189: #if defined(PETSC_USE_DEBUG)
190: else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191: #endif
192: else {
193: if (mat->was_assembled) {
194: if (!aij->colmap) {
195: CreateColmap_MPIAIJ_Private(mat);
196: }
197: #if defined (PETSC_USE_CTABLE)
198: PetscTableFind(aij->colmap,in[j]+1,&col);
199: col--;
200: #else
201: col = aij->colmap[in[j]] - 1;
202: #endif
203: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204: DisAssemble_MPIAIJ(mat);
205: col = in[j];
206: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207: B = aij->B;
208: b = (Mat_SeqAIJ*)B->data;
209: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210: rp2 = bj + bi[row];
211: ap2 = ba + bi[row];
212: rmax2 = bimax[row];
213: nrow2 = bilen[row];
214: low2 = 0;
215: high2 = nrow2;
216: bm = aij->B->rmap.n;
217: ba = b->a;
218: }
219: } else col = in[j];
220: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221: }
222: }
223: } else {
224: if (!aij->donotstash) {
225: if (roworiented) {
226: if (ignorezeroentries && v[i*n] == 0.0) continue;
227: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
228: } else {
229: if (ignorezeroentries && v[i] == 0.0) continue;
230: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
231: }
232: }
233: }
234: }
235: return(0);
236: }
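/*
   Editorial usage sketch (not part of the original source): a typical calling sequence
   that ends up in MatSetValues_MPIAIJ() above. Rows owned by another process are legal;
   they are stashed here and communicated by the assembly routines below. The sizes and
   values are illustrative and error checking (CHKERRQ) is omitted to match this listing.
*/
static PetscErrorCode SetValuesUsageSketch(MPI_Comm comm)
{
  Mat         A;
  PetscInt    row = 0,cols[2] = {0,1};
  PetscScalar vals[2] = {2.0,-1.0};

  MatCreate(comm,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
  MatSetType(A,MATMPIAIJ);
  MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);
  MatSetValues(A,1,&row,2,cols,vals,INSERT_VALUES);  /* row may be local or off-process */
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);            /* moves any stashed off-process values */
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  MatDestroy(A);
  return(0);
}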
240: PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241: {
242: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
244: PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245: PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
248: for (i=0; i<m; i++) {
249: if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
250: if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251: if (idxm[i] >= rstart && idxm[i] < rend) {
252: row = idxm[i] - rstart;
253: for (j=0; j<n; j++) {
254: if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
255: if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256: if (idxn[j] >= cstart && idxn[j] < cend){
257: col = idxn[j] - cstart;
258: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
259: } else {
260: if (!aij->colmap) {
261: CreateColmap_MPIAIJ_Private(mat);
262: }
263: #if defined (PETSC_USE_CTABLE)
264: PetscTableFind(aij->colmap,idxn[j]+1,&col);
265: col --;
266: #else
267: col = aij->colmap[idxn[j]] - 1;
268: #endif
269: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270: else {
271: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
272: }
273: }
274: }
275: } else {
276: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277: }
278: }
279: return(0);
280: }
284: PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285: {
286: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
288: PetscInt nstash,reallocs;
289: InsertMode addv;
292: if (aij->donotstash) {
293: return(0);
294: }
296: /* make sure all processors are either in INSERTMODE or ADDMODE */
297: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
298: if (addv == (ADD_VALUES|INSERT_VALUES)) {
299: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300: }
301: mat->insertmode = addv; /* in case this processor had no cache */
303: MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);
304: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
305: PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
306: return(0);
307: }
311: PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312: {
313: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
314: Mat_SeqAIJ *a=(Mat_SeqAIJ *)aij->A->data;
316: PetscMPIInt n;
317: PetscInt i,j,rstart,ncols,flg;
318: PetscInt *row,*col,other_disassembled;
319: PetscScalar *val;
320: InsertMode addv = mat->insertmode;
322: /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324: if (!aij->donotstash) {
325: while (1) {
326: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
327: if (!flg) break;
329: for (i=0; i<n;) {
330: /* Now identify the consecutive vals belonging to the same row */
331: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332: if (j < n) ncols = j-i;
333: else ncols = n-i;
334: /* Now assemble all these values with a single function call */
335: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
336: i = j;
337: }
338: }
339: MatStashScatterEnd_Private(&mat->stash);
340: }
341: a->compressedrow.use = PETSC_FALSE;
342: MatAssemblyBegin(aij->A,mode);
343: MatAssemblyEnd(aij->A,mode);
345: /* determine if any processor has disassembled; if so, we must
346: also disassemble ourselves so that we may reassemble. */
347: /*
348: if nonzero structure of submatrix B cannot change then we know that
349: no processor disassembled thus we can skip this stuff
350: */
351: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
352: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
353: if (mat->was_assembled && !other_disassembled) {
354: DisAssemble_MPIAIJ(mat);
355: }
356: }
357: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358: MatSetUpMultiply_MPIAIJ(mat);
359: }
360: MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);
361: ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362: MatAssemblyBegin(aij->B,mode);
363: MatAssemblyEnd(aij->B,mode);
365: PetscFree(aij->rowvalues);
366: aij->rowvalues = 0;
368: /* used by MatAXPY() */
369: a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0; /* b->xtoy = 0 */
370: a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0; /* b->XtoY = 0 */
372: return(0);
373: }
377: PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378: {
379: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
383: MatZeroEntries(l->A);
384: MatZeroEntries(l->B);
385: return(0);
386: }
390: PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391: {
392: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
394: PetscMPIInt size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395: PetscInt i,*owners = A->rmap.range;
396: PetscInt *nprocs,j,idx,nsends,row;
397: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
398: PetscInt *rvalues,count,base,slen,*source;
399: PetscInt *lens,*lrows,*values,rstart=A->rmap.rstart;
400: MPI_Comm comm = A->comm;
401: MPI_Request *send_waits,*recv_waits;
402: MPI_Status recv_status,*send_status;
403: #if defined(PETSC_DEBUG)
404: PetscTruth found = PETSC_FALSE;
405: #endif
408: /* first count number of contributors to each processor */
409: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
410: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
411: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
412: j = 0;
413: for (i=0; i<N; i++) {
414: if (lastidx > (idx = rows[i])) j = 0;
415: lastidx = idx;
416: for (; j<size; j++) {
417: if (idx >= owners[j] && idx < owners[j+1]) {
418: nprocs[2*j]++;
419: nprocs[2*j+1] = 1;
420: owner[i] = j;
421: #if defined(PETSC_DEBUG)
422: found = PETSC_TRUE;
423: #endif
424: break;
425: }
426: }
427: #if defined(PETSC_DEBUG)
428: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429: found = PETSC_FALSE;
430: #endif
431: }
432: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434: /* inform other processors of number of messages and max length*/
435: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
437: /* post receives: */
438: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
439: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
440: for (i=0; i<nrecvs; i++) {
441: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
442: }
444: /* do sends:
445: 1) starts[i] gives the starting index in svalues for stuff going to
446: the ith processor
447: */
448: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
449: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
450: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
451: starts[0] = 0;
452: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453: for (i=0; i<N; i++) {
454: svalues[starts[owner[i]]++] = rows[i];
455: }
457: starts[0] = 0;
458: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459: count = 0;
460: for (i=0; i<size; i++) {
461: if (nprocs[2*i+1]) {
462: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
463: }
464: }
465: PetscFree(starts);
467: base = owners[rank];
469: /* wait on receives */
470: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
471: source = lens + nrecvs;
472: count = nrecvs; slen = 0;
473: while (count) {
474: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
475: /* unpack receives into our local space */
476: MPI_Get_count(&recv_status,MPIU_INT,&n);
477: source[imdex] = recv_status.MPI_SOURCE;
478: lens[imdex] = n;
479: slen += n;
480: count--;
481: }
482: PetscFree(recv_waits);
483:
484: /* move the data into the send scatter */
485: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
486: count = 0;
487: for (i=0; i<nrecvs; i++) {
488: values = rvalues + i*nmax;
489: for (j=0; j<lens[i]; j++) {
490: lrows[count++] = values[j] - base;
491: }
492: }
493: PetscFree(rvalues);
494: PetscFree(lens);
495: PetscFree(owner);
496: PetscFree(nprocs);
497:
498: /* actually zap the local rows */
499: /*
500: Zero the required rows. If the "diagonal block" of the matrix
501: is square and the user wishes to set the diagonal, we use separate
502: code so that MatSetValues() is not called for each diagonal entry,
503: which would allocate new memory and trigger many mallocs, slowing things down.
505: Contributed by: Matthew Knepley
506: */
507: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
508: MatZeroRows(l->B,slen,lrows,0.0);
509: if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510: MatZeroRows(l->A,slen,lrows,diag);
511: } else if (diag != 0.0) {
512: MatZeroRows(l->A,slen,lrows,0.0);
513: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516: }
517: for (i = 0; i < slen; i++) {
518: row = lrows[i] + rstart;
519: MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);
520: }
521: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
522: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
523: } else {
524: MatZeroRows(l->A,slen,lrows,0.0);
525: }
526: PetscFree(lrows);
528: /* wait on sends */
529: if (nsends) {
530: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
531: MPI_Waitall(nsends,send_waits,send_status);
532: PetscFree(send_status);
533: }
534: PetscFree(send_waits);
535: PetscFree(svalues);
537: return(0);
538: }
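/*
   Editorial usage sketch (not part of the original source): MatZeroRows() with the
   calling convention of this version (count, global row numbers, diagonal value).
   The listed rows may be owned by other processes; the send/receive exchange above
   routes them to their owners. Error checking is omitted to match this listing.
*/
static PetscErrorCode ZeroRowsUsageSketch(Mat A)
{
  PetscInt    rows[2] = {0,5};  /* global row numbers, possibly off-process */
  PetscScalar diag    = 1.0;    /* value placed on the diagonal of each zeroed row */

  MatZeroRows(A,2,rows,diag);
  return(0);
}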
542: PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543: {
544: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
546: PetscInt nt;
549: VecGetLocalSize(xx,&nt);
550: if (nt != A->cmap.n) {
551: SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552: }
553: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
554: (*a->A->ops->mult)(a->A,xx,yy);
555: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
556: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
557: return(0);
558: }
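/*
   Editorial usage sketch (not part of the original source): the multiply above computes
   yy = A_diag*xx_local + B_off*xx_ghost, overlapping the ghost-value scatter with the
   diagonal-block product. MatGetVecs() returns vectors whose local sizes match the
   layouts checked at the top of the routine. Error checking is omitted.
*/
static PetscErrorCode MultUsageSketch(Mat A)
{
  Vec x,y;

  MatGetVecs(A,&x,&y);  /* x matches the column layout, y the row layout */
  VecSet(x,1.0);
  MatMult(A,x,y);       /* dispatches to MatMult_MPIAIJ() above */
  VecDestroy(x);
  VecDestroy(y);
  return(0);
}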
562: PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563: {
564: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
568: VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
569: (*a->A->ops->multadd)(a->A,xx,yy,zz);
570: VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);
571: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
572: return(0);
573: }
577: PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578: {
579: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
581: PetscTruth merged;
584: VecScatterGetMerged(a->Mvctx,&merged);
585: /* do nondiagonal part */
586: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
587: if (!merged) {
588: /* send it on its way */
589: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
590: /* do local part */
591: (*a->A->ops->multtranspose)(a->A,xx,yy);
592: /* receive remote parts: note this assumes the values are not actually */
593: /* added into yy until the next line */
594: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
595: } else {
596: /* do local part */
597: (*a->A->ops->multtranspose)(a->A,xx,yy);
598: /* send it on its way */
599: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
600: /* values actually were received in the Begin() but we need to call this nop */
601: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
602: }
603: return(0);
604: }
609: PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610: {
611: MPI_Comm comm;
612: Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613: Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614: IS Me,Notme;
616: PetscInt M,N,first,last,*notme,i;
617: PetscMPIInt size;
621: /* Easy test: symmetric diagonal block */
622: Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623: MatIsTranspose(Adia,Bdia,tol,f);
624: if (!*f) return(0);
625: PetscObjectGetComm((PetscObject)Amat,&comm);
626: MPI_Comm_size(comm,&size);
627: if (size == 1) return(0);
629: /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630: MatGetSize(Amat,&M,&N);
631: MatGetOwnershipRange(Amat,&first,&last);
632: PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);
633: for (i=0; i<first; i++) notme[i] = i;
634: for (i=last; i<M; i++) notme[i-last+first] = i;
635: ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);
636: ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);
637: MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);
638: Aoff = Aoffs[0];
639: MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);
640: Boff = Boffs[0];
641: MatIsTranspose(Aoff,Boff,tol,f);
642: MatDestroyMatrices(1,&Aoffs);
643: MatDestroyMatrices(1,&Boffs);
644: ISDestroy(Me);
645: ISDestroy(Notme);
647: return(0);
648: }
653: PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654: {
655: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
659: /* do nondiagonal part */
660: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
661: /* send it on its way */
662: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
663: /* do local part */
664: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
665: /* receive remote parts */
666: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
667: return(0);
668: }
670: /*
671: This only works correctly for square matrices where the subblock A->A is the
672: diagonal block
673: */
676: PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677: {
679: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
682: if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683: if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684: SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685: }
686: MatGetDiagonal(a->A,v);
687: return(0);
688: }
692: PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693: {
694: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
698: MatScale(a->A,aa);
699: MatScale(a->B,aa);
700: return(0);
701: }
705: PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706: {
707: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
711: #if defined(PETSC_USE_LOG)
712: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713: #endif
714: MatStashDestroy_Private(&mat->stash);
715: MatDestroy(aij->A);
716: MatDestroy(aij->B);
717: #if defined (PETSC_USE_CTABLE)
718: if (aij->colmap) {PetscTableDestroy(aij->colmap);}
719: #else
720: PetscFree(aij->colmap);
721: #endif
722: PetscFree(aij->garray);
723: if (aij->lvec) {VecDestroy(aij->lvec);}
724: if (aij->Mvctx) {VecScatterDestroy(aij->Mvctx);}
725: PetscFree(aij->rowvalues);
726: PetscFree(aij->ld);
727: PetscFree(aij);
729: PetscObjectChangeTypeName((PetscObject)mat,0);
730: PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);
731: PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);
732: PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
733: PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);
734: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);
735: PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);
736: PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);
737: return(0);
738: }
742: PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743: {
744: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
745: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
746: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
747: PetscErrorCode ierr;
748: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag;
749: int fd;
750: PetscInt nz,header[4],*row_lengths,*range=0,rlen,i;
751: PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752: PetscScalar *column_values;
755: MPI_Comm_rank(mat->comm,&rank);
756: MPI_Comm_size(mat->comm,&size);
757: nz = A->nz + B->nz;
758: if (!rank) {
759: header[0] = MAT_FILE_COOKIE;
760: header[1] = mat->rmap.N;
761: header[2] = mat->cmap.N;
762: MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);
763: PetscViewerBinaryGetDescriptor(viewer,&fd);
764: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
765: /* get largest number of rows any processor has */
766: rlen = mat->rmap.n;
767: range = mat->rmap.range;
768: for (i=1; i<size; i++) {
769: rlen = PetscMax(rlen,range[i+1] - range[i]);
770: }
771: } else {
772: MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);
773: rlen = mat->rmap.n;
774: }
776: /* load up the local row counts */
777: PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);
778: for (i=0; i<mat->rmap.n; i++) {
779: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780: }
782: /* store the row lengths to the file */
783: if (!rank) {
784: MPI_Status status;
785: PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);
786: for (i=1; i<size; i++) {
787: rlen = range[i+1] - range[i];
788: MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);
789: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);
790: }
791: } else {
792: MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);
793: }
794: PetscFree(row_lengths);
796: /* load up the local column indices */
797: nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
798: MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);
799: PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);
800: cnt = 0;
801: for (i=0; i<mat->rmap.n; i++) {
802: for (j=B->i[i]; j<B->i[i+1]; j++) {
803: if ( (col = garray[B->j[j]]) > cstart) break;
804: column_indices[cnt++] = col;
805: }
806: for (k=A->i[i]; k<A->i[i+1]; k++) {
807: column_indices[cnt++] = A->j[k] + cstart;
808: }
809: for (; j<B->i[i+1]; j++) {
810: column_indices[cnt++] = garray[B->j[j]];
811: }
812: }
813: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
815: /* store the column indices to the file */
816: if (!rank) {
817: MPI_Status status;
818: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);
819: for (i=1; i<size; i++) {
820: MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);
821: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822: MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);
823: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);
824: }
825: } else {
826: MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);
827: MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);
828: }
829: PetscFree(column_indices);
831: /* load up the local column values */
832: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
833: cnt = 0;
834: for (i=0; i<mat->rmap.n; i++) {
835: for (j=B->i[i]; j<B->i[i+1]; j++) {
836: if ( garray[B->j[j]] > cstart) break;
837: column_values[cnt++] = B->a[j];
838: }
839: for (k=A->i[i]; k<A->i[i+1]; k++) {
840: column_values[cnt++] = A->a[k];
841: }
842: for (; j<B->i[i+1]; j++) {
843: column_values[cnt++] = B->a[j];
844: }
845: }
846: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
848: /* store the column values to the file */
849: if (!rank) {
850: MPI_Status status;
851: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);
852: for (i=1; i<size; i++) {
853: MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);
854: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855: MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);
856: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);
857: }
858: } else {
859: MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);
860: MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);
861: }
862: PetscFree(column_values);
863: return(0);
864: }
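/*
   Editorial sketch (not part of the original source): the file written above consists of
   header[4] = {MAT_FILE_COOKIE, global rows, global cols, total nonzeros}, followed by all
   row lengths, then all global column indices, then all values, each gathered to process 0
   in rank order. A reader of just the header could look like this; the helper name is
   illustrative and error checking is omitted.
*/
static PetscErrorCode ReadBinaryHeaderSketch(int fd,PetscInt *M,PetscInt *N,PetscInt *nz)
{
  PetscInt header[4];

  PetscBinaryRead(fd,header,4,PETSC_INT);
  if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a PETSc matrix file");
  *M = header[1]; *N = header[2]; *nz = header[3];
  return(0);
}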
868: PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869: {
870: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
871: PetscErrorCode ierr;
872: PetscMPIInt rank = aij->rank,size = aij->size;
873: PetscTruth isdraw,iascii,isbinary;
874: PetscViewer sviewer;
875: PetscViewerFormat format;
878: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
879: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
880: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
881: if (iascii) {
882: PetscViewerGetFormat(viewer,&format);
883: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884: MatInfo info;
885: PetscTruth inodes;
887: MPI_Comm_rank(mat->comm,&rank);
888: MatGetInfo(mat,MAT_LOCAL,&info);
889: MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);
890: if (!inodes) {
891: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892: rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
893: } else {
894: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895: rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
896: }
897: MatGetInfo(aij->A,MAT_LOCAL,&info);
898: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
899: MatGetInfo(aij->B,MAT_LOCAL,&info);
900: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);
901: PetscViewerFlush(viewer);
902: PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");
903: VecScatterView(aij->Mvctx,viewer);
904: return(0);
905: } else if (format == PETSC_VIEWER_ASCII_INFO) {
906: PetscInt inodecount,inodelimit,*inodes;
907: MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);
908: if (inodes) {
909: PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);
910: } else {
911: PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");
912: }
913: return(0);
914: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
915: return(0);
916: }
917: } else if (isbinary) {
918: if (size == 1) {
919: PetscObjectSetName((PetscObject)aij->A,mat->name);
920: MatView(aij->A,viewer);
921: } else {
922: MatView_MPIAIJ_Binary(mat,viewer);
923: }
924: return(0);
925: } else if (isdraw) {
926: PetscDraw draw;
927: PetscTruth isnull;
928: PetscViewerDrawGetDraw(viewer,0,&draw);
929: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
930: }
932: if (size == 1) {
933: PetscObjectSetName((PetscObject)aij->A,mat->name);
934: MatView(aij->A,viewer);
935: } else {
936: /* assemble the entire matrix onto first processor. */
937: Mat A;
938: Mat_SeqAIJ *Aloc;
939: PetscInt M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
940: PetscScalar *a;
942: MatCreate(mat->comm,&A);
943: if (!rank) {
944: MatSetSizes(A,M,N,M,N);
945: } else {
946: MatSetSizes(A,0,0,M,N);
947: }
948: /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
949: MatSetType(A,MATMPIAIJ);
950: MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);
951: PetscLogObjectParent(mat,A);
953: /* copy over the A part */
954: Aloc = (Mat_SeqAIJ*)aij->A->data;
955: m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
956: row = mat->rmap.rstart;
957: for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
958: for (i=0; i<m; i++) {
959: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
960: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
961: }
962: aj = Aloc->j;
963: for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
965: /* copy over the B part */
966: Aloc = (Mat_SeqAIJ*)aij->B->data;
967: m = aij->B->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
968: row = mat->rmap.rstart;
969: PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);
970: ct = cols;
971: for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
972: for (i=0; i<m; i++) {
973: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
974: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
975: }
976: PetscFree(ct);
977: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
978: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
979: /*
980: Everyone has to call to draw the matrix since the graphics waits are
981: synchronized across all processors that share the PetscDraw object
982: */
983: PetscViewerGetSingleton(viewer,&sviewer);
984: if (!rank) {
985: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
986: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
987: }
988: PetscViewerRestoreSingleton(viewer,&sviewer);
989: MatDestroy(A);
990: }
991: return(0);
992: }
996: PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997: {
999: PetscTruth iascii,isdraw,issocket,isbinary;
1000:
1002: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
1003: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1004: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1005: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
1006: if (iascii || isdraw || isbinary || issocket) {
1007: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
1008: } else {
1009: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010: }
1011: return(0);
1012: }
1016: PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017: {
1018: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1020: Vec bb1;
1023: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1025: VecDuplicate(bb,&bb1);
1027: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028: if (flag & SOR_ZERO_INITIAL_GUESS) {
1029: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
1030: its--;
1031: }
1032:
1033: while (its--) {
1034: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1035: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1037: /* update rhs: bb1 = bb - B*x */
1038: VecScale(mat->lvec,-1.0);
1039: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1041: /* local sweep */
1042: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043:
1044: }
1045: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046: if (flag & SOR_ZERO_INITIAL_GUESS) {
1047: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1048: its--;
1049: }
1050: while (its--) {
1051: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1052: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1054: /* update rhs: bb1 = bb - B*x */
1055: VecScale(mat->lvec,-1.0);
1056: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1058: /* local sweep */
1059: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060:
1061: }
1062: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063: if (flag & SOR_ZERO_INITIAL_GUESS) {
1064: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1065: its--;
1066: }
1067: while (its--) {
1068: VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1069: VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);
1071: /* update rhs: bb1 = bb - B*x */
1072: VecScale(mat->lvec,-1.0);
1073: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1075: /* local sweep */
1076: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077:
1078: }
1079: } else {
1080: SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081: }
1083: VecDestroy(bb1);
1084: return(0);
1085: }
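/*
   Editorial usage sketch (not part of the original source): each outer iteration above
   gathers the ghost values of xx, forms the updated local right-hand side
   bb1 = bb - B_off*xx_ghost, and then runs lits sweeps of local SOR on the diagonal
   block A_diag. A direct call with the local (block Jacobi) symmetric sweep would look
   like this; the omega and iteration counts are illustrative and error checks omitted.
*/
static PetscErrorCode RelaxUsageSketch(Mat A,Vec b,Vec x)
{
  MatRelax(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP,0.0,2,1,x);  /* omega = 1, 2 outer its, 1 local it */
  return(0);
}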
1089: PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090: {
1091: MPI_Comm comm,pcomm;
1092: PetscInt first,local_size,nrows,*rows;
1093: int ntids;
1094: IS crowp,growp,irowp,lrowp,lcolp,icolp;
1098: PetscObjectGetComm((PetscObject)A,&comm);
1099: /* make a collective version of 'rowp' */
1100: PetscObjectGetComm((PetscObject)rowp,&pcomm);
1101: if (pcomm==comm) {
1102: crowp = rowp;
1103: } else {
1104: ISGetSize(rowp,&nrows);
1105: ISGetIndices(rowp,&rows);
1106: ISCreateGeneral(comm,nrows,rows,&crowp);
1107: ISRestoreIndices(rowp,&rows);
1108: }
1109: /* collect the global row permutation and invert it */
1110: ISAllGather(crowp,&growp);
1111: ISSetPermutation(growp);
1112: if (pcomm!=comm) {
1113: ISDestroy(crowp);
1114: }
1115: ISInvertPermutation(growp,PETSC_DECIDE,&irowp);
1116: /* get the local target indices */
1117: MatGetOwnershipRange(A,&first,PETSC_NULL);
1118: MatGetLocalSize(A,&local_size,PETSC_NULL);
1119: ISGetIndices(irowp,&rows);
1120: ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);
1121: ISRestoreIndices(irowp,&rows);
1122: ISDestroy(irowp);
1123: /* the column permutation is so much easier;
1124: make a local version of 'colp' and invert it */
1125: PetscObjectGetComm((PetscObject)colp,&pcomm);
1126: MPI_Comm_size(pcomm,&ntids);
1127: if (ntids==1) {
1128: lcolp = colp;
1129: } else {
1130: ISGetSize(colp,&nrows);
1131: ISGetIndices(colp,&rows);
1132: ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);
1133: }
1134: ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);
1135: ISSetPermutation(lcolp);
1136: if (ntids>1) {
1137: ISRestoreIndices(colp,&rows);
1138: ISDestroy(lcolp);
1139: }
1140: /* now we just get the submatrix */
1141: MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);
1142: /* clean up */
1143: ISDestroy(lrowp);
1144: ISDestroy(icolp);
1145: return(0);
1146: }
1150: PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151: {
1152: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1153: Mat A = mat->A,B = mat->B;
1155: PetscReal isend[5],irecv[5];
1158: info->block_size = 1.0;
1159: MatGetInfo(A,MAT_LOCAL,info);
1160: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161: isend[3] = info->memory; isend[4] = info->mallocs;
1162: MatGetInfo(B,MAT_LOCAL,info);
1163: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164: isend[3] += info->memory; isend[4] += info->mallocs;
1165: if (flag == MAT_LOCAL) {
1166: info->nz_used = isend[0];
1167: info->nz_allocated = isend[1];
1168: info->nz_unneeded = isend[2];
1169: info->memory = isend[3];
1170: info->mallocs = isend[4];
1171: } else if (flag == MAT_GLOBAL_MAX) {
1172: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1173: info->nz_used = irecv[0];
1174: info->nz_allocated = irecv[1];
1175: info->nz_unneeded = irecv[2];
1176: info->memory = irecv[3];
1177: info->mallocs = irecv[4];
1178: } else if (flag == MAT_GLOBAL_SUM) {
1179: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1180: info->nz_used = irecv[0];
1181: info->nz_allocated = irecv[1];
1182: info->nz_unneeded = irecv[2];
1183: info->memory = irecv[3];
1184: info->mallocs = irecv[4];
1185: }
1186: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1187: info->fill_ratio_needed = 0;
1188: info->factor_mallocs = 0;
1189: info->rows_global = (double)matin->rmap.N;
1190: info->columns_global = (double)matin->cmap.N;
1191: info->rows_local = (double)matin->rmap.n;
1192: info->columns_local = (double)matin->cmap.N;
1194: return(0);
1195: }
1199: PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1200: {
1201: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1205: switch (op) {
1206: case MAT_NO_NEW_NONZERO_LOCATIONS:
1207: case MAT_YES_NEW_NONZERO_LOCATIONS:
1208: case MAT_COLUMNS_UNSORTED:
1209: case MAT_COLUMNS_SORTED:
1210: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1211: case MAT_KEEP_ZEROED_ROWS:
1212: case MAT_NEW_NONZERO_LOCATION_ERR:
1213: case MAT_USE_INODES:
1214: case MAT_DO_NOT_USE_INODES:
1215: case MAT_IGNORE_ZERO_ENTRIES:
1216: MatSetOption(a->A,op);
1217: MatSetOption(a->B,op);
1218: break;
1219: case MAT_ROW_ORIENTED:
1220: a->roworiented = PETSC_TRUE;
1221: MatSetOption(a->A,op);
1222: MatSetOption(a->B,op);
1223: break;
1224: case MAT_ROWS_SORTED:
1225: case MAT_ROWS_UNSORTED:
1226: case MAT_YES_NEW_DIAGONALS:
1227: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
1228: break;
1229: case MAT_COLUMN_ORIENTED:
1230: a->roworiented = PETSC_FALSE;
1231: MatSetOption(a->A,op);
1232: MatSetOption(a->B,op);
1233: break;
1234: case MAT_IGNORE_OFF_PROC_ENTRIES:
1235: a->donotstash = PETSC_TRUE;
1236: break;
1237: case MAT_NO_NEW_DIAGONALS:
1238: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1239: case MAT_SYMMETRIC:
1240: MatSetOption(a->A,op);
1241: break;
1242: case MAT_STRUCTURALLY_SYMMETRIC:
1243: case MAT_HERMITIAN:
1244: case MAT_SYMMETRY_ETERNAL:
1245: MatSetOption(a->A,op);
1246: break;
1247: case MAT_NOT_SYMMETRIC:
1248: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1249: case MAT_NOT_HERMITIAN:
1250: case MAT_NOT_SYMMETRY_ETERNAL:
1251: break;
1252: default:
1253: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1254: }
1255: return(0);
1256: }
1260: PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261: {
1262: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1263: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1265: PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1266: PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1267: PetscInt *cmap,*idx_p;
1270: if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271: mat->getrowactive = PETSC_TRUE;
1273: if (!mat->rowvalues && (idx || v)) {
1274: /*
1275: allocate enough space to hold information from the longest row.
1276: */
1277: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1278: PetscInt max = 1,tmp;
1279: for (i=0; i<matin->rmap.n; i++) {
1280: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1281: if (max < tmp) { max = tmp; }
1282: }
1283: PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);
1284: mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1285: }
1287: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1288: lrow = row - rstart;
1290: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291: if (!v) {pvA = 0; pvB = 0;}
1292: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1294: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1295: nztot = nzA + nzB;
1297: cmap = mat->garray;
1298: if (v || idx) {
1299: if (nztot) {
1300: /* Sort by increasing column numbers, assuming A and B already sorted */
1301: PetscInt imark = -1;
1302: if (v) {
1303: *v = v_p = mat->rowvalues;
1304: for (i=0; i<nzB; i++) {
1305: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1306: else break;
1307: }
1308: imark = i;
1309: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1310: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1311: }
1312: if (idx) {
1313: *idx = idx_p = mat->rowindices;
1314: if (imark > -1) {
1315: for (i=0; i<imark; i++) {
1316: idx_p[i] = cmap[cworkB[i]];
1317: }
1318: } else {
1319: for (i=0; i<nzB; i++) {
1320: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1321: else break;
1322: }
1323: imark = i;
1324: }
1325: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1326: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1327: }
1328: } else {
1329: if (idx) *idx = 0;
1330: if (v) *v = 0;
1331: }
1332: }
1333: *nz = nztot;
1334: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1335: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1336: return(0);
1337: }
1341: PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342: {
1343: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1346: if (!aij->getrowactive) {
1347: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1348: }
1349: aij->getrowactive = PETSC_FALSE;
1350: return(0);
1351: }
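/*
   Editorial usage sketch (not part of the original source): the row must be owned by the
   calling process, and each MatGetRow() must be paired with MatRestoreRow() before the
   next row is requested, since MatGetRow_MPIAIJ() above allows only one active row at a
   time. The const qualifiers on the output arrays follow the installed petscmat.h; error
   checking is omitted.
*/
static PetscErrorCode GetRowUsageSketch(Mat A)
{
  PetscInt    rstart,rend,row,ncols,*cols;
  PetscScalar *vals;

  MatGetOwnershipRange(A,&rstart,&rend);
  for (row=rstart; row<rend; row++) {
    MatGetRow(A,row,&ncols,&cols,&vals);     /* merged, sorted global column numbers */
    /* ... inspect ncols, cols[], vals[] ... */
    MatRestoreRow(A,row,&ncols,&cols,&vals);
  }
  return(0);
}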
1355: PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1356: {
1357: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1358: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1360: PetscInt i,j,cstart = mat->cmap.rstart;
1361: PetscReal sum = 0.0;
1362: PetscScalar *v;
1365: if (aij->size == 1) {
1366: MatNorm(aij->A,type,norm);
1367: } else {
1368: if (type == NORM_FROBENIUS) {
1369: v = amat->a;
1370: for (i=0; i<amat->nz; i++) {
1371: #if defined(PETSC_USE_COMPLEX)
1372: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1373: #else
1374: sum += (*v)*(*v); v++;
1375: #endif
1376: }
1377: v = bmat->a;
1378: for (i=0; i<bmat->nz; i++) {
1379: #if defined(PETSC_USE_COMPLEX)
1380: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1381: #else
1382: sum += (*v)*(*v); v++;
1383: #endif
1384: }
1385: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1386: *norm = sqrt(*norm);
1387: } else if (type == NORM_1) { /* max column norm */
1388: PetscReal *tmp,*tmp2;
1389: PetscInt *jj,*garray = aij->garray;
1390: PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);
1391: PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);
1392: PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));
1393: *norm = 0.0;
1394: v = amat->a; jj = amat->j;
1395: for (j=0; j<amat->nz; j++) {
1396: tmp[cstart + *jj++ ] += PetscAbsScalar(*v); v++;
1397: }
1398: v = bmat->a; jj = bmat->j;
1399: for (j=0; j<bmat->nz; j++) {
1400: tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1401: }
1402: MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);
1403: for (j=0; j<mat->cmap.N; j++) {
1404: if (tmp2[j] > *norm) *norm = tmp2[j];
1405: }
1406: PetscFree(tmp);
1407: PetscFree(tmp2);
1408: } else if (type == NORM_INFINITY) { /* max row norm */
1409: PetscReal ntemp = 0.0;
1410: for (j=0; j<aij->A->rmap.n; j++) {
1411: v = amat->a + amat->i[j];
1412: sum = 0.0;
1413: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1414: sum += PetscAbsScalar(*v); v++;
1415: }
1416: v = bmat->a + bmat->i[j];
1417: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1418: sum += PetscAbsScalar(*v); v++;
1419: }
1420: if (sum > ntemp) ntemp = sum;
1421: }
1422: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1423: } else {
1424: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1425: }
1426: }
1427: return(0);
1428: }
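/*
   Editorial note (not part of the original source), summarizing what the branches above
   compute and how the local contributions are reduced across the communicator:
     NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )   -- local sums of squares combined with MPI_SUM
     NORM_1:          max_j sum_i |a_ij|        -- column sums combined with MPI_SUM, then maxed
     NORM_INFINITY:   max_i sum_j |a_ij|        -- local row sums combined with MPI_MAX
*/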
1432: PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1433: {
1434: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1435: Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1437: PetscInt M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1438: PetscInt cstart=A->cmap.rstart,ncol;
1439: Mat B;
1440: PetscScalar *array;
1443: if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1445: /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1446: ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1447: ai = Aloc->i; aj = Aloc->j;
1448: bi = Bloc->i; bj = Bloc->j;
1449: PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);
1450: cols = d_nnz + na + 1; /* work space to be used by B part */
1451: PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));
1452: for (i=0; i<ai[ma]; i++){
1453: d_nnz[aj[i]] ++;
1454: aj[i] += cstart; /* global col index to be used by MatSetValues() */
1455: }
1457: MatCreate(A->comm,&B);
1458: MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);
1459: MatSetType(B,A->type_name);
1460: MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);
1462: /* copy over the A part */
1463: array = Aloc->a;
1464: row = A->rmap.rstart;
1465: for (i=0; i<ma; i++) {
1466: ncol = ai[i+1]-ai[i];
1467: MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);
1468: row++; array += ncol; aj += ncol;
1469: }
1470: aj = Aloc->j;
1471: for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1473: /* copy over the B part */
1474: array = Bloc->a;
1475: row = A->rmap.rstart;
1476: for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1477: for (i=0; i<mb; i++) {
1478: ncol = bi[i+1]-bi[i];
1479: MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);
1480: row++; array += ncol; cols += ncol;
1481: }
1482: PetscFree(d_nnz);
1483: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1484: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1485: if (matout) {
1486: *matout = B;
1487: } else {
1488: MatHeaderCopy(A,B);
1489: }
1490: return(0);
1491: }
1495: PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1496: {
1497: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1498: Mat a = aij->A,b = aij->B;
1500: PetscInt s1,s2,s3;
1503: MatGetLocalSize(mat,&s2,&s3);
1504: if (rr) {
1505: VecGetLocalSize(rr,&s1);
1506: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1507: /* Overlap communication with computation. */
1508: VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1509: }
1510: if (ll) {
1511: VecGetLocalSize(ll,&s1);
1512: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1513: (*b->ops->diagonalscale)(b,ll,0);
1514: }
1515: /* scale the diagonal block */
1516: (*a->ops->diagonalscale)(a,ll,rr);
1518: if (rr) {
1519: /* Do a scatter end and then right scale the off-diagonal block */
1520: VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);
1521: (*b->ops->diagonalscale)(b,0,aij->lvec);
1522: }
1523:
1524: return(0);
1525: }
1529: PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1530: {
1531: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1535: MatSetBlockSize(a->A,bs);
1536: MatSetBlockSize(a->B,bs);
1537: return(0);
1538: }
1541: PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1542: {
1543: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1547: MatSetUnfactored(a->A);
1548: return(0);
1549: }
1553: PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1554: {
1555: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1556: Mat a,b,c,d;
1557: PetscTruth flg;
1561: a = matA->A; b = matA->B;
1562: c = matB->A; d = matB->B;
1564: MatEqual(a,c,&flg);
1565: if (flg) {
1566: MatEqual(b,d,&flg);
1567: }
1568: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1569: return(0);
1570: }
1574: PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1575: {
1577: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1578: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1581: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1582: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1583: /* because of the column compression in the off-processor part of the matrix a->B,
1584: the number of columns in a->B and b->B may be different, hence we cannot call
1585: the MatCopy() directly on the two parts. If need be, we can provide a more
1586: efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1587: then copying the submatrices */
1588: MatCopy_Basic(A,B,str);
1589: } else {
1590: MatCopy(a->A,b->A,str);
1591: MatCopy(a->B,b->B,str);
1592: }
1593: return(0);
1594: }
1598: PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1599: {
1603: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1604: return(0);
1605: }
1607: #include "petscblaslapack.h"
1610: PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1611: {
1613: PetscInt i;
1614: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1615: PetscBLASInt bnz,one=1;
1616: Mat_SeqAIJ *x,*y;
1619: if (str == SAME_NONZERO_PATTERN) {
1620: PetscScalar alpha = a;
1621: x = (Mat_SeqAIJ *)xx->A->data;
1622: y = (Mat_SeqAIJ *)yy->A->data;
1623: bnz = (PetscBLASInt)x->nz;
1624: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1625: x = (Mat_SeqAIJ *)xx->B->data;
1626: y = (Mat_SeqAIJ *)yy->B->data;
1627: bnz = (PetscBLASInt)x->nz;
1628: BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1629: } else if (str == SUBSET_NONZERO_PATTERN) {
1630: MatAXPY_SeqAIJ(yy->A,a,xx->A,str);
1632: x = (Mat_SeqAIJ *)xx->B->data;
1633: y = (Mat_SeqAIJ *)yy->B->data;
1634: if (y->xtoy && y->XtoY != xx->B) {
1635: PetscFree(y->xtoy);
1636: MatDestroy(y->XtoY);
1637: }
1638: if (!y->xtoy) { /* get xtoy */
1639: MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);
1640: y->XtoY = xx->B;
1641: PetscObjectReference((PetscObject)xx->B);
1642: }
1643: for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1644: } else {
1645: MatAXPY_Basic(Y,a,X,str);
1646: }
1647: return(0);
1648: }
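/*
   Editorial usage sketch (not part of the original source): Y = Y + a*X. With
   SAME_NONZERO_PATTERN the routine above reduces to two BLAS axpy calls on the stored
   values of the diagonal and off-diagonal blocks. The scalar is illustrative and error
   checking is omitted.
*/
static PetscErrorCode AXPYUsageSketch(Mat Y,Mat X)
{
  MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);
  return(0);
}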
1650: EXTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
1654: PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
1655: {
1656: #if defined(PETSC_USE_COMPLEX)
1658: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1661: MatConjugate_SeqAIJ(aij->A);
1662: MatConjugate_SeqAIJ(aij->B);
1663: #else
1665: #endif
1666: return(0);
1667: }
1671: PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1672: {
1673: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1677: MatRealPart(a->A);
1678: MatRealPart(a->B);
1679: return(0);
1680: }
1684: PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1685: {
1686: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1690: MatImaginaryPart(a->A);
1691: MatImaginaryPart(a->B);
1692: return(0);
1693: }
1695: #ifdef PETSC_HAVE_PBGL
1697: #include <boost/parallel/mpi/bsp_process_group.hpp>
1698: #include <boost/graph/distributed/ilu_default_graph.hpp>
1699: #include <boost/graph/distributed/ilu_0_block.hpp>
1700: #include <boost/graph/distributed/ilu_preconditioner.hpp>
1701: #include <boost/graph/distributed/petsc/interface.hpp>
1702: #include <boost/multi_array.hpp>
1703: #include <boost/parallel/distributed_property_map.hpp>
1707: /*
1708: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1709: */
1710: PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1711: {
1712: namespace petsc = boost::distributed::petsc;
1713:
1714: namespace graph_dist = boost::graph::distributed;
1715: using boost::graph::distributed::ilu_default::process_group_type;
1716: using boost::graph::ilu_permuted;
1718: PetscTruth row_identity, col_identity;
1719: PetscContainer c;
1720: PetscInt m, n, M, N;
1721: PetscErrorCode ierr;
1724: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1725: ISIdentity(isrow, &row_identity);
1726: ISIdentity(iscol, &col_identity);
1727: if (!row_identity || !col_identity) {
1728: SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1729: }
1731: process_group_type pg;
1732: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
1733: lgraph_type* lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1734: lgraph_type& level_graph = *lgraph_p;
1735: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
1737: petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1738: ilu_permuted(level_graph);
1740: /* put together the new matrix */
1741: MatCreate(A->comm, fact);
1742: MatGetLocalSize(A, &m, &n);
1743: MatGetSize(A, &M, &N);
1744: MatSetSizes(*fact, m, n, M, N);
1745: MatSetType(*fact, A->type_name);
1746: MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);
1747: MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);
1748: (*fact)->factor = FACTOR_LU;
1750: PetscContainerCreate(A->comm, &c);
1751: PetscContainerSetPointer(c, lgraph_p);
1752: PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1753: return(0);
1754: }
1758: PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1759: {
1761: return(0);
1762: }
1766: /*
1767: This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1768: */
1769: PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1770: {
1771: namespace graph_dist = boost::graph::distributed;
1773: typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
1774: lgraph_type* lgraph_p;
1775: PetscContainer c;
1779: PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);
1780: PetscContainerGetPointer(c, (void **) &lgraph_p);
1781: VecCopy(b, x);
1783: PetscScalar* array_x;
1784: VecGetArray(x, &array_x);
1785: PetscInt sx;
1786: VecGetSize(x, &sx);
1787:
1788: PetscScalar* array_b;
1789: VecGetArray(b, &array_b);
1790: PetscInt sb;
1791: VecGetSize(b, &sb);
1793: lgraph_type& level_graph = *lgraph_p;
1794: graph_dist::ilu_default::graph_type& graph(level_graph.graph);
1796: typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1797: array_ref_type ref_b(array_b, boost::extents[num_vertices(graph)]),
1798: ref_x(array_x, boost::extents[num_vertices(graph)]);
1800: typedef boost::iterator_property_map<array_ref_type::iterator,
1801: boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type> gvector_type;
1802: gvector_type vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1803: vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1804:
1805: ilu_set_solve(*lgraph_p, vector_b, vector_x);
1807: return(0);
1808: }
1809: #endif
1811: typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1812: PetscInt nzlocal,nsends,nrecvs;
1813: PetscInt *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
1814: PetscScalar *sbuf_a,**rbuf_a;
1815: PetscErrorCode (*MatDestroy)(Mat);
1816: } Mat_Redundant;
1820: PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1821: {
1822: PetscErrorCode ierr;
1823: Mat_Redundant *redund=(Mat_Redundant*)ptr;
1824: PetscInt i;
1827: PetscFree(redund->send_rank);
1828: PetscFree(redund->sbuf_j);
1829: PetscFree(redund->sbuf_a);
1830: for (i=0; i<redund->nrecvs; i++){
1831: PetscFree(redund->rbuf_j[i]);
1832: PetscFree(redund->rbuf_a[i]);
1833: }
1834: PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);
1835: PetscFree(redund);
1836: return(0);
1837: }
1841: PetscErrorCode MatDestroy_MatRedundant(Mat A)
1842: {
1843: PetscErrorCode ierr;
1844: PetscContainer container;
1845: Mat_Redundant *redund=PETSC_NULL;
1848: PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);
1849: if (container) {
1850: PetscContainerGetPointer(container,(void **)&redund);
1851: } else {
1852:     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1853: }
1854: A->ops->destroy = redund->MatDestroy;
1855: PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);
1856: (*A->ops->destroy)(A);
1857: PetscContainerDestroy(container);
1858: return(0);
1859: }
1863: PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1864: {
1865: PetscMPIInt rank,size;
1866: MPI_Comm comm=mat->comm;
1868: PetscInt nsends=0,nrecvs=0,i,rownz_max=0;
1869: PetscMPIInt *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1870: PetscInt *rowrange=mat->rmap.range;
1871: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1872: Mat A=aij->A,B=aij->B,C=*matredundant;
1873: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1874: PetscScalar *sbuf_a;
1875: PetscInt nzlocal=a->nz+b->nz;
1876: PetscInt j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1877: PetscInt rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1878: PetscInt *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1879: PetscScalar *vals,*aworkA,*aworkB;
1880: PetscMPIInt tag1,tag2,tag3,imdex;
1881: MPI_Request *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1882: *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1883: MPI_Status recv_status,*send_status;
1884: PetscInt *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1885: PetscInt **rbuf_j=PETSC_NULL;
1886: PetscScalar **rbuf_a=PETSC_NULL;
1887: Mat_Redundant *redund=PETSC_NULL;
1888: PetscContainer container;
1891: MPI_Comm_rank(comm,&rank);
1892: MPI_Comm_size(comm,&size);
1894: if (reuse == MAT_REUSE_MATRIX) {
1895: MatGetSize(C,&M,&N);
1896: if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1897: MatGetLocalSize(C,&M,&N);
1898: if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1899: PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);
1900: if (container) {
1901: PetscContainerGetPointer(container,(void **)&redund);
1902: } else {
1903:       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1904: }
1905: if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1907: nsends = redund->nsends;
1908: nrecvs = redund->nrecvs;
1909: send_rank = redund->send_rank; recv_rank = send_rank + size;
1910: sbuf_nz = redund->sbuf_nz; rbuf_nz = sbuf_nz + nsends;
1911: sbuf_j = redund->sbuf_j;
1912: sbuf_a = redund->sbuf_a;
1913: rbuf_j = redund->rbuf_j;
1914: rbuf_a = redund->rbuf_a;
1915: }
1917: if (reuse == MAT_INITIAL_MATRIX){
1918: PetscMPIInt subrank,subsize;
1919: PetscInt nleftover,np_subcomm;
1920: /* get the destination processors' id send_rank, nsends and nrecvs */
1921: MPI_Comm_rank(subcomm,&subrank);
1922: MPI_Comm_size(subcomm,&subsize);
1923: PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
1924: recv_rank = send_rank + size;
1925: np_subcomm = size/nsubcomm;
1926: nleftover = size - nsubcomm*np_subcomm;
1927: nsends = 0; nrecvs = 0;
1928: for (i=0; i<size; i++){ /* i=rank*/
1929: if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1930: send_rank[nsends] = i; nsends++;
1931: recv_rank[nrecvs++] = i;
1932: }
1933: }
1934: if (rank >= size - nleftover){/* this proc is a leftover processor */
1935: i = size-nleftover-1;
1936: j = 0;
1937: while (j < nsubcomm - nleftover){
1938: send_rank[nsends++] = i;
1939: i--; j++;
1940: }
1941: }
1943: if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1944: for (i=0; i<nleftover; i++){
1945: recv_rank[nrecvs++] = size-nleftover+i;
1946: }
1947: }
1949: /* allocate sbuf_j, sbuf_a */
1950: i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1951: PetscMalloc(i*sizeof(PetscInt),&sbuf_j);
1952: PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);
1953: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
1954:
1955: /* copy mat's local entries into the buffers */
1956: if (reuse == MAT_INITIAL_MATRIX){
1957: rownz_max = 0;
1958: rptr = sbuf_j;
1959: cols = sbuf_j + rend-rstart + 1;
1960: vals = sbuf_a;
1961: rptr[0] = 0;
1962: for (i=0; i<rend-rstart; i++){
1963: row = i + rstart;
1964: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1965: ncols = nzA + nzB;
1966: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1967: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1968: /* load the column indices for this row into cols */
1969: lwrite = 0;
1970: for (l=0; l<nzB; l++) {
1971: if ((ctmp = bmap[cworkB[l]]) < cstart){
1972: vals[lwrite] = aworkB[l];
1973: cols[lwrite++] = ctmp;
1974: }
1975: }
1976: for (l=0; l<nzA; l++){
1977: vals[lwrite] = aworkA[l];
1978: cols[lwrite++] = cstart + cworkA[l];
1979: }
1980: for (l=0; l<nzB; l++) {
1981: if ((ctmp = bmap[cworkB[l]]) >= cend){
1982: vals[lwrite] = aworkB[l];
1983: cols[lwrite++] = ctmp;
1984: }
1985: }
1986: vals += ncols;
1987: cols += ncols;
1988: rptr[i+1] = rptr[i] + ncols;
1989: if (rownz_max < ncols) rownz_max = ncols;
1990: }
1991:     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
1992: } else { /* only copy matrix values into sbuf_a */
1993: rptr = sbuf_j;
1994: vals = sbuf_a;
1995: rptr[0] = 0;
1996: for (i=0; i<rend-rstart; i++){
1997: row = i + rstart;
1998: nzA = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1999: ncols = nzA + nzB;
2000: cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2001: aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2002: lwrite = 0;
2003: for (l=0; l<nzB; l++) {
2004: if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2005: }
2006: for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2007: for (l=0; l<nzB; l++) {
2008: if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2009: }
2010: vals += ncols;
2011: rptr[i+1] = rptr[i] + ncols;
2012: }
2013: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2015: /* send nzlocal to others, and recv other's nzlocal */
2016: /*--------------------------------------------------*/
2017: if (reuse == MAT_INITIAL_MATRIX){
2018: PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2019: s_waits2 = s_waits3 + nsends;
2020: s_waits1 = s_waits2 + nsends;
2021: r_waits1 = s_waits1 + nsends;
2022: r_waits2 = r_waits1 + nrecvs;
2023: r_waits3 = r_waits2 + nrecvs;
2024: } else {
2025: PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);
2026: r_waits3 = s_waits3 + nsends;
2027: }
2029: PetscObjectGetNewTag((PetscObject)mat,&tag3);
2030: if (reuse == MAT_INITIAL_MATRIX){
2031: /* get new tags to keep the communication clean */
2032: PetscObjectGetNewTag((PetscObject)mat,&tag1);
2033: PetscObjectGetNewTag((PetscObject)mat,&tag2);
2034: PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);
2035: rbuf_nz = sbuf_nz + nsends;
2036:
2037: /* post receives of other's nzlocal */
2038: for (i=0; i<nrecvs; i++){
2039: MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);
2040: }
2041: /* send nzlocal to others */
2042: for (i=0; i<nsends; i++){
2043: sbuf_nz[i] = nzlocal;
2044: MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);
2045: }
2046: /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2047: count = nrecvs;
2048: while (count) {
2049: MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);
2050: recv_rank[imdex] = recv_status.MPI_SOURCE;
2051: /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2052: PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);
2054: i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2055: rbuf_nz[imdex] += i + 2;
2056: PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);
2057: MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);
2058: count--;
2059: }
2060: /* wait on sends of nzlocal */
2061: if (nsends) {MPI_Waitall(nsends,s_waits1,send_status);}
2062: /* send mat->i,j to others, and recv from other's */
2063: /*------------------------------------------------*/
2064: for (i=0; i<nsends; i++){
2065: j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2066: MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);
2067: }
2068: /* wait on receives of mat->i,j */
2069: /*------------------------------*/
2070: count = nrecvs;
2071: while (count) {
2072: MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);
2073: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2074: count--;
2075: }
2076: /* wait on sends of mat->i,j */
2077: /*---------------------------*/
2078: if (nsends) {
2079: MPI_Waitall(nsends,s_waits2,send_status);
2080: }
2081: } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2083: /* post receives, send and receive mat->a */
2084: /*----------------------------------------*/
2085: for (imdex=0; imdex<nrecvs; imdex++) {
2086: MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);
2087: }
2088: for (i=0; i<nsends; i++){
2089: MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);
2090: }
2091: count = nrecvs;
2092: while (count) {
2093: MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);
2094: if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2095: count--;
2096: }
2097: if (nsends) {
2098: MPI_Waitall(nsends,s_waits3,send_status);
2099: }
2101: PetscFree2(s_waits3,send_status);
2102:
2103: /* create redundant matrix */
2104: /*-------------------------*/
2105: if (reuse == MAT_INITIAL_MATRIX){
2106: /* compute rownz_max for preallocation */
2107: for (imdex=0; imdex<nrecvs; imdex++){
2108: j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2109: rptr = rbuf_j[imdex];
2110: for (i=0; i<j; i++){
2111: ncols = rptr[i+1] - rptr[i];
2112: if (rownz_max < ncols) rownz_max = ncols;
2113: }
2114: }
2115:
2116: MatCreate(subcomm,&C);
2117: MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);
2118: MatSetFromOptions(C);
2119: MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);
2120: MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);
2121: } else {
2122: C = *matredundant;
2123: }
2125: /* insert local matrix entries */
2126: rptr = sbuf_j;
2127: cols = sbuf_j + rend-rstart + 1;
2128: vals = sbuf_a;
2129: for (i=0; i<rend-rstart; i++){
2130: row = i + rstart;
2131: ncols = rptr[i+1] - rptr[i];
2132: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2133: vals += ncols;
2134: cols += ncols;
2135: }
2136: /* insert received matrix entries */
2137: for (imdex=0; imdex<nrecvs; imdex++){
2138: rstart = rowrange[recv_rank[imdex]];
2139: rend = rowrange[recv_rank[imdex]+1];
2140: rptr = rbuf_j[imdex];
2141: cols = rbuf_j[imdex] + rend-rstart + 1;
2142: vals = rbuf_a[imdex];
2143: for (i=0; i<rend-rstart; i++){
2144: row = i + rstart;
2145: ncols = rptr[i+1] - rptr[i];
2146: MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);
2147: vals += ncols;
2148: cols += ncols;
2149: }
2150: }
2151: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2152: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2153: MatGetSize(C,&M,&N);
2154: if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2155: if (reuse == MAT_INITIAL_MATRIX){
2156: PetscContainer container;
2157: *matredundant = C;
2158: /* create a supporting struct and attach it to C for reuse */
2159: PetscNew(Mat_Redundant,&redund);
2160: PetscContainerCreate(PETSC_COMM_SELF,&container);
2161: PetscContainerSetPointer(container,redund);
2162: PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);
2163: PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);
2164:
2165: redund->nzlocal = nzlocal;
2166: redund->nsends = nsends;
2167: redund->nrecvs = nrecvs;
2168: redund->send_rank = send_rank;
2169: redund->sbuf_nz = sbuf_nz;
2170: redund->sbuf_j = sbuf_j;
2171: redund->sbuf_a = sbuf_a;
2172: redund->rbuf_j = rbuf_j;
2173: redund->rbuf_a = rbuf_a;
2175: redund->MatDestroy = C->ops->destroy;
2176: C->ops->destroy = MatDestroy_MatRedundant;
2177: }
2178: return(0);
2179: }
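/*
   Usage sketch: gather a complete sequential copy of a parallel MPIAIJ matrix onto
   every process by giving each process its own sub-communicator.  It assumes the
   public MatGetRedundantMatrix() dispatcher takes the same arguments as the
   implementation above; the helper name and the choice nsubcomm == size are
   illustrative, not the only way to use the routine.
*/
static PetscErrorCode GatherFullCopyPerProcess(Mat A,Mat *Ared)
{
  PetscErrorCode ierr;
  MPI_Comm       comm,subcomm;
  PetscMPIInt    rank,size;
  PetscInt       M;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* one sub-communicator per process, so every process ends up with all rows */
  ierr = MPI_Comm_split(comm,rank,0,&subcomm);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetRedundantMatrix(A,size,subcomm,M,MAT_INITIAL_MATRIX,Ared);CHKERRQ(ierr);
  /* a later call with MAT_REUSE_MATRIX and the same *Ared refreshes the values only */
  return(0);
}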
2181: /* -------------------------------------------------------------------*/
2182: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2183: MatGetRow_MPIAIJ,
2184: MatRestoreRow_MPIAIJ,
2185: MatMult_MPIAIJ,
2186: /* 4*/ MatMultAdd_MPIAIJ,
2187: MatMultTranspose_MPIAIJ,
2188: MatMultTransposeAdd_MPIAIJ,
2189: #ifdef PETSC_HAVE_PBGL
2190: MatSolve_MPIAIJ,
2191: #else
2192: 0,
2193: #endif
2194: 0,
2195: 0,
2196: /*10*/ 0,
2197: 0,
2198: 0,
2199: MatRelax_MPIAIJ,
2200: MatTranspose_MPIAIJ,
2201: /*15*/ MatGetInfo_MPIAIJ,
2202: MatEqual_MPIAIJ,
2203: MatGetDiagonal_MPIAIJ,
2204: MatDiagonalScale_MPIAIJ,
2205: MatNorm_MPIAIJ,
2206: /*20*/ MatAssemblyBegin_MPIAIJ,
2207: MatAssemblyEnd_MPIAIJ,
2208: 0,
2209: MatSetOption_MPIAIJ,
2210: MatZeroEntries_MPIAIJ,
2211: /*25*/ MatZeroRows_MPIAIJ,
2212: 0,
2213: #ifdef PETSC_HAVE_PBGL
2214: MatLUFactorNumeric_MPIAIJ,
2215: #else
2216: 0,
2217: #endif
2218: 0,
2219: 0,
2220: /*30*/ MatSetUpPreallocation_MPIAIJ,
2221: #ifdef PETSC_HAVE_PBGL
2222: MatILUFactorSymbolic_MPIAIJ,
2223: #else
2224: 0,
2225: #endif
2226: 0,
2227: 0,
2228: 0,
2229: /*35*/ MatDuplicate_MPIAIJ,
2230: 0,
2231: 0,
2232: 0,
2233: 0,
2234: /*40*/ MatAXPY_MPIAIJ,
2235: MatGetSubMatrices_MPIAIJ,
2236: MatIncreaseOverlap_MPIAIJ,
2237: MatGetValues_MPIAIJ,
2238: MatCopy_MPIAIJ,
2239: /*45*/ 0,
2240: MatScale_MPIAIJ,
2241: 0,
2242: 0,
2243: 0,
2244: /*50*/ MatSetBlockSize_MPIAIJ,
2245: 0,
2246: 0,
2247: 0,
2248: 0,
2249: /*55*/ MatFDColoringCreate_MPIAIJ,
2250: 0,
2251: MatSetUnfactored_MPIAIJ,
2252: MatPermute_MPIAIJ,
2253: 0,
2254: /*60*/ MatGetSubMatrix_MPIAIJ,
2255: MatDestroy_MPIAIJ,
2256: MatView_MPIAIJ,
2257: 0,
2258: 0,
2259: /*65*/ 0,
2260: 0,
2261: 0,
2262: 0,
2263: 0,
2264: /*70*/ 0,
2265: 0,
2266: MatSetColoring_MPIAIJ,
2267: #if defined(PETSC_HAVE_ADIC)
2268: MatSetValuesAdic_MPIAIJ,
2269: #else
2270: 0,
2271: #endif
2272: MatSetValuesAdifor_MPIAIJ,
2273: /*75*/ 0,
2274: 0,
2275: 0,
2276: 0,
2277: 0,
2278: /*80*/ 0,
2279: 0,
2280: 0,
2281: 0,
2282: /*84*/ MatLoad_MPIAIJ,
2283: 0,
2284: 0,
2285: 0,
2286: 0,
2287: 0,
2288: /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2289: MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2290: MatMatMultNumeric_MPIAIJ_MPIAIJ,
2291: MatPtAP_Basic,
2292: MatPtAPSymbolic_MPIAIJ,
2293: /*95*/ MatPtAPNumeric_MPIAIJ,
2294: 0,
2295: 0,
2296: 0,
2297: 0,
2298: /*100*/0,
2299: MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2300: MatPtAPNumeric_MPIAIJ_MPIAIJ,
2301: MatConjugate_MPIAIJ,
2302: 0,
2303: /*105*/MatSetValuesRow_MPIAIJ,
2304: MatRealPart_MPIAIJ,
2305: MatImaginaryPart_MPIAIJ,
2306: 0,
2307: 0,
2308: /*110*/0,
2309: MatGetRedundantMatrix_MPIAIJ};
2311: /* ----------------------------------------------------------------------------------------*/
2316: PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2317: {
2318: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2322: MatStoreValues(aij->A);
2323: MatStoreValues(aij->B);
2324: return(0);
2325: }
2331: PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2332: {
2333: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2337: MatRetrieveValues(aij->A);
2338: MatRetrieveValues(aij->B);
2339: return(0);
2340: }
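/*
   Usage sketch: MatStoreValues()/MatRetrieveValues() save and restore only the
   numerical values, so the nonzero pattern must be frozen first.  The option name
   MAT_NO_NEW_NONZERO_LOCATIONS is the one this generation of PETSc is assumed to
   use for that purpose; MatZeroEntries() stands in for whatever computation
   overwrites the values.
*/
static PetscErrorCode SaveAndRestoreValues(Mat A)
{
  PetscErrorCode ierr;

  ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr); /* pattern may no longer change */
  ierr = MatStoreValues(A);CHKERRQ(ierr);                            /* stash a copy of the values   */
  ierr = MatZeroEntries(A);CHKERRQ(ierr);                            /* ... modify and use A ...     */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr);                         /* restore the saved values     */
  return(0);
}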
2343: #include petscpc.h
2347: PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2348: {
2349: Mat_MPIAIJ *b;
2351: PetscInt i;
2354: B->preallocated = PETSC_TRUE;
2355: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2356: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2357: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2358: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2360: B->rmap.bs = B->cmap.bs = 1;
2361: PetscMapSetUp(&B->rmap);
2362: PetscMapSetUp(&B->cmap);
2363: if (d_nnz) {
2364: for (i=0; i<B->rmap.n; i++) {
2365: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2366: }
2367: }
2368: if (o_nnz) {
2369: for (i=0; i<B->rmap.n; i++) {
2370: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2371: }
2372: }
2373: b = (Mat_MPIAIJ*)B->data;
2375: /* Explicitly create 2 MATSEQAIJ matrices. */
2376: MatCreate(PETSC_COMM_SELF,&b->A);
2377: MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);
2378: MatSetType(b->A,MATSEQAIJ);
2379: PetscLogObjectParent(B,b->A);
2380: MatCreate(PETSC_COMM_SELF,&b->B);
2381: MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);
2382: MatSetType(b->B,MATSEQAIJ);
2383: PetscLogObjectParent(B,b->B);
2385: MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);
2386: MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);
2388: return(0);
2389: }
2394: PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2395: {
2396: Mat mat;
2397: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2401: *newmat = 0;
2402: MatCreate(matin->comm,&mat);
2403: MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);
2404: MatSetType(mat,matin->type_name);
2405: PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));
2406: a = (Mat_MPIAIJ*)mat->data;
2407:
2408: mat->factor = matin->factor;
2409: mat->rmap.bs = matin->rmap.bs;
2410: mat->assembled = PETSC_TRUE;
2411: mat->insertmode = NOT_SET_VALUES;
2412: mat->preallocated = PETSC_TRUE;
2414: a->size = oldmat->size;
2415: a->rank = oldmat->rank;
2416: a->donotstash = oldmat->donotstash;
2417: a->roworiented = oldmat->roworiented;
2418: a->rowindices = 0;
2419: a->rowvalues = 0;
2420: a->getrowactive = PETSC_FALSE;
2422: PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);
2423: PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);
2425: MatStashCreate_Private(matin->comm,1,&mat->stash);
2426: if (oldmat->colmap) {
2427: #if defined (PETSC_USE_CTABLE)
2428: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2429: #else
2430: PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);
2431: PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));
2432: PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));
2433: #endif
2434: } else a->colmap = 0;
2435: if (oldmat->garray) {
2436: PetscInt len;
2437: len = oldmat->B->cmap.n;
2438: PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);
2439: PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2440: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt)); }
2441: } else a->garray = 0;
2442:
2443: VecDuplicate(oldmat->lvec,&a->lvec);
2444: PetscLogObjectParent(mat,a->lvec);
2445: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2446: PetscLogObjectParent(mat,a->Mvctx);
2447: MatDuplicate(oldmat->A,cpvalues,&a->A);
2448: PetscLogObjectParent(mat,a->A);
2449: MatDuplicate(oldmat->B,cpvalues,&a->B);
2450: PetscLogObjectParent(mat,a->B);
2451: PetscFListDuplicate(matin->qlist,&mat->qlist);
2452: *newmat = mat;
2453: return(0);
2454: }
2456: #include petscsys.h
2460: PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2461: {
2462: Mat A;
2463: PetscScalar *vals,*svals;
2464: MPI_Comm comm = ((PetscObject)viewer)->comm;
2465: MPI_Status status;
2467: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2468: PetscInt i,nz,j,rstart,rend,mmax;
2469: PetscInt header[4],*rowlengths = 0,M,N,m,*cols;
2470: PetscInt *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2471: PetscInt cend,cstart,n,*rowners;
2472: int fd;
2475: MPI_Comm_size(comm,&size);
2476: MPI_Comm_rank(comm,&rank);
2477: if (!rank) {
2478: PetscViewerBinaryGetDescriptor(viewer,&fd);
2479: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
2480: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2481: }
2483: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
2484: M = header[1]; N = header[2];
2485: /* determine ownership of all rows */
2486: m = M/size + ((M % size) > rank);
2487: PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
2488: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
2490: /* First process needs enough room for process with most rows */
2491: if (!rank) {
2492: mmax = rowners[1];
2493: for (i=2; i<size; i++) {
2494: mmax = PetscMax(mmax,rowners[i]);
2495: }
2496: } else mmax = m;
2498: rowners[0] = 0;
2499: for (i=2; i<=size; i++) {
2500: rowners[i] += rowners[i-1];
2501: }
2502: rstart = rowners[rank];
2503: rend = rowners[rank+1];
2505: /* distribute row lengths to all processors */
2506: PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);
2507: if (!rank) {
2508: PetscBinaryRead(fd,ourlens,m,PETSC_INT);
2509: PetscMalloc(m*sizeof(PetscInt),&rowlengths);
2510: PetscMalloc(size*sizeof(PetscInt),&procsnz);
2511: PetscMemzero(procsnz,size*sizeof(PetscInt));
2512: for (j=0; j<m; j++) {
2513: procsnz[0] += ourlens[j];
2514: }
2515: for (i=1; i<size; i++) {
2516: PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);
2517: /* calculate the number of nonzeros on each processor */
2518: for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2519: procsnz[i] += rowlengths[j];
2520: }
2521: MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
2522: }
2523: PetscFree(rowlengths);
2524: } else {
2525: MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);
2526: }
2528: if (!rank) {
2529: /* determine max buffer needed and allocate it */
2530: maxnz = 0;
2531: for (i=0; i<size; i++) {
2532: maxnz = PetscMax(maxnz,procsnz[i]);
2533: }
2534: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
2536: /* read in my part of the matrix column indices */
2537: nz = procsnz[0];
2538: PetscMalloc(nz*sizeof(PetscInt),&mycols);
2539: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2541:     /* read in everyone else's parts and ship them off */
2542: for (i=1; i<size; i++) {
2543: nz = procsnz[i];
2544: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2545: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
2546: }
2547: PetscFree(cols);
2548: } else {
2549: /* determine buffer space needed for message */
2550: nz = 0;
2551: for (i=0; i<m; i++) {
2552: nz += ourlens[i];
2553: }
2554: PetscMalloc(nz*sizeof(PetscInt),&mycols);
2556: /* receive message of column indices*/
2557: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
2558: MPI_Get_count(&status,MPIU_INT,&maxnz);
2559: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2560: }
2562: /* determine column ownership if matrix is not square */
2563: if (N != M) {
2564: n = N/size + ((N % size) > rank);
2565: MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);
2566: cstart = cend - n;
2567: } else {
2568: cstart = rstart;
2569: cend = rend;
2570: n = cend - cstart;
2571: }
2573: /* loop over local rows, determining number of off diagonal entries */
2574: PetscMemzero(offlens,m*sizeof(PetscInt));
2575: jj = 0;
2576: for (i=0; i<m; i++) {
2577: for (j=0; j<ourlens[i]; j++) {
2578: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2579: jj++;
2580: }
2581: }
2583: /* create our matrix */
2584: for (i=0; i<m; i++) {
2585: ourlens[i] -= offlens[i];
2586: }
2587: MatCreate(comm,&A);
2588: MatSetSizes(A,m,n,M,N);
2589: MatSetType(A,type);
2590: MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);
2592: MatSetOption(A,MAT_COLUMNS_SORTED);
2593: for (i=0; i<m; i++) {
2594: ourlens[i] += offlens[i];
2595: }
2597: if (!rank) {
2598: PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);
2600: /* read in my part of the matrix numerical values */
2601: nz = procsnz[0];
2602: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2603:
2604: /* insert into matrix */
2605: jj = rstart;
2606: smycols = mycols;
2607: svals = vals;
2608: for (i=0; i<m; i++) {
2609: MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2610: smycols += ourlens[i];
2611: svals += ourlens[i];
2612: jj++;
2613: }
2615: /* read in other processors and ship out */
2616: for (i=1; i<size; i++) {
2617: nz = procsnz[i];
2618: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2619: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2620: }
2621: PetscFree(procsnz);
2622: } else {
2623: /* receive numeric values */
2624: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
2626: /* receive message of values*/
2627: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2628: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2629: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2631: /* insert into matrix */
2632: jj = rstart;
2633: smycols = mycols;
2634: svals = vals;
2635: for (i=0; i<m; i++) {
2636: MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
2637: smycols += ourlens[i];
2638: svals += ourlens[i];
2639: jj++;
2640: }
2641: }
2642: PetscFree2(ourlens,offlens);
2643: PetscFree(vals);
2644: PetscFree(mycols);
2645: PetscFree(rowners);
2647: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2648: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2649: *newmat = A;
2650: return(0);
2651: }
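/*
   Usage sketch: load a parallel AIJ matrix from a PETSc binary file using the
   MatLoad(viewer,type,&mat) calling sequence implied by MatLoad_MPIAIJ() above.
   The file name "matrix.dat" is only an example.
*/
static PetscErrorCode LoadMPIAIJFromFile(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIAIJ,A);CHKERRQ(ierr);    /* rows are distributed nearly evenly, as above */
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  return(0);
}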
2655: /*
2656:     Not great since it makes two copies of the submatrix: first each process extracts
2657:     a local SeqAIJ submatrix, and then those local pieces are concatenated into the final result.
2658:     Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
2659: */
2660: PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2661: {
2663: PetscMPIInt rank,size;
2664: PetscInt i,m,n,rstart,row,rend,nz,*cwork,j;
2665: PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2666: Mat *local,M,Mreuse;
2667: PetscScalar *vwork,*aa;
2668: MPI_Comm comm = mat->comm;
2669: Mat_SeqAIJ *aij;
2673: MPI_Comm_rank(comm,&rank);
2674: MPI_Comm_size(comm,&size);
2676: if (call == MAT_REUSE_MATRIX) {
2677: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
2678: if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2679: local = &Mreuse;
2680: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
2681: } else {
2682: MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
2683: Mreuse = *local;
2684: PetscFree(local);
2685: }
2687: /*
2688: m - number of local rows
2689: n - number of columns (same on all processors)
2690: rstart - first row in new global matrix generated
2691: */
2692: MatGetSize(Mreuse,&m,&n);
2693: if (call == MAT_INITIAL_MATRIX) {
2694: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2695: ii = aij->i;
2696: jj = aij->j;
2698: /*
2699: Determine the number of non-zeros in the diagonal and off-diagonal
2700: portions of the matrix in order to do correct preallocation
2701: */
2703: /* first get start and end of "diagonal" columns */
2704: if (csize == PETSC_DECIDE) {
2705: ISGetSize(isrow,&mglobal);
2706: if (mglobal == n) { /* square matrix */
2707: nlocal = m;
2708: } else {
2709: nlocal = n/size + ((n % size) > rank);
2710: }
2711: } else {
2712: nlocal = csize;
2713: }
2714: MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);
2715: rstart = rend - nlocal;
2716: if (rank == size - 1 && rend != n) {
2717: SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2718: }
2720: /* next, compute all the lengths */
2721: PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);
2722: olens = dlens + m;
2723: for (i=0; i<m; i++) {
2724: jend = ii[i+1] - ii[i];
2725: olen = 0;
2726: dlen = 0;
2727: for (j=0; j<jend; j++) {
2728: if (*jj < rstart || *jj >= rend) olen++;
2729: else dlen++;
2730: jj++;
2731: }
2732: olens[i] = olen;
2733: dlens[i] = dlen;
2734: }
2735: MatCreate(comm,&M);
2736: MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);
2737: MatSetType(M,mat->type_name);
2738: MatMPIAIJSetPreallocation(M,0,dlens,0,olens);
2739: PetscFree(dlens);
2740: } else {
2741: PetscInt ml,nl;
2743: M = *newmat;
2744: MatGetLocalSize(M,&ml,&nl);
2745: if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2746: MatZeroEntries(M);
2747: /*
2748: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2749: rather than the slower MatSetValues().
2750: */
2751: M->was_assembled = PETSC_TRUE;
2752: M->assembled = PETSC_FALSE;
2753: }
2754: MatGetOwnershipRange(M,&rstart,&rend);
2755: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2756: ii = aij->i;
2757: jj = aij->j;
2758: aa = aij->a;
2759: for (i=0; i<m; i++) {
2760: row = rstart + i;
2761: nz = ii[i+1] - ii[i];
2762: cwork = jj; jj += nz;
2763: vwork = aa; aa += nz;
2764: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2765: }
2767: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2768: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2769: *newmat = M;
2771: /* save submatrix used in processor for next request */
2772: if (call == MAT_INITIAL_MATRIX) {
2773: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2774: PetscObjectDereference((PetscObject)Mreuse);
2775: }
2777: return(0);
2778: }
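/*
   Usage sketch: extract the even-numbered rows and columns of a parallel matrix,
   assuming the public MatGetSubMatrix() takes the same arguments as
   MatGetSubMatrix_MPIAIJ() above.  Each process lists only the rows it owns, while
   the column index set lists all requested columns (the convention implied by the
   implementation); csize = PETSC_DECIDE lets PETSc choose the column layout.
*/
static PetscErrorCode ExtractEvenRowsCols(Mat A,Mat *Asub)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscInt       rstart,rend,first,nlocal,N;
  IS             isrow,iscol;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  first  = rstart + (rstart % 2);                       /* first even row owned by this process */
  nlocal = (first < rend) ? (rend - first + 1)/2 : 0;
  ierr = ISCreateStride(comm,nlocal,first,2,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,(N+1)/2,0,2,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,Asub);CHKERRQ(ierr);
  ierr = ISDestroy(isrow);CHKERRQ(ierr);
  ierr = ISDestroy(iscol);CHKERRQ(ierr);
  return(0);
}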
2783: PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2784: {
2785: PetscInt m,cstart, cend,j,nnz,i,d;
2786: PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2787: const PetscInt *JJ;
2788: PetscScalar *values;
2792: if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2794: B->rmap.bs = B->cmap.bs = 1;
2795: PetscMapSetUp(&B->rmap);
2796: PetscMapSetUp(&B->cmap);
2797: m = B->rmap.n;
2798: cstart = B->cmap.rstart;
2799: cend = B->cmap.rend;
2800: rstart = B->rmap.rstart;
2802: PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);
2803: o_nnz = d_nnz + m;
2805: for (i=0; i<m; i++) {
2806: nnz = Ii[i+1]- Ii[i];
2807: JJ = J + Ii[i];
2808: nnz_max = PetscMax(nnz_max,nnz);
2809: if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2810: for (j=0; j<nnz; j++) {
2811: if (*JJ >= cstart) break;
2812: JJ++;
2813: }
2814: d = 0;
2815: for (; j<nnz; j++) {
2816: if (*JJ++ >= cend) break;
2817: d++;
2818: }
2819: d_nnz[i] = d;
2820: o_nnz[i] = nnz - d;
2821: }
2822: MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
2823: PetscFree(d_nnz);
2825: if (v) values = (PetscScalar*)v;
2826: else {
2827: PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);
2828: PetscMemzero(values,nnz_max*sizeof(PetscScalar));
2829: }
2831: MatSetOption(B,MAT_COLUMNS_SORTED);
2832: for (i=0; i<m; i++) {
2833: ii = i + rstart;
2834: nnz = Ii[i+1]- Ii[i];
2835: MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);
2836: }
2837: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2838: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2839: MatSetOption(B,MAT_COLUMNS_UNSORTED);
2841: if (!v) {
2842: PetscFree(values);
2843: }
2844: return(0);
2845: }
2850: /*@
2851: MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2852: (the default parallel PETSc format).
2854: Collective on MPI_Comm
2856: Input Parameters:
2857: + B - the matrix
2858: . i - the indices into j for the start of each local row (starts with zero)
2859: . j - the column indices for each local row (starts with zero) these must be sorted for each row
2860: - v - optional values in the matrix
2862: Level: developer
2864:    Notes: this actually copies the values from i[], j[], and v[] to put them into PETSc's internal
2865:    storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2867: .keywords: matrix, aij, compressed row, sparse, parallel
2869: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2870: MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2871: @*/
2872: PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2873: {
2874: PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2877: PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);
2878: if (f) {
2879: (*f)(B,i,j,v);
2880: }
2881: return(0);
2882: }
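/*
   Usage sketch: preallocate and fill an MPIAIJ matrix from local CSR arrays.  Each
   process owns two rows of a 1-D Laplacian-like pattern; the helper name, sizes, and
   values are purely illustrative.  As the manual page above notes, the arrays are
   copied, so they may be freed afterwards.
*/
static PetscErrorCode FillFromLocalCSR(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       m = 2,rstart,row,i,nz = 0,ii[3],jj[6];
  PetscScalar    vv[6];

  ierr   = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr   = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  rstart = m*rank;                               /* first global row owned by this process */
  ii[0]  = 0;
  for (i=0; i<m; i++) {
    row = rstart + i;
    if (row > 0)        {jj[nz] = row-1; vv[nz] = -1.0; nz++;}
    jj[nz] = row; vv[nz] = 2.0; nz++;
    if (row < m*size-1) {jj[nz] = row+1; vv[nz] = -1.0; nz++;}
    ii[i+1] = nz;                                /* column indices in each row are sorted, as required */
  }
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*A,ii,jj,vv);CHKERRQ(ierr);   /* also assembles the matrix */
  return(0);
}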
2886: /*@C
2887: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2888: (the default parallel PETSc format). For good matrix assembly performance
2889: the user should preallocate the matrix storage by setting the parameters
2890: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2891: performance can be increased by more than a factor of 50.
2893: Collective on MPI_Comm
2895: Input Parameters:
2896: + A - the matrix
2897: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2898: (same value is used for all local rows)
2899: . d_nnz - array containing the number of nonzeros in the various rows of the
2900: DIAGONAL portion of the local submatrix (possibly different for each row)
2901: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2902: The size of this array is equal to the number of local rows, i.e 'm'.
2903: You must leave room for the diagonal entry even if it is zero.
2904: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2905: submatrix (same value is used for all local rows).
2906: - o_nnz - array containing the number of nonzeros in the various rows of the
2907: OFF-DIAGONAL portion of the local submatrix (possibly different for
2908: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2909: structure. The size of this array is equal to the number
2910: of local rows, i.e 'm'.
2912: If the *_nnz parameter is given then the *_nz parameter is ignored
2914: The AIJ format (also called the Yale sparse matrix format or
2915:    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2916: storage. The stored row and column indices begin with zero. See the users manual for details.
2918: The parallel matrix is partitioned such that the first m0 rows belong to
2919: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2920: to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2922: The DIAGONAL portion of the local submatrix of a processor can be defined
2923:    as the submatrix which is obtained by extracting the part corresponding
2924: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2925: first row that belongs to the processor, and r2 is the last row belonging
2926:    to this processor. This is a square mxm matrix. The remaining portion
2927:    of the local submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
2929: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2931: Example usage:
2932:
2933: Consider the following 8x8 matrix with 34 non-zero values, that is
2934:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2935: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2936: as follows:
2938: .vb
2939: 1 2 0 | 0 3 0 | 0 4
2940: Proc0 0 5 6 | 7 0 0 | 8 0
2941: 9 0 10 | 11 0 0 | 12 0
2942: -------------------------------------
2943: 13 0 14 | 15 16 17 | 0 0
2944: Proc1 0 18 0 | 19 20 21 | 0 0
2945: 0 0 0 | 22 23 0 | 24 0
2946: -------------------------------------
2947: Proc2 25 26 27 | 0 0 28 | 29 0
2948: 30 0 0 | 31 32 33 | 0 34
2949: .ve
2951: This can be represented as a collection of submatrices as:
2953: .vb
2954: A B C
2955: D E F
2956: G H I
2957: .ve
2959: Where the submatrices A,B,C are owned by proc0, D,E,F are
2960: owned by proc1, G,H,I are owned by proc2.
2962: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2963: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2964: The 'M','N' parameters are 8,8, and have the same values on all procs.
2966: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2967: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2968: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2969: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2970:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2971:    matrix, and [DF] as another SeqAIJ matrix.
2973: When d_nz, o_nz parameters are specified, d_nz storage elements are
2974: allocated for every row of the local diagonal submatrix, and o_nz
2975: storage locations are allocated for every row of the OFF-DIAGONAL submat.
2976:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
2977:    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2978: In this case, the values of d_nz,o_nz are:
2979: .vb
2980: proc0 : dnz = 2, o_nz = 2
2981: proc1 : dnz = 3, o_nz = 2
2982: proc2 : dnz = 1, o_nz = 4
2983: .ve
2984: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2985: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2986:    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2987: 34 values.
2989: When d_nnz, o_nnz parameters are specified, the storage is specified
2990:    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2991: In the above case the values for d_nnz,o_nnz are:
2992: .vb
2993: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2994: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2995: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2996: .ve
2997:    Here the space allocated is the sum of all the above values, i.e. 34, and
2998:    hence the preallocation is exact.
3000: Level: intermediate
3002: .keywords: matrix, aij, compressed row, sparse, parallel
3004: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3005: MPIAIJ
3006: @*/
3007: PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3008: {
3009: PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3012: PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);
3013: if (f) {
3014: (*f)(B,d_nz,d_nnz,o_nz,o_nnz);
3015: }
3016: return(0);
3017: }
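/*
   Usage sketch: the typical create / preallocate sequence for the routine documented
   above.  The per-row estimates (at most 5 nonzeros in the diagonal block and 2 in
   the off-diagonal block) are illustrative; exact per-row counts would be passed in
   the d_nnz/o_nnz arrays instead.
*/
static PetscErrorCode CreateAndPreallocate(MPI_Comm comm,PetscInt m,Mat *B)
{
  PetscErrorCode ierr;

  ierr = MatCreate(comm,B);CHKERRQ(ierr);
  ierr = MatSetSizes(*B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*B,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*B,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  /* ... MatSetValues() on the locally owned rows, then MatAssemblyBegin/End() ... */
  return(0);
}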
3021: /*@C
3022: MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3023: CSR format the local rows.
3025: Collective on MPI_Comm
3027: Input Parameters:
3028: + comm - MPI communicator
3029: . m - number of local rows (Cannot be PETSC_DECIDE)
3030: . n - This value should be the same as the local size used in creating the
3031: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3032: calculated if N is given) For square matrices n is almost always m.
3033: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3034: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3035: . i - row indices
3036: . j - column indices
3037: - a - matrix values
3039: Output Parameter:
3040: . mat - the matrix
3042: Level: intermediate
3044: Notes:
3045: The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3046: thus you CANNOT change the matrix entries by changing the values of a[] after you have
3047: called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3049: The i and j indices are 0 based
3051: .keywords: matrix, aij, compressed row, sparse, parallel
3053: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3054: MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3055: @*/
3056: PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3057: {
3061: if (i[0]) {
3062: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3063: }
3064: if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3065:   MatCreate(comm,mat);
        MatSetSizes(*mat,m,n,M,N);   /* pass along the size arguments, which were otherwise unused */
3066:   MatSetType(*mat,MATMPIAIJ);
3067: MatMPIAIJSetPreallocationCSR(*mat,i,j,a);
3068: return(0);
3069: }
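/*
   Usage sketch: each process contributes its locally owned rows in CSR form; here
   every process supplies a single row holding one diagonal entry, which is purely
   illustrative.  As the manual page above notes, the arrays are copied and may be
   freed after the call.
*/
static PetscErrorCode CreateDiagonalFromArrays(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       ii[2],jj[1];
  PetscScalar    aa[1];

  ierr  = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ii[0] = 0; ii[1] = 1;          /* one nonzero in the single local row */
  jj[0] = rank;                  /* global column of the diagonal entry */
  aa[0] = 1.0;
  ierr  = MatCreateMPIAIJWithArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,A);CHKERRQ(ierr);
  return(0);
}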
3073: /*@C
3074: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3075: (the default parallel PETSc format). For good matrix assembly performance
3076: the user should preallocate the matrix storage by setting the parameters
3077: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
3078: performance can be increased by more than a factor of 50.
3080: Collective on MPI_Comm
3082: Input Parameters:
3083: + comm - MPI communicator
3084: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3085: This value should be the same as the local size used in creating the
3086: y vector for the matrix-vector product y = Ax.
3087: . n - This value should be the same as the local size used in creating the
3088: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3089: calculated if N is given) For square matrices n is almost always m.
3090: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3091: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3092: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
3093: (same value is used for all local rows)
3094: . d_nnz - array containing the number of nonzeros in the various rows of the
3095: DIAGONAL portion of the local submatrix (possibly different for each row)
3096: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3097: The size of this array is equal to the number of local rows, i.e 'm'.
3098: You must leave room for the diagonal entry even if it is zero.
3099: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
3100: submatrix (same value is used for all local rows).
3101: - o_nnz - array containing the number of nonzeros in the various rows of the
3102: OFF-DIAGONAL portion of the local submatrix (possibly different for
3103: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3104: structure. The size of this array is equal to the number
3105: of local rows, i.e 'm'.
3107: Output Parameter:
3108: . A - the matrix
3110: Notes:
3111: If the *_nnz parameter is given then the *_nz parameter is ignored
3113: m,n,M,N parameters specify the size of the matrix, and its partitioning across
3114: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3115: storage requirements for this matrix.
3117: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3118: processor than it must be used on all processors that share the object for
3119: that argument.
3121: The user MUST specify either the local or global matrix dimensions
3122: (possibly both).
3124: The parallel matrix is partitioned across processors such that the
3125: first m0 rows belong to process 0, the next m1 rows belong to
3126: process 1, the next m2 rows belong to process 2 etc.. where
3127: m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3128: values corresponding to [m x N] submatrix.
3130: The columns are logically partitioned with the n0 columns belonging
3131: to 0th partition, the next n1 columns belonging to the next
3132:    partition etc., where n0,n1,n2... are the input parameter 'n'.
3134: The DIAGONAL portion of the local submatrix on any given processor
3135: is the submatrix corresponding to the rows and columns m,n
3136: corresponding to the given processor. i.e diagonal matrix on
3137: process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3138: etc. The remaining portion of the local submatrix [m x (N-n)]
3139: constitute the OFF-DIAGONAL portion. The example below better
3140: illustrates this concept.
3142: For a square global matrix we define each processor's diagonal portion
3143: to be its local rows and the corresponding columns (a square submatrix);
3144: each processor's off-diagonal portion encompasses the remainder of the
3145: local matrix (a rectangular submatrix).
3147: If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3149: When calling this routine with a single process communicator, a matrix of
3150: type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this
3151: type of communicator, use the construction mechanism:
3152: MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3154: By default, this format uses inodes (identical nodes) when possible.
3155: We search for consecutive rows with the same nonzero structure, thereby
3156: reusing matrix information to achieve increased efficiency.
3158: Options Database Keys:
3159: + -mat_no_inode - Do not use inodes
3160: . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3161: - -mat_aij_oneindex - Internally use indexing starting at 1
3162: rather than 0. Note that when calling MatSetValues(),
3163: the user still MUST index entries starting at 0!
3166: Example usage:
3167:
3168: Consider the following 8x8 matrix with 34 non-zero values, that is
3169:    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3170: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3171: as follows:
3173: .vb
3174: 1 2 0 | 0 3 0 | 0 4
3175: Proc0 0 5 6 | 7 0 0 | 8 0
3176: 9 0 10 | 11 0 0 | 12 0
3177: -------------------------------------
3178: 13 0 14 | 15 16 17 | 0 0
3179: Proc1 0 18 0 | 19 20 21 | 0 0
3180: 0 0 0 | 22 23 0 | 24 0
3181: -------------------------------------
3182: Proc2 25 26 27 | 0 0 28 | 29 0
3183: 30 0 0 | 31 32 33 | 0 34
3184: .ve
3186: This can be represented as a collection of submatrices as:
3188: .vb
3189: A B C
3190: D E F
3191: G H I
3192: .ve
3194: Where the submatrices A,B,C are owned by proc0, D,E,F are
3195: owned by proc1, G,H,I are owned by proc2.
3197: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3198: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3199: The 'M','N' parameters are 8,8, and have the same values on all procs.
3201: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3202: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3203: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3204: Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3205:    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3206:    matrix, and [DF] as another SeqAIJ matrix.
3208: When d_nz, o_nz parameters are specified, d_nz storage elements are
3209: allocated for every row of the local diagonal submatrix, and o_nz
3210: storage locations are allocated for every row of the OFF-DIAGONAL submat.
3211:    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3212:    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3213: In this case, the values of d_nz,o_nz are:
3214: .vb
3215: proc0 : dnz = 2, o_nz = 2
3216: proc1 : dnz = 3, o_nz = 2
3217: proc2 : dnz = 1, o_nz = 4
3218: .ve
3219: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3220: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3221:    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3222: 34 values.
3224: When d_nnz, o_nnz parameters are specified, the storage is specified
3225:    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
3226: In the above case the values for d_nnz,o_nnz are:
3227: .vb
3228: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3229: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3230: proc2: d_nnz = [1,1] and o_nnz = [4,4]
3231: .ve
3232:    Here the space allocated is the sum of all the above values, i.e. 34, and
3233:    hence the preallocation is exact.
3235: Level: intermediate
3237: .keywords: matrix, aij, compressed row, sparse, parallel
3239: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3240: MPIAIJ, MatCreateMPIAIJWithArrays()
3241: @*/
3242: PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3243: {
3245: PetscMPIInt size;
3248: MatCreate(comm,A);
3249: MatSetSizes(*A,m,n,M,N);
3250: MPI_Comm_size(comm,&size);
3251: if (size > 1) {
3252: MatSetType(*A,MATMPIAIJ);
3253: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
3254: } else {
3255: MatSetType(*A,MATSEQAIJ);
3256: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
3257: }
3258: return(0);
3259: }
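/*
   Usage sketch: the create / set values / assemble sequence for MatCreateMPIAIJ(),
   inserting a 1-D Laplacian stencil into the locally owned rows.  The preallocation
   estimates (3 diagonal-block and 1 off-diagonal-block nonzeros per row) and the
   stencil values are illustrative.
*/
static PetscErrorCode AssembleTridiagonal(MPI_Comm comm,PetscInt m,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,row,M,ncols,cols[3];
  PetscScalar    vals[3];

  ierr = MatCreateMPIAIJ(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,3,PETSC_NULL,1,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetSize(*A,&M,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ncols = 0;
    if (row > 0)   {cols[ncols] = row-1; vals[ncols] = -1.0; ncols++;}
    cols[ncols] = row; vals[ncols] = 2.0; ncols++;
    if (row < M-1) {cols[ncols] = row+1; vals[ncols] = -1.0; ncols++;}
    ierr = MatSetValues(*A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return(0);
}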
3263: PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3264: {
3265: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3268: *Ad = a->A;
3269: *Ao = a->B;
3270: *colmap = a->garray;
3271: return(0);
3272: }
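/*
   Usage sketch: inspect the two sequential blocks and the local-to-global column map
   returned by MatMPIAIJGetSeqAIJ().  The blocks belong to the parallel matrix and
   must not be destroyed by the caller; the map is only meaningful after the matrix
   has been assembled.
*/
static PetscErrorCode InspectLocalBlocks(Mat A)
{
  PetscErrorCode ierr;
  Mat            Ad,Ao;
  PetscInt       *garray,md,nd,mo,no;

  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatGetSize(Ad,&md,&nd);CHKERRQ(ierr);
  ierr = MatGetSize(Ao,&mo,&no);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_SELF,"diagonal block %D x %D, off-diagonal block %D x %D\n",md,nd,mo,no);CHKERRQ(ierr);
  if (no) {
    ierr = PetscPrintf(PETSC_COMM_SELF,"first off-diagonal global column %D\n",garray[0]);CHKERRQ(ierr);
  }
  return(0);
}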
3276: PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3277: {
3279: PetscInt i;
3280: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3283: if (coloring->ctype == IS_COLORING_GLOBAL) {
3284: ISColoringValue *allcolors,*colors;
3285: ISColoring ocoloring;
3287: /* set coloring for diagonal portion */
3288: MatSetColoring_SeqAIJ(a->A,coloring);
3290: /* set coloring for off-diagonal portion */
3291: ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
3292: PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);
3293: for (i=0; i<a->B->cmap.n; i++) {
3294: colors[i] = allcolors[a->garray[i]];
3295: }
3296: PetscFree(allcolors);
3297: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);
3298: MatSetColoring_SeqAIJ(a->B,ocoloring);
3299: ISColoringDestroy(ocoloring);
3300: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3301: ISColoringValue *colors;
3302: PetscInt *larray;
3303: ISColoring ocoloring;
3305: /* set coloring for diagonal portion */
3306: PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);
3307: for (i=0; i<a->A->cmap.n; i++) {
3308: larray[i] = i + A->cmap.rstart;
3309: }
3310: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);
3311: PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);
3312: for (i=0; i<a->A->cmap.n; i++) {
3313: colors[i] = coloring->colors[larray[i]];
3314: }
3315: PetscFree(larray);
3316: ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);
3317: MatSetColoring_SeqAIJ(a->A,ocoloring);
3318: ISColoringDestroy(ocoloring);
3320: /* set coloring for off-diagonal portion */
3321: PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);
3322: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);
3323: PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);
3324: for (i=0; i<a->B->cmap.n; i++) {
3325: colors[i] = coloring->colors[larray[i]];
3326: }
3327: PetscFree(larray);
3328: ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);
3329: MatSetColoring_SeqAIJ(a->B,ocoloring);
3330: ISColoringDestroy(ocoloring);
3331: } else {
3332: SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3333: }
3335: return(0);
3336: }
3338: #if defined(PETSC_HAVE_ADIC)
3341: PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3342: {
3343: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3347: MatSetValuesAdic_SeqAIJ(a->A,advalues);
3348: MatSetValuesAdic_SeqAIJ(a->B,advalues);
3349: return(0);
3350: }
3351: #endif
3355: PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3356: {
3357: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
3361: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
3362: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
3363: return(0);
3364: }
3368: /*@C
3369:    MatMerge - Creates a single large PETSc matrix by concatenating the sequential
3370: matrices from each processor
3372: Collective on MPI_Comm
3374: Input Parameters:
3375: + comm - the communicators the parallel matrix will live on
3376: . inmat - the input sequential matrices
3377: . n - number of local columns (or PETSC_DECIDE)
3378: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3380: Output Parameter:
3381: . outmat - the parallel matrix generated
3383: Level: advanced
3385: Notes: The number of columns of the matrix in EACH processor MUST be the same.
3387: @*/
3388: PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3389: {
3391: PetscInt m,N,i,rstart,nnz,Ii,*dnz,*onz;
3392: PetscInt *indx;
3393: PetscScalar *values;
3396: MatGetSize(inmat,&m,&N);
3397: if (scall == MAT_INITIAL_MATRIX){
3398: /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3399: if (n == PETSC_DECIDE){
3400: PetscSplitOwnership(comm,&n,&N);
3401: }
3402: MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);
3403: rstart -= m;
3405: MatPreallocateInitialize(comm,m,n,dnz,onz);
3406: for (i=0;i<m;i++) {
3407: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
3408: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
3409: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);
3410: }
3411: /* This routine will ONLY return MPIAIJ type matrix */
3412: MatCreate(comm,outmat);
3413: MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
3414: MatSetType(*outmat,MATMPIAIJ);
3415: MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);
3416: MatPreallocateFinalize(dnz,onz);
3417:
3418: } else if (scall == MAT_REUSE_MATRIX){
3419: MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);
3420: } else {
3421: SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3422: }
3424: for (i=0;i<m;i++) {
3425: MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
3426: Ii = i + rstart;
3427: MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);
3428: MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);
3429: }
3430: MatDestroy(inmat);
3431: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
3432: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
3434: return(0);
3435: }
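/*
   Usage sketch: every process builds a small SeqAIJ block with the same number of
   columns, and MatMerge() stacks them row-wise into one parallel MPIAIJ matrix.
   Note that MatMerge() destroys the input sequential matrix.  Sizes and values are
   illustrative.
*/
static PetscErrorCode MergeLocalBlocks(MPI_Comm comm,Mat *C)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  Mat            local;
  PetscInt       row = 0,col;
  PetscScalar    one = 1.0;

  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,1,4,1,PETSC_NULL,&local);CHKERRQ(ierr); /* one local row, 4 columns everywhere */
  col  = rank % 4;
  ierr = MatSetValues(local,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(local,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(local,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatMerge(comm,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,C);CHKERRQ(ierr);   /* destroys "local" */
  return(0);
}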
3439: PetscErrorCode MatFileSplit(Mat A,char *outfile)
3440: {
3441: PetscErrorCode ierr;
3442: PetscMPIInt rank;
3443: PetscInt m,N,i,rstart,nnz;
3444: size_t len;
3445: const PetscInt *indx;
3446: PetscViewer out;
3447: char *name;
3448: Mat B;
3449: const PetscScalar *values;
3452: MatGetLocalSize(A,&m,0);
3453: MatGetSize(A,0,&N);
3454: /* Should this be the type of the diagonal block of A? */
3455: MatCreate(PETSC_COMM_SELF,&B);
3456: MatSetSizes(B,m,N,m,N);
3457: MatSetType(B,MATSEQAIJ);
3458: MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
3459: MatGetOwnershipRange(A,&rstart,0);
3460: for (i=0;i<m;i++) {
3461: MatGetRow(A,i+rstart,&nnz,&indx,&values);
3462: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
3463: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
3464: }
3465: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
3466: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
3468: MPI_Comm_rank(A->comm,&rank);
3469: PetscStrlen(outfile,&len);
3470: PetscMalloc((len+5)*sizeof(char),&name);
3471: sprintf(name,"%s.%d",outfile,rank);
3472: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);
3473: PetscFree(name);
3474: MatView(B,out);
3475: PetscViewerDestroy(out);
3476: MatDestroy(B);
3477: return(0);
3478: }
3480: EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3483: PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3484: {
3485: PetscErrorCode ierr;
3486: Mat_Merge_SeqsToMPI *merge;
3487: PetscContainer container;
3490: PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);
3491: if (container) {
3492: PetscContainerGetPointer(container,(void **)&merge);
3493: PetscFree(merge->id_r);
3494: PetscFree(merge->len_s);
3495: PetscFree(merge->len_r);
3496: PetscFree(merge->bi);
3497: PetscFree(merge->bj);
3498: PetscFree(merge->buf_ri);
3499: PetscFree(merge->buf_rj);
3500: PetscFree(merge->coi);
3501: PetscFree(merge->coj);
3502: PetscFree(merge->owners_co);
3503: PetscFree(merge->rowmap.range);
3504:
3505: PetscContainerDestroy(container);
3506: PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);
3507: }
3508: PetscFree(merge);
3510: MatDestroy_MPIAIJ(A);
3511: return(0);
3512: }
3514: #include src/mat/utils/freespace.h
3515: #include petscbt.h
3516: static PetscEvent logkey_seqstompinum = 0;
3519: /*@C
3520: MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
3521: matrices from each processor
3523: Collective on MPI_Comm
3525: Input Parameters:
3526: + comm - the communicator the parallel matrix will live on
3527: . seqmat - the input sequential matrices
3528: . m - number of local rows (or PETSC_DECIDE)
3529: . n - number of local columns (or PETSC_DECIDE)
3530: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3532: Output Parameter:
3533: . mpimat - the parallel matrix generated
3535: Level: advanced
3537: Notes:
3538: The dimensions of the sequential matrix in each processor MUST be the same.
3539: The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3540: destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. A usage sketch is given after MatMerge_SeqsToMPI() below.
3541: @*/
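/*
   MatMerge_SeqsToMPINumeric - Numeric phase of MatMerge_SeqsToMPI(): sends the values
   of the off-process rows of seqmat, sums the local and received values row by row,
   and inserts them into the parallel matrix mpimat that was preallocated by
   MatMerge_SeqsToMPISymbolic().
*/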
3542: PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3543: {
3544: PetscErrorCode ierr;
3545: MPI_Comm comm=mpimat->comm;
3546: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
3547: PetscMPIInt size,rank,taga,*len_s;
3548: PetscInt N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3549: PetscInt proc,m;
3550: PetscInt **buf_ri,**buf_rj;
3551: PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3552: PetscInt nrows,**buf_ri_k,**nextrow,**nextai;
3553: MPI_Request *s_waits,*r_waits;
3554: MPI_Status *status;
3555: MatScalar *aa=a->a,**abuf_r,*ba_i;
3556: Mat_Merge_SeqsToMPI *merge;
3557: PetscContainer container;
3558:
3560: if (!logkey_seqstompinum) {
3562: }
3565: MPI_Comm_size(comm,&size);
3566: MPI_Comm_rank(comm,&rank);
3568: PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);
3569: if (container) {
3570: PetscContainerGetPointer(container,(void **)&merge);
3571: }
3572: bi = merge->bi;
3573: bj = merge->bj;
3574: buf_ri = merge->buf_ri;
3575: buf_rj = merge->buf_rj;
3577: PetscMalloc(size*sizeof(MPI_Status),&status);
3578: owners = merge->rowmap.range;
3579: len_s = merge->len_s;
3581: /* send and recv matrix values */
3582: /*-----------------------------*/
3583: PetscObjectGetNewTag((PetscObject)mpimat,&taga);
3584: PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);
3586: PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);
3587: for (proc=0,k=0; proc<size; proc++){
3588: if (!len_s[proc]) continue;
3589: i = owners[proc];
3590: MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);
3591: k++;
3592: }
3594: if (merge->nrecv) {MPI_Waitall(merge->nrecv,r_waits,status);}
3595: if (merge->nsend) {MPI_Waitall(merge->nsend,s_waits,status);}
3596: PetscFree(status);
3598: PetscFree(s_waits);
3599: PetscFree(r_waits);
3601: /* insert mat values of mpimat */
3602: /*----------------------------*/
3603: PetscMalloc(N*sizeof(MatScalar),&ba_i);
3604: PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);
3605: nextrow = buf_ri_k + merge->nrecv;
3606: nextai = nextrow + merge->nrecv;
3608: for (k=0; k<merge->nrecv; k++){
3609: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3610: nrows = *(buf_ri_k[k]);
3611: nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th received i-structure */
3612: nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3613: }
3615: /* set values of ba */
3616: m = merge->rowmap.n;
3617: for (i=0; i<m; i++) {
3618: arow = owners[rank] + i;
3619: bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */
3620: bnzi = bi[i+1] - bi[i];
3621: PetscMemzero(ba_i,bnzi*sizeof(MatScalar));
3623: /* add local non-zero vals of this proc's seqmat into ba */
3624: anzi = ai[arow+1] - ai[arow];
3625: aj = a->j + ai[arow];
3626: aa = a->a + ai[arow];
3627: nextaj = 0;
3628: for (j=0; nextaj<anzi; j++){
3629: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3630: ba_i[j] += aa[nextaj++];
3631: }
3632: }
3634: /* add received vals into ba */
3635: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3636: /* i-th row */
3637: if (i == *nextrow[k]) {
3638: anzi = *(nextai[k]+1) - *nextai[k];
3639: aj = buf_rj[k] + *(nextai[k]);
3640: aa = abuf_r[k] + *(nextai[k]);
3641: nextaj = 0;
3642: for (j=0; nextaj<anzi; j++){
3643: if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3644: ba_i[j] += aa[nextaj++];
3645: }
3646: }
3647: nextrow[k]++; nextai[k]++;
3648: }
3649: }
3650: MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);
3651: }
3652: MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);
3653: MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);
3655: PetscFree(abuf_r);
3656: PetscFree(ba_i);
3657: PetscFree(buf_ri_k);
3659: return(0);
3660: }
3662: static PetscEvent logkey_seqstompisym = 0;
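/*
   MatMerge_SeqsToMPISymbolic - Symbolic phase of MatMerge_SeqsToMPI(): determines the
   row ownership, exchanges the nonzero structure of the off-process rows, builds the
   merged row structure (bi,bj), preallocates the parallel matrix B_mpi, and attaches
   the supporting Mat_Merge_SeqsToMPI structure to it for reuse by the numeric phase.
*/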
3665: PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3666: {
3667: PetscErrorCode ierr;
3668: Mat B_mpi;
3669: Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data;
3670: PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3671: PetscInt **buf_rj,**buf_ri,**buf_ri_k;
3672: PetscInt M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3673: PetscInt len,proc,*dnz,*onz;
3674: PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3675: PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3676: MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits;
3677: MPI_Status *status;
3678: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
3679: PetscBT lnkbt;
3680: Mat_Merge_SeqsToMPI *merge;
3681: PetscContainer container;
3684: if (!logkey_seqstompisym) {
3686: }
3689: /* make sure it is a PETSc comm */
3690: PetscCommDuplicate(comm,&comm,PETSC_NULL);
3691: MPI_Comm_size(comm,&size);
3692: MPI_Comm_rank(comm,&rank);
3693:
3694: PetscNew(Mat_Merge_SeqsToMPI,&merge);
3695: PetscMalloc(size*sizeof(MPI_Status),&status);
3697: /* determine row ownership */
3698: /*---------------------------------------------------------*/
3699: PetscMapInitialize(comm,&merge->rowmap);
3700: merge->rowmap.n = m;
3701: merge->rowmap.N = M;
3702: merge->rowmap.bs = 1;
3703: PetscMapSetUp(&merge->rowmap);
3704: PetscMalloc(size*sizeof(PetscMPIInt),&len_si);
3705: PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);
3706:
3707: m = merge->rowmap.n;
3708: M = merge->rowmap.N;
3709: owners = merge->rowmap.range;
3711: /* determine the number of messages to send, their lengths */
3712: /*---------------------------------------------------------*/
3713: len_s = merge->len_s;
3715: len = 0; /* length of buf_si[] */
3716: merge->nsend = 0;
3717: for (proc=0; proc<size; proc++){
3718: len_si[proc] = 0;
3719: if (proc == rank){
3720: len_s[proc] = 0;
3721: } else {
3722: len_si[proc] = owners[proc+1] - owners[proc] + 1;
3723: len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros in the rows to be sent to [proc] */
3724: }
3725: if (len_s[proc]) {
3726: merge->nsend++;
3727: nrows = 0;
3728: for (i=owners[proc]; i<owners[proc+1]; i++){
3729: if (ai[i+1] > ai[i]) nrows++;
3730: }
3731: len_si[proc] = 2*(nrows+1);
3732: len += len_si[proc];
3733: }
3734: }
3736: /* determine the number and length of messages to receive for ij-structure */
3737: /*-------------------------------------------------------------------------*/
3738: PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);
3739: PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);
3741: /* post the Irecv of j-structure */
3742: /*-------------------------------*/
3743: PetscCommGetNewTag(comm,&tagj);
3744: PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);
3746: /* post the Isend of j-structure */
3747: /*--------------------------------*/
3748: PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);
3749: sj_waits = si_waits + merge->nsend;
3751: for (proc=0, k=0; proc<size; proc++){
3752: if (!len_s[proc]) continue;
3753: i = owners[proc];
3754: MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);
3755: k++;
3756: }
3758: /* receives and sends of j-structure are complete */
3759: /*------------------------------------------------*/
3760: if (merge->nrecv) {MPI_Waitall(merge->nrecv,rj_waits,status);}
3761: if (merge->nsend) {MPI_Waitall(merge->nsend,sj_waits,status);}
3762:
3763: /* send and recv i-structure */
3764: /*---------------------------*/
3765: PetscCommGetNewTag(comm,&tagi);
3766: PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);
3767:
3768: PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);
3769: buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
3770: for (proc=0,k=0; proc<size; proc++){
3771: if (!len_s[proc]) continue;
3772: /* form outgoing message for i-structure:
3773: buf_si[0]: nrows to be sent
3774: [1:nrows]: row index (local to the destination process)
3775: [nrows+1:2*nrows+1]: i-structure index
3776: */
3777: /*-------------------------------------------*/
3778: nrows = len_si[proc]/2 - 1;
3779: buf_si_i = buf_si + nrows+1;
3780: buf_si[0] = nrows;
3781: buf_si_i[0] = 0;
3782: nrows = 0;
3783: for (i=owners[proc]; i<owners[proc+1]; i++){
3784: anzi = ai[i+1] - ai[i];
3785: if (anzi) {
3786: buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3787: buf_si[nrows+1] = i-owners[proc]; /* local row index */
3788: nrows++;
3789: }
3790: }
3791: MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);
3792: k++;
3793: buf_si += len_si[proc];
3794: }
3796: if (merge->nrecv) {MPI_Waitall(merge->nrecv,ri_waits,status);}
3797: if (merge->nsend) {MPI_Waitall(merge->nsend,si_waits,status);}
3799: PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);
3800: for (i=0; i<merge->nrecv; i++){
3801: PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);
3802: }
3804: PetscFree(len_si);
3805: PetscFree(len_ri);
3806: PetscFree(rj_waits);
3807: PetscFree(si_waits);
3808: PetscFree(ri_waits);
3809: PetscFree(buf_s);
3810: PetscFree(status);
3812: /* compute a local seq matrix in each processor */
3813: /*----------------------------------------------*/
3814: /* allocate bi array and free space for accumulating nonzero column info */
3815: PetscMalloc((m+1)*sizeof(PetscInt),&bi);
3816: bi[0] = 0;
3818: /* create and initialize a linked list */
3819: nlnk = N+1;
3820: PetscLLCreate(N,N,nlnk,lnk,lnkbt);
3821:
3822: /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3823: len = 0;
3824: len = ai[owners[rank+1]] - ai[owners[rank]];
3825: PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);
3826: current_space = free_space;
3828: /* determine symbolic info for each local row */
3829: PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);
3830: nextrow = buf_ri_k + merge->nrecv;
3831: nextai = nextrow + merge->nrecv;
3832: for (k=0; k<merge->nrecv; k++){
3833: buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3834: nrows = *buf_ri_k[k];
3835: nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th received i-structure */
3836: nextai[k] = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3837: }
3839: MatPreallocateInitialize(comm,m,n,dnz,onz);
3840: len = 0;
3841: for (i=0;i<m;i++) {
3842: bnzi = 0;
3843: /* add local non-zero cols of this proc's seqmat into lnk */
3844: arow = owners[rank] + i;
3845: anzi = ai[arow+1] - ai[arow];
3846: aj = a->j + ai[arow];
3847: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
3848: bnzi += nlnk;
3849: /* add received col data into lnk */
3850: for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3851: if (i == *nextrow[k]) { /* i-th row */
3852: anzi = *(nextai[k]+1) - *nextai[k];
3853: aj = buf_rj[k] + *nextai[k];
3854: PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);
3855: bnzi += nlnk;
3856: nextrow[k]++; nextai[k]++;
3857: }
3858: }
3859: if (len < bnzi) len = bnzi; /* =max(bnzi) */
3861: /* if free space is not available, make more free space */
3862: if (current_space->local_remaining<bnzi) {
3863: PetscFreeSpaceGet(current_space->total_array_size,&current_space);
3864: nspacedouble++;
3865: }
3866: /* copy data into free space, then initialize lnk */
3867: PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);
3868: MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);
3870: current_space->array += bnzi;
3871: current_space->local_used += bnzi;
3872: current_space->local_remaining -= bnzi;
3873:
3874: bi[i+1] = bi[i] + bnzi;
3875: }
3876:
3877: PetscFree(buf_ri_k);
3879: PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);
3880: PetscFreeSpaceContiguous(&free_space,bj);
3881: PetscLLDestroy(lnk,lnkbt);
3883: /* create symbolic parallel matrix B_mpi */
3884: /*---------------------------------------*/
3885: MatCreate(comm,&B_mpi);
3886: if (n==PETSC_DECIDE) {
3887: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);
3888: } else {
3889: MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
3890: }
3891: MatSetType(B_mpi,MATMPIAIJ);
3892: MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);
3893: MatPreallocateFinalize(dnz,onz);
3895: /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3896: B_mpi->assembled = PETSC_FALSE;
3897: B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
3898: merge->bi = bi;
3899: merge->bj = bj;
3900: merge->buf_ri = buf_ri;
3901: merge->buf_rj = buf_rj;
3902: merge->coi = PETSC_NULL;
3903: merge->coj = PETSC_NULL;
3904: merge->owners_co = PETSC_NULL;
3906: /* attach the supporting struct to B_mpi for reuse */
3907: PetscContainerCreate(PETSC_COMM_SELF,&container);
3908: PetscContainerSetPointer(container,merge);
3909: PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);
3910: *mpimat = B_mpi;
3912: PetscCommDestroy(&comm);
3914: return(0);
3915: }
3917: static PetscEvent logkey_seqstompi = 0;
3920: PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3921: {
3922: PetscErrorCode ierr;
3925: if (!logkey_seqstompi) {
3927: }
3929: if (scall == MAT_INITIAL_MATRIX){
3930: MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);
3931: }
3932: MatMerge_SeqsToMPINumeric(seqmat,*mpimat);
3934: return(0);
3935: }
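/*
   Usage sketch for MatMerge_SeqsToMPI() (illustrative only; the helper name and its
   arguments are hypothetical, and error checking follows the conventions of this file):
   every process holds an assembled SeqAIJ matrix seqmat of the same global size and
   the parallel sum over comm is wanted.
*/
static PetscErrorCode ExampleUsage_MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,Mat *C)
{
  /* symbolic + numeric phase; PETSc chooses the row and column ownership */
  MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,C);
  /* when only the values of seqmat change (same nonzero pattern) the numeric phase is repeated */
  MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,C);
  return(0);
}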
3936: static PetscEvent logkey_getlocalmat = 0;
3939: /*@C
3940: MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of the parallel matrix A
3942: Not Collective
3944: Input Parameters:
3945: + A - the matrix
3946: - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3948: Output Parameter:
3949: . A_loc - the local sequential matrix generated
3951: Level: developer
3953: @*/
3954: PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3955: {
3956: PetscErrorCode ierr;
3957: Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data;
3958: Mat_SeqAIJ *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3959: PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3960: PetscScalar *aa=a->a,*ba=b->a,*ca;
3961: PetscInt am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3962: PetscInt *ci,*cj,col,ncols_d,ncols_o,jo;
3965: if (!logkey_getlocalmat) {
3967: }
3969: if (scall == MAT_INITIAL_MATRIX){
3970: PetscMalloc((1+am)*sizeof(PetscInt),&ci);
3971: ci[0] = 0;
3972: for (i=0; i<am; i++){
3973: ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3974: }
3975: PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);
3976: PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);
3977: k = 0;
3978: for (i=0; i<am; i++) {
3979: ncols_o = bi[i+1] - bi[i];
3980: ncols_d = ai[i+1] - ai[i];
3981: /* off-diagonal portion of A */
3982: for (jo=0; jo<ncols_o; jo++) {
3983: col = cmap[*bj];
3984: if (col >= cstart) break;
3985: cj[k] = col; bj++;
3986: ca[k++] = *ba++;
3987: }
3988: /* diagonal portion of A */
3989: for (j=0; j<ncols_d; j++) {
3990: cj[k] = cstart + *aj++;
3991: ca[k++] = *aa++;
3992: }
3993: /* off-diagonal portion of A */
3994: for (j=jo; j<ncols_o; j++) {
3995: cj[k] = cmap[*bj++];
3996: ca[k++] = *ba++;
3997: }
3998: }
3999: /* put together the new matrix */
4000: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);
4001: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4002: /* Since these are PETSc arrays, change flags to free them as necessary. */
4003: mat = (Mat_SeqAIJ*)(*A_loc)->data;
4004: mat->free_a = PETSC_TRUE;
4005: mat->free_ij = PETSC_TRUE;
4006: mat->nonew = 0;
4007: } else if (scall == MAT_REUSE_MATRIX){
4008: mat=(Mat_SeqAIJ*)(*A_loc)->data;
4009: ci = mat->i; cj = mat->j; ca = mat->a;
4010: for (i=0; i<am; i++) {
4011: /* off-diagonal portion of A */
4012: ncols_o = bi[i+1] - bi[i];
4013: for (jo=0; jo<ncols_o; jo++) {
4014: col = cmap[*bj];
4015: if (col >= cstart) break;
4016: *ca++ = *ba++; bj++;
4017: }
4018: /* diagonal portion of A */
4019: ncols_d = ai[i+1] - ai[i];
4020: for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4021: /* off-diagonal portion of A */
4022: for (j=jo; j<ncols_o; j++) {
4023: *ca++ = *ba++; bj++;
4024: }
4025: }
4026: } else {
4027: SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4028: }
4031: return(0);
4032: }
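/*
   Usage sketch for MatGetLocalMat() (illustrative only; names are hypothetical): pull
   the local rows of an MPIAIJ matrix A into a sequential matrix whose columns carry
   global column indices.
*/
static PetscErrorCode ExampleUsage_MatGetLocalMat(Mat A)
{
  Mat A_loc;

  MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);  /* allocates A_loc */
  /* ... after the values of A change, but not its nonzero pattern: */
  MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);    /* refills the existing A_loc */
  MatDestroy(A_loc);
  return(0);
}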
4034: static PetscEvent logkey_getlocalmatcondensed = 0;
4037: /*@C
4038: MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A, keeping only its NON-ZERO columns
4040: Not Collective
4042: Input Parameters:
4043: + A - the matrix
4044: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4045: - row, col - index sets of rows and columns to extract (or PETSC_NULL)
4047: Output Parameter:
4048: . A_loc - the local sequential matrix generated
4050: Level: developer
4052: @*/
4053: PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4054: {
4055: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4056: PetscErrorCode ierr;
4057: PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4058: IS isrowa,iscola;
4059: Mat *aloc;
4062: if (!logkey_getlocalmatcondensed) {
4064: }
4066: if (!row){
4067: start = A->rmap.rstart; end = A->rmap.rend;
4068: ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);
4069: } else {
4070: isrowa = *row;
4071: }
4072: if (!col){
4073: start = A->cmap.rstart;
4074: cmap = a->garray;
4075: nzA = a->A->cmap.n;
4076: nzB = a->B->cmap.n;
4077: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
4078: ncols = 0;
4079: for (i=0; i<nzB; i++) {
4080: if (cmap[i] < start) idx[ncols++] = cmap[i];
4081: else break;
4082: }
4083: imark = i;
4084: for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4085: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4086: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);
4087: PetscFree(idx);
4088: } else {
4089: iscola = *col;
4090: }
4091: if (scall != MAT_INITIAL_MATRIX){
4092: PetscMalloc(sizeof(Mat),&aloc);
4093: aloc[0] = *A_loc;
4094: }
4095: MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);
4096: *A_loc = aloc[0];
4097: PetscFree(aloc);
4098: if (!row){
4099: ISDestroy(isrowa);
4100: }
4101: if (!col){
4102: ISDestroy(iscola);
4103: }
4105: return(0);
4106: }
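/*
   Usage sketch for MatGetLocalMatCondensed() (illustrative only; names are hypothetical):
   extract the local rows of A restricted to the columns that carry local nonzeros;
   passing PETSC_NULL for the row and column index sets lets the routine build them.
*/
static PetscErrorCode ExampleUsage_MatGetLocalMatCondensed(Mat A)
{
  Mat A_loc;

  MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
  /* ... */
  MatDestroy(A_loc);
  return(0);
}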
4108: static PetscEvent logkey_GetBrowsOfAcols = 0;
4111: /*@C
4112: MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4114: Collective on Mat
4116: Input Parameters:
4117: + A,B - the matrices in mpiaij format
4118: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4119: - rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4121: Output Parameter:
4122: + rowb, colb - index sets of rows and columns of B to extract
4123: . brstart - row index in B_seq at which the next B->rmap.n rows, taken from B's local rows, begin
4124: - B_seq - the sequential matrix generated
4126: Level: developer
4128: @*/
4129: PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4130: {
4131: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4132: PetscErrorCode ierr;
4133: PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4134: IS isrowb,iscolb;
4135: Mat *bseq;
4136:
4138: if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4139: SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4140: }
4141: if (!logkey_GetBrowsOfAcols) {
4143: }
4145:
4146: if (scall == MAT_INITIAL_MATRIX){
4147: start = A->cmap.rstart;
4148: cmap = a->garray;
4149: nzA = a->A->cmap.n;
4150: nzB = a->B->cmap.n;
4151: PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);
4152: ncols = 0;
4153: for (i=0; i<nzB; i++) { /* row < local row index */
4154: if (cmap[i] < start) idx[ncols++] = cmap[i];
4155: else break;
4156: }
4157: imark = i;
4158: for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */
4159: for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4160: ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);
4161: PetscFree(idx);
4162: *brstart = imark;
4163: ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);
4164: } else {
4165: if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4166: isrowb = *rowb; iscolb = *colb;
4167: PetscMalloc(sizeof(Mat),&bseq);
4168: bseq[0] = *B_seq;
4169: }
4170: MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);
4171: *B_seq = bseq[0];
4172: PetscFree(bseq);
4173: if (!rowb){
4174: ISDestroy(isrowb);
4175: } else {
4176: *rowb = isrowb;
4177: }
4178: if (!colb){
4179: ISDestroy(iscolb);
4180: } else {
4181: *colb = iscolb;
4182: }
4184: return(0);
4185: }
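/*
   Usage sketch for MatGetBrowsOfAcols() (illustrative only; names are hypothetical):
   gather the rows of B that correspond to the nonzero columns of the local part of A.
   The index sets returned by the first call must be kept for a MAT_REUSE_MATRIX call.
*/
static PetscErrorCode ExampleUsage_MatGetBrowsOfAcols(Mat A,Mat B)
{
  IS       rowb,colb;
  PetscInt brstart;
  Mat      B_seq;

  MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);
  /* ... after the values of B change, but not its nonzero pattern: */
  MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);
  ISDestroy(rowb); ISDestroy(colb);
  MatDestroy(B_seq);
  return(0);
}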
4187: static PetscEvent logkey_GetBrowsOfAocols = 0;
4190: /*@C
4191: MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4192: of the OFF-DIAGONAL portion of local A
4194: Collective on Mat
4196: Input Parameters:
4197: + A,B - the matrices in mpiaij format
4198: . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4199: . startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4200: - bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4202: Output Parameter:
4203: . B_oth - the sequential matrix generated
4205: Level: developer
4207: @*/
4208: PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4209: {
4210: VecScatter_MPI_General *gen_to,*gen_from;
4211: PetscErrorCode ierr;
4212: Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data;
4213: Mat_SeqAIJ *b_oth;
4214: VecScatter ctx=a->Mvctx;
4215: MPI_Comm comm=ctx->comm;
4216: PetscMPIInt *rprocs,*sprocs,tag=ctx->tag,rank;
4217: PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4218: PetscScalar *rvalues,*svalues,*b_otha,*bufa,*bufA;
4219: PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4220: MPI_Request *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4221: MPI_Status *sstatus,rstatus;
4222: PetscInt *cols,sbs,rbs;
4223: PetscScalar *vals;
4226: if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4227: SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4228: }
4229: if (!logkey_GetBrowsOfAocols) {
4231: }
4233: MPI_Comm_rank(comm,&rank);
4235: gen_to = (VecScatter_MPI_General*)ctx->todata;
4236: gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4237: rvalues = gen_from->values; /* holds the length of receiving row */
4238: svalues = gen_to->values; /* holds the length of sending row */
4239: nrecvs = gen_from->n;
4240: nsends = gen_to->n;
4242: PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);
4243: srow = gen_to->indices; /* local row index to be sent */
4244: sstarts = gen_to->starts;
4245: sprocs = gen_to->procs;
4246: sstatus = gen_to->sstatus;
4247: sbs = gen_to->bs;
4248: rstarts = gen_from->starts;
4249: rprocs = gen_from->procs;
4250: rbs = gen_from->bs;
4252: if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4253: if (scall == MAT_INITIAL_MATRIX){
4254: /* i-array */
4255: /*---------*/
4256: /* post receives */
4257: for (i=0; i<nrecvs; i++){
4258: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4259: nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4260: MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
4261: }
4263: /* pack the outgoing message */
4264: PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);
4265: rstartsj = sstartsj + nsends +1;
4266: sstartsj[0] = 0; rstartsj[0] = 0;
4267: len = 0; /* total length of j or a array to be sent */
4268: k = 0;
4269: for (i=0; i<nsends; i++){
4270: rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4271: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4272: for (j=0; j<nrows; j++) {
4273: row = srow[k] + B->rmap.range[rank]; /* global row idx */
4274: for (l=0; l<sbs; l++){
4275: MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL); /* rowlength */
4276: rowlen[j*sbs+l] = ncols;
4277: len += ncols;
4278: MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);
4279: }
4280: k++;
4281: }
4282: MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);
4283: sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4284: }
4285: /* recvs and sends of i-array are completed */
4286: i = nrecvs;
4287: while (i--) {
4288: MPI_Waitany(nrecvs,rwaits,&j,&rstatus);
4289: }
4290: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
4292: /* allocate buffers for sending j and a arrays */
4293: PetscMalloc((len+1)*sizeof(PetscInt),&bufj);
4294: PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);
4296: /* create i-array of B_oth */
4297: PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);
4298: b_othi[0] = 0;
4299: len = 0; /* total length of j or a array to be received */
4300: k = 0;
4301: for (i=0; i<nrecvs; i++){
4302: rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4303: nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4304: for (j=0; j<nrows; j++) {
4305: b_othi[k+1] = b_othi[k] + rowlen[j];
4306: len += rowlen[j]; k++;
4307: }
4308: rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4309: }
4311: /* allocate space for j and a arrays of B_oth */
4312: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);
4313: PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);
4315: /* j-array */
4316: /*---------*/
4317: /* post receives of j-array */
4318: for (i=0; i<nrecvs; i++){
4319: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4320: MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);
4321: }
4323: /* pack the outgoing message j-array */
4324: k = 0;
4325: for (i=0; i<nsends; i++){
4326: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4327: bufJ = bufj+sstartsj[i];
4328: for (j=0; j<nrows; j++) {
4329: row = srow[k++] + B->rmap.range[rank]; /* global row idx */
4330: for (ll=0; ll<sbs; ll++){
4331: MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
4332: for (l=0; l<ncols; l++){
4333: *bufJ++ = cols[l];
4334: }
4335: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);
4336: }
4337: }
4338: MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);
4339: }
4341: /* recvs and sends of j-array are completed */
4342: i = nrecvs;
4343: while (i--) {
4344: MPI_Waitany(nrecvs,rwaits,&j,&rstatus);
4345: }
4346: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
4347: } else if (scall == MAT_REUSE_MATRIX){
4348: sstartsj = *startsj;
4349: rstartsj = sstartsj + nsends +1;
4350: bufa = *bufa_ptr;
4351: b_oth = (Mat_SeqAIJ*)(*B_oth)->data;
4352: b_otha = b_oth->a;
4353: } else {
4354: SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
4355: }
4357: /* a-array */
4358: /*---------*/
4359: /* post receives of a-array */
4360: for (i=0; i<nrecvs; i++){
4361: nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4362: MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
4363: }
4365: /* pack the outgoing message a-array */
4366: k = 0;
4367: for (i=0; i<nsends; i++){
4368: nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4369: bufA = bufa+sstartsj[i];
4370: for (j=0; j<nrows; j++) {
4371: row = srow[k++] + B->rmap.range[rank]; /* global row idx */
4372: for (ll=0; ll<sbs; ll++){
4373: MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
4374: for (l=0; l<ncols; l++){
4375: *bufA++ = vals[l];
4376: }
4377: MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);
4378: }
4379: }
4380: MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
4381: }
4382: /* recvs and sends of a-array are completed */
4383: i = nrecvs;
4384: while (i--) {
4385: MPI_Waitany(nrecvs,rwaits,&j,&rstatus);
4386: }
4387: if (nsends) {MPI_Waitall(nsends,swaits,sstatus);}
4388: PetscFree2(rwaits,swaits);
4390: if (scall == MAT_INITIAL_MATRIX){
4391: /* put together the new matrix */
4392: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);
4394: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4395: /* Since these are PETSc arrays, change flags to free them as necessary. */
4396: b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
4397: b_oth->free_a = PETSC_TRUE;
4398: b_oth->free_ij = PETSC_TRUE;
4399: b_oth->nonew = 0;
4401: PetscFree(bufj);
4402: if (!startsj || !bufa_ptr){
4403: PetscFree(sstartsj);
4404: PetscFree(bufa);
4405: } else {
4406: *startsj = sstartsj;
4407: *bufa_ptr = bufa;
4408: }
4409: }
4411: return(0);
4412: }
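/*
   Usage sketch for MatGetBrowsOfAoCols() (illustrative only; names are hypothetical):
   gather the rows of B that correspond to the nonzero off-diagonal columns of the local
   part of A.  Keeping startsj and bufa allows the communication pattern to be reused
   when only the numerical values of B change.
*/
static PetscErrorCode ExampleUsage_MatGetBrowsOfAoCols(Mat A,Mat B)
{
  PetscInt    *startsj = PETSC_NULL;
  PetscScalar *bufa    = PETSC_NULL;
  Mat         B_oth;

  MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);
  /* ... after the values of B change, but not its nonzero pattern: */
  MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);
  MatDestroy(B_oth);
  PetscFree(startsj);
  PetscFree(bufa);
  return(0);
}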
4416: /*@C
4417: MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4419: Not Collective
4421: Input Parameters:
4422: . A - The matrix in mpiaij format
4424: Output Parameter:
4425: + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4426: . colmap - A map from global column index to local index into lvec
4427: - multScatter - A scatter from the argument of a matrix-vector product to lvec
4429: Level: developer
4431: @*/
4432: #if defined (PETSC_USE_CTABLE)
4433: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4434: #else
4435: PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4436: #endif
4437: {
4438: Mat_MPIAIJ *a;
4445: a = (Mat_MPIAIJ *) A->data;
4446: if (lvec) *lvec = a->lvec;
4447: if (colmap) *colmap = a->colmap;
4448: if (multScatter) *multScatter = a->Mvctx;
4449: return(0);
4450: }
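/*
   Usage sketch for MatGetCommunicationStructs() (illustrative only; names are
   hypothetical): look at the communication structures an MPIAIJ matrix uses for
   MatMult().  The returned objects are owned by the matrix and must not be destroyed
   by the caller.
*/
static PetscErrorCode ExampleUsage_MatGetCommunicationStructs(Mat A)
{
  Vec        lvec;
  VecScatter Mvctx;

  MatGetCommunicationStructs(A,&lvec,PETSC_NULL,&Mvctx);
  /* lvec receives the off-process entries of the input vector through Mvctx */
  return(0);
}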
4457: /*MC
4458: MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4460: Options Database Keys:
4461: . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4463: Level: beginner
4465: .seealso: MatCreateMPIAIJ
4466: M*/
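/*
   Usage sketch for the MATMPIAIJ type (illustrative only; the sizes, preallocation
   counts and helper name are hypothetical): the usual way to obtain a matrix of this
   type is MatCreate() followed by MatSetType() (or -mat_type mpiaij together with
   MatSetFromOptions()) and a preallocation call.
*/
static PetscErrorCode ExampleCreate_MATMPIAIJ(MPI_Comm comm,PetscInt mlocal,PetscInt N,Mat *C)
{
  MatCreate(comm,C);
  MatSetSizes(*C,mlocal,PETSC_DECIDE,PETSC_DETERMINE,N);
  MatSetType(*C,MATMPIAIJ);
  /* rough estimates: at most 5 nonzeros per row in the diagonal block, 2 elsewhere */
  MatMPIAIJSetPreallocation(*C,5,PETSC_NULL,2,PETSC_NULL);
  return(0);
}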
4471: PetscErrorCode MatCreate_MPIAIJ(Mat B)
4472: {
4473: Mat_MPIAIJ *b;
4475: PetscMPIInt size;
4478: MPI_Comm_size(B->comm,&size);
4480: PetscNew(Mat_MPIAIJ,&b);
4481: B->data = (void*)b;
4482: PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
4483: B->factor = 0;
4484: B->rmap.bs = 1;
4485: B->assembled = PETSC_FALSE;
4486: B->mapping = 0;
4488: B->insertmode = NOT_SET_VALUES;
4489: b->size = size;
4490: MPI_Comm_rank(B->comm,&b->rank);
4492: /* build cache for off array entries formed */
4493: MatStashCreate_Private(B->comm,1,&B->stash);
4494: b->donotstash = PETSC_FALSE;
4495: b->colmap = 0;
4496: b->garray = 0;
4497: b->roworiented = PETSC_TRUE;
4499: /* stuff used for matrix vector multiply */
4500: b->lvec = PETSC_NULL;
4501: b->Mvctx = PETSC_NULL;
4503: /* stuff for MatGetRow() */
4504: b->rowindices = 0;
4505: b->rowvalues = 0;
4506: b->getrowactive = PETSC_FALSE;
4509: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4510: "MatStoreValues_MPIAIJ",
4511: MatStoreValues_MPIAIJ);
4512: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4513: "MatRetrieveValues_MPIAIJ",
4514: MatRetrieveValues_MPIAIJ);
4515: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4516: "MatGetDiagonalBlock_MPIAIJ",
4517: MatGetDiagonalBlock_MPIAIJ);
4518: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4519: "MatIsTranspose_MPIAIJ",
4520: MatIsTranspose_MPIAIJ);
4521: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4522: "MatMPIAIJSetPreallocation_MPIAIJ",
4523: MatMPIAIJSetPreallocation_MPIAIJ);
4524: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4525: "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4526: MatMPIAIJSetPreallocationCSR_MPIAIJ);
4527: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4528: "MatDiagonalScaleLocal_MPIAIJ",
4529: MatDiagonalScaleLocal_MPIAIJ);
4530: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4531: "MatConvert_MPIAIJ_MPICSRPERM",
4532: MatConvert_MPIAIJ_MPICSRPERM);
4533: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4534: "MatConvert_MPIAIJ_MPICRL",
4535: MatConvert_MPIAIJ_MPICRL);
4536: PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);
4537: return(0);
4538: }
4543: /*@C
4544: MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
4545: and "off-diagonal" part of the matrix in CSR format.
4547: Collective on MPI_Comm
4549: Input Parameters:
4550: + comm - MPI communicator
4551: . m - number of local rows (Cannot be PETSC_DECIDE)
4552: . n - This value should be the same as the local size used in creating the
4553: x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4554: calculated if N is given). For square matrices n is almost always m.
4555: . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4556: . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4557: . i - row indices for "diagonal" portion of matrix
4558: . j - column indices
4559: . a - matrix values
4560: . oi - row indices for "off-diagonal" portion of matrix
4561: . oj - column indices
4562: - oa - matrix values
4564: Output Parameter:
4565: . mat - the matrix
4567: Level: advanced
4569: Notes:
4570: The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4572: The i and j indices are 0 based
4573:
4574: See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4577: .keywords: matrix, aij, compressed row, sparse, parallel
4579: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4580: MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4581: @*/
4582: PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4583: PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4584: {
4586: Mat_MPIAIJ *maij;
4589: if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4590: if (i[0]) {
4591: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4592: }
4593: if (oi[0]) {
4594: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4595: }
4596: MatCreate(comm,mat);
4597: MatSetSizes(*mat,m,n,M,N);
4598: MatSetType(*mat,MATMPIAIJ);
4599: maij = (Mat_MPIAIJ*) (*mat)->data;
4600: maij->donotstash = PETSC_TRUE;
4601: (*mat)->preallocated = PETSC_TRUE;
4603: (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4604: PetscMapSetUp(&(*mat)->rmap);
4605: PetscMapSetUp(&(*mat)->cmap);
4607: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);
4608: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);
4610: MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);
4611: MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);
4612: MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);
4613: MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);
4615: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
4616: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
4617: return(0);
4618: }
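/*
   Usage sketch for MatCreateMPIAIJWithSplitArrays() (illustrative only; every name and
   value below is hypothetical): each process contributes one row and one column of a
   global diagonal matrix, so the off-diagonal block is empty (oj/oa are dummy entries
   that are never read).  The column index in the diagonal block is local to that block,
   while off-diagonal column indices are global, which follows from how the two sequential
   blocks are created above.  Stack arrays are acceptable here only because the matrix is
   destroyed before they go out of scope; the arrays are not copied.
*/
static PetscErrorCode ExampleUsage_MatCreateMPIAIJWithSplitArrays(MPI_Comm comm)
{
  PetscInt    i[2]  = {0,1},j[1]  = {0};   /* one row with one entry in the diagonal block */
  PetscInt    oi[2] = {0,0},oj[1] = {0};   /* empty off-diagonal block */
  PetscScalar a[1]  = {2.0},oa[1] = {0.0};
  Mat         C;

  MatCreateMPIAIJWithSplitArrays(comm,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&C);
  MatDestroy(C);
  return(0);
}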
4620: /*
4621: Special version for direct calls from Fortran
4622: */
4623: #if defined(PETSC_HAVE_FORTRAN_CAPS)
4624: #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4625: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4626: #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4627: #endif
4629: /* Change these macros so they can be used in a void function */
4630: #undef CHKERRQ
4631: #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4632: #undef SETERRQ2
4633: #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4634: #undef SETERRQ
4635: #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4640: void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4641: {
4642: Mat mat = *mmat;
4643: PetscInt m = *mm, n = *mn;
4644: InsertMode addv = *maddv;
4645: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
4646: PetscScalar value;
4649: MatPreallocated(mat);
4650: if (mat->insertmode == NOT_SET_VALUES) {
4651: mat->insertmode = addv;
4652: }
4653: #if defined(PETSC_USE_DEBUG)
4654: else if (mat->insertmode != addv) {
4655: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4656: }
4657: #endif
4658: {
4659: PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4660: PetscInt cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4661: PetscTruth roworiented = aij->roworiented;
4663: /* Some Variables required in the macro */
4664: Mat A = aij->A;
4665: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
4666: PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4667: PetscScalar *aa = a->a;
4668: PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4669: Mat B = aij->B;
4670: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
4671: PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4672: PetscScalar *ba = b->a;
4674: PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4675: PetscInt nonew = a->nonew;
4676: PetscScalar *ap1,*ap2;
4679: for (i=0; i<m; i++) {
4680: if (im[i] < 0) continue;
4681: #if defined(PETSC_USE_DEBUG)
4682: if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4683: #endif
4684: if (im[i] >= rstart && im[i] < rend) {
4685: row = im[i] - rstart;
4686: lastcol1 = -1;
4687: rp1 = aj + ai[row];
4688: ap1 = aa + ai[row];
4689: rmax1 = aimax[row];
4690: nrow1 = ailen[row];
4691: low1 = 0;
4692: high1 = nrow1;
4693: lastcol2 = -1;
4694: rp2 = bj + bi[row];
4695: ap2 = ba + bi[row];
4696: rmax2 = bimax[row];
4697: nrow2 = bilen[row];
4698: low2 = 0;
4699: high2 = nrow2;
4701: for (j=0; j<n; j++) {
4702: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4703: if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4704: if (in[j] >= cstart && in[j] < cend){
4705: col = in[j] - cstart;
4706: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4707: } else if (in[j] < 0) continue;
4708: #if defined(PETSC_USE_DEBUG)
4709: else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4710: #endif
4711: else {
4712: if (mat->was_assembled) {
4713: if (!aij->colmap) {
4714: CreateColmap_MPIAIJ_Private(mat);
4715: }
4716: #if defined (PETSC_USE_CTABLE)
4717: PetscTableFind(aij->colmap,in[j]+1,&col);
4718: col--;
4719: #else
4720: col = aij->colmap[in[j]] - 1;
4721: #endif
4722: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4723: DisAssemble_MPIAIJ(mat);
4724: col = in[j];
4725: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4726: B = aij->B;
4727: b = (Mat_SeqAIJ*)B->data;
4728: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4729: rp2 = bj + bi[row];
4730: ap2 = ba + bi[row];
4731: rmax2 = bimax[row];
4732: nrow2 = bilen[row];
4733: low2 = 0;
4734: high2 = nrow2;
4735: bm = aij->B->rmap.n;
4736: ba = b->a;
4737: }
4738: } else col = in[j];
4739: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4740: }
4741: }
4742: } else {
4743: if (!aij->donotstash) {
4744: if (roworiented) {
4745: if (ignorezeroentries && v[i*n] == 0.0) continue;
4746: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
4747: } else {
4748: if (ignorezeroentries && v[i] == 0.0) continue;
4749: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
4750: }
4751: }
4752: }
4753: }}
4754: PetscFunctionReturnVoid();
4755: }