Actual source code: mpidense.c
1: #define PETSCMAT_DLL
3: /*
4:    Basic functions for parallel dense matrices.
5: */
6:
7:  #include "src/mat/impls/dense/mpi/mpidense.h"
11: PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
12: {
16: MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);
17: return(0);
18: }
22: PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
23: {
27: MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);
28: return(0);
29: }
33: /*@
35:     MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
36:        matrix that stores the locally owned part of the operator. For a sequential matrix it returns the matrix itself.
38: Input Parameter:
39: . A - the Seq or MPI dense matrix
41: Output Parameter:
42: . B - the inner matrix
44: Level: intermediate
46: @*/
47: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
48: {
49: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
51: PetscTruth flg;
52: PetscMPIInt size;
55: PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
56:   if (!flg) { /* this check is clumsy: a MATDENSE on a single process is really a MATSEQDENSE */
57: PetscTypeCompare((PetscObject)A,MATDENSE,&flg);
58: if (flg) {
59: MPI_Comm_size(A->comm,&size);
60: if (size == 1) flg = PETSC_FALSE;
61: }
62: }
63: if (flg) {
64: *B = mat->A;
65: } else {
66: *B = A;
67: }
68: return(0);
69: }
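/*
   Example (an illustrative sketch, not taken from this file; mlocal and N are hypothetical sizes):
   typical use of MatDenseGetLocalMatrix() to reach the sequential matrix holding the locally
   owned rows of a parallel dense matrix.

     Mat            A,Aloc;
     PetscScalar    *array;
     PetscErrorCode ierr;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,mlocal,PETSC_DECIDE,PETSC_DECIDE,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatDenseGetLocalMatrix(A,&Aloc);CHKERRQ(ierr);      Aloc is the MATSEQDENSE with the locally owned rows
     ierr = MatGetArray(Aloc,&array);CHKERRQ(ierr);             mlocal x N values, stored by columns
     ierr = MatRestoreArray(Aloc,&array);CHKERRQ(ierr);
*/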
73: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
74: {
75: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
77: PetscInt lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;
80:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
81: lrow = row - rstart;
82: MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);
83: return(0);
84: }
88: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
89: {
93: if (idx) {PetscFree(*idx);}
94: if (v) {PetscFree(*v);}
95: return(0);
96: }
101: PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
102: {
103: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
105: PetscInt m = A->rmap.n,rstart = A->rmap.rstart;
106: PetscScalar *array;
107: MPI_Comm comm;
110: if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
112: /* The reuse aspect is not implemented efficiently */
113: if (reuse) { MatDestroy(*B);}
115: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
116: MatGetArray(mdn->A,&array);
117: MatCreate(comm,B);
118: MatSetSizes(*B,m,m,m,m);
119: MatSetType(*B,mdn->A->type_name);
120: MatSeqDenseSetPreallocation(*B,array+m*rstart);
121: MatRestoreArray(mdn->A,&array);
122: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
123: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
124:
125: *iscopy = PETSC_TRUE;
126: return(0);
127: }
132: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
133: {
134: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
136: PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
137: PetscTruth roworiented = A->roworiented;
140: for (i=0; i<m; i++) {
141: if (idxm[i] < 0) continue;
142: if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143: if (idxm[i] >= rstart && idxm[i] < rend) {
144: row = idxm[i] - rstart;
145: if (roworiented) {
146: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
147: } else {
148: for (j=0; j<n; j++) {
149: if (idxn[j] < 0) continue;
150: if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
152: }
153: }
154: } else {
155: if (!A->donotstash) {
156: if (roworiented) {
157: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
158: } else {
159: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
160: }
161: }
162: }
163: }
164: return(0);
165: }
169: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
170: {
171: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
173: PetscInt i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
176: for (i=0; i<m; i++) {
177: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
178: if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
179: if (idxm[i] >= rstart && idxm[i] < rend) {
180: row = idxm[i] - rstart;
181: for (j=0; j<n; j++) {
182: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
183: if (idxn[j] >= mat->cmap.N) {
184: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
185: }
186: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
187: }
188: } else {
189: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
190: }
191: }
192: return(0);
193: }
197: PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
198: {
199: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
203: MatGetArray(a->A,array);
204: return(0);
205: }
209: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
210: {
211: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
212: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
214: PetscInt i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
215: PetscScalar *av,*bv,*v = lmat->v;
216: Mat newmat;
219: ISGetIndices(isrow,&irow);
220: ISGetIndices(iscol,&icol);
221: ISGetLocalSize(isrow,&nrows);
222: ISGetLocalSize(iscol,&ncols);
224:   /* No parallel redistribution is currently supported! We should really check each index set
225:      to confirm that it is OK. ... Currently the submatrix must use the same row partitioning as the
226:      original matrix! */
228: MatGetLocalSize(A,&nlrows,&nlcols);
229: MatGetOwnershipRange(A,&rstart,&rend);
230:
231: /* Check submatrix call */
232: if (scall == MAT_REUSE_MATRIX) {
233: /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
234:     /* Really need to test row and column sizes! */
235: newmat = *B;
236: } else {
237: /* Create and fill new matrix */
238: MatCreate(A->comm,&newmat);
239: MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);
240: MatSetType(newmat,A->type_name);
241: MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
242: }
244: /* Now extract the data pointers and do the copy, column at a time */
245: newmatd = (Mat_MPIDense*)newmat->data;
246: bv = ((Mat_SeqDense *)newmatd->A->data)->v;
247:
248: for (i=0; i<ncols; i++) {
249: av = v + nlrows*icol[i];
250: for (j=0; j<nrows; j++) {
251: *bv++ = av[irow[j] - rstart];
252: }
253: }
255: /* Assemble the matrices so that the correct flags are set */
256: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
257: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
259: /* Free work space */
260: ISRestoreIndices(isrow,&irow);
261: ISRestoreIndices(iscol,&icol);
262: *B = newmat;
263: return(0);
264: }
268: PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
269: {
271: return(0);
272: }
276: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
277: {
278: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
279: MPI_Comm comm = mat->comm;
281: PetscInt nstash,reallocs;
282: InsertMode addv;
285:   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES, not a mix */
286: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
287: if (addv == (ADD_VALUES|INSERT_VALUES)) {
288: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
289: }
290: mat->insertmode = addv; /* in case this processor had no cache */
292: MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);
293: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
294: PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
295: return(0);
296: }
300: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
301: {
302: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
303: PetscErrorCode ierr;
304: PetscInt i,*row,*col,flg,j,rstart,ncols;
305: PetscMPIInt n;
306: PetscScalar *val;
307: InsertMode addv=mat->insertmode;
310: /* wait on receives */
311: while (1) {
312: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
313: if (!flg) break;
314:
315: for (i=0; i<n;) {
316: /* Now identify the consecutive vals belonging to the same row */
317: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
318: if (j < n) ncols = j-i;
319: else ncols = n-i;
320: /* Now assemble all these values with a single function call */
321: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
322: i = j;
323: }
324: }
325: MatStashScatterEnd_Private(&mat->stash);
326:
327: MatAssemblyBegin(mdn->A,mode);
328: MatAssemblyEnd(mdn->A,mode);
330: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
331: MatSetUpMultiply_MPIDense(mat);
332: }
333: return(0);
334: }
338: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
339: {
341: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
344: MatZeroEntries(l->A);
345: return(0);
346: }
348: /* The code does not handle the diagonal entries correctly unless the
349:    matrix is square and the column and row ownerships are identical.
350:    This is a BUG. The only way to fix it seems to be to access
351:    mdn->A and mdn->B directly and not through the MatZeroRows()
352:    routine.
353: */
356: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
357: {
358: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
360: PetscInt i,*owners = A->rmap.range;
361: PetscInt *nprocs,j,idx,nsends;
362: PetscInt nmax,*svalues,*starts,*owner,nrecvs;
363: PetscInt *rvalues,tag = A->tag,count,base,slen,*source;
364: PetscInt *lens,*lrows,*values;
365: PetscMPIInt n,imdex,rank = l->rank,size = l->size;
366: MPI_Comm comm = A->comm;
367: MPI_Request *send_waits,*recv_waits;
368: MPI_Status recv_status,*send_status;
369: PetscTruth found;
372: /* first count number of contributors to each processor */
373: PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
374: PetscMemzero(nprocs,2*size*sizeof(PetscInt));
375: PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
376: for (i=0; i<N; i++) {
377: idx = rows[i];
378: found = PETSC_FALSE;
379: for (j=0; j<size; j++) {
380: if (idx >= owners[j] && idx < owners[j+1]) {
381: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
382: }
383: }
384: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
385: }
386: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
388: /* inform other processors of number of messages and max length*/
389: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
391: /* post receives: */
392: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
393: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
394: for (i=0; i<nrecvs; i++) {
395: MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
396: }
398:   /* do sends:
399:       1) starts[i] gives the starting index in svalues of the values going to
400:          the ith processor
401:   */
402: PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
403: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
404: PetscMalloc((size+1)*sizeof(PetscInt),&starts);
405: starts[0] = 0;
406: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
407: for (i=0; i<N; i++) {
408: svalues[starts[owner[i]]++] = rows[i];
409: }
411: starts[0] = 0;
412: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
413: count = 0;
414: for (i=0; i<size; i++) {
415: if (nprocs[2*i+1]) {
416: MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
417: }
418: }
419: PetscFree(starts);
421: base = owners[rank];
423: /* wait on receives */
424: PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
425: source = lens + nrecvs;
426: count = nrecvs; slen = 0;
427: while (count) {
428: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
429: /* unpack receives into our local space */
430: MPI_Get_count(&recv_status,MPIU_INT,&n);
431: source[imdex] = recv_status.MPI_SOURCE;
432: lens[imdex] = n;
433: slen += n;
434: count--;
435: }
436: PetscFree(recv_waits);
437:
438: /* move the data into the send scatter */
439: PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
440: count = 0;
441: for (i=0; i<nrecvs; i++) {
442: values = rvalues + i*nmax;
443: for (j=0; j<lens[i]; j++) {
444: lrows[count++] = values[j] - base;
445: }
446: }
447: PetscFree(rvalues);
448: PetscFree(lens);
449: PetscFree(owner);
450: PetscFree(nprocs);
451:
452: /* actually zap the local rows */
453: MatZeroRows(l->A,slen,lrows,diag);
454: PetscFree(lrows);
456: /* wait on sends */
457: if (nsends) {
458: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
459: MPI_Waitall(nsends,send_waits,send_status);
460: PetscFree(send_status);
461: }
462: PetscFree(send_waits);
463: PetscFree(svalues);
465: return(0);
466: }
470: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
471: {
472: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
476: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
477: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
478: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
479: return(0);
480: }
484: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
485: {
486: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
490: VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
491: VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
492: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
493: return(0);
494: }
498: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
499: {
500: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
502: PetscScalar zero = 0.0;
505: VecSet(yy,zero);
506: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
507: VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
508: VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
509: return(0);
510: }
514: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
515: {
516: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
520: VecCopy(yy,zz);
521: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
522: VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
523: VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
524: return(0);
525: }
529: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
530: {
531: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
532: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
534: PetscInt len,i,n,m = A->rmap.n,radd;
535: PetscScalar *x,zero = 0.0;
536:
538: VecSet(v,zero);
539: VecGetArray(v,&x);
540: VecGetSize(v,&n);
541: if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
542: len = PetscMin(a->A->rmap.n,a->A->cmap.n);
543: radd = A->rmap.rstart*m;
544: for (i=0; i<len; i++) {
545: x[i] = aloc->v[radd + i*m + i];
546: }
547: VecRestoreArray(v,&x);
548: return(0);
549: }
553: PetscErrorCode MatDestroy_MPIDense(Mat mat)
554: {
555: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
560: #if defined(PETSC_USE_LOG)
561: PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
562: #endif
563: MatStashDestroy_Private(&mat->stash);
564: MatDestroy(mdn->A);
565: if (mdn->lvec) {VecDestroy(mdn->lvec);}
566: if (mdn->Mvctx) {VecScatterDestroy(mdn->Mvctx);}
567: if (mdn->factor) {
568: PetscFree(mdn->factor->temp);
569: PetscFree(mdn->factor->tag);
570: PetscFree(mdn->factor->pivots);
571: PetscFree(mdn->factor);
572: }
573: PetscFree(mdn);
574: PetscObjectChangeTypeName((PetscObject)mat,0);
575: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
576: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);
577: PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);
578: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);
579: PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);
580: return(0);
581: }
585: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
586: {
587: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
588: PetscErrorCode ierr;
589: PetscViewerFormat format;
590: int fd;
591: PetscInt header[4],mmax,N = mat->cmap.N,i,j,m,k;
592: PetscMPIInt rank,tag = ((PetscObject)viewer)->tag,size;
593: PetscScalar *work,*v;
594: Mat_SeqDense *a = (Mat_SeqDense*)mdn->A->data;
595: MPI_Status status;
598: if (mdn->size == 1) {
599: MatView(mdn->A,viewer);
600: } else {
601: PetscViewerBinaryGetDescriptor(viewer,&fd);
602: MPI_Comm_rank(mat->comm,&rank);
603: MPI_Comm_size(mat->comm,&size);
605: PetscViewerGetFormat(viewer,&format);
606: if (format == PETSC_VIEWER_BINARY_NATIVE) {
608: if (!rank) {
609: /* store the matrix as a dense matrix */
610: header[0] = MAT_FILE_COOKIE;
611: header[1] = mat->rmap.N;
612: header[2] = N;
613: header[3] = MATRIX_BINARY_FORMAT_DENSE;
614: PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);
616: /* get largest work array needed for transposing array */
617: mmax = mat->rmap.n;
618: for (i=1; i<size; i++) {
619: mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
620: }
621: PetscMalloc(mmax*N*sizeof(PetscScalar),&work);
623: /* write out local array, by rows */
624: m = mat->rmap.n;
625: v = a->v;
626: for (j=0; j<N; j++) {
627: for (i=0; i<m; i++) {
628:             work[j + i*N] = *v++;   /* transpose into row-major order for writing by rows */
629: }
630: }
631: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
633: /* get largest work array to receive messages from other processes, excludes process zero */
634: mmax = 0;
635: for (i=1; i<size; i++) {
636: mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
637: }
638: PetscMalloc(mmax*N*sizeof(PetscScalar),&v);
639: for (k=1; k<size; k++) {
640: m = mat->rmap.range[k+1] - mat->rmap.range[k];
641: MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,mat->comm,&status);
643:           for (j=0; j<N; j++) {
644:             for (i=0; i<m; i++) {
645:               work[j + i*N] = v[i + j*m];  /* transpose; do not advance v, it is reused and freed below */
646:             }
647:           }
648: PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
649: }
650: PetscFree(work);
651: PetscFree(v);
652: } else {
653: MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,mat->comm);
654: }
655: }
656: }
657: return(0);
658: }
662: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
663: {
664: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
665: PetscErrorCode ierr;
666: PetscMPIInt size = mdn->size,rank = mdn->rank;
667: PetscViewerType vtype;
668: PetscTruth iascii,isdraw;
669: PetscViewer sviewer;
670: PetscViewerFormat format;
673: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
674: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
675: if (iascii) {
676: PetscViewerGetType(viewer,&vtype);
677: PetscViewerGetFormat(viewer,&format);
678: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
679: MatInfo info;
680: MatGetInfo(mat,MAT_LOCAL,&info);
681: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
682: (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
683: PetscViewerFlush(viewer);
684: VecScatterView(mdn->Mvctx,viewer);
685: return(0);
686: } else if (format == PETSC_VIEWER_ASCII_INFO) {
687: return(0);
688: }
689: } else if (isdraw) {
690: PetscDraw draw;
691: PetscTruth isnull;
693: PetscViewerDrawGetDraw(viewer,0,&draw);
694: PetscDrawIsNull(draw,&isnull);
695: if (isnull) return(0);
696: }
698: if (size == 1) {
699: MatView(mdn->A,viewer);
700: } else {
701: /* assemble the entire matrix onto first processor. */
702: Mat A;
703: PetscInt M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
704: PetscInt *cols;
705: PetscScalar *vals;
707: MatCreate(mat->comm,&A);
708: if (!rank) {
709: MatSetSizes(A,M,N,M,N);
710: } else {
711: MatSetSizes(A,0,0,M,N);
712: }
713:     /* Since this is a temporary matrix, using MATMPIDENSE instead of mat->type_name here is probably acceptable. */
714: MatSetType(A,MATMPIDENSE);
715: MatMPIDenseSetPreallocation(A,PETSC_NULL);
716: PetscLogObjectParent(mat,A);
718: /* Copy the matrix ... This isn't the most efficient means,
719: but it's quick for now */
720: A->insertmode = INSERT_VALUES;
721: row = mat->rmap.rstart; m = mdn->A->rmap.n;
722: for (i=0; i<m; i++) {
723: MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
724: MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
725: MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
726: row++;
727: }
729: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
730: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
731: PetscViewerGetSingleton(viewer,&sviewer);
732: if (!rank) {
733: MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
734: }
735: PetscViewerRestoreSingleton(viewer,&sviewer);
736: PetscViewerFlush(viewer);
737: MatDestroy(A);
738: }
739: return(0);
740: }
744: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
745: {
747: PetscTruth iascii,isbinary,isdraw,issocket;
748:
750:
751: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
752: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
753: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
754: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
756: if (iascii || issocket || isdraw) {
757: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
758: } else if (isbinary) {
759: MatView_MPIDense_Binary(mat,viewer);
760: } else {
761: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
762: }
763: return(0);
764: }
768: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
769: {
770: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
771: Mat mdn = mat->A;
773: PetscReal isend[5],irecv[5];
776: info->rows_global = (double)A->rmap.N;
777: info->columns_global = (double)A->cmap.N;
778: info->rows_local = (double)A->rmap.n;
779: info->columns_local = (double)A->cmap.N;
780: info->block_size = 1.0;
781: MatGetInfo(mdn,MAT_LOCAL,info);
782: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
783: isend[3] = info->memory; isend[4] = info->mallocs;
784: if (flag == MAT_LOCAL) {
785: info->nz_used = isend[0];
786: info->nz_allocated = isend[1];
787: info->nz_unneeded = isend[2];
788: info->memory = isend[3];
789: info->mallocs = isend[4];
790: } else if (flag == MAT_GLOBAL_MAX) {
791: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
792: info->nz_used = irecv[0];
793: info->nz_allocated = irecv[1];
794: info->nz_unneeded = irecv[2];
795: info->memory = irecv[3];
796: info->mallocs = irecv[4];
797: } else if (flag == MAT_GLOBAL_SUM) {
798: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
799: info->nz_used = irecv[0];
800: info->nz_allocated = irecv[1];
801: info->nz_unneeded = irecv[2];
802: info->memory = irecv[3];
803: info->mallocs = irecv[4];
804: }
805: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
806: info->fill_ratio_needed = 0;
807: info->factor_mallocs = 0;
808: return(0);
809: }
813: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
814: {
815: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
819: switch (op) {
820: case MAT_NO_NEW_NONZERO_LOCATIONS:
821: case MAT_YES_NEW_NONZERO_LOCATIONS:
822: case MAT_NEW_NONZERO_LOCATION_ERR:
823: case MAT_NEW_NONZERO_ALLOCATION_ERR:
824: case MAT_COLUMNS_SORTED:
825: case MAT_COLUMNS_UNSORTED:
826: MatSetOption(a->A,op);
827: break;
828: case MAT_ROW_ORIENTED:
829: a->roworiented = PETSC_TRUE;
830: MatSetOption(a->A,op);
831: break;
832: case MAT_ROWS_SORTED:
833: case MAT_ROWS_UNSORTED:
834: case MAT_YES_NEW_DIAGONALS:
835: case MAT_USE_HASH_TABLE:
836: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
837: break;
838: case MAT_COLUMN_ORIENTED:
839: a->roworiented = PETSC_FALSE;
840: MatSetOption(a->A,op);
841: break;
842: case MAT_IGNORE_OFF_PROC_ENTRIES:
843: a->donotstash = PETSC_TRUE;
844: break;
845: case MAT_NO_NEW_DIAGONALS:
846: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
847: case MAT_SYMMETRIC:
848: case MAT_STRUCTURALLY_SYMMETRIC:
849: case MAT_NOT_SYMMETRIC:
850: case MAT_NOT_STRUCTURALLY_SYMMETRIC:
851: case MAT_HERMITIAN:
852: case MAT_NOT_HERMITIAN:
853: case MAT_SYMMETRY_ETERNAL:
854: case MAT_NOT_SYMMETRY_ETERNAL:
855: PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
856: break;
857: default:
858: SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
859: }
860: return(0);
861: }
866: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
867: {
868: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
869: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
870: PetscScalar *l,*r,x,*v;
872: PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;
875: MatGetLocalSize(A,&s2,&s3);
876: if (ll) {
877: VecGetLocalSize(ll,&s2a);
878: if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
879: VecGetArray(ll,&l);
880: for (i=0; i<m; i++) {
881: x = l[i];
882: v = mat->v + i;
883: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
884: }
885: VecRestoreArray(ll,&l);
886: PetscLogFlops(n*m);
887: }
888: if (rr) {
889: VecGetLocalSize(rr,&s3a);
890: if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
891: VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
892: VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
893: VecGetArray(mdn->lvec,&r);
894: for (i=0; i<n; i++) {
895: x = r[i];
896: v = mat->v + i*m;
897: for (j=0; j<m; j++) { (*v++) *= x;}
898: }
899: VecRestoreArray(mdn->lvec,&r);
900: PetscLogFlops(n*m);
901: }
902: return(0);
903: }
907: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
908: {
909: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
910: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
912: PetscInt i,j;
913: PetscReal sum = 0.0;
914: PetscScalar *v = mat->v;
917: if (mdn->size == 1) {
918: MatNorm(mdn->A,type,nrm);
919: } else {
920: if (type == NORM_FROBENIUS) {
921: for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
922: #if defined(PETSC_USE_COMPLEX)
923: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
924: #else
925: sum += (*v)*(*v); v++;
926: #endif
927: }
928: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
929: *nrm = sqrt(*nrm);
930: PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);
931: } else if (type == NORM_1) {
932: PetscReal *tmp,*tmp2;
933: PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);
934: tmp2 = tmp + A->cmap.N;
935: PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));
936: *nrm = 0.0;
937: v = mat->v;
938: for (j=0; j<mdn->A->cmap.n; j++) {
939: for (i=0; i<mdn->A->rmap.n; i++) {
940: tmp[j] += PetscAbsScalar(*v); v++;
941: }
942: }
943: MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,A->comm);
944: for (j=0; j<A->cmap.N; j++) {
945: if (tmp2[j] > *nrm) *nrm = tmp2[j];
946: }
947: PetscFree(tmp);
948: PetscLogFlops(A->cmap.n*A->rmap.n);
949: } else if (type == NORM_INFINITY) { /* max row norm */
950: PetscReal ntemp;
951: MatNorm(mdn->A,type,&ntemp);
952: MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
953: } else {
954: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
955: }
956: }
957: return(0);
958: }
962: PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
963: {
964: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
965: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
966: Mat B;
967: PetscInt M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
969: PetscInt j,i;
970: PetscScalar *v;
973: if (!matout && M != N) {
974:     SETERRQ(PETSC_ERR_SUP,"Supports in-place transpose only for square matrices");
975: }
976: MatCreate(A->comm,&B);
977: MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);
978: MatSetType(B,A->type_name);
979: MatMPIDenseSetPreallocation(B,PETSC_NULL);
981: m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
982: PetscMalloc(m*sizeof(PetscInt),&rwork);
983: for (i=0; i<m; i++) rwork[i] = rstart + i;
984: for (j=0; j<n; j++) {
985: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
986: v += m;
987: }
988: PetscFree(rwork);
989: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
990: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
991: if (matout) {
992: *matout = B;
993: } else {
994: MatHeaderCopy(A,B);
995: }
996: return(0);
997: }
999: #include "petscblaslapack.h"
1002: PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
1003: {
1004: Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
1005: Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
1006: PetscScalar oalpha = alpha;
1007: PetscBLASInt one = 1,nz = (PetscBLASInt)inA->rmap.n*inA->cmap.N;
1011: BLASscal_(&nz,&oalpha,a->v,&one);
1012: PetscLogFlops(nz);
1013: return(0);
1014: }
1016: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
1020: PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1021: {
1025: MatMPIDenseSetPreallocation(A,0);
1026: return(0);
1027: }
1031: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1032: {
1034: PetscInt m=A->rmap.n,n=B->cmap.n;
1035: Mat Cmat;
1038: if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
1039: MatCreate(B->comm,&Cmat);
1040: MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);
1041: MatSetType(Cmat,MATMPIDENSE);
1042: MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);
1043: MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
1044: MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
1045: *C = Cmat;
1046: return(0);
1047: }
1049: /* -------------------------------------------------------------------*/
1050: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1051: MatGetRow_MPIDense,
1052: MatRestoreRow_MPIDense,
1053: MatMult_MPIDense,
1054: /* 4*/ MatMultAdd_MPIDense,
1055: MatMultTranspose_MPIDense,
1056: MatMultTransposeAdd_MPIDense,
1057: 0,
1058: 0,
1059: 0,
1060: /*10*/ 0,
1061: 0,
1062: 0,
1063: 0,
1064: MatTranspose_MPIDense,
1065: /*15*/ MatGetInfo_MPIDense,
1066: MatEqual_MPIDense,
1067: MatGetDiagonal_MPIDense,
1068: MatDiagonalScale_MPIDense,
1069: MatNorm_MPIDense,
1070: /*20*/ MatAssemblyBegin_MPIDense,
1071: MatAssemblyEnd_MPIDense,
1072: 0,
1073: MatSetOption_MPIDense,
1074: MatZeroEntries_MPIDense,
1075: /*25*/ MatZeroRows_MPIDense,
1076: MatLUFactorSymbolic_MPIDense,
1077: 0,
1078: MatCholeskyFactorSymbolic_MPIDense,
1079: 0,
1080: /*30*/ MatSetUpPreallocation_MPIDense,
1081: 0,
1082: 0,
1083: MatGetArray_MPIDense,
1084: MatRestoreArray_MPIDense,
1085: /*35*/ MatDuplicate_MPIDense,
1086: 0,
1087: 0,
1088: 0,
1089: 0,
1090: /*40*/ 0,
1091: MatGetSubMatrices_MPIDense,
1092: 0,
1093: MatGetValues_MPIDense,
1094: 0,
1095: /*45*/ 0,
1096: MatScale_MPIDense,
1097: 0,
1098: 0,
1099: 0,
1100: /*50*/ 0,
1101: 0,
1102: 0,
1103: 0,
1104: 0,
1105: /*55*/ 0,
1106: 0,
1107: 0,
1108: 0,
1109: 0,
1110: /*60*/ MatGetSubMatrix_MPIDense,
1111: MatDestroy_MPIDense,
1112: MatView_MPIDense,
1113: 0,
1114: 0,
1115: /*65*/ 0,
1116: 0,
1117: 0,
1118: 0,
1119: 0,
1120: /*70*/ 0,
1121: 0,
1122: 0,
1123: 0,
1124: 0,
1125: /*75*/ 0,
1126: 0,
1127: 0,
1128: 0,
1129: 0,
1130: /*80*/ 0,
1131: 0,
1132: 0,
1133: 0,
1134: /*84*/ MatLoad_MPIDense,
1135: 0,
1136: 0,
1137: 0,
1138: 0,
1139: 0,
1140: /*90*/ 0,
1141: MatMatMultSymbolic_MPIDense_MPIDense,
1142: 0,
1143: 0,
1144: 0,
1145: /*95*/ 0,
1146: 0,
1147: 0,
1148: 0};
1153: PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1154: {
1155: Mat_MPIDense *a;
1159: mat->preallocated = PETSC_TRUE;
1160:   /* Note: for now, when a data pointer is supplied, this assumes the user has correctly
1161:    allocated the local dense storage space.  We should add error checking. */
1163: a = (Mat_MPIDense*)mat->data;
1164: MatCreate(PETSC_COMM_SELF,&a->A);
1165: MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);
1166: MatSetType(a->A,MATSEQDENSE);
1167: MatSeqDenseSetPreallocation(a->A,data);
1168: PetscLogObjectParent(mat,a->A);
1169: return(0);
1170: }
1173: /*MC
1174: MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.
1176: Options Database Keys:
1177: . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()
1179: Level: beginner
1181: .seealso: MatCreateMPIDense
1182: M*/
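/*
   Example (an illustrative sketch, not taken from this file; M and N are hypothetical sizes):
   creating a MATMPIDENSE matrix explicitly with MatSetType().

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);    PETSc allocates the storage
*/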
1187: PetscErrorCode MatCreate_MPIDense(Mat mat)
1188: {
1189: Mat_MPIDense *a;
1193: PetscNew(Mat_MPIDense,&a);
1194: mat->data = (void*)a;
1195: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1196: mat->factor = 0;
1197: mat->mapping = 0;
1199: a->factor = 0;
1200: mat->insertmode = NOT_SET_VALUES;
1201: MPI_Comm_rank(mat->comm,&a->rank);
1202: MPI_Comm_size(mat->comm,&a->size);
1204: mat->rmap.bs = mat->cmap.bs = 1;
1205: PetscMapSetUp(&mat->rmap);
1206: PetscMapSetUp(&mat->cmap);
1207: a->nvec = mat->cmap.n;
1209: /* build cache for off array entries formed */
1210: a->donotstash = PETSC_FALSE;
1211: MatStashCreate_Private(mat->comm,1,&mat->stash);
1213: /* stuff used for matrix vector multiply */
1214: a->lvec = 0;
1215: a->Mvctx = 0;
1216: a->roworiented = PETSC_TRUE;
1218: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1219: "MatGetDiagonalBlock_MPIDense",
1220: MatGetDiagonalBlock_MPIDense);
1221: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1222: "MatMPIDenseSetPreallocation_MPIDense",
1223: MatMPIDenseSetPreallocation_MPIDense);
1224: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1225: "MatMatMult_MPIAIJ_MPIDense",
1226: MatMatMult_MPIAIJ_MPIDense);
1227: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1228: "MatMatMultSymbolic_MPIAIJ_MPIDense",
1229: MatMatMultSymbolic_MPIAIJ_MPIDense);
1230: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1231: "MatMatMultNumeric_MPIAIJ_MPIDense",
1232: MatMatMultNumeric_MPIAIJ_MPIDense);
1233: PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1234: return(0);
1235: }
1238: /*MC
1239: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1241: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1242: and MATMPIDENSE otherwise.
1244: Options Database Keys:
1245: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1247: Level: beginner
1249: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1250: M*/
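/*
   Example (an illustrative sketch, not taken from this file; M and N are hypothetical sizes):
   selecting the "dense" type from the options database; MatCreate_Dense() below forwards to
   MATSEQDENSE on one process and to MATMPIDENSE otherwise.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);          run with -mat_type dense
*/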
1255: PetscErrorCode MatCreate_Dense(Mat A)
1256: {
1258: PetscMPIInt size;
1261: MPI_Comm_size(A->comm,&size);
1262: if (size == 1) {
1263: MatSetType(A,MATSEQDENSE);
1264: } else {
1265: MatSetType(A,MATMPIDENSE);
1266: }
1267: return(0);
1268: }
1273: /*@C
1274: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1276: Not collective
1278: Input Parameters:
1279: +  A - the matrix
1280: - data - optional location of matrix data. Set data=PETSC_NULL for PETSc
1281: to control all matrix memory allocation.
1283: Notes:
1284: The dense format is fully compatible with standard Fortran 77
1285: storage by columns.
1287: The data input variable is intended primarily for Fortran programmers
1288: who wish to allocate their own matrix memory space. Most users should
1289: set data=PETSC_NULL.
1291: Level: intermediate
1293: .keywords: matrix,dense, parallel
1295: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1296: @*/
1297: PetscErrorCode MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1298: {
1299: PetscErrorCode ierr,(*f)(Mat,PetscScalar *);
1302: PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);
1303: if (f) {
1304: (*f)(mat,data);
1305: }
1306: return(0);
1307: }
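/*
   Example (an illustrative sketch, not taken from this file; mlocal, N, and data are hypothetical):
   supplying user-allocated storage.  As set up in MatMPIDenseSetPreallocation_MPIDense() above,
   the local block is mlocal x N (all global columns) stored by columns, so data must hold at
   least mlocal*N scalars.

     Mat            A;
     PetscScalar    *data;
     PetscErrorCode ierr;

     ierr = PetscMalloc(mlocal*N*sizeof(PetscScalar),&data);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,mlocal,PETSC_DECIDE,PETSC_DECIDE,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
     ierr = MatMPIDenseSetPreallocation(A,data);CHKERRQ(ierr);
*/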
1311: /*@C
1312:    MatCreateMPIDense - Creates a parallel matrix in dense format.
1314: Collective on MPI_Comm
1316: Input Parameters:
1317: + comm - MPI communicator
1318: .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1319: .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1320: .  M - number of global rows (or PETSC_DECIDE to have it calculated if m is given)
1321: .  N - number of global columns (or PETSC_DECIDE to have it calculated if n is given)
1322: - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1323: to control all matrix memory allocation.
1325: Output Parameter:
1326: . A - the matrix
1328: Notes:
1329: The dense format is fully compatible with standard Fortran 77
1330: storage by columns.
1332: The data input variable is intended primarily for Fortran programmers
1333: who wish to allocate their own matrix memory space. Most users should
1334: set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1336: The user MUST specify either the local or global matrix dimensions
1337: (possibly both).
1339: Level: intermediate
1341: .keywords: matrix,dense, parallel
1343: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1344: @*/
1345: PetscErrorCode MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1346: {
1348: PetscMPIInt size;
1351: MatCreate(comm,A);
1352: MatSetSizes(*A,m,n,M,N);
1353: MPI_Comm_size(comm,&size);
1354: if (size > 1) {
1355: MatSetType(*A,MATMPIDENSE);
1356: MatMPIDenseSetPreallocation(*A,data);
1357: } else {
1358: MatSetType(*A,MATSEQDENSE);
1359: MatSeqDenseSetPreallocation(*A,data);
1360: }
1361: return(0);
1362: }
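/*
   Example (an illustrative sketch, not taken from this file; M, N, and the value v are hypothetical):
   creating a parallel dense matrix and filling only its locally owned rows.

     Mat            A;
     PetscInt       i,j,rstart,rend;
     PetscScalar    v;
     PetscErrorCode ierr;

     ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       for (j=0; j<N; j++) {
         v    = 1.0;
         ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/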
1366: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1367: {
1368: Mat mat;
1369: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1373: *newmat = 0;
1374: MatCreate(A->comm,&mat);
1375: MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);
1376: MatSetType(mat,A->type_name);
1377: a = (Mat_MPIDense*)mat->data;
1378: PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));
1379: mat->factor = A->factor;
1380: mat->assembled = PETSC_TRUE;
1381: mat->preallocated = PETSC_TRUE;
1383: mat->rmap.rstart = A->rmap.rstart;
1384: mat->rmap.rend = A->rmap.rend;
1385: a->size = oldmat->size;
1386: a->rank = oldmat->rank;
1387: mat->insertmode = NOT_SET_VALUES;
1388: a->nvec = oldmat->nvec;
1389: a->donotstash = oldmat->donotstash;
1390:
1391: PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));
1392: PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));
1393: MatStashCreate_Private(A->comm,1,&mat->stash);
1395: MatSetUpMultiply_MPIDense(mat);
1396: MatDuplicate(oldmat->A,cpvalues,&a->A);
1397: PetscLogObjectParent(mat,a->A);
1398: *newmat = mat;
1399: return(0);
1400: }
1402: #include "petscsys.h"
1406: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1407: {
1409: PetscMPIInt rank,size;
1410: PetscInt *rowners,i,m,nz,j;
1411: PetscScalar *array,*vals,*vals_ptr;
1412: MPI_Status status;
1415: MPI_Comm_rank(comm,&rank);
1416: MPI_Comm_size(comm,&size);
1418: /* determine ownership of all rows */
1419: m = M/size + ((M % size) > rank);
1420: PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1421: MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1422: rowners[0] = 0;
1423: for (i=2; i<=size; i++) {
1424: rowners[i] += rowners[i-1];
1425: }
1427: MatCreate(comm,newmat);
1428: MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1429: MatSetType(*newmat,type);
1430: MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1431: MatGetArray(*newmat,&array);
1433: if (!rank) {
1434: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1436: /* read in my part of the matrix numerical values */
1437: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1438:
1439:     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1440: vals_ptr = vals;
1441: for (i=0; i<m; i++) {
1442: for (j=0; j<N; j++) {
1443: array[i + j*m] = *vals_ptr++;
1444: }
1445: }
1447:     /* read in the other processors' parts and ship them out */
1448: for (i=1; i<size; i++) {
1449: nz = (rowners[i+1] - rowners[i])*N;
1450: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1451: MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1452: }
1453: } else {
1454: /* receive numeric values */
1455: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1457: /* receive message of values*/
1458: MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);
1460:     /* insert into the matrix row by row (this is why we cannot read directly into the array) */
1461: vals_ptr = vals;
1462: for (i=0; i<m; i++) {
1463: for (j=0; j<N; j++) {
1464: array[i + j*m] = *vals_ptr++;
1465: }
1466: }
1467: }
1468: PetscFree(rowners);
1469: PetscFree(vals);
1470: MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1471: MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1472: return(0);
1473: }
1477: PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1478: {
1479: Mat A;
1480: PetscScalar *vals,*svals;
1481: MPI_Comm comm = ((PetscObject)viewer)->comm;
1482: MPI_Status status;
1483: PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1484: PetscInt header[4],*rowlengths = 0,M,N,*cols;
1485: PetscInt *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1486: PetscInt i,nz,j,rstart,rend;
1487: int fd;
1491: MPI_Comm_size(comm,&size);
1492: MPI_Comm_rank(comm,&rank);
1493: if (!rank) {
1494: PetscViewerBinaryGetDescriptor(viewer,&fd);
1495: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1496: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1497: }
1499: MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1500: M = header[1]; N = header[2]; nz = header[3];
1502: /*
1503: Handle case where matrix is stored on disk as a dense matrix
1504: */
1505: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1506: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);
1507: return(0);
1508: }
1510: /* determine ownership of all rows */
1511: m = M/size + ((M % size) > rank);
1512: PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1513: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1514: rowners[0] = 0;
1515: for (i=2; i<=size; i++) {
1516: rowners[i] += rowners[i-1];
1517: }
1518: rstart = rowners[rank];
1519: rend = rowners[rank+1];
1521: /* distribute row lengths to all processors */
1522: PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);
1523: offlens = ourlens + (rend-rstart);
1524: if (!rank) {
1525: PetscMalloc(M*sizeof(PetscInt),&rowlengths);
1526: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1527: PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1528: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1529: MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1530: PetscFree(sndcounts);
1531: } else {
1532: MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1533: }
1535: if (!rank) {
1536: /* calculate the number of nonzeros on each processor */
1537: PetscMalloc(size*sizeof(PetscInt),&procsnz);
1538: PetscMemzero(procsnz,size*sizeof(PetscInt));
1539: for (i=0; i<size; i++) {
1540: for (j=rowners[i]; j< rowners[i+1]; j++) {
1541: procsnz[i] += rowlengths[j];
1542: }
1543: }
1544: PetscFree(rowlengths);
1546: /* determine max buffer needed and allocate it */
1547: maxnz = 0;
1548: for (i=0; i<size; i++) {
1549: maxnz = PetscMax(maxnz,procsnz[i]);
1550: }
1551: PetscMalloc(maxnz*sizeof(PetscInt),&cols);
1553: /* read in my part of the matrix column indices */
1554: nz = procsnz[0];
1555: PetscMalloc(nz*sizeof(PetscInt),&mycols);
1556: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1558:     /* read in everyone else's part and ship it off */
1559: for (i=1; i<size; i++) {
1560: nz = procsnz[i];
1561: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1562: MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1563: }
1564: PetscFree(cols);
1565: } else {
1566: /* determine buffer space needed for message */
1567: nz = 0;
1568: for (i=0; i<m; i++) {
1569: nz += ourlens[i];
1570: }
1571: PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);
1573: /* receive message of column indices*/
1574: MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1575: MPI_Get_count(&status,MPIU_INT,&maxnz);
1576: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1577: }
1579: /* loop over local rows, determining number of off diagonal entries */
1580: PetscMemzero(offlens,m*sizeof(PetscInt));
1581: jj = 0;
1582: for (i=0; i<m; i++) {
1583: for (j=0; j<ourlens[i]; j++) {
1584: if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1585: jj++;
1586: }
1587: }
1589: /* create our matrix */
1590: for (i=0; i<m; i++) {
1591: ourlens[i] -= offlens[i];
1592: }
1593: MatCreate(comm,newmat);
1594: MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1595: MatSetType(*newmat,type);
1596: MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1597: A = *newmat;
1598: for (i=0; i<m; i++) {
1599: ourlens[i] += offlens[i];
1600: }
1602: if (!rank) {
1603: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1605: /* read in my part of the matrix numerical values */
1606: nz = procsnz[0];
1607: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1608:
1609: /* insert into matrix */
1610: jj = rstart;
1611: smycols = mycols;
1612: svals = vals;
1613: for (i=0; i<m; i++) {
1614: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1615: smycols += ourlens[i];
1616: svals += ourlens[i];
1617: jj++;
1618: }
1620:     /* read in the other processors' parts and ship them out */
1621: for (i=1; i<size; i++) {
1622: nz = procsnz[i];
1623: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1624: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1625: }
1626: PetscFree(procsnz);
1627: } else {
1628: /* receive numeric values */
1629: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
1631: /* receive message of values*/
1632: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1633: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1634: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1636: /* insert into matrix */
1637: jj = rstart;
1638: smycols = mycols;
1639: svals = vals;
1640: for (i=0; i<m; i++) {
1641: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1642: smycols += ourlens[i];
1643: svals += ourlens[i];
1644: jj++;
1645: }
1646: }
1647: PetscFree(ourlens);
1648: PetscFree(vals);
1649: PetscFree(mycols);
1650: PetscFree(rowners);
1652: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1653: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1654: return(0);
1655: }
1659: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
1660: {
1661: Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1662: Mat a,b;
1663: PetscTruth flg;
1667: a = matA->A;
1668: b = matB->A;
1669: MatEqual(a,b,&flg);
1670: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1671: return(0);
1672: }