Actual source code: mpidense.c

  1: #define PETSCMAT_DLL

  3: /*
  4:    Basic functions for parallel (MPI) dense matrices.
  5: */
  6: 
 7:  #include "src/mat/impls/dense/mpi/mpidense.h"

 11: PetscErrorCode MatLUFactorSymbolic_MPIDense(Mat A,IS row,IS col,MatFactorInfo *info,Mat *fact)
 12: {

 16:   MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);
 17:   return(0);
 18: }

 22: PetscErrorCode MatCholeskyFactorSymbolic_MPIDense(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
 23: {

 27:   MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,fact);
 28:   return(0);
 29: }

 33: /*@

 35:       MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
 36:               matrix that stores the locally owned part of the operator. For a sequential matrix it returns the matrix itself.

 38:     Input Parameter:
 39: .      A - the Seq or MPI dense matrix

 41:     Output Parameter:
 42: .      B - the inner matrix

 44:     Level: intermediate

 46: @*/
 47: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
 48: {
 49:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 51:   PetscTruth     flg;
 52:   PetscMPIInt    size;

 55:   PetscTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
 56:   if (!flg) {  /* this check is fragile and should be improved */
 57:     PetscTypeCompare((PetscObject)A,MATDENSE,&flg);
 58:     if (flg) {
 59:       MPI_Comm_size(A->comm,&size);
 60:       if (size == 1) flg = PETSC_FALSE;
 61:     }
 62:   }
 63:   if (flg) {
 64:     *B = mat->A;
 65:   } else {
 66:     *B = A;
 67:   }
 68:   return(0);
 69: }
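
/*
   Illustrative usage sketch (not part of the original file; variable names are placeholders):
   a caller can obtain the sequential block holding its locally owned rows and then access the
   underlying column-major array.  Assumes A is an assembled MATMPIDENSE or MATSEQDENSE matrix.

     Mat         Aloc;
     PetscScalar *vals;

     MatDenseGetLocalMatrix(A,&Aloc);    Aloc is the local MATSEQDENSE block (or A itself if sequential)
     MatGetArray(Aloc,&vals);            column-major array of the local rows
     ...                                 use vals
     MatRestoreArray(Aloc,&vals);
*/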

 73: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 74: {
 75:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
 77:   PetscInt       lrow,rstart = A->rmap.rstart,rend = A->rmap.rend;

 80:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
 81:   lrow = row - rstart;
 82:   MatGetRow(mat->A,lrow,nz,(const PetscInt **)idx,(const PetscScalar **)v);
 83:   return(0);
 84: }

 88: PetscErrorCode MatRestoreRow_MPIDense(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
 89: {

 93:   if (idx) {PetscFree(*idx);}
 94:   if (v) {PetscFree(*v);}
 95:   return(0);
 96: }

101: PetscErrorCode  MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
102: {
103:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
105:   PetscInt       m = A->rmap.n,rstart = A->rmap.rstart;
106:   PetscScalar    *array;
107:   MPI_Comm       comm;

110:   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

112:   /* The reuse aspect is not implemented efficiently */
113:   if (reuse) { MatDestroy(*B);}

115:   PetscObjectGetComm((PetscObject)(mdn->A),&comm);
116:   MatGetArray(mdn->A,&array);
117:   MatCreate(comm,B);
118:   MatSetSizes(*B,m,m,m,m);
119:   MatSetType(*B,mdn->A->type_name);
120:   MatSeqDenseSetPreallocation(*B,array+m*rstart);
121:   MatRestoreArray(mdn->A,&array);
122:   MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
123:   MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
124: 
125:   *iscopy = PETSC_TRUE;
126:   return(0);
127: }

132: PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
133: {
134:   Mat_MPIDense   *A = (Mat_MPIDense*)mat->data;
136:   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;
137:   PetscTruth     roworiented = A->roworiented;

140:   for (i=0; i<m; i++) {
141:     if (idxm[i] < 0) continue;
142:     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
143:     if (idxm[i] >= rstart && idxm[i] < rend) {
144:       row = idxm[i] - rstart;
145:       if (roworiented) {
146:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
147:       } else {
148:         for (j=0; j<n; j++) {
149:           if (idxn[j] < 0) continue;
150:           if (idxn[j] >= mat->cmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
151:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
152:         }
153:       }
154:     } else {
155:       if (!A->donotstash) {
156:         if (roworiented) {
157:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
158:         } else {
159:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
160:         }
161:       }
162:     }
163:   }
164:   return(0);
165: }

169: PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
170: {
171:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
173:   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend,row;

176:   for (i=0; i<m; i++) {
177:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
178:     if (idxm[i] >= mat->rmap.N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
179:     if (idxm[i] >= rstart && idxm[i] < rend) {
180:       row = idxm[i] - rstart;
181:       for (j=0; j<n; j++) {
182:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
183:         if (idxn[j] >= mat->cmap.N) {
184:           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
185:         }
186:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
187:       }
188:     } else {
189:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
190:     }
191:   }
192:   return(0);
193: }

197: PetscErrorCode MatGetArray_MPIDense(Mat A,PetscScalar *array[])
198: {
199:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

203:   MatGetArray(a->A,array);
204:   return(0);
205: }

209: static PetscErrorCode MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,PetscInt cs,MatReuse scall,Mat *B)
210: {
211:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data,*newmatd;
212:   Mat_SeqDense   *lmat = (Mat_SeqDense*)mat->A->data;
214:   PetscInt       i,j,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
215:   PetscScalar    *av,*bv,*v = lmat->v;
216:   Mat            newmat;

219:   ISGetIndices(isrow,&irow);
220:   ISGetIndices(iscol,&icol);
221:   ISGetLocalSize(isrow,&nrows);
222:   ISGetLocalSize(iscol,&ncols);

224:   /* No parallel redistribution is currently supported! Should really check each index set
225:      to confirm that it is OK.  ... Currently supports only submatrices with the same row
226:      partitioning as the original matrix! */

228:   MatGetLocalSize(A,&nlrows,&nlcols);
229:   MatGetOwnershipRange(A,&rstart,&rend);
230: 
231:   /* Check submatrix call */
232:   if (scall == MAT_REUSE_MATRIX) {
233:     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
234:     /* Really need to check the row and column sizes! */
235:     newmat = *B;
236:   } else {
237:     /* Create and fill new matrix */
238:     MatCreate(A->comm,&newmat);
239:     MatSetSizes(newmat,nrows,cs,PETSC_DECIDE,ncols);
240:     MatSetType(newmat,A->type_name);
241:     MatMPIDenseSetPreallocation(newmat,PETSC_NULL);
242:   }

244:   /* Now extract the data pointers and do the copy, column at a time */
245:   newmatd = (Mat_MPIDense*)newmat->data;
246:   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
247: 
248:   for (i=0; i<ncols; i++) {
249:     av = v + nlrows*icol[i];
250:     for (j=0; j<nrows; j++) {
251:       *bv++ = av[irow[j] - rstart];
252:     }
253:   }

255:   /* Assemble the matrices so that the correct flags are set */
256:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
257:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

259:   /* Free work space */
260:   ISRestoreIndices(isrow,&irow);
261:   ISRestoreIndices(iscol,&icol);
262:   *B = newmat;
263:   return(0);
264: }

268: PetscErrorCode MatRestoreArray_MPIDense(Mat A,PetscScalar *array[])
269: {
271:   return(0);
272: }

276: PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
277: {
278:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;
279:   MPI_Comm       comm = mat->comm;
281:   PetscInt       nstash,reallocs;
282:   InsertMode     addv;

285:   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
286:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
287:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
288:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
289:   }
290:   mat->insertmode = addv; /* in case this processor had no cache */

292:   MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);
293:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
294:   PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
295:   return(0);
296: }

300: PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
301: {
302:   Mat_MPIDense    *mdn=(Mat_MPIDense*)mat->data;
303:   PetscErrorCode  ierr;
304:   PetscInt        i,*row,*col,flg,j,rstart,ncols;
305:   PetscMPIInt     n;
306:   PetscScalar     *val;
307:   InsertMode      addv=mat->insertmode;

310:   /*  wait on receives */
311:   while (1) {
312:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
313:     if (!flg) break;
314: 
315:     for (i=0; i<n;) {
316:       /* Now identify the consecutive vals belonging to the same row */
317:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
318:       if (j < n) ncols = j-i;
319:       else       ncols = n-i;
320:       /* Now assemble all these values with a single function call */
321:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
322:       i = j;
323:     }
324:   }
325:   MatStashScatterEnd_Private(&mat->stash);
326: 
327:   MatAssemblyBegin(mdn->A,mode);
328:   MatAssemblyEnd(mdn->A,mode);

330:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
331:     MatSetUpMultiply_MPIDense(mat);
332:   }
333:   return(0);
334: }

338: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
339: {
341:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;

344:   MatZeroEntries(l->A);
345:   return(0);
346: }

348: /* the code does not do the diagonal entries correctly unless the 
349:    matrix is square and the column and row ownerships are identical.
350:    This is a BUG. The only way to fix it seems to be to access 
351:    mdn->A and mdn->B directly and not through the MatZeroRows() 
352:    routine. 
353: */
356: PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
357: {
358:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
360:   PetscInt       i,*owners = A->rmap.range;
361:   PetscInt       *nprocs,j,idx,nsends;
362:   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
363:   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source;
364:   PetscInt       *lens,*lrows,*values;
365:   PetscMPIInt    n,imdex,rank = l->rank,size = l->size;
366:   MPI_Comm       comm = A->comm;
367:   MPI_Request    *send_waits,*recv_waits;
368:   MPI_Status     recv_status,*send_status;
369:   PetscTruth     found;

372:   /*  first count number of contributors to each processor */
373:   PetscMalloc(2*size*sizeof(PetscInt),&nprocs);
374:   PetscMemzero(nprocs,2*size*sizeof(PetscInt));
375:   PetscMalloc((N+1)*sizeof(PetscInt),&owner); /* see note*/
376:   for (i=0; i<N; i++) {
377:     idx = rows[i];
378:     found = PETSC_FALSE;
379:     for (j=0; j<size; j++) {
380:       if (idx >= owners[j] && idx < owners[j+1]) {
381:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
382:       }
383:     }
384:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
385:   }
386:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

388:   /* inform other processors of number of messages and max length*/
389:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

391:   /* post receives:   */
392:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);
393:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
394:   for (i=0; i<nrecvs; i++) {
395:     MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
396:   }

398:   /* do sends:
399:       1) starts[i] gives the starting index in svalues for stuff going to 
400:          the ith processor
401:   */
402:   PetscMalloc((N+1)*sizeof(PetscInt),&svalues);
403:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
404:   PetscMalloc((size+1)*sizeof(PetscInt),&starts);
405:   starts[0]  = 0;
406:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
407:   for (i=0; i<N; i++) {
408:     svalues[starts[owner[i]]++] = rows[i];
409:   }

411:   starts[0] = 0;
412:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
413:   count = 0;
414:   for (i=0; i<size; i++) {
415:     if (nprocs[2*i+1]) {
416:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);
417:     }
418:   }
419:   PetscFree(starts);

421:   base = owners[rank];

423:   /*  wait on receives */
424:   PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);
425:   source = lens + nrecvs;
426:   count  = nrecvs; slen = 0;
427:   while (count) {
428:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
429:     /* unpack receives into our local space */
430:     MPI_Get_count(&recv_status,MPIU_INT,&n);
431:     source[imdex]  = recv_status.MPI_SOURCE;
432:     lens[imdex]    = n;
433:     slen += n;
434:     count--;
435:   }
436:   PetscFree(recv_waits);
437: 
438:   /* move the data into the send scatter */
439:   PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);
440:   count = 0;
441:   for (i=0; i<nrecvs; i++) {
442:     values = rvalues + i*nmax;
443:     for (j=0; j<lens[i]; j++) {
444:       lrows[count++] = values[j] - base;
445:     }
446:   }
447:   PetscFree(rvalues);
448:   PetscFree(lens);
449:   PetscFree(owner);
450:   PetscFree(nprocs);
451: 
452:   /* actually zap the local rows */
453:   MatZeroRows(l->A,slen,lrows,diag);
454:   PetscFree(lrows);

456:   /* wait on sends */
457:   if (nsends) {
458:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
459:     MPI_Waitall(nsends,send_waits,send_status);
460:     PetscFree(send_status);
461:   }
462:   PetscFree(send_waits);
463:   PetscFree(svalues);

465:   return(0);
466: }

470: PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
471: {
472:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

476:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
477:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
478:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
479:   return(0);
480: }

484: PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
485: {
486:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;

490:   VecScatterBegin(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
491:   VecScatterEnd(mdn->Mvctx,xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
492:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
493:   return(0);
494: }

498: PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
499: {
500:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
502:   PetscScalar    zero = 0.0;

505:   VecSet(yy,zero);
506:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
507:   VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
508:   VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);
509:   return(0);
510: }

514: PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
515: {
516:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

520:   VecCopy(yy,zz);
521:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
522:   VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
523:   VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);
524:   return(0);
525: }

529: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
530: {
531:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
532:   Mat_SeqDense   *aloc = (Mat_SeqDense*)a->A->data;
534:   PetscInt       len,i,n,m = A->rmap.n,radd;
535:   PetscScalar    *x,zero = 0.0;
536: 
538:   VecSet(v,zero);
539:   VecGetArray(v,&x);
540:   VecGetSize(v,&n);
541:   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
542:   len  = PetscMin(a->A->rmap.n,a->A->cmap.n);
543:   radd = A->rmap.rstart*m;
544:   for (i=0; i<len; i++) {
545:     x[i] = aloc->v[radd + i*m + i];
546:   }
547:   VecRestoreArray(v,&x);
548:   return(0);
549: }

553: PetscErrorCode MatDestroy_MPIDense(Mat mat)
554: {
555:   Mat_MPIDense   *mdn = (Mat_MPIDense*)mat->data;


560: #if defined(PETSC_USE_LOG)
561:   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
562: #endif
563:   MatStashDestroy_Private(&mat->stash);
564:   MatDestroy(mdn->A);
565:   if (mdn->lvec)   {VecDestroy(mdn->lvec);}
566:   if (mdn->Mvctx)  {VecScatterDestroy(mdn->Mvctx);}
567:   if (mdn->factor) {
568:     PetscFree(mdn->factor->temp);
569:     PetscFree(mdn->factor->tag);
570:     PetscFree(mdn->factor->pivots);
571:     PetscFree(mdn->factor);
572:   }
573:   PetscFree(mdn);
574:   PetscObjectChangeTypeName((PetscObject)mat,0);
575:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);
576:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C","",PETSC_NULL);
577:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);
578:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C","",PETSC_NULL);
579:   PetscObjectComposeFunction((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C","",PETSC_NULL);
580:   return(0);
581: }

585: static PetscErrorCode MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
586: {
587:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
588:   PetscErrorCode    ierr;
589:   PetscViewerFormat format;
590:   int               fd;
591:   PetscInt          header[4],mmax,N = mat->cmap.N,i,j,m,k;
592:   PetscMPIInt       rank,tag  = ((PetscObject)viewer)->tag,size;
593:   PetscScalar       *work,*v;
594:   Mat_SeqDense      *a = (Mat_SeqDense*)mdn->A->data;
595:   MPI_Status        status;

598:   if (mdn->size == 1) {
599:     MatView(mdn->A,viewer);
600:   } else {
601:     PetscViewerBinaryGetDescriptor(viewer,&fd);
602:     MPI_Comm_rank(mat->comm,&rank);
603:     MPI_Comm_size(mat->comm,&size);

605:     PetscViewerGetFormat(viewer,&format);
606:     if (format == PETSC_VIEWER_BINARY_NATIVE) {

608:       if (!rank) {
609:         /* store the matrix as a dense matrix */
610:         header[0] = MAT_FILE_COOKIE;
611:         header[1] = mat->rmap.N;
612:         header[2] = N;
613:         header[3] = MATRIX_BINARY_FORMAT_DENSE;
614:         PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);

616:         /* get largest work array needed for transposing array */
617:         mmax = mat->rmap.n;
618:         for (i=1; i<size; i++) {
619:           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
620:         }
621:         PetscMalloc(mmax*N*sizeof(PetscScalar),&work);

623:         /* write out local array, by rows */
624:         m    = mat->rmap.n;
625:         v    = a->v;
626:         for (j=0; j<N; j++) {
627:           for (i=0; i<m; i++) {
628:             work[j + i*N] = *v++;
629:           }
630:         }
631:         PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);

633:         /* get largest work array to receive messages from other processes, excludes process zero */
634:         mmax = 0;
635:         for (i=1; i<size; i++) {
636:           mmax = PetscMax(mmax,mat->rmap.range[i+1] - mat->rmap.range[i]);
637:         }
638:         PetscMalloc(mmax*N*sizeof(PetscScalar),&v);
639:         for (k=1; k<size; k++) {
640:           m    = mat->rmap.range[k+1] - mat->rmap.range[k];
641:           MPI_Recv(v,m*N,MPIU_SCALAR,k,tag,mat->comm,&status);

643:           for (j=0; j<N; j++) {
644:             for (i=0; i<m; i++) {
645:               work[j + i*N] = v[i + j*m];
646:             }
647:           }
648:           PetscBinaryWrite(fd,work,m*N,PETSC_SCALAR,PETSC_FALSE);
649:         }
650:         PetscFree(work);
651:         PetscFree(v);
652:       } else {
653:         MPI_Send(a->v,mat->rmap.n*mat->cmap.N,MPIU_SCALAR,0,tag,mat->comm);
654:       }
655:     }
656:   }
657:   return(0);
658: }

662: static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
663: {
664:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
665:   PetscErrorCode    ierr;
666:   PetscMPIInt       size = mdn->size,rank = mdn->rank;
667:   PetscViewerType   vtype;
668:   PetscTruth        iascii,isdraw;
669:   PetscViewer       sviewer;
670:   PetscViewerFormat format;

673:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
674:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
675:   if (iascii) {
676:     PetscViewerGetType(viewer,&vtype);
677:     PetscViewerGetFormat(viewer,&format);
678:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
679:       MatInfo info;
680:       MatGetInfo(mat,MAT_LOCAL,&info);
681:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap.n,
682:                    (PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
683:       PetscViewerFlush(viewer);
684:       VecScatterView(mdn->Mvctx,viewer);
685:       return(0);
686:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
687:       return(0);
688:     }
689:   } else if (isdraw) {
690:     PetscDraw  draw;
691:     PetscTruth isnull;

693:     PetscViewerDrawGetDraw(viewer,0,&draw);
694:     PetscDrawIsNull(draw,&isnull);
695:     if (isnull) return(0);
696:   }

698:   if (size == 1) {
699:     MatView(mdn->A,viewer);
700:   } else {
701:     /* assemble the entire matrix onto first processor. */
702:     Mat         A;
703:     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,row,i,nz;
704:     PetscInt    *cols;
705:     PetscScalar *vals;

707:     MatCreate(mat->comm,&A);
708:     if (!rank) {
709:       MatSetSizes(A,M,N,M,N);
710:     } else {
711:       MatSetSizes(A,0,0,M,N);
712:     }
713:     /* Since this is a temporary matrix, MATMPIDENSE instead of A->type_name here is probably acceptable. */
714:     MatSetType(A,MATMPIDENSE);
715:     MatMPIDenseSetPreallocation(A,PETSC_NULL);
716:     PetscLogObjectParent(mat,A);

718:     /* Copy the matrix ... This isn't the most efficient means,
719:        but it's quick for now */
720:     A->insertmode = INSERT_VALUES;
721:     row = mat->rmap.rstart; m = mdn->A->rmap.n;
722:     for (i=0; i<m; i++) {
723:       MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
724:       MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
725:       MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
726:       row++;
727:     }

729:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
730:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
731:     PetscViewerGetSingleton(viewer,&sviewer);
732:     if (!rank) {
733:       MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
734:     }
735:     PetscViewerRestoreSingleton(viewer,&sviewer);
736:     PetscViewerFlush(viewer);
737:     MatDestroy(A);
738:   }
739:   return(0);
740: }

744: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
745: {
747:   PetscTruth     iascii,isbinary,isdraw,issocket;
748: 
750: 
751:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
752:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
753:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
754:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);

756:   if (iascii || issocket || isdraw) {
757:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
758:   } else if (isbinary) {
759:     MatView_MPIDense_Binary(mat,viewer);
760:   } else {
761:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
762:   }
763:   return(0);
764: }

768: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
769: {
770:   Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
771:   Mat            mdn = mat->A;
773:   PetscReal      isend[5],irecv[5];

776:   info->rows_global    = (double)A->rmap.N;
777:   info->columns_global = (double)A->cmap.N;
778:   info->rows_local     = (double)A->rmap.n;
779:   info->columns_local  = (double)A->cmap.N;
780:   info->block_size     = 1.0;
781:   MatGetInfo(mdn,MAT_LOCAL,info);
782:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
783:   isend[3] = info->memory;  isend[4] = info->mallocs;
784:   if (flag == MAT_LOCAL) {
785:     info->nz_used      = isend[0];
786:     info->nz_allocated = isend[1];
787:     info->nz_unneeded  = isend[2];
788:     info->memory       = isend[3];
789:     info->mallocs      = isend[4];
790:   } else if (flag == MAT_GLOBAL_MAX) {
791:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
792:     info->nz_used      = irecv[0];
793:     info->nz_allocated = irecv[1];
794:     info->nz_unneeded  = irecv[2];
795:     info->memory       = irecv[3];
796:     info->mallocs      = irecv[4];
797:   } else if (flag == MAT_GLOBAL_SUM) {
798:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
799:     info->nz_used      = irecv[0];
800:     info->nz_allocated = irecv[1];
801:     info->nz_unneeded  = irecv[2];
802:     info->memory       = irecv[3];
803:     info->mallocs      = irecv[4];
804:   }
805:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
806:   info->fill_ratio_needed = 0;
807:   info->factor_mallocs    = 0;
808:   return(0);
809: }

813: PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op)
814: {
815:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;

819:   switch (op) {
820:   case MAT_NO_NEW_NONZERO_LOCATIONS:
821:   case MAT_YES_NEW_NONZERO_LOCATIONS:
822:   case MAT_NEW_NONZERO_LOCATION_ERR:
823:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
824:   case MAT_COLUMNS_SORTED:
825:   case MAT_COLUMNS_UNSORTED:
826:     MatSetOption(a->A,op);
827:     break;
828:   case MAT_ROW_ORIENTED:
829:     a->roworiented = PETSC_TRUE;
830:     MatSetOption(a->A,op);
831:     break;
832:   case MAT_ROWS_SORTED:
833:   case MAT_ROWS_UNSORTED:
834:   case MAT_YES_NEW_DIAGONALS:
835:   case MAT_USE_HASH_TABLE:
836:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
837:     break;
838:   case MAT_COLUMN_ORIENTED:
839:     a->roworiented = PETSC_FALSE;
840:     MatSetOption(a->A,op);
841:     break;
842:   case MAT_IGNORE_OFF_PROC_ENTRIES:
843:     a->donotstash = PETSC_TRUE;
844:     break;
845:   case MAT_NO_NEW_DIAGONALS:
846:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
847:   case MAT_SYMMETRIC:
848:   case MAT_STRUCTURALLY_SYMMETRIC:
849:   case MAT_NOT_SYMMETRIC:
850:   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
851:   case MAT_HERMITIAN:
852:   case MAT_NOT_HERMITIAN:
853:   case MAT_SYMMETRY_ETERNAL:
854:   case MAT_NOT_SYMMETRY_ETERNAL:
855:     PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
856:     break;
857:   default:
858:     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
859:   }
860:   return(0);
861: }


866: PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
867: {
868:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
869:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
870:   PetscScalar    *l,*r,x,*v;
872:   PetscInt       i,j,s2a,s3a,s2,s3,m=mdn->A->rmap.n,n=mdn->A->cmap.n;

875:   MatGetLocalSize(A,&s2,&s3);
876:   if (ll) {
877:     VecGetLocalSize(ll,&s2a);
878:     if (s2a != s2) SETERRQ2(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %d != %d.", s2a, s2);
879:     VecGetArray(ll,&l);
880:     for (i=0; i<m; i++) {
881:       x = l[i];
882:       v = mat->v + i;
883:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
884:     }
885:     VecRestoreArray(ll,&l);
886:     PetscLogFlops(n*m);
887:   }
888:   if (rr) {
889:     VecGetLocalSize(rr,&s3a);
890:     if (s3a != s3) SETERRQ2(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
891:     VecScatterBegin(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
892:     VecScatterEnd(mdn->Mvctx,rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD);
893:     VecGetArray(mdn->lvec,&r);
894:     for (i=0; i<n; i++) {
895:       x = r[i];
896:       v = mat->v + i*m;
897:       for (j=0; j<m; j++) { (*v++) *= x;}
898:     }
899:     VecRestoreArray(mdn->lvec,&r);
900:     PetscLogFlops(n*m);
901:   }
902:   return(0);
903: }

907: PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
908: {
909:   Mat_MPIDense   *mdn = (Mat_MPIDense*)A->data;
910:   Mat_SeqDense   *mat = (Mat_SeqDense*)mdn->A->data;
912:   PetscInt       i,j;
913:   PetscReal      sum = 0.0;
914:   PetscScalar    *v = mat->v;

917:   if (mdn->size == 1) {
918:      MatNorm(mdn->A,type,nrm);
919:   } else {
920:     if (type == NORM_FROBENIUS) {
921:       for (i=0; i<mdn->A->cmap.n*mdn->A->rmap.n; i++) {
922: #if defined(PETSC_USE_COMPLEX)
923:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
924: #else
925:         sum += (*v)*(*v); v++;
926: #endif
927:       }
928:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
929:       *nrm = sqrt(*nrm);
930:       PetscLogFlops(2*mdn->A->cmap.n*mdn->A->rmap.n);
931:     } else if (type == NORM_1) {
932:       PetscReal *tmp,*tmp2;
933:       PetscMalloc(2*A->cmap.N*sizeof(PetscReal),&tmp);
934:       tmp2 = tmp + A->cmap.N;
935:       PetscMemzero(tmp,2*A->cmap.N*sizeof(PetscReal));
936:       *nrm = 0.0;
937:       v = mat->v;
938:       for (j=0; j<mdn->A->cmap.n; j++) {
939:         for (i=0; i<mdn->A->rmap.n; i++) {
940:           tmp[j] += PetscAbsScalar(*v);  v++;
941:         }
942:       }
943:       MPI_Allreduce(tmp,tmp2,A->cmap.N,MPIU_REAL,MPI_SUM,A->comm);
944:       for (j=0; j<A->cmap.N; j++) {
945:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
946:       }
947:       PetscFree(tmp);
948:       PetscLogFlops(A->cmap.n*A->rmap.n);
949:     } else if (type == NORM_INFINITY) { /* max row norm */
950:       PetscReal ntemp;
951:       MatNorm(mdn->A,type,&ntemp);
952:       MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
953:     } else {
954:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
955:     }
956:   }
957:   return(0);
958: }

962: PetscErrorCode MatTranspose_MPIDense(Mat A,Mat *matout)
963: {
964:   Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
965:   Mat_SeqDense   *Aloc = (Mat_SeqDense*)a->A->data;
966:   Mat            B;
967:   PetscInt       M = A->rmap.N,N = A->cmap.N,m,n,*rwork,rstart = A->rmap.rstart;
969:   PetscInt       j,i;
970:   PetscScalar    *v;

973:   if (!matout && M != N) {
974:     SETERRQ(PETSC_ERR_SUP,"Supports in-place transpose only for square matrices");
975:   }
976:   MatCreate(A->comm,&B);
977:   MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,N,M);
978:   MatSetType(B,A->type_name);
979:   MatMPIDenseSetPreallocation(B,PETSC_NULL);

981:   m = a->A->rmap.n; n = a->A->cmap.n; v = Aloc->v;
982:   PetscMalloc(m*sizeof(PetscInt),&rwork);
983:   for (i=0; i<m; i++) rwork[i] = rstart + i;
984:   for (j=0; j<n; j++) {
985:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
986:     v   += m;
987:   }
988:   PetscFree(rwork);
989:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
990:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
991:   if (matout) {
992:     *matout = B;
993:   } else {
994:     MatHeaderCopy(A,B);
995:   }
996:   return(0);
997: }

 999:  #include "petscblaslapack.h"
1002: PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
1003: {
1004:   Mat_MPIDense   *A = (Mat_MPIDense*)inA->data;
1005:   Mat_SeqDense   *a = (Mat_SeqDense*)A->A->data;
1006:   PetscScalar    oalpha = alpha;
1007:   PetscBLASInt   one = 1,nz = (PetscBLASInt)inA->rmap.n*inA->cmap.N;

1011:   BLASscal_(&nz,&oalpha,a->v,&one);
1012:   PetscLogFlops(nz);
1013:   return(0);
1014: }

1016: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);

1020: PetscErrorCode MatSetUpPreallocation_MPIDense(Mat A)
1021: {

1025:   MatMPIDenseSetPreallocation(A,0);
1026:   return(0);
1027: }

1031: PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat *C)
1032: {
1034:   PetscInt       m=A->rmap.n,n=B->cmap.n;
1035:   Mat            Cmat;

1038:   if (A->cmap.n != B->rmap.n) SETERRQ2(PETSC_ERR_ARG_SIZ,"A->cmap.n %d != B->rmap.n %d\n",A->cmap.n,B->rmap.n);
1039:   MatCreate(B->comm,&Cmat);
1040:   MatSetSizes(Cmat,m,n,A->rmap.N,B->cmap.N);
1041:   MatSetType(Cmat,MATMPIDENSE);
1042:   MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);
1043:   MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);
1044:   MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);
1045:   *C = Cmat;
1046:   return(0);
1047: }

1049: /* -------------------------------------------------------------------*/
1050: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
1051:        MatGetRow_MPIDense,
1052:        MatRestoreRow_MPIDense,
1053:        MatMult_MPIDense,
1054: /* 4*/ MatMultAdd_MPIDense,
1055:        MatMultTranspose_MPIDense,
1056:        MatMultTransposeAdd_MPIDense,
1057:        0,
1058:        0,
1059:        0,
1060: /*10*/ 0,
1061:        0,
1062:        0,
1063:        0,
1064:        MatTranspose_MPIDense,
1065: /*15*/ MatGetInfo_MPIDense,
1066:        MatEqual_MPIDense,
1067:        MatGetDiagonal_MPIDense,
1068:        MatDiagonalScale_MPIDense,
1069:        MatNorm_MPIDense,
1070: /*20*/ MatAssemblyBegin_MPIDense,
1071:        MatAssemblyEnd_MPIDense,
1072:        0,
1073:        MatSetOption_MPIDense,
1074:        MatZeroEntries_MPIDense,
1075: /*25*/ MatZeroRows_MPIDense,
1076:        MatLUFactorSymbolic_MPIDense,
1077:        0,
1078:        MatCholeskyFactorSymbolic_MPIDense,
1079:        0,
1080: /*30*/ MatSetUpPreallocation_MPIDense,
1081:        0,
1082:        0,
1083:        MatGetArray_MPIDense,
1084:        MatRestoreArray_MPIDense,
1085: /*35*/ MatDuplicate_MPIDense,
1086:        0,
1087:        0,
1088:        0,
1089:        0,
1090: /*40*/ 0,
1091:        MatGetSubMatrices_MPIDense,
1092:        0,
1093:        MatGetValues_MPIDense,
1094:        0,
1095: /*45*/ 0,
1096:        MatScale_MPIDense,
1097:        0,
1098:        0,
1099:        0,
1100: /*50*/ 0,
1101:        0,
1102:        0,
1103:        0,
1104:        0,
1105: /*55*/ 0,
1106:        0,
1107:        0,
1108:        0,
1109:        0,
1110: /*60*/ MatGetSubMatrix_MPIDense,
1111:        MatDestroy_MPIDense,
1112:        MatView_MPIDense,
1113:        0,
1114:        0,
1115: /*65*/ 0,
1116:        0,
1117:        0,
1118:        0,
1119:        0,
1120: /*70*/ 0,
1121:        0,
1122:        0,
1123:        0,
1124:        0,
1125: /*75*/ 0,
1126:        0,
1127:        0,
1128:        0,
1129:        0,
1130: /*80*/ 0,
1131:        0,
1132:        0,
1133:        0,
1134: /*84*/ MatLoad_MPIDense,
1135:        0,
1136:        0,
1137:        0,
1138:        0,
1139:        0,
1140: /*90*/ 0,
1141:        MatMatMultSymbolic_MPIDense_MPIDense,
1142:        0,
1143:        0,
1144:        0,
1145: /*95*/ 0,
1146:        0,
1147:        0,
1148:        0};

1153: PetscErrorCode  MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
1154: {
1155:   Mat_MPIDense   *a;

1159:   mat->preallocated = PETSC_TRUE;
1160:   /* Note:  For now, when data is specified above, this assumes the user correctly
1161:    allocates the local dense storage space.  We should add error checking. */

1163:   a    = (Mat_MPIDense*)mat->data;
1164:   MatCreate(PETSC_COMM_SELF,&a->A);
1165:   MatSetSizes(a->A,mat->rmap.n,mat->cmap.N,mat->rmap.n,mat->cmap.N);
1166:   MatSetType(a->A,MATSEQDENSE);
1167:   MatSeqDenseSetPreallocation(a->A,data);
1168:   PetscLogObjectParent(mat,a->A);
1169:   return(0);
1170: }

1173: /*MC
1174:    MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

1176:    Options Database Keys:
1177: . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

1179:   Level: beginner

1181: .seealso: MatCreateMPIDense
1182: M*/
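
/*
   Illustrative sketch (not part of the original file; comm, A, m, n, M, N are placeholders):
   two common ways to end up with a MATMPIDENSE matrix.

     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIDENSE);
     MatMPIDenseSetPreallocation(A,PETSC_NULL);

   or, deferring the type choice to the options database (-mat_type mpidense):

     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetFromOptions(A);
*/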

1187: PetscErrorCode  MatCreate_MPIDense(Mat mat)
1188: {
1189:   Mat_MPIDense   *a;

1193:   PetscNew(Mat_MPIDense,&a);
1194:   mat->data         = (void*)a;
1195:   PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1196:   mat->factor       = 0;
1197:   mat->mapping      = 0;

1199:   a->factor       = 0;
1200:   mat->insertmode = NOT_SET_VALUES;
1201:   MPI_Comm_rank(mat->comm,&a->rank);
1202:   MPI_Comm_size(mat->comm,&a->size);

1204:   mat->rmap.bs = mat->cmap.bs = 1;
1205:   PetscMapSetUp(&mat->rmap);
1206:   PetscMapSetUp(&mat->cmap);
1207:   a->nvec = mat->cmap.n;

1209:   /* build cache for off array entries formed */
1210:   a->donotstash = PETSC_FALSE;
1211:   MatStashCreate_Private(mat->comm,1,&mat->stash);

1213:   /* stuff used for matrix vector multiply */
1214:   a->lvec        = 0;
1215:   a->Mvctx       = 0;
1216:   a->roworiented = PETSC_TRUE;

1218:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1219:                                      "MatGetDiagonalBlock_MPIDense",
1220:                                      MatGetDiagonalBlock_MPIDense);
1221:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMPIDenseSetPreallocation_C",
1222:                                      "MatMPIDenseSetPreallocation_MPIDense",
1223:                                      MatMPIDenseSetPreallocation_MPIDense);
1224:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C",
1225:                                      "MatMatMult_MPIAIJ_MPIDense",
1226:                                       MatMatMult_MPIAIJ_MPIDense);
1227:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultSymbolic_mpiaij_mpidense_C",
1228:                                      "MatMatMultSymbolic_MPIAIJ_MPIDense",
1229:                                       MatMatMultSymbolic_MPIAIJ_MPIDense);
1230:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatMatMultNumeric_mpiaij_mpidense_C",
1231:                                      "MatMatMultNumeric_MPIAIJ_MPIDense",
1232:                                       MatMatMultNumeric_MPIAIJ_MPIDense);
1233:   PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1234:   return(0);
1235: }

1238: /*MC
1239:    MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

1241:    This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1242:    and MATMPIDENSE otherwise.

1244:    Options Database Keys:
1245: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

1247:   Level: beginner

1249: .seealso: MatCreateMPIDense,MATSEQDENSE,MATMPIDENSE
1250: M*/
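
/*
   Illustrative sketch (not part of the original file; comm, A, M, N are placeholders): because
   MATDENSE simply forwards to the sequential or parallel implementation, the same code works
   on any communicator size.

     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
     MatSetType(A,MATDENSE);    becomes MATSEQDENSE on one process, MATMPIDENSE otherwise
*/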

1255: PetscErrorCode  MatCreate_Dense(Mat A)
1256: {
1258:   PetscMPIInt    size;

1261:   MPI_Comm_size(A->comm,&size);
1262:   if (size == 1) {
1263:     MatSetType(A,MATSEQDENSE);
1264:   } else {
1265:     MatSetType(A,MATMPIDENSE);
1266:   }
1267:   return(0);
1268: }

1273: /*@C
1274:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1276:    Not collective

1278:    Input Parameters:
1279: +  A - the matrix
1280: -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1281:    to control all matrix memory allocation.

1283:    Notes:
1284:    The dense format is fully compatible with standard Fortran 77
1285:    storage by columns.

1287:    The data input variable is intended primarily for Fortran programmers
1288:    who wish to allocate their own matrix memory space.  Most users should
1289:    set data=PETSC_NULL.

1291:    Level: intermediate

1293: .keywords: matrix,dense, parallel

1295: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1296: @*/
1297: PetscErrorCode  MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1298: {
1299:   PetscErrorCode ierr,(*f)(Mat,PetscScalar *);

1302:   PetscObjectQueryFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",(void (**)(void))&f);
1303:   if (f) {
1304:     (*f)(mat,data);
1305:   }
1306:   return(0);
1307: }
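
/*
   Illustrative sketch (not part of the original file; A, comm, m, N, mydata are placeholders):
   supplying user-managed storage.  As MatMPIDenseSetPreallocation_MPIDense above shows, the
   local block is rmap.n by cmap.N, so the array must hold m*N scalars in column-major
   (Fortran) order with leading dimension m, where m is the number of local rows.

     PetscScalar *mydata;
     PetscMalloc(m*N*sizeof(PetscScalar),&mydata);
     MatCreate(comm,&A);
     MatSetSizes(A,m,PETSC_DECIDE,PETSC_DECIDE,N);
     MatSetType(A,MATMPIDENSE);
     MatMPIDenseSetPreallocation(A,mydata);
*/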

1311: /*@C
1312:    MatCreateMPIDense - Creates a distributed parallel matrix in dense format.

1314:    Collective on MPI_Comm

1316:    Input Parameters:
1317: +  comm - MPI communicator
1318: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1319: .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1320: .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1321: .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1322: -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1323:    to control all matrix memory allocation.

1325:    Output Parameter:
1326: .  A - the matrix

1328:    Notes:
1329:    The dense format is fully compatible with standard Fortran 77
1330:    storage by columns.

1332:    The data input variable is intended primarily for Fortran programmers
1333:    who wish to allocate their own matrix memory space.  Most users should
1334:    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

1336:    The user MUST specify either the local or global matrix dimensions
1337:    (possibly both).

1339:    Level: intermediate

1341: .keywords: matrix,dense, parallel

1343: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1344: @*/
1345: PetscErrorCode  MatCreateMPIDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
1346: {
1348:   PetscMPIInt    size;

1351:   MatCreate(comm,A);
1352:   MatSetSizes(*A,m,n,M,N);
1353:   MPI_Comm_size(comm,&size);
1354:   if (size > 1) {
1355:     MatSetType(*A,MATMPIDENSE);
1356:     MatMPIDenseSetPreallocation(*A,data);
1357:   } else {
1358:     MatSetType(*A,MATSEQDENSE);
1359:     MatSeqDenseSetPreallocation(*A,data);
1360:   }
1361:   return(0);
1362: }
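
/*
   Illustrative sketch (not part of the original file; M and N are placeholders): the convenience
   routine above is equivalent to MatCreate/MatSetSizes/MatSetType followed by the matching
   preallocation call.  A typical use letting PETSc manage the storage:

     Mat A;
     MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);
     MatSetValues(A,...);                        insert entries using global indices
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/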

1366: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1367: {
1368:   Mat            mat;
1369:   Mat_MPIDense   *a,*oldmat = (Mat_MPIDense*)A->data;

1373:   *newmat       = 0;
1374:   MatCreate(A->comm,&mat);
1375:   MatSetSizes(mat,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);
1376:   MatSetType(mat,A->type_name);
1377:   a                 = (Mat_MPIDense*)mat->data;
1378:   PetscMemcpy(mat->ops,A->ops,sizeof(struct _MatOps));
1379:   mat->factor       = A->factor;
1380:   mat->assembled    = PETSC_TRUE;
1381:   mat->preallocated = PETSC_TRUE;

1383:   mat->rmap.rstart     = A->rmap.rstart;
1384:   mat->rmap.rend       = A->rmap.rend;
1385:   a->size              = oldmat->size;
1386:   a->rank              = oldmat->rank;
1387:   mat->insertmode      = NOT_SET_VALUES;
1388:   a->nvec              = oldmat->nvec;
1389:   a->donotstash        = oldmat->donotstash;
1390: 
1391:   PetscMemcpy(mat->rmap.range,A->rmap.range,(a->size+1)*sizeof(PetscInt));
1392:   PetscMemcpy(mat->cmap.range,A->cmap.range,(a->size+1)*sizeof(PetscInt));
1393:   MatStashCreate_Private(A->comm,1,&mat->stash);

1395:   MatSetUpMultiply_MPIDense(mat);
1396:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1397:   PetscLogObjectParent(mat,a->A);
1398:   *newmat = mat;
1399:   return(0);
1400: }

1402:  #include "petscsys.h"

1406: PetscErrorCode MatLoad_MPIDense_DenseInFile(MPI_Comm comm,PetscInt fd,PetscInt M,PetscInt N, MatType type,Mat *newmat)
1407: {
1409:   PetscMPIInt    rank,size;
1410:   PetscInt       *rowners,i,m,nz,j;
1411:   PetscScalar    *array,*vals,*vals_ptr;
1412:   MPI_Status     status;

1415:   MPI_Comm_rank(comm,&rank);
1416:   MPI_Comm_size(comm,&size);

1418:   /* determine ownership of all rows */
1419:   m          = M/size + ((M % size) > rank);
1420:   PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1421:   MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
1422:   rowners[0] = 0;
1423:   for (i=2; i<=size; i++) {
1424:     rowners[i] += rowners[i-1];
1425:   }

1427:   MatCreate(comm,newmat);
1428:   MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1429:   MatSetType(*newmat,type);
1430:   MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1431:   MatGetArray(*newmat,&array);

1433:   if (!rank) {
1434:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1436:     /* read in my part of the matrix numerical values  */
1437:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1438: 
1439:     /* insert into matrix by row (this is why we cannot read directly into the array) */
1440:     vals_ptr = vals;
1441:     for (i=0; i<m; i++) {
1442:       for (j=0; j<N; j++) {
1443:         array[i + j*m] = *vals_ptr++;
1444:       }
1445:     }

1447:     /* read in the other processors' parts and ship them out */
1448:     for (i=1; i<size; i++) {
1449:       nz   = (rowners[i+1] - rowners[i])*N;
1450:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1451:       MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1452:     }
1453:   } else {
1454:     /* receive numeric values */
1455:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1457:     /* receive message of values*/
1458:     MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

1460:     /* insert into matrix by row (this is why we cannot read directly into the array) */
1461:     vals_ptr = vals;
1462:     for (i=0; i<m; i++) {
1463:       for (j=0; j<N; j++) {
1464:         array[i + j*m] = *vals_ptr++;
1465:       }
1466:     }
1467:   }
1468:   PetscFree(rowners);
1469:   PetscFree(vals);
1470:   MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1471:   MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1472:   return(0);
1473: }

1477: PetscErrorCode MatLoad_MPIDense(PetscViewer viewer, MatType type,Mat *newmat)
1478: {
1479:   Mat            A;
1480:   PetscScalar    *vals,*svals;
1481:   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1482:   MPI_Status     status;
1483:   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*rowners,*sndcounts,m,maxnz;
1484:   PetscInt       header[4],*rowlengths = 0,M,N,*cols;
1485:   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1486:   PetscInt       i,nz,j,rstart,rend;
1487:   int            fd;

1491:   MPI_Comm_size(comm,&size);
1492:   MPI_Comm_rank(comm,&rank);
1493:   if (!rank) {
1494:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1495:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1496:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1497:   }

1499:   MPI_Bcast(header+1,3,MPIU_INT,0,comm);
1500:   M = header[1]; N = header[2]; nz = header[3];

1502:   /*
1503:        Handle case where matrix is stored on disk as a dense matrix 
1504:   */
1505:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1506:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,type,newmat);
1507:     return(0);
1508:   }

1510:   /* determine ownership of all rows */
1511:   m          = M/size + ((M % size) > rank);
1512:   PetscMalloc((size+2)*sizeof(PetscInt),&rowners);
1513:   MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1514:   rowners[0] = 0;
1515:   for (i=2; i<=size; i++) {
1516:     rowners[i] += rowners[i-1];
1517:   }
1518:   rstart = rowners[rank];
1519:   rend   = rowners[rank+1];

1521:   /* distribute row lengths to all processors */
1522:   PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);
1523:   offlens = ourlens + (rend-rstart);
1524:   if (!rank) {
1525:     PetscMalloc(M*sizeof(PetscInt),&rowlengths);
1526:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1527:     PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);
1528:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1529:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1530:     PetscFree(sndcounts);
1531:   } else {
1532:     MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);
1533:   }

1535:   if (!rank) {
1536:     /* calculate the number of nonzeros on each processor */
1537:     PetscMalloc(size*sizeof(PetscInt),&procsnz);
1538:     PetscMemzero(procsnz,size*sizeof(PetscInt));
1539:     for (i=0; i<size; i++) {
1540:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1541:         procsnz[i] += rowlengths[j];
1542:       }
1543:     }
1544:     PetscFree(rowlengths);

1546:     /* determine max buffer needed and allocate it */
1547:     maxnz = 0;
1548:     for (i=0; i<size; i++) {
1549:       maxnz = PetscMax(maxnz,procsnz[i]);
1550:     }
1551:     PetscMalloc(maxnz*sizeof(PetscInt),&cols);

1553:     /* read in my part of the matrix column indices  */
1554:     nz = procsnz[0];
1555:     PetscMalloc(nz*sizeof(PetscInt),&mycols);
1556:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

1558:     /* read in everyone else's portion and ship it off */
1559:     for (i=1; i<size; i++) {
1560:       nz   = procsnz[i];
1561:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1562:       MPI_Send(cols,nz,MPIU_INT,i,tag,comm);
1563:     }
1564:     PetscFree(cols);
1565:   } else {
1566:     /* determine buffer space needed for message */
1567:     nz = 0;
1568:     for (i=0; i<m; i++) {
1569:       nz += ourlens[i];
1570:     }
1571:     PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);

1573:     /* receive message of column indices*/
1574:     MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);
1575:     MPI_Get_count(&status,MPIU_INT,&maxnz);
1576:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1577:   }

1579:   /* loop over local rows, determining number of off diagonal entries */
1580:   PetscMemzero(offlens,m*sizeof(PetscInt));
1581:   jj = 0;
1582:   for (i=0; i<m; i++) {
1583:     for (j=0; j<ourlens[i]; j++) {
1584:       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1585:       jj++;
1586:     }
1587:   }

1589:   /* create our matrix */
1590:   for (i=0; i<m; i++) {
1591:     ourlens[i] -= offlens[i];
1592:   }
1593:   MatCreate(comm,newmat);
1594:   MatSetSizes(*newmat,m,PETSC_DECIDE,M,N);
1595:   MatSetType(*newmat,type);
1596:   MatMPIDenseSetPreallocation(*newmat,PETSC_NULL);
1597:   A = *newmat;
1598:   for (i=0; i<m; i++) {
1599:     ourlens[i] += offlens[i];
1600:   }

1602:   if (!rank) {
1603:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1605:     /* read in my part of the matrix numerical values  */
1606:     nz = procsnz[0];
1607:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1608: 
1609:     /* insert into matrix */
1610:     jj      = rstart;
1611:     smycols = mycols;
1612:     svals   = vals;
1613:     for (i=0; i<m; i++) {
1614:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1615:       smycols += ourlens[i];
1616:       svals   += ourlens[i];
1617:       jj++;
1618:     }

1620:     /* read in the other processors' parts and ship them out */
1621:     for (i=1; i<size; i++) {
1622:       nz   = procsnz[i];
1623:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1624:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1625:     }
1626:     PetscFree(procsnz);
1627:   } else {
1628:     /* receive numeric values */
1629:     PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);

1631:     /* receive message of values*/
1632:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1633:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1634:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1636:     /* insert into matrix */
1637:     jj      = rstart;
1638:     smycols = mycols;
1639:     svals   = vals;
1640:     for (i=0; i<m; i++) {
1641:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1642:       smycols += ourlens[i];
1643:       svals   += ourlens[i];
1644:       jj++;
1645:     }
1646:   }
1647:   PetscFree(ourlens);
1648:   PetscFree(vals);
1649:   PetscFree(mycols);
1650:   PetscFree(rowners);

1652:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1653:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1654:   return(0);
1655: }

1659: PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscTruth *flag)
1660: {
1661:   Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
1662:   Mat            a,b;
1663:   PetscTruth     flg;

1667:   a = matA->A;
1668:   b = matB->A;
1669:   MatEqual(a,b,&flg);
1670:   MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1671:   return(0);
1672: }