Actual source code: openmp.c

#define PETSCKSP_DLL

#include "private/pcimpl.h"
#include "petscksp.h"

typedef struct {
  MatStructure flag;               /* pc->flag */
  PetscInt     setupcalled;        /* pc->setupcalled */
  PetscInt     n;
  MPI_Comm     comm;               /* local world used by this preconditioner */
  KSP          ksp;                /* actual solver used across local world */
  Mat          mat;                /* matrix in local world */
  Mat          gmat;               /* matrix known only to process 0 in the local world */
  Vec          x,y,xdummy,ydummy;
  VecScatter   scatter;
} PC_OpenMP;


/*
    Would like to have this simply call PCView() on the inner PC. The problem is
  that the outer comm does not live on the inside, so that cannot be done. Instead,
  handle the special case when the viewer is stdout: construct a new viewer just
  for this call.
*/

static PetscErrorCode PCView_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP      *red = (PC_OpenMP*)ctx;
  PetscViewer    viewer;

  PetscViewerASCIIGetStdout(comm,&viewer);
  PetscViewerASCIIPushTab(viewer);         /* this is bogus in general */
  KSPView(red->ksp,viewer);
  PetscViewerASCIIPopTab(viewer);
  return(0);
}

static PetscErrorCode PCView_OpenMP(PC pc,PetscViewer viewer)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;
  PetscMPIInt    size;
  PetscTruth     iascii;

  MPI_Comm_size(red->comm,&size);
  PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
  if (iascii) {
    PetscViewerASCIIPrintf(viewer,"  Size of solver nodes %d\n",size);
    PetscViewerASCIIPrintf(viewer,"  Parallel sub-solver given next\n");
    /* should only make the next call if the viewer is associated with stdout */
    PetscOpenMPRun(red->comm,PCView_OpenMP_MP,red);
  }
  return(0);
}

#include "include/private/matimpl.h"
#include "private/vecimpl.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/mat/impls/aij/seq/aij.h"

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices.
*/
static PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscTruth     aij;
  PetscScalar    *gmataa,*ao,*ad,*gmataarestore=0;

  CHKMEMQ;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);
  if (!rank) {
    PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",gmat->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    MatCreate(comm,&mat);
    MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
    MatSetType(mat,MATAIJ);
    PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
    PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);
    MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    PetscObjectGetNewTag((PetscObject)mat,&tag);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
      }
      /* count the off-diagonal entries in each row (olens) and, in ld, those that lie before the diagonal block */
      PetscMemzero(olens,m*sizeof(PetscInt));
      PetscMalloc(m*sizeof(PetscInt),&ld);
      PetscMemzero(ld,m*sizeof(PetscInt));
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
        MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
      /* receive column indices */
      MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
      PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);
      MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
      /* count the off-diagonal entries in each row (olens) and, in ld, those that lie before the diagonal block */
      PetscMemzero(olens,m*sizeof(PetscInt));
      PetscMalloc(m*sizeof(PetscInt),&ld);
      PetscMemzero(ld,m*sizeof(PetscInt));
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      PetscMemzero(gmataa,nz*sizeof(PetscScalar));
      MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
    }
    /* set preallocation; the call that does not match the actual matrix type is a no-op */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    MatSeqAIJSetPreallocation(mat,0,dlens);
    MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt  = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
      cnt += dlens[i];
    }
    if (rank) {
      PetscFree2(gmataa,gmataj);
    }
    PetscFree2(dlens,olens);
    PetscFree(rowners);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat   = *inmat;
    PetscObjectGetNewTag((PetscObject)mat,&tag);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
      }
      nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      PetscMalloc(nz*sizeof(PetscScalar),&gmataa); gmataarestore = gmataa;
      MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap.n) {
      i  = 0;
      nz = ld[i];                                   PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap.n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap.n) {
      /* copy the trailing off-diagonal part of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
    }
    if (rank) {
      PetscFree(gmataarestore);
    }
  }
  MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
  CHKMEMQ;
  return(0);
}
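
/*
   A minimal sketch (not part of the original source) of how the routine above is driven:
   the first call uses MAT_INITIAL_MATRIX to create the distributed matrix; later calls use
   MAT_REUSE_MATRIX to refresh only the numerical values when the nonzero pattern is unchanged.
   The names gseq (a SeqAIJ matrix significant only on process 0), mlocal (the number of local
   rows chosen for this process) and dmat are hypothetical; the actual caller is
   PCSetUp_OpenMP_MP() below.

      Mat      gseq,dmat;
      PetscInt mlocal;

      MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_INITIAL_MATRIX,&dmat);
      ... change the numerical values of gseq on process 0, keeping the same sparsity ...
      MatDistribute_MPIAIJ(comm,gseq,mlocal,MAT_REUSE_MATRIX,&dmat);
*/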

static PetscErrorCode PCApply_OpenMP_1(PC pc,Vec x,Vec y)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;

  KSPSolve(red->ksp,x,y);
  return(0);
}

static PetscErrorCode PCSetUp_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP      *red = (PC_OpenMP*)ctx;
  PetscInt       m;
  MatReuse       scal;
  PetscMPIInt    rank;

  MPI_Bcast(&red->setupcalled,1,MPIU_INT,0,comm);
  MPI_Bcast(&red->flag,1,MPI_INT,0,comm);
  if (!red->setupcalled) {
    /* setup vector communication */
    MPI_Bcast(&red->n,1,MPIU_INT,0,comm);
    VecCreateMPI(comm,PETSC_DECIDE,red->n,&red->x);
    VecCreateMPI(comm,PETSC_DECIDE,red->n,&red->y);
    VecScatterCreateToZero(red->x,&red->scatter,&red->xdummy);
    VecDuplicate(red->xdummy,&red->ydummy);
    MPI_Comm_rank(comm,&rank);
    if (!rank) {
      /* on process 0 xdummy/ydummy are replaced by the outer vectors in PCApply_OpenMP() */
      VecDestroy(red->xdummy);
      VecDestroy(red->ydummy);
    }
    scal = MAT_INITIAL_MATRIX;
  } else {
    if (red->flag == DIFFERENT_NONZERO_PATTERN) {
      MatDestroy(red->mat);
      scal = MAT_INITIAL_MATRIX;
      CHKMEMQ;
    } else {
      scal = MAT_REUSE_MATRIX;
    }
  }

  /* copy matrix out onto processes */
  VecGetLocalSize(red->x,&m);
  MatDistribute_MPIAIJ(comm,red->gmat,m,scal,&red->mat);
  if (!red->setupcalled) {
    /* create the solver */
    KSPCreate(comm,&red->ksp);
    KSPSetOptionsPrefix(red->ksp,"openmp_"); /* should actually append with global pc prefix */
    KSPSetOperators(red->ksp,red->mat,red->mat,red->flag);
    KSPSetFromOptions(red->ksp);
  } else {
    KSPSetOperators(red->ksp,red->mat,red->mat,red->flag);
  }
  return(0);
}

static PetscErrorCode PCSetUp_OpenMP(PC pc)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;
  PetscMPIInt    size;

  red->gmat        = pc->mat;
  red->flag        = pc->flag;
  red->setupcalled = pc->setupcalled;

  MPI_Comm_size(red->comm,&size);
  if (size == 1) {  /* special case where copy of matrix is not needed */
    if (!red->setupcalled) {
      /* create the solver */
      KSPCreate(pc->comm,&red->ksp);
      KSPSetOptionsPrefix(red->ksp,"openmp_"); /* should actually append with global pc prefix */
      KSPSetOperators(red->ksp,red->gmat,red->gmat,red->flag);
      KSPSetFromOptions(red->ksp);
    } else {
      KSPSetOperators(red->ksp,red->gmat,red->gmat,red->flag);
    }
    pc->ops->apply = PCApply_OpenMP_1;
    return(0);
  } else {
    MatGetSize(pc->mat,&red->n,PETSC_IGNORE);
    PetscOpenMPRun(red->comm,PCSetUp_OpenMP_MP,red);
  }
  return(0);
}

static PetscErrorCode PCApply_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP      *red = (PC_OpenMP*)ctx;

  /* move the input vector (sequential, on process 0) out to the parallel work vector */
  VecScatterBegin(red->scatter,red->xdummy,red->x,INSERT_VALUES,SCATTER_REVERSE);
  VecScatterEnd(red->scatter,red->xdummy,red->x,INSERT_VALUES,SCATTER_REVERSE);

  KSPSolve(red->ksp,red->x,red->y);

  /* gather the parallel solution back onto process 0 */
  VecScatterBegin(red->scatter,red->y,red->ydummy,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(red->scatter,red->y,red->ydummy,INSERT_VALUES,SCATTER_FORWARD);
  return(0);
}

static PetscErrorCode PCApply_OpenMP(PC pc,Vec x,Vec y)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;

  /* the outer (sequential) vectors become the process-0 ends of the scatters */
  red->xdummy = x;
  red->ydummy = y;
  PetscOpenMPRun(red->comm,PCApply_OpenMP_MP,red);
  return(0);
}

static PetscErrorCode PCDestroy_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP      *red = (PC_OpenMP*)ctx;
  PetscMPIInt    rank;

  if (red->scatter) {VecScatterDestroy(red->scatter);}
  if (red->x) {VecDestroy(red->x);}
  if (red->y) {VecDestroy(red->y);}
  if (red->ksp) {KSPDestroy(red->ksp);}
  if (red->mat) {MatDestroy(red->mat);}
  MPI_Comm_rank(comm,&rank);
  if (rank) {
    /* on process 0 these point to the caller's vectors; see PCApply_OpenMP() */
    if (red->xdummy) {VecDestroy(red->xdummy);}
    if (red->ydummy) {VecDestroy(red->ydummy);}
  }
  return(0);
}

static PetscErrorCode PCDestroy_OpenMP(PC pc)
{
  PC_OpenMP      *red = (PC_OpenMP*)pc->data;

  PetscOpenMPRun(red->comm,PCDestroy_OpenMP_MP,red);
  PetscOpenMPFree(red->comm,red);
  return(0);
}
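
/*
   The preconditioner above is built entirely on the PetscOpenMPNew()/PetscOpenMPRun()/
   PetscOpenMPFree() pattern: a context is created across PETSC_COMM_LOCAL_WORLD, and each
   operation is a worker function with signature (MPI_Comm,void*) that process 0 triggers on
   every process of that world. A condensed sketch of the pattern as it is used in this file;
   MyCtx and MyWorker are hypothetical names.

      typedef struct { PetscInt n; } MyCtx;

      static PetscErrorCode MyWorker(MPI_Comm comm,void *ctx)
      {
        MyCtx *my = (MyCtx*)ctx;
        MPI_Bcast(&my->n,1,MPIU_INT,0,comm);    share data filled in by process 0
        ... collective work across comm ...
        return(0);
      }

      MyCtx *my;
      PetscOpenMPNew(PETSC_COMM_LOCAL_WORLD,(PetscInt)sizeof(MyCtx),(void**)&my);
      my->n = 100;                                         only process 0 runs this code
      PetscOpenMPRun(PETSC_COMM_LOCAL_WORLD,MyWorker,my);  MyWorker() runs on all processes
      PetscOpenMPFree(PETSC_COMM_LOCAL_WORLD,my);
*/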

static PetscErrorCode PCSetFromOptions_OpenMP(PC pc)
{
  return(0);
}


/* -------------------------------------------------------------------------------------*/
/*MC
     PCOPENMP - Runs a preconditioner for a single-process matrix across several MPI processes

$     This will usually be run with -pc_type openmp -ksp_type preonly
$     Solver options are set with -openmp_ksp_... and -openmp_pc_...; for example,
$     -openmp_ksp_type cg would use cg as the Krylov method, or -openmp_ksp_monitor or
$     -openmp_pc_type hypre -openmp_pc_hypre_type boomeramg

       Always run with -ksp_view (or -snes_view) to see what solver is actually being used.

       Currently the solver options INSIDE the OpenMP preconditioner can ONLY be set via the
      options database.

   Level: intermediate

   See PetscOpenMPMerge() and PetscOpenMPSpawn() for two ways to start up MPI for use with this preconditioner.

.seealso:  PCCreate(), PCSetType(), PCType (for list of available types)

M*/
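
/*
   A hedged sketch of the application-side usage described by the manual page above: the
   application performs an ordinary one-process KSP solve, and the preconditioner is selected
   purely from the options database. The matrix and vector names A, b, x are placeholders,
   and PETSC_COMM_SELF stands for any single-process communicator (PCCreate_OpenMP() below
   refuses anything larger). The worker communicator must already be set up; see
   PetscOpenMPMerge() and PetscOpenMPSpawn().

      Run as, for example:
        ./ex -ksp_type preonly -pc_type openmp -openmp_ksp_type cg -ksp_view

      KSP ksp;
      KSPCreate(PETSC_COMM_SELF,&ksp);
      KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);
      KSPSetFromOptions(ksp);    picks up -pc_type openmp and the -openmp_ options
      KSPSolve(ksp,b,x);
      KSPDestroy(ksp);
*/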

PetscErrorCode  PCCreate_OpenMP(PC pc)
{
  PC_OpenMP      *red;
  PetscMPIInt    size;

  MPI_Comm_size(pc->comm,&size);
  if (size > 1) SETERRQ(PETSC_ERR_ARG_SIZ,"OpenMP preconditioner only works for sequential solves");
  /* cast the struct length to a PetscInt for easier MPI calls */
  PetscOpenMPNew(PETSC_COMM_LOCAL_WORLD,(PetscInt)sizeof(PC_OpenMP),(void**)&red);
  red->comm = PETSC_COMM_LOCAL_WORLD;
  pc->data  = (void*) red;

  pc->ops->apply          = PCApply_OpenMP;
  pc->ops->destroy        = PCDestroy_OpenMP;
  pc->ops->setfromoptions = PCSetFromOptions_OpenMP;
  pc->ops->setup          = PCSetUp_OpenMP;
  pc->ops->view           = PCView_OpenMP;
  return(0);
}