Actual source code: openmp.c
#define PETSCKSP_DLL

#include "private/pcimpl.h"
#include "petscksp.h"

typedef struct {
  MatStructure flag;              /* pc->flag */
  PetscInt     setupcalled;       /* pc->setupcalled */
  PetscInt     n;
  MPI_Comm     comm;              /* local world used by this preconditioner */
  KSP          ksp;               /* actual solver used across local world */
  Mat          mat;               /* matrix in local world */
  Mat          gmat;              /* matrix known only to process 0 in the local world */
  Vec          x,y,xdummy,ydummy;
  VecScatter   scatter;
} PC_OpenMP;
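/*
   How the routines below use these fields: process 0 of comm holds the user's
   original sequential matrix (gmat) and, during the apply, the user's sequential
   input/output vectors (stashed in xdummy and ydummy); mat, x, y and ksp are
   spread across the whole local world; scatter moves vector entries between
   process 0 and the parallel vectors x and y.
*/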
/*
     Would like to have this simply call PCView() on the inner PC. The problem is
   that the outer comm does not live on the inside, so this cannot be done. Instead,
   handle the special case when the viewer is stdout and construct a new one just
   for this call.
*/
static PetscErrorCode PCView_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP   *red = (PC_OpenMP*)ctx;
  PetscViewer viewer;

  PetscViewerASCIIGetStdout(comm,&viewer);
  PetscViewerASCIIPushTab(viewer);          /* this is bogus in general */
  KSPView(red->ksp,viewer);
  PetscViewerASCIIPopTab(viewer);
  return(0);
}

static PetscErrorCode PCView_OpenMP(PC pc,PetscViewer viewer)
{
  PC_OpenMP   *red = (PC_OpenMP*)pc->data;
  PetscMPIInt size;
  PetscTruth  iascii;

  MPI_Comm_size(red->comm,&size);
  PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
  if (iascii) {
    PetscViewerASCIIPrintf(viewer," Size of solver nodes %d\n",size);
    PetscViewerASCIIPrintf(viewer," Parallel sub-solver given next\n");
    /* should only make the next call if the viewer is associated with stdout */
    PetscOpenMPRun(red->comm,PCView_OpenMP_MP,red);
  }
  return(0);
}
#include "include/private/matimpl.h"
#include "private/vecimpl.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/mat/impls/aij/seq/aij.h"

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices.
*/
static PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt rank,size;
  PetscInt    *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
  Mat         mat;
  Mat_SeqAIJ  *gmata;
  PetscMPIInt tag;
  MPI_Status  status;
  PetscTruth  aij;
  PetscScalar *gmataa,*ao,*ad,*gmataarestore=0;

  CHKMEMQ;
  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);
  if (!rank) {
    PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);
    if (!aij) SETERRQ1(PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",gmat->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    MatCreate(comm,&mat);
    MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
    MatSetType(mat,MATAIJ);
    PetscMalloc((size+1)*sizeof(PetscInt),&rowners);
    PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);
    MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);
    rowners[0] = 0;
    for (i=2; i<=size; i++) {
      rowners[i] += rowners[i-1];
    }
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    PetscObjectGetNewTag((PetscObject)mat,&tag);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);
      }
      /* determine the number of diagonal and off-diagonal entries in each row */
      PetscMemzero(olens,m*sizeof(PetscInt));
      PetscMalloc(m*sizeof(PetscInt),&ld);
      PetscMemzero(ld,m*sizeof(PetscInt));
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        MPI_Send(&nz,1,MPIU_INT,i,tag,comm);
        MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);
      /* receive column indices */
      MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);
      PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);
      MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);
      /* determine the number of diagonal and off-diagonal entries in each row */
      PetscMemzero(olens,m*sizeof(PetscInt));
      PetscMalloc(m*sizeof(PetscInt),&ld);
      PetscMemzero(ld,m*sizeof(PetscInt));
      jj = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      PetscMemzero(gmataa,nz*sizeof(PetscScalar));
      MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    MatSeqAIJSetPreallocation(mat,0,dlens);
    MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row = rstart + i;
      MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);
      cnt += dlens[i];
    }
    if (rank) {
      PetscFree2(gmataa,gmataj);
    }
    PetscFree2(dlens,olens);
    PetscFree(rowners);
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;
    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat = *inmat;
    PetscObjectGetNewTag((PetscObject)mat,&tag);
    if (!rank) {
      /* send numerical values to other processes */
      gmata = (Mat_SeqAIJ*) gmat->data;
      MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz = Ad->nz + Ao->nz;
      PetscMalloc(nz*sizeof(PetscScalar),&gmataa); gmataarestore = gmataa;
      MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap.n) {
      i  = 0;
      nz = ld[i];                 PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap.n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar)); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap.n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar)); ao += nz; gmataa += nz;
    }
    if (rank) {
      PetscFree(gmataarestore);
    }
  }
  MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
  CHKMEMQ;
  return(0);
}
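/*
   Typical call sequence for the routine above (an illustrative sketch; "gmat" is a
   square SeqAIJ matrix living only on process 0 of comm and "m" is the number of
   rows this process is to own, as in PCSetUp_OpenMP_MP() below):

      MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&mat);   first time: build the parallel matrix
      ...
      MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&mat);     later: ship only the new numerical values
*/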
static PetscErrorCode PCApply_OpenMP_1(PC pc,Vec x,Vec y)
{
  PC_OpenMP *red = (PC_OpenMP*)pc->data;

  KSPSolve(red->ksp,x,y);
  return(0);
}
static PetscErrorCode PCSetUp_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP   *red = (PC_OpenMP*)ctx;
  PetscInt    m;
  MatReuse    scal;
  PetscMPIInt rank;

  MPI_Bcast(&red->setupcalled,1,MPIU_INT,0,comm);
  MPI_Bcast(&red->flag,1,MPI_INT,0,comm);
  if (!red->setupcalled) {
    /* set up vector communication */
    MPI_Bcast(&red->n,1,MPIU_INT,0,comm);
    VecCreateMPI(comm,PETSC_DECIDE,red->n,&red->x);
    VecCreateMPI(comm,PETSC_DECIDE,red->n,&red->y);
    VecScatterCreateToZero(red->x,&red->scatter,&red->xdummy);
    VecDuplicate(red->xdummy,&red->ydummy);
    MPI_Comm_rank(comm,&rank);
    if (!rank) {
      VecDestroy(red->xdummy);
      VecDestroy(red->ydummy);
    }
    scal = MAT_INITIAL_MATRIX;
  } else {
    if (red->flag == DIFFERENT_NONZERO_PATTERN) {
      MatDestroy(red->mat);
      scal = MAT_INITIAL_MATRIX;
      CHKMEMQ;
    } else {
      scal = MAT_REUSE_MATRIX;
    }
  }

  /* copy matrix out onto the processes */
  VecGetLocalSize(red->x,&m);
  MatDistribute_MPIAIJ(comm,red->gmat,m,scal,&red->mat);
  if (!red->setupcalled) {
    /* create the solver */
    KSPCreate(comm,&red->ksp);
    KSPSetOptionsPrefix(red->ksp,"openmp_");   /* should actually append with global pc prefix */
    KSPSetOperators(red->ksp,red->mat,red->mat,red->flag);
    KSPSetFromOptions(red->ksp);
  } else {
    KSPSetOperators(red->ksp,red->mat,red->mat,red->flag);
  }
  return(0);
}
static PetscErrorCode PCSetUp_OpenMP(PC pc)
{
  PC_OpenMP   *red = (PC_OpenMP*)pc->data;
  PetscMPIInt size;

  red->gmat        = pc->mat;
  red->flag        = pc->flag;
  red->setupcalled = pc->setupcalled;

  MPI_Comm_size(red->comm,&size);
  if (size == 1) {  /* special case where a copy of the matrix is not needed */
    if (!red->setupcalled) {
      /* create the solver */
      KSPCreate(pc->comm,&red->ksp);
      KSPSetOptionsPrefix(red->ksp,"openmp_");   /* should actually append with global pc prefix */
      KSPSetOperators(red->ksp,red->gmat,red->gmat,red->flag);
      KSPSetFromOptions(red->ksp);
    } else {
      KSPSetOperators(red->ksp,red->gmat,red->gmat,red->flag);
    }
    pc->ops->apply = PCApply_OpenMP_1;
    return(0);
  } else {
    MatGetSize(pc->mat,&red->n,PETSC_IGNORE);
    PetscOpenMPRun(red->comm,PCSetUp_OpenMP_MP,red);
  }
  return(0);
}
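/*
   Runs on every process of the local world. The scatter was built with
   VecScatterCreateToZero(), so applying it with SCATTER_REVERSE spreads the
   sequential input vector held on process 0 (xdummy) into the parallel vector x,
   and SCATTER_FORWARD gathers the parallel solution y back into ydummy on
   process 0.
*/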
static PetscErrorCode PCApply_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP *red = (PC_OpenMP*)ctx;

  VecScatterBegin(red->scatter,red->xdummy,red->x,INSERT_VALUES,SCATTER_REVERSE);
  VecScatterEnd(red->scatter,red->xdummy,red->x,INSERT_VALUES,SCATTER_REVERSE);

  KSPSolve(red->ksp,red->x,red->y);

  VecScatterBegin(red->scatter,red->y,red->ydummy,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(red->scatter,red->y,red->ydummy,INSERT_VALUES,SCATTER_FORWARD);
  return(0);
}
static PetscErrorCode PCApply_OpenMP(PC pc,Vec x,Vec y)
{
  PC_OpenMP *red = (PC_OpenMP*)pc->data;

  red->xdummy = x;
  red->ydummy = y;
  PetscOpenMPRun(red->comm,PCApply_OpenMP_MP,red);
  return(0);
}
static PetscErrorCode PCDestroy_OpenMP_MP(MPI_Comm comm,void *ctx)
{
  PC_OpenMP   *red = (PC_OpenMP*)ctx;
  PetscMPIInt rank;

  if (red->scatter) {VecScatterDestroy(red->scatter);}
  if (red->x)       {VecDestroy(red->x);}
  if (red->y)       {VecDestroy(red->y);}
  if (red->ksp)     {KSPDestroy(red->ksp);}
  if (red->mat)     {MatDestroy(red->mat);}
  MPI_Comm_rank(comm,&rank);
  if (rank) {
    if (red->xdummy) {VecDestroy(red->xdummy);}
    if (red->ydummy) {VecDestroy(red->ydummy);}
  }
  return(0);
}
static PetscErrorCode PCDestroy_OpenMP(PC pc)
{
  PC_OpenMP *red = (PC_OpenMP*)pc->data;

  PetscOpenMPRun(red->comm,PCDestroy_OpenMP_MP,red);
  PetscOpenMPFree(red->comm,red);
  return(0);
}
static PetscErrorCode PCSetFromOptions_OpenMP(PC pc)
{
  return(0);
}
/* -------------------------------------------------------------------------------------*/
/*MC
     PCOPENMP - Runs a preconditioner for a single-process matrix across several MPI processes

$    This will usually be run with -pc_type openmp -ksp_type preonly.
$    Solver options inside the preconditioner are set with -openmp_ksp_... and -openmp_pc_...; for example,
$    -openmp_ksp_type cg would use cg as the Krylov method, or -openmp_ksp_monitor, or
$    -openmp_pc_type hypre -openmp_pc_hypre_type boomeramg

     Always run with -ksp_view (or -snes_view) to see what solver is actually being used.

     Currently the solver options INSIDE the OpenMP preconditioner can ONLY be set via the
     options database.

   Level: intermediate

   See PetscOpenMPMerge() and PetscOpenMPSpawn() for two ways to start up MPI for use with this preconditioner.

.seealso: PCCreate(), PCSetType(), PCType (for list of available types)

M*/
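/*
   Example of selecting this preconditioner from application code (a minimal sketch;
   it assumes PETSc was started through PetscOpenMPMerge() or PetscOpenMPSpawn() so that
   PETSC_COMM_LOCAL_WORLD exists, that A, b and x are the user's sequential matrix and
   vectors, and error checking is omitted):

      KSP ksp;
      PC  pc;
      KSPCreate(PETSC_COMM_SELF,&ksp);
      KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCOPENMP);
      KSPSetType(ksp,KSPPREONLY);
      KSPSetFromOptions(ksp);
      KSPSolve(ksp,b,x);

   then at run time, for example,
      -openmp_ksp_type cg -openmp_pc_type hypre -openmp_pc_hypre_type boomeramg -ksp_view
*/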
PetscErrorCode PCCreate_OpenMP(PC pc)
{
  PC_OpenMP   *red;
  PetscMPIInt size;

  MPI_Comm_size(pc->comm,&size);
  if (size > 1) SETERRQ(PETSC_ERR_ARG_SIZ,"OpenMP preconditioner only works for sequential solves");
  /* cast the struct length to a PetscInt for easier MPI calls */
  PetscOpenMPNew(PETSC_COMM_LOCAL_WORLD,(PetscInt)sizeof(PC_OpenMP),(void**)&red);
  red->comm = PETSC_COMM_LOCAL_WORLD;
  pc->data  = (void*) red;

  pc->ops->apply          = PCApply_OpenMP;
  pc->ops->destroy        = PCDestroy_OpenMP;
  pc->ops->setfromoptions = PCSetFromOptions_OpenMP;
  pc->ops->setup          = PCSetUp_OpenMP;
  pc->ops->view           = PCView_OpenMP;
  return(0);
}