Actual source code: comm.c

  1: #define PETSCKSP_DLL

  3: /***********************************comm.c*************************************

  5: Author: Henry M. Tufo III

  7: e-mail: hmt@cs.brown.edu

  9: snail-mail:
 10: Division of Applied Mathematics
 11: Brown University
 12: Providence, RI 02912

 14: Last Modification: 
 15: 11.21.97
 16: ***********************************comm.c*************************************/

 18: /***********************************comm.c*************************************
 19: File Description:
 20: -----------------

 22: ***********************************comm.c*************************************/
 23:  #include "src/ksp/pc/impls/tfs/tfs.h"


 26: /* global program control variables - explicitly exported */
 27: int my_id            = 0;
 28: int num_nodes        = 1;
 29: int floor_num_nodes  = 0;
 30: int i_log2_num_nodes = 0;

 32: /* global program control variables */
 33: static int p_init = 0;
 34: static int modfl_num_nodes;
 35: static int edge_not_pow_2;

 37: static unsigned int edge_node[sizeof(PetscInt)*32];

 39: /***********************************comm.c*************************************
 40: Function: comm_init()

 42: Input : 
 43: Output: 
 44: Return: 
 45: Description: 
 46: ***********************************comm.c*************************************/
 47: void
 48: comm_init (void)
 49: {

 51:   if (p_init++) return;

 53: #if PETSC_SIZEOF_INT != 4
 54:   error_msg_warning("Int != 4 Bytes!");
 55: #endif


 58:   MPI_Comm_size(MPI_COMM_WORLD,&num_nodes);
 59:   MPI_Comm_rank(MPI_COMM_WORLD,&my_id);

 61:   if (num_nodes> (INT_MAX >> 1))
 62:   {error_msg_fatal("Can't have more than INT_MAX/2 nodes!!!");}

 64:   ivec_zero((int*)edge_node,sizeof(PetscInt)*32);

 66:   floor_num_nodes = 1;
 67:   i_log2_num_nodes = modfl_num_nodes = 0;
 68:   while (floor_num_nodes <= num_nodes)
 69:     {
 70:       edge_node[i_log2_num_nodes] = my_id ^ floor_num_nodes;
 71:       floor_num_nodes <<= 1;
 72:       i_log2_num_nodes++;
 73:     }

 75:   i_log2_num_nodes--;
 76:   floor_num_nodes >>= 1;
 77:   modfl_num_nodes = (num_nodes - floor_num_nodes);

 79:   if ((my_id > 0) && (my_id <= modfl_num_nodes))
 80:     {edge_not_pow_2=((my_id|floor_num_nodes)-1);}
 81:   else if (my_id >= floor_num_nodes)
 82:     {edge_not_pow_2=((my_id^floor_num_nodes)+1);
 83:     }
 84:   else
 85:     {edge_not_pow_2 = 0;}

 87: }
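
For concreteness, a worked example of what comm_init() leaves behind (a sketch; the numbers follow directly from the loops above). With num_nodes = 6 the while loop exits with floor_num_nodes = 4, i_log2_num_nodes = 2 and modfl_num_nodes = 2, so ranks 0-3 form the embedded 2-dimensional hypercube and ranks 4-5 are the leftovers. The resulting edge_not_pow_2 pairing is

  rank:            0  1  2  3  4  5
  edge_not_pow_2:  0  4  5  0  1  2

i.e. leftover ranks 4 and 5 fold their data into hypercube ranks 1 and 2 before the fan-in and receive the result back from them after the fan-out; ranks 0 and 3 have no extra partner.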



 91: /***********************************comm.c*************************************
 92: Function: giop()

 94: Input : 
 95: Output: 
 96: Return: 
 97: Description: fan-in/out version
 98: ***********************************comm.c*************************************/
 99: void
100: giop(int *vals, int *work, int n, int *oprs)
101: {
102:    int mask, edge;
103:   int type, dest;
104:   vfp fp;
105:   MPI_Status  status;
106:   int ierr;


109: #ifdef SAFE
110:   /* ok ... should have some data, work, and operator(s) */
111:   if (!vals||!work||!oprs)
112:     {error_msg_fatal("giop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

114:   /* non-uniform should have at least two entries */
115:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
116:     {error_msg_fatal("giop() :: non_uniform and n=0,1?");}

118:   /* check to make sure comm package has been initialized */
119:   if (!p_init)
120:     {comm_init();}
121: #endif

123:   /* if there's nothing to do return */
124:   if ((num_nodes<2)||(!n))
125:     {
126:       return;
127:     }

 129:   /* a negative number of items to send ==> fatal */
130:   if (n<0)
131:     {error_msg_fatal("giop() :: n=%D<0?",n);}

133:   /* advance to list of n operations for custom */
134:   if ((type=oprs[0])==NON_UNIFORM)
135:     {oprs++;}

137:   /* major league hack */
138:   if (!(fp = (vfp) ivec_fct_addr(type))) {
139:     error_msg_warning("giop() :: hope you passed in a rbfp!\n");
140:     fp = (vfp) oprs;
141:   }

143:   /* all msgs will be of the same length */
 144:   /* if not a hypercube must collapse partial dim */
145:   if (edge_not_pow_2)
146:     {
147:       if (my_id >= floor_num_nodes)
148:         {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG0+my_id,MPI_COMM_WORLD);}
149:       else
150:         {
151:           MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
152:                    MPI_COMM_WORLD,&status);
153:           (*fp)(vals,work,n,oprs);
154:         }
155:     }

157:   /* implement the mesh fan in/out exchange algorithm */
158:   if (my_id<floor_num_nodes)
159:     {
160:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
161:         {
162:           dest = my_id^mask;
163:           if (my_id > dest)
164:             {MPI_Send(vals,n,MPI_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
165:           else
166:             {
167:               MPI_Recv(work,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG2+dest,
168:                        MPI_COMM_WORLD, &status);
169:               (*fp)(vals, work, n, oprs);
170:             }
171:         }

173:       mask=floor_num_nodes>>1;
174:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
175:         {
176:           if (my_id%mask)
177:             {continue;}
178: 
179:           dest = my_id^mask;
180:           if (my_id < dest)
181:             {MPI_Send(vals,n,MPI_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
182:           else
183:             {
184:               MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG4+dest,
185:                        MPI_COMM_WORLD, &status);
186:             }
187:         }
188:     }

190:   /* if not a hypercube must expand to partial dim */
191:   if (edge_not_pow_2)
192:     {
193:       if (my_id >= floor_num_nodes)
194:         {
195:           MPI_Recv(vals,n,MPI_INT,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
196:                    MPI_COMM_WORLD,&status);
197:         }
198:       else
199:         {MPI_Send(vals,n,MPI_INT,edge_not_pow_2,MSGTAG5+my_id,MPI_COMM_WORLD);}
200:     }
201: }
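
As a usage sketch for giop(): the opcode name GL_ADD below is an assumption about the uniform opcodes that ivec_fct_addr() in the tfs headers resolves; any opcode it recognizes works the same way.

/* hedged usage sketch: global integer sums over all ranks (GL_ADD is an assumed opcode) */
int vals[2], work[2], oprs[1];

comm_init();                 /* safe to repeat; guarded by p_init                 */
vals[0] = my_id;             /* each rank contributes its id ...                  */
vals[1] = 1;                 /* ... and a count of one                            */
oprs[0] = GL_ADD;            /* one uniform operation applied to every entry      */
giop(vals, work, 2, oprs);   /* on return every rank holds the two global sums    */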

203: /***********************************comm.c*************************************
204: Function: grop()

206: Input : 
207: Output: 
208: Return: 
209: Description: fan-in/out version
210: ***********************************comm.c*************************************/
211: void
212: grop(PetscScalar *vals, PetscScalar *work, int n, int *oprs)
213: {
214:    int mask, edge;
215:   int type, dest;
216:   vfp fp;
217:   MPI_Status  status;
218:   int ierr;


221: #ifdef SAFE
222:   /* ok ... should have some data, work, and operator(s) */
223:   if (!vals||!work||!oprs)
224:     {error_msg_fatal("grop() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

226:   /* non-uniform should have at least two entries */
227:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
228:     {error_msg_fatal("grop() :: non_uniform and n=0,1?");}

230:   /* check to make sure comm package has been initialized */
231:   if (!p_init)
232:     {comm_init();}
233: #endif

235:   /* if there's nothing to do return */
236:   if ((num_nodes<2)||(!n))
237:     {return;}

239:   /* a negative number of items to send ==> fatal */
240:   if (n<0)
 241:     {error_msg_fatal("grop() :: n=%D<0?",n);}

243:   /* advance to list of n operations for custom */
244:   if ((type=oprs[0])==NON_UNIFORM)
245:     {oprs++;}

247:   if (!(fp = (vfp) rvec_fct_addr(type))) {
248:     error_msg_warning("grop() :: hope you passed in a rbfp!\n");
249:     fp = (vfp) oprs;
250:   }

252:   /* all msgs will be of the same length */
 253:   /* if not a hypercube must collapse partial dim */
254:   if (edge_not_pow_2)
255:     {
256:       if (my_id >= floor_num_nodes)
257:         {MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG0+my_id,
258:                   MPI_COMM_WORLD);}
259:       else
260:         {
261:           MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
262:                    MPI_COMM_WORLD,&status);
263:           (*fp)(vals,work,n,oprs);
264:         }
265:     }

267:   /* implement the mesh fan in/out exchange algorithm */
268:   if (my_id<floor_num_nodes)
269:     {
270:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
271:         {
272:           dest = my_id^mask;
273:           if (my_id > dest)
274:             {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
275:           else
276:             {
277:               MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
278:                        MPI_COMM_WORLD, &status);
279:               (*fp)(vals, work, n, oprs);
280:             }
281:         }

283:       mask=floor_num_nodes>>1;
284:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
285:         {
286:           if (my_id%mask)
287:             {continue;}
288: 
289:           dest = my_id^mask;
290:           if (my_id < dest)
291:             {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
292:           else
293:             {
294:               MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,
295:                        MPI_COMM_WORLD, &status);
296:             }
297:         }
298:     }

300:   /* if not a hypercube must expand to partial dim */
301:   if (edge_not_pow_2)
302:     {
303:       if (my_id >= floor_num_nodes)
304:         {
305:           MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
306:                    MPI_COMM_WORLD,&status);
307:         }
308:       else
309:         {MPI_Send(vals,n,MPIU_SCALAR,edge_not_pow_2,MSGTAG5+my_id,
310:                   MPI_COMM_WORLD);}
311:     }
312: }


315: /***********************************comm.c*************************************
 316: Function: grop_hc()

318: Input : 
319: Output: 
320: Return: 
321: Description: fan-in/out version

323: note good only for num_nodes=2^k!!!

325: ***********************************comm.c*************************************/
326: void
327: grop_hc(PetscScalar *vals, PetscScalar *work, int n, int *oprs, int dim)
328: {
329:    int mask, edge;
330:   int type, dest;
331:   vfp fp;
332:   MPI_Status  status;
333:   int ierr;

335: #ifdef SAFE
336:   /* ok ... should have some data, work, and operator(s) */
337:   if (!vals||!work||!oprs)
338:     {error_msg_fatal("grop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

340:   /* non-uniform should have at least two entries */
341:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
342:     {error_msg_fatal("grop_hc() :: non_uniform and n=0,1?");}

344:   /* check to make sure comm package has been initialized */
345:   if (!p_init)
346:     {comm_init();}
347: #endif

349:   /* if there's nothing to do return */
350:   if ((num_nodes<2)||(!n)||(dim<=0))
351:     {return;}

353:   /* the error msg says it all!!! */
354:   if (modfl_num_nodes)
355:     {error_msg_fatal("grop_hc() :: num_nodes not a power of 2!?!");}

357:   /* a negative number of items to send ==> fatal */
358:   if (n<0)
359:     {error_msg_fatal("grop_hc() :: n=%D<0?",n);}

 361:   /* can't do more dimensions than exist */
362:   dim = PetscMin(dim,i_log2_num_nodes);

364:   /* advance to list of n operations for custom */
365:   if ((type=oprs[0])==NON_UNIFORM)
366:     {oprs++;}

368:   if (!(fp = (vfp) rvec_fct_addr(type))) {
369:     error_msg_warning("grop_hc() :: hope you passed in a rbfp!\n");
370:     fp = (vfp) oprs;
371:   }

373:   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
374:     {
375:       dest = my_id^mask;
376:       if (my_id > dest)
377:         {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
378:       else
379:         {
380:           MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
381:                    &status);
382:           (*fp)(vals, work, n, oprs);
383:         }
384:     }

386:   if (edge==dim)
387:     {mask>>=1;}
388:   else
389:     {while (++edge<dim) {mask<<=1;}}

391:   for (edge=0; edge<dim; edge++,mask>>=1)
392:     {
393:       if (my_id%mask)
394:         {continue;}
395: 
396:       dest = my_id^mask;
397:       if (my_id < dest)
398:         {MPI_Send(vals,n,MPIU_SCALAR,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
399:       else
400:         {
401:           MPI_Recv(vals,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
402:                    &status);
403:         }
404:     }
405: }
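
A sketch of a partial-dimension reduction with grop_hc(); this only makes sense on a power-of-2 number of ranks, and GL_ADD is again an assumed opcode name.

/* hedged sketch: sum within sub-cubes spanning the first two hypercube dimensions   */
PetscScalar v[1], w[1];
int op[1];

op[0] = GL_ADD;
v[0]  = (PetscScalar)my_id;
grop_hc(v, w, 1, op, 2);     /* dim = 2: only edges 0 and 1 are exchanged, so ranks  */
                             /* {0,1,2,3}, {4,5,6,7}, ... each reduce independently  */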


408: /***********************************comm.c*************************************
 409: Function: gfop()

411: Input : 
412: Output: 
413: Return: 
414: Description: fan-in/out version
415: ***********************************comm.c*************************************/
416: void gfop(void *vals, void *work, int n, vbfp fp, MPI_Datatype dt, int comm_type)
417: {
418:    int mask, edge;
419:   int dest;
420:   MPI_Status  status;
421:   MPI_Op op;
422:   int ierr;

424: #ifdef SAFE
425:   /* check to make sure comm package has been initialized */
426:   if (!p_init)
427:     {comm_init();}

429:   /* ok ... should have some data, work, and operator(s) */
430:   if (!vals||!work||!fp)
 431:     {error_msg_fatal("gfop() :: v=%D, w=%D, f=%D",vals,work,fp);}
432: #endif

434:   /* if there's nothing to do return */
435:   if ((num_nodes<2)||(!n))
436:     {return;}

438:   /* a negative number of items to send ==> fatal */
439:   if (n<0)
 440:     {error_msg_fatal("gfop() :: n=%D<0?",n);}

442:   if (comm_type==MPI)
443:     {
444:       MPI_Op_create(fp,TRUE,&op);
445:       MPI_Allreduce (vals, work, n, dt, op, MPI_COMM_WORLD);
446:       MPI_Op_free(&op);
447:       return;
448:     }

 450:   /* if not a hypercube must collapse partial dim */
451:   if (edge_not_pow_2)
452:     {
453:       if (my_id >= floor_num_nodes)
454:         {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG0+my_id,
455:                   MPI_COMM_WORLD);}
456:       else
457:         {
458:           MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG0+edge_not_pow_2,
459:                    MPI_COMM_WORLD,&status);
460:           (*fp)(vals,work,&n,&dt);
461:         }
462:     }

464:   /* implement the mesh fan in/out exchange algorithm */
465:   if (my_id<floor_num_nodes)
466:     {
467:       for (mask=1,edge=0; edge<i_log2_num_nodes; edge++,mask<<=1)
468:         {
469:           dest = my_id^mask;
470:           if (my_id > dest)
471:             {MPI_Send(vals,n,dt,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
472:           else
473:             {
474:               MPI_Recv(work,n,dt,MPI_ANY_SOURCE,MSGTAG2+dest,
475:                        MPI_COMM_WORLD, &status);
476:               (*fp)(vals, work, &n, &dt);
477:             }
478:         }

480:       mask=floor_num_nodes>>1;
481:       for (edge=0; edge<i_log2_num_nodes; edge++,mask>>=1)
482:         {
483:           if (my_id%mask)
484:             {continue;}
485: 
486:           dest = my_id^mask;
487:           if (my_id < dest)
488:             {MPI_Send(vals,n,dt,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
489:           else
490:             {
491:               MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG4+dest,
492:                        MPI_COMM_WORLD, &status);
493:             }
494:         }
495:     }

497:   /* if not a hypercube must expand to partial dim */
498:   if (edge_not_pow_2)
499:     {
500:       if (my_id >= floor_num_nodes)
501:         {
502:           MPI_Recv(vals,n,dt,MPI_ANY_SOURCE,MSGTAG5+edge_not_pow_2,
503:                    MPI_COMM_WORLD,&status);
504:         }
505:       else
506:         {MPI_Send(vals,n,dt,edge_not_pow_2,MSGTAG5+my_id,
507:                   MPI_COMM_WORLD);}
508:     }
509: }
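
A sketch of calling gfop() with a user combine function. The function must have the MPI_User_function signature because it is handed to MPI_Op_create() on the comm_type==MPI path; my_isum is a hypothetical name, and note that on that path the reduced values end up in work (MPI_Allreduce's receive buffer), while the hand-rolled path updates vals in place.

/* hedged sketch: element-wise integer add with the MPI_User_function signature */
static void my_isum(void *in, void *inout, int *len, MPI_Datatype *dt)
{
  int i, *a = (int*)in, *b = (int*)inout;
  for (i=0; i<*len; i++) {b[i] += a[i];}
}

/* ... on every rank ... */
int vals[4], work[4];
gfop(vals, work, 4, (vbfp)my_isum, MPI_INT, MPI);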






516: /******************************************************************************
 517: Function: ssgl_radd()

519: Input : 
520: Output: 
521: Return: 
522: Description: 
523:  
 524: segs[] holds level+1 entries :: segs[0] .. segs[level]

526: ******************************************************************************/
527: void 
528: ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level, 
529:            int *segs)
530: {
531:    int edge, type, dest, mask;
532:    int stage_n;
533:   MPI_Status  status;
534:   int ierr;

536: #ifdef SAFE
537:   /* check to make sure comm package has been initialized */
538:   if (!p_init)
539:     {comm_init();}
540: #endif


543:   /* all msgs are *NOT* the same length */
544:   /* implement the mesh fan in/out exchange algorithm */
545:   for (mask=0, edge=0; edge<level; edge++, mask++)
546:     {
547:       stage_n = (segs[level] - segs[edge]);
548:       if (stage_n && !(my_id & mask))
549:         {
550:           dest = edge_node[edge];
551:           type = MSGTAG3 + my_id + (num_nodes*edge);
552:           if (my_id>dest)
553:           {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
554:                       MPI_COMM_WORLD);}
555:           else
556:             {
557:               type =  type - my_id + dest;
558:               MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
559:                        MPI_COMM_WORLD,&status);
560:               rvec_add(vals+segs[edge], work, stage_n);
561:             }
562:         }
563:       mask <<= 1;
564:     }
565:   mask>>=1;
566:   for (edge=0; edge<level; edge++)
567:     {
568:       stage_n = (segs[level] - segs[level-1-edge]);
569:       if (stage_n && !(my_id & mask))
570:         {
571:           dest = edge_node[level-edge-1];
572:           type = MSGTAG6 + my_id + (num_nodes*edge);
573:           if (my_id<dest)
574:             {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
575:                       MPI_COMM_WORLD);}
576:           else
577:             {
578:               type =  type - my_id + dest;
579:               MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
580:                        MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
581:             }
582:         }
583:       mask >>= 1;
584:     }
585: }
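
An illustrative call that clarifies the segs[] convention (level+1 monotone offsets into vals, segs[0] .. segs[level]); a sketch assuming comm_init() has run on a power-of-2 number of ranks and that the caller has filled vals.

/* hedged sketch: staged segmented add of 5 scalars on 4 ranks (level = 2)           */
/* stage 0 adds the segs[2]-segs[0] = 5-entry tail vals[0..4] across edge_node[0]    */
/* stage 1 adds the segs[2]-segs[1] = 3-entry tail vals[2..4] across edge_node[1]    */
/* the second loop then fans the staged partial sums back out in reverse order       */
PetscScalar vals[5], work[5];
int segs[3] = {0, 2, 5};
ssgl_radd(vals, work, 2, segs);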



589: /***********************************comm.c*************************************
590: Function: grop_hc_vvl()

592: Input : 
593: Output: 
594: Return: 
595: Description: fan-in/out version

597: note good only for num_nodes=2^k!!!

599: ***********************************comm.c*************************************/
600: void
601: grop_hc_vvl(PetscScalar *vals, PetscScalar *work, int *segs, int *oprs, int dim)
602: {
603:    int mask, edge, n;
604:   int type, dest;
605:   vfp fp;
606:   MPI_Status  status;
607:   int ierr;

609:   error_msg_fatal("grop_hc_vvl() :: is not working!\n");

611: #ifdef SAFE
612:   /* ok ... should have some data, work, and operator(s) */
613:   if (!vals||!work||!oprs||!segs)
 614:     {error_msg_fatal("grop_hc_vvl() :: vals=%D, work=%D, oprs=%D, segs=%D",vals,work,oprs,segs);}

616:   /* non-uniform should have at least two entries */

618:   /* check to make sure comm package has been initialized */
619:   if (!p_init)
620:     {comm_init();}
621: #endif

623:   /* if there's nothing to do return */
624:   if ((num_nodes<2)||(dim<=0))
625:     {return;}

627:   /* the error msg says it all!!! */
628:   if (modfl_num_nodes)
 629:     {error_msg_fatal("grop_hc_vvl() :: num_nodes not a power of 2!?!");}

 631:   /* can't do more dimensions than exist */
632:   dim = PetscMin(dim,i_log2_num_nodes);

634:   /* advance to list of n operations for custom */
635:   if ((type=oprs[0])==NON_UNIFORM)
636:     {oprs++;}

638:   if (!(fp = (vfp) rvec_fct_addr(type))){
 639:     error_msg_warning("grop_hc_vvl() :: hope you passed in a rbfp!\n");
640:     fp = (vfp) oprs;
641:   }

643:   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
644:     {
645:       n = segs[dim]-segs[edge];
646:       dest = my_id^mask;
647:       if (my_id > dest)
648:         {MPI_Send(vals+segs[edge],n,MPIU_SCALAR,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
649:       else
650:         {
651:           MPI_Recv(work,n,MPIU_SCALAR,MPI_ANY_SOURCE,MSGTAG2+dest,
652:                    MPI_COMM_WORLD, &status);
653:           (*fp)(vals+segs[edge], work, n, oprs);
654:         }
655:     }

657:   if (edge==dim)
658:     {mask>>=1;}
659:   else
660:     {while (++edge<dim) {mask<<=1;}}

662:   for (edge=0; edge<dim; edge++,mask>>=1)
663:     {
664:       if (my_id%mask)
665:         {continue;}
666: 
667:       n = (segs[dim]-segs[dim-1-edge]);
668: 
669:       dest = my_id^mask;
670:       if (my_id < dest)
671:         {MPI_Send(vals+segs[dim-1-edge],n,MPIU_SCALAR,dest,MSGTAG4+my_id,
672:                   MPI_COMM_WORLD);}
673:       else
674:         {
675:           MPI_Recv(vals+segs[dim-1-edge],n,MPIU_SCALAR,MPI_ANY_SOURCE,
676:                    MSGTAG4+dest,MPI_COMM_WORLD, &status);
677:         }
678:     }
679: }

681: /******************************************************************************
 682: Function: new_ssgl_radd()

684: Input : 
685: Output: 
686: Return: 
687: Description: 
688:  
 689: segs[] holds level+1 entries :: segs[0] .. segs[level]

691: ******************************************************************************/
692: void new_ssgl_radd( PetscScalar *vals,  PetscScalar *work,  int level, int *segs)
693: {
694:    int edge, type, dest, mask;
695:    int stage_n;
696:   MPI_Status  status;
697:   int ierr;

699: #ifdef SAFE
700:   /* check to make sure comm package has been initialized */
701:   if (!p_init)
702:     {comm_init();}
703: #endif

705:   /* all msgs are *NOT* the same length */
706:   /* implement the mesh fan in/out exchange algorithm */
707:   for (mask=0, edge=0; edge<level; edge++, mask++)
708:     {
709:       stage_n = (segs[level] - segs[edge]);
710:       if (stage_n && !(my_id & mask))
711:         {
712:           dest = edge_node[edge];
713:           type = MSGTAG3 + my_id + (num_nodes*edge);
714:           if (my_id>dest)
715:           {MPI_Send(vals+segs[edge],stage_n,MPIU_SCALAR,dest,type,
716:                       MPI_COMM_WORLD);}
717:           else
718:             {
719:               type =  type - my_id + dest;
720:               MPI_Recv(work,stage_n,MPIU_SCALAR,MPI_ANY_SOURCE,type,
721:                        MPI_COMM_WORLD,&status);
722:               rvec_add(vals+segs[edge], work, stage_n);
723:             }
724:         }
725:       mask <<= 1;
726:     }
727:   mask>>=1;
728:   for (edge=0; edge<level; edge++)
729:     {
730:       stage_n = (segs[level] - segs[level-1-edge]);
731:       if (stage_n && !(my_id & mask))
732:         {
733:           dest = edge_node[level-edge-1];
734:           type = MSGTAG6 + my_id + (num_nodes*edge);
735:           if (my_id<dest)
736:             {MPI_Send(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,dest,type,
737:                       MPI_COMM_WORLD);}
738:           else
739:             {
740:               type =  type - my_id + dest;
741:               MPI_Recv(vals+segs[level-1-edge],stage_n,MPIU_SCALAR,
742:                        MPI_ANY_SOURCE,type,MPI_COMM_WORLD,&status);
743:             }
744:         }
745:       mask >>= 1;
746:     }
747: }



751: /***********************************comm.c*************************************
 752: Function: giop_hc()

754: Input : 
755: Output: 
756: Return: 
757: Description: fan-in/out version

759: note good only for num_nodes=2^k!!!

761: ***********************************comm.c*************************************/
762: void giop_hc(int *vals, int *work, int n, int *oprs, int dim)
763: {
764:    int mask, edge;
765:   int type, dest;
766:   vfp fp;
767:   MPI_Status  status;
768:   int ierr;

770: #ifdef SAFE
771:   /* ok ... should have some data, work, and operator(s) */
772:   if (!vals||!work||!oprs)
773:     {error_msg_fatal("giop_hc() :: vals=%D, work=%D, oprs=%D",vals,work,oprs);}

775:   /* non-uniform should have at least two entries */
776:   if ((oprs[0] == NON_UNIFORM)&&(n<2))
777:     {error_msg_fatal("giop_hc() :: non_uniform and n=0,1?");}

779:   /* check to make sure comm package has been initialized */
780:   if (!p_init)
781:     {comm_init();}
782: #endif

784:   /* if there's nothing to do return */
785:   if ((num_nodes<2)||(!n)||(dim<=0))
786:     {return;}

788:   /* the error msg says it all!!! */
789:   if (modfl_num_nodes)
790:     {error_msg_fatal("giop_hc() :: num_nodes not a power of 2!?!");}

792:   /* a negative number of items to send ==> fatal */
793:   if (n<0)
794:     {error_msg_fatal("giop_hc() :: n=%D<0?",n);}

 796:   /* can't do more dimensions than exist */
797:   dim = PetscMin(dim,i_log2_num_nodes);

799:   /* advance to list of n operations for custom */
800:   if ((type=oprs[0])==NON_UNIFORM)
801:     {oprs++;}

803:   if (!(fp = (vfp) ivec_fct_addr(type))){
804:     error_msg_warning("giop_hc() :: hope you passed in a rbfp!\n");
805:     fp = (vfp) oprs;
806:   }

808:   for (mask=1,edge=0; edge<dim; edge++,mask<<=1)
809:     {
810:       dest = my_id^mask;
811:       if (my_id > dest)
812:         {MPI_Send(vals,n,MPIU_INT,dest,MSGTAG2+my_id,MPI_COMM_WORLD);}
813:       else
814:         {
815:           MPI_Recv(work,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG2+dest,MPI_COMM_WORLD,
816:                    &status);
817:           (*fp)(vals, work, n, oprs);
818:         }
819:     }

821:   if (edge==dim)
822:     {mask>>=1;}
823:   else
824:     {while (++edge<dim) {mask<<=1;}}

826:   for (edge=0; edge<dim; edge++,mask>>=1)
827:     {
828:       if (my_id%mask)
829:         {continue;}
830: 
831:       dest = my_id^mask;
832:       if (my_id < dest)
833:         {MPI_Send(vals,n,MPIU_INT,dest,MSGTAG4+my_id,MPI_COMM_WORLD);}
834:       else
835:         {
836:           MPI_Recv(vals,n,MPIU_INT,MPI_ANY_SOURCE,MSGTAG4+dest,MPI_COMM_WORLD,
837:                    &status);
838:         }
839:     }
840: }