Actual source code: gs.c

  1: #define PETSCKSP_DLL

  3: /***********************************gs.c***************************************

  5: Author: Henry M. Tufo III

  7: e-mail: hmt@cs.brown.edu

  9: snail-mail:
 10: Division of Applied Mathematics
 11: Brown University
 12: Providence, RI 02912

 14: Last Modification: 
 15: 6.21.97
 16: ************************************gs.c**************************************/

 18: /***********************************gs.c***************************************
 19: File Description:
 20: -----------------

 22: ************************************gs.c**************************************/

 24:  #include src/ksp/pc/impls/tfs/tfs.h

 26: /* default length of number of items via tree - doubles if exceeded */
 27: #define TREE_BUF_SZ 2048;
 28: #define GS_VEC_SZ   1



 32: /***********************************gs.c***************************************
 33: Type: struct gather_scatter_id 
 34: ------------------------------

 36: ************************************gs.c**************************************/
 37: typedef struct gather_scatter_id {
 38:   int id;
 39:   int nel_min;
 40:   int nel_max;
 41:   int nel_sum;
 42:   int negl;
 43:   int gl_max;
 44:   int gl_min;
 45:   int repeats;
 46:   int ordered;
 47:   int positive;
 48:   PetscScalar *vals;

 50:   /* bit mask info */
 51:   int *my_proc_mask;
 52:   int mask_sz;
 53:   int *ngh_buf;
 54:   int ngh_buf_sz;
 55:   int *nghs;
 56:   int num_nghs;
 57:   int max_nghs;
 58:   int *pw_nghs;
 59:   int num_pw_nghs;
 60:   int *tree_nghs;
 61:   int num_tree_nghs;

 63:   int num_loads;

 65:   /* repeats == true -> local info */
 66:   int nel;         /* number of unique elememts */
 67:   int *elms;       /* of size nel */
 68:   int nel_total;
 69:   int *local_elms; /* of size nel_total */
 70:   int *companion;  /* of size nel_total */

 72:   /* local info */
 73:   int num_local_total;
 74:   int local_strength;
 75:   int num_local;
 76:   int *num_local_reduce;
 77:   int **local_reduce;
 78:   int num_local_gop;
 79:   int *num_gop_local_reduce;
 80:   int **gop_local_reduce;

 82:   /* pairwise info */
 83:   int level;
 84:   int num_pairs;
 85:   int max_pairs;
 86:   int loc_node_pairs;
 87:   int max_node_pairs;
 88:   int min_node_pairs;
 89:   int avg_node_pairs;
 90:   int *pair_list;
 91:   int *msg_sizes;
 92:   int **node_list;
 93:   int len_pw_list;
 94:   int *pw_elm_list;
 95:   PetscScalar *pw_vals;

 97:   MPI_Request *msg_ids_in;
 98:   MPI_Request *msg_ids_out;

100:   PetscScalar *out;
101:   PetscScalar *in;
102:   int msg_total;

104:   /* tree - crystal accumulator info */
105:   int max_left_over;
106:   int *pre;
107:   int *in_num;
108:   int *out_num;
109:   int **in_list;
110:   int **out_list;

112:   /* new tree work*/
113:   int  tree_nel;
114:   int *tree_elms;
115:   PetscScalar *tree_buf;
116:   PetscScalar *tree_work;

118:   int  tree_map_sz;
119:   int *tree_map_in;
120:   int *tree_map_out;

122:   /* current memory status */
123:   int gl_bss_min;
124:   int gl_perm_min;

126:   /* max segment size for gs_gop_vec() */
127:   int vec_sz;

129:   /* hack to make paul happy */
130:   MPI_Comm gs_comm;

132: } gs_id;


135: /* to be made public */

137: /* PRIVATE - and definitely not exported */
138: /*static void gs_print_template( gs_id* gs, int who);*/
139: /*static void gs_print_stemplate( gs_id* gs, int who);*/

141: static gs_id *gsi_check_args(int *elms, int nel, int level);
142: static void gsi_via_bit_mask(gs_id *gs);
143: static void get_ngh_buf(gs_id *gs);
144: static void set_pairwise(gs_id *gs);
145: static gs_id * gsi_new(void);
146: static void set_tree(gs_id *gs);

148: /* same for all but vector flavor */
149: static void gs_gop_local_out(gs_id *gs, PetscScalar *vals);
150: /* vector flavor */
151: static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);

153: static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
154: static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
155: static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
156: static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
157: static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);


160: static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
161: static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
162: static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
163: static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
164: static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);

166: static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
167: static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
168: static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);

170: static void gs_gop_times(gs_id *gs, PetscScalar *in_vals);
171: static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
172: static void gs_gop_local_times(gs_id *gs, PetscScalar *vals);
173: static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
174: static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals);

176: static void gs_gop_min(gs_id *gs, PetscScalar *in_vals);
177: static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
178: static void gs_gop_local_min(gs_id *gs, PetscScalar *vals);
179: static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
180: static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals);

182: static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
183: static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
184: static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
185: static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
186: static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);

188: static void gs_gop_max(gs_id *gs, PetscScalar *in_vals);
189: static void gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
190: static void gs_gop_local_max(gs_id *gs, PetscScalar *vals);
191: static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
192: static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals);

194: static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
195: static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
196: static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
197: static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
198: static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);

200: static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
201: static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
202: static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
203: static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
204: static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);

206: static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
207: static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
208: static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
209: static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);



213: /* global vars */
214: /* from comm.c module */

216: /* module state inf and fortran interface */
217: static int num_gs_ids = 0;

219: /* should make this dynamic ... later */
220: static int msg_buf=MAX_MSG_BUF;
221: static int vec_sz=GS_VEC_SZ;
222: static int *tree_buf=NULL;
223: static int tree_buf_sz=0;
224: static int ntree=0;


227: /******************************************************************************
228: Function: gs_init_()

230: Input : 
231: Output: 
232: Return: 
233: Description:  
234: ******************************************************************************/
235: void gs_init_vec_sz(int size)
236: {
237:   /*  vec_ch = TRUE; */

239:   vec_sz = size;
240: }

242: /******************************************************************************
243: Function: gs_init_()

245: Input : 
246: Output: 
247: Return: 
248: Description:  
249: ******************************************************************************/
250: void gs_init_msg_buf_sz(int buf_size)
251: {
252:   /*  msg_ch = TRUE; */

254:   msg_buf = buf_size;
255: }

257: /******************************************************************************
258: Function: gs_init()

260: Input : 

262: Output: 

264: RETURN: 

266: Description:  
267: ******************************************************************************/
268: gs_id *
269: gs_init( int *elms, int nel, int level)
270: {
271:    gs_id *gs;
272:   MPI_Group gs_group;
273:   MPI_Comm  gs_comm;

275:   /* ensure that communication package has been initialized */
276:   comm_init();


279:   /* determines if we have enough dynamic/semi-static memory */
280:   /* checks input, allocs and sets gd_id template            */
281:   gs = gsi_check_args(elms,nel,level);

283:   /* only bit mask version up and working for the moment    */
284:   /* LATER :: get int list version working for sparse pblms */
285:   gsi_via_bit_mask(gs);


288:   MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
289:   MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
290:   gs->gs_comm=gs_comm;

292:   return(gs);
293: }



297: /******************************************************************************
298: Function: gsi_new()

300: Input : 
301: Output: 
302: Return: 
303: Description: 

305: elm list must >= 0!!!
306: elm repeats allowed
307: ******************************************************************************/
308: static
309: gs_id *
310: gsi_new(void)
311: {
312:   gs_id *gs;
313:   gs = (gs_id *) malloc(sizeof(gs_id));
314:   PetscMemzero(gs,sizeof(gs_id));
315:   return(gs);
316: }



320: /******************************************************************************
321: Function: gsi_check_args()

323: Input : 
324: Output: 
325: Return: 
326: Description: 

328: elm list must >= 0!!!
329: elm repeats allowed
330: local working copy of elms is sorted
331: ******************************************************************************/
332: static
333: gs_id *
334: gsi_check_args(int *in_elms, int nel, int level)
335: {
336:    int i, j, k, t2;
337:   int *companion, *elms, *unique, *iptr;
338:   int num_local=0, *num_to_reduce, **local_reduce;
339:   int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
340:   int vals[sizeof(oprs)/sizeof(oprs[0])-1];
341:   int work[sizeof(oprs)/sizeof(oprs[0])-1];
342:   gs_id *gs;



346: #ifdef SAFE
347:   if (!in_elms)
348:     {error_msg_fatal("elms point to nothing!!!\n");}

350:   if (nel<0)
351:     {error_msg_fatal("can't have fewer than 0 elms!!!\n");}

353:   if (nel==0)
354:     {error_msg_warning("I don't have any elements!!!\n");}
355: #endif

357:   /* get space for gs template */
358:   gs = gsi_new();
359:   gs->id = ++num_gs_ids;

361:   /* hmt 6.4.99                                            */
362:   /* caller can set global ids that don't participate to 0 */
363:   /* gs_init ignores all zeros in elm list                 */
364:   /* negative global ids are still invalid                 */
365:   for (i=j=0;i<nel;i++)
366:     {if (in_elms[i]!=0) {j++;}}

368:   k=nel; nel=j;

370:   /* copy over in_elms list and create inverse map */
371:   elms = (int*) malloc((nel+1)*sizeof(PetscInt));
372:   companion = (int*) malloc(nel*sizeof(PetscInt));
373:   /* ivec_c_index(companion,nel); */
374:   /* ivec_copy(elms,in_elms,nel); */
375:   for (i=j=0;i<k;i++)
376:     {
377:       if (in_elms[i]!=0)
378:         {elms[j] = in_elms[i]; companion[j++] = i;}
379:     }

381:   if (j!=nel)
382:     {error_msg_fatal("nel j mismatch!\n");}

384: #ifdef SAFE
385:   /* pre-pass ... check to see if sorted */
386:   elms[nel] = INT_MAX;
387:   iptr = elms;
388:   unique = elms+1;
389:   j=0;
390:   while (*iptr!=INT_MAX)
391:     {
392:       if (*iptr++>*unique++)
393:         {j=1; break;}
394:     }

396:   /* set up inverse map */
397:   if (j)
398:     {
399:       error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
400:       SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
401:     }
402:   else
403:     {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
404: #else
405:   SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
406: #endif
407:   elms[nel] = INT_MIN;

409:   /* first pass */
410:   /* determine number of unique elements, check pd */
411:   for (i=k=0;i<nel;i+=j)
412:     {
413:       t2 = elms[i];
414:       j=++i;
415: 
416:       /* clump 'em for now */
417:       while (elms[j]==t2) {j++;}
418: 
419:       /* how many together and num local */
420:       if (j-=i)
421:         {num_local++; k+=j;}
422:     }

424:   /* how many unique elements? */
425:   gs->repeats=k;
426:   gs->nel = nel-k;


429:   /* number of repeats? */
430:   gs->num_local = num_local;
431:   num_local+=2;
432:   gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
433:   gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));

435:   unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
436:   gs->elms = unique;
437:   gs->nel_total = nel;
438:   gs->local_elms = elms;
439:   gs->companion = companion;

441:   /* compess map as well as keep track of local ops */
442:   for (num_local=i=j=0;i<gs->nel;i++)
443:     {
444:       k=j;
445:       t2 = unique[i] = elms[j];
446:       companion[i] = companion[j];
447: 
448:       while (elms[j]==t2) {j++;}

450:       if ((t2=(j-k))>1)
451:         {
452:           /* number together */
453:           num_to_reduce[num_local] = t2++;
454:           iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));

456:           /* to use binary searching don't remap until we check intersection */
457:           *iptr++ = i;
458: 
459:           /* note that we're skipping the first one */
460:           while (++k<j)
461:             {*(iptr++) = companion[k];}
462:           *iptr = -1;
463:         }
464:     }

466:   /* sentinel for ngh_buf */
467:   unique[gs->nel]=INT_MAX;

469:   /* for two partition sort hack */
470:   num_to_reduce[num_local] = 0;
471:   local_reduce[num_local] = NULL;
472:   num_to_reduce[++num_local] = 0;
473:   local_reduce[num_local] = NULL;

475:   /* load 'em up */
476:   /* note one extra to hold NON_UNIFORM flag!!! */
477:   vals[2] = vals[1] = vals[0] = nel;
478:   if (gs->nel>0)
479:     {
480:        vals[3] = unique[0];           /* ivec_lb(elms,nel); */
481:        vals[4] = unique[gs->nel-1];   /* ivec_ub(elms,nel); */
482:     }
483:   else
484:     {
485:        vals[3] = INT_MAX;             /* ivec_lb(elms,nel); */
486:        vals[4] = INT_MIN;             /* ivec_ub(elms,nel); */
487:     }
488:   vals[5] = level;
489:   vals[6] = num_gs_ids;

491:   /* GLOBAL: send 'em out */
492:   giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);

494:   /* must be semi-pos def - only pairwise depends on this */
495:   /* LATER - remove this restriction */
496:   if (vals[3]<0)
497:     {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}

499:   if (vals[4]==INT_MAX)
500:     {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}

502:   gs->nel_min = vals[0];
503:   gs->nel_max = vals[1];
504:   gs->nel_sum = vals[2];
505:   gs->gl_min  = vals[3];
506:   gs->gl_max  = vals[4];
507:   gs->negl    = vals[4]-vals[3]+1;

509:   if (gs->negl<=0)
510:     {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
511: 
512:   /* LATER :: add level == -1 -> program selects level */
513:   if (vals[5]<0)
514:     {vals[5]=0;}
515:   else if (vals[5]>num_nodes)
516:     {vals[5]=num_nodes;}
517:   gs->level = vals[5];

519:   return(gs);
520: }


523: /******************************************************************************
524: Function: gsi_via_bit_mask()

526: Input : 
527: Output: 
528: Return: 
529: Description: 


532: ******************************************************************************/
533: static
534: void
535: gsi_via_bit_mask(gs_id *gs)
536: {
537:    int i, nel, *elms;
538:   int t1;
539:   int **reduce;
540:   int *map;

542:   /* totally local removes ... ct_bits == 0 */
543:   get_ngh_buf(gs);

545:   if (gs->level)
546:     {set_pairwise(gs);}

548:   if (gs->max_left_over)
549:     {set_tree(gs);}

551:   /* intersection local and pairwise/tree? */
552:   gs->num_local_total = gs->num_local;
553:   gs->gop_local_reduce = gs->local_reduce;
554:   gs->num_gop_local_reduce = gs->num_local_reduce;

556:   map = gs->companion;

558:   /* is there any local compression */
559:   if (!gs->num_local) {
560:     gs->local_strength = NONE;
561:     gs->num_local_gop = 0;
562:   } else {
563:       /* ok find intersection */
564:       map = gs->companion;
565:       reduce = gs->local_reduce;
566:       for (i=0, t1=0; i<gs->num_local; i++, reduce++)
567:         {
568:           if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
569:               ||
570:               ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
571:             {
572:               /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
573:               t1++;
574:               if (gs->num_local_reduce[i]<=0)
575:                 {error_msg_fatal("nobody in list?");}
576:               gs->num_local_reduce[i] *= -1;
577:             }
578:            **reduce=map[**reduce];
579:         }

581:       /* intersection is empty */
582:       if (!t1)
583:         {
584:           gs->local_strength = FULL;
585:           gs->num_local_gop = 0;
586:         }
587:       /* intersection not empty */
588:       else
589:         {
590:           gs->local_strength = PARTIAL;
591:           SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
592:                    gs->num_local + 1, SORT_INT_PTR);

594:           gs->num_local_gop = t1;
595:           gs->num_local_total =  gs->num_local;
596:           gs->num_local    -= t1;
597:           gs->gop_local_reduce = gs->local_reduce;
598:           gs->num_gop_local_reduce = gs->num_local_reduce;

600:           for (i=0; i<t1; i++)
601:             {
602:               if (gs->num_gop_local_reduce[i]>=0)
603:                 {error_msg_fatal("they aren't negative?");}
604:               gs->num_gop_local_reduce[i] *= -1;
605:               gs->local_reduce++;
606:               gs->num_local_reduce++;
607:             }
608:           gs->local_reduce++;
609:           gs->num_local_reduce++;
610:         }
611:     }

613:   elms = gs->pw_elm_list;
614:   nel  = gs->len_pw_list;
615:   for (i=0; i<nel; i++)
616:     {elms[i] = map[elms[i]];}

618:   elms = gs->tree_map_in;
619:   nel  = gs->tree_map_sz;
620:   for (i=0; i<nel; i++)
621:     {elms[i] = map[elms[i]];}

623:   /* clean up */
624:   free((void*) gs->local_elms);
625:   free((void*) gs->companion);
626:   free((void*) gs->elms);
627:   free((void*) gs->ngh_buf);
628:   gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
629: }



633: /******************************************************************************
634: Function: place_in_tree()

636: Input : 
637: Output: 
638: Return: 
639: Description: 


642: ******************************************************************************/
643: static
644: void
645: place_in_tree( int elm)
646: {
647:    int *tp, n;


650:   if (ntree==tree_buf_sz)
651:     {
652:       if (tree_buf_sz)
653:         {
654:           tp = tree_buf;
655:           n = tree_buf_sz;
656:           tree_buf_sz<<=1;
657:           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
658:           ivec_copy(tree_buf,tp,n);
659:           free(tp);
660:         }
661:       else
662:         {
663:           tree_buf_sz = TREE_BUF_SZ;
664:           tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
665:         }
666:     }

668:   tree_buf[ntree++] = elm;
669: }



673: /******************************************************************************
674: Function: get_ngh_buf()

676: Input : 
677: Output: 
678: Return: 
679: Description: 


682: ******************************************************************************/
683: static
684: void
685: get_ngh_buf(gs_id *gs)
686: {
687:    int i, j, npw=0, ntree_map=0;
688:   int p_mask_size, ngh_buf_size, buf_size;
689:   int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
690:   int *ngh_buf, *buf1, *buf2;
691:   int offset, per_load, num_loads, or_ct, start, end;
692:   int *ptr1, *ptr2, i_start, negl, nel, *elms;
693:   int oper=GL_B_OR;
694:   int *ptr3, *t_mask, level, ct1, ct2;

696:   /* to make life easier */
697:   nel   = gs->nel;
698:   elms  = gs->elms;
699:   level = gs->level;
700: 
701:   /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
702:   p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
703:   set_bit_mask(p_mask,p_mask_size,my_id);

705:   /* allocate space for masks and info bufs */
706:   gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
707:   gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
708:   gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
709:   t_mask = (int*) malloc(p_mask_size);
710:   gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);

712:   /* comm buffer size ... memory usage bounded by ~2*msg_buf */
713:   /* had thought I could exploit rendezvous threshold */

715:   /* default is one pass */
716:   per_load = negl  = gs->negl;
717:   gs->num_loads = num_loads = 1;
718:   i=p_mask_size*negl;

720:   /* possible overflow on buffer size */
721:   /* overflow hack                    */
722:   if (i<0) {i=INT_MAX;}

724:   buf_size = PetscMin(msg_buf,i);

726:   /* can we do it? */
727:   if (p_mask_size>buf_size)
728:     {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}

730:   /* get giop buf space ... make *only* one malloc */
731:   buf1 = (int*) malloc(buf_size<<1);

733:   /* more than one gior exchange needed? */
734:   if (buf_size!=i)
735:     {
736:       per_load = buf_size/p_mask_size;
737:       buf_size = per_load*p_mask_size;
738:       gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
739:     }


742:   /* convert buf sizes from #bytes to #ints - 32 bit only! */
743: #ifdef SAFE  
744:   p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
745: #else  
746:   p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2;
747: #endif
748: 
749:   /* find giop work space */
750:   buf2 = buf1+buf_size;

752:   /* hold #ints needed for processor masks */
753:   gs->mask_sz=p_mask_size;

755:   /* init buffers */
756:   ivec_zero(sh_proc_mask,p_mask_size);
757:   ivec_zero(pw_sh_proc_mask,p_mask_size);
758:   ivec_zero(ngh_buf,ngh_buf_size);

760:   /* HACK reset tree info */
761:   tree_buf=NULL;
762:   tree_buf_sz=ntree=0;

764:   /* queue the tree elements for now */
765:   /* elms_q = new_queue(); */
766: 
767:   /* can also queue tree info for pruned or forest implememtation */
768:   /*  mask_q = new_queue(); */

770:   /* ok do it */
771:   for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
772:     {
773:       /* identity for bitwise or is 000...000 */
774:       ivec_zero(buf1,buf_size);

776:       /* load msg buffer */
777:       for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
778:         {
779:           offset = (offset-start)*p_mask_size;
780:           ivec_copy(buf1+offset,p_mask,p_mask_size);
781:         }

783:       /* GLOBAL: pass buffer */
784:       giop(buf1,buf2,buf_size,&oper);


787:       /* unload buffer into ngh_buf */
788:       ptr2=(elms+i_start);
789:       for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
790:         {
791:           /* I own it ... may have to pairwise it */
792:           if (j==*ptr2)
793:             {
794:               /* do i share it w/anyone? */
795: #ifdef SAFE
796:               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
797: #else
798:               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
799: #endif
800:               /* guess not */
801:               if (ct1<2)
802:                 {ptr2++; ptr1+=p_mask_size; continue;}

804:               /* i do ... so keep info and turn off my bit */
805:               ivec_copy(ptr1,ptr3,p_mask_size);
806:               ivec_xor(ptr1,p_mask,p_mask_size);
807:               ivec_or(sh_proc_mask,ptr1,p_mask_size);
808: 
809:               /* is it to be done pairwise? */
810:               if (--ct1<=level)
811:                 {
812:                   npw++;
813: 
814:                   /* turn on high bit to indicate pw need to process */
815:                   *ptr2++ |= TOP_BIT;
816:                   ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
817:                   ptr1+=p_mask_size;
818:                   continue;
819:                 }

821:               /* get set for next and note that I have a tree contribution */
822:               /* could save exact elm index for tree here -> save a search */
823:               ptr2++; ptr1+=p_mask_size; ntree_map++;
824:             }
825:           /* i don't but still might be involved in tree */
826:           else
827:             {

829:               /* shared by how many? */
830: #ifdef SAFE
831:               ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
832: #else
833:               ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
834: #endif

836:               /* none! */
837:               if (ct1<2)
838:                 {continue;}

840:               /* is it going to be done pairwise? but not by me of course!*/
841:               if (--ct1<=level)
842:                 {continue;}
843:             }
844:           /* LATER we're going to have to process it NOW */
845:           /* nope ... tree it */
846:           place_in_tree(j);
847:         }
848:     }

850:   free((void*)t_mask);
851:   free((void*)buf1);

853:   gs->len_pw_list=npw;
854:   gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));

856:   /* expand from bit mask list to int list and save ngh list */
857:   gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
858:   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);

860:   gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));

862:   oper = GL_MAX;
863:   ct1 = gs->num_nghs;
864:   giop(&ct1,&ct2,1,&oper);
865:   gs->max_nghs = ct1;

867:   gs->tree_map_sz  = ntree_map;
868:   gs->max_left_over=ntree;

870:   free((void*)p_mask);
871:   free((void*)sh_proc_mask);

873: }





879: /******************************************************************************
880: Function: pairwise_init()

882: Input : 
883: Output: 
884: Return: 
885: Description: 

887: if an element is shared by fewer that level# of nodes do pairwise exch 
888: ******************************************************************************/
889: static
890: void
891: set_pairwise(gs_id *gs)
892: {
893:    int i, j;
894:   int p_mask_size;
895:   int *p_mask, *sh_proc_mask, *tmp_proc_mask;
896:   int *ngh_buf, *buf2;
897:   int offset;
898:   int *msg_list, *msg_size, **msg_nodes, nprs;
899:   int *pairwise_elm_list, len_pair_list=0;
900:   int *iptr, t1, i_start, nel, *elms;
901:   int ct;



905:   /* to make life easier */
906:   nel  = gs->nel;
907:   elms = gs->elms;
908:   ngh_buf = gs->ngh_buf;
909:   sh_proc_mask  = gs->pw_nghs;

911:   /* need a few temp masks */
912:   p_mask_size   = len_bit_mask(num_nodes);
913:   p_mask        = (int*) malloc(p_mask_size);
914:   tmp_proc_mask = (int*) malloc(p_mask_size);

916:   /* set mask to my my_id's bit mask */
917:   set_bit_mask(p_mask,p_mask_size,my_id);

919: #ifdef SAFE
920:   p_mask_size /= sizeof(PetscInt);
921: #else
922:   p_mask_size >>= 2;
923: #endif
924: 
925:   len_pair_list=gs->len_pw_list;
926:   gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));

928:   /* how many processors (nghs) do we have to exchange with? */
929:   nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));


932:   /* allocate space for gs_gop() info */
933:   gs->pair_list = msg_list = (int*)  malloc(sizeof(PetscInt)*nprs);
934:   gs->msg_sizes = msg_size  = (int*)  malloc(sizeof(PetscInt)*nprs);
935:   gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));

937:   /* init msg_size list */
938:   ivec_zero(msg_size,nprs);

940:   /* expand from bit mask list to int list */
941:   bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
942: 
943:   /* keep list of elements being handled pairwise */
944:   for (i=j=0;i<nel;i++)
945:     {
946:       if (elms[i] & TOP_BIT)
947:         {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
948:     }
949:   pairwise_elm_list[j] = -1;

951:   gs->msg_ids_out = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
952:   gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
953:   gs->msg_ids_in = (MPI_Request *)  malloc(sizeof(MPI_Request)*(nprs+1));
954:   gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
955:   gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);

957:   /* find who goes to each processor */
958:   for (i_start=i=0;i<nprs;i++)
959:     {
960:       /* processor i's mask */
961:       set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);

963:       /* det # going to processor i */
964:       for (ct=j=0;j<len_pair_list;j++)
965:         {
966:           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
967:           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
968:           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
969:             {ct++;}
970:         }
971:       msg_size[i] = ct;
972:       i_start = PetscMax(i_start,ct);

974:       /*space to hold nodes in message to first neighbor */
975:       msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));

977:       for (j=0;j<len_pair_list;j++)
978:         {
979:           buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
980:           ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
981:           if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
982:             {*iptr++ = j;}
983:         }
984:       *iptr = -1;
985:     }
986:   msg_nodes[nprs] = NULL;

988:   j=gs->loc_node_pairs=i_start;
989:   t1 = GL_MAX;
990:   giop(&i_start,&offset,1,&t1);
991:   gs->max_node_pairs = i_start;

993:   i_start=j;
994:   t1 = GL_MIN;
995:   giop(&i_start,&offset,1,&t1);
996:   gs->min_node_pairs = i_start;

998:   i_start=j;
999:   t1 = GL_ADD;
1000:   giop(&i_start,&offset,1,&t1);
1001:   gs->avg_node_pairs = i_start/num_nodes + 1;

1003:   i_start=nprs;
1004:   t1 = GL_MAX;
1005:   giop(&i_start,&offset,1,&t1);
1006:   gs->max_pairs = i_start;


1009:   /* remap pairwise in tail of gsi_via_bit_mask() */
1010:   gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
1011:   gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1012:   gs->in  = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);

1014:   /* reset malloc pool */
1015:   free((void*)p_mask);
1016:   free((void*)tmp_proc_mask);

1018: }



1022: /******************************************************************************
1023: Function: set_tree()

1025: Input : 
1026: Output: 
1027: Return: 
1028: Description: 

1030: to do pruned tree just save ngh buf copy for each one and decode here!
1031: ******************************************************************************/
1032: static
1033: void
1034: set_tree(gs_id *gs)
1035: {
1036:    int i, j, n, nel;
1037:    int *iptr_in, *iptr_out, *tree_elms, *elms;


1040:   /* local work ptrs */
1041:   elms = gs->elms;
1042:   nel     = gs->nel;

1044:   /* how many via tree */
1045:   gs->tree_nel  = n = ntree;
1046:   gs->tree_elms = tree_elms = iptr_in = tree_buf;
1047:   gs->tree_buf  = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1048:   gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1049:   j=gs->tree_map_sz;
1050:   gs->tree_map_in = iptr_in  = (int*) malloc(sizeof(PetscInt)*(j+1));
1051:   gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));

1053:   /* search the longer of the two lists */
1054:   /* note ... could save this info in get_ngh_buf and save searches */
1055:   if (n<=nel)
1056:     {
1057:       /* bijective fct w/remap - search elm list */
1058:       for (i=0; i<n; i++)
1059:         {
1060:           if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1061:             {*iptr_in++ = j; *iptr_out++ = i;}
1062:         }
1063:     }
1064:   else
1065:     {
1066:       for (i=0; i<nel; i++)
1067:         {
1068:           if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1069:             {*iptr_in++ = i; *iptr_out++ = j;}
1070:         }
1071:     }

1073:   /* sentinel */
1074:   *iptr_in = *iptr_out = -1;

1076: }


1079: /******************************************************************************
1080: Function: gather_scatter

1082: Input : 
1083: Output: 
1084: Return: 
1085: Description: 
1086: ******************************************************************************/
1087: static
1088: void
1089: gs_gop_local_out( gs_id *gs,  PetscScalar *vals)
1090: {
1091:    int *num, *map, **reduce;
1092:    PetscScalar tmp;


1095:   num    = gs->num_gop_local_reduce;
1096:   reduce = gs->gop_local_reduce;
1097:   while ((map = *reduce++))
1098:     {
1099:       /* wall */
1100:       if (*num == 2)
1101:         {
1102:           num ++;
1103:           vals[map[1]] = vals[map[0]];
1104:         }
1105:       /* corner shared by three elements */
1106:       else if (*num == 3)
1107:         {
1108:           num ++;
1109:           vals[map[2]] = vals[map[1]] = vals[map[0]];
1110:         }
1111:       /* corner shared by four elements */
1112:       else if (*num == 4)
1113:         {
1114:           num ++;
1115:           vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1116:         }
1117:       /* general case ... odd geoms ... 3D*/
1118:       else
1119:         {
1120:           num++;
1121:           tmp = *(vals + *map++);
1122:           while (*map >= 0)
1123:             {*(vals + *map++) = tmp;}
1124:         }
1125:     }
1126: }



1130: /******************************************************************************
1131: Function: gather_scatter

1133: Input : 
1134: Output: 
1135: Return: 
1136: Description: 
1137: ******************************************************************************/
1138: void
1139: gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1140: {
1141:   /* local only operations!!! */
1142:   if (gs->num_local)
1143:     {gs_gop_local_binary(gs,vals,fct);}
1144: 
1145:   /* if intersection tree/pairwise and local isn't empty */
1146:   if (gs->num_local_gop)
1147:     {
1148:       gs_gop_local_in_binary(gs,vals,fct);
1149: 
1150:       /* pairwise */
1151:       if (gs->num_pairs)
1152:         {gs_gop_pairwise_binary(gs,vals,fct);}
1153: 
1154:       /* tree */
1155:       else if (gs->max_left_over)
1156:         {gs_gop_tree_binary(gs,vals,fct);}
1157: 
1158:       gs_gop_local_out(gs,vals);
1159:     }
1160:   /* if intersection tree/pairwise and local is empty */
1161:   else
1162:     {
1163:       /* pairwise */
1164:       if (gs->num_pairs)
1165:         {gs_gop_pairwise_binary(gs,vals,fct);}
1166: 
1167:       /* tree */
1168:       else if (gs->max_left_over)
1169:         {gs_gop_tree_binary(gs,vals,fct);}
1170:     }
1171: }



1175: /******************************************************************************
1176: Function: gather_scatter

1178: Input : 
1179: Output: 
1180: Return: 
1181: Description: 
1182: ******************************************************************************/
1183: static
1184: void
1185: gs_gop_local_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1186: {
1187:    int *num, *map, **reduce;
1188:   PetscScalar tmp;


1191:   num    = gs->num_local_reduce;
1192:   reduce = gs->local_reduce;
1193:   while ((map = *reduce))
1194:     {
1195:       num ++;
1196:       (*fct)(&tmp,NULL,1);
1197:       /* tmp = 0.0; */
1198:       while (*map >= 0)
1199:         {(*fct)(&tmp,(vals + *map),1); map++;}
1200:         /*        {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1201: 
1202:       map = *reduce++;
1203:       while (*map >= 0)
1204:         {*(vals + *map++) = tmp;}
1205:     }
1206: }



1210: /******************************************************************************
1211: Function: gather_scatter

1213: Input : 
1214: Output: 
1215: Return: 
1216: Description: 
1217: ******************************************************************************/
1218: static
1219: void
1220: gs_gop_local_in_binary( gs_id *gs,  PetscScalar *vals,  rbfp fct)
1221: {
1222:    int *num, *map, **reduce;
1223:    PetscScalar *base;


1226:   num    = gs->num_gop_local_reduce;

1228:   reduce = gs->gop_local_reduce;
1229:   while ((map = *reduce++))
1230:     {
1231:       num++;
1232:       base = vals + *map++;
1233:       while (*map >= 0)
1234:         {(*fct)(base,(vals + *map),1); map++;}
1235:     }
1236: }



1240: /******************************************************************************
1241: Function: gather_scatter

1243: VERSION 3 :: 

1245: Input : 
1246: Output: 
1247: Return: 
1248: Description: 
1249: ******************************************************************************/
1250: static
1251: void
1252: gs_gop_pairwise_binary( gs_id *gs,  PetscScalar *in_vals,
1253:                         rbfp fct)
1254: {
1255:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1256:    int *iptr, *msg_list, *msg_size, **msg_nodes;
1257:    int *pw, *list, *size, **nodes;
1258:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1259:   MPI_Status status;
1260:   int ierr;


1263:   /* strip and load s */
1264:   msg_list =list         = gs->pair_list;
1265:   msg_size =size         = gs->msg_sizes;
1266:   msg_nodes=nodes        = gs->node_list;
1267:   iptr=pw                = gs->pw_elm_list;
1268:   dptr1=dptr3            = gs->pw_vals;
1269:   msg_ids_in  = ids_in   = gs->msg_ids_in;
1270:   msg_ids_out = ids_out  = gs->msg_ids_out;
1271:   dptr2                  = gs->out;
1272:   in1=in2                = gs->in;

1274:   /* post the receives */
1275:   /*  msg_nodes=nodes; */
1276:   do
1277:     {
1278:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1279:          second one *list and do list++ afterwards */
1280:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1281:                 gs->gs_comm, msg_ids_in++);
1282:       in1 += *size++;
1283:     }
1284:   while (*++msg_nodes);
1285:   msg_nodes=nodes;

1287:   /* load gs values into in out gs buffers */
1288:   while (*iptr >= 0)
1289:     {*dptr3++ = *(in_vals + *iptr++);}

1291:   /* load out buffers and post the sends */
1292:   while ((iptr = *msg_nodes++))
1293:     {
1294:       dptr3 = dptr2;
1295:       while (*iptr >= 0)
1296:         {*dptr2++ = *(dptr1 + *iptr++);}
1297:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1298:       /* is msg_ids_out++ correct? */
1299:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1300:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1301:     }

1303:   if (gs->max_left_over)
1304:     {gs_gop_tree_binary(gs,in_vals,fct);}

1306:   /* process the received data */
1307:   msg_nodes=nodes;
1308:   while ((iptr = *nodes++))
1309:     {
1310:       /* Should I check the return value of MPI_Wait() or status? */
1311:       /* Can this loop be replaced by a call to MPI_Waitall()? */
1312:       MPI_Wait(ids_in++, &status);
1313:       while (*iptr >= 0)
1314:         {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1315:       /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1316:     }

1318:   /* replace vals */
1319:   while (*pw >= 0)
1320:     {*(in_vals + *pw++) = *dptr1++;}

1322:   /* clear isend message handles */
1323:   /* This changed for clarity though it could be the same */
1324:   while (*msg_nodes++)
1325:     /* Should I check the return value of MPI_Wait() or status? */
1326:     /* Can this loop be replaced by a call to MPI_Waitall()? */
1327:     {MPI_Wait(ids_out++, &status);}
1328: }



1332: /******************************************************************************
1333: Function: gather_scatter

1335: Input : 
1336: Output: 
1337: Return: 
1338: Description: 
1339: ******************************************************************************/
1340: static
1341: void
1342: gs_gop_tree_binary(gs_id *gs, PetscScalar *vals,  rbfp fct)
1343: {
1344:   int size;
1345:   int *in, *out;
1346:   PetscScalar *buf, *work;

1348:   in   = gs->tree_map_in;
1349:   out  = gs->tree_map_out;
1350:   buf  = gs->tree_buf;
1351:   work = gs->tree_work;
1352:   size = gs->tree_nel;

1354:   /* load vals vector w/identity */
1355:   (*fct)(buf,NULL,size);
1356: 
1357:   /* load my contribution into val vector */
1358:   while (*in >= 0)
1359:     {(*fct)((buf + *out++),(vals + *in++),-1);}

1361:   gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);

1363:   in   = gs->tree_map_in;
1364:   out  = gs->tree_map_out;
1365:   while (*in >= 0)
1366:     {(*fct)((vals + *in++),(buf + *out++),-1);}

1368: }




1373: /******************************************************************************
1374: Function: gather_scatter

1376: Input : 
1377: Output: 
1378: Return: 
1379: Description: 
1380: ******************************************************************************/
1381: void
1382: gs_gop( gs_id *gs,  PetscScalar *vals,  const char *op)
1383: {
1384:   switch (*op) {
1385:   case '+':
1386:     gs_gop_plus(gs,vals);
1387:     break;
1388:   case '*':
1389:     gs_gop_times(gs,vals);
1390:     break;
1391:   case 'a':
1392:     gs_gop_min_abs(gs,vals);
1393:     break;
1394:   case 'A':
1395:     gs_gop_max_abs(gs,vals);
1396:     break;
1397:   case 'e':
1398:     gs_gop_exists(gs,vals);
1399:     break;
1400:   case 'm':
1401:     gs_gop_min(gs,vals);
1402:     break;
1403:   case 'M':
1404:     gs_gop_max(gs,vals); break;
1405:     /*
1406:     if (*(op+1)=='\0')
1407:       {gs_gop_max(gs,vals); break;}
1408:     else if (*(op+1)=='X')
1409:       {gs_gop_max_abs(gs,vals); break;}
1410:     else if (*(op+1)=='N')
1411:       {gs_gop_min_abs(gs,vals); break;}
1412:     */
1413:   default:
1414:     error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1415:     error_msg_warning("gs_gop() :: default :: plus");
1416:     gs_gop_plus(gs,vals);
1417:     break;
1418:   }
1419: }


1422: /******************************************************************************
1423: Function: gather_scatter

1425: Input : 
1426: Output: 
1427: Return: 
1428: Description: 
1429: ******************************************************************************/
1430: static void
1431: gs_gop_exists( gs_id *gs,  PetscScalar *vals)
1432: {
1433:   /* local only operations!!! */
1434:   if (gs->num_local)
1435:     {gs_gop_local_exists(gs,vals);}

1437:   /* if intersection tree/pairwise and local isn't empty */
1438:   if (gs->num_local_gop)
1439:     {
1440:       gs_gop_local_in_exists(gs,vals);

1442:       /* pairwise */
1443:       if (gs->num_pairs)
1444:         {gs_gop_pairwise_exists(gs,vals);}
1445: 
1446:       /* tree */
1447:       else if (gs->max_left_over)
1448:         {gs_gop_tree_exists(gs,vals);}
1449: 
1450:       gs_gop_local_out(gs,vals);
1451:     }
1452:   /* if intersection tree/pairwise and local is empty */
1453:   else
1454:     {
1455:       /* pairwise */
1456:       if (gs->num_pairs)
1457:         {gs_gop_pairwise_exists(gs,vals);}
1458: 
1459:       /* tree */
1460:       else if (gs->max_left_over)
1461:         {gs_gop_tree_exists(gs,vals);}
1462:     }
1463: }



1467: /******************************************************************************
1468: Function: gather_scatter

1470: Input : 
1471: Output: 
1472: Return: 
1473: Description: 
1474: ******************************************************************************/
1475: static
1476: void
1477: gs_gop_local_exists( gs_id *gs,  PetscScalar *vals)
1478: {
1479:    int *num, *map, **reduce;
1480:    PetscScalar tmp;


1483:   num    = gs->num_local_reduce;
1484:   reduce = gs->local_reduce;
1485:   while ((map = *reduce))
1486:     {
1487:       num ++;
1488:       tmp = 0.0;
1489:       while (*map >= 0)
1490:         {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1491: 
1492:       map = *reduce++;
1493:       while (*map >= 0)
1494:         {*(vals + *map++) = tmp;}
1495:     }
1496: }



1500: /******************************************************************************
1501: Function: gather_scatter

1503: Input : 
1504: Output: 
1505: Return: 
1506: Description: 
1507: ******************************************************************************/
1508: static
1509: void
1510: gs_gop_local_in_exists( gs_id *gs,  PetscScalar *vals)
1511: {
1512:    int *num, *map, **reduce;
1513:    PetscScalar *base;


1516:   num    = gs->num_gop_local_reduce;
1517:   reduce = gs->gop_local_reduce;
1518:   while ((map = *reduce++))
1519:     {
1520:       num++;
1521:       base = vals + *map++;
1522:       while (*map >= 0)
1523:         {*base = EXISTS(*base,*(vals + *map)); map++;}
1524:     }
1525: }



1529: /******************************************************************************
1530: Function: gather_scatter

1532: VERSION 3 :: 

1534: Input : 
1535: Output: 
1536: Return: 
1537: Description: 
1538: ******************************************************************************/
1539: static
1540: void
1541: gs_gop_pairwise_exists( gs_id *gs,  PetscScalar *in_vals)
1542: {
1543:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1544:    int *iptr, *msg_list, *msg_size, **msg_nodes;
1545:    int *pw, *list, *size, **nodes;
1546:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1547:   MPI_Status status;
1548:   int ierr;


1551:   /* strip and load s */
1552:   msg_list =list         = gs->pair_list;
1553:   msg_size =size         = gs->msg_sizes;
1554:   msg_nodes=nodes        = gs->node_list;
1555:   iptr=pw                = gs->pw_elm_list;
1556:   dptr1=dptr3            = gs->pw_vals;
1557:   msg_ids_in  = ids_in   = gs->msg_ids_in;
1558:   msg_ids_out = ids_out  = gs->msg_ids_out;
1559:   dptr2                  = gs->out;
1560:   in1=in2                = gs->in;

1562:   /* post the receives */
1563:   /*  msg_nodes=nodes; */
1564:   do
1565:     {
1566:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1567:          second one *list and do list++ afterwards */
1568:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1569:                 gs->gs_comm, msg_ids_in++);
1570:       in1 += *size++;
1571:     }
1572:   while (*++msg_nodes);
1573:   msg_nodes=nodes;

1575:   /* load gs values into in out gs buffers */
1576:   while (*iptr >= 0)
1577:     {*dptr3++ = *(in_vals + *iptr++);}

1579:   /* load out buffers and post the sends */
1580:   while ((iptr = *msg_nodes++))
1581:     {
1582:       dptr3 = dptr2;
1583:       while (*iptr >= 0)
1584:         {*dptr2++ = *(dptr1 + *iptr++);}
1585:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1586:       /* is msg_ids_out++ correct? */
1587:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1588:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1589:     }

1591:   if (gs->max_left_over)
1592:     {gs_gop_tree_exists(gs,in_vals);}

1594:   /* process the received data */
1595:   msg_nodes=nodes;
1596:   while ((iptr = *nodes++))
1597:     {
1598:       /* Should I check the return value of MPI_Wait() or status? */
1599:       /* Can this loop be replaced by a call to MPI_Waitall()? */
1600:       MPI_Wait(ids_in++, &status);
1601:       while (*iptr >= 0)
1602:         {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1603:     }

1605:   /* replace vals */
1606:   while (*pw >= 0)
1607:     {*(in_vals + *pw++) = *dptr1++;}

1609:   /* clear isend message handles */
1610:   /* This changed for clarity though it could be the same */
1611:   while (*msg_nodes++)
1612:     /* Should I check the return value of MPI_Wait() or status? */
1613:     /* Can this loop be replaced by a call to MPI_Waitall()? */
1614:     {MPI_Wait(ids_out++, &status);}
1615: }



1619: /******************************************************************************
1620: Function: gather_scatter

1622: Input : 
1623: Output: 
1624: Return: 
1625: Description: 
1626: ******************************************************************************/
1627: static
1628: void
1629: gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1630: {
1631:   int size;
1632:   int *in, *out;
1633:   PetscScalar *buf, *work;
1634:   int op[] = {GL_EXISTS,0};


1637:   in   = gs->tree_map_in;
1638:   out  = gs->tree_map_out;
1639:   buf  = gs->tree_buf;
1640:   work = gs->tree_work;
1641:   size = gs->tree_nel;

1643:   rvec_zero(buf,size);

1645:   while (*in >= 0)
1646:     {
1647:       /*
1648:       printf("%d :: out=%d\n",my_id,*out);
1649:       printf("%d :: in=%d\n",my_id,*in);
1650:       */
1651:       *(buf + *out++) = *(vals + *in++);
1652:     }

1654:   grop(buf,work,size,op);

1656:   in   = gs->tree_map_in;
1657:   out  = gs->tree_map_out;

1659:   while (*in >= 0)
1660:     {*(vals + *in++) = *(buf + *out++);}

1662: }



1666: /******************************************************************************
1667: Function: gather_scatter

1669: Input : 
1670: Output: 
1671: Return: 
1672: Description: 
1673: ******************************************************************************/
1674: static void
1675: gs_gop_max_abs( gs_id *gs,  PetscScalar *vals)
1676: {
1677:   /* local only operations!!! */
1678:   if (gs->num_local)
1679:     {gs_gop_local_max_abs(gs,vals);}

1681:   /* if intersection tree/pairwise and local isn't empty */
1682:   if (gs->num_local_gop)
1683:     {
1684:       gs_gop_local_in_max_abs(gs,vals);

1686:       /* pairwise */
1687:       if (gs->num_pairs)
1688:         {gs_gop_pairwise_max_abs(gs,vals);}
1689: 
1690:       /* tree */
1691:       else if (gs->max_left_over)
1692:         {gs_gop_tree_max_abs(gs,vals);}
1693: 
1694:       gs_gop_local_out(gs,vals);
1695:     }
1696:   /* if intersection tree/pairwise and local is empty */
1697:   else
1698:     {
1699:       /* pairwise */
1700:       if (gs->num_pairs)
1701:         {gs_gop_pairwise_max_abs(gs,vals);}
1702: 
1703:       /* tree */
1704:       else if (gs->max_left_over)
1705:         {gs_gop_tree_max_abs(gs,vals);}
1706:     }
1707: }



1711: /******************************************************************************
1712: Function: gather_scatter

1714: Input : 
1715: Output: 
1716: Return: 
1717: Description: 
1718: ******************************************************************************/
1719: static
1720: void
1721: gs_gop_local_max_abs( gs_id *gs,  PetscScalar *vals)
1722: {
1723:    int *num, *map, **reduce;
1724:    PetscScalar tmp;


1727:   num    = gs->num_local_reduce;
1728:   reduce = gs->local_reduce;
1729:   while ((map = *reduce))
1730:     {
1731:       num ++;
1732:       tmp = 0.0;
1733:       while (*map >= 0)
1734:         {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1735: 
1736:       map = *reduce++;
1737:       while (*map >= 0)
1738:         {*(vals + *map++) = tmp;}
1739:     }
1740: }



1744: /******************************************************************************
1745: Function: gather_scatter

1747: Input : 
1748: Output: 
1749: Return: 
1750: Description: 
1751: ******************************************************************************/
1752: static
1753: void
1754: gs_gop_local_in_max_abs( gs_id *gs,  PetscScalar *vals)
1755: {
1756:    int *num, *map, **reduce;
1757:    PetscScalar *base;


1760:   num    = gs->num_gop_local_reduce;
1761:   reduce = gs->gop_local_reduce;
1762:   while ((map = *reduce++))
1763:     {
1764:       num++;
1765:       base = vals + *map++;
1766:       while (*map >= 0)
1767:         {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1768:     }
1769: }



1773: /******************************************************************************
1774: Function: gather_scatter

1776: VERSION 3 :: 

1778: Input : 
1779: Output: 
1780: Return: 
1781: Description: 
1782: ******************************************************************************/
1783: static
1784: void
1785: gs_gop_pairwise_max_abs( gs_id *gs,  PetscScalar *in_vals)
1786: {
1787:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1788:    int *iptr, *msg_list, *msg_size, **msg_nodes;
1789:    int *pw, *list, *size, **nodes;
1790:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1791:   MPI_Status status;
1792:   int ierr;

1794:   /* strip and load s */
1795:   msg_list =list         = gs->pair_list;
1796:   msg_size =size         = gs->msg_sizes;
1797:   msg_nodes=nodes        = gs->node_list;
1798:   iptr=pw                = gs->pw_elm_list;
1799:   dptr1=dptr3            = gs->pw_vals;
1800:   msg_ids_in  = ids_in   = gs->msg_ids_in;
1801:   msg_ids_out = ids_out  = gs->msg_ids_out;
1802:   dptr2                  = gs->out;
1803:   in1=in2                = gs->in;

1805:   /* post the receives */
1806:   /*  msg_nodes=nodes; */
1807:   do
1808:     {
1809:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1810:          second one *list and do list++ afterwards */
1811:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1812:                 gs->gs_comm, msg_ids_in++);
1813:       in1 += *size++;
1814:     }
1815:   while (*++msg_nodes);
1816:   msg_nodes=nodes;

1818:   /* load gs values into in out gs buffers */
1819:   while (*iptr >= 0)
1820:     {*dptr3++ = *(in_vals + *iptr++);}

1822:   /* load out buffers and post the sends */
1823:   while ((iptr = *msg_nodes++))
1824:     {
1825:       dptr3 = dptr2;
1826:       while (*iptr >= 0)
1827:         {*dptr2++ = *(dptr1 + *iptr++);}
1828:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1829:       /* is msg_ids_out++ correct? */
1830:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1831:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1832:     }

1834:   if (gs->max_left_over)
1835:     {gs_gop_tree_max_abs(gs,in_vals);}

1837:   /* process the received data */
1838:   msg_nodes=nodes;
1839:   while ((iptr = *nodes++))
1840:     {
1841:       /* Should I check the return value of MPI_Wait() or status? */
1842:       /* Can this loop be replaced by a call to MPI_Waitall()? */
1843:       MPI_Wait(ids_in++, &status);
1844:       while (*iptr >= 0)
1845:         {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1846:     }

1848:   /* replace vals */
1849:   while (*pw >= 0)
1850:     {*(in_vals + *pw++) = *dptr1++;}

1852:   /* clear isend message handles */
1853:   /* This changed for clarity though it could be the same */
1854:   while (*msg_nodes++)
1855:     /* Should I check the return value of MPI_Wait() or status? */
1856:     /* Can this loop be replaced by a call to MPI_Waitall()? */
1857:     {MPI_Wait(ids_out++, &status);}
1858: }



1862: /******************************************************************************
1863: Function: gather_scatter

1865: Input : 
1866: Output: 
1867: Return: 
1868: Description: 
1869: ******************************************************************************/
1870: static
1871: void
1872: gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1873: {
1874:   int size;
1875:   int *in, *out;
1876:   PetscScalar *buf, *work;
1877:   int op[] = {GL_MAX_ABS,0};


1880:   in   = gs->tree_map_in;
1881:   out  = gs->tree_map_out;
1882:   buf  = gs->tree_buf;
1883:   work = gs->tree_work;
1884:   size = gs->tree_nel;

1886:   rvec_zero(buf,size);

1888:   while (*in >= 0)
1889:     {
1890:       /*
1891:       printf("%d :: out=%d\n",my_id,*out);
1892:       printf("%d :: in=%d\n",my_id,*in);
1893:       */
1894:       *(buf + *out++) = *(vals + *in++);
1895:     }

1897:   grop(buf,work,size,op);

1899:   in   = gs->tree_map_in;
1900:   out  = gs->tree_map_out;

1902:   while (*in >= 0)
1903:     {*(vals + *in++) = *(buf + *out++);}

1905: }



1909: /******************************************************************************
1910: Function: gather_scatter

1912: Input : 
1913: Output: 
1914: Return: 
1915: Description: 
1916: ******************************************************************************/
1917: static void
1918: gs_gop_max( gs_id *gs,  PetscScalar *vals)
1919: {

1921:   /* local only operations!!! */
1922:   if (gs->num_local)
1923:     {gs_gop_local_max(gs,vals);}

1925:   /* if intersection tree/pairwise and local isn't empty */
1926:   if (gs->num_local_gop)
1927:     {
1928:       gs_gop_local_in_max(gs,vals);

1930:       /* pairwise */
1931:       if (gs->num_pairs)
1932:         {gs_gop_pairwise_max(gs,vals);}
1933: 
1934:       /* tree */
1935:       else if (gs->max_left_over)
1936:         {gs_gop_tree_max(gs,vals);}
1937: 
1938:       gs_gop_local_out(gs,vals);
1939:     }
1940:   /* if intersection tree/pairwise and local is empty */
1941:   else
1942:     {
1943:       /* pairwise */
1944:       if (gs->num_pairs)
1945:         {gs_gop_pairwise_max(gs,vals);}
1946: 
1947:       /* tree */
1948:       else if (gs->max_left_over)
1949:         {gs_gop_tree_max(gs,vals);}
1950:     }
1951: }



1955: /******************************************************************************
1956: Function: gather_scatter

1958: Input : 
1959: Output: 
1960: Return: 
1961: Description: 
1962: ******************************************************************************/
1963: static
1964: void
1965: gs_gop_local_max( gs_id *gs,  PetscScalar *vals)
1966: {
1967:    int *num, *map, **reduce;
1968:    PetscScalar tmp;


1971:   num    = gs->num_local_reduce;
1972:   reduce = gs->local_reduce;
1973:   while ((map = *reduce))
1974:     {
1975:       num ++;
1976:       tmp = -REAL_MAX;
1977:       while (*map >= 0)
1978:         {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1979: 
1980:       map = *reduce++;
1981:       while (*map >= 0)
1982:         {*(vals + *map++) = tmp;}
1983:     }
1984: }



1988: /******************************************************************************
1989: Function: gather_scatter

1991: Input : 
1992: Output: 
1993: Return: 
1994: Description: 
1995: ******************************************************************************/
1996: static
1997: void
1998: gs_gop_local_in_max( gs_id *gs,  PetscScalar *vals)
1999: {
2000:    int *num, *map, **reduce;
2001:    PetscScalar *base;


2004:   num    = gs->num_gop_local_reduce;
2005:   reduce = gs->gop_local_reduce;
2006:   while ((map = *reduce++))
2007:     {
2008:       num++;
2009:       base = vals + *map++;
2010:       while (*map >= 0)
2011:         {*base = PetscMax(*base,*(vals + *map)); map++;}
2012:     }
2013: }



2017: /******************************************************************************
2018: Function: gather_scatter

2020: VERSION 3 :: 

2022: Input : 
2023: Output: 
2024: Return: 
2025: Description: 
2026: ******************************************************************************/
2027: static
2028: void
2029: gs_gop_pairwise_max( gs_id *gs,  PetscScalar *in_vals)
2030: {
2031:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2032:    int *iptr, *msg_list, *msg_size, **msg_nodes;
2033:    int *pw, *list, *size, **nodes;
2034:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2035:   MPI_Status status;
2036:   int ierr;

2038:   /* strip and load s */
2039:   msg_list =list         = gs->pair_list;
2040:   msg_size =size         = gs->msg_sizes;
2041:   msg_nodes=nodes        = gs->node_list;
2042:   iptr=pw                = gs->pw_elm_list;
2043:   dptr1=dptr3            = gs->pw_vals;
2044:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2045:   msg_ids_out = ids_out  = gs->msg_ids_out;
2046:   dptr2                  = gs->out;
2047:   in1=in2                = gs->in;

2049:   /* post the receives */
2050:   /*  msg_nodes=nodes; */
2051:   do
2052:     {
2053:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2054:          second one *list and do list++ afterwards */
2055:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2056:                 gs->gs_comm, msg_ids_in++);
2057:       in1 += *size++;
2058:     }
2059:   while (*++msg_nodes);
2060:   msg_nodes=nodes;

2062:   /* load gs values into in out gs buffers */
2063:   while (*iptr >= 0)
2064:     {*dptr3++ = *(in_vals + *iptr++);}

2066:   /* load out buffers and post the sends */
2067:   while ((iptr = *msg_nodes++))
2068:     {
2069:       dptr3 = dptr2;
2070:       while (*iptr >= 0)
2071:         {*dptr2++ = *(dptr1 + *iptr++);}
2072:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2073:       /* is msg_ids_out++ correct? */
2074:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2075:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2076:     }

2078:   if (gs->max_left_over)
2079:     {gs_gop_tree_max(gs,in_vals);}

2081:   /* process the received data */
2082:   msg_nodes=nodes;
2083:   while ((iptr = *nodes++))
2084:     {
2085:       /* Should I check the return value of MPI_Wait() or status? */
2086:       /* Can this loop be replaced by a call to MPI_Waitall()? */
2087:       MPI_Wait(ids_in++, &status);
2088:       while (*iptr >= 0)
2089:         {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2090:     }

2092:   /* replace vals */
2093:   while (*pw >= 0)
2094:     {*(in_vals + *pw++) = *dptr1++;}

2096:   /* clear isend message handles */
2097:   /* This changed for clarity though it could be the same */
2098:   while (*msg_nodes++)
2099:     /* Should I check the return value of MPI_Wait() or status? */
2100:     /* Can this loop be replaced by a call to MPI_Waitall()? */
2101:     {MPI_Wait(ids_out++, &status);}
2102: }



2106: /******************************************************************************
2107: Function: gather_scatter

2109: Input : 
2110: Output: 
2111: Return: 
2112: Description: 
2113: ******************************************************************************/
2114: static
2115: void
2116: gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2117: {
2118:   int size;
2119:   int *in, *out;
2120:   PetscScalar *buf, *work;
2121:   int ierr;
2122: 
2123:   in   = gs->tree_map_in;
2124:   out  = gs->tree_map_out;
2125:   buf  = gs->tree_buf;
2126:   work = gs->tree_work;
2127:   size = gs->tree_nel;

2129:   rvec_set(buf,-REAL_MAX,size);

2131:   while (*in >= 0)
2132:     {*(buf + *out++) = *(vals + *in++);}

2134:   in   = gs->tree_map_in;
2135:   out  = gs->tree_map_out;
2136:   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);
2137:   while (*in >= 0)
2138:     {*(vals + *in++) = *(work + *out++);}

2140: }



2144: /******************************************************************************
2145: Function: gather_scatter

2147: Input : 
2148: Output: 
2149: Return: 
2150: Description: 
2151: ******************************************************************************/
2152: static void
2153: gs_gop_min_abs( gs_id *gs,  PetscScalar *vals)
2154: {

2156:   /* local only operations!!! */
2157:   if (gs->num_local)
2158:     {gs_gop_local_min_abs(gs,vals);}

2160:   /* if intersection tree/pairwise and local isn't empty */
2161:   if (gs->num_local_gop)
2162:     {
2163:       gs_gop_local_in_min_abs(gs,vals);

2165:       /* pairwise */
2166:       if (gs->num_pairs)
2167:         {gs_gop_pairwise_min_abs(gs,vals);}
2168: 
2169:       /* tree */
2170:       else if (gs->max_left_over)
2171:         {gs_gop_tree_min_abs(gs,vals);}
2172: 
2173:       gs_gop_local_out(gs,vals);
2174:     }
2175:   /* if intersection tree/pairwise and local is empty */
2176:   else
2177:     {
2178:       /* pairwise */
2179:       if (gs->num_pairs)
2180:         {gs_gop_pairwise_min_abs(gs,vals);}
2181: 
2182:       /* tree */
2183:       else if (gs->max_left_over)
2184:         {gs_gop_tree_min_abs(gs,vals);}
2185:     }
2186: }



2190: /******************************************************************************
2191: Function: gather_scatter

2193: Input : 
2194: Output: 
2195: Return: 
2196: Description: 
2197: ******************************************************************************/
2198: static
2199: void
2200: gs_gop_local_min_abs( gs_id *gs,  PetscScalar *vals)
2201: {
2202:    int *num, *map, **reduce;
2203:    PetscScalar tmp;


2206:   num    = gs->num_local_reduce;
2207:   reduce = gs->local_reduce;
2208:   while ((map = *reduce))
2209:     {
2210:       num ++;
2211:       tmp = REAL_MAX;
2212:       while (*map >= 0)
2213:         {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2214: 
2215:       map = *reduce++;
2216:       while (*map >= 0)
2217:         {*(vals + *map++) = tmp;}
2218:     }
2219: }



2223: /******************************************************************************
2224: Function: gather_scatter

2226: Input : 
2227: Output: 
2228: Return: 
2229: Description: 
2230: ******************************************************************************/
2231: static
2232: void
2233: gs_gop_local_in_min_abs( gs_id *gs,  PetscScalar *vals)
2234: {
2235:    int *num, *map, **reduce;
2236:    PetscScalar *base;

2238:   num    = gs->num_gop_local_reduce;
2239:   reduce = gs->gop_local_reduce;
2240:   while ((map = *reduce++))
2241:     {
2242:       num++;
2243:       base = vals + *map++;
2244:       while (*map >= 0)
2245:         {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2246:     }
2247: }



2251: /******************************************************************************
2252: Function: gather_scatter

2254: VERSION 3 :: 

2256: Input : 
2257: Output: 
2258: Return: 
2259: Description: 
2260: ******************************************************************************/
2261: static
2262: void
2263: gs_gop_pairwise_min_abs( gs_id *gs,  PetscScalar *in_vals)
2264: {
2265:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2266:    int *iptr, *msg_list, *msg_size, **msg_nodes;
2267:    int *pw, *list, *size, **nodes;
2268:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2269:   MPI_Status status;
2270:   int ierr;

2272:   /* strip and load s */
2273:   msg_list =list         = gs->pair_list;
2274:   msg_size =size         = gs->msg_sizes;
2275:   msg_nodes=nodes        = gs->node_list;
2276:   iptr=pw                = gs->pw_elm_list;
2277:   dptr1=dptr3            = gs->pw_vals;
2278:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2279:   msg_ids_out = ids_out  = gs->msg_ids_out;
2280:   dptr2                  = gs->out;
2281:   in1=in2                = gs->in;

2283:   /* post the receives */
2284:   /*  msg_nodes=nodes; */
2285:   do
2286:     {
2287:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2288:          second one *list and do list++ afterwards */
2289:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2290:                 gs->gs_comm, msg_ids_in++);
2291:       in1 += *size++;
2292:     }
2293:   while (*++msg_nodes);
2294:   msg_nodes=nodes;

2296:   /* load gs values into in out gs buffers */
2297:   while (*iptr >= 0)
2298:     {*dptr3++ = *(in_vals + *iptr++);}

2300:   /* load out buffers and post the sends */
2301:   while ((iptr = *msg_nodes++))
2302:     {
2303:       dptr3 = dptr2;
2304:       while (*iptr >= 0)
2305:         {*dptr2++ = *(dptr1 + *iptr++);}
2306:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2307:       /* is msg_ids_out++ correct? */
2308:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2309:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2310:     }

2312:   if (gs->max_left_over)
2313:     {gs_gop_tree_min_abs(gs,in_vals);}

2315:   /* process the received data */
2316:   msg_nodes=nodes;
2317:   while ((iptr = *nodes++))
2318:     {
2319:       /* Should I check the return value of MPI_Wait() or status? */
2320:       /* Can this loop be replaced by a call to MPI_Waitall()? */
2321:       MPI_Wait(ids_in++, &status);
2322:       while (*iptr >= 0)
2323:         {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2324:     }

2326:   /* replace vals */
2327:   while (*pw >= 0)
2328:     {*(in_vals + *pw++) = *dptr1++;}

2330:   /* clear isend message handles */
2331:   /* This changed for clarity though it could be the same */
2332:   while (*msg_nodes++)
2333:     /* Should I check the return value of MPI_Wait() or status? */
2334:     /* Can this loop be replaced by a call to MPI_Waitall()? */
2335:     {MPI_Wait(ids_out++, &status);}
2336: }



2340: /******************************************************************************
2341: Function: gather_scatter

2343: Input : 
2344: Output: 
2345: Return: 
2346: Description: 
2347: ******************************************************************************/
2348: static
2349: void
2350: gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2351: {
2352:   int size;
2353:   int *in, *out;
2354:   PetscScalar *buf, *work;
2355:   int op[] = {GL_MIN_ABS,0};


2358:   in   = gs->tree_map_in;
2359:   out  = gs->tree_map_out;
2360:   buf  = gs->tree_buf;
2361:   work = gs->tree_work;
2362:   size = gs->tree_nel;

2364:   rvec_set(buf,REAL_MAX,size);

2366:   while (*in >= 0)
2367:     {*(buf + *out++) = *(vals + *in++);}

2369:   in   = gs->tree_map_in;
2370:   out  = gs->tree_map_out;
2371:   grop(buf,work,size,op);
2372:   while (*in >= 0)
2373:     {*(vals + *in++) = *(buf + *out++);}

2375: }



2379: /******************************************************************************
2380: Function: gather_scatter

2382: Input : 
2383: Output: 
2384: Return: 
2385: Description: 
2386: ******************************************************************************/
2387: static void
2388: gs_gop_min( gs_id *gs,  PetscScalar *vals)
2389: {

2391:   /* local only operations!!! */
2392:   if (gs->num_local)
2393:     {gs_gop_local_min(gs,vals);}

2395:   /* if intersection tree/pairwise and local isn't empty */
2396:   if (gs->num_local_gop)
2397:     {
2398:       gs_gop_local_in_min(gs,vals);

2400:       /* pairwise */
2401:       if (gs->num_pairs)
2402:         {gs_gop_pairwise_min(gs,vals);}
2403: 
2404:       /* tree */
2405:       else if (gs->max_left_over)
2406:         {gs_gop_tree_min(gs,vals);}
2407: 
2408:       gs_gop_local_out(gs,vals);
2409:     }
2410:   /* if intersection tree/pairwise and local is empty */
2411:   else
2412:     {
2413:       /* pairwise */
2414:       if (gs->num_pairs)
2415:         {gs_gop_pairwise_min(gs,vals);}
2416: 
2417:       /* tree */
2418:       else if (gs->max_left_over)
2419:         {gs_gop_tree_min(gs,vals);}
2420:     }
2421: }



2425: /******************************************************************************
2426: Function: gather_scatter

2428: Input : 
2429: Output: 
2430: Return: 
2431: Description: 
2432: ******************************************************************************/
2433: static
2434: void
2435: gs_gop_local_min( gs_id *gs,  PetscScalar *vals)
2436: {
2437:    int *num, *map, **reduce;
2438:    PetscScalar tmp;

2440:   num    = gs->num_local_reduce;
2441:   reduce = gs->local_reduce;
2442:   while ((map = *reduce))
2443:     {
2444:       num ++;
2445:       tmp = REAL_MAX;
2446:       while (*map >= 0)
2447:         {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2448: 
2449:       map = *reduce++;
2450:       while (*map >= 0)
2451:         {*(vals + *map++) = tmp;}
2452:     }
2453: }



2457: /******************************************************************************
2458: Function: gather_scatter

2460: Input : 
2461: Output: 
2462: Return: 
2463: Description: 
2464: ******************************************************************************/
2465: static
2466: void
2467: gs_gop_local_in_min( gs_id *gs,  PetscScalar *vals)
2468: {
2469:    int *num, *map, **reduce;
2470:    PetscScalar *base;

2472:   num    = gs->num_gop_local_reduce;
2473:   reduce = gs->gop_local_reduce;
2474:   while ((map = *reduce++))
2475:     {
2476:       num++;
2477:       base = vals + *map++;
2478:       while (*map >= 0)
2479:         {*base = PetscMin(*base,*(vals + *map)); map++;}
2480:     }
2481: }



2485: /******************************************************************************
2486: Function: gather_scatter

2488: VERSION 3 :: 

2490: Input : 
2491: Output: 
2492: Return: 
2493: Description: 
2494: ******************************************************************************/
2495: static
2496: void
2497: gs_gop_pairwise_min( gs_id *gs,  PetscScalar *in_vals)
2498: {
2499:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2500:    int *iptr, *msg_list, *msg_size, **msg_nodes;
2501:    int *pw, *list, *size, **nodes;
2502:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2503:   MPI_Status status;
2504:   int ierr;

2506:   /* strip and load s */
2507:   msg_list =list         = gs->pair_list;
2508:   msg_size =size         = gs->msg_sizes;
2509:   msg_nodes=nodes        = gs->node_list;
2510:   iptr=pw                = gs->pw_elm_list;
2511:   dptr1=dptr3            = gs->pw_vals;
2512:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2513:   msg_ids_out = ids_out  = gs->msg_ids_out;
2514:   dptr2                  = gs->out;
2515:   in1=in2                = gs->in;

2517:   /* post the receives */
2518:   /*  msg_nodes=nodes; */
2519:   do
2520:     {
2521:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2522:          second one *list and do list++ afterwards */
2523:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2524:                 gs->gs_comm, msg_ids_in++);
2525:       in1 += *size++;
2526:     }
2527:   while (*++msg_nodes);
2528:   msg_nodes=nodes;

2530:   /* load gs values into in out gs buffers */
2531:   while (*iptr >= 0)
2532:     {*dptr3++ = *(in_vals + *iptr++);}

2534:   /* load out buffers and post the sends */
2535:   while ((iptr = *msg_nodes++))
2536:     {
2537:       dptr3 = dptr2;
2538:       while (*iptr >= 0)
2539:         {*dptr2++ = *(dptr1 + *iptr++);}
2540:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2541:       /* is msg_ids_out++ correct? */
2542:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2543:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2544:     }

2546:   /* process the received data */
2547:   if (gs->max_left_over)
2548:     {gs_gop_tree_min(gs,in_vals);}

2550:   msg_nodes=nodes;
2551:   while ((iptr = *nodes++))
2552:     {
2553:       /* Should I check the return value of MPI_Wait() or status? */
2554:       /* Can this loop be replaced by a call to MPI_Waitall()? */
2555:       MPI_Wait(ids_in++, &status);
2556:       while (*iptr >= 0)
2557:         {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2558:     }

2560:   /* replace vals */
2561:   while (*pw >= 0)
2562:     {*(in_vals + *pw++) = *dptr1++;}

2564:   /* clear isend message handles */
2565:   /* This changed for clarity though it could be the same */
2566:   while (*msg_nodes++)
2567:     /* Should I check the return value of MPI_Wait() or status? */
2568:     /* Can this loop be replaced by a call to MPI_Waitall()? */
2569:     {MPI_Wait(ids_out++, &status);}
2570: }



2574: /******************************************************************************
2575: Function: gather_scatter

2577: Input : 
2578: Output: 
2579: Return: 
2580: Description: 
2581: ******************************************************************************/
2582: static
2583: void
2584: gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2585: {
2586:   int size;
2587:   int *in, *out;
2588:   PetscScalar *buf, *work;
2589:   int ierr;
2590: 
2591:   in   = gs->tree_map_in;
2592:   out  = gs->tree_map_out;
2593:   buf  = gs->tree_buf;
2594:   work = gs->tree_work;
2595:   size = gs->tree_nel;

2597:   rvec_set(buf,REAL_MAX,size);

2599:   while (*in >= 0)
2600:     {*(buf + *out++) = *(vals + *in++);}

2602:   in   = gs->tree_map_in;
2603:   out  = gs->tree_map_out;
2604:   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);
2605:   while (*in >= 0)
2606:     {*(vals + *in++) = *(work + *out++);}
2607: }



2611: /******************************************************************************
2612: Function: gather_scatter

2614: Input : 
2615: Output: 
2616: Return: 
2617: Description: 
2618: ******************************************************************************/
2619: static void
2620: gs_gop_times( gs_id *gs,  PetscScalar *vals)
2621: {

2623:   /* local only operations!!! */
2624:   if (gs->num_local)
2625:     {gs_gop_local_times(gs,vals);}

2627:   /* if intersection tree/pairwise and local isn't empty */
2628:   if (gs->num_local_gop)
2629:     {
2630:       gs_gop_local_in_times(gs,vals);

2632:       /* pairwise */
2633:       if (gs->num_pairs)
2634:         {gs_gop_pairwise_times(gs,vals);}
2635: 
2636:       /* tree */
2637:       else if (gs->max_left_over)
2638:         {gs_gop_tree_times(gs,vals);}
2639: 
2640:       gs_gop_local_out(gs,vals);
2641:     }
2642:   /* if intersection tree/pairwise and local is empty */
2643:   else
2644:     {
2645:       /* pairwise */
2646:       if (gs->num_pairs)
2647:         {gs_gop_pairwise_times(gs,vals);}
2648: 
2649:       /* tree */
2650:       else if (gs->max_left_over)
2651:         {gs_gop_tree_times(gs,vals);}
2652:     }
2653: }



2657: /******************************************************************************
2658: Function: gather_scatter

2660: Input : 
2661: Output: 
2662: Return: 
2663: Description: 
2664: ******************************************************************************/
2665: static
2666: void
2667: gs_gop_local_times( gs_id *gs,  PetscScalar *vals)
2668: {
2669:    int *num, *map, **reduce;
2670:    PetscScalar tmp;

2672:   num    = gs->num_local_reduce;
2673:   reduce = gs->local_reduce;
2674:   while ((map = *reduce))
2675:     {
2676:       /* wall */
2677:       if (*num == 2)
2678:         {
2679:           num ++; reduce++;
2680:           vals[map[1]] = vals[map[0]] *= vals[map[1]];
2681:         }
2682:       /* corner shared by three elements */
2683:       else if (*num == 3)
2684:         {
2685:           num ++; reduce++;
2686:           vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2687:         }
2688:       /* corner shared by four elements */
2689:       else if (*num == 4)
2690:         {
2691:           num ++; reduce++;
2692:           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2693:                                  (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2694:         }
2695:       /* general case ... odd geoms ... 3D*/
2696:       else
2697:         {
2698:           num ++;
2699:           tmp = 1.0;
2700:           while (*map >= 0)
2701:             {tmp *= *(vals + *map++);}

2703:           map = *reduce++;
2704:           while (*map >= 0)
2705:             {*(vals + *map++) = tmp;}
2706:         }
2707:     }
2708: }



2712: /******************************************************************************
2713: Function: gather_scatter

2715: Input : 
2716: Output: 
2717: Return: 
2718: Description: 
2719: ******************************************************************************/
2720: static
2721: void
2722: gs_gop_local_in_times( gs_id *gs,  PetscScalar *vals)
2723: {
2724:    int *num, *map, **reduce;
2725:    PetscScalar *base;

2727:   num    = gs->num_gop_local_reduce;
2728:   reduce = gs->gop_local_reduce;
2729:   while ((map = *reduce++))
2730:     {
2731:       /* wall */
2732:       if (*num == 2)
2733:         {
2734:           num ++;
2735:           vals[map[0]] *= vals[map[1]];
2736:         }
2737:       /* corner shared by three elements */
2738:       else if (*num == 3)
2739:         {
2740:           num ++;
2741:           vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2742:         }
2743:       /* corner shared by four elements */
2744:       else if (*num == 4)
2745:         {
2746:           num ++;
2747:           vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2748:         }
2749:       /* general case ... odd geoms ... 3D*/
2750:       else
2751:         {
2752:           num++;
2753:           base = vals + *map++;
2754:           while (*map >= 0)
2755:             {*base *= *(vals + *map++);}
2756:         }
2757:     }
2758: }



2762: /******************************************************************************
2763: Function: gather_scatter

2765: VERSION 3 :: 

2767: Input : 
2768: Output: 
2769: Return: 
2770: Description: 
2771: ******************************************************************************/
2772: static
2773: void
2774: gs_gop_pairwise_times( gs_id *gs,  PetscScalar *in_vals)
2775: {
2776:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2777:    int *iptr, *msg_list, *msg_size, **msg_nodes;
2778:    int *pw, *list, *size, **nodes;
2779:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2780:   MPI_Status status;
2781:   int ierr;

2783:   /* strip and load s */
2784:   msg_list =list         = gs->pair_list;
2785:   msg_size =size         = gs->msg_sizes;
2786:   msg_nodes=nodes        = gs->node_list;
2787:   iptr=pw                = gs->pw_elm_list;
2788:   dptr1=dptr3            = gs->pw_vals;
2789:   msg_ids_in  = ids_in   = gs->msg_ids_in;
2790:   msg_ids_out = ids_out  = gs->msg_ids_out;
2791:   dptr2                  = gs->out;
2792:   in1=in2                = gs->in;

2794:   /* post the receives */
2795:   /*  msg_nodes=nodes; */
2796:   do
2797:     {
2798:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2799:          second one *list and do list++ afterwards */
2800:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2801:                 gs->gs_comm, msg_ids_in++);
2802:       in1 += *size++;
2803:     }
2804:   while (*++msg_nodes);
2805:   msg_nodes=nodes;

2807:   /* load gs values into in out gs buffers */
2808:   while (*iptr >= 0)
2809:     {*dptr3++ = *(in_vals + *iptr++);}

2811:   /* load out buffers and post the sends */
2812:   while ((iptr = *msg_nodes++))
2813:     {
2814:       dptr3 = dptr2;
2815:       while (*iptr >= 0)
2816:         {*dptr2++ = *(dptr1 + *iptr++);}
2817:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2818:       /* is msg_ids_out++ correct? */
2819:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2820:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2821:     }

2823:   if (gs->max_left_over)
2824:     {gs_gop_tree_times(gs,in_vals);}

2826:   /* process the received data */
2827:   msg_nodes=nodes;
2828:   while ((iptr = *nodes++))
2829:     {
2830:       /* Should I check the return value of MPI_Wait() or status? */
2831:       /* Can this loop be replaced by a call to MPI_Waitall()? */
2832:       MPI_Wait(ids_in++, &status);
2833:       while (*iptr >= 0)
2834:         {*(dptr1 + *iptr++) *= *in2++;}
2835:     }

2837:   /* replace vals */
2838:   while (*pw >= 0)
2839:     {*(in_vals + *pw++) = *dptr1++;}

2841:   /* clear isend message handles */
2842:   /* This changed for clarity though it could be the same */
2843:   while (*msg_nodes++)
2844:     /* Should I check the return value of MPI_Wait() or status? */
2845:     /* Can this loop be replaced by a call to MPI_Waitall()? */
2846:     {MPI_Wait(ids_out++, &status);}
2847: }



2851: /******************************************************************************
2852: Function: gather_scatter

2854: Input : 
2855: Output: 
2856: Return: 
2857: Description: 
2858: ******************************************************************************/
2859: static
2860: void
2861: gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2862: {
2863:   int size;
2864:   int *in, *out;
2865:   PetscScalar *buf, *work;
2866:   int ierr;
2867: 
2868:   in   = gs->tree_map_in;
2869:   out  = gs->tree_map_out;
2870:   buf  = gs->tree_buf;
2871:   work = gs->tree_work;
2872:   size = gs->tree_nel;

2874:   rvec_one(buf,size);

2876:   while (*in >= 0)
2877:     {*(buf + *out++) = *(vals + *in++);}

2879:   in   = gs->tree_map_in;
2880:   out  = gs->tree_map_out;
2881:   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);
2882:   while (*in >= 0)
2883:     {*(vals + *in++) = *(work + *out++);}

2885: }



2889: /******************************************************************************
2890: Function: gather_scatter


2893: Input : 
2894: Output: 
2895: Return: 
2896: Description: 
2897: ******************************************************************************/
2898: static void
2899: gs_gop_plus( gs_id *gs,  PetscScalar *vals)
2900: {

2902:   /* local only operations!!! */
2903:   if (gs->num_local)
2904:     {gs_gop_local_plus(gs,vals);}

2906:   /* if intersection tree/pairwise and local isn't empty */
2907:   if (gs->num_local_gop)
2908:     {
2909:       gs_gop_local_in_plus(gs,vals);

2911:       /* pairwise will NOT do tree inside ... */
2912:       if (gs->num_pairs)
2913:         {gs_gop_pairwise_plus(gs,vals);}

2915:       /* tree */
2916:       if (gs->max_left_over)
2917:         {gs_gop_tree_plus(gs,vals);}
2918: 
2919:       gs_gop_local_out(gs,vals);
2920:     }
2921:   /* if intersection tree/pairwise and local is empty */
2922:   else
2923:     {
2924:       /* pairwise will NOT do tree inside */
2925:       if (gs->num_pairs)
2926:         {gs_gop_pairwise_plus(gs,vals);}
2927: 
2928:       /* tree */
2929:       if (gs->max_left_over)
2930:         {gs_gop_tree_plus(gs,vals);}
2931:     }

2933: }



2937: /******************************************************************************
2938: Function: gather_scatter

2940: Input : 
2941: Output: 
2942: Return: 
2943: Description: 
2944: ******************************************************************************/
2945: static
2946: void
2947: gs_gop_local_plus( gs_id *gs,  PetscScalar *vals)
2948: {
2949:    int *num, *map, **reduce;
2950:    PetscScalar tmp;


2953:   num    = gs->num_local_reduce;
2954:   reduce = gs->local_reduce;
2955:   while ((map = *reduce))
2956:     {
2957:       /* wall */
2958:       if (*num == 2)
2959:         {
2960:           num ++; reduce++;
2961:           vals[map[1]] = vals[map[0]] += vals[map[1]];
2962:         }
2963:       /* corner shared by three elements */
2964:       else if (*num == 3)
2965:         {
2966:           num ++; reduce++;
2967:           vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2968:         }
2969:       /* corner shared by four elements */
2970:       else if (*num == 4)
2971:         {
2972:           num ++; reduce++;
2973:           vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2974:                                  (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2975:         }
2976:       /* general case ... odd geoms ... 3D*/
2977:       else
2978:         {
2979:           num ++;
2980:           tmp = 0.0;
2981:           while (*map >= 0)
2982:             {tmp += *(vals + *map++);}

2984:           map = *reduce++;
2985:           while (*map >= 0)
2986:             {*(vals + *map++) = tmp;}
2987:         }
2988:     }
2989: }



2993: /******************************************************************************
2994: Function: gather_scatter

2996: Input : 
2997: Output: 
2998: Return: 
2999: Description: 
3000: ******************************************************************************/
3001: static
3002: void
3003: gs_gop_local_in_plus( gs_id *gs,  PetscScalar *vals)
3004: {
3005:    int *num, *map, **reduce;
3006:    PetscScalar *base;


3009:   num    = gs->num_gop_local_reduce;
3010:   reduce = gs->gop_local_reduce;
3011:   while ((map = *reduce++))
3012:     {
3013:       /* wall */
3014:       if (*num == 2)
3015:         {
3016:           num ++;
3017:           vals[map[0]] += vals[map[1]];
3018:         }
3019:       /* corner shared by three elements */
3020:       else if (*num == 3)
3021:         {
3022:           num ++;
3023:           vals[map[0]] += (vals[map[1]] + vals[map[2]]);
3024:         }
3025:       /* corner shared by four elements */
3026:       else if (*num == 4)
3027:         {
3028:           num ++;
3029:           vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
3030:         }
3031:       /* general case ... odd geoms ... 3D*/
3032:       else
3033:         {
3034:           num++;
3035:           base = vals + *map++;
3036:           while (*map >= 0)
3037:             {*base += *(vals + *map++);}
3038:         }
3039:     }
3040: }



3044: /******************************************************************************
3045: Function: gather_scatter

3047: VERSION 3 :: 

3049: Input : 
3050: Output: 
3051: Return: 
3052: Description: 
3053: ******************************************************************************/
3054: static
3055: void
3056: gs_gop_pairwise_plus( gs_id *gs,  PetscScalar *in_vals)
3057: {
3058:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3059:    int *iptr, *msg_list, *msg_size, **msg_nodes;
3060:    int *pw, *list, *size, **nodes;
3061:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3062:   MPI_Status status;
3063:   int ierr;

3065:   /* strip and load s */
3066:   msg_list =list         = gs->pair_list;
3067:   msg_size =size         = gs->msg_sizes;
3068:   msg_nodes=nodes        = gs->node_list;
3069:   iptr=pw                = gs->pw_elm_list;
3070:   dptr1=dptr3            = gs->pw_vals;
3071:   msg_ids_in  = ids_in   = gs->msg_ids_in;
3072:   msg_ids_out = ids_out  = gs->msg_ids_out;
3073:   dptr2                  = gs->out;
3074:   in1=in2                = gs->in;

3076:   /* post the receives */
3077:   /*  msg_nodes=nodes; */
3078:   do
3079:     {
3080:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3081:          second one *list and do list++ afterwards */
3082:       MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3083:                 gs->gs_comm, msg_ids_in++);
3084:       in1 += *size++;
3085:     }
3086:   while (*++msg_nodes);
3087:   msg_nodes=nodes;

3089:   /* load gs values into in out gs buffers */
3090:   while (*iptr >= 0)
3091:     {*dptr3++ = *(in_vals + *iptr++);}

3093:   /* load out buffers and post the sends */
3094:   while ((iptr = *msg_nodes++))
3095:     {
3096:       dptr3 = dptr2;
3097:       while (*iptr >= 0)
3098:         {*dptr2++ = *(dptr1 + *iptr++);}
3099:       /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3100:       /* is msg_ids_out++ correct? */
3101:       MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
3102:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3103:     }

3105:   /* do the tree while we're waiting */
3106:   if (gs->max_left_over)
3107:     {gs_gop_tree_plus(gs,in_vals);}

3109:   /* process the received data */
3110:   msg_nodes=nodes;
3111:   while ((iptr = *nodes++))
3112:     {
3113:       /* Should I check the return value of MPI_Wait() or status? */
3114:       /* Can this loop be replaced by a call to MPI_Waitall()? */
3115:       MPI_Wait(ids_in++, &status);
3116:       while (*iptr >= 0)
3117:         {*(dptr1 + *iptr++) += *in2++;}
3118:     }

3120:   /* replace vals */
3121:   while (*pw >= 0)
3122:     {*(in_vals + *pw++) = *dptr1++;}

3124:   /* clear isend message handles */
3125:   /* This changed for clarity though it could be the same */
3126:   while (*msg_nodes++)
3127:     /* Should I check the return value of MPI_Wait() or status? */
3128:     /* Can this loop be replaced by a call to MPI_Waitall()? */
3129:     {MPI_Wait(ids_out++, &status);}

3131: }



3135: /******************************************************************************
3136: Function: gather_scatter

3138: Input : 
3139: Output: 
3140: Return: 
3141: Description: 
3142: ******************************************************************************/
3143: static
3144: void
3145: gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3146: {
3147:   int size;
3148:   int *in, *out;
3149:   PetscScalar *buf, *work;
3150:   int ierr;
3151: 
3152:   in   = gs->tree_map_in;
3153:   out  = gs->tree_map_out;
3154:   buf  = gs->tree_buf;
3155:   work = gs->tree_work;
3156:   size = gs->tree_nel;

3158:   rvec_zero(buf,size);

3160:   while (*in >= 0)
3161:     {*(buf + *out++) = *(vals + *in++);}

3163:   in   = gs->tree_map_in;
3164:   out  = gs->tree_map_out;
3165:   MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);
3166:   while (*in >= 0)
3167:     {*(vals + *in++) = *(work + *out++);}

3169: }

3171: /******************************************************************************
3172: Function: gs_free()

3174: Input : 

3176: Output: 

3178: Return: 

3180: Description:  
3181:   if (gs->sss) {free((void*) gs->sss);}
3182: ******************************************************************************/
3183: void
3184: gs_free( gs_id *gs)
3185: {
3186:    int i;


3189:   if (gs->nghs) {free((void*) gs->nghs);}
3190:   if (gs->pw_nghs) {free((void*) gs->pw_nghs);}

3192:   /* tree */
3193:   if (gs->max_left_over)
3194:     {
3195:       if (gs->tree_elms) {free((void*) gs->tree_elms);}
3196:       if (gs->tree_buf) {free((void*) gs->tree_buf);}
3197:       if (gs->tree_work) {free((void*) gs->tree_work);}
3198:       if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3199:       if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3200:     }

3202:   /* pairwise info */
3203:   if (gs->num_pairs)
3204:     {
3205:       /* should be NULL already */
3206:       if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3207:       if (gs->elms) {free((void*) gs->elms);}
3208:       if (gs->local_elms) {free((void*) gs->local_elms);}
3209:       if (gs->companion) {free((void*) gs->companion);}
3210: 
3211:       /* only set if pairwise */
3212:       if (gs->vals) {free((void*) gs->vals);}
3213:       if (gs->in) {free((void*) gs->in);}
3214:       if (gs->out) {free((void*) gs->out);}
3215:       if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3216:       if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3217:       if (gs->pw_vals) {free((void*) gs->pw_vals);}
3218:       if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3219:       if (gs->node_list)
3220:         {
3221:           for (i=0;i<gs->num_pairs;i++)
3222:             {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3223:           free((void*) gs->node_list);
3224:         }
3225:       if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3226:       if (gs->pair_list) {free((void*) gs->pair_list);}
3227:     }

3229:   /* local info */
3230:   if (gs->num_local_total>=0)
3231:     {
3232:       for (i=0;i<gs->num_local_total+1;i++)
3233:         /*      for (i=0;i<gs->num_local_total;i++) */
3234:         {
3235:           if (gs->num_gop_local_reduce[i])
3236:             {free((void*) gs->gop_local_reduce[i]);}
3237:         }
3238:     }

3240:   /* if intersection tree/pairwise and local isn't empty */
3241:   if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3242:   if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}

3244:   free((void*) gs);
3245: }






3252: /******************************************************************************
3253: Function: gather_scatter

3255: Input : 
3256: Output: 
3257: Return: 
3258: Description: 
3259: ******************************************************************************/
3260: void
3261: gs_gop_vec( gs_id *gs,  PetscScalar *vals,  const char *op,  int step)
3262: {

3264:   switch (*op) {
3265:   case '+':
3266:     gs_gop_vec_plus(gs,vals,step);
3267:     break;
3268:   default:
3269:     error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3270:     error_msg_warning("gs_gop_vec() :: default :: plus");
3271:     gs_gop_vec_plus(gs,vals,step);
3272:     break;
3273:   }
3274: }



3278: /******************************************************************************
3279: Function: gather_scatter

3281: Input : 
3282: Output: 
3283: Return: 
3284: Description: 
3285: ******************************************************************************/
3286: static void
3287: gs_gop_vec_plus( gs_id *gs,  PetscScalar *vals,  int step)
3288: {
3289:   if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}

3291:   /* local only operations!!! */
3292:   if (gs->num_local)
3293:     {gs_gop_vec_local_plus(gs,vals,step);}

3295:   /* if intersection tree/pairwise and local isn't empty */
3296:   if (gs->num_local_gop)
3297:     {
3298:       gs_gop_vec_local_in_plus(gs,vals,step);

3300:       /* pairwise */
3301:       if (gs->num_pairs)
3302:         {gs_gop_vec_pairwise_plus(gs,vals,step);}

3304:       /* tree */
3305:       else if (gs->max_left_over)
3306:         {gs_gop_vec_tree_plus(gs,vals,step);}

3308:       gs_gop_vec_local_out(gs,vals,step);
3309:     }
3310:   /* if intersection tree/pairwise and local is empty */
3311:   else
3312:     {
3313:       /* pairwise */
3314:       if (gs->num_pairs)
3315:         {gs_gop_vec_pairwise_plus(gs,vals,step);}

3317:       /* tree */
3318:       else if (gs->max_left_over)
3319:         {gs_gop_vec_tree_plus(gs,vals,step);}
3320:     }
3321: }



3325: /******************************************************************************
3326: Function: gather_scatter

3328: Input : 
3329: Output: 
3330: Return: 
3331: Description: 
3332: ******************************************************************************/
3333: static
3334: void
3335: gs_gop_vec_local_plus( gs_id *gs,  PetscScalar *vals, 
3336:                        int step)
3337: {
3338:    int *num, *map, **reduce;
3339:    PetscScalar *base;


3342:   num    = gs->num_local_reduce;
3343:   reduce = gs->local_reduce;
3344:   while ((map = *reduce))
3345:     {
3346:       base = vals + map[0] * step;

3348:       /* wall */
3349:       if (*num == 2)
3350:         {
3351:           num++; reduce++;
3352:           rvec_add (base,vals+map[1]*step,step);
3353:           rvec_copy(vals+map[1]*step,base,step);
3354:         }
3355:       /* corner shared by three elements */
3356:       else if (*num == 3)
3357:         {
3358:           num++; reduce++;
3359:           rvec_add (base,vals+map[1]*step,step);
3360:           rvec_add (base,vals+map[2]*step,step);
3361:           rvec_copy(vals+map[2]*step,base,step);
3362:           rvec_copy(vals+map[1]*step,base,step);
3363:         }
3364:       /* corner shared by four elements */
3365:       else if (*num == 4)
3366:         {
3367:           num++; reduce++;
3368:           rvec_add (base,vals+map[1]*step,step);
3369:           rvec_add (base,vals+map[2]*step,step);
3370:           rvec_add (base,vals+map[3]*step,step);
3371:           rvec_copy(vals+map[3]*step,base,step);
3372:           rvec_copy(vals+map[2]*step,base,step);
3373:           rvec_copy(vals+map[1]*step,base,step);
3374:         }
3375:       /* general case ... odd geoms ... 3D */
3376:       else
3377:         {
3378:           num++;
3379:           while (*++map >= 0)
3380:             {rvec_add (base,vals+*map*step,step);}
3381: 
3382:           map = *reduce;
3383:           while (*++map >= 0)
3384:             {rvec_copy(vals+*map*step,base,step);}
3385: 
3386:           reduce++;
3387:         }
3388:     }
3389: }



3393: /******************************************************************************
3394: Function: gather_scatter

3396: Input : 
3397: Output: 
3398: Return: 
3399: Description: 
3400: ******************************************************************************/
3401: static
3402: void
3403: gs_gop_vec_local_in_plus( gs_id *gs,  PetscScalar *vals, 
3404:                           int step)
3405: {
3406:    int  *num, *map, **reduce;
3407:    PetscScalar *base;

3409:   num    = gs->num_gop_local_reduce;
3410:   reduce = gs->gop_local_reduce;
3411:   while ((map = *reduce++))
3412:     {
3413:       base = vals + map[0] * step;

3415:       /* wall */
3416:       if (*num == 2)
3417:         {
3418:           num ++;
3419:           rvec_add(base,vals+map[1]*step,step);
3420:         }
3421:       /* corner shared by three elements */
3422:       else if (*num == 3)
3423:         {
3424:           num ++;
3425:           rvec_add(base,vals+map[1]*step,step);
3426:           rvec_add(base,vals+map[2]*step,step);
3427:         }
3428:       /* corner shared by four elements */
3429:       else if (*num == 4)
3430:         {
3431:           num ++;
3432:           rvec_add(base,vals+map[1]*step,step);
3433:           rvec_add(base,vals+map[2]*step,step);
3434:           rvec_add(base,vals+map[3]*step,step);
3435:         }
3436:       /* general case ... odd geoms ... 3D*/
3437:       else
3438:         {
3439:           num++;
3440:           while (*++map >= 0)
3441:             {rvec_add(base,vals+*map*step,step);}
3442:         }
3443:     }
3444: }


3447: /******************************************************************************
3448: Function: gather_scatter

3450: Input : 
3451: Output: 
3452: Return: 
3453: Description: 
3454: ******************************************************************************/
3455: static
3456: void
3457: gs_gop_vec_local_out( gs_id *gs,  PetscScalar *vals, 
3458:                       int step)
3459: {
3460:    int *num, *map, **reduce;
3461:    PetscScalar *base;


3464:   num    = gs->num_gop_local_reduce;
3465:   reduce = gs->gop_local_reduce;
3466:   while ((map = *reduce++))
3467:     {
3468:       base = vals + map[0] * step;

3470:       /* wall */
3471:       if (*num == 2)
3472:         {
3473:           num ++;
3474:           rvec_copy(vals+map[1]*step,base,step);
3475:         }
3476:       /* corner shared by three elements */
3477:       else if (*num == 3)
3478:         {
3479:           num ++;
3480:           rvec_copy(vals+map[1]*step,base,step);
3481:           rvec_copy(vals+map[2]*step,base,step);
3482:         }
3483:       /* corner shared by four elements */
3484:       else if (*num == 4)
3485:         {
3486:           num ++;
3487:           rvec_copy(vals+map[1]*step,base,step);
3488:           rvec_copy(vals+map[2]*step,base,step);
3489:           rvec_copy(vals+map[3]*step,base,step);
3490:         }
3491:       /* general case ... odd geoms ... 3D*/
3492:       else
3493:         {
3494:           num++;
3495:           while (*++map >= 0)
3496:             {rvec_copy(vals+*map*step,base,step);}
3497:         }
3498:     }
3499: }



3503: /******************************************************************************
3504: Function: gather_scatter

3506: VERSION 3 :: 

3508: Input : 
3509: Output: 
3510: Return: 
3511: Description: 
3512: ******************************************************************************/
3513: static
3514: void
3515: gs_gop_vec_pairwise_plus( gs_id *gs,  PetscScalar *in_vals,
3516:                           int step)
3517: {
3518:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3519:    int *iptr, *msg_list, *msg_size, **msg_nodes;
3520:    int *pw, *list, *size, **nodes;
3521:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3522:   MPI_Status status;
3523:   PetscBLASInt i1;
3524:   int ierr;

3526:   /* strip and load s */
3527:   msg_list =list         = gs->pair_list;
3528:   msg_size =size         = gs->msg_sizes;
3529:   msg_nodes=nodes        = gs->node_list;
3530:   iptr=pw                = gs->pw_elm_list;
3531:   dptr1=dptr3            = gs->pw_vals;
3532:   msg_ids_in  = ids_in   = gs->msg_ids_in;
3533:   msg_ids_out = ids_out  = gs->msg_ids_out;
3534:   dptr2                  = gs->out;
3535:   in1=in2                = gs->in;

3537:   /* post the receives */
3538:   /*  msg_nodes=nodes; */
3539:   do
3540:     {
3541:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3542:          second one *list and do list++ afterwards */
3543:       MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3544:                 gs->gs_comm, msg_ids_in++);
3545:       in1 += *size++ *step;
3546:     }
3547:   while (*++msg_nodes);
3548:   msg_nodes=nodes;

3550:   /* load gs values into in out gs buffers */
3551:   while (*iptr >= 0)
3552:     {
3553:       rvec_copy(dptr3,in_vals + *iptr*step,step);
3554:       dptr3+=step;
3555:       iptr++;
3556:     }

3558:   /* load out buffers and post the sends */
3559:   while ((iptr = *msg_nodes++))
3560:     {
3561:       dptr3 = dptr2;
3562:       while (*iptr >= 0)
3563:         {
3564:           rvec_copy(dptr2,dptr1 + *iptr*step,step);
3565:           dptr2+=step;
3566:           iptr++;
3567:         }
3568:       MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++,
3569:                 MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3570:     }

3572:   /* tree */
3573:   if (gs->max_left_over)
3574:     {gs_gop_vec_tree_plus(gs,in_vals,step);}

3576:   /* process the received data */
3577:   msg_nodes=nodes;
3578:   while ((iptr = *nodes++)){
3579:     PetscScalar d1 = 1.0;
3580:       /* Should I check the return value of MPI_Wait() or status? */
3581:       /* Can this loop be replaced by a call to MPI_Waitall()? */
3582:       MPI_Wait(ids_in++, &status);
3583:       while (*iptr >= 0) {
3584:           BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3585:           in2+=step;
3586:           iptr++;
3587:       }
3588:   }

3590:   /* replace vals */
3591:   while (*pw >= 0)
3592:     {
3593:       rvec_copy(in_vals + *pw*step,dptr1,step);
3594:       dptr1+=step;
3595:       pw++;
3596:     }

3598:   /* clear isend message handles */
3599:   /* This changed for clarity though it could be the same */
3600:   while (*msg_nodes++)
3601:     /* Should I check the return value of MPI_Wait() or status? */
3602:     /* Can this loop be replaced by a call to MPI_Waitall()? */
3603:     {MPI_Wait(ids_out++, &status);}


3606: }



3610: /******************************************************************************
3611: Function: gather_scatter

3613: Input : 
3614: Output: 
3615: Return: 
3616: Description: 
3617: ******************************************************************************/
3618: static
3619: void
3620: gs_gop_vec_tree_plus( gs_id *gs,  PetscScalar *vals,  int step)
3621: {
3622:   int size, *in, *out;
3623:   PetscScalar *buf, *work;
3624:   int op[] = {GL_ADD,0};
3625:   PetscBLASInt i1 = 1;


3628:   /* copy over to local variables */
3629:   in   = gs->tree_map_in;
3630:   out  = gs->tree_map_out;
3631:   buf  = gs->tree_buf;
3632:   work = gs->tree_work;
3633:   size = gs->tree_nel*step;

3635:   /* zero out collection buffer */
3636:   rvec_zero(buf,size);


3639:   /* copy over my contributions */
3640:   while (*in >= 0)
3641:     {
3642:       BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3643:     }

3645:   /* perform fan in/out on full buffer */
3646:   /* must change grop to handle the blas */
3647:   grop(buf,work,size,op);

3649:   /* reset */
3650:   in   = gs->tree_map_in;
3651:   out  = gs->tree_map_out;

3653:   /* get the portion of the results I need */
3654:   while (*in >= 0)
3655:     {
3656:       BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3657:     }

3659: }



3663: /******************************************************************************
3664: Function: gather_scatter

3666: Input : 
3667: Output: 
3668: Return: 
3669: Description: 
3670: ******************************************************************************/
3671: void
3672: gs_gop_hc( gs_id *gs,  PetscScalar *vals,  const char *op,  int dim)
3673: {

3675:   switch (*op) {
3676:   case '+':
3677:     gs_gop_plus_hc(gs,vals,dim);
3678:     break;
3679:   default:
3680:     error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3681:     error_msg_warning("gs_gop_hc() :: default :: plus\n");
3682:     gs_gop_plus_hc(gs,vals,dim);
3683:     break;
3684:   }
3685: }



3689: /******************************************************************************
3690: Function: gather_scatter

3692: Input : 
3693: Output: 
3694: Return: 
3695: Description: 
3696: ******************************************************************************/
3697: static void
3698: gs_gop_plus_hc( gs_id *gs,  PetscScalar *vals, int dim)
3699: {
3700:   /* if there's nothing to do return */
3701:   if (dim<=0)
3702:     {return;}

3704:   /* can't do more dimensions then exist */
3705:   dim = PetscMin(dim,i_log2_num_nodes);

3707:   /* local only operations!!! */
3708:   if (gs->num_local)
3709:     {gs_gop_local_plus(gs,vals);}

3711:   /* if intersection tree/pairwise and local isn't empty */
3712:   if (gs->num_local_gop)
3713:     {
3714:       gs_gop_local_in_plus(gs,vals);

3716:       /* pairwise will do tree inside ... */
3717:       if (gs->num_pairs)
3718:         {gs_gop_pairwise_plus_hc(gs,vals,dim);}

3720:       /* tree only */
3721:       else if (gs->max_left_over)
3722:         {gs_gop_tree_plus_hc(gs,vals,dim);}
3723: 
3724:       gs_gop_local_out(gs,vals);
3725:     }
3726:   /* if intersection tree/pairwise and local is empty */
3727:   else
3728:     {
3729:       /* pairwise will do tree inside */
3730:       if (gs->num_pairs)
3731:         {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3732: 
3733:       /* tree */
3734:       else if (gs->max_left_over)
3735:         {gs_gop_tree_plus_hc(gs,vals,dim);}
3736:     }

3738: }


3741: /******************************************************************************
3742: VERSION 3 :: 

3744: Input : 
3745: Output: 
3746: Return: 
3747: Description: 
3748: ******************************************************************************/
3749: static
3750: void
3751: gs_gop_pairwise_plus_hc( gs_id *gs,  PetscScalar *in_vals, int dim)
3752: {
3753:    PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3754:    int *iptr, *msg_list, *msg_size, **msg_nodes;
3755:    int *pw, *list, *size, **nodes;
3756:   MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3757:   MPI_Status status;
3758:   int i, mask=1;
3759:   int ierr;

3761:   for (i=1; i<dim; i++)
3762:     {mask<<=1; mask++;}


3765:   /* strip and load s */
3766:   msg_list =list         = gs->pair_list;
3767:   msg_size =size         = gs->msg_sizes;
3768:   msg_nodes=nodes        = gs->node_list;
3769:   iptr=pw                = gs->pw_elm_list;
3770:   dptr1=dptr3            = gs->pw_vals;
3771:   msg_ids_in  = ids_in   = gs->msg_ids_in;
3772:   msg_ids_out = ids_out  = gs->msg_ids_out;
3773:   dptr2                  = gs->out;
3774:   in1=in2                = gs->in;

3776:   /* post the receives */
3777:   /*  msg_nodes=nodes; */
3778:   do
3779:     {
3780:       /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3781:          second one *list and do list++ afterwards */
3782:       if ((my_id|mask)==(*list|mask))
3783:         {
3784:           MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3785:                     gs->gs_comm, msg_ids_in++);
3786:           in1 += *size++;
3787:         }
3788:       else
3789:         {list++; size++;}
3790:     }
3791:   while (*++msg_nodes);

3793:   /* load gs values into in out gs buffers */
3794:   while (*iptr >= 0)
3795:     {*dptr3++ = *(in_vals + *iptr++);}

3797:   /* load out buffers and post the sends */
3798:   msg_nodes=nodes;
3799:   list = msg_list;
3800:   while ((iptr = *msg_nodes++))
3801:     {
3802:       if ((my_id|mask)==(*list|mask))
3803:         {
3804:           dptr3 = dptr2;
3805:           while (*iptr >= 0)
3806:             {*dptr2++ = *(dptr1 + *iptr++);}
3807:           /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3808:           /* is msg_ids_out++ correct? */
3809:           MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++,
3810:                     MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3811:         }
3812:       else
3813:         {list++; msg_size++;}
3814:     }

3816:   /* do the tree while we're waiting */
3817:   if (gs->max_left_over)
3818:     {gs_gop_tree_plus_hc(gs,in_vals,dim);}

3820:   /* process the received data */
3821:   msg_nodes=nodes;
3822:   list = msg_list;
3823:   while ((iptr = *nodes++))
3824:     {
3825:       if ((my_id|mask)==(*list|mask))
3826:         {
3827:           /* Should I check the return value of MPI_Wait() or status? */
3828:           /* Can this loop be replaced by a call to MPI_Waitall()? */
3829:           MPI_Wait(ids_in++, &status);
3830:           while (*iptr >= 0)
3831:             {*(dptr1 + *iptr++) += *in2++;}
3832:         }
3833:       list++;
3834:     }

3836:   /* replace vals */
3837:   while (*pw >= 0)
3838:     {*(in_vals + *pw++) = *dptr1++;}

3840:   /* clear isend message handles */
3841:   /* This changed for clarity though it could be the same */
3842:   while (*msg_nodes++)
3843:     {
3844:       if ((my_id|mask)==(*msg_list|mask))
3845:         {
3846:           /* Should I check the return value of MPI_Wait() or status? */
3847:           /* Can this loop be replaced by a call to MPI_Waitall()? */
3848:           MPI_Wait(ids_out++, &status);
3849:         }
3850:       msg_list++;
3851:     }


3854: }



3858: /******************************************************************************
3859: Function: gather_scatter

3861: Input : 
3862: Output: 
3863: Return: 
3864: Description: 
3865: ******************************************************************************/
3866: static
3867: void
3868: gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3869: {
3870:   int size;
3871:   int *in, *out;
3872:   PetscScalar *buf, *work;
3873:   int op[] = {GL_ADD,0};

3875:   in   = gs->tree_map_in;
3876:   out  = gs->tree_map_out;
3877:   buf  = gs->tree_buf;
3878:   work = gs->tree_work;
3879:   size = gs->tree_nel;

3881:   rvec_zero(buf,size);

3883:   while (*in >= 0)
3884:     {*(buf + *out++) = *(vals + *in++);}

3886:   in   = gs->tree_map_in;
3887:   out  = gs->tree_map_out;

3889:   grop_hc(buf,work,size,op,dim);

3891:   while (*in >= 0)
3892:     {*(vals + *in++) = *(buf + *out++);}

3894: }