Actual source code: gs.c
1: #define PETSCKSP_DLL
3: /***********************************gs.c***************************************
5: Author: Henry M. Tufo III
7: e-mail: hmt@cs.brown.edu
9: snail-mail:
10: Division of Applied Mathematics
11: Brown University
12: Providence, RI 02912
14: Last Modification:
15: 6.21.97
16: ************************************gs.c**************************************/
18: /***********************************gs.c***************************************
19: File Description:
20: -----------------
22: ************************************gs.c**************************************/
24: #include src/ksp/pc/impls/tfs/tfs.h
26: /* default length of number of items via tree - doubles if exceeded */
27: #define TREE_BUF_SZ 2048;
28: #define GS_VEC_SZ 1
32: /***********************************gs.c***************************************
33: Type: struct gather_scatter_id
34: ------------------------------
36: ************************************gs.c**************************************/
37: typedef struct gather_scatter_id {
38: int id;
39: int nel_min;
40: int nel_max;
41: int nel_sum;
42: int negl;
43: int gl_max;
44: int gl_min;
45: int repeats;
46: int ordered;
47: int positive;
48: PetscScalar *vals;
50: /* bit mask info */
51: int *my_proc_mask;
52: int mask_sz;
53: int *ngh_buf;
54: int ngh_buf_sz;
55: int *nghs;
56: int num_nghs;
57: int max_nghs;
58: int *pw_nghs;
59: int num_pw_nghs;
60: int *tree_nghs;
61: int num_tree_nghs;
63: int num_loads;
65: /* repeats == true -> local info */
66: int nel; /* number of unique elememts */
67: int *elms; /* of size nel */
68: int nel_total;
69: int *local_elms; /* of size nel_total */
70: int *companion; /* of size nel_total */
72: /* local info */
73: int num_local_total;
74: int local_strength;
75: int num_local;
76: int *num_local_reduce;
77: int **local_reduce;
78: int num_local_gop;
79: int *num_gop_local_reduce;
80: int **gop_local_reduce;
82: /* pairwise info */
83: int level;
84: int num_pairs;
85: int max_pairs;
86: int loc_node_pairs;
87: int max_node_pairs;
88: int min_node_pairs;
89: int avg_node_pairs;
90: int *pair_list;
91: int *msg_sizes;
92: int **node_list;
93: int len_pw_list;
94: int *pw_elm_list;
95: PetscScalar *pw_vals;
97: MPI_Request *msg_ids_in;
98: MPI_Request *msg_ids_out;
100: PetscScalar *out;
101: PetscScalar *in;
102: int msg_total;
104: /* tree - crystal accumulator info */
105: int max_left_over;
106: int *pre;
107: int *in_num;
108: int *out_num;
109: int **in_list;
110: int **out_list;
112: /* new tree work*/
113: int tree_nel;
114: int *tree_elms;
115: PetscScalar *tree_buf;
116: PetscScalar *tree_work;
118: int tree_map_sz;
119: int *tree_map_in;
120: int *tree_map_out;
122: /* current memory status */
123: int gl_bss_min;
124: int gl_perm_min;
126: /* max segment size for gs_gop_vec() */
127: int vec_sz;
129: /* hack to make paul happy */
130: MPI_Comm gs_comm;
132: } gs_id;
135: /* to be made public */
137: /* PRIVATE - and definitely not exported */
138: /*static void gs_print_template( gs_id* gs, int who);*/
139: /*static void gs_print_stemplate( gs_id* gs, int who);*/
141: static gs_id *gsi_check_args(int *elms, int nel, int level);
142: static void gsi_via_bit_mask(gs_id *gs);
143: static void get_ngh_buf(gs_id *gs);
144: static void set_pairwise(gs_id *gs);
145: static gs_id * gsi_new(void);
146: static void set_tree(gs_id *gs);
148: /* same for all but vector flavor */
149: static void gs_gop_local_out(gs_id *gs, PetscScalar *vals);
150: /* vector flavor */
151: static void gs_gop_vec_local_out(gs_id *gs, PetscScalar *vals, int step);
153: static void gs_gop_vec_plus(gs_id *gs, PetscScalar *in_vals, int step);
154: static void gs_gop_vec_pairwise_plus(gs_id *gs, PetscScalar *in_vals, int step);
155: static void gs_gop_vec_local_plus(gs_id *gs, PetscScalar *vals, int step);
156: static void gs_gop_vec_local_in_plus(gs_id *gs, PetscScalar *vals, int step);
157: static void gs_gop_vec_tree_plus(gs_id *gs, PetscScalar *vals, int step);
160: static void gs_gop_plus(gs_id *gs, PetscScalar *in_vals);
161: static void gs_gop_pairwise_plus(gs_id *gs, PetscScalar *in_vals);
162: static void gs_gop_local_plus(gs_id *gs, PetscScalar *vals);
163: static void gs_gop_local_in_plus(gs_id *gs, PetscScalar *vals);
164: static void gs_gop_tree_plus(gs_id *gs, PetscScalar *vals);
166: static void gs_gop_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
167: static void gs_gop_pairwise_plus_hc(gs_id *gs, PetscScalar *in_vals, int dim);
168: static void gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim);
170: static void gs_gop_times(gs_id *gs, PetscScalar *in_vals);
171: static void gs_gop_pairwise_times(gs_id *gs, PetscScalar *in_vals);
172: static void gs_gop_local_times(gs_id *gs, PetscScalar *vals);
173: static void gs_gop_local_in_times(gs_id *gs, PetscScalar *vals);
174: static void gs_gop_tree_times(gs_id *gs, PetscScalar *vals);
176: static void gs_gop_min(gs_id *gs, PetscScalar *in_vals);
177: static void gs_gop_pairwise_min(gs_id *gs, PetscScalar *in_vals);
178: static void gs_gop_local_min(gs_id *gs, PetscScalar *vals);
179: static void gs_gop_local_in_min(gs_id *gs, PetscScalar *vals);
180: static void gs_gop_tree_min(gs_id *gs, PetscScalar *vals);
182: static void gs_gop_min_abs(gs_id *gs, PetscScalar *in_vals);
183: static void gs_gop_pairwise_min_abs(gs_id *gs, PetscScalar *in_vals);
184: static void gs_gop_local_min_abs(gs_id *gs, PetscScalar *vals);
185: static void gs_gop_local_in_min_abs(gs_id *gs, PetscScalar *vals);
186: static void gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals);
188: static void gs_gop_max(gs_id *gs, PetscScalar *in_vals);
189: static void gs_gop_pairwise_max(gs_id *gs, PetscScalar *in_vals);
190: static void gs_gop_local_max(gs_id *gs, PetscScalar *vals);
191: static void gs_gop_local_in_max(gs_id *gs, PetscScalar *vals);
192: static void gs_gop_tree_max(gs_id *gs, PetscScalar *vals);
194: static void gs_gop_max_abs(gs_id *gs, PetscScalar *in_vals);
195: static void gs_gop_pairwise_max_abs(gs_id *gs, PetscScalar *in_vals);
196: static void gs_gop_local_max_abs(gs_id *gs, PetscScalar *vals);
197: static void gs_gop_local_in_max_abs(gs_id *gs, PetscScalar *vals);
198: static void gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals);
200: static void gs_gop_exists(gs_id *gs, PetscScalar *in_vals);
201: static void gs_gop_pairwise_exists(gs_id *gs, PetscScalar *in_vals);
202: static void gs_gop_local_exists(gs_id *gs, PetscScalar *vals);
203: static void gs_gop_local_in_exists(gs_id *gs, PetscScalar *vals);
204: static void gs_gop_tree_exists(gs_id *gs, PetscScalar *vals);
206: static void gs_gop_pairwise_binary(gs_id *gs, PetscScalar *in_vals, rbfp fct);
207: static void gs_gop_local_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
208: static void gs_gop_local_in_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
209: static void gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct);
213: /* global vars */
214: /* from comm.c module */
216: /* module state inf and fortran interface */
217: static int num_gs_ids = 0;
219: /* should make this dynamic ... later */
220: static int msg_buf=MAX_MSG_BUF;
221: static int vec_sz=GS_VEC_SZ;
222: static int *tree_buf=NULL;
223: static int tree_buf_sz=0;
224: static int ntree=0;
227: /******************************************************************************
228: Function: gs_init_()
230: Input :
231: Output:
232: Return:
233: Description:
234: ******************************************************************************/
235: void gs_init_vec_sz(int size)
236: {
237: /* vec_ch = TRUE; */
239: vec_sz = size;
240: }
242: /******************************************************************************
243: Function: gs_init_()
245: Input :
246: Output:
247: Return:
248: Description:
249: ******************************************************************************/
250: void gs_init_msg_buf_sz(int buf_size)
251: {
252: /* msg_ch = TRUE; */
254: msg_buf = buf_size;
255: }
257: /******************************************************************************
258: Function: gs_init()
260: Input :
262: Output:
264: RETURN:
266: Description:
267: ******************************************************************************/
268: gs_id *
269: gs_init( int *elms, int nel, int level)
270: {
271: gs_id *gs;
272: MPI_Group gs_group;
273: MPI_Comm gs_comm;
275: /* ensure that communication package has been initialized */
276: comm_init();
279: /* determines if we have enough dynamic/semi-static memory */
280: /* checks input, allocs and sets gd_id template */
281: gs = gsi_check_args(elms,nel,level);
283: /* only bit mask version up and working for the moment */
284: /* LATER :: get int list version working for sparse pblms */
285: gsi_via_bit_mask(gs);
288: MPI_Comm_group(MPI_COMM_WORLD,&gs_group);
289: MPI_Comm_create(MPI_COMM_WORLD,gs_group,&gs_comm);
290: gs->gs_comm=gs_comm;
292: return(gs);
293: }
297: /******************************************************************************
298: Function: gsi_new()
300: Input :
301: Output:
302: Return:
303: Description:
305: elm list must >= 0!!!
306: elm repeats allowed
307: ******************************************************************************/
308: static
309: gs_id *
310: gsi_new(void)
311: {
312: gs_id *gs;
313: gs = (gs_id *) malloc(sizeof(gs_id));
314: PetscMemzero(gs,sizeof(gs_id));
315: return(gs);
316: }
320: /******************************************************************************
321: Function: gsi_check_args()
323: Input :
324: Output:
325: Return:
326: Description:
328: elm list must >= 0!!!
329: elm repeats allowed
330: local working copy of elms is sorted
331: ******************************************************************************/
332: static
333: gs_id *
334: gsi_check_args(int *in_elms, int nel, int level)
335: {
336: int i, j, k, t2;
337: int *companion, *elms, *unique, *iptr;
338: int num_local=0, *num_to_reduce, **local_reduce;
339: int oprs[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_MIN,GL_B_AND};
340: int vals[sizeof(oprs)/sizeof(oprs[0])-1];
341: int work[sizeof(oprs)/sizeof(oprs[0])-1];
342: gs_id *gs;
346: #ifdef SAFE
347: if (!in_elms)
348: {error_msg_fatal("elms point to nothing!!!\n");}
350: if (nel<0)
351: {error_msg_fatal("can't have fewer than 0 elms!!!\n");}
353: if (nel==0)
354: {error_msg_warning("I don't have any elements!!!\n");}
355: #endif
357: /* get space for gs template */
358: gs = gsi_new();
359: gs->id = ++num_gs_ids;
361: /* hmt 6.4.99 */
362: /* caller can set global ids that don't participate to 0 */
363: /* gs_init ignores all zeros in elm list */
364: /* negative global ids are still invalid */
365: for (i=j=0;i<nel;i++)
366: {if (in_elms[i]!=0) {j++;}}
368: k=nel; nel=j;
370: /* copy over in_elms list and create inverse map */
371: elms = (int*) malloc((nel+1)*sizeof(PetscInt));
372: companion = (int*) malloc(nel*sizeof(PetscInt));
373: /* ivec_c_index(companion,nel); */
374: /* ivec_copy(elms,in_elms,nel); */
375: for (i=j=0;i<k;i++)
376: {
377: if (in_elms[i]!=0)
378: {elms[j] = in_elms[i]; companion[j++] = i;}
379: }
381: if (j!=nel)
382: {error_msg_fatal("nel j mismatch!\n");}
384: #ifdef SAFE
385: /* pre-pass ... check to see if sorted */
386: elms[nel] = INT_MAX;
387: iptr = elms;
388: unique = elms+1;
389: j=0;
390: while (*iptr!=INT_MAX)
391: {
392: if (*iptr++>*unique++)
393: {j=1; break;}
394: }
396: /* set up inverse map */
397: if (j)
398: {
399: error_msg_warning("gsi_check_args() :: elm list *not* sorted!\n");
400: SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
401: }
402: else
403: {error_msg_warning("gsi_check_args() :: elm list sorted!\n");}
404: #else
405: SMI_sort((void*)elms, (void*)companion, nel, SORT_INTEGER);
406: #endif
407: elms[nel] = INT_MIN;
409: /* first pass */
410: /* determine number of unique elements, check pd */
411: for (i=k=0;i<nel;i+=j)
412: {
413: t2 = elms[i];
414: j=++i;
415:
416: /* clump 'em for now */
417: while (elms[j]==t2) {j++;}
418:
419: /* how many together and num local */
420: if (j-=i)
421: {num_local++; k+=j;}
422: }
424: /* how many unique elements? */
425: gs->repeats=k;
426: gs->nel = nel-k;
429: /* number of repeats? */
430: gs->num_local = num_local;
431: num_local+=2;
432: gs->local_reduce=local_reduce=(int **)malloc(num_local*sizeof(PetscInt*));
433: gs->num_local_reduce=num_to_reduce=(int*) malloc(num_local*sizeof(PetscInt));
435: unique = (int*) malloc((gs->nel+1)*sizeof(PetscInt));
436: gs->elms = unique;
437: gs->nel_total = nel;
438: gs->local_elms = elms;
439: gs->companion = companion;
441: /* compess map as well as keep track of local ops */
442: for (num_local=i=j=0;i<gs->nel;i++)
443: {
444: k=j;
445: t2 = unique[i] = elms[j];
446: companion[i] = companion[j];
447:
448: while (elms[j]==t2) {j++;}
450: if ((t2=(j-k))>1)
451: {
452: /* number together */
453: num_to_reduce[num_local] = t2++;
454: iptr = local_reduce[num_local++] = (int*)malloc(t2*sizeof(PetscInt));
456: /* to use binary searching don't remap until we check intersection */
457: *iptr++ = i;
458:
459: /* note that we're skipping the first one */
460: while (++k<j)
461: {*(iptr++) = companion[k];}
462: *iptr = -1;
463: }
464: }
466: /* sentinel for ngh_buf */
467: unique[gs->nel]=INT_MAX;
469: /* for two partition sort hack */
470: num_to_reduce[num_local] = 0;
471: local_reduce[num_local] = NULL;
472: num_to_reduce[++num_local] = 0;
473: local_reduce[num_local] = NULL;
475: /* load 'em up */
476: /* note one extra to hold NON_UNIFORM flag!!! */
477: vals[2] = vals[1] = vals[0] = nel;
478: if (gs->nel>0)
479: {
480: vals[3] = unique[0]; /* ivec_lb(elms,nel); */
481: vals[4] = unique[gs->nel-1]; /* ivec_ub(elms,nel); */
482: }
483: else
484: {
485: vals[3] = INT_MAX; /* ivec_lb(elms,nel); */
486: vals[4] = INT_MIN; /* ivec_ub(elms,nel); */
487: }
488: vals[5] = level;
489: vals[6] = num_gs_ids;
491: /* GLOBAL: send 'em out */
492: giop(vals,work,sizeof(oprs)/sizeof(oprs[0])-1,oprs);
494: /* must be semi-pos def - only pairwise depends on this */
495: /* LATER - remove this restriction */
496: if (vals[3]<0)
497: {error_msg_fatal("gsi_check_args() :: system not semi-pos def ::%d\n",vals[3]);}
499: if (vals[4]==INT_MAX)
500: {error_msg_fatal("gsi_check_args() :: system ub too large ::%d!\n",vals[4]);}
502: gs->nel_min = vals[0];
503: gs->nel_max = vals[1];
504: gs->nel_sum = vals[2];
505: gs->gl_min = vals[3];
506: gs->gl_max = vals[4];
507: gs->negl = vals[4]-vals[3]+1;
509: if (gs->negl<=0)
510: {error_msg_fatal("gsi_check_args() :: system empty or neg :: %d\n",gs->negl);}
511:
512: /* LATER :: add level == -1 -> program selects level */
513: if (vals[5]<0)
514: {vals[5]=0;}
515: else if (vals[5]>num_nodes)
516: {vals[5]=num_nodes;}
517: gs->level = vals[5];
519: return(gs);
520: }
523: /******************************************************************************
524: Function: gsi_via_bit_mask()
526: Input :
527: Output:
528: Return:
529: Description:
532: ******************************************************************************/
533: static
534: void
535: gsi_via_bit_mask(gs_id *gs)
536: {
537: int i, nel, *elms;
538: int t1;
539: int **reduce;
540: int *map;
542: /* totally local removes ... ct_bits == 0 */
543: get_ngh_buf(gs);
545: if (gs->level)
546: {set_pairwise(gs);}
548: if (gs->max_left_over)
549: {set_tree(gs);}
551: /* intersection local and pairwise/tree? */
552: gs->num_local_total = gs->num_local;
553: gs->gop_local_reduce = gs->local_reduce;
554: gs->num_gop_local_reduce = gs->num_local_reduce;
556: map = gs->companion;
558: /* is there any local compression */
559: if (!gs->num_local) {
560: gs->local_strength = NONE;
561: gs->num_local_gop = 0;
562: } else {
563: /* ok find intersection */
564: map = gs->companion;
565: reduce = gs->local_reduce;
566: for (i=0, t1=0; i<gs->num_local; i++, reduce++)
567: {
568: if ((ivec_binary_search(**reduce,gs->pw_elm_list,gs->len_pw_list)>=0)
569: ||
570: ivec_binary_search(**reduce,gs->tree_map_in,gs->tree_map_sz)>=0)
571: {
572: /* printf("C%d :: i=%d, **reduce=%d\n",my_id,i,**reduce); */
573: t1++;
574: if (gs->num_local_reduce[i]<=0)
575: {error_msg_fatal("nobody in list?");}
576: gs->num_local_reduce[i] *= -1;
577: }
578: **reduce=map[**reduce];
579: }
581: /* intersection is empty */
582: if (!t1)
583: {
584: gs->local_strength = FULL;
585: gs->num_local_gop = 0;
586: }
587: /* intersection not empty */
588: else
589: {
590: gs->local_strength = PARTIAL;
591: SMI_sort((void*)gs->num_local_reduce, (void*)gs->local_reduce,
592: gs->num_local + 1, SORT_INT_PTR);
594: gs->num_local_gop = t1;
595: gs->num_local_total = gs->num_local;
596: gs->num_local -= t1;
597: gs->gop_local_reduce = gs->local_reduce;
598: gs->num_gop_local_reduce = gs->num_local_reduce;
600: for (i=0; i<t1; i++)
601: {
602: if (gs->num_gop_local_reduce[i]>=0)
603: {error_msg_fatal("they aren't negative?");}
604: gs->num_gop_local_reduce[i] *= -1;
605: gs->local_reduce++;
606: gs->num_local_reduce++;
607: }
608: gs->local_reduce++;
609: gs->num_local_reduce++;
610: }
611: }
613: elms = gs->pw_elm_list;
614: nel = gs->len_pw_list;
615: for (i=0; i<nel; i++)
616: {elms[i] = map[elms[i]];}
618: elms = gs->tree_map_in;
619: nel = gs->tree_map_sz;
620: for (i=0; i<nel; i++)
621: {elms[i] = map[elms[i]];}
623: /* clean up */
624: free((void*) gs->local_elms);
625: free((void*) gs->companion);
626: free((void*) gs->elms);
627: free((void*) gs->ngh_buf);
628: gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
629: }
633: /******************************************************************************
634: Function: place_in_tree()
636: Input :
637: Output:
638: Return:
639: Description:
642: ******************************************************************************/
643: static
644: void
645: place_in_tree( int elm)
646: {
647: int *tp, n;
650: if (ntree==tree_buf_sz)
651: {
652: if (tree_buf_sz)
653: {
654: tp = tree_buf;
655: n = tree_buf_sz;
656: tree_buf_sz<<=1;
657: tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
658: ivec_copy(tree_buf,tp,n);
659: free(tp);
660: }
661: else
662: {
663: tree_buf_sz = TREE_BUF_SZ;
664: tree_buf = (int*)malloc(tree_buf_sz*sizeof(PetscInt));
665: }
666: }
668: tree_buf[ntree++] = elm;
669: }
673: /******************************************************************************
674: Function: get_ngh_buf()
676: Input :
677: Output:
678: Return:
679: Description:
682: ******************************************************************************/
683: static
684: void
685: get_ngh_buf(gs_id *gs)
686: {
687: int i, j, npw=0, ntree_map=0;
688: int p_mask_size, ngh_buf_size, buf_size;
689: int *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
690: int *ngh_buf, *buf1, *buf2;
691: int offset, per_load, num_loads, or_ct, start, end;
692: int *ptr1, *ptr2, i_start, negl, nel, *elms;
693: int oper=GL_B_OR;
694: int *ptr3, *t_mask, level, ct1, ct2;
696: /* to make life easier */
697: nel = gs->nel;
698: elms = gs->elms;
699: level = gs->level;
700:
701: /* det #bytes needed for processor bit masks and init w/mask cor. to my_id */
702: p_mask = (int*) malloc(p_mask_size=len_bit_mask(num_nodes));
703: set_bit_mask(p_mask,p_mask_size,my_id);
705: /* allocate space for masks and info bufs */
706: gs->nghs = sh_proc_mask = (int*) malloc(p_mask_size);
707: gs->pw_nghs = pw_sh_proc_mask = (int*) malloc(p_mask_size);
708: gs->ngh_buf_sz = ngh_buf_size = p_mask_size*nel;
709: t_mask = (int*) malloc(p_mask_size);
710: gs->ngh_buf = ngh_buf = (int*) malloc(ngh_buf_size);
712: /* comm buffer size ... memory usage bounded by ~2*msg_buf */
713: /* had thought I could exploit rendezvous threshold */
715: /* default is one pass */
716: per_load = negl = gs->negl;
717: gs->num_loads = num_loads = 1;
718: i=p_mask_size*negl;
720: /* possible overflow on buffer size */
721: /* overflow hack */
722: if (i<0) {i=INT_MAX;}
724: buf_size = PetscMin(msg_buf,i);
726: /* can we do it? */
727: if (p_mask_size>buf_size)
728: {error_msg_fatal("get_ngh_buf() :: buf<pms :: %d>%d\n",p_mask_size,buf_size);}
730: /* get giop buf space ... make *only* one malloc */
731: buf1 = (int*) malloc(buf_size<<1);
733: /* more than one gior exchange needed? */
734: if (buf_size!=i)
735: {
736: per_load = buf_size/p_mask_size;
737: buf_size = per_load*p_mask_size;
738: gs->num_loads = num_loads = negl/per_load + (negl%per_load>0);
739: }
742: /* convert buf sizes from #bytes to #ints - 32 bit only! */
743: #ifdef SAFE
744: p_mask_size/=sizeof(PetscInt); ngh_buf_size/=sizeof(PetscInt); buf_size/=sizeof(PetscInt);
745: #else
746: p_mask_size>>=2; ngh_buf_size>>=2; buf_size>>=2;
747: #endif
748:
749: /* find giop work space */
750: buf2 = buf1+buf_size;
752: /* hold #ints needed for processor masks */
753: gs->mask_sz=p_mask_size;
755: /* init buffers */
756: ivec_zero(sh_proc_mask,p_mask_size);
757: ivec_zero(pw_sh_proc_mask,p_mask_size);
758: ivec_zero(ngh_buf,ngh_buf_size);
760: /* HACK reset tree info */
761: tree_buf=NULL;
762: tree_buf_sz=ntree=0;
764: /* queue the tree elements for now */
765: /* elms_q = new_queue(); */
766:
767: /* can also queue tree info for pruned or forest implememtation */
768: /* mask_q = new_queue(); */
770: /* ok do it */
771: for (ptr1=ngh_buf,ptr2=elms,end=gs->gl_min,or_ct=i=0; or_ct<num_loads; or_ct++)
772: {
773: /* identity for bitwise or is 000...000 */
774: ivec_zero(buf1,buf_size);
776: /* load msg buffer */
777: for (start=end,end+=per_load,i_start=i; (offset=*ptr2)<end; i++, ptr2++)
778: {
779: offset = (offset-start)*p_mask_size;
780: ivec_copy(buf1+offset,p_mask,p_mask_size);
781: }
783: /* GLOBAL: pass buffer */
784: giop(buf1,buf2,buf_size,&oper);
787: /* unload buffer into ngh_buf */
788: ptr2=(elms+i_start);
789: for(ptr3=buf1,j=start; j<end; ptr3+=p_mask_size,j++)
790: {
791: /* I own it ... may have to pairwise it */
792: if (j==*ptr2)
793: {
794: /* do i share it w/anyone? */
795: #ifdef SAFE
796: ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
797: #else
798: ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
799: #endif
800: /* guess not */
801: if (ct1<2)
802: {ptr2++; ptr1+=p_mask_size; continue;}
804: /* i do ... so keep info and turn off my bit */
805: ivec_copy(ptr1,ptr3,p_mask_size);
806: ivec_xor(ptr1,p_mask,p_mask_size);
807: ivec_or(sh_proc_mask,ptr1,p_mask_size);
808:
809: /* is it to be done pairwise? */
810: if (--ct1<=level)
811: {
812: npw++;
813:
814: /* turn on high bit to indicate pw need to process */
815: *ptr2++ |= TOP_BIT;
816: ivec_or(pw_sh_proc_mask,ptr1,p_mask_size);
817: ptr1+=p_mask_size;
818: continue;
819: }
821: /* get set for next and note that I have a tree contribution */
822: /* could save exact elm index for tree here -> save a search */
823: ptr2++; ptr1+=p_mask_size; ntree_map++;
824: }
825: /* i don't but still might be involved in tree */
826: else
827: {
829: /* shared by how many? */
830: #ifdef SAFE
831: ct1 = ct_bits((char *)ptr3,p_mask_size*sizeof(PetscInt));
832: #else
833: ct1 = ct_bits((char *)ptr3,p_mask_size<<2);
834: #endif
836: /* none! */
837: if (ct1<2)
838: {continue;}
840: /* is it going to be done pairwise? but not by me of course!*/
841: if (--ct1<=level)
842: {continue;}
843: }
844: /* LATER we're going to have to process it NOW */
845: /* nope ... tree it */
846: place_in_tree(j);
847: }
848: }
850: free((void*)t_mask);
851: free((void*)buf1);
853: gs->len_pw_list=npw;
854: gs->num_nghs = ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
856: /* expand from bit mask list to int list and save ngh list */
857: gs->nghs = (int*) malloc(gs->num_nghs * sizeof(PetscInt));
858: bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),gs->nghs);
860: gs->num_pw_nghs = ct_bits((char *)pw_sh_proc_mask,p_mask_size*sizeof(PetscInt));
862: oper = GL_MAX;
863: ct1 = gs->num_nghs;
864: giop(&ct1,&ct2,1,&oper);
865: gs->max_nghs = ct1;
867: gs->tree_map_sz = ntree_map;
868: gs->max_left_over=ntree;
870: free((void*)p_mask);
871: free((void*)sh_proc_mask);
873: }
879: /******************************************************************************
880: Function: pairwise_init()
882: Input :
883: Output:
884: Return:
885: Description:
887: if an element is shared by fewer that level# of nodes do pairwise exch
888: ******************************************************************************/
889: static
890: void
891: set_pairwise(gs_id *gs)
892: {
893: int i, j;
894: int p_mask_size;
895: int *p_mask, *sh_proc_mask, *tmp_proc_mask;
896: int *ngh_buf, *buf2;
897: int offset;
898: int *msg_list, *msg_size, **msg_nodes, nprs;
899: int *pairwise_elm_list, len_pair_list=0;
900: int *iptr, t1, i_start, nel, *elms;
901: int ct;
905: /* to make life easier */
906: nel = gs->nel;
907: elms = gs->elms;
908: ngh_buf = gs->ngh_buf;
909: sh_proc_mask = gs->pw_nghs;
911: /* need a few temp masks */
912: p_mask_size = len_bit_mask(num_nodes);
913: p_mask = (int*) malloc(p_mask_size);
914: tmp_proc_mask = (int*) malloc(p_mask_size);
916: /* set mask to my my_id's bit mask */
917: set_bit_mask(p_mask,p_mask_size,my_id);
919: #ifdef SAFE
920: p_mask_size /= sizeof(PetscInt);
921: #else
922: p_mask_size >>= 2;
923: #endif
924:
925: len_pair_list=gs->len_pw_list;
926: gs->pw_elm_list=pairwise_elm_list=(int*)malloc((len_pair_list+1)*sizeof(PetscInt));
928: /* how many processors (nghs) do we have to exchange with? */
929: nprs=gs->num_pairs=ct_bits((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt));
932: /* allocate space for gs_gop() info */
933: gs->pair_list = msg_list = (int*) malloc(sizeof(PetscInt)*nprs);
934: gs->msg_sizes = msg_size = (int*) malloc(sizeof(PetscInt)*nprs);
935: gs->node_list = msg_nodes = (int **) malloc(sizeof(PetscInt*)*(nprs+1));
937: /* init msg_size list */
938: ivec_zero(msg_size,nprs);
940: /* expand from bit mask list to int list */
941: bm_to_proc((char *)sh_proc_mask,p_mask_size*sizeof(PetscInt),msg_list);
942:
943: /* keep list of elements being handled pairwise */
944: for (i=j=0;i<nel;i++)
945: {
946: if (elms[i] & TOP_BIT)
947: {elms[i] ^= TOP_BIT; pairwise_elm_list[j++] = i;}
948: }
949: pairwise_elm_list[j] = -1;
951: gs->msg_ids_out = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1));
952: gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
953: gs->msg_ids_in = (MPI_Request *) malloc(sizeof(MPI_Request)*(nprs+1));
954: gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
955: gs->pw_vals = (PetscScalar *) malloc(sizeof(PetscScalar)*len_pair_list*vec_sz);
957: /* find who goes to each processor */
958: for (i_start=i=0;i<nprs;i++)
959: {
960: /* processor i's mask */
961: set_bit_mask(p_mask,p_mask_size*sizeof(PetscInt),msg_list[i]);
963: /* det # going to processor i */
964: for (ct=j=0;j<len_pair_list;j++)
965: {
966: buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
967: ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
968: if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
969: {ct++;}
970: }
971: msg_size[i] = ct;
972: i_start = PetscMax(i_start,ct);
974: /*space to hold nodes in message to first neighbor */
975: msg_nodes[i] = iptr = (int*) malloc(sizeof(PetscInt)*(ct+1));
977: for (j=0;j<len_pair_list;j++)
978: {
979: buf2 = ngh_buf+(pairwise_elm_list[j]*p_mask_size);
980: ivec_and3(tmp_proc_mask,p_mask,buf2,p_mask_size);
981: if (ct_bits((char *)tmp_proc_mask,p_mask_size*sizeof(PetscInt)))
982: {*iptr++ = j;}
983: }
984: *iptr = -1;
985: }
986: msg_nodes[nprs] = NULL;
988: j=gs->loc_node_pairs=i_start;
989: t1 = GL_MAX;
990: giop(&i_start,&offset,1,&t1);
991: gs->max_node_pairs = i_start;
993: i_start=j;
994: t1 = GL_MIN;
995: giop(&i_start,&offset,1,&t1);
996: gs->min_node_pairs = i_start;
998: i_start=j;
999: t1 = GL_ADD;
1000: giop(&i_start,&offset,1,&t1);
1001: gs->avg_node_pairs = i_start/num_nodes + 1;
1003: i_start=nprs;
1004: t1 = GL_MAX;
1005: giop(&i_start,&offset,1,&t1);
1006: gs->max_pairs = i_start;
1009: /* remap pairwise in tail of gsi_via_bit_mask() */
1010: gs->msg_total = ivec_sum(gs->msg_sizes,nprs);
1011: gs->out = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1012: gs->in = (PetscScalar *) malloc(sizeof(PetscScalar)*gs->msg_total*vec_sz);
1014: /* reset malloc pool */
1015: free((void*)p_mask);
1016: free((void*)tmp_proc_mask);
1018: }
1022: /******************************************************************************
1023: Function: set_tree()
1025: Input :
1026: Output:
1027: Return:
1028: Description:
1030: to do pruned tree just save ngh buf copy for each one and decode here!
1031: ******************************************************************************/
1032: static
1033: void
1034: set_tree(gs_id *gs)
1035: {
1036: int i, j, n, nel;
1037: int *iptr_in, *iptr_out, *tree_elms, *elms;
1040: /* local work ptrs */
1041: elms = gs->elms;
1042: nel = gs->nel;
1044: /* how many via tree */
1045: gs->tree_nel = n = ntree;
1046: gs->tree_elms = tree_elms = iptr_in = tree_buf;
1047: gs->tree_buf = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1048: gs->tree_work = (PetscScalar *) malloc(sizeof(PetscScalar)*n*vec_sz);
1049: j=gs->tree_map_sz;
1050: gs->tree_map_in = iptr_in = (int*) malloc(sizeof(PetscInt)*(j+1));
1051: gs->tree_map_out = iptr_out = (int*) malloc(sizeof(PetscInt)*(j+1));
1053: /* search the longer of the two lists */
1054: /* note ... could save this info in get_ngh_buf and save searches */
1055: if (n<=nel)
1056: {
1057: /* bijective fct w/remap - search elm list */
1058: for (i=0; i<n; i++)
1059: {
1060: if ((j=ivec_binary_search(*tree_elms++,elms,nel))>=0)
1061: {*iptr_in++ = j; *iptr_out++ = i;}
1062: }
1063: }
1064: else
1065: {
1066: for (i=0; i<nel; i++)
1067: {
1068: if ((j=ivec_binary_search(*elms++,tree_elms,n))>=0)
1069: {*iptr_in++ = i; *iptr_out++ = j;}
1070: }
1071: }
1073: /* sentinel */
1074: *iptr_in = *iptr_out = -1;
1076: }
1079: /******************************************************************************
1080: Function: gather_scatter
1082: Input :
1083: Output:
1084: Return:
1085: Description:
1086: ******************************************************************************/
1087: static
1088: void
1089: gs_gop_local_out( gs_id *gs, PetscScalar *vals)
1090: {
1091: int *num, *map, **reduce;
1092: PetscScalar tmp;
1095: num = gs->num_gop_local_reduce;
1096: reduce = gs->gop_local_reduce;
1097: while ((map = *reduce++))
1098: {
1099: /* wall */
1100: if (*num == 2)
1101: {
1102: num ++;
1103: vals[map[1]] = vals[map[0]];
1104: }
1105: /* corner shared by three elements */
1106: else if (*num == 3)
1107: {
1108: num ++;
1109: vals[map[2]] = vals[map[1]] = vals[map[0]];
1110: }
1111: /* corner shared by four elements */
1112: else if (*num == 4)
1113: {
1114: num ++;
1115: vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
1116: }
1117: /* general case ... odd geoms ... 3D*/
1118: else
1119: {
1120: num++;
1121: tmp = *(vals + *map++);
1122: while (*map >= 0)
1123: {*(vals + *map++) = tmp;}
1124: }
1125: }
1126: }
1130: /******************************************************************************
1131: Function: gather_scatter
1133: Input :
1134: Output:
1135: Return:
1136: Description:
1137: ******************************************************************************/
1138: void
1139: gs_gop_binary(gs_ADT gs, PetscScalar *vals, rbfp fct)
1140: {
1141: /* local only operations!!! */
1142: if (gs->num_local)
1143: {gs_gop_local_binary(gs,vals,fct);}
1144:
1145: /* if intersection tree/pairwise and local isn't empty */
1146: if (gs->num_local_gop)
1147: {
1148: gs_gop_local_in_binary(gs,vals,fct);
1149:
1150: /* pairwise */
1151: if (gs->num_pairs)
1152: {gs_gop_pairwise_binary(gs,vals,fct);}
1153:
1154: /* tree */
1155: else if (gs->max_left_over)
1156: {gs_gop_tree_binary(gs,vals,fct);}
1157:
1158: gs_gop_local_out(gs,vals);
1159: }
1160: /* if intersection tree/pairwise and local is empty */
1161: else
1162: {
1163: /* pairwise */
1164: if (gs->num_pairs)
1165: {gs_gop_pairwise_binary(gs,vals,fct);}
1166:
1167: /* tree */
1168: else if (gs->max_left_over)
1169: {gs_gop_tree_binary(gs,vals,fct);}
1170: }
1171: }
1175: /******************************************************************************
1176: Function: gather_scatter
1178: Input :
1179: Output:
1180: Return:
1181: Description:
1182: ******************************************************************************/
1183: static
1184: void
1185: gs_gop_local_binary( gs_id *gs, PetscScalar *vals, rbfp fct)
1186: {
1187: int *num, *map, **reduce;
1188: PetscScalar tmp;
1191: num = gs->num_local_reduce;
1192: reduce = gs->local_reduce;
1193: while ((map = *reduce))
1194: {
1195: num ++;
1196: (*fct)(&tmp,NULL,1);
1197: /* tmp = 0.0; */
1198: while (*map >= 0)
1199: {(*fct)(&tmp,(vals + *map),1); map++;}
1200: /* {tmp = (*fct)(tmp,*(vals + *map)); map++;} */
1201:
1202: map = *reduce++;
1203: while (*map >= 0)
1204: {*(vals + *map++) = tmp;}
1205: }
1206: }
1210: /******************************************************************************
1211: Function: gather_scatter
1213: Input :
1214: Output:
1215: Return:
1216: Description:
1217: ******************************************************************************/
1218: static
1219: void
1220: gs_gop_local_in_binary( gs_id *gs, PetscScalar *vals, rbfp fct)
1221: {
1222: int *num, *map, **reduce;
1223: PetscScalar *base;
1226: num = gs->num_gop_local_reduce;
1228: reduce = gs->gop_local_reduce;
1229: while ((map = *reduce++))
1230: {
1231: num++;
1232: base = vals + *map++;
1233: while (*map >= 0)
1234: {(*fct)(base,(vals + *map),1); map++;}
1235: }
1236: }
1240: /******************************************************************************
1241: Function: gather_scatter
1243: VERSION 3 ::
1245: Input :
1246: Output:
1247: Return:
1248: Description:
1249: ******************************************************************************/
1250: static
1251: void
1252: gs_gop_pairwise_binary( gs_id *gs, PetscScalar *in_vals,
1253: rbfp fct)
1254: {
1255: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1256: int *iptr, *msg_list, *msg_size, **msg_nodes;
1257: int *pw, *list, *size, **nodes;
1258: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1259: MPI_Status status;
1260: int ierr;
1263: /* strip and load s */
1264: msg_list =list = gs->pair_list;
1265: msg_size =size = gs->msg_sizes;
1266: msg_nodes=nodes = gs->node_list;
1267: iptr=pw = gs->pw_elm_list;
1268: dptr1=dptr3 = gs->pw_vals;
1269: msg_ids_in = ids_in = gs->msg_ids_in;
1270: msg_ids_out = ids_out = gs->msg_ids_out;
1271: dptr2 = gs->out;
1272: in1=in2 = gs->in;
1274: /* post the receives */
1275: /* msg_nodes=nodes; */
1276: do
1277: {
1278: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1279: second one *list and do list++ afterwards */
1280: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1281: gs->gs_comm, msg_ids_in++);
1282: in1 += *size++;
1283: }
1284: while (*++msg_nodes);
1285: msg_nodes=nodes;
1287: /* load gs values into in out gs buffers */
1288: while (*iptr >= 0)
1289: {*dptr3++ = *(in_vals + *iptr++);}
1291: /* load out buffers and post the sends */
1292: while ((iptr = *msg_nodes++))
1293: {
1294: dptr3 = dptr2;
1295: while (*iptr >= 0)
1296: {*dptr2++ = *(dptr1 + *iptr++);}
1297: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1298: /* is msg_ids_out++ correct? */
1299: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1300: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1301: }
1303: if (gs->max_left_over)
1304: {gs_gop_tree_binary(gs,in_vals,fct);}
1306: /* process the received data */
1307: msg_nodes=nodes;
1308: while ((iptr = *nodes++))
1309: {
1310: /* Should I check the return value of MPI_Wait() or status? */
1311: /* Can this loop be replaced by a call to MPI_Waitall()? */
1312: MPI_Wait(ids_in++, &status);
1313: while (*iptr >= 0)
1314: {(*fct)((dptr1 + *iptr),in2,1); iptr++; in2++;}
1315: /* {*(dptr1 + *iptr) = (*fct)(*(dptr1 + *iptr),*in2); iptr++; in2++;} */
1316: }
1318: /* replace vals */
1319: while (*pw >= 0)
1320: {*(in_vals + *pw++) = *dptr1++;}
1322: /* clear isend message handles */
1323: /* This changed for clarity though it could be the same */
1324: while (*msg_nodes++)
1325: /* Should I check the return value of MPI_Wait() or status? */
1326: /* Can this loop be replaced by a call to MPI_Waitall()? */
1327: {MPI_Wait(ids_out++, &status);}
1328: }
1332: /******************************************************************************
1333: Function: gather_scatter
1335: Input :
1336: Output:
1337: Return:
1338: Description:
1339: ******************************************************************************/
1340: static
1341: void
1342: gs_gop_tree_binary(gs_id *gs, PetscScalar *vals, rbfp fct)
1343: {
1344: int size;
1345: int *in, *out;
1346: PetscScalar *buf, *work;
1348: in = gs->tree_map_in;
1349: out = gs->tree_map_out;
1350: buf = gs->tree_buf;
1351: work = gs->tree_work;
1352: size = gs->tree_nel;
1354: /* load vals vector w/identity */
1355: (*fct)(buf,NULL,size);
1356:
1357: /* load my contribution into val vector */
1358: while (*in >= 0)
1359: {(*fct)((buf + *out++),(vals + *in++),-1);}
1361: gfop(buf,work,size,(vbfp)fct,MPIU_SCALAR,0);
1363: in = gs->tree_map_in;
1364: out = gs->tree_map_out;
1365: while (*in >= 0)
1366: {(*fct)((vals + *in++),(buf + *out++),-1);}
1368: }
1373: /******************************************************************************
1374: Function: gather_scatter
1376: Input :
1377: Output:
1378: Return:
1379: Description:
1380: ******************************************************************************/
1381: void
1382: gs_gop( gs_id *gs, PetscScalar *vals, const char *op)
1383: {
1384: switch (*op) {
1385: case '+':
1386: gs_gop_plus(gs,vals);
1387: break;
1388: case '*':
1389: gs_gop_times(gs,vals);
1390: break;
1391: case 'a':
1392: gs_gop_min_abs(gs,vals);
1393: break;
1394: case 'A':
1395: gs_gop_max_abs(gs,vals);
1396: break;
1397: case 'e':
1398: gs_gop_exists(gs,vals);
1399: break;
1400: case 'm':
1401: gs_gop_min(gs,vals);
1402: break;
1403: case 'M':
1404: gs_gop_max(gs,vals); break;
1405: /*
1406: if (*(op+1)=='\0')
1407: {gs_gop_max(gs,vals); break;}
1408: else if (*(op+1)=='X')
1409: {gs_gop_max_abs(gs,vals); break;}
1410: else if (*(op+1)=='N')
1411: {gs_gop_min_abs(gs,vals); break;}
1412: */
1413: default:
1414: error_msg_warning("gs_gop() :: %c is not a valid op",op[0]);
1415: error_msg_warning("gs_gop() :: default :: plus");
1416: gs_gop_plus(gs,vals);
1417: break;
1418: }
1419: }
1422: /******************************************************************************
1423: Function: gather_scatter
1425: Input :
1426: Output:
1427: Return:
1428: Description:
1429: ******************************************************************************/
1430: static void
1431: gs_gop_exists( gs_id *gs, PetscScalar *vals)
1432: {
1433: /* local only operations!!! */
1434: if (gs->num_local)
1435: {gs_gop_local_exists(gs,vals);}
1437: /* if intersection tree/pairwise and local isn't empty */
1438: if (gs->num_local_gop)
1439: {
1440: gs_gop_local_in_exists(gs,vals);
1442: /* pairwise */
1443: if (gs->num_pairs)
1444: {gs_gop_pairwise_exists(gs,vals);}
1445:
1446: /* tree */
1447: else if (gs->max_left_over)
1448: {gs_gop_tree_exists(gs,vals);}
1449:
1450: gs_gop_local_out(gs,vals);
1451: }
1452: /* if intersection tree/pairwise and local is empty */
1453: else
1454: {
1455: /* pairwise */
1456: if (gs->num_pairs)
1457: {gs_gop_pairwise_exists(gs,vals);}
1458:
1459: /* tree */
1460: else if (gs->max_left_over)
1461: {gs_gop_tree_exists(gs,vals);}
1462: }
1463: }
1467: /******************************************************************************
1468: Function: gather_scatter
1470: Input :
1471: Output:
1472: Return:
1473: Description:
1474: ******************************************************************************/
1475: static
1476: void
1477: gs_gop_local_exists( gs_id *gs, PetscScalar *vals)
1478: {
1479: int *num, *map, **reduce;
1480: PetscScalar tmp;
1483: num = gs->num_local_reduce;
1484: reduce = gs->local_reduce;
1485: while ((map = *reduce))
1486: {
1487: num ++;
1488: tmp = 0.0;
1489: while (*map >= 0)
1490: {tmp = EXISTS(tmp,*(vals + *map)); map++;}
1491:
1492: map = *reduce++;
1493: while (*map >= 0)
1494: {*(vals + *map++) = tmp;}
1495: }
1496: }
1500: /******************************************************************************
1501: Function: gather_scatter
1503: Input :
1504: Output:
1505: Return:
1506: Description:
1507: ******************************************************************************/
1508: static
1509: void
1510: gs_gop_local_in_exists( gs_id *gs, PetscScalar *vals)
1511: {
1512: int *num, *map, **reduce;
1513: PetscScalar *base;
1516: num = gs->num_gop_local_reduce;
1517: reduce = gs->gop_local_reduce;
1518: while ((map = *reduce++))
1519: {
1520: num++;
1521: base = vals + *map++;
1522: while (*map >= 0)
1523: {*base = EXISTS(*base,*(vals + *map)); map++;}
1524: }
1525: }
1529: /******************************************************************************
1530: Function: gather_scatter
1532: VERSION 3 ::
1534: Input :
1535: Output:
1536: Return:
1537: Description:
1538: ******************************************************************************/
1539: static
1540: void
1541: gs_gop_pairwise_exists( gs_id *gs, PetscScalar *in_vals)
1542: {
1543: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1544: int *iptr, *msg_list, *msg_size, **msg_nodes;
1545: int *pw, *list, *size, **nodes;
1546: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1547: MPI_Status status;
1548: int ierr;
1551: /* strip and load s */
1552: msg_list =list = gs->pair_list;
1553: msg_size =size = gs->msg_sizes;
1554: msg_nodes=nodes = gs->node_list;
1555: iptr=pw = gs->pw_elm_list;
1556: dptr1=dptr3 = gs->pw_vals;
1557: msg_ids_in = ids_in = gs->msg_ids_in;
1558: msg_ids_out = ids_out = gs->msg_ids_out;
1559: dptr2 = gs->out;
1560: in1=in2 = gs->in;
1562: /* post the receives */
1563: /* msg_nodes=nodes; */
1564: do
1565: {
1566: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1567: second one *list and do list++ afterwards */
1568: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1569: gs->gs_comm, msg_ids_in++);
1570: in1 += *size++;
1571: }
1572: while (*++msg_nodes);
1573: msg_nodes=nodes;
1575: /* load gs values into in out gs buffers */
1576: while (*iptr >= 0)
1577: {*dptr3++ = *(in_vals + *iptr++);}
1579: /* load out buffers and post the sends */
1580: while ((iptr = *msg_nodes++))
1581: {
1582: dptr3 = dptr2;
1583: while (*iptr >= 0)
1584: {*dptr2++ = *(dptr1 + *iptr++);}
1585: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1586: /* is msg_ids_out++ correct? */
1587: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1588: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1589: }
1591: if (gs->max_left_over)
1592: {gs_gop_tree_exists(gs,in_vals);}
1594: /* process the received data */
1595: msg_nodes=nodes;
1596: while ((iptr = *nodes++))
1597: {
1598: /* Should I check the return value of MPI_Wait() or status? */
1599: /* Can this loop be replaced by a call to MPI_Waitall()? */
1600: MPI_Wait(ids_in++, &status);
1601: while (*iptr >= 0)
1602: {*(dptr1 + *iptr) = EXISTS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1603: }
1605: /* replace vals */
1606: while (*pw >= 0)
1607: {*(in_vals + *pw++) = *dptr1++;}
1609: /* clear isend message handles */
1610: /* This changed for clarity though it could be the same */
1611: while (*msg_nodes++)
1612: /* Should I check the return value of MPI_Wait() or status? */
1613: /* Can this loop be replaced by a call to MPI_Waitall()? */
1614: {MPI_Wait(ids_out++, &status);}
1615: }
1619: /******************************************************************************
1620: Function: gather_scatter
1622: Input :
1623: Output:
1624: Return:
1625: Description:
1626: ******************************************************************************/
1627: static
1628: void
1629: gs_gop_tree_exists(gs_id *gs, PetscScalar *vals)
1630: {
1631: int size;
1632: int *in, *out;
1633: PetscScalar *buf, *work;
1634: int op[] = {GL_EXISTS,0};
1637: in = gs->tree_map_in;
1638: out = gs->tree_map_out;
1639: buf = gs->tree_buf;
1640: work = gs->tree_work;
1641: size = gs->tree_nel;
1643: rvec_zero(buf,size);
1645: while (*in >= 0)
1646: {
1647: /*
1648: printf("%d :: out=%d\n",my_id,*out);
1649: printf("%d :: in=%d\n",my_id,*in);
1650: */
1651: *(buf + *out++) = *(vals + *in++);
1652: }
1654: grop(buf,work,size,op);
1656: in = gs->tree_map_in;
1657: out = gs->tree_map_out;
1659: while (*in >= 0)
1660: {*(vals + *in++) = *(buf + *out++);}
1662: }
1666: /******************************************************************************
1667: Function: gather_scatter
1669: Input :
1670: Output:
1671: Return:
1672: Description:
1673: ******************************************************************************/
1674: static void
1675: gs_gop_max_abs( gs_id *gs, PetscScalar *vals)
1676: {
1677: /* local only operations!!! */
1678: if (gs->num_local)
1679: {gs_gop_local_max_abs(gs,vals);}
1681: /* if intersection tree/pairwise and local isn't empty */
1682: if (gs->num_local_gop)
1683: {
1684: gs_gop_local_in_max_abs(gs,vals);
1686: /* pairwise */
1687: if (gs->num_pairs)
1688: {gs_gop_pairwise_max_abs(gs,vals);}
1689:
1690: /* tree */
1691: else if (gs->max_left_over)
1692: {gs_gop_tree_max_abs(gs,vals);}
1693:
1694: gs_gop_local_out(gs,vals);
1695: }
1696: /* if intersection tree/pairwise and local is empty */
1697: else
1698: {
1699: /* pairwise */
1700: if (gs->num_pairs)
1701: {gs_gop_pairwise_max_abs(gs,vals);}
1702:
1703: /* tree */
1704: else if (gs->max_left_over)
1705: {gs_gop_tree_max_abs(gs,vals);}
1706: }
1707: }
1711: /******************************************************************************
1712: Function: gather_scatter
1714: Input :
1715: Output:
1716: Return:
1717: Description:
1718: ******************************************************************************/
1719: static
1720: void
1721: gs_gop_local_max_abs( gs_id *gs, PetscScalar *vals)
1722: {
1723: int *num, *map, **reduce;
1724: PetscScalar tmp;
1727: num = gs->num_local_reduce;
1728: reduce = gs->local_reduce;
1729: while ((map = *reduce))
1730: {
1731: num ++;
1732: tmp = 0.0;
1733: while (*map >= 0)
1734: {tmp = MAX_FABS(tmp,*(vals + *map)); map++;}
1735:
1736: map = *reduce++;
1737: while (*map >= 0)
1738: {*(vals + *map++) = tmp;}
1739: }
1740: }
1744: /******************************************************************************
1745: Function: gather_scatter
1747: Input :
1748: Output:
1749: Return:
1750: Description:
1751: ******************************************************************************/
1752: static
1753: void
1754: gs_gop_local_in_max_abs( gs_id *gs, PetscScalar *vals)
1755: {
1756: int *num, *map, **reduce;
1757: PetscScalar *base;
1760: num = gs->num_gop_local_reduce;
1761: reduce = gs->gop_local_reduce;
1762: while ((map = *reduce++))
1763: {
1764: num++;
1765: base = vals + *map++;
1766: while (*map >= 0)
1767: {*base = MAX_FABS(*base,*(vals + *map)); map++;}
1768: }
1769: }
1773: /******************************************************************************
1774: Function: gather_scatter
1776: VERSION 3 ::
1778: Input :
1779: Output:
1780: Return:
1781: Description:
1782: ******************************************************************************/
1783: static
1784: void
1785: gs_gop_pairwise_max_abs( gs_id *gs, PetscScalar *in_vals)
1786: {
1787: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1788: int *iptr, *msg_list, *msg_size, **msg_nodes;
1789: int *pw, *list, *size, **nodes;
1790: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1791: MPI_Status status;
1792: int ierr;
1794: /* strip and load s */
1795: msg_list =list = gs->pair_list;
1796: msg_size =size = gs->msg_sizes;
1797: msg_nodes=nodes = gs->node_list;
1798: iptr=pw = gs->pw_elm_list;
1799: dptr1=dptr3 = gs->pw_vals;
1800: msg_ids_in = ids_in = gs->msg_ids_in;
1801: msg_ids_out = ids_out = gs->msg_ids_out;
1802: dptr2 = gs->out;
1803: in1=in2 = gs->in;
1805: /* post the receives */
1806: /* msg_nodes=nodes; */
1807: do
1808: {
1809: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
1810: second one *list and do list++ afterwards */
1811: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
1812: gs->gs_comm, msg_ids_in++);
1813: in1 += *size++;
1814: }
1815: while (*++msg_nodes);
1816: msg_nodes=nodes;
1818: /* load gs values into in out gs buffers */
1819: while (*iptr >= 0)
1820: {*dptr3++ = *(in_vals + *iptr++);}
1822: /* load out buffers and post the sends */
1823: while ((iptr = *msg_nodes++))
1824: {
1825: dptr3 = dptr2;
1826: while (*iptr >= 0)
1827: {*dptr2++ = *(dptr1 + *iptr++);}
1828: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1829: /* is msg_ids_out++ correct? */
1830: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
1831: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
1832: }
1834: if (gs->max_left_over)
1835: {gs_gop_tree_max_abs(gs,in_vals);}
1837: /* process the received data */
1838: msg_nodes=nodes;
1839: while ((iptr = *nodes++))
1840: {
1841: /* Should I check the return value of MPI_Wait() or status? */
1842: /* Can this loop be replaced by a call to MPI_Waitall()? */
1843: MPI_Wait(ids_in++, &status);
1844: while (*iptr >= 0)
1845: {*(dptr1 + *iptr) = MAX_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
1846: }
1848: /* replace vals */
1849: while (*pw >= 0)
1850: {*(in_vals + *pw++) = *dptr1++;}
1852: /* clear isend message handles */
1853: /* This changed for clarity though it could be the same */
1854: while (*msg_nodes++)
1855: /* Should I check the return value of MPI_Wait() or status? */
1856: /* Can this loop be replaced by a call to MPI_Waitall()? */
1857: {MPI_Wait(ids_out++, &status);}
1858: }
1862: /******************************************************************************
1863: Function: gather_scatter
1865: Input :
1866: Output:
1867: Return:
1868: Description:
1869: ******************************************************************************/
1870: static
1871: void
1872: gs_gop_tree_max_abs(gs_id *gs, PetscScalar *vals)
1873: {
1874: int size;
1875: int *in, *out;
1876: PetscScalar *buf, *work;
1877: int op[] = {GL_MAX_ABS,0};
1880: in = gs->tree_map_in;
1881: out = gs->tree_map_out;
1882: buf = gs->tree_buf;
1883: work = gs->tree_work;
1884: size = gs->tree_nel;
1886: rvec_zero(buf,size);
1888: while (*in >= 0)
1889: {
1890: /*
1891: printf("%d :: out=%d\n",my_id,*out);
1892: printf("%d :: in=%d\n",my_id,*in);
1893: */
1894: *(buf + *out++) = *(vals + *in++);
1895: }
1897: grop(buf,work,size,op);
1899: in = gs->tree_map_in;
1900: out = gs->tree_map_out;
1902: while (*in >= 0)
1903: {*(vals + *in++) = *(buf + *out++);}
1905: }
1909: /******************************************************************************
1910: Function: gather_scatter
1912: Input :
1913: Output:
1914: Return:
1915: Description:
1916: ******************************************************************************/
1917: static void
1918: gs_gop_max( gs_id *gs, PetscScalar *vals)
1919: {
1921: /* local only operations!!! */
1922: if (gs->num_local)
1923: {gs_gop_local_max(gs,vals);}
1925: /* if intersection tree/pairwise and local isn't empty */
1926: if (gs->num_local_gop)
1927: {
1928: gs_gop_local_in_max(gs,vals);
1930: /* pairwise */
1931: if (gs->num_pairs)
1932: {gs_gop_pairwise_max(gs,vals);}
1933:
1934: /* tree */
1935: else if (gs->max_left_over)
1936: {gs_gop_tree_max(gs,vals);}
1937:
1938: gs_gop_local_out(gs,vals);
1939: }
1940: /* if intersection tree/pairwise and local is empty */
1941: else
1942: {
1943: /* pairwise */
1944: if (gs->num_pairs)
1945: {gs_gop_pairwise_max(gs,vals);}
1946:
1947: /* tree */
1948: else if (gs->max_left_over)
1949: {gs_gop_tree_max(gs,vals);}
1950: }
1951: }
1955: /******************************************************************************
1956: Function: gather_scatter
1958: Input :
1959: Output:
1960: Return:
1961: Description:
1962: ******************************************************************************/
1963: static
1964: void
1965: gs_gop_local_max( gs_id *gs, PetscScalar *vals)
1966: {
1967: int *num, *map, **reduce;
1968: PetscScalar tmp;
1971: num = gs->num_local_reduce;
1972: reduce = gs->local_reduce;
1973: while ((map = *reduce))
1974: {
1975: num ++;
1976: tmp = -REAL_MAX;
1977: while (*map >= 0)
1978: {tmp = PetscMax(tmp,*(vals + *map)); map++;}
1979:
1980: map = *reduce++;
1981: while (*map >= 0)
1982: {*(vals + *map++) = tmp;}
1983: }
1984: }
1988: /******************************************************************************
1989: Function: gather_scatter
1991: Input :
1992: Output:
1993: Return:
1994: Description:
1995: ******************************************************************************/
1996: static
1997: void
1998: gs_gop_local_in_max( gs_id *gs, PetscScalar *vals)
1999: {
2000: int *num, *map, **reduce;
2001: PetscScalar *base;
2004: num = gs->num_gop_local_reduce;
2005: reduce = gs->gop_local_reduce;
2006: while ((map = *reduce++))
2007: {
2008: num++;
2009: base = vals + *map++;
2010: while (*map >= 0)
2011: {*base = PetscMax(*base,*(vals + *map)); map++;}
2012: }
2013: }
2017: /******************************************************************************
2018: Function: gather_scatter
2020: VERSION 3 ::
2022: Input :
2023: Output:
2024: Return:
2025: Description:
2026: ******************************************************************************/
2027: static
2028: void
2029: gs_gop_pairwise_max( gs_id *gs, PetscScalar *in_vals)
2030: {
2031: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2032: int *iptr, *msg_list, *msg_size, **msg_nodes;
2033: int *pw, *list, *size, **nodes;
2034: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2035: MPI_Status status;
2036: int ierr;
2038: /* strip and load s */
2039: msg_list =list = gs->pair_list;
2040: msg_size =size = gs->msg_sizes;
2041: msg_nodes=nodes = gs->node_list;
2042: iptr=pw = gs->pw_elm_list;
2043: dptr1=dptr3 = gs->pw_vals;
2044: msg_ids_in = ids_in = gs->msg_ids_in;
2045: msg_ids_out = ids_out = gs->msg_ids_out;
2046: dptr2 = gs->out;
2047: in1=in2 = gs->in;
2049: /* post the receives */
2050: /* msg_nodes=nodes; */
2051: do
2052: {
2053: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2054: second one *list and do list++ afterwards */
2055: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2056: gs->gs_comm, msg_ids_in++);
2057: in1 += *size++;
2058: }
2059: while (*++msg_nodes);
2060: msg_nodes=nodes;
2062: /* load gs values into in out gs buffers */
2063: while (*iptr >= 0)
2064: {*dptr3++ = *(in_vals + *iptr++);}
2066: /* load out buffers and post the sends */
2067: while ((iptr = *msg_nodes++))
2068: {
2069: dptr3 = dptr2;
2070: while (*iptr >= 0)
2071: {*dptr2++ = *(dptr1 + *iptr++);}
2072: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2073: /* is msg_ids_out++ correct? */
2074: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2075: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2076: }
2078: if (gs->max_left_over)
2079: {gs_gop_tree_max(gs,in_vals);}
2081: /* process the received data */
2082: msg_nodes=nodes;
2083: while ((iptr = *nodes++))
2084: {
2085: /* Should I check the return value of MPI_Wait() or status? */
2086: /* Can this loop be replaced by a call to MPI_Waitall()? */
2087: MPI_Wait(ids_in++, &status);
2088: while (*iptr >= 0)
2089: {*(dptr1 + *iptr) = PetscMax(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2090: }
2092: /* replace vals */
2093: while (*pw >= 0)
2094: {*(in_vals + *pw++) = *dptr1++;}
2096: /* clear isend message handles */
2097: /* This changed for clarity though it could be the same */
2098: while (*msg_nodes++)
2099: /* Should I check the return value of MPI_Wait() or status? */
2100: /* Can this loop be replaced by a call to MPI_Waitall()? */
2101: {MPI_Wait(ids_out++, &status);}
2102: }
2106: /******************************************************************************
2107: Function: gather_scatter
2109: Input :
2110: Output:
2111: Return:
2112: Description:
2113: ******************************************************************************/
2114: static
2115: void
2116: gs_gop_tree_max(gs_id *gs, PetscScalar *vals)
2117: {
2118: int size;
2119: int *in, *out;
2120: PetscScalar *buf, *work;
2121: int ierr;
2122:
2123: in = gs->tree_map_in;
2124: out = gs->tree_map_out;
2125: buf = gs->tree_buf;
2126: work = gs->tree_work;
2127: size = gs->tree_nel;
2129: rvec_set(buf,-REAL_MAX,size);
2131: while (*in >= 0)
2132: {*(buf + *out++) = *(vals + *in++);}
2134: in = gs->tree_map_in;
2135: out = gs->tree_map_out;
2136: MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MAX,gs->gs_comm);
2137: while (*in >= 0)
2138: {*(vals + *in++) = *(work + *out++);}
2140: }
2144: /******************************************************************************
2145: Function: gather_scatter
2147: Input :
2148: Output:
2149: Return:
2150: Description:
2151: ******************************************************************************/
2152: static void
2153: gs_gop_min_abs( gs_id *gs, PetscScalar *vals)
2154: {
2156: /* local only operations!!! */
2157: if (gs->num_local)
2158: {gs_gop_local_min_abs(gs,vals);}
2160: /* if intersection tree/pairwise and local isn't empty */
2161: if (gs->num_local_gop)
2162: {
2163: gs_gop_local_in_min_abs(gs,vals);
2165: /* pairwise */
2166: if (gs->num_pairs)
2167: {gs_gop_pairwise_min_abs(gs,vals);}
2168:
2169: /* tree */
2170: else if (gs->max_left_over)
2171: {gs_gop_tree_min_abs(gs,vals);}
2172:
2173: gs_gop_local_out(gs,vals);
2174: }
2175: /* if intersection tree/pairwise and local is empty */
2176: else
2177: {
2178: /* pairwise */
2179: if (gs->num_pairs)
2180: {gs_gop_pairwise_min_abs(gs,vals);}
2181:
2182: /* tree */
2183: else if (gs->max_left_over)
2184: {gs_gop_tree_min_abs(gs,vals);}
2185: }
2186: }
2190: /******************************************************************************
2191: Function: gather_scatter
2193: Input :
2194: Output:
2195: Return:
2196: Description:
2197: ******************************************************************************/
2198: static
2199: void
2200: gs_gop_local_min_abs( gs_id *gs, PetscScalar *vals)
2201: {
2202: int *num, *map, **reduce;
2203: PetscScalar tmp;
2206: num = gs->num_local_reduce;
2207: reduce = gs->local_reduce;
2208: while ((map = *reduce))
2209: {
2210: num ++;
2211: tmp = REAL_MAX;
2212: while (*map >= 0)
2213: {tmp = MIN_FABS(tmp,*(vals + *map)); map++;}
2214:
2215: map = *reduce++;
2216: while (*map >= 0)
2217: {*(vals + *map++) = tmp;}
2218: }
2219: }
2223: /******************************************************************************
2224: Function: gather_scatter
2226: Input :
2227: Output:
2228: Return:
2229: Description:
2230: ******************************************************************************/
2231: static
2232: void
2233: gs_gop_local_in_min_abs( gs_id *gs, PetscScalar *vals)
2234: {
2235: int *num, *map, **reduce;
2236: PetscScalar *base;
2238: num = gs->num_gop_local_reduce;
2239: reduce = gs->gop_local_reduce;
2240: while ((map = *reduce++))
2241: {
2242: num++;
2243: base = vals + *map++;
2244: while (*map >= 0)
2245: {*base = MIN_FABS(*base,*(vals + *map)); map++;}
2246: }
2247: }
2251: /******************************************************************************
2252: Function: gather_scatter
2254: VERSION 3 ::
2256: Input :
2257: Output:
2258: Return:
2259: Description:
2260: ******************************************************************************/
2261: static
2262: void
2263: gs_gop_pairwise_min_abs( gs_id *gs, PetscScalar *in_vals)
2264: {
2265: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2266: int *iptr, *msg_list, *msg_size, **msg_nodes;
2267: int *pw, *list, *size, **nodes;
2268: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2269: MPI_Status status;
2270: int ierr;
2272: /* strip and load s */
2273: msg_list =list = gs->pair_list;
2274: msg_size =size = gs->msg_sizes;
2275: msg_nodes=nodes = gs->node_list;
2276: iptr=pw = gs->pw_elm_list;
2277: dptr1=dptr3 = gs->pw_vals;
2278: msg_ids_in = ids_in = gs->msg_ids_in;
2279: msg_ids_out = ids_out = gs->msg_ids_out;
2280: dptr2 = gs->out;
2281: in1=in2 = gs->in;
2283: /* post the receives */
2284: /* msg_nodes=nodes; */
2285: do
2286: {
2287: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2288: second one *list and do list++ afterwards */
2289: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2290: gs->gs_comm, msg_ids_in++);
2291: in1 += *size++;
2292: }
2293: while (*++msg_nodes);
2294: msg_nodes=nodes;
2296: /* load gs values into in out gs buffers */
2297: while (*iptr >= 0)
2298: {*dptr3++ = *(in_vals + *iptr++);}
2300: /* load out buffers and post the sends */
2301: while ((iptr = *msg_nodes++))
2302: {
2303: dptr3 = dptr2;
2304: while (*iptr >= 0)
2305: {*dptr2++ = *(dptr1 + *iptr++);}
2306: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2307: /* is msg_ids_out++ correct? */
2308: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2309: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2310: }
2312: if (gs->max_left_over)
2313: {gs_gop_tree_min_abs(gs,in_vals);}
2315: /* process the received data */
2316: msg_nodes=nodes;
2317: while ((iptr = *nodes++))
2318: {
2319: /* Should I check the return value of MPI_Wait() or status? */
2320: /* Can this loop be replaced by a call to MPI_Waitall()? */
2321: MPI_Wait(ids_in++, &status);
2322: while (*iptr >= 0)
2323: {*(dptr1 + *iptr) = MIN_FABS(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2324: }
2326: /* replace vals */
2327: while (*pw >= 0)
2328: {*(in_vals + *pw++) = *dptr1++;}
2330: /* clear isend message handles */
2331: /* This changed for clarity though it could be the same */
2332: while (*msg_nodes++)
2333: /* Should I check the return value of MPI_Wait() or status? */
2334: /* Can this loop be replaced by a call to MPI_Waitall()? */
2335: {MPI_Wait(ids_out++, &status);}
2336: }
2340: /******************************************************************************
2341: Function: gather_scatter
2343: Input :
2344: Output:
2345: Return:
2346: Description:
2347: ******************************************************************************/
2348: static
2349: void
2350: gs_gop_tree_min_abs(gs_id *gs, PetscScalar *vals)
2351: {
2352: int size;
2353: int *in, *out;
2354: PetscScalar *buf, *work;
2355: int op[] = {GL_MIN_ABS,0};
2358: in = gs->tree_map_in;
2359: out = gs->tree_map_out;
2360: buf = gs->tree_buf;
2361: work = gs->tree_work;
2362: size = gs->tree_nel;
2364: rvec_set(buf,REAL_MAX,size);
2366: while (*in >= 0)
2367: {*(buf + *out++) = *(vals + *in++);}
2369: in = gs->tree_map_in;
2370: out = gs->tree_map_out;
2371: grop(buf,work,size,op);
2372: while (*in >= 0)
2373: {*(vals + *in++) = *(buf + *out++);}
2375: }
2379: /******************************************************************************
2380: Function: gather_scatter
2382: Input :
2383: Output:
2384: Return:
2385: Description:
2386: ******************************************************************************/
2387: static void
2388: gs_gop_min( gs_id *gs, PetscScalar *vals)
2389: {
2391: /* local only operations!!! */
2392: if (gs->num_local)
2393: {gs_gop_local_min(gs,vals);}
2395: /* if intersection tree/pairwise and local isn't empty */
2396: if (gs->num_local_gop)
2397: {
2398: gs_gop_local_in_min(gs,vals);
2400: /* pairwise */
2401: if (gs->num_pairs)
2402: {gs_gop_pairwise_min(gs,vals);}
2403:
2404: /* tree */
2405: else if (gs->max_left_over)
2406: {gs_gop_tree_min(gs,vals);}
2407:
2408: gs_gop_local_out(gs,vals);
2409: }
2410: /* if intersection tree/pairwise and local is empty */
2411: else
2412: {
2413: /* pairwise */
2414: if (gs->num_pairs)
2415: {gs_gop_pairwise_min(gs,vals);}
2416:
2417: /* tree */
2418: else if (gs->max_left_over)
2419: {gs_gop_tree_min(gs,vals);}
2420: }
2421: }
2425: /******************************************************************************
2426: Function: gather_scatter
2428: Input :
2429: Output:
2430: Return:
2431: Description:
2432: ******************************************************************************/
2433: static
2434: void
2435: gs_gop_local_min( gs_id *gs, PetscScalar *vals)
2436: {
2437: int *num, *map, **reduce;
2438: PetscScalar tmp;
2440: num = gs->num_local_reduce;
2441: reduce = gs->local_reduce;
2442: while ((map = *reduce))
2443: {
2444: num ++;
2445: tmp = REAL_MAX;
2446: while (*map >= 0)
2447: {tmp = PetscMin(tmp,*(vals + *map)); map++;}
2448:
2449: map = *reduce++;
2450: while (*map >= 0)
2451: {*(vals + *map++) = tmp;}
2452: }
2453: }
2457: /******************************************************************************
2458: Function: gather_scatter
2460: Input :
2461: Output:
2462: Return:
2463: Description:
2464: ******************************************************************************/
2465: static
2466: void
2467: gs_gop_local_in_min( gs_id *gs, PetscScalar *vals)
2468: {
2469: int *num, *map, **reduce;
2470: PetscScalar *base;
2472: num = gs->num_gop_local_reduce;
2473: reduce = gs->gop_local_reduce;
2474: while ((map = *reduce++))
2475: {
2476: num++;
2477: base = vals + *map++;
2478: while (*map >= 0)
2479: {*base = PetscMin(*base,*(vals + *map)); map++;}
2480: }
2481: }
2485: /******************************************************************************
2486: Function: gather_scatter
2488: VERSION 3 ::
2490: Input :
2491: Output:
2492: Return:
2493: Description:
2494: ******************************************************************************/
2495: static
2496: void
2497: gs_gop_pairwise_min( gs_id *gs, PetscScalar *in_vals)
2498: {
2499: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2500: int *iptr, *msg_list, *msg_size, **msg_nodes;
2501: int *pw, *list, *size, **nodes;
2502: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2503: MPI_Status status;
2504: int ierr;
2506: /* strip and load s */
2507: msg_list =list = gs->pair_list;
2508: msg_size =size = gs->msg_sizes;
2509: msg_nodes=nodes = gs->node_list;
2510: iptr=pw = gs->pw_elm_list;
2511: dptr1=dptr3 = gs->pw_vals;
2512: msg_ids_in = ids_in = gs->msg_ids_in;
2513: msg_ids_out = ids_out = gs->msg_ids_out;
2514: dptr2 = gs->out;
2515: in1=in2 = gs->in;
2517: /* post the receives */
2518: /* msg_nodes=nodes; */
2519: do
2520: {
2521: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2522: second one *list and do list++ afterwards */
2523: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2524: gs->gs_comm, msg_ids_in++);
2525: in1 += *size++;
2526: }
2527: while (*++msg_nodes);
2528: msg_nodes=nodes;
2530: /* load gs values into in out gs buffers */
2531: while (*iptr >= 0)
2532: {*dptr3++ = *(in_vals + *iptr++);}
2534: /* load out buffers and post the sends */
2535: while ((iptr = *msg_nodes++))
2536: {
2537: dptr3 = dptr2;
2538: while (*iptr >= 0)
2539: {*dptr2++ = *(dptr1 + *iptr++);}
2540: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2541: /* is msg_ids_out++ correct? */
2542: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2543: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2544: }
2546: /* process the received data */
2547: if (gs->max_left_over)
2548: {gs_gop_tree_min(gs,in_vals);}
2550: msg_nodes=nodes;
2551: while ((iptr = *nodes++))
2552: {
2553: /* Should I check the return value of MPI_Wait() or status? */
2554: /* Can this loop be replaced by a call to MPI_Waitall()? */
2555: MPI_Wait(ids_in++, &status);
2556: while (*iptr >= 0)
2557: {*(dptr1 + *iptr) = PetscMin(*(dptr1 + *iptr),*in2); iptr++; in2++;}
2558: }
2560: /* replace vals */
2561: while (*pw >= 0)
2562: {*(in_vals + *pw++) = *dptr1++;}
2564: /* clear isend message handles */
2565: /* This changed for clarity though it could be the same */
2566: while (*msg_nodes++)
2567: /* Should I check the return value of MPI_Wait() or status? */
2568: /* Can this loop be replaced by a call to MPI_Waitall()? */
2569: {MPI_Wait(ids_out++, &status);}
2570: }
2574: /******************************************************************************
2575: Function: gather_scatter
2577: Input :
2578: Output:
2579: Return:
2580: Description:
2581: ******************************************************************************/
2582: static
2583: void
2584: gs_gop_tree_min(gs_id *gs, PetscScalar *vals)
2585: {
2586: int size;
2587: int *in, *out;
2588: PetscScalar *buf, *work;
2589: int ierr;
2590:
2591: in = gs->tree_map_in;
2592: out = gs->tree_map_out;
2593: buf = gs->tree_buf;
2594: work = gs->tree_work;
2595: size = gs->tree_nel;
2597: rvec_set(buf,REAL_MAX,size);
2599: while (*in >= 0)
2600: {*(buf + *out++) = *(vals + *in++);}
2602: in = gs->tree_map_in;
2603: out = gs->tree_map_out;
2604: MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_MIN,gs->gs_comm);
2605: while (*in >= 0)
2606: {*(vals + *in++) = *(work + *out++);}
2607: }
2611: /******************************************************************************
2612: Function: gather_scatter
2614: Input :
2615: Output:
2616: Return:
2617: Description:
2618: ******************************************************************************/
2619: static void
2620: gs_gop_times( gs_id *gs, PetscScalar *vals)
2621: {
2623: /* local only operations!!! */
2624: if (gs->num_local)
2625: {gs_gop_local_times(gs,vals);}
2627: /* if intersection tree/pairwise and local isn't empty */
2628: if (gs->num_local_gop)
2629: {
2630: gs_gop_local_in_times(gs,vals);
2632: /* pairwise */
2633: if (gs->num_pairs)
2634: {gs_gop_pairwise_times(gs,vals);}
2635:
2636: /* tree */
2637: else if (gs->max_left_over)
2638: {gs_gop_tree_times(gs,vals);}
2639:
2640: gs_gop_local_out(gs,vals);
2641: }
2642: /* if intersection tree/pairwise and local is empty */
2643: else
2644: {
2645: /* pairwise */
2646: if (gs->num_pairs)
2647: {gs_gop_pairwise_times(gs,vals);}
2648:
2649: /* tree */
2650: else if (gs->max_left_over)
2651: {gs_gop_tree_times(gs,vals);}
2652: }
2653: }
2657: /******************************************************************************
2658: Function: gather_scatter
2660: Input :
2661: Output:
2662: Return:
2663: Description:
2664: ******************************************************************************/
2665: static
2666: void
2667: gs_gop_local_times( gs_id *gs, PetscScalar *vals)
2668: {
2669: int *num, *map, **reduce;
2670: PetscScalar tmp;
2672: num = gs->num_local_reduce;
2673: reduce = gs->local_reduce;
2674: while ((map = *reduce))
2675: {
2676: /* wall */
2677: if (*num == 2)
2678: {
2679: num ++; reduce++;
2680: vals[map[1]] = vals[map[0]] *= vals[map[1]];
2681: }
2682: /* corner shared by three elements */
2683: else if (*num == 3)
2684: {
2685: num ++; reduce++;
2686: vals[map[2]]=vals[map[1]]=vals[map[0]]*=(vals[map[1]]*vals[map[2]]);
2687: }
2688: /* corner shared by four elements */
2689: else if (*num == 4)
2690: {
2691: num ++; reduce++;
2692: vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] *=
2693: (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2694: }
2695: /* general case ... odd geoms ... 3D*/
2696: else
2697: {
2698: num ++;
2699: tmp = 1.0;
2700: while (*map >= 0)
2701: {tmp *= *(vals + *map++);}
2703: map = *reduce++;
2704: while (*map >= 0)
2705: {*(vals + *map++) = tmp;}
2706: }
2707: }
2708: }
2712: /******************************************************************************
2713: Function: gather_scatter
2715: Input :
2716: Output:
2717: Return:
2718: Description:
2719: ******************************************************************************/
2720: static
2721: void
2722: gs_gop_local_in_times( gs_id *gs, PetscScalar *vals)
2723: {
2724: int *num, *map, **reduce;
2725: PetscScalar *base;
2727: num = gs->num_gop_local_reduce;
2728: reduce = gs->gop_local_reduce;
2729: while ((map = *reduce++))
2730: {
2731: /* wall */
2732: if (*num == 2)
2733: {
2734: num ++;
2735: vals[map[0]] *= vals[map[1]];
2736: }
2737: /* corner shared by three elements */
2738: else if (*num == 3)
2739: {
2740: num ++;
2741: vals[map[0]] *= (vals[map[1]] * vals[map[2]]);
2742: }
2743: /* corner shared by four elements */
2744: else if (*num == 4)
2745: {
2746: num ++;
2747: vals[map[0]] *= (vals[map[1]] * vals[map[2]] * vals[map[3]]);
2748: }
2749: /* general case ... odd geoms ... 3D*/
2750: else
2751: {
2752: num++;
2753: base = vals + *map++;
2754: while (*map >= 0)
2755: {*base *= *(vals + *map++);}
2756: }
2757: }
2758: }
2762: /******************************************************************************
2763: Function: gather_scatter
2765: VERSION 3 ::
2767: Input :
2768: Output:
2769: Return:
2770: Description:
2771: ******************************************************************************/
2772: static
2773: void
2774: gs_gop_pairwise_times( gs_id *gs, PetscScalar *in_vals)
2775: {
2776: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
2777: int *iptr, *msg_list, *msg_size, **msg_nodes;
2778: int *pw, *list, *size, **nodes;
2779: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
2780: MPI_Status status;
2781: int ierr;
2783: /* strip and load s */
2784: msg_list =list = gs->pair_list;
2785: msg_size =size = gs->msg_sizes;
2786: msg_nodes=nodes = gs->node_list;
2787: iptr=pw = gs->pw_elm_list;
2788: dptr1=dptr3 = gs->pw_vals;
2789: msg_ids_in = ids_in = gs->msg_ids_in;
2790: msg_ids_out = ids_out = gs->msg_ids_out;
2791: dptr2 = gs->out;
2792: in1=in2 = gs->in;
2794: /* post the receives */
2795: /* msg_nodes=nodes; */
2796: do
2797: {
2798: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
2799: second one *list and do list++ afterwards */
2800: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
2801: gs->gs_comm, msg_ids_in++);
2802: in1 += *size++;
2803: }
2804: while (*++msg_nodes);
2805: msg_nodes=nodes;
2807: /* load gs values into in out gs buffers */
2808: while (*iptr >= 0)
2809: {*dptr3++ = *(in_vals + *iptr++);}
2811: /* load out buffers and post the sends */
2812: while ((iptr = *msg_nodes++))
2813: {
2814: dptr3 = dptr2;
2815: while (*iptr >= 0)
2816: {*dptr2++ = *(dptr1 + *iptr++);}
2817: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
2818: /* is msg_ids_out++ correct? */
2819: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
2820: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
2821: }
2823: if (gs->max_left_over)
2824: {gs_gop_tree_times(gs,in_vals);}
2826: /* process the received data */
2827: msg_nodes=nodes;
2828: while ((iptr = *nodes++))
2829: {
2830: /* Should I check the return value of MPI_Wait() or status? */
2831: /* Can this loop be replaced by a call to MPI_Waitall()? */
2832: MPI_Wait(ids_in++, &status);
2833: while (*iptr >= 0)
2834: {*(dptr1 + *iptr++) *= *in2++;}
2835: }
2837: /* replace vals */
2838: while (*pw >= 0)
2839: {*(in_vals + *pw++) = *dptr1++;}
2841: /* clear isend message handles */
2842: /* This changed for clarity though it could be the same */
2843: while (*msg_nodes++)
2844: /* Should I check the return value of MPI_Wait() or status? */
2845: /* Can this loop be replaced by a call to MPI_Waitall()? */
2846: {MPI_Wait(ids_out++, &status);}
2847: }
2851: /******************************************************************************
2852: Function: gather_scatter
2854: Input :
2855: Output:
2856: Return:
2857: Description:
2858: ******************************************************************************/
2859: static
2860: void
2861: gs_gop_tree_times(gs_id *gs, PetscScalar *vals)
2862: {
2863: int size;
2864: int *in, *out;
2865: PetscScalar *buf, *work;
2866: int ierr;
2867:
2868: in = gs->tree_map_in;
2869: out = gs->tree_map_out;
2870: buf = gs->tree_buf;
2871: work = gs->tree_work;
2872: size = gs->tree_nel;
2874: rvec_one(buf,size);
2876: while (*in >= 0)
2877: {*(buf + *out++) = *(vals + *in++);}
2879: in = gs->tree_map_in;
2880: out = gs->tree_map_out;
2881: MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_PROD,gs->gs_comm);
2882: while (*in >= 0)
2883: {*(vals + *in++) = *(work + *out++);}
2885: }
2889: /******************************************************************************
2890: Function: gather_scatter
2893: Input :
2894: Output:
2895: Return:
2896: Description:
2897: ******************************************************************************/
2898: static void
2899: gs_gop_plus( gs_id *gs, PetscScalar *vals)
2900: {
2902: /* local only operations!!! */
2903: if (gs->num_local)
2904: {gs_gop_local_plus(gs,vals);}
2906: /* if intersection tree/pairwise and local isn't empty */
2907: if (gs->num_local_gop)
2908: {
2909: gs_gop_local_in_plus(gs,vals);
2911: /* pairwise will NOT do tree inside ... */
2912: if (gs->num_pairs)
2913: {gs_gop_pairwise_plus(gs,vals);}
2915: /* tree */
2916: if (gs->max_left_over)
2917: {gs_gop_tree_plus(gs,vals);}
2918:
2919: gs_gop_local_out(gs,vals);
2920: }
2921: /* if intersection tree/pairwise and local is empty */
2922: else
2923: {
2924: /* pairwise will NOT do tree inside */
2925: if (gs->num_pairs)
2926: {gs_gop_pairwise_plus(gs,vals);}
2927:
2928: /* tree */
2929: if (gs->max_left_over)
2930: {gs_gop_tree_plus(gs,vals);}
2931: }
2933: }
2937: /******************************************************************************
2938: Function: gather_scatter
2940: Input :
2941: Output:
2942: Return:
2943: Description:
2944: ******************************************************************************/
2945: static
2946: void
2947: gs_gop_local_plus( gs_id *gs, PetscScalar *vals)
2948: {
2949: int *num, *map, **reduce;
2950: PetscScalar tmp;
2953: num = gs->num_local_reduce;
2954: reduce = gs->local_reduce;
2955: while ((map = *reduce))
2956: {
2957: /* wall */
2958: if (*num == 2)
2959: {
2960: num ++; reduce++;
2961: vals[map[1]] = vals[map[0]] += vals[map[1]];
2962: }
2963: /* corner shared by three elements */
2964: else if (*num == 3)
2965: {
2966: num ++; reduce++;
2967: vals[map[2]]=vals[map[1]]=vals[map[0]]+=(vals[map[1]]+vals[map[2]]);
2968: }
2969: /* corner shared by four elements */
2970: else if (*num == 4)
2971: {
2972: num ++; reduce++;
2973: vals[map[1]]=vals[map[2]]=vals[map[3]]=vals[map[0]] +=
2974: (vals[map[1]] + vals[map[2]] + vals[map[3]]);
2975: }
2976: /* general case ... odd geoms ... 3D*/
2977: else
2978: {
2979: num ++;
2980: tmp = 0.0;
2981: while (*map >= 0)
2982: {tmp += *(vals + *map++);}
2984: map = *reduce++;
2985: while (*map >= 0)
2986: {*(vals + *map++) = tmp;}
2987: }
2988: }
2989: }
2993: /******************************************************************************
2994: Function: gather_scatter
2996: Input :
2997: Output:
2998: Return:
2999: Description:
3000: ******************************************************************************/
3001: static
3002: void
3003: gs_gop_local_in_plus( gs_id *gs, PetscScalar *vals)
3004: {
3005: int *num, *map, **reduce;
3006: PetscScalar *base;
3009: num = gs->num_gop_local_reduce;
3010: reduce = gs->gop_local_reduce;
3011: while ((map = *reduce++))
3012: {
3013: /* wall */
3014: if (*num == 2)
3015: {
3016: num ++;
3017: vals[map[0]] += vals[map[1]];
3018: }
3019: /* corner shared by three elements */
3020: else if (*num == 3)
3021: {
3022: num ++;
3023: vals[map[0]] += (vals[map[1]] + vals[map[2]]);
3024: }
3025: /* corner shared by four elements */
3026: else if (*num == 4)
3027: {
3028: num ++;
3029: vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
3030: }
3031: /* general case ... odd geoms ... 3D*/
3032: else
3033: {
3034: num++;
3035: base = vals + *map++;
3036: while (*map >= 0)
3037: {*base += *(vals + *map++);}
3038: }
3039: }
3040: }
3044: /******************************************************************************
3045: Function: gather_scatter
3047: VERSION 3 ::
3049: Input :
3050: Output:
3051: Return:
3052: Description:
3053: ******************************************************************************/
3054: static
3055: void
3056: gs_gop_pairwise_plus( gs_id *gs, PetscScalar *in_vals)
3057: {
3058: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3059: int *iptr, *msg_list, *msg_size, **msg_nodes;
3060: int *pw, *list, *size, **nodes;
3061: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3062: MPI_Status status;
3063: int ierr;
3065: /* strip and load s */
3066: msg_list =list = gs->pair_list;
3067: msg_size =size = gs->msg_sizes;
3068: msg_nodes=nodes = gs->node_list;
3069: iptr=pw = gs->pw_elm_list;
3070: dptr1=dptr3 = gs->pw_vals;
3071: msg_ids_in = ids_in = gs->msg_ids_in;
3072: msg_ids_out = ids_out = gs->msg_ids_out;
3073: dptr2 = gs->out;
3074: in1=in2 = gs->in;
3076: /* post the receives */
3077: /* msg_nodes=nodes; */
3078: do
3079: {
3080: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3081: second one *list and do list++ afterwards */
3082: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3083: gs->gs_comm, msg_ids_in++);
3084: in1 += *size++;
3085: }
3086: while (*++msg_nodes);
3087: msg_nodes=nodes;
3089: /* load gs values into in out gs buffers */
3090: while (*iptr >= 0)
3091: {*dptr3++ = *(in_vals + *iptr++);}
3093: /* load out buffers and post the sends */
3094: while ((iptr = *msg_nodes++))
3095: {
3096: dptr3 = dptr2;
3097: while (*iptr >= 0)
3098: {*dptr2++ = *(dptr1 + *iptr++);}
3099: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3100: /* is msg_ids_out++ correct? */
3101: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *msg_list++,
3102: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3103: }
3105: /* do the tree while we're waiting */
3106: if (gs->max_left_over)
3107: {gs_gop_tree_plus(gs,in_vals);}
3109: /* process the received data */
3110: msg_nodes=nodes;
3111: while ((iptr = *nodes++))
3112: {
3113: /* Should I check the return value of MPI_Wait() or status? */
3114: /* Can this loop be replaced by a call to MPI_Waitall()? */
3115: MPI_Wait(ids_in++, &status);
3116: while (*iptr >= 0)
3117: {*(dptr1 + *iptr++) += *in2++;}
3118: }
3120: /* replace vals */
3121: while (*pw >= 0)
3122: {*(in_vals + *pw++) = *dptr1++;}
3124: /* clear isend message handles */
3125: /* This changed for clarity though it could be the same */
3126: while (*msg_nodes++)
3127: /* Should I check the return value of MPI_Wait() or status? */
3128: /* Can this loop be replaced by a call to MPI_Waitall()? */
3129: {MPI_Wait(ids_out++, &status);}
3131: }
3135: /******************************************************************************
3136: Function: gather_scatter
3138: Input :
3139: Output:
3140: Return:
3141: Description:
3142: ******************************************************************************/
3143: static
3144: void
3145: gs_gop_tree_plus(gs_id *gs, PetscScalar *vals)
3146: {
3147: int size;
3148: int *in, *out;
3149: PetscScalar *buf, *work;
3150: int ierr;
3151:
3152: in = gs->tree_map_in;
3153: out = gs->tree_map_out;
3154: buf = gs->tree_buf;
3155: work = gs->tree_work;
3156: size = gs->tree_nel;
3158: rvec_zero(buf,size);
3160: while (*in >= 0)
3161: {*(buf + *out++) = *(vals + *in++);}
3163: in = gs->tree_map_in;
3164: out = gs->tree_map_out;
3165: MPI_Allreduce(buf,work,size,MPIU_SCALAR,MPI_SUM,gs->gs_comm);
3166: while (*in >= 0)
3167: {*(vals + *in++) = *(work + *out++);}
3169: }
3171: /******************************************************************************
3172: Function: gs_free()
3174: Input :
3176: Output:
3178: Return:
3180: Description:
3181: if (gs->sss) {free((void*) gs->sss);}
3182: ******************************************************************************/
3183: void
3184: gs_free( gs_id *gs)
3185: {
3186: int i;
3189: if (gs->nghs) {free((void*) gs->nghs);}
3190: if (gs->pw_nghs) {free((void*) gs->pw_nghs);}
3192: /* tree */
3193: if (gs->max_left_over)
3194: {
3195: if (gs->tree_elms) {free((void*) gs->tree_elms);}
3196: if (gs->tree_buf) {free((void*) gs->tree_buf);}
3197: if (gs->tree_work) {free((void*) gs->tree_work);}
3198: if (gs->tree_map_in) {free((void*) gs->tree_map_in);}
3199: if (gs->tree_map_out) {free((void*) gs->tree_map_out);}
3200: }
3202: /* pairwise info */
3203: if (gs->num_pairs)
3204: {
3205: /* should be NULL already */
3206: if (gs->ngh_buf) {free((void*) gs->ngh_buf);}
3207: if (gs->elms) {free((void*) gs->elms);}
3208: if (gs->local_elms) {free((void*) gs->local_elms);}
3209: if (gs->companion) {free((void*) gs->companion);}
3210:
3211: /* only set if pairwise */
3212: if (gs->vals) {free((void*) gs->vals);}
3213: if (gs->in) {free((void*) gs->in);}
3214: if (gs->out) {free((void*) gs->out);}
3215: if (gs->msg_ids_in) {free((void*) gs->msg_ids_in);}
3216: if (gs->msg_ids_out) {free((void*) gs->msg_ids_out);}
3217: if (gs->pw_vals) {free((void*) gs->pw_vals);}
3218: if (gs->pw_elm_list) {free((void*) gs->pw_elm_list);}
3219: if (gs->node_list)
3220: {
3221: for (i=0;i<gs->num_pairs;i++)
3222: {if (gs->node_list[i]) {free((void*) gs->node_list[i]);}}
3223: free((void*) gs->node_list);
3224: }
3225: if (gs->msg_sizes) {free((void*) gs->msg_sizes);}
3226: if (gs->pair_list) {free((void*) gs->pair_list);}
3227: }
3229: /* local info */
3230: if (gs->num_local_total>=0)
3231: {
3232: for (i=0;i<gs->num_local_total+1;i++)
3233: /* for (i=0;i<gs->num_local_total;i++) */
3234: {
3235: if (gs->num_gop_local_reduce[i])
3236: {free((void*) gs->gop_local_reduce[i]);}
3237: }
3238: }
3240: /* if intersection tree/pairwise and local isn't empty */
3241: if (gs->gop_local_reduce) {free((void*) gs->gop_local_reduce);}
3242: if (gs->num_gop_local_reduce) {free((void*) gs->num_gop_local_reduce);}
3244: free((void*) gs);
3245: }
3252: /******************************************************************************
3253: Function: gather_scatter
3255: Input :
3256: Output:
3257: Return:
3258: Description:
3259: ******************************************************************************/
3260: void
3261: gs_gop_vec( gs_id *gs, PetscScalar *vals, const char *op, int step)
3262: {
3264: switch (*op) {
3265: case '+':
3266: gs_gop_vec_plus(gs,vals,step);
3267: break;
3268: default:
3269: error_msg_warning("gs_gop_vec() :: %c is not a valid op",op[0]);
3270: error_msg_warning("gs_gop_vec() :: default :: plus");
3271: gs_gop_vec_plus(gs,vals,step);
3272: break;
3273: }
3274: }
3278: /******************************************************************************
3279: Function: gather_scatter
3281: Input :
3282: Output:
3283: Return:
3284: Description:
3285: ******************************************************************************/
3286: static void
3287: gs_gop_vec_plus( gs_id *gs, PetscScalar *vals, int step)
3288: {
3289: if (!gs) {error_msg_fatal("gs_gop_vec() passed NULL gs handle!!!");}
3291: /* local only operations!!! */
3292: if (gs->num_local)
3293: {gs_gop_vec_local_plus(gs,vals,step);}
3295: /* if intersection tree/pairwise and local isn't empty */
3296: if (gs->num_local_gop)
3297: {
3298: gs_gop_vec_local_in_plus(gs,vals,step);
3300: /* pairwise */
3301: if (gs->num_pairs)
3302: {gs_gop_vec_pairwise_plus(gs,vals,step);}
3304: /* tree */
3305: else if (gs->max_left_over)
3306: {gs_gop_vec_tree_plus(gs,vals,step);}
3308: gs_gop_vec_local_out(gs,vals,step);
3309: }
3310: /* if intersection tree/pairwise and local is empty */
3311: else
3312: {
3313: /* pairwise */
3314: if (gs->num_pairs)
3315: {gs_gop_vec_pairwise_plus(gs,vals,step);}
3317: /* tree */
3318: else if (gs->max_left_over)
3319: {gs_gop_vec_tree_plus(gs,vals,step);}
3320: }
3321: }
3325: /******************************************************************************
3326: Function: gather_scatter
3328: Input :
3329: Output:
3330: Return:
3331: Description:
3332: ******************************************************************************/
3333: static
3334: void
3335: gs_gop_vec_local_plus( gs_id *gs, PetscScalar *vals,
3336: int step)
3337: {
3338: int *num, *map, **reduce;
3339: PetscScalar *base;
3342: num = gs->num_local_reduce;
3343: reduce = gs->local_reduce;
3344: while ((map = *reduce))
3345: {
3346: base = vals + map[0] * step;
3348: /* wall */
3349: if (*num == 2)
3350: {
3351: num++; reduce++;
3352: rvec_add (base,vals+map[1]*step,step);
3353: rvec_copy(vals+map[1]*step,base,step);
3354: }
3355: /* corner shared by three elements */
3356: else if (*num == 3)
3357: {
3358: num++; reduce++;
3359: rvec_add (base,vals+map[1]*step,step);
3360: rvec_add (base,vals+map[2]*step,step);
3361: rvec_copy(vals+map[2]*step,base,step);
3362: rvec_copy(vals+map[1]*step,base,step);
3363: }
3364: /* corner shared by four elements */
3365: else if (*num == 4)
3366: {
3367: num++; reduce++;
3368: rvec_add (base,vals+map[1]*step,step);
3369: rvec_add (base,vals+map[2]*step,step);
3370: rvec_add (base,vals+map[3]*step,step);
3371: rvec_copy(vals+map[3]*step,base,step);
3372: rvec_copy(vals+map[2]*step,base,step);
3373: rvec_copy(vals+map[1]*step,base,step);
3374: }
3375: /* general case ... odd geoms ... 3D */
3376: else
3377: {
3378: num++;
3379: while (*++map >= 0)
3380: {rvec_add (base,vals+*map*step,step);}
3381:
3382: map = *reduce;
3383: while (*++map >= 0)
3384: {rvec_copy(vals+*map*step,base,step);}
3385:
3386: reduce++;
3387: }
3388: }
3389: }
3393: /******************************************************************************
3394: Function: gather_scatter
3396: Input :
3397: Output:
3398: Return:
3399: Description:
3400: ******************************************************************************/
3401: static
3402: void
3403: gs_gop_vec_local_in_plus( gs_id *gs, PetscScalar *vals,
3404: int step)
3405: {
3406: int *num, *map, **reduce;
3407: PetscScalar *base;
3409: num = gs->num_gop_local_reduce;
3410: reduce = gs->gop_local_reduce;
3411: while ((map = *reduce++))
3412: {
3413: base = vals + map[0] * step;
3415: /* wall */
3416: if (*num == 2)
3417: {
3418: num ++;
3419: rvec_add(base,vals+map[1]*step,step);
3420: }
3421: /* corner shared by three elements */
3422: else if (*num == 3)
3423: {
3424: num ++;
3425: rvec_add(base,vals+map[1]*step,step);
3426: rvec_add(base,vals+map[2]*step,step);
3427: }
3428: /* corner shared by four elements */
3429: else if (*num == 4)
3430: {
3431: num ++;
3432: rvec_add(base,vals+map[1]*step,step);
3433: rvec_add(base,vals+map[2]*step,step);
3434: rvec_add(base,vals+map[3]*step,step);
3435: }
3436: /* general case ... odd geoms ... 3D*/
3437: else
3438: {
3439: num++;
3440: while (*++map >= 0)
3441: {rvec_add(base,vals+*map*step,step);}
3442: }
3443: }
3444: }
3447: /******************************************************************************
3448: Function: gather_scatter
3450: Input :
3451: Output:
3452: Return:
3453: Description:
3454: ******************************************************************************/
3455: static
3456: void
3457: gs_gop_vec_local_out( gs_id *gs, PetscScalar *vals,
3458: int step)
3459: {
3460: int *num, *map, **reduce;
3461: PetscScalar *base;
3464: num = gs->num_gop_local_reduce;
3465: reduce = gs->gop_local_reduce;
3466: while ((map = *reduce++))
3467: {
3468: base = vals + map[0] * step;
3470: /* wall */
3471: if (*num == 2)
3472: {
3473: num ++;
3474: rvec_copy(vals+map[1]*step,base,step);
3475: }
3476: /* corner shared by three elements */
3477: else if (*num == 3)
3478: {
3479: num ++;
3480: rvec_copy(vals+map[1]*step,base,step);
3481: rvec_copy(vals+map[2]*step,base,step);
3482: }
3483: /* corner shared by four elements */
3484: else if (*num == 4)
3485: {
3486: num ++;
3487: rvec_copy(vals+map[1]*step,base,step);
3488: rvec_copy(vals+map[2]*step,base,step);
3489: rvec_copy(vals+map[3]*step,base,step);
3490: }
3491: /* general case ... odd geoms ... 3D*/
3492: else
3493: {
3494: num++;
3495: while (*++map >= 0)
3496: {rvec_copy(vals+*map*step,base,step);}
3497: }
3498: }
3499: }
3503: /******************************************************************************
3504: Function: gather_scatter
3506: VERSION 3 ::
3508: Input :
3509: Output:
3510: Return:
3511: Description:
3512: ******************************************************************************/
3513: static
3514: void
3515: gs_gop_vec_pairwise_plus( gs_id *gs, PetscScalar *in_vals,
3516: int step)
3517: {
3518: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3519: int *iptr, *msg_list, *msg_size, **msg_nodes;
3520: int *pw, *list, *size, **nodes;
3521: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3522: MPI_Status status;
3523: PetscBLASInt i1;
3524: int ierr;
3526: /* strip and load s */
3527: msg_list =list = gs->pair_list;
3528: msg_size =size = gs->msg_sizes;
3529: msg_nodes=nodes = gs->node_list;
3530: iptr=pw = gs->pw_elm_list;
3531: dptr1=dptr3 = gs->pw_vals;
3532: msg_ids_in = ids_in = gs->msg_ids_in;
3533: msg_ids_out = ids_out = gs->msg_ids_out;
3534: dptr2 = gs->out;
3535: in1=in2 = gs->in;
3537: /* post the receives */
3538: /* msg_nodes=nodes; */
3539: do
3540: {
3541: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3542: second one *list and do list++ afterwards */
3543: MPI_Irecv(in1, *size *step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3544: gs->gs_comm, msg_ids_in++);
3545: in1 += *size++ *step;
3546: }
3547: while (*++msg_nodes);
3548: msg_nodes=nodes;
3550: /* load gs values into in out gs buffers */
3551: while (*iptr >= 0)
3552: {
3553: rvec_copy(dptr3,in_vals + *iptr*step,step);
3554: dptr3+=step;
3555: iptr++;
3556: }
3558: /* load out buffers and post the sends */
3559: while ((iptr = *msg_nodes++))
3560: {
3561: dptr3 = dptr2;
3562: while (*iptr >= 0)
3563: {
3564: rvec_copy(dptr2,dptr1 + *iptr*step,step);
3565: dptr2+=step;
3566: iptr++;
3567: }
3568: MPI_Isend(dptr3, *msg_size++ *step, MPIU_SCALAR, *msg_list++,
3569: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3570: }
3572: /* tree */
3573: if (gs->max_left_over)
3574: {gs_gop_vec_tree_plus(gs,in_vals,step);}
3576: /* process the received data */
3577: msg_nodes=nodes;
3578: while ((iptr = *nodes++)){
3579: PetscScalar d1 = 1.0;
3580: /* Should I check the return value of MPI_Wait() or status? */
3581: /* Can this loop be replaced by a call to MPI_Waitall()? */
3582: MPI_Wait(ids_in++, &status);
3583: while (*iptr >= 0) {
3584: BLASaxpy_(&step,&d1,in2,&i1,dptr1 + *iptr*step,&i1);
3585: in2+=step;
3586: iptr++;
3587: }
3588: }
3590: /* replace vals */
3591: while (*pw >= 0)
3592: {
3593: rvec_copy(in_vals + *pw*step,dptr1,step);
3594: dptr1+=step;
3595: pw++;
3596: }
3598: /* clear isend message handles */
3599: /* This changed for clarity though it could be the same */
3600: while (*msg_nodes++)
3601: /* Should I check the return value of MPI_Wait() or status? */
3602: /* Can this loop be replaced by a call to MPI_Waitall()? */
3603: {MPI_Wait(ids_out++, &status);}
3606: }
3610: /******************************************************************************
3611: Function: gather_scatter
3613: Input :
3614: Output:
3615: Return:
3616: Description:
3617: ******************************************************************************/
3618: static
3619: void
3620: gs_gop_vec_tree_plus( gs_id *gs, PetscScalar *vals, int step)
3621: {
3622: int size, *in, *out;
3623: PetscScalar *buf, *work;
3624: int op[] = {GL_ADD,0};
3625: PetscBLASInt i1 = 1;
3628: /* copy over to local variables */
3629: in = gs->tree_map_in;
3630: out = gs->tree_map_out;
3631: buf = gs->tree_buf;
3632: work = gs->tree_work;
3633: size = gs->tree_nel*step;
3635: /* zero out collection buffer */
3636: rvec_zero(buf,size);
3639: /* copy over my contributions */
3640: while (*in >= 0)
3641: {
3642: BLAScopy_(&step,vals + *in++*step,&i1,buf + *out++*step,&i1);
3643: }
3645: /* perform fan in/out on full buffer */
3646: /* must change grop to handle the blas */
3647: grop(buf,work,size,op);
3649: /* reset */
3650: in = gs->tree_map_in;
3651: out = gs->tree_map_out;
3653: /* get the portion of the results I need */
3654: while (*in >= 0)
3655: {
3656: BLAScopy_(&step,buf + *out++*step,&i1,vals + *in++*step,&i1);
3657: }
3659: }
3663: /******************************************************************************
3664: Function: gather_scatter
3666: Input :
3667: Output:
3668: Return:
3669: Description:
3670: ******************************************************************************/
3671: void
3672: gs_gop_hc( gs_id *gs, PetscScalar *vals, const char *op, int dim)
3673: {
3675: switch (*op) {
3676: case '+':
3677: gs_gop_plus_hc(gs,vals,dim);
3678: break;
3679: default:
3680: error_msg_warning("gs_gop_hc() :: %c is not a valid op",op[0]);
3681: error_msg_warning("gs_gop_hc() :: default :: plus\n");
3682: gs_gop_plus_hc(gs,vals,dim);
3683: break;
3684: }
3685: }
3689: /******************************************************************************
3690: Function: gather_scatter
3692: Input :
3693: Output:
3694: Return:
3695: Description:
3696: ******************************************************************************/
3697: static void
3698: gs_gop_plus_hc( gs_id *gs, PetscScalar *vals, int dim)
3699: {
3700: /* if there's nothing to do return */
3701: if (dim<=0)
3702: {return;}
3704: /* can't do more dimensions then exist */
3705: dim = PetscMin(dim,i_log2_num_nodes);
3707: /* local only operations!!! */
3708: if (gs->num_local)
3709: {gs_gop_local_plus(gs,vals);}
3711: /* if intersection tree/pairwise and local isn't empty */
3712: if (gs->num_local_gop)
3713: {
3714: gs_gop_local_in_plus(gs,vals);
3716: /* pairwise will do tree inside ... */
3717: if (gs->num_pairs)
3718: {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3720: /* tree only */
3721: else if (gs->max_left_over)
3722: {gs_gop_tree_plus_hc(gs,vals,dim);}
3723:
3724: gs_gop_local_out(gs,vals);
3725: }
3726: /* if intersection tree/pairwise and local is empty */
3727: else
3728: {
3729: /* pairwise will do tree inside */
3730: if (gs->num_pairs)
3731: {gs_gop_pairwise_plus_hc(gs,vals,dim);}
3732:
3733: /* tree */
3734: else if (gs->max_left_over)
3735: {gs_gop_tree_plus_hc(gs,vals,dim);}
3736: }
3738: }
3741: /******************************************************************************
3742: VERSION 3 ::
3744: Input :
3745: Output:
3746: Return:
3747: Description:
3748: ******************************************************************************/
3749: static
3750: void
3751: gs_gop_pairwise_plus_hc( gs_id *gs, PetscScalar *in_vals, int dim)
3752: {
3753: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
3754: int *iptr, *msg_list, *msg_size, **msg_nodes;
3755: int *pw, *list, *size, **nodes;
3756: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
3757: MPI_Status status;
3758: int i, mask=1;
3759: int ierr;
3761: for (i=1; i<dim; i++)
3762: {mask<<=1; mask++;}
3765: /* strip and load s */
3766: msg_list =list = gs->pair_list;
3767: msg_size =size = gs->msg_sizes;
3768: msg_nodes=nodes = gs->node_list;
3769: iptr=pw = gs->pw_elm_list;
3770: dptr1=dptr3 = gs->pw_vals;
3771: msg_ids_in = ids_in = gs->msg_ids_in;
3772: msg_ids_out = ids_out = gs->msg_ids_out;
3773: dptr2 = gs->out;
3774: in1=in2 = gs->in;
3776: /* post the receives */
3777: /* msg_nodes=nodes; */
3778: do
3779: {
3780: /* Should MPI_ANY_SOURCE be replaced by *list ? In that case do the
3781: second one *list and do list++ afterwards */
3782: if ((my_id|mask)==(*list|mask))
3783: {
3784: MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list++,
3785: gs->gs_comm, msg_ids_in++);
3786: in1 += *size++;
3787: }
3788: else
3789: {list++; size++;}
3790: }
3791: while (*++msg_nodes);
3793: /* load gs values into in out gs buffers */
3794: while (*iptr >= 0)
3795: {*dptr3++ = *(in_vals + *iptr++);}
3797: /* load out buffers and post the sends */
3798: msg_nodes=nodes;
3799: list = msg_list;
3800: while ((iptr = *msg_nodes++))
3801: {
3802: if ((my_id|mask)==(*list|mask))
3803: {
3804: dptr3 = dptr2;
3805: while (*iptr >= 0)
3806: {*dptr2++ = *(dptr1 + *iptr++);}
3807: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
3808: /* is msg_ids_out++ correct? */
3809: MPI_Isend(dptr3, *msg_size++, MPIU_SCALAR, *list++,
3810: MSGTAG1+my_id, gs->gs_comm, msg_ids_out++);
3811: }
3812: else
3813: {list++; msg_size++;}
3814: }
3816: /* do the tree while we're waiting */
3817: if (gs->max_left_over)
3818: {gs_gop_tree_plus_hc(gs,in_vals,dim);}
3820: /* process the received data */
3821: msg_nodes=nodes;
3822: list = msg_list;
3823: while ((iptr = *nodes++))
3824: {
3825: if ((my_id|mask)==(*list|mask))
3826: {
3827: /* Should I check the return value of MPI_Wait() or status? */
3828: /* Can this loop be replaced by a call to MPI_Waitall()? */
3829: MPI_Wait(ids_in++, &status);
3830: while (*iptr >= 0)
3831: {*(dptr1 + *iptr++) += *in2++;}
3832: }
3833: list++;
3834: }
3836: /* replace vals */
3837: while (*pw >= 0)
3838: {*(in_vals + *pw++) = *dptr1++;}
3840: /* clear isend message handles */
3841: /* This changed for clarity though it could be the same */
3842: while (*msg_nodes++)
3843: {
3844: if ((my_id|mask)==(*msg_list|mask))
3845: {
3846: /* Should I check the return value of MPI_Wait() or status? */
3847: /* Can this loop be replaced by a call to MPI_Waitall()? */
3848: MPI_Wait(ids_out++, &status);
3849: }
3850: msg_list++;
3851: }
3854: }
3858: /******************************************************************************
3859: Function: gather_scatter
3861: Input :
3862: Output:
3863: Return:
3864: Description:
3865: ******************************************************************************/
3866: static
3867: void
3868: gs_gop_tree_plus_hc(gs_id *gs, PetscScalar *vals, int dim)
3869: {
3870: int size;
3871: int *in, *out;
3872: PetscScalar *buf, *work;
3873: int op[] = {GL_ADD,0};
3875: in = gs->tree_map_in;
3876: out = gs->tree_map_out;
3877: buf = gs->tree_buf;
3878: work = gs->tree_work;
3879: size = gs->tree_nel;
3881: rvec_zero(buf,size);
3883: while (*in >= 0)
3884: {*(buf + *out++) = *(vals + *in++);}
3886: in = gs->tree_map_in;
3887: out = gs->tree_map_out;
3889: grop_hc(buf,work,size,op,dim);
3891: while (*in >= 0)
3892: {*(vals + *in++) = *(buf + *out++);}
3894: }