Actual source code: comb.c
1: /*$Id: comb.c,v 1.40 2001/09/07 20:08:55 bsmith Exp $*/
3: /*
4: Split phase global vector reductions with support for combining the
5: communication portion of several operations. Using MPI-1.1 support only
7: The idea for this and much of the initial code is contributed by
8: Victor Eijkhout.
10: Usage:
11: VecDotBegin(Vec,Vec,PetscScalar *);
12: VecNormBegin(Vec,NormType,PetscReal *);
13: ....
14: VecDotEnd(Vec,Vec,PetscScalar *);
15: VecNormEnd(Vec,NormType,PetscReal *);
17: Limitations:
18: - The order of the xxxEnd() functions MUST be in the same order
19: as the xxxBegin(). There is extensive error checking to try to
20: insure that the user calls the routines in the correct order
21: */
23: #include src/vec/vecimpl.h
25: #define STATE_BEGIN 0
26: #define STATE_END 1
28: #define REDUCE_SUM 0
29: #define REDUCE_MAX 1
30: #define REDUCE_MIN 2
32: typedef struct {
33: MPI_Comm comm;
34: PetscScalar *lvalues; /* this are the reduced values before call to MPI_Allreduce() */
35: PetscScalar *gvalues; /* values after call to MPI_Allreduce() */
36: void **invecs; /* for debugging only, vector/memory used with each op */
37: int *reducetype; /* is particular value to be summed or maxed? */
38: int state; /* are we calling xxxBegin() or xxxEnd()? */
39: int maxops; /* total amount of space we have for requests */
40: int numopsbegin; /* number of requests that have been queued in */
41: int numopsend; /* number of requests that have been gotten by user */
42: } PetscSplitReduction;
43: /*
44: Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
45: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by
46: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
47: some of each.
48: */
50: #undef __FUNCT__
52: /*
53: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
54: */
55: int PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
56: {
60: ierr = PetscNew(PetscSplitReduction,sr);
61: (*sr)->numopsbegin = 0;
62: (*sr)->numopsend = 0;
63: (*sr)->state = STATE_BEGIN;
64: (*sr)->maxops = 32;
65: ierr = PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->lvalues);
66: ierr = PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->gvalues);
67: ierr = PetscMalloc(32*sizeof(void*),&(*sr)->invecs);
68: (*sr)->comm = comm;
69: ierr = PetscMalloc(32*sizeof(int),&(*sr)->reducetype);
70: return(0);
71: }
73: /*
74: This function is the MPI reduction operation used when there is
75: a combination of sums and max in the reduction. The call below to
76: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
77: MPI operator PetscSplitReduction_Op.
78: */
79: MPI_Op PetscSplitReduction_Op = 0;
81: EXTERN_C_BEGIN
82: #undef __FUNCT__
84: void PetscSplitReduction_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
85: {
86: PetscScalar *xin = (PetscScalar *)in,*xout = (PetscScalar*)out;
87: int i,count = *cnt;
90: if (*datatype != MPIU_REAL) {
91: (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
92: MPI_Abort(MPI_COMM_WORLD,1);
93: }
94: #if defined(PETSC_USE_COMPLEX)
95: count = count/2;
96: #endif
97: count = count/2;
98: for (i=0; i<count; i++) {
99: if (((int)PetscRealPart(xin[count+i])) == REDUCE_SUM) { /* second half of xin[] is flags for reduction type */
100: xout[i] += xin[i];
101: } else if ((int)PetscRealPart(xin[count+i]) == REDUCE_MAX) {
102: xout[i] = PetscMax(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
103: } else if ((int)PetscRealPart(xin[count+i]) == REDUCE_MIN) {
104: xout[i] = PetscMin(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
105: } else {
106: (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
107: MPI_Abort(MPI_COMM_WORLD,1);
108: }
109: }
110: PetscStackPop; /* since function returns void cannot use PetscFunctionReturn(); */
111: return;
112: }
113: EXTERN_C_END
115: #undef __FUNCT__
117: /*
118: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
119: */
120: int PetscSplitReductionApply(PetscSplitReduction *sr)
121: {
122: int size,ierr,i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
123: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
124: int sum_flg = 0,max_flg = 0, min_flg = 0;
125: MPI_Comm comm = sr->comm;
128: if (sr->numopsend > 0) {
129: SETERRQ(1,"Cannot call this after VecxxxEnd() has been called");
130: }
132: PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
133: ierr = MPI_Comm_size(sr->comm,&size);
134: if (size == 1) {
135: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
136: } else {
137: /* determine if all reductions are sum, max, or min */
138: for (i=0; i<numops; i++) {
139: if (reducetype[i] == REDUCE_MAX) {
140: max_flg = 1;
141: } else if (reducetype[i] == REDUCE_SUM) {
142: sum_flg = 1;
143: } else if (reducetype[i] == REDUCE_MIN) {
144: min_flg = 1;
145: } else {
146: SETERRQ(1,"Error in PetscSplitReduction data structure, probably memory corruption");
147: }
148: }
149: if (sum_flg + max_flg + min_flg > 1) {
150: /*
151: after all the entires in lvalues we store the reducetype flags to indicate
152: to the reduction operations what are sums and what are max
153: */
154: for (i=0; i<numops; i++) {
155: lvalues[numops+i] = reducetype[i];
156: }
157: #if defined(PETSC_USE_COMPLEX)
158: MPI_Allreduce(lvalues,gvalues,2*2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
159: #else
160: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
161: #endif
162: } else if (max_flg) {
163: #if defined(PETSC_USE_COMPLEX)
164: /*
165: complex case we max both the real and imaginary parts, the imaginary part
166: is just ignored later
167: */
168: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MAX,comm);
169: #else
170: MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MAX,comm);
171: #endif
172: } else if (min_flg) {
173: #if defined(PETSC_USE_COMPLEX)
174: /*
175: complex case we min both the real and imaginary parts, the imaginary part
176: is just ignored later
177: */
178: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MIN,comm);
179: #else
180: MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MIN,comm);
181: #endif
182: } else {
183: MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,PetscSum_Op,comm);
184: }
185: }
186: sr->state = STATE_END;
187: sr->numopsend = 0;
188: PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
189: return(0);
190: }
193: #undef __FUNCT__
195: /*
196: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
197: */
198: int PetscSplitReductionExtend(PetscSplitReduction *sr)
199: {
200: int maxops = sr->maxops,*reducetype = sr->reducetype,ierr;
201: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
202: void *invecs = sr->invecs;
205: sr->maxops = 2*maxops;
206: PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->lvalues);
207: PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->gvalues);
208: PetscMalloc(2*maxops*sizeof(int),&sr->reducetype);
209: PetscMalloc(2*maxops*sizeof(void*),&sr->invecs);
210: PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
211: PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
212: PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(int));
213: PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
214: PetscFree(lvalues);
215: PetscFree(gvalues);
216: PetscFree(reducetype);
217: PetscFree(invecs);
218: return(0);
219: }
221: #undef __FUNCT__
223: int PetscSplitReductionDestroy(PetscSplitReduction *sr)
224: {
228: PetscFree(sr->lvalues);
229: PetscFree(sr->gvalues);
230: PetscFree(sr->reducetype);
231: PetscFree(sr->invecs);
232: PetscFree(sr);
233: return(0);
234: }
236: static int Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
238: EXTERN_C_BEGIN
239: #undef __FUNCT__
241: /*
242: Private routine to delete internal storage when a communicator is freed.
243: This is called by MPI, not by users.
245: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
246: it was MPI_Comm *comm.
247: */
248: int Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
249: {
253: PetscLogInfo(0,"Petsc_DelReduction:Deleting reduction data in an MPI_Comm %ldn",(long)comm);
254: PetscSplitReductionDestroy((PetscSplitReduction *)attr_val);
255: return(0);
256: }
257: EXTERN_C_END
259: /*
260: PetscSplitReductionGet - Gets the split reduction object from a
261: PETSc vector, creates if it does not exit.
263: */
264: #undef __FUNCT__
266: int PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
267: {
268: int ierr,flag;
271: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
272: /*
273: The calling sequence of the 2nd argument to this function changed
274: between MPI Standard 1.0 and the revisions 1.1 Here we match the
275: new standard, if you are using an MPI implementation that uses
276: the older version you will get a warning message about the next line;
277: it is only a warning message and should do no harm.
278: */
279: MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
280: /*
281: Also create the special MPI reduction operation that may be needed
282: */
283: MPI_Op_create(PetscSplitReduction_Local,1,&PetscSplitReduction_Op);
284: }
285: MPI_Attr_get(comm,Petsc_Reduction_keyval,(void **)sr,&flag);
286: if (!flag) { /* doesn't exist yet so create it and put it in */
287: PetscSplitReductionCreate(comm,sr);
288: MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
289: PetscLogInfo(0,"PetscSplitReductionGet:Putting reduction data in an MPI_Comm %ldn",(long)comm);
290: }
292: return(0);
293: }
295: /* ----------------------------------------------------------------------------------------------------*/
297: #undef __FUNCT__
299: /*@
300: VecDotBegin - Starts a split phase dot product computation.
302: Input Parameters:
303: + x - the first vector
304: . y - the second vector
305: - result - where the result will go (can be PETSC_NULL)
307: Level: advanced
309: Notes:
310: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
312: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
313: VecTDotBegin(), VecTDotEnd()
314: @*/
315: int VecDotBegin(Vec x,Vec y,PetscScalar *result)
316: {
317: int ierr;
318: PetscSplitReduction *sr;
319: MPI_Comm comm;
322: PetscObjectGetComm((PetscObject)x,&comm);
323: PetscSplitReductionGet(comm,&sr);
324: if (sr->state == STATE_END) {
325: SETERRQ(1,"Called before all VecxxxEnd() called");
326: }
327: if (sr->numopsbegin >= sr->maxops) {
328: PetscSplitReductionExtend(sr);
329: }
330: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
331: sr->invecs[sr->numopsbegin] = (void*)x;
332: if (!x->ops->dot_local) SETERRQ(1,"Vector does not suppport local dots");
333: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
334: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
335: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
336: return(0);
337: }
339: #undef __FUNCT__
341: /*@
342: VecDotEnd - Ends a split phase dot product computation.
344: Input Parameters:
345: + x - the first vector (can be PETSC_NULL)
346: . y - the second vector (can be PETSC_NULL)
347: - result - where the result will go
349: Level: advanced
351: Notes:
352: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
354: seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
355: VecTDotBegin(),VecTDotEnd()
357: @*/
358: int VecDotEnd(Vec x,Vec y,PetscScalar *result)
359: {
360: int ierr;
361: PetscSplitReduction *sr;
362: MPI_Comm comm;
365: PetscObjectGetComm((PetscObject)x,&comm);
366: PetscSplitReductionGet(comm,&sr);
367:
368: if (sr->state != STATE_END) {
369: /* this is the first call to VecxxxEnd() so do the communication */
370: PetscSplitReductionApply(sr);
371: }
373: if (sr->numopsend >= sr->numopsbegin) {
374: SETERRQ(1,"Called VecxxxEnd() more times then VecxxxBegin()");
375: }
376: if (x && (void*) x != sr->invecs[sr->numopsend]) {
377: SETERRQ(1,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
378: }
379: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) {
380: SETERRQ(1,"Called VecDotEnd() on a reduction started with VecNormBegin()");
381: }
382: *result = sr->gvalues[sr->numopsend++];
384: /*
385: We are finished getting all the results so reset to no outstanding requests
386: */
387: if (sr->numopsend == sr->numopsbegin) {
388: sr->state = STATE_BEGIN;
389: sr->numopsend = 0;
390: sr->numopsbegin = 0;
391: }
392: return(0);
393: }
395: #undef __FUNCT__
397: /*@
398: VecTDotBegin - Starts a split phase transpose dot product computation.
400: Input Parameters:
401: + x - the first vector
402: . y - the second vector
403: - result - where the result will go (can be PETSC_NULL)
405: Level: advanced
407: Notes:
408: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
410: seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
411: VecDotBegin(), VecDotEnd()
413: @*/
414: int VecTDotBegin(Vec x,Vec y,PetscScalar *result)
415: {
416: int ierr;
417: PetscSplitReduction *sr;
418: MPI_Comm comm;
421: PetscObjectGetComm((PetscObject)x,&comm);
422: PetscSplitReductionGet(comm,&sr);
423: if (sr->state == STATE_END) {
424: SETERRQ(1,"Called before all VecxxxEnd() called");
425: }
426: if (sr->numopsbegin >= sr->maxops) {
427: PetscSplitReductionExtend(sr);
428: }
429: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
430: sr->invecs[sr->numopsbegin] = (void*)x;
431: if (!x->ops->tdot_local) SETERRQ(1,"Vector does not suppport local dots");
432: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
433: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
434: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
435: return(0);
436: }
438: #undef __FUNCT__
440: /*@
441: VecTDotEnd - Ends a split phase transpose dot product computation.
443: Input Parameters:
444: + x - the first vector (can be PETSC_NULL)
445: . y - the second vector (can be PETSC_NULL)
446: - result - where the result will go
448: Level: advanced
450: Notes:
451: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
453: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
454: VecDotBegin(), VecDotEnd()
455: @*/
456: int VecTDotEnd(Vec x,Vec y,PetscScalar *result)
457: {
458: int ierr;
461: /*
462: TDotEnd() is the same as DotEnd() so reuse the code
463: */
464: VecDotEnd(x,y,result);
465: return(0);
466: }
468: /* -------------------------------------------------------------------------*/
470: #undef __FUNCT__
472: /*@
473: VecNormBegin - Starts a split phase norm computation.
475: Input Parameters:
476: + x - the first vector
477: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
478: - result - where the result will go (can be PETSC_NULL)
480: Level: advanced
482: Notes:
483: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
485: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()
487: @*/
488: int VecNormBegin(Vec x,NormType ntype,PetscReal *result)
489: {
490: int ierr;
491: PetscSplitReduction *sr;
492: PetscReal lresult[2];
493: MPI_Comm comm;
496: PetscObjectGetComm((PetscObject)x,&comm);
497: PetscSplitReductionGet(comm,&sr);
498: if (sr->state == STATE_END) {
499: SETERRQ(1,"Called before all VecxxxEnd() called");
500: }
501: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
502: PetscSplitReductionExtend(sr);
503: }
504:
505: sr->invecs[sr->numopsbegin] = (void*)x;
506: if (!x->ops->norm_local) SETERRQ(1,"Vector does not support local norms");
507: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
508: (*x->ops->norm_local)(x,ntype,lresult);
509: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
510: if (ntype == NORM_2) lresult[0] = lresult[0]*lresult[0];
511: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1]*lresult[1];
512: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
513: else sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
514: sr->lvalues[sr->numopsbegin++] = lresult[0];
515: if (ntype == NORM_1_AND_2) {
516: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
517: sr->lvalues[sr->numopsbegin++] = lresult[1];
518: }
519: return(0);
520: }
522: #undef __FUNCT__
524: /*@
525: VecNormEnd - Ends a split phase norm computation.
527: Input Parameters:
528: + x - the first vector (can be PETSC_NULL)
529: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
530: - result - where the result will go
532: Level: advanced
534: Notes:
535: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
537: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()
539: @*/
540: int VecNormEnd(Vec x,NormType ntype,PetscReal *result)
541: {
542: int ierr;
543: PetscSplitReduction *sr;
544: MPI_Comm comm;
547: PetscObjectGetComm((PetscObject)x,&comm);
548: PetscSplitReductionGet(comm,&sr);
549:
550: if (sr->state != STATE_END) {
551: /* this is the first call to VecxxxEnd() so do the communication */
552: PetscSplitReductionApply(sr);
553: }
555: if (sr->numopsend >= sr->numopsbegin) {
556: SETERRQ(1,"Called VecxxxEnd() more times then VecxxxBegin()");
557: }
558: if (x && (void*)x != sr->invecs[sr->numopsend]) {
559: SETERRQ(1,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
560: }
561: if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) {
562: SETERRQ(1,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
563: }
564: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
565: if (ntype == NORM_2) {
566: result[0] = sqrt(result[0]);
567: } else if (ntype == NORM_1_AND_2) {
568: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
569: result[1] = sqrt(result[1]);
570: }
572: if (sr->numopsend == sr->numopsbegin) {
573: sr->state = STATE_BEGIN;
574: sr->numopsend = 0;
575: sr->numopsbegin = 0;
576: }
577: return(0);
578: }
580: /*
581: Possibly add
583: PetscReductionSumBegin/End()
584: PetscReductionMaxBegin/End()
585: PetscReductionMinBegin/End()
586: or have more like MPI with a single function with flag for Op? Like first better
587: */