Actual source code: comb.c

  1: /*$Id: comb.c,v 1.40 2001/09/07 20:08:55 bsmith Exp $*/

  3: /*
  4:       Split phase global vector reductions with support for combining the
  5:    communication portion of several operations. Using MPI-1.1 support only

  7:       The idea for this and much of the initial code is contributed by 
  8:    Victor Eijkhout.

 10:        Usage:
 11:              VecDotBegin(Vec,Vec,PetscScalar *);
 12:              VecNormBegin(Vec,NormType,PetscReal *);
 13:              ....
 14:              VecDotEnd(Vec,Vec,PetscScalar *);
 15:              VecNormEnd(Vec,NormType,PetscReal *);

 17:        Limitations: 
 18:          - The order of the xxxEnd() functions MUST be in the same order
 19:            as the xxxBegin(). There is extensive error checking to try to 
 20:            insure that the user calls the routines in the correct order
 21: */

 23:  #include src/vec/vecimpl.h

 25: #define STATE_BEGIN 0
 26: #define STATE_END   1

 28: #define REDUCE_SUM  0
 29: #define REDUCE_MAX  1
 30: #define REDUCE_MIN  2

 32: typedef struct {
 33:   MPI_Comm     comm;
 34:   PetscScalar  *lvalues;    /* this are the reduced values before call to MPI_Allreduce() */
 35:   PetscScalar  *gvalues;    /* values after call to MPI_Allreduce() */
 36:   void         **invecs;    /* for debugging only, vector/memory used with each op */
 37:   int          *reducetype; /* is particular value to be summed or maxed? */
 38:   int          state;       /* are we calling xxxBegin() or xxxEnd()? */
 39:   int          maxops;      /* total amount of space we have for requests */
 40:   int          numopsbegin; /* number of requests that have been queued in */
 41:   int          numopsend;   /* number of requests that have been gotten by user */
 42: } PetscSplitReduction;
 43: /*
 44:    Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
 45: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by 
 46: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
 47: some of each.
 48: */

 50: #undef __FUNCT__
 52: /*
 53:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 54: */
 55: int PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
 56: {

 60:   ierr               = PetscNew(PetscSplitReduction,sr);
 61:   (*sr)->numopsbegin = 0;
 62:   (*sr)->numopsend   = 0;
 63:   (*sr)->state       = STATE_BEGIN;
 64:   (*sr)->maxops      = 32;
 65:   ierr               = PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->lvalues);
 66:   ierr               = PetscMalloc(2*32*sizeof(PetscScalar),&(*sr)->gvalues);
 67:   ierr               = PetscMalloc(32*sizeof(void*),&(*sr)->invecs);
 68:   (*sr)->comm        = comm;
 69:   ierr               = PetscMalloc(32*sizeof(int),&(*sr)->reducetype);
 70:   return(0);
 71: }

 73: /*
 74:        This function is the MPI reduction operation used when there is 
 75:    a combination of sums and max in the reduction. The call below to 
 76:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the 
 77:    MPI operator PetscSplitReduction_Op.
 78: */
 79: MPI_Op PetscSplitReduction_Op = 0;

 81: EXTERN_C_BEGIN
 82: #undef __FUNCT__
 84: void PetscSplitReduction_Local(void *in,void *out,int *cnt,MPI_Datatype *datatype)
 85: {
 86:   PetscScalar *xin = (PetscScalar *)in,*xout = (PetscScalar*)out;
 87:   int         i,count = *cnt;

 90:   if (*datatype != MPIU_REAL) {
 91:     (*PetscErrorPrintf)("Can only handle MPIU_REAL data types");
 92:     MPI_Abort(MPI_COMM_WORLD,1);
 93:   }
 94: #if defined(PETSC_USE_COMPLEX)
 95:   count = count/2;
 96: #endif
 97:   count = count/2;
 98:   for (i=0; i<count; i++) {
 99:     if (((int)PetscRealPart(xin[count+i])) == REDUCE_SUM) { /* second half of xin[] is flags for reduction type */
100:       xout[i] += xin[i];
101:     } else if ((int)PetscRealPart(xin[count+i]) == REDUCE_MAX) {
102:       xout[i] = PetscMax(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
103:     } else if ((int)PetscRealPart(xin[count+i]) == REDUCE_MIN) {
104:       xout[i] = PetscMin(*(PetscReal *)(xout+i),*(PetscReal *)(xin+i));
105:     } else {
106:       (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
107:       MPI_Abort(MPI_COMM_WORLD,1);
108:     }
109:   }
110:   PetscStackPop; /* since function returns void cannot use PetscFunctionReturn(); */
111:   return;
112: }
113: EXTERN_C_END

115: #undef __FUNCT__
117: /*
118:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
119: */
120: int PetscSplitReductionApply(PetscSplitReduction *sr)
121: {
122:   int         size,ierr,i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
123:   PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
124:   int         sum_flg = 0,max_flg = 0, min_flg = 0;
125:   MPI_Comm    comm = sr->comm;

128:   if (sr->numopsend > 0) {
129:     SETERRQ(1,"Cannot call this after VecxxxEnd() has been called");
130:   }

132:   PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
133:   ierr  = MPI_Comm_size(sr->comm,&size);
134:   if (size == 1) {
135:     PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
136:   } else {
137:     /* determine if all reductions are sum, max, or min */
138:     for (i=0; i<numops; i++) {
139:       if (reducetype[i] == REDUCE_MAX) {
140:         max_flg = 1;
141:       } else if (reducetype[i] == REDUCE_SUM) {
142:         sum_flg = 1;
143:       } else if (reducetype[i] == REDUCE_MIN) {
144:         min_flg = 1;
145:       } else {
146:         SETERRQ(1,"Error in PetscSplitReduction data structure, probably memory corruption");
147:       }
148:     }
149:     if (sum_flg + max_flg + min_flg > 1) {
150:       /* 
151:          after all the entires in lvalues we store the reducetype flags to indicate
152:          to the reduction operations what are sums and what are max
153:       */
154:       for (i=0; i<numops; i++) {
155:         lvalues[numops+i] = reducetype[i];
156:       }
157: #if defined(PETSC_USE_COMPLEX)
158:       MPI_Allreduce(lvalues,gvalues,2*2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
159: #else
160:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,PetscSplitReduction_Op,comm);
161: #endif
162:     } else if (max_flg) {
163: #if defined(PETSC_USE_COMPLEX)
164:       /* 
165:         complex case we max both the real and imaginary parts, the imaginary part
166:         is just ignored later
167:       */
168:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MAX,comm);
169: #else
170:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MAX,comm);
171: #endif
172:     } else if (min_flg) {
173: #if defined(PETSC_USE_COMPLEX)
174:       /* 
175:         complex case we min both the real and imaginary parts, the imaginary part
176:         is just ignored later
177:       */
178:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_REAL,MPI_MIN,comm);
179: #else
180:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_REAL,MPI_MIN,comm);
181: #endif
182:     } else {
183:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,PetscSum_Op,comm);
184:     }
185:   }
186:   sr->state     = STATE_END;
187:   sr->numopsend = 0;
188:   PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
189:   return(0);
190: }


193: #undef __FUNCT__
195: /*
196:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
197: */
198: int PetscSplitReductionExtend(PetscSplitReduction *sr)
199: {
200:   int         maxops = sr->maxops,*reducetype = sr->reducetype,ierr;
201:   PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
202:   void        *invecs = sr->invecs;

205:   sr->maxops     = 2*maxops;
206:   PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->lvalues);
207:   PetscMalloc(2*2*maxops*sizeof(PetscScalar),&sr->gvalues);
208:   PetscMalloc(2*maxops*sizeof(int),&sr->reducetype);
209:   PetscMalloc(2*maxops*sizeof(void*),&sr->invecs);
210:   PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
211:   PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
212:   PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(int));
213:   PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
214:   PetscFree(lvalues);
215:   PetscFree(gvalues);
216:   PetscFree(reducetype);
217:   PetscFree(invecs);
218:   return(0);
219: }

221: #undef __FUNCT__
223: int PetscSplitReductionDestroy(PetscSplitReduction *sr)
224: {

228:   PetscFree(sr->lvalues);
229:   PetscFree(sr->gvalues);
230:   PetscFree(sr->reducetype);
231:   PetscFree(sr->invecs);
232:   PetscFree(sr);
233:   return(0);
234: }

236: static int Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

238: EXTERN_C_BEGIN
239: #undef __FUNCT__  
241: /*
242:    Private routine to delete internal storage when a communicator is freed.
243:   This is called by MPI, not by users.

245:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
246:   it was MPI_Comm *comm.  
247: */
248: int Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
249: {

253:   PetscLogInfo(0,"Petsc_DelReduction:Deleting reduction data in an MPI_Comm %ldn",(long)comm);
254:   PetscSplitReductionDestroy((PetscSplitReduction *)attr_val);
255:   return(0);
256: }
257: EXTERN_C_END

259: /*
260:      PetscSplitReductionGet - Gets the split reduction object from a 
261:         PETSc vector, creates if it does not exit.

263: */
264: #undef __FUNCT__
266: int PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
267: {
268:   int      ierr,flag;

271:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
272:     /* 
273:        The calling sequence of the 2nd argument to this function changed
274:        between MPI Standard 1.0 and the revisions 1.1 Here we match the 
275:        new standard, if you are using an MPI implementation that uses 
276:        the older version you will get a warning message about the next line;
277:        it is only a warning message and should do no harm.
278:     */
279:     MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
280:     /*
281:          Also create the special MPI reduction operation that may be needed 
282:     */
283:     MPI_Op_create(PetscSplitReduction_Local,1,&PetscSplitReduction_Op);
284:   }
285:   MPI_Attr_get(comm,Petsc_Reduction_keyval,(void **)sr,&flag);
286:   if (!flag) {  /* doesn't exist yet so create it and put it in */
287:     PetscSplitReductionCreate(comm,sr);
288:     MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
289:     PetscLogInfo(0,"PetscSplitReductionGet:Putting reduction data in an MPI_Comm %ldn",(long)comm);
290:   }

292:   return(0);
293: }

295: /* ----------------------------------------------------------------------------------------------------*/

297: #undef __FUNCT__
299: /*@
300:    VecDotBegin - Starts a split phase dot product computation.

302:    Input Parameters:
303: +   x - the first vector
304: .   y - the second vector
305: -   result - where the result will go (can be PETSC_NULL)

307:    Level: advanced

309:    Notes:
310:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

312: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
313:          VecTDotBegin(), VecTDotEnd()
314: @*/
315: int VecDotBegin(Vec x,Vec y,PetscScalar *result)
316: {
317:   int                 ierr;
318:   PetscSplitReduction *sr;
319:   MPI_Comm            comm;

322:   PetscObjectGetComm((PetscObject)x,&comm);
323:   PetscSplitReductionGet(comm,&sr);
324:   if (sr->state == STATE_END) {
325:     SETERRQ(1,"Called before all VecxxxEnd() called");
326:   }
327:   if (sr->numopsbegin >= sr->maxops) {
328:     PetscSplitReductionExtend(sr);
329:   }
330:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
331:   sr->invecs[sr->numopsbegin]     = (void*)x;
332:   if (!x->ops->dot_local) SETERRQ(1,"Vector does not suppport local dots");
333:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
334:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
335:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
336:   return(0);
337: }

339: #undef __FUNCT__
341: /*@
342:    VecDotEnd - Ends a split phase dot product computation.

344:    Input Parameters:
345: +  x - the first vector (can be PETSC_NULL)
346: .  y - the second vector (can be PETSC_NULL)
347: -  result - where the result will go

349:    Level: advanced

351:    Notes:
352:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

354: seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
355:          VecTDotBegin(),VecTDotEnd()

357: @*/
358: int VecDotEnd(Vec x,Vec y,PetscScalar *result)
359: {
360:   int                 ierr;
361:   PetscSplitReduction *sr;
362:   MPI_Comm            comm;

365:   PetscObjectGetComm((PetscObject)x,&comm);
366:   PetscSplitReductionGet(comm,&sr);
367: 
368:   if (sr->state != STATE_END) {
369:     /* this is the first call to VecxxxEnd() so do the communication */
370:     PetscSplitReductionApply(sr);
371:   }

373:   if (sr->numopsend >= sr->numopsbegin) {
374:     SETERRQ(1,"Called VecxxxEnd() more times then VecxxxBegin()");
375:   }
376:   if (x && (void*) x != sr->invecs[sr->numopsend]) {
377:     SETERRQ(1,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
378:   }
379:   if (sr->reducetype[sr->numopsend] != REDUCE_SUM) {
380:     SETERRQ(1,"Called VecDotEnd() on a reduction started with VecNormBegin()");
381:   }
382:   *result = sr->gvalues[sr->numopsend++];

384:   /*
385:      We are finished getting all the results so reset to no outstanding requests
386:   */
387:   if (sr->numopsend == sr->numopsbegin) {
388:     sr->state        = STATE_BEGIN;
389:     sr->numopsend    = 0;
390:     sr->numopsbegin  = 0;
391:   }
392:   return(0);
393: }

395: #undef __FUNCT__
397: /*@
398:    VecTDotBegin - Starts a split phase transpose dot product computation.

400:    Input Parameters:
401: +  x - the first vector
402: .  y - the second vector
403: -  result - where the result will go (can be PETSC_NULL)

405:    Level: advanced

407:    Notes:
408:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

410: seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
411:          VecDotBegin(), VecDotEnd()

413: @*/
414: int VecTDotBegin(Vec x,Vec y,PetscScalar *result)
415: {
416:   int                 ierr;
417:   PetscSplitReduction *sr;
418:   MPI_Comm            comm;

421:   PetscObjectGetComm((PetscObject)x,&comm);
422:   PetscSplitReductionGet(comm,&sr);
423:   if (sr->state == STATE_END) {
424:     SETERRQ(1,"Called before all VecxxxEnd() called");
425:   }
426:   if (sr->numopsbegin >= sr->maxops) {
427:     PetscSplitReductionExtend(sr);
428:   }
429:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
430:   sr->invecs[sr->numopsbegin]     = (void*)x;
431:   if (!x->ops->tdot_local) SETERRQ(1,"Vector does not suppport local dots");
432:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
433:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
434:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
435:   return(0);
436: }

438: #undef __FUNCT__
440: /*@
441:    VecTDotEnd - Ends a split phase transpose dot product computation.

443:    Input Parameters:
444: +  x - the first vector (can be PETSC_NULL)
445: .  y - the second vector (can be PETSC_NULL)
446: -  result - where the result will go

448:    Level: advanced

450:    Notes:
451:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

453: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(), 
454:          VecDotBegin(), VecDotEnd()
455: @*/
456: int VecTDotEnd(Vec x,Vec y,PetscScalar *result)
457: {
458:   int               ierr;

461:   /*
462:       TDotEnd() is the same as DotEnd() so reuse the code
463:   */
464:   VecDotEnd(x,y,result);
465:   return(0);
466: }

468: /* -------------------------------------------------------------------------*/

470: #undef __FUNCT__
472: /*@
473:    VecNormBegin - Starts a split phase norm computation.

475:    Input Parameters:
476: +  x - the first vector
477: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
478: -  result - where the result will go (can be PETSC_NULL)

480:    Level: advanced

482:    Notes:
483:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

485: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()

487: @*/
488: int VecNormBegin(Vec x,NormType ntype,PetscReal *result)
489: {
490:   int                 ierr;
491:   PetscSplitReduction *sr;
492:   PetscReal           lresult[2];
493:   MPI_Comm            comm;

496:   PetscObjectGetComm((PetscObject)x,&comm);
497:   PetscSplitReductionGet(comm,&sr);
498:   if (sr->state == STATE_END) {
499:     SETERRQ(1,"Called before all VecxxxEnd() called");
500:   }
501:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
502:     PetscSplitReductionExtend(sr);
503:   }
504: 
505:   sr->invecs[sr->numopsbegin]     = (void*)x;
506:   if (!x->ops->norm_local) SETERRQ(1,"Vector does not support local norms");
507:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
508:   (*x->ops->norm_local)(x,ntype,lresult);
509:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
510:   if (ntype == NORM_2)         lresult[0]                = lresult[0]*lresult[0];
511:   if (ntype == NORM_1_AND_2)   lresult[1]                = lresult[1]*lresult[1];
512:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
513:   else                   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
514:   sr->lvalues[sr->numopsbegin++] = lresult[0];
515:   if (ntype == NORM_1_AND_2) {
516:     sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
517:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
518:   }
519:   return(0);
520: }

522: #undef __FUNCT__
524: /*@
525:    VecNormEnd - Ends a split phase norm computation.

527:    Input Parameters:
528: +  x - the first vector (can be PETSC_NULL)
529: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
530: -  result - where the result will go

532:    Level: advanced

534:    Notes:
535:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

537: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd()

539: @*/
540: int VecNormEnd(Vec x,NormType ntype,PetscReal *result)
541: {
542:   int                 ierr;
543:   PetscSplitReduction *sr;
544:   MPI_Comm            comm;

547:   PetscObjectGetComm((PetscObject)x,&comm);
548:   PetscSplitReductionGet(comm,&sr);
549: 
550:   if (sr->state != STATE_END) {
551:     /* this is the first call to VecxxxEnd() so do the communication */
552:     PetscSplitReductionApply(sr);
553:   }

555:   if (sr->numopsend >= sr->numopsbegin) {
556:     SETERRQ(1,"Called VecxxxEnd() more times then VecxxxBegin()");
557:   }
558:   if (x && (void*)x != sr->invecs[sr->numopsend]) {
559:     SETERRQ(1,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
560:   }
561:   if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) {
562:     SETERRQ(1,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
563:   }
564:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
565:   if (ntype == NORM_2) {
566:     result[0] = sqrt(result[0]);
567:   } else if (ntype == NORM_1_AND_2) {
568:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
569:     result[1] = sqrt(result[1]);
570:   }

572:   if (sr->numopsend == sr->numopsbegin) {
573:     sr->state        = STATE_BEGIN;
574:     sr->numopsend    = 0;
575:     sr->numopsbegin  = 0;
576:   }
577:   return(0);
578: }

580: /*
581:    Possibly add

583:      PetscReductionSumBegin/End()
584:      PetscReductionMaxBegin/End()
585:      PetscReductionMinBegin/End()
586:    or have more like MPI with a single function with flag for Op? Like first better
587: */