Actual source code: comb.c

petsc-3.7.5 2017-01-01
Report Typos and Errors
  2: /*
  3:       Split phase global vector reductions with support for combining the
  4:    communication portion of several operations. Using MPI-1.1 support only

  6:       The idea for this and much of the initial code is contributed by
  7:    Victor Eijkhout.

  9:        Usage:
 10:              VecDotBegin(Vec,Vec,PetscScalar *);
 11:              VecNormBegin(Vec,NormType,PetscReal *);
 12:              ....
 13:              VecDotEnd(Vec,Vec,PetscScalar *);
 14:              VecNormEnd(Vec,NormType,PetscReal *);

 16:        Limitations:
 17:          - The order of the xxxEnd() functions MUST be in the same order
 18:            as the xxxBegin(). There is extensive error checking to try to
 19:            insure that the user calls the routines in the correct order
 20: */

 22: #include <petsc/private/vecimpl.h>    /*I   "petscvec.h"    I*/

 26: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
 27: {

 31: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
 32:   MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 33: #elif defined(PETSC_HAVE_MPIX_IALLREDUCE)
 34:   MPIX_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 35: #else
 36:   MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
 37:   *request = MPI_REQUEST_NULL;
 38: #endif
 39:   return(0);
 40: }

 42: /*
 43:    Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
 44: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by
 45: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
 46: some of each.
 47: */

 49: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);

 53: /*
 54:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 55: */
 56: static PetscErrorCode  PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
 57: {

 61:   PetscNew(sr);
 62:   (*sr)->numopsbegin = 0;
 63:   (*sr)->numopsend   = 0;
 64:   (*sr)->state       = STATE_BEGIN;
 65:   (*sr)->maxops      = 32;
 66:   PetscMalloc1(2*32,&(*sr)->lvalues);
 67:   PetscMalloc1(2*32,&(*sr)->gvalues);
 68:   PetscMalloc1(32,&(*sr)->invecs);
 69:   (*sr)->comm        = comm;
 70:   (*sr)->request     = MPI_REQUEST_NULL;
 71:   PetscMalloc1(32,&(*sr)->reducetype);
 72:   (*sr)->async       = PETSC_FALSE;
 73: #if defined(PETSC_HAVE_MPI_IALLREDUCE) || defined(PETSC_HAVE_MPIX_IALLREDUCE)
 74:   (*sr)->async = PETSC_TRUE;    /* Enable by default */
 75: #endif
 76:   /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
 77:   PetscOptionsGetBool(NULL,NULL,"-splitreduction_async",&(*sr)->async,NULL);
 78:   return(0);
 79: }

 81: /*
 82:        This function is the MPI reduction operation used when there is
 83:    a combination of sums and max in the reduction. The call below to
 84:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the
 85:    MPI operator PetscSplitReduction_Op.
 86: */
 87: MPI_Op PetscSplitReduction_Op = 0;

 91: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
 92: {
 93:   PetscScalar *xin = (PetscScalar*)in,*xout = (PetscScalar*)out;
 94:   PetscInt    i,count = (PetscInt)*cnt;

 97:   if (*datatype != MPIU_SCALAR) {
 98:     (*PetscErrorPrintf)("Can only handle MPIU_SCALAR data types");
 99:     MPI_Abort(MPI_COMM_SELF,1);
100:   }
101:   count = count/2;
102:   for (i=0; i<count; i++) {
103:      /* second half of xin[] is flags for reduction type */
104:     if      ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_SUM) xout[i] += xin[i];
105:     else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MAX) xout[i] = PetscMax(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
106:     else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MIN) xout[i] = PetscMin(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
107:     else {
108:       (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
109:       MPI_Abort(MPI_COMM_SELF,1);
110:     }
111:   }
112:   PetscFunctionReturnVoid();
113: }

117: /*@
118:    PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

120:    Collective but not synchronizing

122:    Input Arguments:
123:    comm - communicator on which split reduction has been queued

125:    Level: advanced

127:    Note:
128:    Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
129:    VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).

131: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
132: @*/
133: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
134: {
135:   PetscErrorCode      ierr;
136:   PetscSplitReduction *sr;

139:   PetscSplitReductionGet(comm,&sr);
140:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
141:   if (sr->async) {              /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
142:     PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
143:     PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
144:     PetscInt       sum_flg = 0,max_flg = 0, min_flg = 0;
145:     MPI_Comm       comm = sr->comm;
146:     PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);;
147:     PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
148:     MPI_Comm_size(sr->comm,&size);
149:     if (size == 1) {
150:       PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
151:     } else {
152:       /* determine if all reductions are sum, max, or min */
153:       for (i=0; i<numops; i++) {
154:         if      (reducetype[i] == REDUCE_MAX) max_flg = 1;
155:         else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
156:         else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
157:         else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
158:       }
159:       if (sum_flg + max_flg + min_flg > 1) {
160:         /*
161:          after all the entires in lvalues we store the reducetype flags to indicate
162:          to the reduction operations what are sums and what are max
163:          */
164:         for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];

166:         MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm,&sr->request);
167:       } else if (max_flg) {   /* Compute max of real and imag parts separately, presumably only the real part is used */
168:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
169:       } else if (min_flg) {
170:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
171:       } else {
172:         MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
173:       }
174:     }
175:     sr->state     = STATE_PENDING;
176:     sr->numopsend = 0;
177:     PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
178:   } else {
179:     PetscSplitReductionApply(sr);
180:   }
181:   return(0);
182: }

186: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
187: {

191:   switch (sr->state) {
192:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
193:     PetscSplitReductionApply(sr);
194:     break;
195:   case STATE_PENDING:
196:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
197:     PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
198:     if (sr->request != MPI_REQUEST_NULL) {
199:       MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
200:     }
201:     sr->state = STATE_END;
202:     PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
203:     break;
204:   default: break;            /* everything is already done */
205:   }
206:   return(0);
207: }

211: /*
212:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
213: */
214: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
215: {
217:   PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
218:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
219:   PetscInt       sum_flg  = 0,max_flg = 0, min_flg = 0;
220:   MPI_Comm       comm     = sr->comm;
221:   PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);

224:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
225:   PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
226:   MPI_Comm_size(sr->comm,&size);
227:   if (size == 1) {
228:     PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
229:   } else {
230:     /* determine if all reductions are sum, max, or min */
231:     for (i=0; i<numops; i++) {
232:       if      (reducetype[i] == REDUCE_MAX) max_flg = 1;
233:       else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
234:       else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
235:       else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
236:     }
237:     if (sum_flg + max_flg + min_flg > 1) {
238:       /*
239:          after all the entires in lvalues we store the reducetype flags to indicate
240:          to the reduction operations what are sums and what are max
241:       */
242:       for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
243:       MPIU_Allreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm);
244:     } else if (max_flg) {     /* Compute max of real and imag parts separately, presumably only the real part is used */
245:       MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
246:     } else if (min_flg) {
247:       MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
248:     } else {
249:       MPIU_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
250:     }
251:   }
252:   sr->state     = STATE_END;
253:   sr->numopsend = 0;
254:   PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
255:   return(0);
256: }

260: /*
261:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
262: */
263: PetscErrorCode  PetscSplitReductionExtend(PetscSplitReduction *sr)
264: {
266:   PetscInt       maxops   = sr->maxops,*reducetype = sr->reducetype;
267:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
268:   void           *invecs  = sr->invecs;

271:   sr->maxops     = 2*maxops;
272:   PetscMalloc1(2*2*maxops,&sr->lvalues);
273:   PetscMalloc1(2*2*maxops,&sr->gvalues);
274:   PetscMalloc1(2*maxops,&sr->reducetype);
275:   PetscMalloc1(2*maxops,&sr->invecs);
276:   PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
277:   PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
278:   PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
279:   PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
280:   PetscFree(lvalues);
281:   PetscFree(gvalues);
282:   PetscFree(reducetype);
283:   PetscFree(invecs);
284:   return(0);
285: }

289: PetscErrorCode  PetscSplitReductionDestroy(PetscSplitReduction *sr)
290: {

294:   PetscFree(sr->lvalues);
295:   PetscFree(sr->gvalues);
296:   PetscFree(sr->reducetype);
297:   PetscFree(sr->invecs);
298:   PetscFree(sr);
299:   return(0);
300: }

302: static PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

306: /*
307:    Private routine to delete internal storage when a communicator is freed.
308:   This is called by MPI, not by users.

310:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
311:   it was MPI_Comm *comm.
312: */
313: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
314: {

318:   PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
319:   PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);
320:   return(0);
321: }

323: /*
324:      PetscSplitReductionGet - Gets the split reduction object from a
325:         PETSc vector, creates if it does not exit.

327: */
330: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
331: {
333:   PetscMPIInt    flag;

336:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
337:     /*
338:        The calling sequence of the 2nd argument to this function changed
339:        between MPI Standard 1.0 and the revisions 1.1 Here we match the
340:        new standard, if you are using an MPI implementation that uses
341:        the older version you will get a warning message about the next line;
342:        it is only a warning message and should do no harm.
343:     */
344:     MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
345:   }
346:   MPI_Attr_get(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
347:   if (!flag) {  /* doesn't exist yet so create it and put it in */
348:     PetscSplitReductionCreate(comm,sr);
349:     MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
350:     PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
351:   }
352:   return(0);
353: }

355: /* ----------------------------------------------------------------------------------------------------*/

359: /*@
360:    VecDotBegin - Starts a split phase dot product computation.

362:    Input Parameters:
363: +   x - the first vector
364: .   y - the second vector
365: -   result - where the result will go (can be NULL)

367:    Level: advanced

369:    Notes:
370:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

372: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
373:          VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
374: @*/
375: PetscErrorCode  VecDotBegin(Vec x,Vec y,PetscScalar *result)
376: {
377:   PetscErrorCode      ierr;
378:   PetscSplitReduction *sr;
379:   MPI_Comm            comm;

382:   PetscObjectGetComm((PetscObject)x,&comm);
383:   PetscSplitReductionGet(comm,&sr);
384:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
385:   if (sr->numopsbegin >= sr->maxops) {
386:     PetscSplitReductionExtend(sr);
387:   }
388:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
389:   sr->invecs[sr->numopsbegin]     = (void*)x;
390:   if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
391:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
392:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
393:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
394:   return(0);
395: }

399: /*@
400:    VecDotEnd - Ends a split phase dot product computation.

402:    Input Parameters:
403: +  x - the first vector (can be NULL)
404: .  y - the second vector (can be NULL)
405: -  result - where the result will go

407:    Level: advanced

409:    Notes:
410:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

412: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
413:          VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()

415: @*/
416: PetscErrorCode  VecDotEnd(Vec x,Vec y,PetscScalar *result)
417: {
418:   PetscErrorCode      ierr;
419:   PetscSplitReduction *sr;
420:   MPI_Comm            comm;

423:   PetscObjectGetComm((PetscObject)x,&comm);
424:   PetscSplitReductionGet(comm,&sr);
425:   PetscSplitReductionEnd(sr);

427:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
428:   if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
429:   if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
430:   *result = sr->gvalues[sr->numopsend++];

432:   /*
433:      We are finished getting all the results so reset to no outstanding requests
434:   */
435:   if (sr->numopsend == sr->numopsbegin) {
436:     sr->state       = STATE_BEGIN;
437:     sr->numopsend   = 0;
438:     sr->numopsbegin = 0;
439:   }
440:   return(0);
441: }

445: /*@
446:    VecTDotBegin - Starts a split phase transpose dot product computation.

448:    Input Parameters:
449: +  x - the first vector
450: .  y - the second vector
451: -  result - where the result will go (can be NULL)

453:    Level: advanced

455:    Notes:
456:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

458: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
459:          VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

461: @*/
462: PetscErrorCode  VecTDotBegin(Vec x,Vec y,PetscScalar *result)
463: {
464:   PetscErrorCode      ierr;
465:   PetscSplitReduction *sr;
466:   MPI_Comm            comm;

469:   PetscObjectGetComm((PetscObject)x,&comm);
470:   PetscSplitReductionGet(comm,&sr);
471:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
472:   if (sr->numopsbegin >= sr->maxops) {
473:     PetscSplitReductionExtend(sr);
474:   }
475:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
476:   sr->invecs[sr->numopsbegin]     = (void*)x;
477:   if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
478:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
479:   (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
480:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
481:   return(0);
482: }

486: /*@
487:    VecTDotEnd - Ends a split phase transpose dot product computation.

489:    Input Parameters:
490: +  x - the first vector (can be NULL)
491: .  y - the second vector (can be NULL)
492: -  result - where the result will go

494:    Level: advanced

496:    Notes:
497:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

499: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
500:          VecDotBegin(), VecDotEnd()
501: @*/
502: PetscErrorCode  VecTDotEnd(Vec x,Vec y,PetscScalar *result)
503: {

507:   /*
508:       TDotEnd() is the same as DotEnd() so reuse the code
509:   */
510:   VecDotEnd(x,y,result);
511:   return(0);
512: }

514: /* -------------------------------------------------------------------------*/

518: /*@
519:    VecNormBegin - Starts a split phase norm computation.

521:    Input Parameters:
522: +  x - the first vector
523: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
524: -  result - where the result will go (can be NULL)

526:    Level: advanced

528:    Notes:
529:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

531: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

533: @*/
534: PetscErrorCode  VecNormBegin(Vec x,NormType ntype,PetscReal *result)
535: {
536:   PetscErrorCode      ierr;
537:   PetscSplitReduction *sr;
538:   PetscReal           lresult[2];
539:   MPI_Comm            comm;

542:   PetscObjectGetComm((PetscObject)x,&comm);
543:   PetscSplitReductionGet(comm,&sr);
544:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
545:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
546:     PetscSplitReductionExtend(sr);
547:   }

549:   sr->invecs[sr->numopsbegin] = (void*)x;
550:   if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
551:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
552:   (*x->ops->norm_local)(x,ntype,lresult);
553:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
554:   if (ntype == NORM_2)         lresult[0]                = lresult[0]*lresult[0];
555:   if (ntype == NORM_1_AND_2)   lresult[1]                = lresult[1]*lresult[1];
556:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
557:   else                   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
558:   sr->lvalues[sr->numopsbegin++] = lresult[0];
559:   if (ntype == NORM_1_AND_2) {
560:     sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
561:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
562:   }
563:   return(0);
564: }

568: /*@
569:    VecNormEnd - Ends a split phase norm computation.

571:    Input Parameters:
572: +  x - the first vector (can be NULL)
573: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
574: -  result - where the result will go

576:    Level: advanced

578:    Notes:
579:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

581: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

583: @*/
584: PetscErrorCode  VecNormEnd(Vec x,NormType ntype,PetscReal *result)
585: {
586:   PetscErrorCode      ierr;
587:   PetscSplitReduction *sr;
588:   MPI_Comm            comm;

591:   PetscObjectGetComm((PetscObject)x,&comm);
592:   PetscSplitReductionGet(comm,&sr);
593:   PetscSplitReductionEnd(sr);

595:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
596:   if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
597:   if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
598:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

600:   if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
601:   else if (ntype == NORM_1_AND_2) {
602:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
603:     result[1] = PetscSqrtReal(result[1]);
604:   }
605:   if (ntype!=NORM_1_AND_2) {
606:     PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
607:   }

609:   if (sr->numopsend == sr->numopsbegin) {
610:     sr->state       = STATE_BEGIN;
611:     sr->numopsend   = 0;
612:     sr->numopsbegin = 0;
613:   }
614:   return(0);
615: }

617: /*
618:    Possibly add

620:      PetscReductionSumBegin/End()
621:      PetscReductionMaxBegin/End()
622:      PetscReductionMinBegin/End()
623:    or have more like MPI with a single function with flag for Op? Like first better
624: */

628: /*@
629:    VecMDotBegin - Starts a split phase multiple dot product computation.

631:    Input Parameters:
632: +   x - the first vector
633: .   nv - number of vectors
634: .   y - array of vectors
635: -   result - where the result will go (can be NULL)

637:    Level: advanced

639:    Notes:
640:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

642: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
643:          VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
644: @*/
645: PetscErrorCode  VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
646: {
647:   PetscErrorCode      ierr;
648:   PetscSplitReduction *sr;
649:   MPI_Comm            comm;
650:   int                 i;

653:   PetscObjectGetComm((PetscObject)x,&comm);
654:   PetscSplitReductionGet(comm,&sr);
655:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
656:   for (i=0; i<nv; i++) {
657:     if (sr->numopsbegin+i >= sr->maxops) {
658:       PetscSplitReductionExtend(sr);
659:     }
660:     sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
661:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
662:   }
663:   if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
664:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
665:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
666:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
667:   sr->numopsbegin += nv;
668:   return(0);
669: }

673: /*@
674:    VecMDotEnd - Ends a split phase multiple dot product computation.

676:    Input Parameters:
677: +   x - the first vector (can be NULL)
678: .   nv - number of vectors
679: -   y - array of vectors (can be NULL)

681:    Output Parameters:
682: .   result - where the result will go

684:    Level: advanced

686:    Notes:
687:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

689: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
690:          VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()

692: @*/
693: PetscErrorCode  VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
694: {
695:   PetscErrorCode      ierr;
696:   PetscSplitReduction *sr;
697:   MPI_Comm            comm;
698:   int                 i;

701:   PetscObjectGetComm((PetscObject)x,&comm);
702:   PetscSplitReductionGet(comm,&sr);
703:   PetscSplitReductionEnd(sr);

705:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
706:   if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
707:   if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
708:   for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];

710:   /*
711:      We are finished getting all the results so reset to no outstanding requests
712:   */
713:   if (sr->numopsend == sr->numopsbegin) {
714:     sr->state       = STATE_BEGIN;
715:     sr->numopsend   = 0;
716:     sr->numopsbegin = 0;
717:   }
718:   return(0);
719: }

723: /*@
724:    VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

726:    Input Parameters:
727: +  x - the first vector
728: .  nv - number of vectors
729: .  y - array of  vectors
730: -  result - where the result will go (can be NULL)

732:    Level: advanced

734:    Notes:
735:    Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().

737: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
738:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()

740: @*/
741: PetscErrorCode  VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
742: {
743:   PetscErrorCode      ierr;
744:   PetscSplitReduction *sr;
745:   MPI_Comm            comm;
746:   int                 i;

749:   PetscObjectGetComm((PetscObject)x,&comm);
750:   PetscSplitReductionGet(comm,&sr);
751:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
752:   for (i=0; i<nv; i++) {
753:     if (sr->numopsbegin+i >= sr->maxops) {
754:       PetscSplitReductionExtend(sr);
755:     }
756:     sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
757:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
758:   }
759:   if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
760:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
761:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
762:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
763:   sr->numopsbegin += nv;
764:   return(0);
765: }

769: /*@
770:    VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

772:    Input Parameters:
773: +  x - the first vector (can be NULL)
774: .  nv - number of vectors
775: -  y - array of  vectors (can be NULL)

777:    Output Parameters
778: .  result - where the result will go

780:    Level: advanced

782:    Notes:
783:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

785: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
786:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
787: @*/
788: PetscErrorCode  VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
789: {

793:   /*
794:       MTDotEnd() is the same as MDotEnd() so reuse the code
795:   */
796:   VecMDotEnd(x,nv,y,result);
797:   return(0);
798: }