Actual source code: comb.c
petsc-3.7.5 2017-01-01
2: /*
3: Split phase global vector reductions with support for combining the
4: communication portion of several operations. Using MPI-1.1 support only
6: The idea for this and much of the initial code is contributed by
7: Victor Eijkhout.
9: Usage:
10: VecDotBegin(Vec,Vec,PetscScalar *);
11: VecNormBegin(Vec,NormType,PetscReal *);
12: ....
13: VecDotEnd(Vec,Vec,PetscScalar *);
14: VecNormEnd(Vec,NormType,PetscReal *);
16: Limitations:
17: - The order of the xxxEnd() functions MUST be in the same order
18: as the xxxBegin(). There is extensive error checking to try to
19: insure that the user calls the routines in the correct order
20: */
22: #include <petsc/private/vecimpl.h> /*I "petscvec.h" I*/
26: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
27: {
31: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
32: MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
33: #elif defined(PETSC_HAVE_MPIX_IALLREDUCE)
34: MPIX_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
35: #else
36: MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
37: *request = MPI_REQUEST_NULL;
38: #endif
39: return(0);
40: }
42: /*
43: Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
44: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by
45: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
46: some of each.
47: */
49: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);
53: /*
54: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
55: */
56: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
57: {
61: PetscNew(sr);
62: (*sr)->numopsbegin = 0;
63: (*sr)->numopsend = 0;
64: (*sr)->state = STATE_BEGIN;
65: (*sr)->maxops = 32;
66: PetscMalloc1(2*32,&(*sr)->lvalues);
67: PetscMalloc1(2*32,&(*sr)->gvalues);
68: PetscMalloc1(32,&(*sr)->invecs);
69: (*sr)->comm = comm;
70: (*sr)->request = MPI_REQUEST_NULL;
71: PetscMalloc1(32,&(*sr)->reducetype);
72: (*sr)->async = PETSC_FALSE;
73: #if defined(PETSC_HAVE_MPI_IALLREDUCE) || defined(PETSC_HAVE_MPIX_IALLREDUCE)
74: (*sr)->async = PETSC_TRUE; /* Enable by default */
75: #endif
76: /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
77: PetscOptionsGetBool(NULL,NULL,"-splitreduction_async",&(*sr)->async,NULL);
78: return(0);
79: }
81: /*
82: This function is the MPI reduction operation used when there is
83: a combination of sums and max in the reduction. The call below to
84: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
85: MPI operator PetscSplitReduction_Op.
86: */
87: MPI_Op PetscSplitReduction_Op = 0;
91: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
92: {
93: PetscScalar *xin = (PetscScalar*)in,*xout = (PetscScalar*)out;
94: PetscInt i,count = (PetscInt)*cnt;
97: if (*datatype != MPIU_SCALAR) {
98: (*PetscErrorPrintf)("Can only handle MPIU_SCALAR data types");
99: MPI_Abort(MPI_COMM_SELF,1);
100: }
101: count = count/2;
102: for (i=0; i<count; i++) {
103: /* second half of xin[] is flags for reduction type */
104: if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_SUM) xout[i] += xin[i];
105: else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MAX) xout[i] = PetscMax(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
106: else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MIN) xout[i] = PetscMin(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
107: else {
108: (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
109: MPI_Abort(MPI_COMM_SELF,1);
110: }
111: }
112: PetscFunctionReturnVoid();
113: }
117: /*@
118: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
120: Collective but not synchronizing
122: Input Arguments:
123: comm - communicator on which split reduction has been queued
125: Level: advanced
127: Note:
128: Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
129: VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).
131: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
132: @*/
133: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
134: {
135: PetscErrorCode ierr;
136: PetscSplitReduction *sr;
139: PetscSplitReductionGet(comm,&sr);
140: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
141: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
142: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
143: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
144: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
145: MPI_Comm comm = sr->comm;
146: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);;
147: PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
148: MPI_Comm_size(sr->comm,&size);
149: if (size == 1) {
150: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
151: } else {
152: /* determine if all reductions are sum, max, or min */
153: for (i=0; i<numops; i++) {
154: if (reducetype[i] == REDUCE_MAX) max_flg = 1;
155: else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
156: else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
157: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
158: }
159: if (sum_flg + max_flg + min_flg > 1) {
160: /*
161: after all the entires in lvalues we store the reducetype flags to indicate
162: to the reduction operations what are sums and what are max
163: */
164: for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
166: MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm,&sr->request);
167: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
168: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
169: } else if (min_flg) {
170: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
171: } else {
172: MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
173: }
174: }
175: sr->state = STATE_PENDING;
176: sr->numopsend = 0;
177: PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
178: } else {
179: PetscSplitReductionApply(sr);
180: }
181: return(0);
182: }
186: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
187: {
191: switch (sr->state) {
192: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
193: PetscSplitReductionApply(sr);
194: break;
195: case STATE_PENDING:
196: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
197: PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
198: if (sr->request != MPI_REQUEST_NULL) {
199: MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
200: }
201: sr->state = STATE_END;
202: PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
203: break;
204: default: break; /* everything is already done */
205: }
206: return(0);
207: }
211: /*
212: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
213: */
214: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
215: {
217: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
218: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
219: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
220: MPI_Comm comm = sr->comm;
221: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);
224: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
225: PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
226: MPI_Comm_size(sr->comm,&size);
227: if (size == 1) {
228: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
229: } else {
230: /* determine if all reductions are sum, max, or min */
231: for (i=0; i<numops; i++) {
232: if (reducetype[i] == REDUCE_MAX) max_flg = 1;
233: else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
234: else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
235: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
236: }
237: if (sum_flg + max_flg + min_flg > 1) {
238: /*
239: after all the entires in lvalues we store the reducetype flags to indicate
240: to the reduction operations what are sums and what are max
241: */
242: for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
243: MPIU_Allreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm);
244: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
245: MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
246: } else if (min_flg) {
247: MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
248: } else {
249: MPIU_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
250: }
251: }
252: sr->state = STATE_END;
253: sr->numopsend = 0;
254: PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
255: return(0);
256: }
260: /*
261: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
262: */
263: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
264: {
266: PetscInt maxops = sr->maxops,*reducetype = sr->reducetype;
267: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
268: void *invecs = sr->invecs;
271: sr->maxops = 2*maxops;
272: PetscMalloc1(2*2*maxops,&sr->lvalues);
273: PetscMalloc1(2*2*maxops,&sr->gvalues);
274: PetscMalloc1(2*maxops,&sr->reducetype);
275: PetscMalloc1(2*maxops,&sr->invecs);
276: PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
277: PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
278: PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
279: PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
280: PetscFree(lvalues);
281: PetscFree(gvalues);
282: PetscFree(reducetype);
283: PetscFree(invecs);
284: return(0);
285: }
289: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
290: {
294: PetscFree(sr->lvalues);
295: PetscFree(sr->gvalues);
296: PetscFree(sr->reducetype);
297: PetscFree(sr->invecs);
298: PetscFree(sr);
299: return(0);
300: }
302: static PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
306: /*
307: Private routine to delete internal storage when a communicator is freed.
308: This is called by MPI, not by users.
310: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
311: it was MPI_Comm *comm.
312: */
313: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
314: {
318: PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
319: PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);
320: return(0);
321: }
323: /*
324: PetscSplitReductionGet - Gets the split reduction object from a
325: PETSc vector, creates if it does not exit.
327: */
330: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
331: {
333: PetscMPIInt flag;
336: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
337: /*
338: The calling sequence of the 2nd argument to this function changed
339: between MPI Standard 1.0 and the revisions 1.1 Here we match the
340: new standard, if you are using an MPI implementation that uses
341: the older version you will get a warning message about the next line;
342: it is only a warning message and should do no harm.
343: */
344: MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
345: }
346: MPI_Attr_get(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
347: if (!flag) { /* doesn't exist yet so create it and put it in */
348: PetscSplitReductionCreate(comm,sr);
349: MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
350: PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
351: }
352: return(0);
353: }
355: /* ----------------------------------------------------------------------------------------------------*/
359: /*@
360: VecDotBegin - Starts a split phase dot product computation.
362: Input Parameters:
363: + x - the first vector
364: . y - the second vector
365: - result - where the result will go (can be NULL)
367: Level: advanced
369: Notes:
370: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
372: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
373: VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
374: @*/
375: PetscErrorCode VecDotBegin(Vec x,Vec y,PetscScalar *result)
376: {
377: PetscErrorCode ierr;
378: PetscSplitReduction *sr;
379: MPI_Comm comm;
382: PetscObjectGetComm((PetscObject)x,&comm);
383: PetscSplitReductionGet(comm,&sr);
384: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
385: if (sr->numopsbegin >= sr->maxops) {
386: PetscSplitReductionExtend(sr);
387: }
388: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
389: sr->invecs[sr->numopsbegin] = (void*)x;
390: if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
391: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
392: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
393: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
394: return(0);
395: }
399: /*@
400: VecDotEnd - Ends a split phase dot product computation.
402: Input Parameters:
403: + x - the first vector (can be NULL)
404: . y - the second vector (can be NULL)
405: - result - where the result will go
407: Level: advanced
409: Notes:
410: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
412: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
413: VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()
415: @*/
416: PetscErrorCode VecDotEnd(Vec x,Vec y,PetscScalar *result)
417: {
418: PetscErrorCode ierr;
419: PetscSplitReduction *sr;
420: MPI_Comm comm;
423: PetscObjectGetComm((PetscObject)x,&comm);
424: PetscSplitReductionGet(comm,&sr);
425: PetscSplitReductionEnd(sr);
427: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
428: if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
429: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
430: *result = sr->gvalues[sr->numopsend++];
432: /*
433: We are finished getting all the results so reset to no outstanding requests
434: */
435: if (sr->numopsend == sr->numopsbegin) {
436: sr->state = STATE_BEGIN;
437: sr->numopsend = 0;
438: sr->numopsbegin = 0;
439: }
440: return(0);
441: }
445: /*@
446: VecTDotBegin - Starts a split phase transpose dot product computation.
448: Input Parameters:
449: + x - the first vector
450: . y - the second vector
451: - result - where the result will go (can be NULL)
453: Level: advanced
455: Notes:
456: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
458: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
459: VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
461: @*/
462: PetscErrorCode VecTDotBegin(Vec x,Vec y,PetscScalar *result)
463: {
464: PetscErrorCode ierr;
465: PetscSplitReduction *sr;
466: MPI_Comm comm;
469: PetscObjectGetComm((PetscObject)x,&comm);
470: PetscSplitReductionGet(comm,&sr);
471: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
472: if (sr->numopsbegin >= sr->maxops) {
473: PetscSplitReductionExtend(sr);
474: }
475: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
476: sr->invecs[sr->numopsbegin] = (void*)x;
477: if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
478: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
479: (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
480: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
481: return(0);
482: }
486: /*@
487: VecTDotEnd - Ends a split phase transpose dot product computation.
489: Input Parameters:
490: + x - the first vector (can be NULL)
491: . y - the second vector (can be NULL)
492: - result - where the result will go
494: Level: advanced
496: Notes:
497: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
499: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
500: VecDotBegin(), VecDotEnd()
501: @*/
502: PetscErrorCode VecTDotEnd(Vec x,Vec y,PetscScalar *result)
503: {
507: /*
508: TDotEnd() is the same as DotEnd() so reuse the code
509: */
510: VecDotEnd(x,y,result);
511: return(0);
512: }
514: /* -------------------------------------------------------------------------*/
518: /*@
519: VecNormBegin - Starts a split phase norm computation.
521: Input Parameters:
522: + x - the first vector
523: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
524: - result - where the result will go (can be NULL)
526: Level: advanced
528: Notes:
529: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
531: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
533: @*/
534: PetscErrorCode VecNormBegin(Vec x,NormType ntype,PetscReal *result)
535: {
536: PetscErrorCode ierr;
537: PetscSplitReduction *sr;
538: PetscReal lresult[2];
539: MPI_Comm comm;
542: PetscObjectGetComm((PetscObject)x,&comm);
543: PetscSplitReductionGet(comm,&sr);
544: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
545: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
546: PetscSplitReductionExtend(sr);
547: }
549: sr->invecs[sr->numopsbegin] = (void*)x;
550: if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
551: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
552: (*x->ops->norm_local)(x,ntype,lresult);
553: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
554: if (ntype == NORM_2) lresult[0] = lresult[0]*lresult[0];
555: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1]*lresult[1];
556: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
557: else sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
558: sr->lvalues[sr->numopsbegin++] = lresult[0];
559: if (ntype == NORM_1_AND_2) {
560: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
561: sr->lvalues[sr->numopsbegin++] = lresult[1];
562: }
563: return(0);
564: }
568: /*@
569: VecNormEnd - Ends a split phase norm computation.
571: Input Parameters:
572: + x - the first vector (can be NULL)
573: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
574: - result - where the result will go
576: Level: advanced
578: Notes:
579: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
581: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
583: @*/
584: PetscErrorCode VecNormEnd(Vec x,NormType ntype,PetscReal *result)
585: {
586: PetscErrorCode ierr;
587: PetscSplitReduction *sr;
588: MPI_Comm comm;
591: PetscObjectGetComm((PetscObject)x,&comm);
592: PetscSplitReductionGet(comm,&sr);
593: PetscSplitReductionEnd(sr);
595: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
596: if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
597: if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
598: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
600: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
601: else if (ntype == NORM_1_AND_2) {
602: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
603: result[1] = PetscSqrtReal(result[1]);
604: }
605: if (ntype!=NORM_1_AND_2) {
606: PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
607: }
609: if (sr->numopsend == sr->numopsbegin) {
610: sr->state = STATE_BEGIN;
611: sr->numopsend = 0;
612: sr->numopsbegin = 0;
613: }
614: return(0);
615: }
617: /*
618: Possibly add
620: PetscReductionSumBegin/End()
621: PetscReductionMaxBegin/End()
622: PetscReductionMinBegin/End()
623: or have more like MPI with a single function with flag for Op? Like first better
624: */
628: /*@
629: VecMDotBegin - Starts a split phase multiple dot product computation.
631: Input Parameters:
632: + x - the first vector
633: . nv - number of vectors
634: . y - array of vectors
635: - result - where the result will go (can be NULL)
637: Level: advanced
639: Notes:
640: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
642: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
643: VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
644: @*/
645: PetscErrorCode VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
646: {
647: PetscErrorCode ierr;
648: PetscSplitReduction *sr;
649: MPI_Comm comm;
650: int i;
653: PetscObjectGetComm((PetscObject)x,&comm);
654: PetscSplitReductionGet(comm,&sr);
655: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
656: for (i=0; i<nv; i++) {
657: if (sr->numopsbegin+i >= sr->maxops) {
658: PetscSplitReductionExtend(sr);
659: }
660: sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
661: sr->invecs[sr->numopsbegin+i] = (void*)x;
662: }
663: if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
664: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
665: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
666: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
667: sr->numopsbegin += nv;
668: return(0);
669: }
673: /*@
674: VecMDotEnd - Ends a split phase multiple dot product computation.
676: Input Parameters:
677: + x - the first vector (can be NULL)
678: . nv - number of vectors
679: - y - array of vectors (can be NULL)
681: Output Parameters:
682: . result - where the result will go
684: Level: advanced
686: Notes:
687: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
689: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
690: VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
692: @*/
693: PetscErrorCode VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
694: {
695: PetscErrorCode ierr;
696: PetscSplitReduction *sr;
697: MPI_Comm comm;
698: int i;
701: PetscObjectGetComm((PetscObject)x,&comm);
702: PetscSplitReductionGet(comm,&sr);
703: PetscSplitReductionEnd(sr);
705: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
706: if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
707: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
708: for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];
710: /*
711: We are finished getting all the results so reset to no outstanding requests
712: */
713: if (sr->numopsend == sr->numopsbegin) {
714: sr->state = STATE_BEGIN;
715: sr->numopsend = 0;
716: sr->numopsbegin = 0;
717: }
718: return(0);
719: }
723: /*@
724: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
726: Input Parameters:
727: + x - the first vector
728: . nv - number of vectors
729: . y - array of vectors
730: - result - where the result will go (can be NULL)
732: Level: advanced
734: Notes:
735: Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().
737: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
738: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
740: @*/
741: PetscErrorCode VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
742: {
743: PetscErrorCode ierr;
744: PetscSplitReduction *sr;
745: MPI_Comm comm;
746: int i;
749: PetscObjectGetComm((PetscObject)x,&comm);
750: PetscSplitReductionGet(comm,&sr);
751: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
752: for (i=0; i<nv; i++) {
753: if (sr->numopsbegin+i >= sr->maxops) {
754: PetscSplitReductionExtend(sr);
755: }
756: sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
757: sr->invecs[sr->numopsbegin+i] = (void*)x;
758: }
759: if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
760: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
761: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
762: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
763: sr->numopsbegin += nv;
764: return(0);
765: }
769: /*@
770: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
772: Input Parameters:
773: + x - the first vector (can be NULL)
774: . nv - number of vectors
775: - y - array of vectors (can be NULL)
777: Output Parameters
778: . result - where the result will go
780: Level: advanced
782: Notes:
783: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
785: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
786: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
787: @*/
788: PetscErrorCode VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
789: {
793: /*
794: MTDotEnd() is the same as MDotEnd() so reuse the code
795: */
796: VecMDotEnd(x,nv,y,result);
797: return(0);
798: }