Actual source code: mpits.c

petsc-3.7.5 2017-01-01
  1: #include <petscsys.h>        /*I  "petscsys.h"  I*/

  3: PetscLogEvent PETSC_BuildTwoSided,PETSC_BuildTwoSidedF;

  5: const char *const PetscBuildTwoSidedTypes[] = {
  6:   "ALLREDUCE",
  7:   "IBARRIER",
  8:   "REDSCATTER",
  9:   "PetscBuildTwoSidedType",
 10:   "PETSC_BUILDTWOSIDED_",
 11:   0
 12: };

 14: static PetscBuildTwoSidedType _twosided_type = PETSC_BUILDTWOSIDED_NOTSET;

 18: /*@
 19:    PetscCommBuildTwoSidedSetType - set algorithm to use when building two-sided communication

 21:    Logically Collective

 23:    Input Arguments:
 24: +  comm - communicator (the setting is currently global, so PETSC_COMM_WORLD is typical)
 25: -  twosided - algorithm to use in subsequent calls to PetscCommBuildTwoSided()

 27:    Level: developer

 29:    Note:
 30:    This option is currently global, but could be made per-communicator.

 32: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedGetType()
 33: @*/
 34: PetscErrorCode PetscCommBuildTwoSidedSetType(MPI_Comm comm,PetscBuildTwoSidedType twosided)
 35: {
 37: #if defined(PETSC_USE_DEBUG)
 38:   {
 39:     PetscMPIInt ierr;
 40:     PetscMPIInt b1[2],b2[2];
 41:     b1[0] = -(PetscMPIInt)twosided;
 42:     b1[1] = (PetscMPIInt)twosided;
 43:     ierr = MPIU_Allreduce(b1,b2,2,MPI_INT,MPI_MAX,comm);CHKERRQ(ierr);
 44:     if (-b2[0] != b2[1]) SETERRQ(comm,PETSC_ERR_ARG_WRONG,"Enum value must be same on all processes");
 45:   }
 46: #endif
 47:   _twosided_type = twosided;
 48:   return(0);
 49: }
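
/* Illustrative usage sketch (not part of petsc-3.7.5 mpits.c): select the rendezvous
 * algorithm before the first call to PetscCommBuildTwoSided().  The function name
 * ExampleUseAllreduce is hypothetical; the same effect is obtained at run time with
 * -build_twosided allreduce.  PETSC_BUILDTWOSIDED_IBARRIER could be requested instead
 * when the MPI implementation provides MPI_Ibarrier. */
static PetscErrorCode ExampleUseAllreduce(void)
{
  PetscErrorCode ierr;

  ierr = PetscCommBuildTwoSidedSetType(PETSC_COMM_WORLD,PETSC_BUILDTWOSIDED_ALLREDUCE);CHKERRQ(ierr);
  return 0;
}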

 53: /*@
 54:    PetscCommBuildTwoSidedGetType - get the algorithm to use when building two-sided communication

 56:    Logically Collective

 58:    Input and Output Arguments:
 59: +  comm - communicator on which to query the algorithm (input)
 60: -  twosided - algorithm that PetscCommBuildTwoSided() will use (output)

 62:    Level: developer

 64: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedSetType()
 65: @*/
 66: PetscErrorCode PetscCommBuildTwoSidedGetType(MPI_Comm comm,PetscBuildTwoSidedType *twosided)
 67: {

 71:   *twosided = PETSC_BUILDTWOSIDED_NOTSET;
 72:   if (_twosided_type == PETSC_BUILDTWOSIDED_NOTSET) {
 73: #if defined(PETSC_HAVE_MPI_IBARRIER)
 74: #  if defined(PETSC_HAVE_MPICH_CH3_SOCK) && !defined(PETSC_HAVE_MPICH_CH3_SOCK_FIXED_NBC_PROGRESS)
 75:     /* Deadlock in Ibarrier: http://trac.mpich.org/projects/mpich/ticket/1785 */
 76:     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
 77: #  else
 78:     _twosided_type = PETSC_BUILDTWOSIDED_IBARRIER;
 79: #  endif
 80: #else
 81:     _twosided_type = PETSC_BUILDTWOSIDED_ALLREDUCE;
 82: #endif
 83:     PetscOptionsGetEnum(NULL,NULL,"-build_twosided",PetscBuildTwoSidedTypes,(PetscEnum*)&_twosided_type,NULL);
 84:   }
 85:   *twosided = _twosided_type;
 86:   return(0);
 87: }
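
/* Illustrative sketch (not part of petsc-3.7.5 mpits.c): query which algorithm will be
 * used and print its name via the PetscBuildTwoSidedTypes[] string table defined at the
 * top of this file.  The function name ExampleReportTwoSidedType is hypothetical. */
static PetscErrorCode ExampleReportTwoSidedType(MPI_Comm comm)
{
  PetscErrorCode         ierr;
  PetscBuildTwoSidedType t;

  ierr = PetscCommBuildTwoSidedGetType(comm,&t);CHKERRQ(ierr);
  ierr = PetscPrintf(comm,"Two-sided build algorithm: %s\n",PetscBuildTwoSidedTypes[t]);CHKERRQ(ierr);
  return 0;
}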

 89: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)

 93: static PetscErrorCode PetscCommBuildTwoSided_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
 94: {
 96:   PetscMPIInt    nrecvs,tag,done,i;
 97:   MPI_Aint       lb,unitbytes;
 98:   char           *tdata;
 99:   MPI_Request    *sendreqs,barrier;
100:   PetscSegBuffer segrank,segdata;
101:   PetscBool      barrier_started;

104:   PetscCommDuplicate(comm,&comm,&tag);
105:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
106:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
107:   tdata = (char*)todata;
108:   PetscMalloc1(nto,&sendreqs);
109:   for (i=0; i<nto; i++) {
110:     MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
111:   }
112:   PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
113:   PetscSegBufferCreate(unitbytes,4*count,&segdata);

115:   nrecvs  = 0;
116:   barrier = MPI_REQUEST_NULL;
117:   /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
118:    * but we need to work around it. */
119:   barrier_started = PETSC_FALSE;
120:   for (done=0; !done; ) {
121:     PetscMPIInt flag;
122:     MPI_Status  status;
123:     MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
124:     if (flag) {                 /* incoming message */
125:       PetscMPIInt *recvrank;
126:       void        *buf;
127:       PetscSegBufferGet(segrank,1,&recvrank);
128:       PetscSegBufferGet(segdata,count,&buf);
129:       *recvrank = status.MPI_SOURCE;
130:       MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
131:       nrecvs++;
132:     }
133:     if (!barrier_started) {
134:       PetscMPIInt sent,nsends;
135:       PetscMPIIntCast(nto,&nsends);
136:       MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
137:       if (sent) {
138: #if defined(PETSC_HAVE_MPI_IBARRIER)
139:         MPI_Ibarrier(comm,&barrier);
140: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
141:         MPIX_Ibarrier(comm,&barrier);
142: #endif
143:         barrier_started = PETSC_TRUE;
144:         PetscFree(sendreqs);
145:       }
146:     } else {
147:       MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
148:     }
149:   }
150:   *nfrom = nrecvs;
151:   PetscSegBufferExtractAlloc(segrank,fromranks);
152:   PetscSegBufferDestroy(&segrank);
153:   PetscSegBufferExtractAlloc(segdata,fromdata);
154:   PetscSegBufferDestroy(&segdata);
155:   PetscCommDestroy(&comm);
156:   return(0);
157: }
158: #endif

162: static PetscErrorCode PetscCommBuildTwoSided_Allreduce(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
163: {
165:   PetscMPIInt    size,*iflags,nrecvs,tag,*franks,i;
166:   MPI_Aint       lb,unitbytes;
167:   char           *tdata,*fdata;
168:   MPI_Request    *reqs,*sendreqs;
169:   MPI_Status     *statuses;

172:   MPI_Comm_size(comm,&size);
173:   PetscCalloc1(size,&iflags);
174:   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
175:   PetscGatherNumberOfMessages(comm,iflags,NULL,&nrecvs);
176:   PetscFree(iflags);

178:   PetscCommDuplicate(comm,&comm,&tag);
179:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
180:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
181:   PetscMalloc(nrecvs*count*unitbytes,&fdata);
182:   tdata    = (char*)todata;
183:   PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
184:   sendreqs = reqs + nrecvs;
185:   for (i=0; i<nrecvs; i++) {
186:     MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
187:   }
188:   for (i=0; i<nto; i++) {
189:     MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
190:   }
191:   MPI_Waitall(nto+nrecvs,reqs,statuses);
192:   PetscMalloc1(nrecvs,&franks);
193:   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
194:   PetscFree2(reqs,statuses);
195:   PetscCommDestroy(&comm);

197:   *nfrom            = nrecvs;
198:   *fromranks        = franks;
199:   *(void**)fromdata = fdata;
200:   return(0);
201: }

203: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
206: static PetscErrorCode PetscCommBuildTwoSided_RedScatter(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
207: {
209:   PetscMPIInt    size,*iflags,nrecvs,tag,*franks,i;
210:   MPI_Aint       lb,unitbytes;
211:   char           *tdata,*fdata;
212:   MPI_Request    *reqs,*sendreqs;
213:   MPI_Status     *statuses;

216:   MPI_Comm_size(comm,&size);
217:   PetscMalloc1(size,&iflags);
218:   PetscMemzero(iflags,size*sizeof(*iflags));
219:   for (i=0; i<nto; i++) iflags[toranks[i]] = 1;
220:   MPI_Reduce_scatter_block(iflags,&nrecvs,1,MPI_INT,MPI_SUM,comm);
221:   PetscFree(iflags);

223:   PetscCommDuplicate(comm,&comm,&tag);
224:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
225:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
226:   PetscMalloc(nrecvs*count*unitbytes,&fdata);
227:   tdata    = (char*)todata;
228:   PetscMalloc2(nto+nrecvs,&reqs,nto+nrecvs,&statuses);
229:   sendreqs = reqs + nrecvs;
230:   for (i=0; i<nrecvs; i++) {
231:     MPI_Irecv((void*)(fdata+count*unitbytes*i),count,dtype,MPI_ANY_SOURCE,tag,comm,reqs+i);
232:   }
233:   for (i=0; i<nto; i++) {
234:     MPI_Isend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
235:   }
236:   MPI_Waitall(nto+nrecvs,reqs,statuses);
237:   PetscMalloc1(nrecvs,&franks);
238:   for (i=0; i<nrecvs; i++) franks[i] = statuses[i].MPI_SOURCE;
239:   PetscFree2(reqs,statuses);
240:   PetscCommDestroy(&comm);

242:   *nfrom            = nrecvs;
243:   *fromranks        = franks;
244:   *(void**)fromdata = fdata;
245:   return(0);
246: }
247: #endif

251: /*@C
252:    PetscCommBuildTwoSided - discovers communicating ranks given one-sided information, moving constant-sized data in the process (often message lengths)

254:    Collective on MPI_Comm

256:    Input Arguments:
257: +  comm - communicator
258: .  count - number of entries to send/receive (must match on all ranks)
259: .  dtype - datatype to send/receive from each rank (must match on all ranks)
260: .  nto - number of ranks to send data to
261: .  toranks - ranks to send to (array of length nto)
262: -  todata - data to send to each rank (packed)

264:    Output Arguments:
265: +  nfrom - number of ranks from which messages will be received
266: .  fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
267: -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())

269:    Level: developer

271:    Options Database Keys:
272: .  -build_twosided <allreduce|ibarrier|redscatter> - algorithm to set up two-sided communication

274:    Notes:
275:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
276:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other constant-size data.

278:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

280:    References:
281: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
282:    The MPI_Ibarrier-based implementation uses the algorithm from this reference.

284: .seealso: PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
285: @*/
286: PetscErrorCode PetscCommBuildTwoSided(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata)
287: {
288:   PetscErrorCode         ierr;
289:   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;

292:   PetscSysInitializePackage();
293:   PetscLogEventBegin(PETSC_BuildTwoSided,0,0,0,0);
294:   PetscCommBuildTwoSidedGetType(comm,&buildtype);
295:   switch (buildtype) {
296:   case PETSC_BUILDTWOSIDED_IBARRIER:
297: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
298:     PetscCommBuildTwoSided_Ibarrier(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
299: #else
300:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
301: #endif
302:     break;
303:   case PETSC_BUILDTWOSIDED_ALLREDUCE:
304:     PetscCommBuildTwoSided_Allreduce(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
305:     break;
306:   case PETSC_BUILDTWOSIDED_REDSCATTER:
307: #if defined(PETSC_HAVE_MPI_REDUCE_SCATTER_BLOCK)
308:     PetscCommBuildTwoSided_RedScatter(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);
309: #else
310:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Reduce_scatter_block (part of MPI-2.2)");
311: #endif
312:     break;
313:   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
314:   }
315:   PetscLogEventEnd(PETSC_BuildTwoSided,0,0,0,0);
316:   return(0);
317: }
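
/* Illustrative usage sketch (not part of petsc-3.7.5 mpits.c): each rank sends one
 * PetscMPIInt (for example, an upcoming message length) to every rank in toranks[] and
 * discovers which ranks will send to it, without global knowledge of the communication
 * pattern.  All names here (ExampleDiscoverSenders, tolengths, ...) are hypothetical. */
static PetscErrorCode ExampleDiscoverSenders(MPI_Comm comm,PetscMPIInt nto,const PetscMPIInt *toranks,const PetscMPIInt *tolengths)
{
  PetscErrorCode ierr;
  PetscMPIInt    nfrom,*fromranks,*fromlengths,i;

  /* count=1 entry of type MPI_INT per destination rank, packed in the same order as toranks[] */
  ierr = PetscCommBuildTwoSided(comm,1,MPI_INT,nto,toranks,tolengths,&nfrom,&fromranks,&fromlengths);CHKERRQ(ierr);
  for (i=0; i<nfrom; i++) {
    ierr = PetscSynchronizedPrintf(comm,"expect %d entries from rank %d\n",fromlengths[i],fromranks[i]);CHKERRQ(ierr);
  }
  ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
  ierr = PetscFree(fromranks);CHKERRQ(ierr);
  ierr = PetscFree(fromlengths);CHKERRQ(ierr);
  return 0;
}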

321: static PetscErrorCode PetscCommBuildTwoSidedFReq_Reference(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
322:                                                            PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
323:                                                            PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
324:                                                            PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
325: {
327:   PetscMPIInt i,*tag;
328:   MPI_Aint    lb,unitbytes;
329:   MPI_Request *sendreq,*recvreq;

332:   PetscMalloc1(ntags,&tag);
333:   if (ntags > 0) {
334:     PetscCommDuplicate(comm,&comm,&tag[0]);
335:   }
336:   for (i=1; i<ntags; i++) {
337:     PetscCommGetNewTag(comm,&tag[i]);
338:   }

340:   /* Perform complete initial rendezvous */
341:   PetscCommBuildTwoSided(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata);

343:   PetscMalloc1(nto*ntags,&sendreq);
344:   PetscMalloc1(*nfrom*ntags,&recvreq);

346:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
347:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
348:   for (i=0; i<nto; i++) {
349:     PetscMPIInt k;
350:     for (k=0; k<ntags; k++) sendreq[i*ntags+k] = MPI_REQUEST_NULL;
351:     (*send)(comm,tag,i,toranks[i],((char*)todata)+count*unitbytes*i,sendreq+i*ntags,ctx);
352:   }
353:   for (i=0; i<*nfrom; i++) {
354:     void *header = (*(char**)fromdata) + count*unitbytes*i;
355:     PetscMPIInt k;
356:     for (k=0; k<ntags; k++) recvreq[i*ntags+k] = MPI_REQUEST_NULL;
357:     (*recv)(comm,tag,(*fromranks)[i],header,recvreq+i*ntags,ctx);
358:   }
359:   PetscFree(tag);
360:   PetscCommDestroy(&comm);
361:   *toreqs = sendreq;
362:   *fromreqs = recvreq;
363:   return(0);
364: }

366: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)

370: static PetscErrorCode PetscCommBuildTwoSidedFReq_Ibarrier(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
371:                                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
372:                                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
373:                                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
374: {
376:   PetscMPIInt    nrecvs,tag,*tags,done,i;
377:   MPI_Aint       lb,unitbytes;
378:   char           *tdata;
379:   MPI_Request    *sendreqs,*usendreqs,*req,barrier;
380:   PetscSegBuffer segrank,segdata,segreq;
381:   PetscBool      barrier_started;

384:   PetscCommDuplicate(comm,&comm,&tag);
385:   PetscMalloc1(ntags,&tags);
386:   for (i=0; i<ntags; i++) {
387:     PetscCommGetNewTag(comm,&tags[i]);
388:   }
389:   MPI_Type_get_extent(dtype,&lb,&unitbytes);
390:   if (lb != 0) SETERRQ1(comm,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
391:   tdata = (char*)todata;
392:   PetscMalloc1(nto,&sendreqs);
393:   PetscMalloc1(nto*ntags,&usendreqs);
394:   /* Post synchronous sends */
395:   for (i=0; i<nto; i++) {
396:     MPI_Issend((void*)(tdata+count*unitbytes*i),count,dtype,toranks[i],tag,comm,sendreqs+i);
397:   }
398:   /* Post actual payloads.  These are typically larger messages.  Hopefully sending these later does not slow down the
399:    * synchronous messages above. */
400:   for (i=0; i<nto; i++) {
401:     PetscMPIInt k;
402:     for (k=0; k<ntags; k++) usendreqs[i*ntags+k] = MPI_REQUEST_NULL;
403:     (*send)(comm,tags,i,toranks[i],tdata+count*unitbytes*i,usendreqs+i*ntags,ctx);
404:   }

406:   PetscSegBufferCreate(sizeof(PetscMPIInt),4,&segrank);
407:   PetscSegBufferCreate(unitbytes,4*count,&segdata);
408:   PetscSegBufferCreate(sizeof(MPI_Request),4,&segreq);

410:   nrecvs  = 0;
411:   barrier = MPI_REQUEST_NULL;
412:   /* MPICH-3.2 sometimes does not create a request in some "optimized" cases.  This is arguably a standard violation,
413:    * but we need to work around it. */
414:   barrier_started = PETSC_FALSE;
415:   for (done=0; !done; ) {
416:     PetscMPIInt flag;
417:     MPI_Status  status;
418:     MPI_Iprobe(MPI_ANY_SOURCE,tag,comm,&flag,&status);
419:     if (flag) {                 /* incoming message */
420:       PetscMPIInt *recvrank,k;
421:       void        *buf;
422:       PetscSegBufferGet(segrank,1,&recvrank);
423:       PetscSegBufferGet(segdata,count,&buf);
424:       *recvrank = status.MPI_SOURCE;
425:       MPI_Recv(buf,count,dtype,status.MPI_SOURCE,tag,comm,MPI_STATUS_IGNORE);
426:       PetscSegBufferGet(segreq,ntags,&req);
427:       for (k=0; k<ntags; k++) req[k] = MPI_REQUEST_NULL;
428:       (*recv)(comm,tags,status.MPI_SOURCE,buf,req,ctx);
429:       nrecvs++;
430:     }
431:     if (!barrier_started) {
432:       PetscMPIInt sent,nsends;
433:       PetscMPIIntCast(nto,&nsends);
434:       MPI_Testall(nsends,sendreqs,&sent,MPI_STATUSES_IGNORE);
435:       if (sent) {
436: #if defined(PETSC_HAVE_MPI_IBARRIER)
437:         MPI_Ibarrier(comm,&barrier);
438: #elif defined(PETSC_HAVE_MPIX_IBARRIER)
439:         MPIX_Ibarrier(comm,&barrier);
440: #endif
441:         barrier_started = PETSC_TRUE;
442:       }
443:     } else {
444:       MPI_Test(&barrier,&done,MPI_STATUS_IGNORE);
445:     }
446:   }
447:   *nfrom = nrecvs;
448:   PetscSegBufferExtractAlloc(segrank,fromranks);
449:   PetscSegBufferDestroy(&segrank);
450:   PetscSegBufferExtractAlloc(segdata,fromdata);
451:   PetscSegBufferDestroy(&segdata);
452:   *toreqs = usendreqs;
453:   PetscSegBufferExtractAlloc(segreq,fromreqs);
454:   PetscSegBufferDestroy(&segreq);
455:   PetscFree(sendreqs);
456:   PetscFree(tags);
457:   PetscCommDestroy(&comm);
458:   return(0);
459: }
460: #endif

464: /*@C
465:    PetscCommBuildTwoSidedF - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous

467:    Collective on MPI_Comm

469:    Input Arguments:
470: +  comm - communicator
471: .  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
472: .  dtype - datatype to send/receive from each rank (must match on all ranks)
473: .  nto - number of ranks to send data to
474: .  toranks - ranks to send to (array of length nto)
475: .  todata - data to send to each rank (packed)
476: .  ntags - number of tags needed by send/recv callbacks
477: .  send - callback invoked on sending process when ready to send primary payload
478: .  recv - callback invoked on receiving process after delivery of rendezvous message
479: -  ctx - context for callbacks

481:    Output Arguments:
482: +  nfrom - number of ranks from which messages will be received
483: .  fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
484: -  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())

486:    Level: developer

488:    Notes:
489:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
490:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

492:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

494:    References:
495: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
496:    The MPI_Ibarrier-based implementation uses the algorithm from this reference.

498: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedFReq(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
499: @*/
500: PetscErrorCode PetscCommBuildTwoSidedF(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,
501:                                        PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
502:                                        PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
503: {
505:   MPI_Request    *toreqs,*fromreqs;

508:   PetscCommBuildTwoSidedFReq(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,&toreqs,&fromreqs,send,recv,ctx);
509:   MPI_Waitall(nto*ntags,toreqs,MPI_STATUSES_IGNORE);
510:   MPI_Waitall(*nfrom*ntags,fromreqs,MPI_STATUSES_IGNORE);
511:   PetscFree(toreqs);
512:   PetscFree(fromreqs);
513:   return(0);
514: }
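
/* Illustrative usage sketch (not part of petsc-3.7.5 mpits.c): use PetscCommBuildTwoSidedF()
 * to rendezvous with a header that carries a payload length, and let the send/recv callbacks
 * post the variable-length payload messages using the tag supplied in tag[0].  Everything
 * named Example* is hypothetical, and for brevity the sketch caps the number of sending ranks
 * at EXAMPLE_MAXFROM instead of growing its storage dynamically. */
#define EXAMPLE_MAXFROM 64
typedef struct {
  PetscScalar **sendbufs;                   /* sendbufs[i] holds tolengths[i] entries destined for toranks[i] */
  PetscScalar  *recvbufs[EXAMPLE_MAXFROM];  /* payloads received so far, allocated in the recv callback */
  PetscMPIInt   nrecv;
} ExampleCtx;

static PetscErrorCode ExampleSendPayload(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt i,PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
{
  ExampleCtx     *ex  = (ExampleCtx*)ctx;
  PetscMPIInt     len = *(PetscMPIInt*)header;  /* the rendezvous header is the payload length */
  PetscErrorCode  ierr;

  ierr = MPI_Isend(ex->sendbufs[i],len,MPIU_SCALAR,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  return 0;
}

static PetscErrorCode ExampleRecvPayload(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *header,MPI_Request req[],void *ctx)
{
  ExampleCtx     *ex  = (ExampleCtx*)ctx;
  PetscMPIInt     len = *(PetscMPIInt*)header;
  PetscErrorCode  ierr;

  if (ex->nrecv >= EXAMPLE_MAXFROM) SETERRQ(comm,PETSC_ERR_SUP,"Sketch supports only a bounded number of senders");
  ierr = PetscMalloc1(len,&ex->recvbufs[ex->nrecv]);CHKERRQ(ierr);
  ierr = MPI_Irecv(ex->recvbufs[ex->nrecv],len,MPIU_SCALAR,rank,tag[0],comm,&req[0]);CHKERRQ(ierr);
  ex->nrecv++;
  return 0;
}

static PetscErrorCode ExampleExchangePayloads(MPI_Comm comm,PetscMPIInt nto,const PetscMPIInt *toranks,const PetscMPIInt *tolengths,ExampleCtx *ex)
{
  PetscErrorCode ierr;
  PetscMPIInt    nfrom,*fromranks,*fromlengths;

  /* ntags=1 because each callback posts exactly one payload message per partner rank */
  ierr = PetscCommBuildTwoSidedF(comm,1,MPI_INT,nto,toranks,tolengths,&nfrom,&fromranks,&fromlengths,1,ExampleSendPayload,ExampleRecvPayload,ex);CHKERRQ(ierr);
  /* On return all payload requests have completed; ex->recvbufs[0..nfrom-1] now hold the data
   * (matching fromranks[]/fromlengths[]) and are left for the caller to use and PetscFree(). */
  ierr = PetscFree(fromranks);CHKERRQ(ierr);
  ierr = PetscFree(fromlengths);CHKERRQ(ierr);
  return 0;
}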

518: /*@C
519:    PetscCommBuildTwoSidedFReq - discovers communicating ranks given one-sided information, calling user-defined functions during rendezvous, returns requests

521:    Collective on MPI_Comm

523:    Input Arguments:
524: +  comm - communicator
525: .  count - number of entries to send/receive in initial rendezvous (must match on all ranks)
526: .  dtype - datatype to send/receive from each rank (must match on all ranks)
527: .  nto - number of ranks to send data to
528: .  toranks - ranks to send to (array of length nto)
529: .  todata - data to send to each rank (packed)
530: .  ntags - number of tags needed by send/recv callbacks
531: .  send - callback invoked on sending process when ready to send primary payload
532: .  recv - callback invoked on receiving process after delivery of rendezvous message
533: -  ctx - context for callbacks

535:    Output Arguments:
536: +  nfrom - number of ranks from which messages will be received
537: .  fromranks - ranks from which messages will be received (length nfrom; caller should PetscFree())
538: .  fromdata - packed data from each rank, each with count entries of type dtype (length nfrom, caller responsible for PetscFree())
539: .  toreqs - array of nto*ntags sender requests (caller must wait on these, then PetscFree())
540: -  fromreqs - array of nfrom*ntags receiver requests (caller must wait on these, then PetscFree())

542:    Level: developer

544:    Notes:
545:    This memory-scalable interface is an alternative to calling PetscGatherNumberOfMessages() and
546:    PetscGatherMessageLengths(), possibly with a subsequent round of communication to send other data.

548:    Basic data types as well as contiguous types are supported, but non-contiguous (e.g., strided) types are not.

550:    References:
551: .  1. - Hoefler, Siebert and Lumsdaine, Scalable communication protocols for dynamic sparse data exchange, 2010.
552:    The MPI_Ibarrier-based implementation uses the algorithm from this reference.

554: .seealso: PetscCommBuildTwoSided(), PetscCommBuildTwoSidedF(), PetscGatherNumberOfMessages(), PetscGatherMessageLengths()
555: @*/
556: PetscErrorCode PetscCommBuildTwoSidedFReq(MPI_Comm comm,PetscMPIInt count,MPI_Datatype dtype,PetscMPIInt nto,const PetscMPIInt *toranks,const void *todata,
557:                                           PetscMPIInt *nfrom,PetscMPIInt **fromranks,void *fromdata,PetscMPIInt ntags,MPI_Request **toreqs,MPI_Request **fromreqs,
558:                                           PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
559:                                           PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx)
560: {
561:   PetscErrorCode         ierr,(*f)(MPI_Comm,PetscMPIInt,MPI_Datatype,PetscMPIInt,const PetscMPIInt[],const void*,
562:                                    PetscMPIInt*,PetscMPIInt**,void*,PetscMPIInt,MPI_Request**,MPI_Request**,
563:                                    PetscErrorCode (*send)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,PetscMPIInt,void*,MPI_Request[],void*),
564:                                    PetscErrorCode (*recv)(MPI_Comm,const PetscMPIInt[],PetscMPIInt,void*,MPI_Request[],void*),void *ctx);
565:   PetscBuildTwoSidedType buildtype = PETSC_BUILDTWOSIDED_NOTSET;
566:   PetscMPIInt i,size;

569:   PetscSysInitializePackage();
570:   MPI_Comm_size(comm,&size);
571:   for (i=0; i<nto; i++) {
572:     if (toranks[i] < 0 || size <= toranks[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"toranks[%d] %d not in comm size %d",i,toranks[i],size);
573:   }
574:   PetscLogEventBegin(PETSC_BuildTwoSidedF,0,0,0,0);
575:   PetscCommBuildTwoSidedGetType(comm,&buildtype);
576:   switch (buildtype) {
577:   case PETSC_BUILDTWOSIDED_IBARRIER:
578: #if defined(PETSC_HAVE_MPI_IBARRIER) || defined(PETSC_HAVE_MPIX_IBARRIER)
579:     f = PetscCommBuildTwoSidedFReq_Ibarrier;
580: #else
581:     SETERRQ(comm,PETSC_ERR_PLIB,"MPI implementation does not provide MPI_Ibarrier (part of MPI-3)");
582: #endif
583:     break;
584:   case PETSC_BUILDTWOSIDED_ALLREDUCE:
585:   case PETSC_BUILDTWOSIDED_REDSCATTER:
586:     f = PetscCommBuildTwoSidedFReq_Reference;
587:     break;
588:   default: SETERRQ(comm,PETSC_ERR_PLIB,"Unknown method for building two-sided communication");
589:   }
590:   (*f)(comm,count,dtype,nto,toranks,todata,nfrom,fromranks,fromdata,ntags,toreqs,fromreqs,send,recv,ctx);
591:   PetscLogEventEnd(PETSC_BuildTwoSidedF,0,0,0,0);
592:   return(0);
593: }