Actual source code: aomemscalable.c

petsc-3.7.5 2017-01-01
Report Typos and Errors
  2: /*
  3:     The memory scalable AO application ordering routines. These store the
  4:   local orderings on each processor.
  5: */

  7: #include <../src/vec/is/ao/aoimpl.h>          /*I  "petscao.h"   I*/

  9: typedef struct {
 10:   PetscInt    *app_loc;    /* app_loc[i] is the partner for the ith local PETSc slot */
 11:   PetscInt    *petsc_loc;  /* petsc_loc[j] is the partner for the jth local app slot */
 12:   PetscLayout map;         /* determines the local sizes of ao */
 13: } AO_MemoryScalable;

 15: /*
 16:        All processors have the same data so processor 1 prints it
 17: */
 20: PetscErrorCode AOView_MemoryScalable(AO ao,PetscViewer viewer)
 21: {
 22:   PetscErrorCode    ierr;
 23:   PetscMPIInt       rank,size;
 24:   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
 25:   PetscBool         iascii;
 26:   PetscMPIInt       tag_app,tag_petsc;
 27:   PetscLayout       map = aomems->map;
 28:   PetscInt          *app,*app_loc,*petsc,*petsc_loc,len,i,j;
 29:   MPI_Status        status;

 32:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 33:   if (!iascii) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not supported for AO MemoryScalable",((PetscObject)viewer)->type_name);

 35:   MPI_Comm_rank(PetscObjectComm((PetscObject)ao),&rank);
 36:   MPI_Comm_size(PetscObjectComm((PetscObject)ao),&size);

 38:   PetscObjectGetNewTag((PetscObject)ao,&tag_app);
 39:   PetscObjectGetNewTag((PetscObject)ao,&tag_petsc);

 41:   if (!rank) {
 42:     PetscViewerASCIIPrintf(viewer,"Number of elements in ordering %D\n",ao->N);
 43:     PetscViewerASCIIPrintf(viewer,  "PETSc->App  App->PETSc\n");

 45:     PetscMalloc2(map->N,&app,map->N,&petsc);
 46:     len  = map->n;
 47:     /* print local AO */
 48:     PetscViewerASCIIPrintf(viewer,"Process [%D]\n",rank);
 49:     for (i=0; i<len; i++) {
 50:       PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",i,aomems->app_loc[i],i,aomems->petsc_loc[i]);
 51:     }

 53:     /* recv and print off-processor's AO */
 54:     for (i=1; i<size; i++) {
 55:       len       = map->range[i+1] - map->range[i];
 56:       app_loc   = app  + map->range[i];
 57:       petsc_loc = petsc+ map->range[i];
 58:       MPI_Recv(app_loc,(PetscMPIInt)len,MPIU_INT,i,tag_app,PetscObjectComm((PetscObject)ao),&status);
 59:       MPI_Recv(petsc_loc,(PetscMPIInt)len,MPIU_INT,i,tag_petsc,PetscObjectComm((PetscObject)ao),&status);
 60:       PetscViewerASCIIPrintf(viewer,"Process [%D]\n",i);
 61:       for (j=0; j<len; j++) {
 62:         PetscViewerASCIIPrintf(viewer,"%3D  %3D    %3D  %3D\n",map->range[i]+j,app_loc[j],map->range[i]+j,petsc_loc[j]);
 63:       }
 64:     }
 65:     PetscFree2(app,petsc);

 67:   } else {
 68:     /* send values */
 69:     MPI_Send((void*)aomems->app_loc,map->n,MPIU_INT,0,tag_app,PetscObjectComm((PetscObject)ao));
 70:     MPI_Send((void*)aomems->petsc_loc,map->n,MPIU_INT,0,tag_petsc,PetscObjectComm((PetscObject)ao));
 71:   }
 72:   PetscViewerFlush(viewer);
 73:   return(0);
 74: }

 78: PetscErrorCode AODestroy_MemoryScalable(AO ao)
 79: {
 80:   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
 81:   PetscErrorCode    ierr;

 84:   PetscFree2(aomems->app_loc,aomems->petsc_loc);
 85:   PetscLayoutDestroy(&aomems->map);
 86:   PetscFree(aomems);
 87:   return(0);
 88: }

 90: /*
 91:    Input Parameters:
 92: +   ao - the application ordering context
 93: .   n  - the number of integers in ia[]
 94: .   ia - the integers; these are replaced with their mapped value
 95: -   maploc - app_loc or petsc_loc in struct "AO_MemoryScalable"

 97:    Output Parameter:
 98: .   ia - the mapped interges
 99:  */
102: PetscErrorCode AOMap_MemoryScalable_private(AO ao,PetscInt n,PetscInt *ia,PetscInt *maploc)
103: {
104:   PetscErrorCode    ierr;
105:   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
106:   MPI_Comm          comm;
107:   PetscMPIInt       rank,size,tag1,tag2;
108:   PetscInt          *owner,*start,*sizes,nsends,nreceives;
109:   PetscInt          nmax,count,*sindices,*rindices,i,j,idx,lastidx,*sindices2,*rindices2;
110:   PetscInt          *owners = aomems->map->range;
111:   MPI_Request       *send_waits,*recv_waits,*send_waits2,*recv_waits2;
112:   MPI_Status        recv_status;
113:   PetscMPIInt       nindices,source,widx;
114:   PetscInt          *rbuf,*sbuf;
115:   MPI_Status        *send_status,*send_status2;

118:   PetscObjectGetComm((PetscObject)ao,&comm);
119:   MPI_Comm_rank(comm,&rank);
120:   MPI_Comm_size(comm,&size);

122:   /*  first count number of contributors to each processor */
123:   PetscMalloc2(2*size,&sizes,size,&start);
124:   PetscMemzero(sizes,2*size*sizeof(PetscInt));
125:   PetscCalloc1(n,&owner);

127:   j       = 0;
128:   lastidx = -1;
129:   for (i=0; i<n; i++) {
130:     /* if indices are NOT locally sorted, need to start search at the beginning */
131:     if (lastidx > (idx = ia[i])) j = 0;
132:     lastidx = idx;
133:     for (; j<size; j++) {
134:       if (idx >= owners[j] && idx < owners[j+1]) {
135:         sizes[2*j]++;     /* num of indices to be sent */
136:         sizes[2*j+1] = 1; /* send to proc[j] */
137:         owner[i]      = j;
138:         break;
139:       }
140:     }
141:   }
142:   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
143:   nsends        = 0;
144:   for (i=0; i<size; i++) nsends += sizes[2*i+1];

146:   /* inform other processors of number of messages and max length*/
147:   PetscMaxSum(comm,sizes,&nmax,&nreceives);

149:   /* allocate arrays */
150:   PetscObjectGetNewTag((PetscObject)ao,&tag1);
151:   PetscObjectGetNewTag((PetscObject)ao,&tag2);

153:   PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);
154:   PetscMalloc2(nsends*nmax,&rindices2,nsends,&recv_waits2);

156:   PetscMalloc3(n,&sindices,nsends,&send_waits,nsends,&send_status);
157:   PetscMalloc3(n,&sindices2,nreceives,&send_waits2,nreceives,&send_status2);

159:   /* post 1st receives: receive others requests
160:      since we don't know how long each individual message is we
161:      allocate the largest needed buffer for each receive. Potentially
162:      this is a lot of wasted space.
163:   */
164:   for (i=0,count=0; i<nreceives; i++) {
165:     MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
166:   }

168:   /* do 1st sends:
169:       1) starts[i] gives the starting index in svalues for stuff going to
170:          the ith processor
171:   */
172:   start[0] = 0;
173:   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
174:   for (i=0; i<n; i++) {
175:     j = owner[i];
176:     if (j != rank) {
177:       sindices[start[j]++]  = ia[i];
178:     } else { /* compute my own map */
179:       if (ia[i] >= owners[rank] && ia[i] < owners[rank+1]) {
180:         ia[i] = maploc[ia[i]-owners[rank]];
181:       } else {
182:         ia[i] = -1;  /* ia[i] is not in the range of 0 and N-1, maps it to -1 */
183:       }
184:     }
185:   }

187:   start[0] = 0;
188:   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
189:   for (i=0,count=0; i<size; i++) {
190:     if (sizes[2*i+1]) {
191:       /* send my request to others */
192:       MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag1,comm,send_waits+count);
193:       /* post receive for the answer of my request */
194:       MPI_Irecv(sindices2+start[i],sizes[2*i],MPIU_INT,i,tag2,comm,recv_waits2+count);
195:       count++;
196:     }
197:   }
198:   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);

200:   /* wait on 1st sends */
201:   if (nsends) {
202:     MPI_Waitall(nsends,send_waits,send_status);
203:   }

205:   /* 1st recvs: other's requests */
206:   for (j=0; j< nreceives; j++) {
207:     MPI_Waitany(nreceives,recv_waits,&widx,&recv_status); /* idx: index of handle for operation that completed */
208:     MPI_Get_count(&recv_status,MPIU_INT,&nindices);
209:     rbuf   = rindices+nmax*widx; /* global index */
210:     source = recv_status.MPI_SOURCE;

212:     /* compute mapping */
213:     sbuf = rbuf;
214:     for (i=0; i<nindices; i++) sbuf[i] = maploc[rbuf[i]-owners[rank]];

216:     /* send mapping back to the sender */
217:     MPI_Isend(sbuf,nindices,MPIU_INT,source,tag2,comm,send_waits2+widx);
218:   }

220:   /* wait on 2nd sends */
221:   if (nreceives) {
222:     MPI_Waitall(nreceives,send_waits2,send_status2);
223:   }

225:   /* 2nd recvs: for the answer of my request */
226:   for (j=0; j< nsends; j++) {
227:     MPI_Waitany(nsends,recv_waits2,&widx,&recv_status);
228:     MPI_Get_count(&recv_status,MPIU_INT,&nindices);
229:     source = recv_status.MPI_SOURCE;
230:     /* pack output ia[] */
231:     rbuf  = sindices2+start[source];
232:     count = 0;
233:     for (i=0; i<n; i++) {
234:       if (source == owner[i]) ia[i] = rbuf[count++];
235:     }
236:   }

238:   /* free arrays */
239:   PetscFree2(sizes,start);
240:   PetscFree(owner);
241:   PetscFree2(rindices,recv_waits);
242:   PetscFree2(rindices2,recv_waits2);
243:   PetscFree3(sindices,send_waits,send_status);
244:   PetscFree3(sindices2,send_waits2,send_status2);
245:   return(0);
246: }

250: PetscErrorCode AOPetscToApplication_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
251: {
252:   PetscErrorCode    ierr;
253:   AO_MemoryScalable *aomems  = (AO_MemoryScalable*)ao->data;
254:   PetscInt          *app_loc = aomems->app_loc;

257:   AOMap_MemoryScalable_private(ao,n,ia,app_loc);
258:   return(0);
259: }

263: PetscErrorCode AOApplicationToPetsc_MemoryScalable(AO ao,PetscInt n,PetscInt *ia)
264: {
265:   PetscErrorCode    ierr;
266:   AO_MemoryScalable *aomems    = (AO_MemoryScalable*)ao->data;
267:   PetscInt          *petsc_loc = aomems->petsc_loc;

270:   AOMap_MemoryScalable_private(ao,n,ia,petsc_loc);
271:   return(0);
272: }

274: static struct _AOOps AOOps_MemoryScalable = {
275:   AOView_MemoryScalable,
276:   AODestroy_MemoryScalable,
277:   AOPetscToApplication_MemoryScalable,
278:   AOApplicationToPetsc_MemoryScalable,
279:   0,
280:   0,
281:   0,
282:   0
283: };

287: PetscErrorCode  AOCreateMemoryScalable_private(MPI_Comm comm,PetscInt napp,const PetscInt from_array[],const PetscInt to_array[],AO ao, PetscInt *aomap_loc)
288: {
289:   PetscErrorCode    ierr;
290:   AO_MemoryScalable *aomems = (AO_MemoryScalable*)ao->data;
291:   PetscLayout       map     = aomems->map;
292:   PetscInt          n_local = map->n,i,j;
293:   PetscMPIInt       rank,size,tag;
294:   PetscInt          *owner,*start,*sizes,nsends,nreceives;
295:   PetscInt          nmax,count,*sindices,*rindices,idx,lastidx;
296:   PetscInt          *owners = aomems->map->range;
297:   MPI_Request       *send_waits,*recv_waits;
298:   MPI_Status        recv_status;
299:   PetscMPIInt       nindices,widx;
300:   PetscInt          *rbuf;
301:   PetscInt          n=napp,ip,ia;
302:   MPI_Status        *send_status;

305:   PetscMemzero(aomap_loc,n_local*sizeof(PetscInt));

307:   MPI_Comm_rank(comm,&rank);
308:   MPI_Comm_size(comm,&size);

310:   /*  first count number of contributors (of from_array[]) to each processor */
311:   PetscCalloc1(2*size,&sizes);
312:   PetscMalloc1(n,&owner);

314:   j       = 0;
315:   lastidx = -1;
316:   for (i=0; i<n; i++) {
317:     /* if indices are NOT locally sorted, need to start search at the beginning */
318:     if (lastidx > (idx = from_array[i])) j = 0;
319:     lastidx = idx;
320:     for (; j<size; j++) {
321:       if (idx >= owners[j] && idx < owners[j+1]) {
322:         sizes[2*j]  += 2; /* num of indices to be sent - in pairs (ip,ia) */
323:         sizes[2*j+1] = 1; /* send to proc[j] */
324:         owner[i]      = j;
325:         break;
326:       }
327:     }
328:   }
329:   sizes[2*rank]=sizes[2*rank+1]=0; /* do not receive from self! */
330:   nsends        = 0;
331:   for (i=0; i<size; i++) nsends += sizes[2*i+1];

333:   /* inform other processors of number of messages and max length*/
334:   PetscMaxSum(comm,sizes,&nmax,&nreceives);

336:   /* allocate arrays */
337:   PetscObjectGetNewTag((PetscObject)ao,&tag);
338:   PetscMalloc2(nreceives*nmax,&rindices,nreceives,&recv_waits);
339:   PetscMalloc3(2*n,&sindices,nsends,&send_waits,nsends,&send_status);
340:   PetscMalloc1(size,&start);

342:   /* post receives: */
343:   for (i=0; i<nreceives; i++) {
344:     MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
345:   }

347:   /* do sends:
348:       1) starts[i] gives the starting index in svalues for stuff going to
349:          the ith processor
350:   */
351:   start[0] = 0;
352:   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
353:   for (i=0; i<n; i++) {
354:     j = owner[i];
355:     if (j != rank) {
356:       ip                   = from_array[i];
357:       ia                   = to_array[i];
358:       sindices[start[j]++] = ip;
359:       sindices[start[j]++] = ia;
360:     } else { /* compute my own map */
361:       ip            = from_array[i] - owners[rank];
362:       ia            = to_array[i];
363:       aomap_loc[ip] = ia;
364:     }
365:   }

367:   start[0] = 0;
368:   for (i=1; i<size; i++) start[i] = start[i-1] + sizes[2*i-2];
369:   for (i=0,count=0; i<size; i++) {
370:     if (sizes[2*i+1]) {
371:       MPI_Isend(sindices+start[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count);
372:       count++;
373:     }
374:   }
375:   if (nsends != count) SETERRQ2(comm,PETSC_ERR_SUP,"nsends %d != count %d",nsends,count);

377:   /* wait on sends */
378:   if (nsends) {
379:     MPI_Waitall(nsends,send_waits,send_status);
380:   }

382:   /* recvs */
383:   count=0;
384:   for (j= nreceives; j>0; j--) {
385:     MPI_Waitany(nreceives,recv_waits,&widx,&recv_status);
386:     MPI_Get_count(&recv_status,MPIU_INT,&nindices);
387:     rbuf = rindices+nmax*widx; /* global index */

389:     /* compute local mapping */
390:     for (i=0; i<nindices; i+=2) { /* pack aomap_loc */
391:       ip            = rbuf[i] - owners[rank]; /* local index */
392:       ia            = rbuf[i+1];
393:       aomap_loc[ip] = ia;
394:     }
395:     count++;
396:   }

398:   PetscFree(start);
399:   PetscFree3(sindices,send_waits,send_status);
400:   PetscFree2(rindices,recv_waits);
401:   PetscFree(owner);
402:   PetscFree(sizes);
403:   return(0);
404: }

408: PETSC_EXTERN PetscErrorCode AOCreate_MemoryScalable(AO ao)
409: {
410:   PetscErrorCode    ierr;
411:   IS                isapp=ao->isapp,ispetsc=ao->ispetsc;
412:   const PetscInt    *mypetsc,*myapp;
413:   PetscInt          napp,n_local,N,i,start,*petsc,*lens,*disp;
414:   MPI_Comm          comm;
415:   AO_MemoryScalable *aomems;
416:   PetscLayout       map;
417:   PetscMPIInt       size,rank;

420:   if (!isapp) SETERRQ(PetscObjectComm((PetscObject)ao),PETSC_ERR_ARG_WRONGSTATE,"AOSetIS() must be called before AOSetType()");
421:   /* create special struct aomems */
422:   PetscNewLog(ao,&aomems);
423:   ao->data = (void*) aomems;
424:   PetscMemcpy(ao->ops,&AOOps_MemoryScalable,sizeof(struct _AOOps));
425:   PetscObjectChangeTypeName((PetscObject)ao,AOMEMORYSCALABLE);

427:   /* transmit all local lengths of isapp to all processors */
428:   PetscObjectGetComm((PetscObject)isapp,&comm);
429:   MPI_Comm_size(comm, &size);
430:   MPI_Comm_rank(comm, &rank);
431:   PetscMalloc2(size,&lens,size,&disp);
432:   ISGetLocalSize(isapp,&napp);
433:   MPI_Allgather(&napp, 1, MPIU_INT, lens, 1, MPIU_INT, comm);

435:   N = 0;
436:   for (i = 0; i < size; i++) {
437:     disp[i] = N;
438:     N      += lens[i];
439:   }

441:   /* If ispetsc is 0 then use "natural" numbering */
442:   if (napp) {
443:     if (!ispetsc) {
444:       start = disp[rank];
445:       PetscMalloc1(napp+1, &petsc);
446:       for (i=0; i<napp; i++) petsc[i] = start + i;
447:     } else {
448:       ISGetIndices(ispetsc,&mypetsc);
449:       petsc = (PetscInt*)mypetsc;
450:     }
451:   }

453:   /* create a map with global size N - used to determine the local sizes of ao - shall we use local napp instead of N? */
454:   PetscLayoutCreate(comm,&map);
455:   map->bs = 1;
456:   map->N  = N;
457:   PetscLayoutSetUp(map);

459:   ao->N       = N;
460:   ao->n       = map->n;
461:   aomems->map = map;

463:   /* create distributed indices app_loc: petsc->app and petsc_loc: app->petsc */
464:   n_local = map->n;
465:   PetscMalloc2(n_local, &aomems->app_loc,n_local,&aomems->petsc_loc);
466:   PetscLogObjectMemory((PetscObject)ao,2*n_local*sizeof(PetscInt));
467:   PetscMemzero(aomems->app_loc,n_local*sizeof(PetscInt));
468:   PetscMemzero(aomems->petsc_loc,n_local*sizeof(PetscInt));
469:   ISGetIndices(isapp,&myapp);

471:   AOCreateMemoryScalable_private(comm,napp,petsc,myapp,ao,aomems->app_loc);
472:   AOCreateMemoryScalable_private(comm,napp,myapp,petsc,ao,aomems->petsc_loc);

474:   ISRestoreIndices(isapp,&myapp);
475:   if (napp) {
476:     if (ispetsc) {
477:       ISRestoreIndices(ispetsc,&mypetsc);
478:     } else {
479:       PetscFree(petsc);
480:     }
481:   }
482:   PetscFree2(lens,disp);
483:   return(0);
484: }

488: /*@C
489:    AOCreateMemoryScalable - Creates a memory scalable application ordering using two integer arrays.

491:    Collective on MPI_Comm

493:    Input Parameters:
494: +  comm - MPI communicator that is to share AO
495: .  napp - size of integer arrays
496: .  myapp - integer array that defines an ordering
497: -  mypetsc - integer array that defines another ordering (may be NULL to
498:              indicate the natural ordering, that is 0,1,2,3,...)

500:    Output Parameter:
501: .  aoout - the new application ordering

503:    Level: beginner

505:     Notes: The arrays myapp and mypetsc must contain the all the integers 0 to napp-1 with no duplicates; that is there cannot be any "holes"
506:            in the indices. Use AOCreateMapping() or AOCreateMappingIS() if you wish to have "holes" in the indices.
507:            Comparing with AOCreateBasic(), this routine trades memory with message communication.

509: .keywords: AO, create

511: .seealso: AOCreateMemoryScalableIS(), AODestroy(), AOPetscToApplication(), AOApplicationToPetsc()
512: @*/
513: PetscErrorCode AOCreateMemoryScalable(MPI_Comm comm,PetscInt napp,const PetscInt myapp[],const PetscInt mypetsc[],AO *aoout)
514: {
516:   IS             isapp,ispetsc;
517:   const PetscInt *app=myapp,*petsc=mypetsc;

520:   ISCreateGeneral(comm,napp,app,PETSC_USE_POINTER,&isapp);
521:   if (mypetsc) {
522:     ISCreateGeneral(comm,napp,petsc,PETSC_USE_POINTER,&ispetsc);
523:   } else {
524:     ispetsc = NULL;
525:   }
526:   AOCreateMemoryScalableIS(isapp,ispetsc,aoout);
527:   ISDestroy(&isapp);
528:   if (mypetsc) {
529:     ISDestroy(&ispetsc);
530:   }
531:   return(0);
532: }

536: /*@C
537:    AOCreateMemoryScalableIS - Creates a memory scalable application ordering using two index sets.

539:    Collective on IS

541:    Input Parameters:
542: +  isapp - index set that defines an ordering
543: -  ispetsc - index set that defines another ordering (may be NULL to use the
544:              natural ordering)

546:    Output Parameter:
547: .  aoout - the new application ordering

549:    Level: beginner

551:     Notes: The index sets isapp and ispetsc must contain the all the integers 0 to napp-1 (where napp is the length of the index sets) with no duplicates;
552:            that is there cannot be any "holes".
553:            Comparing with AOCreateBasicIS(), this routine trades memory with message communication.
554: .keywords: AO, create

556: .seealso: AOCreateMemoryScalable(),  AODestroy()
557: @*/
558: PetscErrorCode  AOCreateMemoryScalableIS(IS isapp,IS ispetsc,AO *aoout)
559: {
561:   MPI_Comm       comm;
562:   AO             ao;

565:   PetscObjectGetComm((PetscObject)isapp,&comm);
566:   AOCreate(comm,&ao);
567:   AOSetIS(ao,isapp,ispetsc);
568:   AOSetType(ao,AOMEMORYSCALABLE);
569:   AOViewFromOptions(ao,NULL,"-ao_view");
570:   *aoout = ao;
571:   return(0);
572: }