Actual source code: vscat.c
petsc-3.7.5 2017-01-01
2: /*
3: Code for creating scatters between vectors. This file
4: includes the code for scattering between sequential vectors and
5: some special cases for parallel scatters.
6: */
8: #include <petsc/private/isimpl.h>
9: #include <petsc/private/vecimpl.h> /*I "petscvec.h" I*/
11: #if defined(PETSC_HAVE_CUSP)
12: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt,PetscInt*,PetscInt,PetscInt*,PetscCUSPIndices*);
13: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUSPIndices*);
14: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesDestroy(PetscCUSPIndices*);
15: PETSC_INTERN PetscErrorCode VecScatterCUSP_StoS(Vec,Vec,PetscCUSPIndices,InsertMode,ScatterMode);
16: #elif defined(PETSC_HAVE_VECCUDA)
17: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_PtoP(PetscInt,PetscInt*,PetscInt,PetscInt*,PetscCUDAIndices*);
18: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUDAIndices*);
19: PETSC_INTERN PetscErrorCode VecScatterCUDAIndicesDestroy(PetscCUDAIndices*);
20: PETSC_INTERN PetscErrorCode VecScatterCUDA_StoS(Vec,Vec,PetscCUDAIndices,InsertMode,ScatterMode);
21: #endif
23: /* Logging support */
24: PetscClassId VEC_SCATTER_CLASSID;
26: #if defined(PETSC_USE_DEBUG)
27: /*
28: Checks if any indices are less than zero and generates an error
29: */
32: static PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
33: {
34: PetscInt i;
37: for (i=0; i<n; i++) {
38: if (idx[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
39: if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
40: }
41: return(0);
42: }
43: #endif
45: /*
46: This is special scatter code for when the entire parallel vector is copied to each processor.
48: This code was written by Cameron Cooper, Occidental College, Fall 1995,
49: will working at ANL as a SERS student.
50: */
53: PetscErrorCode VecScatterBegin_MPI_ToAll(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
54: {
56: PetscInt yy_n,xx_n;
57: PetscScalar *xv,*yv;
60: VecGetLocalSize(y,&yy_n);
61: VecGetLocalSize(x,&xx_n);
62: VecGetArrayPair(x,y,&xv,&yv);
64: if (mode & SCATTER_REVERSE) {
65: PetscScalar *xvt,*xvt2;
66: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
67: PetscInt i;
68: PetscMPIInt *disply = scat->displx;
70: if (addv == INSERT_VALUES) {
71: PetscInt rstart,rend;
72: /*
73: copy the correct part of the local vector into the local storage of
74: the MPI one. Note: this operation only makes sense if all the local
75: vectors have the same values
76: */
77: VecGetOwnershipRange(y,&rstart,&rend);
78: PetscMemcpy(yv,xv+rstart,yy_n*sizeof(PetscScalar));
79: } else {
80: MPI_Comm comm;
81: PetscMPIInt rank;
82: PetscObjectGetComm((PetscObject)y,&comm);
83: MPI_Comm_rank(comm,&rank);
84: if (scat->work1) xvt = scat->work1;
85: else {
86: PetscMalloc1(xx_n,&xvt);
87: scat->work1 = xvt;
88: }
89: if (!rank) { /* I am the zeroth processor, values are accumulated here */
90: if (scat->work2) xvt2 = scat->work2;
91: else {
92: PetscMalloc1(xx_n,&xvt2);
93: scat->work2 = xvt2;
94: }
95: MPI_Gatherv(yv,yy_n,MPIU_SCALAR,xvt2,scat->count,disply,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
96: MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
97: if (addv == ADD_VALUES) {
98: for (i=0; i<xx_n; i++) xvt[i] += xvt2[i];
99: #if !defined(PETSC_USE_COMPLEX)
100: } else if (addv == MAX_VALUES) {
101: for (i=0; i<xx_n; i++) xvt[i] = PetscMax(xvt[i],xvt2[i]);
102: #endif
103: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
104: MPI_Scatterv(xvt,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
105: } else {
106: MPI_Gatherv(yv,yy_n,MPIU_SCALAR,0, 0,0,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
107: MPI_Reduce(xv,xvt,xx_n,MPIU_SCALAR,MPIU_SUM,0,PetscObjectComm((PetscObject)ctx));
108: MPI_Scatterv(0,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
109: }
110: }
111: } else {
112: PetscScalar *yvt;
113: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
114: PetscInt i;
115: PetscMPIInt *displx = scat->displx;
117: if (addv == INSERT_VALUES) {
118: MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
119: } else {
120: if (scat->work1) yvt = scat->work1;
121: else {
122: PetscMalloc1(yy_n,&yvt);
123: scat->work1 = yvt;
124: }
125: MPI_Allgatherv(xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,PetscObjectComm((PetscObject)ctx));
126: if (addv == ADD_VALUES) {
127: for (i=0; i<yy_n; i++) yv[i] += yvt[i];
128: #if !defined(PETSC_USE_COMPLEX)
129: } else if (addv == MAX_VALUES) {
130: for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
131: #endif
132: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
133: }
134: }
135: VecRestoreArrayPair(x,y,&xv,&yv);
136: return(0);
137: }
141: PetscErrorCode VecScatterView_MPI_ToAll(VecScatter in,PetscViewer viewer)
142: {
144: PetscBool isascii;
147: PetscObjectTypeCompare((PetscObject)in,PETSCVIEWERASCII,&isascii);
148: if (isascii) {
149: PetscViewerASCIIPrintf(viewer,"Entire parallel vector is copied to each process\n");
150: }
151: return(0);
152: }
154: /*
155: This is special scatter code for when the entire parallel vector is copied to processor 0.
157: */
160: PetscErrorCode VecScatterBegin_MPI_ToOne(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
161: {
162: PetscErrorCode ierr;
163: PetscMPIInt rank;
164: PetscInt yy_n,xx_n;
165: PetscScalar *yv;
166: const PetscScalar *xv;
167: MPI_Comm comm;
170: VecGetLocalSize(y,&yy_n);
171: VecGetLocalSize(x,&xx_n);
172: VecGetArrayRead(x,&xv);
173: VecGetArray(y,&yv);
175: PetscObjectGetComm((PetscObject)x,&comm);
176: MPI_Comm_rank(comm,&rank);
178: /* -------- Reverse scatter; spread from processor 0 to other processors */
179: if (mode & SCATTER_REVERSE) {
180: PetscScalar *yvt;
181: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
182: PetscInt i;
183: PetscMPIInt *disply = scat->displx;
185: if (addv == INSERT_VALUES) {
186: MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yv,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
187: } else {
188: if (scat->work2) yvt = scat->work2;
189: else {
190: PetscMalloc1(xx_n,&yvt);
191: scat->work2 = yvt;
192: }
193: MPI_Scatterv((PetscScalar*)xv,scat->count,disply,MPIU_SCALAR,yvt,yy_n,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
194: if (addv == ADD_VALUES) {
195: for (i=0; i<yy_n; i++) yv[i] += yvt[i];
196: #if !defined(PETSC_USE_COMPLEX)
197: } else if (addv == MAX_VALUES) {
198: for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
199: #endif
200: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
201: }
202: /* --------- Forward scatter; gather all values onto processor 0 */
203: } else {
204: PetscScalar *yvt = 0;
205: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
206: PetscInt i;
207: PetscMPIInt *displx = scat->displx;
209: if (addv == INSERT_VALUES) {
210: MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yv,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
211: } else {
212: if (!rank) {
213: if (scat->work1) yvt = scat->work1;
214: else {
215: PetscMalloc1(yy_n,&yvt);
216: scat->work1 = yvt;
217: }
218: }
219: MPI_Gatherv((PetscScalar*)xv,xx_n,MPIU_SCALAR,yvt,scat->count,displx,MPIU_SCALAR,0,PetscObjectComm((PetscObject)ctx));
220: if (!rank) {
221: if (addv == ADD_VALUES) {
222: for (i=0; i<yy_n; i++) yv[i] += yvt[i];
223: #if !defined(PETSC_USE_COMPLEX)
224: } else if (addv == MAX_VALUES) {
225: for (i=0; i<yy_n; i++) yv[i] = PetscMax(yv[i],yvt[i]);
226: #endif
227: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
228: }
229: }
230: }
231: VecRestoreArrayRead(x,&xv);
232: VecRestoreArray(y,&yv);
233: return(0);
234: }
236: /*
237: The follow to are used for both VecScatterBegin_MPI_ToAll() and VecScatterBegin_MPI_ToOne()
238: */
241: PetscErrorCode VecScatterDestroy_MPI_ToAll(VecScatter ctx)
242: {
243: VecScatter_MPI_ToAll *scat = (VecScatter_MPI_ToAll*)ctx->todata;
244: PetscErrorCode ierr;
247: PetscFree(scat->work1);
248: PetscFree(scat->work2);
249: PetscFree3(ctx->todata,scat->count,scat->displx);
250: return(0);
251: }
255: PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
256: {
260: PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);
261: PetscFree2(ctx->todata,ctx->fromdata);
262: return(0);
263: }
267: PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
268: {
272: PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);
273: PetscFree2(ctx->todata,ctx->fromdata);
274: return(0);
275: }
279: PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
280: {
284: PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);
285: PetscFree2(ctx->todata,ctx->fromdata);
286: return(0);
287: }
291: PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
292: {
296: PetscFree2(ctx->todata,ctx->fromdata);
297: return(0);
298: }
300: /* -------------------------------------------------------------------------*/
303: PetscErrorCode VecScatterCopy_MPI_ToAll(VecScatter in,VecScatter out)
304: {
305: VecScatter_MPI_ToAll *in_to = (VecScatter_MPI_ToAll*)in->todata,*sto;
306: PetscErrorCode ierr;
307: PetscMPIInt size,*count,*displx;
308: PetscInt i;
311: out->ops->begin = in->ops->begin;
312: out->ops->end = in->ops->end;
313: out->ops->copy = in->ops->copy;
314: out->ops->destroy = in->ops->destroy;
315: out->ops->view = in->ops->view;
317: MPI_Comm_size(PetscObjectComm((PetscObject)out),&size);
318: PetscMalloc3(1,&sto,size,&count,size,&displx);
319: sto->type = in_to->type;
320: sto->count = count;
321: sto->displx = displx;
322: for (i=0; i<size; i++) {
323: sto->count[i] = in_to->count[i];
324: sto->displx[i] = in_to->displx[i];
325: }
326: sto->work1 = 0;
327: sto->work2 = 0;
328: out->todata = (void*)sto;
329: out->fromdata = (void*)0;
330: return(0);
331: }
333: /* --------------------------------------------------------------------------------------*/
334: /*
335: Scatter: sequential general to sequential general
336: */
339: PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
340: {
341: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
342: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
343: PetscErrorCode ierr;
344: PetscInt i,n = gen_from->n,*fslots,*tslots;
345: PetscScalar *xv,*yv;
348: #if defined(PETSC_HAVE_CUSP)
349: if (x->valid_GPU_array == PETSC_CUSP_GPU) {
350: /* create the scatter indices if not done already */
351: if (!ctx->spptr) {
352: PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
353: fslots = gen_from->vslots;
354: tslots = gen_to->vslots;
355: VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
356: }
357: /* next do the scatter */
358: VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
359: return(0);
360: }
361: #elif defined(PETSC_HAVE_VECCUDA)
362: if (x->valid_GPU_array == PETSC_CUDA_GPU) {
363: /* create the scatter indices if not done already */
364: if (!ctx->spptr) {
365: PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
366: fslots = gen_from->vslots;
367: tslots = gen_to->vslots;
368: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
369: }
370: /* next do the scatter */
371: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
372: return(0);
373: }
374: #endif
376: VecGetArrayPair(x,y,&xv,&yv);
377: if (mode & SCATTER_REVERSE) {
378: gen_to = (VecScatter_Seq_General*)ctx->fromdata;
379: gen_from = (VecScatter_Seq_General*)ctx->todata;
380: }
381: fslots = gen_from->vslots;
382: tslots = gen_to->vslots;
384: if (addv == INSERT_VALUES) {
385: for (i=0; i<n; i++) yv[tslots[i]] = xv[fslots[i]];
386: } else if (addv == ADD_VALUES) {
387: for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]];
388: #if !defined(PETSC_USE_COMPLEX)
389: } else if (addv == MAX_VALUES) {
390: for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[fslots[i]]);
391: #endif
392: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
393: VecRestoreArrayPair(x,y,&xv,&yv);
394: return(0);
395: }
397: /*
398: Scatter: sequential general to sequential stride 1
399: */
402: PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
403: {
404: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
405: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
406: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
407: PetscErrorCode ierr;
408: PetscInt first = gen_to->first;
409: PetscScalar *xv,*yv;
412: #if defined(PETSC_HAVE_CUSP)
413: if (x->valid_GPU_array == PETSC_CUSP_GPU) {
414: /* create the scatter indices if not done already */
415: if (!ctx->spptr) {
416: PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
417: PetscInt *tslots = 0;
418: VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
419: }
420: /* next do the scatter */
421: VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
422: return(0);
423: }
424: #elif defined(PETSC_HAVE_VECCUDA)
425: if (x->valid_GPU_array == PETSC_CUDA_GPU) {
426: /* create the scatter indices if not done already */
427: if (!ctx->spptr) {
428: PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
429: PetscInt *tslots = 0;
430: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
431: }
432: /* next do the scatter */
433: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
434: return(0);
435: }
436: #endif
438: VecGetArrayPair(x,y,&xv,&yv);
439: if (mode & SCATTER_REVERSE) {
440: xv += first;
441: if (addv == INSERT_VALUES) {
442: for (i=0; i<n; i++) yv[fslots[i]] = xv[i];
443: } else if (addv == ADD_VALUES) {
444: for (i=0; i<n; i++) yv[fslots[i]] += xv[i];
445: #if !defined(PETSC_USE_COMPLEX)
446: } else if (addv == MAX_VALUES) {
447: for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[i]);
448: #endif
449: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
450: } else {
451: yv += first;
452: if (addv == INSERT_VALUES) {
453: for (i=0; i<n; i++) yv[i] = xv[fslots[i]];
454: } else if (addv == ADD_VALUES) {
455: for (i=0; i<n; i++) yv[i] += xv[fslots[i]];
456: #if !defined(PETSC_USE_COMPLEX)
457: } else if (addv == MAX_VALUES) {
458: for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[fslots[i]]);
459: #endif
460: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
461: }
462: VecRestoreArrayPair(x,y,&xv,&yv);
463: return(0);
464: }
466: /*
467: Scatter: sequential general to sequential stride
468: */
471: PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
472: {
473: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
474: VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
475: PetscInt i,n = gen_from->n,*fslots = gen_from->vslots;
476: PetscErrorCode ierr;
477: PetscInt first = gen_to->first,step = gen_to->step;
478: PetscScalar *xv,*yv;
481: #if defined(PETSC_HAVE_CUSP)
482: if (x->valid_GPU_array == PETSC_CUSP_GPU) {
483: /* create the scatter indices if not done already */
484: if (!ctx->spptr) {
485: PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
486: PetscInt * tslots = 0;
487: VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
488: }
489: /* next do the scatter */
490: VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
491: return(0);
492: }
493: #elif defined(PETSC_HAVE_VECCUDA)
494: if (x->valid_GPU_array == PETSC_CUDA_GPU) {
495: /* create the scatter indices if not done already */
496: if (!ctx->spptr) {
497: PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
498: PetscInt * tslots = 0;
499: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
500: }
501: /* next do the scatter */
502: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
503: return(0);
504: }
505: #endif
507: VecGetArrayPair(x,y,&xv,&yv);
508: if (mode & SCATTER_REVERSE) {
509: if (addv == INSERT_VALUES) {
510: for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
511: } else if (addv == ADD_VALUES) {
512: for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
513: #if !defined(PETSC_USE_COMPLEX)
514: } else if (addv == MAX_VALUES) {
515: for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
516: #endif
517: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
518: } else {
519: if (addv == INSERT_VALUES) {
520: for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
521: } else if (addv == ADD_VALUES) {
522: for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
523: #if !defined(PETSC_USE_COMPLEX)
524: } else if (addv == MAX_VALUES) {
525: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
526: #endif
527: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
528: }
529: VecRestoreArrayPair(x,y,&xv,&yv);
530: return(0);
531: }
533: /*
534: Scatter: sequential stride 1 to sequential general
535: */
538: PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
539: {
540: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
541: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
542: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
543: PetscErrorCode ierr;
544: PetscInt first = gen_from->first;
545: PetscScalar *xv,*yv;
548: #if defined(PETSC_HAVE_CUSP)
549: if (x->valid_GPU_array == PETSC_CUSP_GPU) {
550: /* create the scatter indices if not done already */
551: if (!ctx->spptr) {
552: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
553: PetscInt *fslots = 0;
554: VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
555: }
556: /* next do the scatter */
557: VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
558: return(0);
559: }
560: #elif defined(PETSC_HAVE_VECCUDA)
561: if (x->valid_GPU_array == PETSC_CUDA_GPU) {
562: /* create the scatter indices if not done already */
563: if (!ctx->spptr) {
564: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
565: PetscInt *fslots = 0;
566: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
567: }
568: /* next do the scatter */
569: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
570: return(0);
571: }
572: #endif
574: VecGetArrayPair(x,y,&xv,&yv);
575: if (mode & SCATTER_REVERSE) {
576: yv += first;
577: if (addv == INSERT_VALUES) {
578: for (i=0; i<n; i++) yv[i] = xv[tslots[i]];
579: } else if (addv == ADD_VALUES) {
580: for (i=0; i<n; i++) yv[i] += xv[tslots[i]];
581: #if !defined(PETSC_USE_COMPLEX)
582: } else if (addv == MAX_VALUES) {
583: for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[tslots[i]]);
584: #endif
585: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
586: } else {
587: xv += first;
588: if (addv == INSERT_VALUES) {
589: for (i=0; i<n; i++) yv[tslots[i]] = xv[i];
590: } else if (addv == ADD_VALUES) {
591: for (i=0; i<n; i++) yv[tslots[i]] += xv[i];
592: #if !defined(PETSC_USE_COMPLEX)
593: } else if (addv == MAX_VALUES) {
594: for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[i]);
595: #endif
596: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
597: }
598: VecRestoreArrayPair(x,y,&xv,&yv);
599: return(0);
600: }
604: /*
605: Scatter: sequential stride to sequential general
606: */
607: PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
608: {
609: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
610: VecScatter_Seq_General *gen_to = (VecScatter_Seq_General*)ctx->todata;
611: PetscInt i,n = gen_from->n,*tslots = gen_to->vslots;
612: PetscErrorCode ierr;
613: PetscInt first = gen_from->first,step = gen_from->step;
614: PetscScalar *xv,*yv;
617: #if defined(PETSC_HAVE_CUSP)
618: if (x->valid_GPU_array == PETSC_CUSP_GPU) {
619: /* create the scatter indices if not done already */
620: if (!ctx->spptr) {
621: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
622: PetscInt *fslots = 0;
623: VecScatterCUSPIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
624: }
625: /* next do the scatter */
626: VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
627: return(0);
628: }
629: #elif defined(PETSC_HAVE_VECCUDA)
630: if (x->valid_GPU_array == PETSC_CUDA_GPU) {
631: /* create the scatter indices if not done already */
632: if (!ctx->spptr) {
633: PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
634: PetscInt *fslots = 0;
635: VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
636: }
637: /* next do the scatter */
638: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
639: return(0);
640: }
641: #endif
643: VecGetArrayPair(x,y,&xv,&yv);
644: if (mode & SCATTER_REVERSE) {
645: if (addv == INSERT_VALUES) {
646: for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
647: } else if (addv == ADD_VALUES) {
648: for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
649: #if !defined(PETSC_USE_COMPLEX)
650: } else if (addv == MAX_VALUES) {
651: for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
652: #endif
653: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
654: } else {
655: if (addv == INSERT_VALUES) {
656: for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
657: } else if (addv == ADD_VALUES) {
658: for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
659: #if !defined(PETSC_USE_COMPLEX)
660: } else if (addv == MAX_VALUES) {
661: for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
662: #endif
663: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
664: }
665: VecRestoreArrayPair(x,y,&xv,&yv);
666: return(0);
667: }
671: PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
672: {
673: PetscErrorCode ierr;
674: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->todata;
675: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->fromdata;
676: PetscInt i;
677: PetscBool isascii;
680: PetscObjectTypeCompare((PetscObject)in,PETSCVIEWERASCII,&isascii);
681: if (isascii) {
682: PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");
683: for (i=0; i<in_to->n; i++) {
684: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);
685: }
686: }
687: return(0);
688: }
689: /*
690: Scatter: sequential stride to sequential stride
691: */
694: PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
695: {
696: VecScatter_Seq_Stride *gen_to = (VecScatter_Seq_Stride*)ctx->todata;
697: VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
698: PetscInt i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
699: PetscErrorCode ierr;
700: PetscInt from_first = gen_from->first,from_step = gen_from->step;
701: PetscScalar *xv,*yv;
704: #if defined(PETSC_HAVE_CUSP)
705: if (x->valid_GPU_array == PETSC_CUSP_GPU) {
706: /* create the scatter indices if not done already */
707: if (!ctx->spptr) {
708: PetscInt *tslots = 0,*fslots = 0;
709: VecScatterCUSPIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUSPIndices*)&(ctx->spptr));
710: }
711: /* next do the scatter */
712: VecScatterCUSP_StoS(x,y,(PetscCUSPIndices)ctx->spptr,addv,mode);
713: return(0);
714: }
715: #elif defined(PETSC_HAVE_VECCUDA)
716: if (x->valid_GPU_array == PETSC_CUDA_GPU) {
717: /* create the scatter indices if not done already */
718: if (!ctx->spptr) {
719: PetscInt *tslots = 0,*fslots = 0;
720: VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
721: }
722: /* next do the scatter */
723: VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
724: return(0);
725: }
726: #endif
728: VecGetArrayPair(x,y,&xv,&yv);
729: if (mode & SCATTER_REVERSE) {
730: from_first = gen_to->first;
731: to_first = gen_from->first;
732: from_step = gen_to->step;
733: to_step = gen_from->step;
734: }
736: if (addv == INSERT_VALUES) {
737: if (to_step == 1 && from_step == 1) {
738: PetscMemcpy(yv+to_first,xv+from_first,n*sizeof(PetscScalar));
739: } else {
740: for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
741: }
742: } else if (addv == ADD_VALUES) {
743: if (to_step == 1 && from_step == 1) {
744: yv += to_first; xv += from_first;
745: for (i=0; i<n; i++) yv[i] += xv[i];
746: } else {
747: for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
748: }
749: #if !defined(PETSC_USE_COMPLEX)
750: } else if (addv == MAX_VALUES) {
751: if (to_step == 1 && from_step == 1) {
752: yv += to_first; xv += from_first;
753: for (i=0; i<n; i++) yv[i] = PetscMax(yv[i],xv[i]);
754: } else {
755: for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
756: }
757: #endif
758: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
759: VecRestoreArrayPair(x,y,&xv,&yv);
760: return(0);
761: }
763: /* --------------------------------------------------------------------------*/
768: PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
769: {
770: PetscErrorCode ierr;
771: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
772: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
775: out->ops->begin = in->ops->begin;
776: out->ops->end = in->ops->end;
777: out->ops->copy = in->ops->copy;
778: out->ops->destroy = in->ops->destroy;
779: out->ops->view = in->ops->view;
781: PetscMalloc2(1,&out_to,1,&out_from);
782: PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);
783: out_to->n = in_to->n;
784: out_to->type = in_to->type;
785: out_to->nonmatching_computed = PETSC_FALSE;
786: out_to->slots_nonmatching = 0;
787: out_to->is_copy = PETSC_FALSE;
788: PetscMemcpy(out_to->vslots,in_to->vslots,(out_to->n)*sizeof(PetscInt));
790: out_from->n = in_from->n;
791: out_from->type = in_from->type;
792: out_from->nonmatching_computed = PETSC_FALSE;
793: out_from->slots_nonmatching = 0;
794: out_from->is_copy = PETSC_FALSE;
795: PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
797: out->todata = (void*)out_to;
798: out->fromdata = (void*)out_from;
799: return(0);
800: }
804: PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
805: {
806: PetscErrorCode ierr;
807: VecScatter_Seq_General *in_to = (VecScatter_Seq_General*)in->todata;
808: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
809: PetscInt i;
810: PetscBool isascii;
813: PetscObjectTypeCompare((PetscObject)in,PETSCVIEWERASCII,&isascii);
814: if (isascii) {
815: PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");
816: for (i=0; i<in_to->n; i++) {
817: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);
818: }
819: }
820: return(0);
821: }
826: PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
827: {
828: PetscErrorCode ierr;
829: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
830: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;
833: out->ops->begin = in->ops->begin;
834: out->ops->end = in->ops->end;
835: out->ops->copy = in->ops->copy;
836: out->ops->destroy = in->ops->destroy;
837: out->ops->view = in->ops->view;
839: PetscMalloc2(1,&out_to,1,&out_from);
840: PetscMalloc1(in_from->n,&out_from->vslots);
841: out_to->n = in_to->n;
842: out_to->type = in_to->type;
843: out_to->first = in_to->first;
844: out_to->step = in_to->step;
845: out_to->type = in_to->type;
847: out_from->n = in_from->n;
848: out_from->type = in_from->type;
849: out_from->nonmatching_computed = PETSC_FALSE;
850: out_from->slots_nonmatching = 0;
851: out_from->is_copy = PETSC_FALSE;
852: PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
854: out->todata = (void*)out_to;
855: out->fromdata = (void*)out_from;
856: return(0);
857: }
861: PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
862: {
863: PetscErrorCode ierr;
864: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
865: VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
866: PetscInt i;
867: PetscBool isascii;
870: PetscObjectTypeCompare((PetscObject)in,PETSCVIEWERASCII,&isascii);
871: if (isascii) {
872: PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");
873: for (i=0; i<in_to->n; i++) {
874: PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);
875: }
876: }
877: return(0);
878: }
880: /* --------------------------------------------------------------------------*/
881: /*
882: Scatter: parallel to sequential vector, sequential strides for both.
883: */
886: PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
887: {
888: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
889: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
890: PetscErrorCode ierr;
893: out->ops->begin = in->ops->begin;
894: out->ops->end = in->ops->end;
895: out->ops->copy = in->ops->copy;
896: out->ops->destroy = in->ops->destroy;
897: out->ops->view = in->ops->view;
899: PetscMalloc2(1,&out_to,1,&out_from);
900: out_to->n = in_to->n;
901: out_to->type = in_to->type;
902: out_to->first = in_to->first;
903: out_to->step = in_to->step;
904: out_to->type = in_to->type;
905: out_from->n = in_from->n;
906: out_from->type = in_from->type;
907: out_from->first = in_from->first;
908: out_from->step = in_from->step;
909: out_from->type = in_from->type;
910: out->todata = (void*)out_to;
911: out->fromdata = (void*)out_from;
912: return(0);
913: }
917: PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
918: {
919: VecScatter_Seq_Stride *in_to = (VecScatter_Seq_Stride*)in->todata;
920: VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
921: PetscErrorCode ierr;
922: PetscBool isascii;
925: PetscObjectTypeCompare((PetscObject)in,PETSCVIEWERASCII,&isascii);
926: if (isascii) {
927: PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);
928: }
929: return(0);
930: }
933: extern PetscErrorCode VecScatterCreate_PtoS(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
934: extern PetscErrorCode VecScatterCreate_PtoP(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
935: extern PetscErrorCode VecScatterCreate_StoP(PetscInt,const PetscInt*,PetscInt,const PetscInt*,Vec,Vec,PetscInt,VecScatter);
937: /* =======================================================================*/
938: #define VEC_SEQ_ID 0
939: #define VEC_MPI_ID 1
940: #define IS_GENERAL_ID 0
941: #define IS_STRIDE_ID 1
942: #define IS_BLOCK_ID 2
944: /*
945: Blocksizes we have optimized scatters for
946: */
948: #define VecScatterOptimizedBS(mbs) (2 <= mbs)
953: PetscErrorCode VecScatterCreateEmpty(MPI_Comm comm,VecScatter *newctx)
954: {
955: VecScatter ctx;
959: PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
960: ctx->inuse = PETSC_FALSE;
961: ctx->beginandendtogether = PETSC_FALSE;
962: PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
963: if (ctx->beginandendtogether) {
964: PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
965: }
966: ctx->packtogether = PETSC_FALSE;
967: PetscOptionsGetBool(NULL,NULL,"-vecscatter_packtogether",&ctx->packtogether,NULL);
968: if (ctx->packtogether) {
969: PetscInfo(ctx,"Pack all messages before sending\n");
970: }
971: *newctx = ctx;
972: return(0);
973: }
977: /*@C
978: VecScatterCreate - Creates a vector scatter context.
980: Collective on Vec
982: Input Parameters:
983: + xin - a vector that defines the shape (parallel data layout of the vector)
984: of vectors from which we scatter
985: . yin - a vector that defines the shape (parallel data layout of the vector)
986: of vectors to which we scatter
987: . ix - the indices of xin to scatter (if NULL scatters all values)
988: - iy - the indices of yin to hold results (if NULL fills entire vector yin)
990: Output Parameter:
991: . newctx - location to store the new scatter context
993: Options Database Keys: (uses regular MPI_Sends by default)
994: + -vecscatter_view - Prints detail of communications
995: . -vecscatter_view ::ascii_info - Print less details about communication
996: . -vecscatter_ssend - Uses MPI_Ssend_init() instead of MPI_Send_init()
997: . -vecscatter_rsend - use ready receiver mode for MPI sends
998: . -vecscatter_merge - VecScatterBegin() handles all of the communication, VecScatterEnd() is a nop
999: eliminates the chance for overlap of computation and communication
1000: . -vecscatter_sendfirst - Posts sends before receives
1001: . -vecscatter_packtogether - Pack all messages before sending, receive all messages before unpacking
1002: . -vecscatter_alltoall - Uses MPI all to all communication for scatter
1003: . -vecscatter_window - Use MPI 2 window operations to move data
1004: . -vecscatter_nopack - Avoid packing to work vector when possible (if used with -vecscatter_alltoall then will use MPI_Alltoallw()
1005: - -vecscatter_reproduce - insure that the order of the communications are done the same for each scatter, this under certain circumstances
1006: will make the results of scatters deterministic when otherwise they are not (it may be slower also).
1008: $
1009: $ --When packing is used--
1010: $ MPI Datatypes (no packing) sendfirst merge packtogether persistent*
1011: $ _nopack _sendfirst _merge _packtogether -vecscatter_
1012: $ ----------------------------------------------------------------------------------------------------------------------------
1013: $ Message passing Send p X X X always
1014: $ Ssend p X X X always _ssend
1015: $ Rsend p nonsense X X always _rsend
1016: $ AlltoAll v or w X nonsense always X nonsense _alltoall
1017: $ MPI_Win p nonsense p p nonsense _window
1018: $
1019: $ Since persistent sends and receives require a constant memory address they can only be used when data is packed into the work vector
1020: $ because the in and out array may be different for each call to VecScatterBegin/End().
1021: $
1022: $ p indicates possible, but not implemented. X indicates implemented
1023: $
1025: Level: intermediate
1027: Notes:
1028: In calls to VecScatter() you can use different vectors than the xin and
1029: yin you used above; BUT they must have the same parallel data layout, for example,
1030: they could be obtained from VecDuplicate().
1031: A VecScatter context CANNOT be used in two or more simultaneous scatters;
1032: that is you cannot call a second VecScatterBegin() with the same scatter
1033: context until the VecScatterEnd() has been called on the first VecScatterBegin().
1034: In this case a separate VecScatter is needed for each concurrent scatter.
1036: Currently the MPI_Send(), MPI_Ssend() and MPI_Rsend() all use PERSISTENT versions.
1037: (this unfortunately requires that the same in and out arrays be used for each use, this
1038: is why when not using MPI_alltoallw() we always need to pack the input into the work array before sending
1039: and unpack upon receeving instead of using MPI datatypes to avoid the packing/unpacking).
1041: Both ix and iy cannot be NULL at the same time.
1043: Concepts: scatter^between vectors
1044: Concepts: gather^between vectors
1046: .seealso: VecScatterDestroy(), VecScatterCreateToAll(), VecScatterCreateToZero()
1047: @*/
1048: PetscErrorCode VecScatterCreate(Vec xin,IS ix,Vec yin,IS iy,VecScatter *newctx)
1049: {
1050: VecScatter ctx;
1051: PetscErrorCode ierr;
1052: PetscMPIInt size;
1053: PetscInt xin_type = VEC_SEQ_ID,yin_type = VEC_SEQ_ID,*range;
1054: PetscInt ix_type = IS_GENERAL_ID,iy_type = IS_GENERAL_ID;
1055: MPI_Comm comm,ycomm;
1056: PetscBool totalv,ixblock,iyblock,iystride,islocal,cando,flag;
1057: IS tix = 0,tiy = 0;
1060: if (!ix && !iy) SETERRQ(PetscObjectComm((PetscObject)xin),PETSC_ERR_SUP,"Cannot pass default in for both input and output indices");
1062: /*
1063: Determine if the vectors are "parallel", ie. it shares a comm with other processors, or
1064: sequential (it does not share a comm). The difference is that parallel vectors treat the
1065: index set as providing indices in the global parallel numbering of the vector, with
1066: sequential vectors treat the index set as providing indices in the local sequential
1067: numbering
1068: */
1069: PetscObjectGetComm((PetscObject)xin,&comm);
1070: MPI_Comm_size(comm,&size);
1071: if (size > 1) xin_type = VEC_MPI_ID;
1073: PetscObjectGetComm((PetscObject)yin,&ycomm);
1074: MPI_Comm_size(ycomm,&size);
1075: if (size > 1) {comm = ycomm; yin_type = VEC_MPI_ID;}
1077: /* generate the Scatter context */
1078: PetscHeaderCreate(ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",comm,VecScatterDestroy,VecScatterView);
1079: ctx->inuse = PETSC_FALSE;
1081: ctx->beginandendtogether = PETSC_FALSE;
1082: PetscOptionsGetBool(NULL,NULL,"-vecscatter_merge",&ctx->beginandendtogether,NULL);
1083: if (ctx->beginandendtogether) {
1084: PetscInfo(ctx,"Using combined (merged) vector scatter begin and end\n");
1085: }
1086: ctx->packtogether = PETSC_FALSE;
1087: PetscOptionsGetBool(NULL,NULL,"-vecscatter_packtogether",&ctx->packtogether,NULL);
1088: if (ctx->packtogether) {
1089: PetscInfo(ctx,"Pack all messages before sending\n");
1090: }
1092: VecGetLocalSize(xin,&ctx->from_n);
1093: VecGetLocalSize(yin,&ctx->to_n);
1095: /*
1096: if ix or iy is not included; assume just grabbing entire vector
1097: */
1098: if (!ix && xin_type == VEC_SEQ_ID) {
1099: ISCreateStride(comm,ctx->from_n,0,1,&ix);
1100: tix = ix;
1101: } else if (!ix && xin_type == VEC_MPI_ID) {
1102: if (yin_type == VEC_MPI_ID) {
1103: PetscInt ntmp, low;
1104: VecGetLocalSize(xin,&ntmp);
1105: VecGetOwnershipRange(xin,&low,NULL);
1106: ISCreateStride(comm,ntmp,low,1,&ix);
1107: } else {
1108: PetscInt Ntmp;
1109: VecGetSize(xin,&Ntmp);
1110: ISCreateStride(comm,Ntmp,0,1,&ix);
1111: }
1112: tix = ix;
1113: } else if (!ix) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ix not given, but not Seq or MPI vector");
1115: if (!iy && yin_type == VEC_SEQ_ID) {
1116: ISCreateStride(comm,ctx->to_n,0,1,&iy);
1117: tiy = iy;
1118: } else if (!iy && yin_type == VEC_MPI_ID) {
1119: if (xin_type == VEC_MPI_ID) {
1120: PetscInt ntmp, low;
1121: VecGetLocalSize(yin,&ntmp);
1122: VecGetOwnershipRange(yin,&low,NULL);
1123: ISCreateStride(comm,ntmp,low,1,&iy);
1124: } else {
1125: PetscInt Ntmp;
1126: VecGetSize(yin,&Ntmp);
1127: ISCreateStride(comm,Ntmp,0,1,&iy);
1128: }
1129: tiy = iy;
1130: } else if (!iy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"iy not given, but not Seq or MPI vector");
1132: /*
1133: Determine types of index sets
1134: */
1135: PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&flag);
1136: if (flag) ix_type = IS_BLOCK_ID;
1137: PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&flag);
1138: if (flag) iy_type = IS_BLOCK_ID;
1139: PetscObjectTypeCompare((PetscObject)ix,ISSTRIDE,&flag);
1140: if (flag) ix_type = IS_STRIDE_ID;
1141: PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&flag);
1142: if (flag) iy_type = IS_STRIDE_ID;
1144: /* ===========================================================================================================
1145: Check for special cases
1146: ==========================================================================================================*/
1147: /* ---------------------------------------------------------------------------*/
1148: if (xin_type == VEC_SEQ_ID && yin_type == VEC_SEQ_ID) {
1149: if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
1150: PetscInt nx,ny;
1151: const PetscInt *idx,*idy;
1152: VecScatter_Seq_General *to = NULL,*from = NULL;
1154: ISGetLocalSize(ix,&nx);
1155: ISGetLocalSize(iy,&ny);
1156: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1157: ISGetIndices(ix,&idx);
1158: ISGetIndices(iy,&idy);
1159: PetscMalloc2(1,&to,1,&from);
1160: PetscMalloc2(nx,&to->vslots,nx,&from->vslots);
1161: to->n = nx;
1162: #if defined(PETSC_USE_DEBUG)
1163: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1164: #endif
1165: PetscMemcpy(to->vslots,idy,nx*sizeof(PetscInt));
1166: from->n = nx;
1167: #if defined(PETSC_USE_DEBUG)
1168: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1169: #endif
1170: PetscMemcpy(from->vslots,idx,nx*sizeof(PetscInt));
1171: to->type = VEC_SCATTER_SEQ_GENERAL;
1172: from->type = VEC_SCATTER_SEQ_GENERAL;
1173: ctx->todata = (void*)to;
1174: ctx->fromdata = (void*)from;
1175: ctx->ops->begin = VecScatterBegin_SGToSG;
1176: ctx->ops->end = 0;
1177: ctx->ops->destroy = VecScatterDestroy_SGToSG;
1178: ctx->ops->copy = VecScatterCopy_SGToSG;
1179: ctx->ops->view = VecScatterView_SGToSG;
1180: PetscInfo(xin,"Special case: sequential vector general scatter\n");
1181: goto functionend;
1182: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1183: PetscInt nx,ny,to_first,to_step,from_first,from_step;
1184: VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;
1186: ISGetLocalSize(ix,&nx);
1187: ISGetLocalSize(iy,&ny);
1188: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1189: ISStrideGetInfo(iy,&to_first,&to_step);
1190: ISStrideGetInfo(ix,&from_first,&from_step);
1191: PetscMalloc2(1,&to8,1,&from8);
1192: to8->n = nx;
1193: to8->first = to_first;
1194: to8->step = to_step;
1195: from8->n = nx;
1196: from8->first = from_first;
1197: from8->step = from_step;
1198: to8->type = VEC_SCATTER_SEQ_STRIDE;
1199: from8->type = VEC_SCATTER_SEQ_STRIDE;
1200: ctx->todata = (void*)to8;
1201: ctx->fromdata = (void*)from8;
1202: ctx->ops->begin = VecScatterBegin_SSToSS;
1203: ctx->ops->end = 0;
1204: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1205: ctx->ops->copy = VecScatterCopy_SSToSS;
1206: ctx->ops->view = VecScatterView_SSToSS;
1207: PetscInfo(xin,"Special case: sequential vector stride to stride\n");
1208: goto functionend;
1209: } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
1210: PetscInt nx,ny,first,step;
1211: const PetscInt *idx;
1212: VecScatter_Seq_General *from9 = NULL;
1213: VecScatter_Seq_Stride *to9 = NULL;
1215: ISGetLocalSize(ix,&nx);
1216: ISGetIndices(ix,&idx);
1217: ISGetLocalSize(iy,&ny);
1218: ISStrideGetInfo(iy,&first,&step);
1219: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1220: PetscMalloc2(1,&to9,1,&from9);
1221: PetscMalloc1(nx,&from9->vslots);
1222: to9->n = nx;
1223: to9->first = first;
1224: to9->step = step;
1225: from9->n = nx;
1226: #if defined(PETSC_USE_DEBUG)
1227: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1228: #endif
1229: PetscMemcpy(from9->vslots,idx,nx*sizeof(PetscInt));
1230: ctx->todata = (void*)to9; ctx->fromdata = (void*)from9;
1231: if (step == 1) ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
1232: else ctx->ops->begin = VecScatterBegin_SGToSS;
1233: ctx->ops->destroy = VecScatterDestroy_SGToSS;
1234: ctx->ops->end = 0;
1235: ctx->ops->copy = VecScatterCopy_SGToSS;
1236: ctx->ops->view = VecScatterView_SGToSS;
1237: to9->type = VEC_SCATTER_SEQ_STRIDE;
1238: from9->type = VEC_SCATTER_SEQ_GENERAL;
1239: PetscInfo(xin,"Special case: sequential vector general to stride\n");
1240: goto functionend;
1241: } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
1242: PetscInt nx,ny,first,step;
1243: const PetscInt *idy;
1244: VecScatter_Seq_General *to10 = NULL;
1245: VecScatter_Seq_Stride *from10 = NULL;
1247: ISGetLocalSize(ix,&nx);
1248: ISGetIndices(iy,&idy);
1249: ISGetLocalSize(iy,&ny);
1250: ISStrideGetInfo(ix,&first,&step);
1251: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1252: PetscMalloc2(1,&to10,1,&from10);
1253: PetscMalloc1(nx,&to10->vslots);
1254: from10->n = nx;
1255: from10->first = first;
1256: from10->step = step;
1257: to10->n = nx;
1258: #if defined(PETSC_USE_DEBUG)
1259: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1260: #endif
1261: PetscMemcpy(to10->vslots,idy,nx*sizeof(PetscInt));
1262: ctx->todata = (void*)to10;
1263: ctx->fromdata = (void*)from10;
1264: if (step == 1) ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
1265: else ctx->ops->begin = VecScatterBegin_SSToSG;
1266: ctx->ops->destroy = VecScatterDestroy_SSToSG;
1267: ctx->ops->end = 0;
1268: ctx->ops->copy = 0;
1269: ctx->ops->view = VecScatterView_SSToSG;
1270: to10->type = VEC_SCATTER_SEQ_GENERAL;
1271: from10->type = VEC_SCATTER_SEQ_STRIDE;
1272: PetscInfo(xin,"Special case: sequential vector stride to general\n");
1273: goto functionend;
1274: } else {
1275: PetscInt nx,ny;
1276: const PetscInt *idx,*idy;
1277: VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
1278: PetscBool idnx,idny;
1280: ISGetLocalSize(ix,&nx);
1281: ISGetLocalSize(iy,&ny);
1282: if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);
1284: ISIdentity(ix,&idnx);
1285: ISIdentity(iy,&idny);
1286: if (idnx && idny) {
1287: VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
1288: PetscMalloc2(1,&to13,1,&from13);
1289: to13->n = nx;
1290: to13->first = 0;
1291: to13->step = 1;
1292: from13->n = nx;
1293: from13->first = 0;
1294: from13->step = 1;
1295: to13->type = VEC_SCATTER_SEQ_STRIDE;
1296: from13->type = VEC_SCATTER_SEQ_STRIDE;
1297: ctx->todata = (void*)to13;
1298: ctx->fromdata = (void*)from13;
1299: ctx->ops->begin = VecScatterBegin_SSToSS;
1300: ctx->ops->end = 0;
1301: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1302: ctx->ops->copy = VecScatterCopy_SSToSS;
1303: ctx->ops->view = VecScatterView_SSToSS;
1304: PetscInfo(xin,"Special case: sequential copy\n");
1305: goto functionend;
1306: }
1308: ISGetIndices(iy,&idy);
1309: ISGetIndices(ix,&idx);
1310: PetscMalloc2(1,&to11,1,&from11);
1311: PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);
1312: to11->n = nx;
1313: #if defined(PETSC_USE_DEBUG)
1314: VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
1315: #endif
1316: PetscMemcpy(to11->vslots,idy,nx*sizeof(PetscInt));
1317: from11->n = nx;
1318: #if defined(PETSC_USE_DEBUG)
1319: VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
1320: #endif
1321: PetscMemcpy(from11->vslots,idx,nx*sizeof(PetscInt));
1322: to11->type = VEC_SCATTER_SEQ_GENERAL;
1323: from11->type = VEC_SCATTER_SEQ_GENERAL;
1324: ctx->todata = (void*)to11;
1325: ctx->fromdata = (void*)from11;
1326: ctx->ops->begin = VecScatterBegin_SGToSG;
1327: ctx->ops->end = 0;
1328: ctx->ops->destroy = VecScatterDestroy_SGToSG;
1329: ctx->ops->copy = VecScatterCopy_SGToSG;
1330: ctx->ops->view = VecScatterView_SGToSG;
1331: ISRestoreIndices(ix,&idx);
1332: ISRestoreIndices(iy,&idy);
1333: PetscInfo(xin,"Sequential vector scatter with block indices\n");
1334: goto functionend;
1335: }
1336: }
1337: /* ---------------------------------------------------------------------------*/
1338: if (xin_type == VEC_MPI_ID && yin_type == VEC_SEQ_ID) {
1340: /* ===========================================================================================================
1341: Check for special cases
1342: ==========================================================================================================*/
1343: islocal = PETSC_FALSE;
1344: /* special case extracting (subset of) local portion */
1345: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1346: PetscInt nx,ny,to_first,to_step,from_first,from_step;
1347: PetscInt start,end,min,max;
1348: VecScatter_Seq_Stride *from12 = NULL,*to12 = NULL;
1350: VecGetOwnershipRange(xin,&start,&end);
1351: ISGetLocalSize(ix,&nx);
1352: ISStrideGetInfo(ix,&from_first,&from_step);
1353: ISGetLocalSize(iy,&ny);
1354: ISStrideGetInfo(iy,&to_first,&to_step);
1355: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1356: ISGetMinMax(ix,&min,&max);
1357: if (min >= start && max < end) islocal = PETSC_TRUE;
1358: else islocal = PETSC_FALSE;
1359: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1360: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1361: if (cando) {
1362: PetscMalloc2(1,&to12,1,&from12);
1363: to12->n = nx;
1364: to12->first = to_first;
1365: to12->step = to_step;
1366: from12->n = nx;
1367: from12->first = from_first-start;
1368: from12->step = from_step;
1369: to12->type = VEC_SCATTER_SEQ_STRIDE;
1370: from12->type = VEC_SCATTER_SEQ_STRIDE;
1371: ctx->todata = (void*)to12;
1372: ctx->fromdata = (void*)from12;
1373: ctx->ops->begin = VecScatterBegin_SSToSS;
1374: ctx->ops->end = 0;
1375: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1376: ctx->ops->copy = VecScatterCopy_SSToSS;
1377: ctx->ops->view = VecScatterView_SSToSS;
1378: PetscInfo(xin,"Special case: processors only getting local values\n");
1379: goto functionend;
1380: }
1381: } else {
1382: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1383: }
1385: /* test for special case of all processors getting entire vector */
1386: /* contains check that PetscMPIInt can handle the sizes needed */
1387: totalv = PETSC_FALSE;
1388: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1389: PetscInt i,nx,ny,to_first,to_step,from_first,from_step,N;
1390: PetscMPIInt *count = NULL,*displx;
1391: VecScatter_MPI_ToAll *sto = NULL;
1393: ISGetLocalSize(ix,&nx);
1394: ISStrideGetInfo(ix,&from_first,&from_step);
1395: ISGetLocalSize(iy,&ny);
1396: ISStrideGetInfo(iy,&to_first,&to_step);
1397: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1398: VecGetSize(xin,&N);
1399: if (nx != N) totalv = PETSC_FALSE;
1400: else if (from_first == 0 && from_step == 1 && from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1401: else totalv = PETSC_FALSE;
1402: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1403: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1405: #if defined(PETSC_USE_64BIT_INDICES)
1406: if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1407: #else
1408: if (cando) {
1409: #endif
1410: MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1411: PetscMalloc3(1,&sto,size,&count,size,&displx);
1412: range = xin->map->range;
1413: for (i=0; i<size; i++) {
1414: PetscMPIIntCast(range[i+1] - range[i],count+i);
1415: PetscMPIIntCast(range[i],displx+i);
1416: }
1417: sto->count = count;
1418: sto->displx = displx;
1419: sto->work1 = 0;
1420: sto->work2 = 0;
1421: sto->type = VEC_SCATTER_MPI_TOALL;
1422: ctx->todata = (void*)sto;
1423: ctx->fromdata = 0;
1424: ctx->ops->begin = VecScatterBegin_MPI_ToAll;
1425: ctx->ops->end = 0;
1426: ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1427: ctx->ops->copy = VecScatterCopy_MPI_ToAll;
1428: ctx->ops->view = VecScatterView_MPI_ToAll;
1429: PetscInfo(xin,"Special case: all processors get entire parallel vector\n");
1430: goto functionend;
1431: }
1432: } else {
1433: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1434: }
1436: /* test for special case of processor 0 getting entire vector */
1437: /* contains check that PetscMPIInt can handle the sizes needed */
1438: totalv = PETSC_FALSE;
1439: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1440: PetscInt i,nx,ny,to_first,to_step,from_first,from_step,N;
1441: PetscMPIInt rank,*count = NULL,*displx;
1442: VecScatter_MPI_ToAll *sto = NULL;
1444: PetscObjectGetComm((PetscObject)xin,&comm);
1445: MPI_Comm_rank(comm,&rank);
1446: ISGetLocalSize(ix,&nx);
1447: ISStrideGetInfo(ix,&from_first,&from_step);
1448: ISGetLocalSize(iy,&ny);
1449: ISStrideGetInfo(iy,&to_first,&to_step);
1450: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1451: if (!rank) {
1452: VecGetSize(xin,&N);
1453: if (nx != N) totalv = PETSC_FALSE;
1454: else if (from_first == 0 && from_step == 1 &&
1455: from_first == to_first && from_step == to_step) totalv = PETSC_TRUE;
1456: else totalv = PETSC_FALSE;
1457: } else {
1458: if (!nx) totalv = PETSC_TRUE;
1459: else totalv = PETSC_FALSE;
1460: }
1461: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1462: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1464: #if defined(PETSC_USE_64BIT_INDICES)
1465: if (cando && (yin->map->N < PETSC_MPI_INT_MAX)) {
1466: #else
1467: if (cando) {
1468: #endif
1469: MPI_Comm_size(PetscObjectComm((PetscObject)ctx),&size);
1470: PetscMalloc3(1,&sto,size,&count,size,&displx);
1471: range = xin->map->range;
1472: for (i=0; i<size; i++) {
1473: PetscMPIIntCast(range[i+1] - range[i],count+i);
1474: PetscMPIIntCast(range[i],displx+i);
1475: }
1476: sto->count = count;
1477: sto->displx = displx;
1478: sto->work1 = 0;
1479: sto->work2 = 0;
1480: sto->type = VEC_SCATTER_MPI_TOONE;
1481: ctx->todata = (void*)sto;
1482: ctx->fromdata = 0;
1483: ctx->ops->begin = VecScatterBegin_MPI_ToOne;
1484: ctx->ops->end = 0;
1485: ctx->ops->destroy = VecScatterDestroy_MPI_ToAll;
1486: ctx->ops->copy = VecScatterCopy_MPI_ToAll;
1487: ctx->ops->view = VecScatterView_MPI_ToAll;
1488: PetscInfo(xin,"Special case: processor zero gets entire parallel vector, rest get none\n");
1489: goto functionend;
1490: }
1491: } else {
1492: MPI_Allreduce(&totalv,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)xin));
1493: }
1495: PetscObjectTypeCompare((PetscObject)ix,ISBLOCK,&ixblock);
1496: PetscObjectTypeCompare((PetscObject)iy,ISBLOCK,&iyblock);
1497: PetscObjectTypeCompare((PetscObject)iy,ISSTRIDE,&iystride);
1498: if (ixblock) {
1499: /* special case block to block */
1500: if (iyblock) {
1501: PetscInt nx,ny,bsx,bsy;
1502: const PetscInt *idx,*idy;
1503: ISGetBlockSize(iy,&bsy);
1504: ISGetBlockSize(ix,&bsx);
1505: if (bsx == bsy && VecScatterOptimizedBS(bsx)) {
1506: ISBlockGetLocalSize(ix,&nx);
1507: ISBlockGetIndices(ix,&idx);
1508: ISBlockGetLocalSize(iy,&ny);
1509: ISBlockGetIndices(iy,&idy);
1510: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1511: VecScatterCreate_PtoS(nx,idx,ny,idy,xin,yin,bsx,ctx);
1512: ISBlockRestoreIndices(ix,&idx);
1513: ISBlockRestoreIndices(iy,&idy);
1514: PetscInfo(xin,"Special case: blocked indices\n");
1515: goto functionend;
1516: }
1517: /* special case block to stride */
1518: } else if (iystride) {
1519: PetscInt ystart,ystride,ysize,bsx;
1520: ISStrideGetInfo(iy,&ystart,&ystride);
1521: ISGetLocalSize(iy,&ysize);
1522: ISGetBlockSize(ix,&bsx);
1523: /* see if stride index set is equivalent to block index set */
1524: if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1525: PetscInt nx,il,*idy;
1526: const PetscInt *idx;
1527: ISBlockGetLocalSize(ix,&nx);
1528: ISBlockGetIndices(ix,&idx);
1529: if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1530: PetscMalloc1(nx,&idy);
1531: if (nx) {
1532: idy[0] = ystart/bsx;
1533: for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1534: }
1535: VecScatterCreate_PtoS(nx,idx,nx,idy,xin,yin,bsx,ctx);
1536: PetscFree(idy);
1537: ISBlockRestoreIndices(ix,&idx);
1538: PetscInfo(xin,"Special case: blocked indices to stride\n");
1539: goto functionend;
1540: }
1541: }
1542: }
1543: /* left over general case */
1544: {
1545: PetscInt nx,ny;
1546: const PetscInt *idx,*idy;
1547: ISGetLocalSize(ix,&nx);
1548: ISGetIndices(ix,&idx);
1549: ISGetLocalSize(iy,&ny);
1550: ISGetIndices(iy,&idy);
1551: if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match (%d %d)",nx,ny);
1552: VecScatterCreate_PtoS(nx,idx,ny,idy,xin,yin,1,ctx);
1553: ISRestoreIndices(ix,&idx);
1554: ISRestoreIndices(iy,&idy);
1555: PetscInfo(xin,"General case: MPI to Seq\n");
1556: goto functionend;
1557: }
1558: }
1559: /* ---------------------------------------------------------------------------*/
1560: if (xin_type == VEC_SEQ_ID && yin_type == VEC_MPI_ID) {
1561: /* ===========================================================================================================
1562: Check for special cases
1563: ==========================================================================================================*/
1564: /* special case local copy portion */
1565: islocal = PETSC_FALSE;
1566: if (ix_type == IS_STRIDE_ID && iy_type == IS_STRIDE_ID) {
1567: PetscInt nx,ny,to_first,to_step,from_step,start,end,from_first,min,max;
1568: VecScatter_Seq_Stride *from = NULL,*to = NULL;
1570: VecGetOwnershipRange(yin,&start,&end);
1571: ISGetLocalSize(ix,&nx);
1572: ISStrideGetInfo(ix,&from_first,&from_step);
1573: ISGetLocalSize(iy,&ny);
1574: ISStrideGetInfo(iy,&to_first,&to_step);
1575: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1576: ISGetMinMax(iy,&min,&max);
1577: if (min >= start && max < end) islocal = PETSC_TRUE;
1578: else islocal = PETSC_FALSE;
1579: /* cannot use MPIU_Allreduce() since this call matches with the MPI_Allreduce() in the else statement below */
1580: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1581: if (cando) {
1582: PetscMalloc2(1,&to,1,&from);
1583: to->n = nx;
1584: to->first = to_first-start;
1585: to->step = to_step;
1586: from->n = nx;
1587: from->first = from_first;
1588: from->step = from_step;
1589: to->type = VEC_SCATTER_SEQ_STRIDE;
1590: from->type = VEC_SCATTER_SEQ_STRIDE;
1591: ctx->todata = (void*)to;
1592: ctx->fromdata = (void*)from;
1593: ctx->ops->begin = VecScatterBegin_SSToSS;
1594: ctx->ops->end = 0;
1595: ctx->ops->destroy = VecScatterDestroy_SSToSS;
1596: ctx->ops->copy = VecScatterCopy_SSToSS;
1597: ctx->ops->view = VecScatterView_SSToSS;
1598: PetscInfo(xin,"Special case: sequential stride to MPI stride\n");
1599: goto functionend;
1600: }
1601: } else {
1602: MPI_Allreduce(&islocal,&cando,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)yin));
1603: }
1604: /* special case block to stride */
1605: if (ix_type == IS_BLOCK_ID && iy_type == IS_STRIDE_ID) {
1606: PetscInt ystart,ystride,ysize,bsx;
1607: ISStrideGetInfo(iy,&ystart,&ystride);
1608: ISGetLocalSize(iy,&ysize);
1609: ISGetBlockSize(ix,&bsx);
1610: /* see if stride index set is equivalent to block index set */
1611: if (VecScatterOptimizedBS(bsx) && ((ystart % bsx) == 0) && (ystride == 1) && ((ysize % bsx) == 0)) {
1612: PetscInt nx,il,*idy;
1613: const PetscInt *idx;
1614: ISBlockGetLocalSize(ix,&nx);
1615: ISBlockGetIndices(ix,&idx);
1616: if (ysize != bsx*nx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1617: PetscMalloc1(nx,&idy);
1618: if (nx) {
1619: idy[0] = ystart/bsx;
1620: for (il=1; il<nx; il++) idy[il] = idy[il-1] + 1;
1621: }
1622: VecScatterCreate_StoP(nx,idx,nx,idy,xin,yin,bsx,ctx);
1623: PetscFree(idy);
1624: ISBlockRestoreIndices(ix,&idx);
1625: PetscInfo(xin,"Special case: Blocked indices to stride\n");
1626: goto functionend;
1627: }
1628: }
1630: /* general case */
1631: {
1632: PetscInt nx,ny;
1633: const PetscInt *idx,*idy;
1634: ISGetLocalSize(ix,&nx);
1635: ISGetIndices(ix,&idx);
1636: ISGetLocalSize(iy,&ny);
1637: ISGetIndices(iy,&idy);
1638: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1639: VecScatterCreate_StoP(nx,idx,ny,idy,xin,yin,1,ctx);
1640: ISRestoreIndices(ix,&idx);
1641: ISRestoreIndices(iy,&idy);
1642: PetscInfo(xin,"General case: Seq to MPI\n");
1643: goto functionend;
1644: }
1645: }
1646: /* ---------------------------------------------------------------------------*/
1647: if (xin_type == VEC_MPI_ID && yin_type == VEC_MPI_ID) {
1648: /* no special cases for now */
1649: PetscInt nx,ny;
1650: const PetscInt *idx,*idy;
1651: ISGetLocalSize(ix,&nx);
1652: ISGetIndices(ix,&idx);
1653: ISGetLocalSize(iy,&ny);
1654: ISGetIndices(iy,&idy);
1655: if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
1656: VecScatterCreate_PtoP(nx,idx,ny,idy,xin,yin,1,ctx);
1657: ISRestoreIndices(ix,&idx);
1658: ISRestoreIndices(iy,&idy);
1659: PetscInfo(xin,"General case: MPI to MPI\n");
1660: goto functionend;
1661: }
1663: functionend:
1664: *newctx = ctx;
1665: ISDestroy(&tix);
1666: ISDestroy(&tiy);
1667: VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
1668: return(0);
1669: }
1671: /* ------------------------------------------------------------------*/
1674: /*@
1675: VecScatterGetMerged - Returns true if the scatter is completed in the VecScatterBegin()
1676: and the VecScatterEnd() does nothing
1678: Not Collective
1680: Input Parameter:
1681: . ctx - scatter context created with VecScatterCreate()
1683: Output Parameter:
1684: . flg - PETSC_TRUE if the VecScatterBegin/End() are all done during the VecScatterBegin()
1686: Level: developer
1688: .seealso: VecScatterCreate(), VecScatterEnd(), VecScatterBegin()
1689: @*/
1690: PetscErrorCode VecScatterGetMerged(VecScatter ctx,PetscBool *flg)
1691: {
1694: *flg = ctx->beginandendtogether;
1695: return(0);
1696: }
1700: /*@
1701: VecScatterBegin - Begins a generalized scatter from one vector to
1702: another. Complete the scattering phase with VecScatterEnd().
1704: Neighbor-wise Collective on VecScatter and Vec
1706: Input Parameters:
1707: + inctx - scatter context generated by VecScatterCreate()
1708: . x - the vector from which we scatter
1709: . y - the vector to which we scatter
1710: . addv - either ADD_VALUES or INSERT_VALUES, with INSERT_VALUES mode any location
1711: not scattered to retains its old value; i.e. the vector is NOT first zeroed.
1712: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
1713: SCATTER_FORWARD or SCATTER_REVERSE
1716: Level: intermediate
1718: Options Database: See VecScatterCreate()
1720: Notes:
1721: The vectors x and y need not be the same vectors used in the call
1722: to VecScatterCreate(), but x must have the same parallel data layout
1723: as that passed in as the x to VecScatterCreate(), similarly for the y.
1724: Most likely they have been obtained from VecDuplicate().
1726: You cannot change the values in the input vector between the calls to VecScatterBegin()
1727: and VecScatterEnd().
1729: If you use SCATTER_REVERSE the two arguments x and y should be reversed, from
1730: the SCATTER_FORWARD.
1732: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1734: This scatter is far more general than the conventional
1735: scatter, since it can be a gather or a scatter or a combination,
1736: depending on the indices ix and iy. If x is a parallel vector and y
1737: is sequential, VecScatterBegin() can serve to gather values to a
1738: single processor. Similarly, if y is parallel and x sequential, the
1739: routine can scatter from one processor to many processors.
1741: Concepts: scatter^between vectors
1742: Concepts: gather^between vectors
1744: .seealso: VecScatterCreate(), VecScatterEnd()
1745: @*/
1746: PetscErrorCode VecScatterBegin(VecScatter inctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1747: {
1749: #if defined(PETSC_USE_DEBUG)
1750: PetscInt to_n,from_n;
1751: #endif
1756: if (inctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use");
1758: #if defined(PETSC_USE_DEBUG)
1759: /*
1760: Error checking to make sure these vectors match the vectors used
1761: to create the vector scatter context. -1 in the from_n and to_n indicate the
1762: vector lengths are unknown (for example with mapped scatters) and thus
1763: no error checking is performed.
1764: */
1765: if (inctx->from_n >= 0 && inctx->to_n >= 0) {
1766: VecGetLocalSize(x,&from_n);
1767: VecGetLocalSize(y,&to_n);
1768: if (mode & SCATTER_REVERSE) {
1769: if (to_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector to != ctx from size)",to_n,inctx->from_n);
1770: if (from_n != inctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter reverse and vector from != ctx to size)",from_n,inctx->to_n);
1771: } else {
1772: if (to_n != inctx->to_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector to != ctx to size)",to_n,inctx->to_n);
1773: if (from_n != inctx->from_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Vector wrong size %D for scatter %D (scatter forward and vector from != ctx from size)",from_n,inctx->from_n);
1774: }
1775: }
1776: #endif
1778: inctx->inuse = PETSC_TRUE;
1779: PetscLogEventBarrierBegin(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1780: (*inctx->ops->begin)(inctx,x,y,addv,mode);
1781: if (inctx->beginandendtogether && inctx->ops->end) {
1782: inctx->inuse = PETSC_FALSE;
1783: (*inctx->ops->end)(inctx,x,y,addv,mode);
1784: }
1785: PetscLogEventBarrierEnd(VEC_ScatterBarrier,0,0,0,0,PetscObjectComm((PetscObject)inctx));
1786: return(0);
1787: }
1789: /* --------------------------------------------------------------------*/
1792: /*@
1793: VecScatterEnd - Ends a generalized scatter from one vector to another. Call
1794: after first calling VecScatterBegin().
1796: Neighbor-wise Collective on VecScatter and Vec
1798: Input Parameters:
1799: + ctx - scatter context generated by VecScatterCreate()
1800: . x - the vector from which we scatter
1801: . y - the vector to which we scatter
1802: . addv - either ADD_VALUES or INSERT_VALUES.
1803: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
1804: SCATTER_FORWARD, SCATTER_REVERSE
1806: Level: intermediate
1808: Notes:
1809: If you use SCATTER_REVERSE the arguments x and y should be reversed, from the SCATTER_FORWARD.
1811: y[iy[i]] = x[ix[i]], for i=0,...,ni-1
1813: .seealso: VecScatterBegin(), VecScatterCreate()
1814: @*/
1815: PetscErrorCode VecScatterEnd(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
1816: {
1823: ctx->inuse = PETSC_FALSE;
1824: if (!ctx->ops->end) return(0);
1825: if (!ctx->beginandendtogether) {
1826: PetscLogEventBegin(VEC_ScatterEnd,ctx,x,y,0);
1827: (*(ctx)->ops->end)(ctx,x,y,addv,mode);
1828: PetscLogEventEnd(VEC_ScatterEnd,ctx,x,y,0);
1829: }
1830: return(0);
1831: }
1835: /*@C
1836: VecScatterDestroy - Destroys a scatter context created by
1837: VecScatterCreate().
1839: Collective on VecScatter
1841: Input Parameter:
1842: . ctx - the scatter context
1844: Level: intermediate
1846: .seealso: VecScatterCreate(), VecScatterCopy()
1847: @*/
1848: PetscErrorCode VecScatterDestroy(VecScatter *ctx)
1849: {
1853: if (!*ctx) return(0);
1855: if ((*ctx)->inuse) SETERRQ(((PetscObject)(*ctx))->comm,PETSC_ERR_ARG_WRONGSTATE,"Scatter context is in use");
1856: if (--((PetscObject)(*ctx))->refct > 0) {*ctx = 0; return(0);}
1858: /* if memory was published with SAWs then destroy it */
1859: PetscObjectSAWsViewOff((PetscObject)(*ctx));
1860: (*(*ctx)->ops->destroy)(*ctx);
1861: #if defined(PETSC_HAVE_CUSP)
1862: VecScatterCUSPIndicesDestroy((PetscCUSPIndices*)&((*ctx)->spptr));
1863: #elif defined(PETSC_HAVE_VECCUDA)
1864: VecScatterCUDAIndicesDestroy((PetscCUDAIndices*)&((*ctx)->spptr));
1865: #endif
1866: PetscHeaderDestroy(ctx);
1867: return(0);
1868: }
1872: /*@
1873: VecScatterCopy - Makes a copy of a scatter context.
1875: Collective on VecScatter
1877: Input Parameter:
1878: . sctx - the scatter context
1880: Output Parameter:
1881: . ctx - the context copy
1883: Level: advanced
1885: .seealso: VecScatterCreate(), VecScatterDestroy()
1886: @*/
1887: PetscErrorCode VecScatterCopy(VecScatter sctx,VecScatter *ctx)
1888: {
1894: if (!sctx->ops->copy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot copy this type");
1895: PetscHeaderCreate(*ctx,VEC_SCATTER_CLASSID,"VecScatter","VecScatter","Vec",PetscObjectComm((PetscObject)sctx),VecScatterDestroy,VecScatterView);
1896: (*ctx)->to_n = sctx->to_n;
1897: (*ctx)->from_n = sctx->from_n;
1898: (*sctx->ops->copy)(sctx,*ctx);
1899: return(0);
1900: }
1903: /* ------------------------------------------------------------------*/
1906: /*@C
1907: VecScatterView - Views a vector scatter context.
1909: Collective on VecScatter
1911: Input Parameters:
1912: + ctx - the scatter context
1913: - viewer - the viewer for displaying the context
1915: Level: intermediate
1917: C@*/
1918: PetscErrorCode VecScatterView(VecScatter ctx,PetscViewer viewer)
1919: {
1924: if (!viewer) {
1925: PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)ctx),&viewer);
1926: }
1928: if (ctx->ops->view) {
1929: (*ctx->ops->view)(ctx,viewer);
1930: }
1931: return(0);
1932: }
1936: /*@C
1937: VecScatterRemap - Remaps the "from" and "to" indices in a
1938: vector scatter context. FOR EXPERTS ONLY!
1940: Collective on VecScatter
1942: Input Parameters:
1943: + scat - vector scatter context
1944: . from - remapping for "from" indices (may be NULL)
1945: - to - remapping for "to" indices (may be NULL)
1947: Level: developer
1949: Notes: In the parallel case the todata is actually the indices
1950: from which the data is TAKEN! The from stuff is where the
1951: data is finally put. This is VERY VERY confusing!
1953: In the sequential case the todata is the indices where the
1954: data is put and the fromdata is where it is taken from.
1955: This is backwards from the paralllel case! CRY! CRY! CRY!
1957: @*/
1958: PetscErrorCode VecScatterRemap(VecScatter scat,PetscInt *rto,PetscInt *rfrom)
1959: {
1960: VecScatter_Seq_General *to,*from;
1961: VecScatter_MPI_General *mto;
1962: PetscInt i;
1969: from = (VecScatter_Seq_General*)scat->fromdata;
1970: mto = (VecScatter_MPI_General*)scat->todata;
1972: if (mto->type == VEC_SCATTER_MPI_TOALL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Not for to all scatter");
1974: if (rto) {
1975: if (mto->type == VEC_SCATTER_MPI_GENERAL) {
1976: /* handle off processor parts */
1977: for (i=0; i<mto->starts[mto->n]; i++) mto->indices[i] = rto[mto->indices[i]];
1979: /* handle local part */
1980: to = &mto->local;
1981: for (i=0; i<to->n; i++) to->vslots[i] = rto[to->vslots[i]];
1982: } else if (from->type == VEC_SCATTER_SEQ_GENERAL) {
1983: for (i=0; i<from->n; i++) from->vslots[i] = rto[from->vslots[i]];
1984: } else if (from->type == VEC_SCATTER_SEQ_STRIDE) {
1985: VecScatter_Seq_Stride *sto = (VecScatter_Seq_Stride*)from;
1987: /* if the remapping is the identity and stride is identity then skip remap */
1988: if (sto->step == 1 && sto->first == 0) {
1989: for (i=0; i<sto->n; i++) {
1990: if (rto[i] != i) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1991: }
1992: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1993: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Unable to remap such scatters");
1994: }
1996: if (rfrom) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");
1998: /*
1999: Mark then vector lengths as unknown because we do not know the
2000: lengths of the remapped vectors
2001: */
2002: scat->from_n = -1;
2003: scat->to_n = -1;
2004: return(0);
2005: }
2009: /*
2010: VecScatterGetTypes_Private - Returns the scatter types.
2012: scatter - The scatter.
2013: from - Upon exit this contains the type of the from scatter.
2014: to - Upon exit this contains the type of the to scatter.
2015: */
2016: PetscErrorCode VecScatterGetTypes_Private(VecScatter scatter,VecScatterType *from,VecScatterType *to)
2017: {
2018: VecScatter_Common* fromdata = (VecScatter_Common*)scatter->fromdata;
2019: VecScatter_Common* todata = (VecScatter_Common*)scatter->todata;
2022: *from = fromdata->type;
2023: *to = todata->type;
2024: return(0);
2025: }
2030: /*
2031: VecScatterIsSequential_Private - Returns true if the scatter is sequential.
2033: scatter - The scatter.
2034: flag - Upon exit flag is true if the scatter is of type VecScatter_Seq_General
2035: or VecScatter_Seq_Stride; otherwise flag is false.
2036: */
2037: PetscErrorCode VecScatterIsSequential_Private(VecScatter_Common *scatter,PetscBool *flag)
2038: {
2039: VecScatterType scatterType = scatter->type;
2042: if (scatterType == VEC_SCATTER_SEQ_GENERAL || scatterType == VEC_SCATTER_SEQ_STRIDE) {
2043: *flag = PETSC_TRUE;
2044: } else {
2045: *flag = PETSC_FALSE;
2046: }
2047: return(0);
2048: }
2050: #if defined(PETSC_HAVE_CUSP) || defined(PETSC_HAVE_VECCUDA)
2054: /*@C
2055: VecScatterInitializeForGPU - Initializes a generalized scatter from one vector
2056: to another for GPU based computation.
2058: Input Parameters:
2059: + inctx - scatter context generated by VecScatterCreate()
2060: . x - the vector from which we scatter
2061: - mode - the scattering mode, usually SCATTER_FORWARD. The available modes are:
2062: SCATTER_FORWARD or SCATTER_REVERSE
2064: Level: intermediate
2066: Notes:
2067: Effectively, this function creates all the necessary indexing buffers and work
2068: vectors needed to move data only those data points in a vector which need to
2069: be communicated across ranks. This is done at the first time this function is
2070: called. Currently, this only used in the context of the parallel SpMV call in
2071: MatMult_MPIAIJCUSP or MatMult_MPIAIJCUSPARSE.
2073: This function is executed before the call to MatMult. This enables the memory
2074: transfers to be overlapped with the MatMult SpMV kernel call.
2076: .seealso: VecScatterFinalizeForGPU(), VecScatterCreate(), VecScatterEnd()
2077: @*/
2078: PETSC_EXTERN PetscErrorCode VecScatterInitializeForGPU(VecScatter inctx,Vec x,ScatterMode mode)
2079: {
2081: VecScatter_MPI_General *to,*from;
2082: PetscErrorCode ierr;
2083: PetscInt i,*indices,*sstartsSends,*sstartsRecvs,nrecvs,nsends,bs;
2084: PetscBool isSeq1,isSeq2;
2087: VecScatterIsSequential_Private((VecScatter_Common*)inctx->fromdata,&isSeq1);
2088: VecScatterIsSequential_Private((VecScatter_Common*)inctx->todata,&isSeq2);
2089: if (isSeq1 || isSeq2) {
2090: return(0);
2091: }
2092: if (mode & SCATTER_REVERSE) {
2093: to = (VecScatter_MPI_General*)inctx->fromdata;
2094: from = (VecScatter_MPI_General*)inctx->todata;
2095: } else {
2096: to = (VecScatter_MPI_General*)inctx->todata;
2097: from = (VecScatter_MPI_General*)inctx->fromdata;
2098: }
2099: bs = to->bs;
2100: nrecvs = from->n;
2101: nsends = to->n;
2102: indices = to->indices;
2103: sstartsSends = to->starts;
2104: sstartsRecvs = from->starts;
2105: #if defined(PETSC_HAVE_CUSP)
2106: if (x->valid_GPU_array != PETSC_CUSP_UNALLOCATED && (nsends>0 || nrecvs>0)) {
2107: #else
2108: if (x->valid_GPU_array != PETSC_CUDA_UNALLOCATED && (nsends>0 || nrecvs>0)) {
2109: #endif
2110: if (!inctx->spptr) {
2111: PetscInt k,*tindicesSends,*sindicesSends,*tindicesRecvs,*sindicesRecvs;
2112: PetscInt ns = sstartsSends[nsends],nr = sstartsRecvs[nrecvs];
2113: /* Here we create indices for both the senders and receivers. */
2114: PetscMalloc1(ns,&tindicesSends);
2115: PetscMalloc1(nr,&tindicesRecvs);
2117: PetscMemcpy(tindicesSends,indices,ns*sizeof(PetscInt));
2118: PetscMemcpy(tindicesRecvs,from->indices,nr*sizeof(PetscInt));
2120: PetscSortRemoveDupsInt(&ns,tindicesSends);
2121: PetscSortRemoveDupsInt(&nr,tindicesRecvs);
2123: PetscMalloc1(bs*ns,&sindicesSends);
2124: PetscMalloc1(from->bs*nr,&sindicesRecvs);
2126: /* sender indices */
2127: for (i=0; i<ns; i++) {
2128: for (k=0; k<bs; k++) sindicesSends[i*bs+k] = tindicesSends[i]+k;
2129: }
2130: PetscFree(tindicesSends);
2132: /* receiver indices */
2133: for (i=0; i<nr; i++) {
2134: for (k=0; k<from->bs; k++) sindicesRecvs[i*from->bs+k] = tindicesRecvs[i]+k;
2135: }
2136: PetscFree(tindicesRecvs);
2138: /* create GPU indices, work vectors, ... */
2139: #if defined(PETSC_HAVE_CUSP)
2140: VecScatterCUSPIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUSPIndices*)&inctx->spptr);
2141: #else
2142: VecScatterCUDAIndicesCreate_PtoP(ns*bs,sindicesSends,nr*from->bs,sindicesRecvs,(PetscCUDAIndices*)&inctx->spptr);
2143: #endif
2144: PetscFree(sindicesSends);
2145: PetscFree(sindicesRecvs);
2146: }
2147: }
2148: return(0);
2149: }
2153: /*@C
2154: VecScatterFinalizeForGPU - Finalizes a generalized scatter from one vector to
2155: another for GPU based computation.
2157: Input Parameter:
2158: + inctx - scatter context generated by VecScatterCreate()
2160: Level: intermediate
2162: Notes:
2163: Effectively, this function resets the temporary buffer flags. Currently, this
2164: only used in the context of the parallel SpMV call in in MatMult_MPIAIJCUDA
2165: or MatMult_MPIAIJCUDAARSE. Once the MatMultAdd is finished, the GPU temporary
2166: buffers used for messaging are no longer valid.
2168: .seealso: VecScatterInitializeForGPU(), VecScatterCreate(), VecScatterEnd()
2169: @*/
2170: PETSC_EXTERN PetscErrorCode VecScatterFinalizeForGPU(VecScatter inctx)
2171: {
2173: return(0);
2174: }
2176: #endif