Actual source code: vpscat.c
petsc-3.7.5 2017-01-01
2: /*
3: Defines parallel vector scatters.
4: */
6: #include <../src/vec/vec/impls/dvecimpl.h> /*I "petscvec.h" I*/
7: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
8: #include <petscsf.h>
12: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
13: {
14: VecScatter_MPI_General *to =(VecScatter_MPI_General*)ctx->todata;
15: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
16: PetscErrorCode ierr;
17: PetscInt i;
18: PetscMPIInt rank;
19: PetscViewerFormat format;
20: PetscBool iascii;
23: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
24: if (iascii) {
25: MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
26: PetscViewerGetFormat(viewer,&format);
27: if (format == PETSC_VIEWER_ASCII_INFO) {
28: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
30: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
31: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
32: itmp = to->starts[to->n+1];
33: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
34: itmp = from->starts[from->n+1];
35: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
36: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));
38: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
39: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
40: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
41: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
42: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
43: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
45: } else {
46: PetscViewerASCIIPushSynchronized(viewer);
47: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
48: if (to->n) {
49: for (i=0; i<to->n; i++) {
50: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
51: }
52: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
53: for (i=0; i<to->starts[to->n]; i++) {
54: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
55: }
56: }
58: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
59: if (from->n) {
60: for (i=0; i<from->n; i++) {
61: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
62: }
64: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
65: for (i=0; i<from->starts[from->n]; i++) {
66: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
67: }
68: }
69: if (to->local.n) {
70: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
71: for (i=0; i<to->local.n; i++) {
72: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
73: }
74: }
76: PetscViewerFlush(viewer);
77: PetscViewerASCIIPopSynchronized(viewer);
78: }
79: }
80: return(0);
81: }
83: /* -----------------------------------------------------------------------------------*/
84: /*
85: The next routine determines what part of the local part of the scatter is an
86: exact copy of values into their current location. We check this here and
87: then know that we need not perform that portion of the scatter when the vector is
88: scattering to itself with INSERT_VALUES.
90: This is currently not used but would speed up, for example DMLocalToLocalBegin/End()
92: */
95: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
96: {
97: PetscInt n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
99: PetscInt *nto_slots,*nfrom_slots,j = 0;
102: for (i=0; i<n; i++) {
103: if (to_slots[i] != from_slots[i]) n_nonmatching++;
104: }
106: if (!n_nonmatching) {
107: to->nonmatching_computed = PETSC_TRUE;
108: to->n_nonmatching = from->n_nonmatching = 0;
110: PetscInfo1(scatter,"Reduced %D to 0\n", n);
111: } else if (n_nonmatching == n) {
112: to->nonmatching_computed = PETSC_FALSE;
114: PetscInfo(scatter,"All values non-matching\n");
115: } else {
116: to->nonmatching_computed= PETSC_TRUE;
117: to->n_nonmatching = from->n_nonmatching = n_nonmatching;
119: PetscMalloc1(n_nonmatching,&nto_slots);
120: PetscMalloc1(n_nonmatching,&nfrom_slots);
122: to->slots_nonmatching = nto_slots;
123: from->slots_nonmatching = nfrom_slots;
124: for (i=0; i<n; i++) {
125: if (to_slots[i] != from_slots[i]) {
126: nto_slots[j] = to_slots[i];
127: nfrom_slots[j] = from_slots[i];
128: j++;
129: }
130: }
131: PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
132: }
133: return(0);
134: }
136: /* --------------------------------------------------------------------------------------*/
138: /* -------------------------------------------------------------------------------------*/
141: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
142: {
143: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
144: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
145: PetscErrorCode ierr;
146: PetscInt i;
149: if (to->use_readyreceiver) {
150: /*
151: Since we have already posted sends we must cancel them before freeing
152: the requests
153: */
154: for (i=0; i<from->n; i++) {
155: MPI_Cancel(from->requests+i);
156: }
157: for (i=0; i<to->n; i++) {
158: MPI_Cancel(to->rev_requests+i);
159: }
160: MPI_Waitall(from->n,from->requests,to->rstatus);
161: MPI_Waitall(to->n,to->rev_requests,to->rstatus);
162: }
164: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
165: if (to->use_alltoallw) {
166: for (i=0; i<to->n; i++) {
167: MPI_Type_free(to->types+to->procs[i]);
168: }
169: PetscFree3(to->wcounts,to->wdispls,to->types);
170: if (!from->contiq) {
171: for (i=0; i<from->n; i++) {
172: MPI_Type_free(from->types+from->procs[i]);
173: }
174: }
175: PetscFree3(from->wcounts,from->wdispls,from->types);
176: }
177: #endif
179: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
180: if (to->use_window) {
181: MPI_Win_free(&from->window);
182: MPI_Win_free(&to->window);
183: PetscFree(from->winstarts);
184: PetscFree(to->winstarts);
185: }
186: #endif
188: if (to->use_alltoallv) {
189: PetscFree2(to->counts,to->displs);
190: PetscFree2(from->counts,from->displs);
191: }
193: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
194: /*
195: IBM's PE version of MPI has a bug where freeing these guys will screw up later
196: message passing.
197: */
198: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
199: if (!to->use_alltoallv && !to->use_window) { /* currently the to->requests etc are ALWAYS allocated even if not used */
200: if (to->requests) {
201: for (i=0; i<to->n; i++) {
202: MPI_Request_free(to->requests + i);
203: }
204: }
205: if (to->rev_requests) {
206: for (i=0; i<to->n; i++) {
207: MPI_Request_free(to->rev_requests + i);
208: }
209: }
210: }
211: /*
212: MPICH could not properly cancel requests thus with ready receiver mode we
213: cannot free the requests. It may be fixed now, if not then put the following
214: code inside a if (!to->use_readyreceiver) {
215: */
216: if (!to->use_alltoallv && !to->use_window) { /* currently the from->requests etc are ALWAYS allocated even if not used */
217: if (from->requests) {
218: for (i=0; i<from->n; i++) {
219: MPI_Request_free(from->requests + i);
220: }
221: }
223: if (from->rev_requests) {
224: for (i=0; i<from->n; i++) {
225: MPI_Request_free(from->rev_requests + i);
226: }
227: }
228: }
229: #endif
231: PetscFree(to->local.vslots);
232: PetscFree(from->local.vslots);
233: PetscFree2(to->counts,to->displs);
234: PetscFree2(from->counts,from->displs);
235: PetscFree(to->local.slots_nonmatching);
236: PetscFree(from->local.slots_nonmatching);
237: PetscFree(to->rev_requests);
238: PetscFree(from->rev_requests);
239: PetscFree(to->requests);
240: PetscFree(from->requests);
241: PetscFree4(to->values,to->indices,to->starts,to->procs);
242: PetscFree2(to->sstatus,to->rstatus);
243: PetscFree4(from->values,from->indices,from->starts,from->procs);
244: PetscFree(from);
245: PetscFree(to);
246: return(0);
247: }
251: /* --------------------------------------------------------------------------------------*/
252: /*
253: Special optimization to see if the local part of the scatter is actually
254: a copy. The scatter routines call PetscMemcpy() instead.
256: */
259: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
260: {
261: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
262: PetscInt to_start,from_start;
266: to_start = to_slots[0];
267: from_start = from_slots[0];
269: for (i=1; i<n; i++) {
270: to_start += bs;
271: from_start += bs;
272: if (to_slots[i] != to_start) return(0);
273: if (from_slots[i] != from_start) return(0);
274: }
275: to->is_copy = PETSC_TRUE;
276: to->copy_start = to_slots[0];
277: to->copy_length = bs*sizeof(PetscScalar)*n;
278: from->is_copy = PETSC_TRUE;
279: from->copy_start = from_slots[0];
280: from->copy_length = bs*sizeof(PetscScalar)*n;
281: PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
282: return(0);
283: }
285: /* --------------------------------------------------------------------------------------*/
289: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
290: {
291: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
292: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
293: PetscErrorCode ierr;
294: PetscInt ny,bs = in_from->bs;
297: out->ops->begin = in->ops->begin;
298: out->ops->end = in->ops->end;
299: out->ops->copy = in->ops->copy;
300: out->ops->destroy = in->ops->destroy;
301: out->ops->view = in->ops->view;
303: /* allocate entire send scatter context */
304: PetscNewLog(out,&out_to);
305: PetscNewLog(out,&out_from);
307: ny = in_to->starts[in_to->n];
308: out_to->n = in_to->n;
309: out_to->type = in_to->type;
310: out_to->sendfirst = in_to->sendfirst;
312: PetscMalloc1(out_to->n,&out_to->requests);
313: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
314: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
315: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
316: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
317: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
319: out->todata = (void*)out_to;
320: out_to->local.n = in_to->local.n;
321: out_to->local.nonmatching_computed = PETSC_FALSE;
322: out_to->local.n_nonmatching = 0;
323: out_to->local.slots_nonmatching = 0;
324: if (in_to->local.n) {
325: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
326: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
327: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
328: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
329: } else {
330: out_to->local.vslots = 0;
331: out_from->local.vslots = 0;
332: }
334: /* allocate entire receive context */
335: out_from->type = in_from->type;
336: ny = in_from->starts[in_from->n];
337: out_from->n = in_from->n;
338: out_from->sendfirst = in_from->sendfirst;
340: PetscMalloc1(out_from->n,&out_from->requests);
341: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
342: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
343: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
344: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
346: out->fromdata = (void*)out_from;
347: out_from->local.n = in_from->local.n;
348: out_from->local.nonmatching_computed = PETSC_FALSE;
349: out_from->local.n_nonmatching = 0;
350: out_from->local.slots_nonmatching = 0;
352: /*
353: set up the request arrays for use with isend_init() and irecv_init()
354: */
355: {
356: PetscMPIInt tag;
357: MPI_Comm comm;
358: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
359: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
360: PetscInt i;
361: PetscBool flg;
362: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
363: MPI_Request *rev_swaits,*rev_rwaits;
364: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
366: PetscMalloc1(in_to->n,&out_to->rev_requests);
367: PetscMalloc1(in_from->n,&out_from->rev_requests);
369: rev_rwaits = out_to->rev_requests;
370: rev_swaits = out_from->rev_requests;
372: out_from->bs = out_to->bs = bs;
373: tag = ((PetscObject)out)->tag;
374: PetscObjectGetComm((PetscObject)out,&comm);
376: /* Register the receives that you will use later (sends for scatter reverse) */
377: for (i=0; i<out_from->n; i++) {
378: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
379: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
380: }
382: flg = PETSC_FALSE;
383: PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&flg,NULL);
384: if (flg) {
385: out_to->use_readyreceiver = PETSC_TRUE;
386: out_from->use_readyreceiver = PETSC_TRUE;
387: for (i=0; i<out_to->n; i++) {
388: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
389: }
390: if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
391: MPI_Barrier(comm);
392: PetscInfo(in,"Using VecScatter ready receiver mode\n");
393: } else {
394: out_to->use_readyreceiver = PETSC_FALSE;
395: out_from->use_readyreceiver = PETSC_FALSE;
396: flg = PETSC_FALSE;
397: PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&flg,NULL);
398: if (flg) {
399: PetscInfo(in,"Using VecScatter Ssend mode\n");
400: }
401: for (i=0; i<out_to->n; i++) {
402: if (!flg) {
403: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
404: } else {
405: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
406: }
407: }
408: }
409: /* Register receives for scatter reverse */
410: for (i=0; i<out_to->n; i++) {
411: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
412: }
413: }
414: return(0);
415: }
419: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
420: {
421: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
422: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
423: PetscErrorCode ierr;
424: PetscInt ny,bs = in_from->bs;
425: PetscMPIInt size;
428: MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);
430: out->ops->begin = in->ops->begin;
431: out->ops->end = in->ops->end;
432: out->ops->copy = in->ops->copy;
433: out->ops->destroy = in->ops->destroy;
434: out->ops->view = in->ops->view;
436: /* allocate entire send scatter context */
437: PetscNewLog(out,&out_to);
438: PetscNewLog(out,&out_from);
440: ny = in_to->starts[in_to->n];
441: out_to->n = in_to->n;
442: out_to->type = in_to->type;
443: out_to->sendfirst = in_to->sendfirst;
445: PetscMalloc1(out_to->n,&out_to->requests);
446: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
447: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
448: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
449: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
450: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
452: out->todata = (void*)out_to;
453: out_to->local.n = in_to->local.n;
454: out_to->local.nonmatching_computed = PETSC_FALSE;
455: out_to->local.n_nonmatching = 0;
456: out_to->local.slots_nonmatching = 0;
457: if (in_to->local.n) {
458: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
459: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
460: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
461: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
462: } else {
463: out_to->local.vslots = 0;
464: out_from->local.vslots = 0;
465: }
467: /* allocate entire receive context */
468: out_from->type = in_from->type;
469: ny = in_from->starts[in_from->n];
470: out_from->n = in_from->n;
471: out_from->sendfirst = in_from->sendfirst;
473: PetscMalloc1(out_from->n,&out_from->requests);
474: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
475: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
476: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
477: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
479: out->fromdata = (void*)out_from;
480: out_from->local.n = in_from->local.n;
481: out_from->local.nonmatching_computed = PETSC_FALSE;
482: out_from->local.n_nonmatching = 0;
483: out_from->local.slots_nonmatching = 0;
485: out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;
487: PetscMalloc2(size,&out_to->counts,size,&out_to->displs);
488: PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
489: PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));
491: PetscMalloc2(size,&out_from->counts,size,&out_from->displs);
492: PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
493: PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
494: return(0);
495: }
496: /* --------------------------------------------------------------------------------------------------
497: Packs and unpacks the message data into send or from receive buffers.
499: These could be generated automatically.
501: Fortran kernels etc. could be used.
502: */
503: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
504: {
505: PetscInt i;
506: for (i=0; i<n; i++) y[i] = x[indicesx[i]];
507: }
511: PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
512: {
513: PetscInt i;
516: switch (addv) {
517: case INSERT_VALUES:
518: case INSERT_ALL_VALUES:
519: for (i=0; i<n; i++) y[indicesy[i]] = x[i];
520: break;
521: case ADD_VALUES:
522: case ADD_ALL_VALUES:
523: for (i=0; i<n; i++) y[indicesy[i]] += x[i];
524: break;
525: #if !defined(PETSC_USE_COMPLEX)
526: case MAX_VALUES:
527: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
528: #else
529: case MAX_VALUES:
530: #endif
531: case NOT_SET_VALUES:
532: break;
533: default:
534: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
535: }
536: return(0);
537: }
541: PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
542: {
543: PetscInt i;
546: switch (addv) {
547: case INSERT_VALUES:
548: case INSERT_ALL_VALUES:
549: for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
550: break;
551: case ADD_VALUES:
552: case ADD_ALL_VALUES:
553: for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
554: break;
555: #if !defined(PETSC_USE_COMPLEX)
556: case MAX_VALUES:
557: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
558: #else
559: case MAX_VALUES:
560: #endif
561: case NOT_SET_VALUES:
562: break;
563: default:
564: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
565: }
566: return(0);
567: }
569: /* ----------------------------------------------------------------------------------------------- */
570: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
571: {
572: PetscInt i,idx;
574: for (i=0; i<n; i++) {
575: idx = *indicesx++;
576: y[0] = x[idx];
577: y[1] = x[idx+1];
578: y += 2;
579: }
580: }
584: PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
585: {
586: PetscInt i,idy;
589: switch (addv) {
590: case INSERT_VALUES:
591: case INSERT_ALL_VALUES:
592: for (i=0; i<n; i++) {
593: idy = *indicesy++;
594: y[idy] = x[0];
595: y[idy+1] = x[1];
596: x += 2;
597: }
598: break;
599: case ADD_VALUES:
600: case ADD_ALL_VALUES:
601: for (i=0; i<n; i++) {
602: idy = *indicesy++;
603: y[idy] += x[0];
604: y[idy+1] += x[1];
605: x += 2;
606: }
607: break;
608: #if !defined(PETSC_USE_COMPLEX)
609: case MAX_VALUES:
610: for (i=0; i<n; i++) {
611: idy = *indicesy++;
612: y[idy] = PetscMax(y[idy],x[0]);
613: y[idy+1] = PetscMax(y[idy+1],x[1]);
614: x += 2;
615: }
616: #else
617: case MAX_VALUES:
618: #endif
619: case NOT_SET_VALUES:
620: break;
621: default:
622: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
623: }
624: return(0);
625: }
629: PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
630: {
631: PetscInt i,idx,idy;
634: switch (addv) {
635: case INSERT_VALUES:
636: case INSERT_ALL_VALUES:
637: for (i=0; i<n; i++) {
638: idx = *indicesx++;
639: idy = *indicesy++;
640: y[idy] = x[idx];
641: y[idy+1] = x[idx+1];
642: }
643: break;
644: case ADD_VALUES:
645: case ADD_ALL_VALUES:
646: for (i=0; i<n; i++) {
647: idx = *indicesx++;
648: idy = *indicesy++;
649: y[idy] += x[idx];
650: y[idy+1] += x[idx+1];
651: }
652: break;
653: #if !defined(PETSC_USE_COMPLEX)
654: case MAX_VALUES:
655: for (i=0; i<n; i++) {
656: idx = *indicesx++;
657: idy = *indicesy++;
658: y[idy] = PetscMax(y[idy],x[idx]);
659: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
660: }
661: #else
662: case MAX_VALUES:
663: #endif
664: case NOT_SET_VALUES:
665: break;
666: default:
667: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
668: }
669: return(0);
670: }
671: /* ----------------------------------------------------------------------------------------------- */
672: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
673: {
674: PetscInt i,idx;
676: for (i=0; i<n; i++) {
677: idx = *indicesx++;
678: y[0] = x[idx];
679: y[1] = x[idx+1];
680: y[2] = x[idx+2];
681: y += 3;
682: }
683: }
686: PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
687: {
688: PetscInt i,idy;
691: switch (addv) {
692: case INSERT_VALUES:
693: case INSERT_ALL_VALUES:
694: for (i=0; i<n; i++) {
695: idy = *indicesy++;
696: y[idy] = x[0];
697: y[idy+1] = x[1];
698: y[idy+2] = x[2];
699: x += 3;
700: }
701: break;
702: case ADD_VALUES:
703: case ADD_ALL_VALUES:
704: for (i=0; i<n; i++) {
705: idy = *indicesy++;
706: y[idy] += x[0];
707: y[idy+1] += x[1];
708: y[idy+2] += x[2];
709: x += 3;
710: }
711: break;
712: #if !defined(PETSC_USE_COMPLEX)
713: case MAX_VALUES:
714: for (i=0; i<n; i++) {
715: idy = *indicesy++;
716: y[idy] = PetscMax(y[idy],x[0]);
717: y[idy+1] = PetscMax(y[idy+1],x[1]);
718: y[idy+2] = PetscMax(y[idy+2],x[2]);
719: x += 3;
720: }
721: #else
722: case MAX_VALUES:
723: #endif
724: case NOT_SET_VALUES:
725: break;
726: default:
727: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
728: }
729: return(0);
730: }
734: PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
735: {
736: PetscInt i,idx,idy;
739: switch (addv) {
740: case INSERT_VALUES:
741: case INSERT_ALL_VALUES:
742: for (i=0; i<n; i++) {
743: idx = *indicesx++;
744: idy = *indicesy++;
745: y[idy] = x[idx];
746: y[idy+1] = x[idx+1];
747: y[idy+2] = x[idx+2];
748: }
749: break;
750: case ADD_VALUES:
751: case ADD_ALL_VALUES:
752: for (i=0; i<n; i++) {
753: idx = *indicesx++;
754: idy = *indicesy++;
755: y[idy] += x[idx];
756: y[idy+1] += x[idx+1];
757: y[idy+2] += x[idx+2];
758: }
759: break;
760: #if !defined(PETSC_USE_COMPLEX)
761: case MAX_VALUES:
762: for (i=0; i<n; i++) {
763: idx = *indicesx++;
764: idy = *indicesy++;
765: y[idy] = PetscMax(y[idy],x[idx]);
766: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
767: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
768: }
769: #else
770: case MAX_VALUES:
771: #endif
772: case NOT_SET_VALUES:
773: break;
774: default:
775: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
776: }
777: return(0);
778: }
779: /* ----------------------------------------------------------------------------------------------- */
780: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
781: {
782: PetscInt i,idx;
784: for (i=0; i<n; i++) {
785: idx = *indicesx++;
786: y[0] = x[idx];
787: y[1] = x[idx+1];
788: y[2] = x[idx+2];
789: y[3] = x[idx+3];
790: y += 4;
791: }
792: }
795: PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
796: {
797: PetscInt i,idy;
800: switch (addv) {
801: case INSERT_VALUES:
802: case INSERT_ALL_VALUES:
803: for (i=0; i<n; i++) {
804: idy = *indicesy++;
805: y[idy] = x[0];
806: y[idy+1] = x[1];
807: y[idy+2] = x[2];
808: y[idy+3] = x[3];
809: x += 4;
810: }
811: break;
812: case ADD_VALUES:
813: case ADD_ALL_VALUES:
814: for (i=0; i<n; i++) {
815: idy = *indicesy++;
816: y[idy] += x[0];
817: y[idy+1] += x[1];
818: y[idy+2] += x[2];
819: y[idy+3] += x[3];
820: x += 4;
821: }
822: break;
823: #if !defined(PETSC_USE_COMPLEX)
824: case MAX_VALUES:
825: for (i=0; i<n; i++) {
826: idy = *indicesy++;
827: y[idy] = PetscMax(y[idy],x[0]);
828: y[idy+1] = PetscMax(y[idy+1],x[1]);
829: y[idy+2] = PetscMax(y[idy+2],x[2]);
830: y[idy+3] = PetscMax(y[idy+3],x[3]);
831: x += 4;
832: }
833: #else
834: case MAX_VALUES:
835: #endif
836: case NOT_SET_VALUES:
837: break;
838: default:
839: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
840: }
841: return(0);
842: }
846: PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
847: {
848: PetscInt i,idx,idy;
851: switch (addv) {
852: case INSERT_VALUES:
853: case INSERT_ALL_VALUES:
854: for (i=0; i<n; i++) {
855: idx = *indicesx++;
856: idy = *indicesy++;
857: y[idy] = x[idx];
858: y[idy+1] = x[idx+1];
859: y[idy+2] = x[idx+2];
860: y[idy+3] = x[idx+3];
861: }
862: break;
863: case ADD_VALUES:
864: case ADD_ALL_VALUES:
865: for (i=0; i<n; i++) {
866: idx = *indicesx++;
867: idy = *indicesy++;
868: y[idy] += x[idx];
869: y[idy+1] += x[idx+1];
870: y[idy+2] += x[idx+2];
871: y[idy+3] += x[idx+3];
872: }
873: break;
874: #if !defined(PETSC_USE_COMPLEX)
875: case MAX_VALUES:
876: for (i=0; i<n; i++) {
877: idx = *indicesx++;
878: idy = *indicesy++;
879: y[idy] = PetscMax(y[idy],x[idx]);
880: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
881: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
882: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
883: }
884: #else
885: case MAX_VALUES:
886: #endif
887: case NOT_SET_VALUES:
888: break;
889: default:
890: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
891: }
892: return(0);
893: }
894: /* ----------------------------------------------------------------------------------------------- */
895: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
896: {
897: PetscInt i,idx;
899: for (i=0; i<n; i++) {
900: idx = *indicesx++;
901: y[0] = x[idx];
902: y[1] = x[idx+1];
903: y[2] = x[idx+2];
904: y[3] = x[idx+3];
905: y[4] = x[idx+4];
906: y += 5;
907: }
908: }
912: PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
913: {
914: PetscInt i,idy;
917: switch (addv) {
918: case INSERT_VALUES:
919: case INSERT_ALL_VALUES:
920: for (i=0; i<n; i++) {
921: idy = *indicesy++;
922: y[idy] = x[0];
923: y[idy+1] = x[1];
924: y[idy+2] = x[2];
925: y[idy+3] = x[3];
926: y[idy+4] = x[4];
927: x += 5;
928: }
929: break;
930: case ADD_VALUES:
931: case ADD_ALL_VALUES:
932: for (i=0; i<n; i++) {
933: idy = *indicesy++;
934: y[idy] += x[0];
935: y[idy+1] += x[1];
936: y[idy+2] += x[2];
937: y[idy+3] += x[3];
938: y[idy+4] += x[4];
939: x += 5;
940: }
941: break;
942: #if !defined(PETSC_USE_COMPLEX)
943: case MAX_VALUES:
944: for (i=0; i<n; i++) {
945: idy = *indicesy++;
946: y[idy] = PetscMax(y[idy],x[0]);
947: y[idy+1] = PetscMax(y[idy+1],x[1]);
948: y[idy+2] = PetscMax(y[idy+2],x[2]);
949: y[idy+3] = PetscMax(y[idy+3],x[3]);
950: y[idy+4] = PetscMax(y[idy+4],x[4]);
951: x += 5;
952: }
953: #else
954: case MAX_VALUES:
955: #endif
956: case NOT_SET_VALUES:
957: break;
958: default:
959: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
960: }
961: return(0);
962: }
966: PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
967: {
968: PetscInt i,idx,idy;
971: switch (addv) {
972: case INSERT_VALUES:
973: case INSERT_ALL_VALUES:
974: for (i=0; i<n; i++) {
975: idx = *indicesx++;
976: idy = *indicesy++;
977: y[idy] = x[idx];
978: y[idy+1] = x[idx+1];
979: y[idy+2] = x[idx+2];
980: y[idy+3] = x[idx+3];
981: y[idy+4] = x[idx+4];
982: }
983: break;
984: case ADD_VALUES:
985: case ADD_ALL_VALUES:
986: for (i=0; i<n; i++) {
987: idx = *indicesx++;
988: idy = *indicesy++;
989: y[idy] += x[idx];
990: y[idy+1] += x[idx+1];
991: y[idy+2] += x[idx+2];
992: y[idy+3] += x[idx+3];
993: y[idy+4] += x[idx+4];
994: }
995: break;
996: #if !defined(PETSC_USE_COMPLEX)
997: case MAX_VALUES:
998: for (i=0; i<n; i++) {
999: idx = *indicesx++;
1000: idy = *indicesy++;
1001: y[idy] = PetscMax(y[idy],x[idx]);
1002: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1003: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1004: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1005: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1006: }
1007: #else
1008: case MAX_VALUES:
1009: #endif
1010: case NOT_SET_VALUES:
1011: break;
1012: default:
1013: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1014: }
1015: return(0);
1016: }
1017: /* ----------------------------------------------------------------------------------------------- */
1018: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1019: {
1020: PetscInt i,idx;
1022: for (i=0; i<n; i++) {
1023: idx = *indicesx++;
1024: y[0] = x[idx];
1025: y[1] = x[idx+1];
1026: y[2] = x[idx+2];
1027: y[3] = x[idx+3];
1028: y[4] = x[idx+4];
1029: y[5] = x[idx+5];
1030: y += 6;
1031: }
1032: }
1036: PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1037: {
1038: PetscInt i,idy;
1041: switch (addv) {
1042: case INSERT_VALUES:
1043: case INSERT_ALL_VALUES:
1044: for (i=0; i<n; i++) {
1045: idy = *indicesy++;
1046: y[idy] = x[0];
1047: y[idy+1] = x[1];
1048: y[idy+2] = x[2];
1049: y[idy+3] = x[3];
1050: y[idy+4] = x[4];
1051: y[idy+5] = x[5];
1052: x += 6;
1053: }
1054: break;
1055: case ADD_VALUES:
1056: case ADD_ALL_VALUES:
1057: for (i=0; i<n; i++) {
1058: idy = *indicesy++;
1059: y[idy] += x[0];
1060: y[idy+1] += x[1];
1061: y[idy+2] += x[2];
1062: y[idy+3] += x[3];
1063: y[idy+4] += x[4];
1064: y[idy+5] += x[5];
1065: x += 6;
1066: }
1067: break;
1068: #if !defined(PETSC_USE_COMPLEX)
1069: case MAX_VALUES:
1070: for (i=0; i<n; i++) {
1071: idy = *indicesy++;
1072: y[idy] = PetscMax(y[idy],x[0]);
1073: y[idy+1] = PetscMax(y[idy+1],x[1]);
1074: y[idy+2] = PetscMax(y[idy+2],x[2]);
1075: y[idy+3] = PetscMax(y[idy+3],x[3]);
1076: y[idy+4] = PetscMax(y[idy+4],x[4]);
1077: y[idy+5] = PetscMax(y[idy+5],x[5]);
1078: x += 6;
1079: }
1080: #else
1081: case MAX_VALUES:
1082: #endif
1083: case NOT_SET_VALUES:
1084: break;
1085: default:
1086: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1087: }
1088: return(0);
1089: }
1093: PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1094: {
1095: PetscInt i,idx,idy;
1098: switch (addv) {
1099: case INSERT_VALUES:
1100: case INSERT_ALL_VALUES:
1101: for (i=0; i<n; i++) {
1102: idx = *indicesx++;
1103: idy = *indicesy++;
1104: y[idy] = x[idx];
1105: y[idy+1] = x[idx+1];
1106: y[idy+2] = x[idx+2];
1107: y[idy+3] = x[idx+3];
1108: y[idy+4] = x[idx+4];
1109: y[idy+5] = x[idx+5];
1110: }
1111: break;
1112: case ADD_VALUES:
1113: case ADD_ALL_VALUES:
1114: for (i=0; i<n; i++) {
1115: idx = *indicesx++;
1116: idy = *indicesy++;
1117: y[idy] += x[idx];
1118: y[idy+1] += x[idx+1];
1119: y[idy+2] += x[idx+2];
1120: y[idy+3] += x[idx+3];
1121: y[idy+4] += x[idx+4];
1122: y[idy+5] += x[idx+5];
1123: }
1124: break;
1125: #if !defined(PETSC_USE_COMPLEX)
1126: case MAX_VALUES:
1127: for (i=0; i<n; i++) {
1128: idx = *indicesx++;
1129: idy = *indicesy++;
1130: y[idy] = PetscMax(y[idy],x[idx]);
1131: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1132: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1133: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1134: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1135: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1136: }
1137: #else
1138: case MAX_VALUES:
1139: #endif
1140: case NOT_SET_VALUES:
1141: break;
1142: default:
1143: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1144: }
1145: return(0);
1146: }
1147: /* ----------------------------------------------------------------------------------------------- */
1148: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1149: {
1150: PetscInt i,idx;
1152: for (i=0; i<n; i++) {
1153: idx = *indicesx++;
1154: y[0] = x[idx];
1155: y[1] = x[idx+1];
1156: y[2] = x[idx+2];
1157: y[3] = x[idx+3];
1158: y[4] = x[idx+4];
1159: y[5] = x[idx+5];
1160: y[6] = x[idx+6];
1161: y += 7;
1162: }
1163: }
1167: PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1168: {
1169: PetscInt i,idy;
1172: switch (addv) {
1173: case INSERT_VALUES:
1174: case INSERT_ALL_VALUES:
1175: for (i=0; i<n; i++) {
1176: idy = *indicesy++;
1177: y[idy] = x[0];
1178: y[idy+1] = x[1];
1179: y[idy+2] = x[2];
1180: y[idy+3] = x[3];
1181: y[idy+4] = x[4];
1182: y[idy+5] = x[5];
1183: y[idy+6] = x[6];
1184: x += 7;
1185: }
1186: break;
1187: case ADD_VALUES:
1188: case ADD_ALL_VALUES:
1189: for (i=0; i<n; i++) {
1190: idy = *indicesy++;
1191: y[idy] += x[0];
1192: y[idy+1] += x[1];
1193: y[idy+2] += x[2];
1194: y[idy+3] += x[3];
1195: y[idy+4] += x[4];
1196: y[idy+5] += x[5];
1197: y[idy+6] += x[6];
1198: x += 7;
1199: }
1200: break;
1201: #if !defined(PETSC_USE_COMPLEX)
1202: case MAX_VALUES:
1203: for (i=0; i<n; i++) {
1204: idy = *indicesy++;
1205: y[idy] = PetscMax(y[idy],x[0]);
1206: y[idy+1] = PetscMax(y[idy+1],x[1]);
1207: y[idy+2] = PetscMax(y[idy+2],x[2]);
1208: y[idy+3] = PetscMax(y[idy+3],x[3]);
1209: y[idy+4] = PetscMax(y[idy+4],x[4]);
1210: y[idy+5] = PetscMax(y[idy+5],x[5]);
1211: y[idy+6] = PetscMax(y[idy+6],x[6]);
1212: x += 7;
1213: }
1214: #else
1215: case MAX_VALUES:
1216: #endif
1217: case NOT_SET_VALUES:
1218: break;
1219: default:
1220: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1221: }
1222: return(0);
1223: }
1227: PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1228: {
1229: PetscInt i,idx,idy;
1232: switch (addv) {
1233: case INSERT_VALUES:
1234: case INSERT_ALL_VALUES:
1235: for (i=0; i<n; i++) {
1236: idx = *indicesx++;
1237: idy = *indicesy++;
1238: y[idy] = x[idx];
1239: y[idy+1] = x[idx+1];
1240: y[idy+2] = x[idx+2];
1241: y[idy+3] = x[idx+3];
1242: y[idy+4] = x[idx+4];
1243: y[idy+5] = x[idx+5];
1244: y[idy+6] = x[idx+6];
1245: }
1246: break;
1247: case ADD_VALUES:
1248: case ADD_ALL_VALUES:
1249: for (i=0; i<n; i++) {
1250: idx = *indicesx++;
1251: idy = *indicesy++;
1252: y[idy] += x[idx];
1253: y[idy+1] += x[idx+1];
1254: y[idy+2] += x[idx+2];
1255: y[idy+3] += x[idx+3];
1256: y[idy+4] += x[idx+4];
1257: y[idy+5] += x[idx+5];
1258: y[idy+6] += x[idx+6];
1259: }
1260: break;
1261: #if !defined(PETSC_USE_COMPLEX)
1262: case MAX_VALUES:
1263: for (i=0; i<n; i++) {
1264: idx = *indicesx++;
1265: idy = *indicesy++;
1266: y[idy] = PetscMax(y[idy],x[idx]);
1267: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1268: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1269: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1270: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1271: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1272: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1273: }
1274: #else
1275: case MAX_VALUES:
1276: #endif
1277: case NOT_SET_VALUES:
1278: break;
1279: default:
1280: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1281: }
1282: return(0);
1283: }
1284: /* ----------------------------------------------------------------------------------------------- */
1285: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1286: {
1287: PetscInt i,idx;
1289: for (i=0; i<n; i++) {
1290: idx = *indicesx++;
1291: y[0] = x[idx];
1292: y[1] = x[idx+1];
1293: y[2] = x[idx+2];
1294: y[3] = x[idx+3];
1295: y[4] = x[idx+4];
1296: y[5] = x[idx+5];
1297: y[6] = x[idx+6];
1298: y[7] = x[idx+7];
1299: y += 8;
1300: }
1301: }
1305: PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1306: {
1307: PetscInt i,idy;
1310: switch (addv) {
1311: case INSERT_VALUES:
1312: case INSERT_ALL_VALUES:
1313: for (i=0; i<n; i++) {
1314: idy = *indicesy++;
1315: y[idy] = x[0];
1316: y[idy+1] = x[1];
1317: y[idy+2] = x[2];
1318: y[idy+3] = x[3];
1319: y[idy+4] = x[4];
1320: y[idy+5] = x[5];
1321: y[idy+6] = x[6];
1322: y[idy+7] = x[7];
1323: x += 8;
1324: }
1325: break;
1326: case ADD_VALUES:
1327: case ADD_ALL_VALUES:
1328: for (i=0; i<n; i++) {
1329: idy = *indicesy++;
1330: y[idy] += x[0];
1331: y[idy+1] += x[1];
1332: y[idy+2] += x[2];
1333: y[idy+3] += x[3];
1334: y[idy+4] += x[4];
1335: y[idy+5] += x[5];
1336: y[idy+6] += x[6];
1337: y[idy+7] += x[7];
1338: x += 8;
1339: }
1340: break;
1341: #if !defined(PETSC_USE_COMPLEX)
1342: case MAX_VALUES:
1343: for (i=0; i<n; i++) {
1344: idy = *indicesy++;
1345: y[idy] = PetscMax(y[idy],x[0]);
1346: y[idy+1] = PetscMax(y[idy+1],x[1]);
1347: y[idy+2] = PetscMax(y[idy+2],x[2]);
1348: y[idy+3] = PetscMax(y[idy+3],x[3]);
1349: y[idy+4] = PetscMax(y[idy+4],x[4]);
1350: y[idy+5] = PetscMax(y[idy+5],x[5]);
1351: y[idy+6] = PetscMax(y[idy+6],x[6]);
1352: y[idy+7] = PetscMax(y[idy+7],x[7]);
1353: x += 8;
1354: }
1355: #else
1356: case MAX_VALUES:
1357: #endif
1358: case NOT_SET_VALUES:
1359: break;
1360: default:
1361: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1362: }
1363: return(0);
1364: }
1368: PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1369: {
1370: PetscInt i,idx,idy;
1373: switch (addv) {
1374: case INSERT_VALUES:
1375: case INSERT_ALL_VALUES:
1376: for (i=0; i<n; i++) {
1377: idx = *indicesx++;
1378: idy = *indicesy++;
1379: y[idy] = x[idx];
1380: y[idy+1] = x[idx+1];
1381: y[idy+2] = x[idx+2];
1382: y[idy+3] = x[idx+3];
1383: y[idy+4] = x[idx+4];
1384: y[idy+5] = x[idx+5];
1385: y[idy+6] = x[idx+6];
1386: y[idy+7] = x[idx+7];
1387: }
1388: break;
1389: case ADD_VALUES:
1390: case ADD_ALL_VALUES:
1391: for (i=0; i<n; i++) {
1392: idx = *indicesx++;
1393: idy = *indicesy++;
1394: y[idy] += x[idx];
1395: y[idy+1] += x[idx+1];
1396: y[idy+2] += x[idx+2];
1397: y[idy+3] += x[idx+3];
1398: y[idy+4] += x[idx+4];
1399: y[idy+5] += x[idx+5];
1400: y[idy+6] += x[idx+6];
1401: y[idy+7] += x[idx+7];
1402: }
1403: break;
1404: #if !defined(PETSC_USE_COMPLEX)
1405: case MAX_VALUES:
1406: for (i=0; i<n; i++) {
1407: idx = *indicesx++;
1408: idy = *indicesy++;
1409: y[idy] = PetscMax(y[idy],x[idx]);
1410: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1411: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1412: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1413: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1414: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1415: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1416: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1417: }
1418: #else
1419: case MAX_VALUES:
1420: #endif
1421: case NOT_SET_VALUES:
1422: break;
1423: default:
1424: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1425: }
1426: return(0);
1427: }
1429: PETSC_STATIC_INLINE void Pack_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1430: {
1431: PetscInt i,idx;
1433: for (i=0; i<n; i++) {
1434: idx = *indicesx++;
1435: y[0] = x[idx];
1436: y[1] = x[idx+1];
1437: y[2] = x[idx+2];
1438: y[3] = x[idx+3];
1439: y[4] = x[idx+4];
1440: y[5] = x[idx+5];
1441: y[6] = x[idx+6];
1442: y[7] = x[idx+7];
1443: y[8] = x[idx+8];
1444: y += 9;
1445: }
1446: }
1450: PETSC_STATIC_INLINE PetscErrorCode UnPack_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1451: {
1452: PetscInt i,idy;
1455: switch (addv) {
1456: case INSERT_VALUES:
1457: case INSERT_ALL_VALUES:
1458: for (i=0; i<n; i++) {
1459: idy = *indicesy++;
1460: y[idy] = x[0];
1461: y[idy+1] = x[1];
1462: y[idy+2] = x[2];
1463: y[idy+3] = x[3];
1464: y[idy+4] = x[4];
1465: y[idy+5] = x[5];
1466: y[idy+6] = x[6];
1467: y[idy+7] = x[7];
1468: y[idy+8] = x[8];
1469: x += 9;
1470: }
1471: break;
1472: case ADD_VALUES:
1473: case ADD_ALL_VALUES:
1474: for (i=0; i<n; i++) {
1475: idy = *indicesy++;
1476: y[idy] += x[0];
1477: y[idy+1] += x[1];
1478: y[idy+2] += x[2];
1479: y[idy+3] += x[3];
1480: y[idy+4] += x[4];
1481: y[idy+5] += x[5];
1482: y[idy+6] += x[6];
1483: y[idy+7] += x[7];
1484: y[idy+8] += x[8];
1485: x += 9;
1486: }
1487: break;
1488: #if !defined(PETSC_USE_COMPLEX)
1489: case MAX_VALUES:
1490: for (i=0; i<n; i++) {
1491: idy = *indicesy++;
1492: y[idy] = PetscMax(y[idy],x[0]);
1493: y[idy+1] = PetscMax(y[idy+1],x[1]);
1494: y[idy+2] = PetscMax(y[idy+2],x[2]);
1495: y[idy+3] = PetscMax(y[idy+3],x[3]);
1496: y[idy+4] = PetscMax(y[idy+4],x[4]);
1497: y[idy+5] = PetscMax(y[idy+5],x[5]);
1498: y[idy+6] = PetscMax(y[idy+6],x[6]);
1499: y[idy+7] = PetscMax(y[idy+7],x[7]);
1500: y[idy+8] = PetscMax(y[idy+8],x[8]);
1501: x += 9;
1502: }
1503: #else
1504: case MAX_VALUES:
1505: #endif
1506: case NOT_SET_VALUES:
1507: break;
1508: default:
1509: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1510: }
1511: return(0);
1512: }
1516: PETSC_STATIC_INLINE PetscErrorCode Scatter_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1517: {
1518: PetscInt i,idx,idy;
1521: switch (addv) {
1522: case INSERT_VALUES:
1523: case INSERT_ALL_VALUES:
1524: for (i=0; i<n; i++) {
1525: idx = *indicesx++;
1526: idy = *indicesy++;
1527: y[idy] = x[idx];
1528: y[idy+1] = x[idx+1];
1529: y[idy+2] = x[idx+2];
1530: y[idy+3] = x[idx+3];
1531: y[idy+4] = x[idx+4];
1532: y[idy+5] = x[idx+5];
1533: y[idy+6] = x[idx+6];
1534: y[idy+7] = x[idx+7];
1535: y[idy+8] = x[idx+8];
1536: }
1537: break;
1538: case ADD_VALUES:
1539: case ADD_ALL_VALUES:
1540: for (i=0; i<n; i++) {
1541: idx = *indicesx++;
1542: idy = *indicesy++;
1543: y[idy] += x[idx];
1544: y[idy+1] += x[idx+1];
1545: y[idy+2] += x[idx+2];
1546: y[idy+3] += x[idx+3];
1547: y[idy+4] += x[idx+4];
1548: y[idy+5] += x[idx+5];
1549: y[idy+6] += x[idx+6];
1550: y[idy+7] += x[idx+7];
1551: y[idy+8] += x[idx+8];
1552: }
1553: break;
1554: #if !defined(PETSC_USE_COMPLEX)
1555: case MAX_VALUES:
1556: for (i=0; i<n; i++) {
1557: idx = *indicesx++;
1558: idy = *indicesy++;
1559: y[idy] = PetscMax(y[idy],x[idx]);
1560: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1561: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1562: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1563: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1564: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1565: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1566: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1567: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1568: }
1569: #else
1570: case MAX_VALUES:
1571: #endif
1572: case NOT_SET_VALUES:
1573: break;
1574: default:
1575: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1576: }
1577: return(0);
1578: }
1580: PETSC_STATIC_INLINE void Pack_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1581: {
1582: PetscInt i,idx;
1584: for (i=0; i<n; i++) {
1585: idx = *indicesx++;
1586: y[0] = x[idx];
1587: y[1] = x[idx+1];
1588: y[2] = x[idx+2];
1589: y[3] = x[idx+3];
1590: y[4] = x[idx+4];
1591: y[5] = x[idx+5];
1592: y[6] = x[idx+6];
1593: y[7] = x[idx+7];
1594: y[8] = x[idx+8];
1595: y[9] = x[idx+9];
1596: y += 10;
1597: }
1598: }
1602: PETSC_STATIC_INLINE PetscErrorCode UnPack_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1603: {
1604: PetscInt i,idy;
1607: switch (addv) {
1608: case INSERT_VALUES:
1609: case INSERT_ALL_VALUES:
1610: for (i=0; i<n; i++) {
1611: idy = *indicesy++;
1612: y[idy] = x[0];
1613: y[idy+1] = x[1];
1614: y[idy+2] = x[2];
1615: y[idy+3] = x[3];
1616: y[idy+4] = x[4];
1617: y[idy+5] = x[5];
1618: y[idy+6] = x[6];
1619: y[idy+7] = x[7];
1620: y[idy+8] = x[8];
1621: y[idy+9] = x[9];
1622: x += 10;
1623: }
1624: break;
1625: case ADD_VALUES:
1626: case ADD_ALL_VALUES:
1627: for (i=0; i<n; i++) {
1628: idy = *indicesy++;
1629: y[idy] += x[0];
1630: y[idy+1] += x[1];
1631: y[idy+2] += x[2];
1632: y[idy+3] += x[3];
1633: y[idy+4] += x[4];
1634: y[idy+5] += x[5];
1635: y[idy+6] += x[6];
1636: y[idy+7] += x[7];
1637: y[idy+8] += x[8];
1638: y[idy+9] += x[9];
1639: x += 10;
1640: }
1641: break;
1642: #if !defined(PETSC_USE_COMPLEX)
1643: case MAX_VALUES:
1644: for (i=0; i<n; i++) {
1645: idy = *indicesy++;
1646: y[idy] = PetscMax(y[idy],x[0]);
1647: y[idy+1] = PetscMax(y[idy+1],x[1]);
1648: y[idy+2] = PetscMax(y[idy+2],x[2]);
1649: y[idy+3] = PetscMax(y[idy+3],x[3]);
1650: y[idy+4] = PetscMax(y[idy+4],x[4]);
1651: y[idy+5] = PetscMax(y[idy+5],x[5]);
1652: y[idy+6] = PetscMax(y[idy+6],x[6]);
1653: y[idy+7] = PetscMax(y[idy+7],x[7]);
1654: y[idy+8] = PetscMax(y[idy+8],x[8]);
1655: y[idy+9] = PetscMax(y[idy+9],x[9]);
1656: x += 10;
1657: }
1658: #else
1659: case MAX_VALUES:
1660: #endif
1661: case NOT_SET_VALUES:
1662: break;
1663: default:
1664: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1665: }
1666: return(0);
1667: }
1671: PETSC_STATIC_INLINE PetscErrorCode Scatter_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1672: {
1673: PetscInt i,idx,idy;
1676: switch (addv) {
1677: case INSERT_VALUES:
1678: case INSERT_ALL_VALUES:
1679: for (i=0; i<n; i++) {
1680: idx = *indicesx++;
1681: idy = *indicesy++;
1682: y[idy] = x[idx];
1683: y[idy+1] = x[idx+1];
1684: y[idy+2] = x[idx+2];
1685: y[idy+3] = x[idx+3];
1686: y[idy+4] = x[idx+4];
1687: y[idy+5] = x[idx+5];
1688: y[idy+6] = x[idx+6];
1689: y[idy+7] = x[idx+7];
1690: y[idy+8] = x[idx+8];
1691: y[idy+9] = x[idx+9];
1692: }
1693: break;
1694: case ADD_VALUES:
1695: case ADD_ALL_VALUES:
1696: for (i=0; i<n; i++) {
1697: idx = *indicesx++;
1698: idy = *indicesy++;
1699: y[idy] += x[idx];
1700: y[idy+1] += x[idx+1];
1701: y[idy+2] += x[idx+2];
1702: y[idy+3] += x[idx+3];
1703: y[idy+4] += x[idx+4];
1704: y[idy+5] += x[idx+5];
1705: y[idy+6] += x[idx+6];
1706: y[idy+7] += x[idx+7];
1707: y[idy+8] += x[idx+8];
1708: y[idy+9] += x[idx+9];
1709: }
1710: break;
1711: #if !defined(PETSC_USE_COMPLEX)
1712: case MAX_VALUES:
1713: for (i=0; i<n; i++) {
1714: idx = *indicesx++;
1715: idy = *indicesy++;
1716: y[idy] = PetscMax(y[idy],x[idx]);
1717: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1718: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1719: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1720: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1721: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1722: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1723: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1724: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1725: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1726: }
1727: #else
1728: case MAX_VALUES:
1729: #endif
1730: case NOT_SET_VALUES:
1731: break;
1732: default:
1733: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1734: }
1735: return(0);
1736: }
1738: PETSC_STATIC_INLINE void Pack_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1739: {
1740: PetscInt i,idx;
1742: for (i=0; i<n; i++) {
1743: idx = *indicesx++;
1744: y[0] = x[idx];
1745: y[1] = x[idx+1];
1746: y[2] = x[idx+2];
1747: y[3] = x[idx+3];
1748: y[4] = x[idx+4];
1749: y[5] = x[idx+5];
1750: y[6] = x[idx+6];
1751: y[7] = x[idx+7];
1752: y[8] = x[idx+8];
1753: y[9] = x[idx+9];
1754: y[10] = x[idx+10];
1755: y += 11;
1756: }
1757: }
1761: PETSC_STATIC_INLINE PetscErrorCode UnPack_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1762: {
1763: PetscInt i,idy;
1766: switch (addv) {
1767: case INSERT_VALUES:
1768: case INSERT_ALL_VALUES:
1769: for (i=0; i<n; i++) {
1770: idy = *indicesy++;
1771: y[idy] = x[0];
1772: y[idy+1] = x[1];
1773: y[idy+2] = x[2];
1774: y[idy+3] = x[3];
1775: y[idy+4] = x[4];
1776: y[idy+5] = x[5];
1777: y[idy+6] = x[6];
1778: y[idy+7] = x[7];
1779: y[idy+8] = x[8];
1780: y[idy+9] = x[9];
1781: y[idy+10] = x[10];
1782: x += 11;
1783: }
1784: break;
1785: case ADD_VALUES:
1786: case ADD_ALL_VALUES:
1787: for (i=0; i<n; i++) {
1788: idy = *indicesy++;
1789: y[idy] += x[0];
1790: y[idy+1] += x[1];
1791: y[idy+2] += x[2];
1792: y[idy+3] += x[3];
1793: y[idy+4] += x[4];
1794: y[idy+5] += x[5];
1795: y[idy+6] += x[6];
1796: y[idy+7] += x[7];
1797: y[idy+8] += x[8];
1798: y[idy+9] += x[9];
1799: y[idy+10] += x[10];
1800: x += 11;
1801: }
1802: break;
1803: #if !defined(PETSC_USE_COMPLEX)
1804: case MAX_VALUES:
1805: for (i=0; i<n; i++) {
1806: idy = *indicesy++;
1807: y[idy] = PetscMax(y[idy],x[0]);
1808: y[idy+1] = PetscMax(y[idy+1],x[1]);
1809: y[idy+2] = PetscMax(y[idy+2],x[2]);
1810: y[idy+3] = PetscMax(y[idy+3],x[3]);
1811: y[idy+4] = PetscMax(y[idy+4],x[4]);
1812: y[idy+5] = PetscMax(y[idy+5],x[5]);
1813: y[idy+6] = PetscMax(y[idy+6],x[6]);
1814: y[idy+7] = PetscMax(y[idy+7],x[7]);
1815: y[idy+8] = PetscMax(y[idy+8],x[8]);
1816: y[idy+9] = PetscMax(y[idy+9],x[9]);
1817: y[idy+10] = PetscMax(y[idy+10],x[10]);
1818: x += 11;
1819: }
1820: #else
1821: case MAX_VALUES:
1822: #endif
1823: case NOT_SET_VALUES:
1824: break;
1825: default:
1826: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1827: }
1828: return(0);
1829: }
1833: PETSC_STATIC_INLINE PetscErrorCode Scatter_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1834: {
1835: PetscInt i,idx,idy;
1838: switch (addv) {
1839: case INSERT_VALUES:
1840: case INSERT_ALL_VALUES:
1841: for (i=0; i<n; i++) {
1842: idx = *indicesx++;
1843: idy = *indicesy++;
1844: y[idy] = x[idx];
1845: y[idy+1] = x[idx+1];
1846: y[idy+2] = x[idx+2];
1847: y[idy+3] = x[idx+3];
1848: y[idy+4] = x[idx+4];
1849: y[idy+5] = x[idx+5];
1850: y[idy+6] = x[idx+6];
1851: y[idy+7] = x[idx+7];
1852: y[idy+8] = x[idx+8];
1853: y[idy+9] = x[idx+9];
1854: y[idy+10] = x[idx+10];
1855: }
1856: break;
1857: case ADD_VALUES:
1858: case ADD_ALL_VALUES:
1859: for (i=0; i<n; i++) {
1860: idx = *indicesx++;
1861: idy = *indicesy++;
1862: y[idy] += x[idx];
1863: y[idy+1] += x[idx+1];
1864: y[idy+2] += x[idx+2];
1865: y[idy+3] += x[idx+3];
1866: y[idy+4] += x[idx+4];
1867: y[idy+5] += x[idx+5];
1868: y[idy+6] += x[idx+6];
1869: y[idy+7] += x[idx+7];
1870: y[idy+8] += x[idx+8];
1871: y[idy+9] += x[idx+9];
1872: y[idy+10] += x[idx+10];
1873: }
1874: break;
1875: #if !defined(PETSC_USE_COMPLEX)
1876: case MAX_VALUES:
1877: for (i=0; i<n; i++) {
1878: idx = *indicesx++;
1879: idy = *indicesy++;
1880: y[idy] = PetscMax(y[idy],x[idx]);
1881: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1882: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1883: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1884: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1885: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1886: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1887: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1888: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1889: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1890: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1891: }
1892: #else
1893: case MAX_VALUES:
1894: #endif
1895: case NOT_SET_VALUES:
1896: break;
1897: default:
1898: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1899: }
1900: return(0);
1901: }
1903: /* ----------------------------------------------------------------------------------------------- */
1904: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1905: {
1906: PetscInt i,idx;
1908: for (i=0; i<n; i++) {
1909: idx = *indicesx++;
1910: y[0] = x[idx];
1911: y[1] = x[idx+1];
1912: y[2] = x[idx+2];
1913: y[3] = x[idx+3];
1914: y[4] = x[idx+4];
1915: y[5] = x[idx+5];
1916: y[6] = x[idx+6];
1917: y[7] = x[idx+7];
1918: y[8] = x[idx+8];
1919: y[9] = x[idx+9];
1920: y[10] = x[idx+10];
1921: y[11] = x[idx+11];
1922: y += 12;
1923: }
1924: }
1928: PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1929: {
1930: PetscInt i,idy;
1933: switch (addv) {
1934: case INSERT_VALUES:
1935: case INSERT_ALL_VALUES:
1936: for (i=0; i<n; i++) {
1937: idy = *indicesy++;
1938: y[idy] = x[0];
1939: y[idy+1] = x[1];
1940: y[idy+2] = x[2];
1941: y[idy+3] = x[3];
1942: y[idy+4] = x[4];
1943: y[idy+5] = x[5];
1944: y[idy+6] = x[6];
1945: y[idy+7] = x[7];
1946: y[idy+8] = x[8];
1947: y[idy+9] = x[9];
1948: y[idy+10] = x[10];
1949: y[idy+11] = x[11];
1950: x += 12;
1951: }
1952: break;
1953: case ADD_VALUES:
1954: case ADD_ALL_VALUES:
1955: for (i=0; i<n; i++) {
1956: idy = *indicesy++;
1957: y[idy] += x[0];
1958: y[idy+1] += x[1];
1959: y[idy+2] += x[2];
1960: y[idy+3] += x[3];
1961: y[idy+4] += x[4];
1962: y[idy+5] += x[5];
1963: y[idy+6] += x[6];
1964: y[idy+7] += x[7];
1965: y[idy+8] += x[8];
1966: y[idy+9] += x[9];
1967: y[idy+10] += x[10];
1968: y[idy+11] += x[11];
1969: x += 12;
1970: }
1971: break;
1972: #if !defined(PETSC_USE_COMPLEX)
1973: case MAX_VALUES:
1974: for (i=0; i<n; i++) {
1975: idy = *indicesy++;
1976: y[idy] = PetscMax(y[idy],x[0]);
1977: y[idy+1] = PetscMax(y[idy+1],x[1]);
1978: y[idy+2] = PetscMax(y[idy+2],x[2]);
1979: y[idy+3] = PetscMax(y[idy+3],x[3]);
1980: y[idy+4] = PetscMax(y[idy+4],x[4]);
1981: y[idy+5] = PetscMax(y[idy+5],x[5]);
1982: y[idy+6] = PetscMax(y[idy+6],x[6]);
1983: y[idy+7] = PetscMax(y[idy+7],x[7]);
1984: y[idy+8] = PetscMax(y[idy+8],x[8]);
1985: y[idy+9] = PetscMax(y[idy+9],x[9]);
1986: y[idy+10] = PetscMax(y[idy+10],x[10]);
1987: y[idy+11] = PetscMax(y[idy+11],x[11]);
1988: x += 12;
1989: }
1990: #else
1991: case MAX_VALUES:
1992: #endif
1993: case NOT_SET_VALUES:
1994: break;
1995: default:
1996: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1997: }
1998: return(0);
1999: }
2003: PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2004: {
2005: PetscInt i,idx,idy;
2008: switch (addv) {
2009: case INSERT_VALUES:
2010: case INSERT_ALL_VALUES:
2011: for (i=0; i<n; i++) {
2012: idx = *indicesx++;
2013: idy = *indicesy++;
2014: y[idy] = x[idx];
2015: y[idy+1] = x[idx+1];
2016: y[idy+2] = x[idx+2];
2017: y[idy+3] = x[idx+3];
2018: y[idy+4] = x[idx+4];
2019: y[idy+5] = x[idx+5];
2020: y[idy+6] = x[idx+6];
2021: y[idy+7] = x[idx+7];
2022: y[idy+8] = x[idx+8];
2023: y[idy+9] = x[idx+9];
2024: y[idy+10] = x[idx+10];
2025: y[idy+11] = x[idx+11];
2026: }
2027: break;
2028: case ADD_VALUES:
2029: case ADD_ALL_VALUES:
2030: for (i=0; i<n; i++) {
2031: idx = *indicesx++;
2032: idy = *indicesy++;
2033: y[idy] += x[idx];
2034: y[idy+1] += x[idx+1];
2035: y[idy+2] += x[idx+2];
2036: y[idy+3] += x[idx+3];
2037: y[idy+4] += x[idx+4];
2038: y[idy+5] += x[idx+5];
2039: y[idy+6] += x[idx+6];
2040: y[idy+7] += x[idx+7];
2041: y[idy+8] += x[idx+8];
2042: y[idy+9] += x[idx+9];
2043: y[idy+10] += x[idx+10];
2044: y[idy+11] += x[idx+11];
2045: }
2046: break;
2047: #if !defined(PETSC_USE_COMPLEX)
2048: case MAX_VALUES:
2049: for (i=0; i<n; i++) {
2050: idx = *indicesx++;
2051: idy = *indicesy++;
2052: y[idy] = PetscMax(y[idy],x[idx]);
2053: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
2054: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
2055: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
2056: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
2057: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
2058: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
2059: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
2060: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
2061: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
2062: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
2063: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
2064: }
2065: #else
2066: case MAX_VALUES:
2067: #endif
2068: case NOT_SET_VALUES:
2069: break;
2070: default:
2071: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2072: }
2073: return(0);
2074: }
2076: /* ----------------------------------------------------------------------------------------------- */
2077: PETSC_STATIC_INLINE void Pack_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
2078: {
2079: PetscInt i,idx;
2081: for (i=0; i<n; i++) {
2082: idx = *indicesx++;
2083: PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
2084: y += bs;
2085: }
2086: }
2090: PETSC_STATIC_INLINE PetscErrorCode UnPack_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2091: {
2092: PetscInt i,idy,j;
2095: switch (addv) {
2096: case INSERT_VALUES:
2097: case INSERT_ALL_VALUES:
2098: for (i=0; i<n; i++) {
2099: idy = *indicesy++;
2100: PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
2101: x += bs;
2102: }
2103: break;
2104: case ADD_VALUES:
2105: case ADD_ALL_VALUES:
2106: for (i=0; i<n; i++) {
2107: idy = *indicesy++;
2108: for (j=0; j<bs; j++) y[idy+j] += x[j];
2109: x += bs;
2110: }
2111: break;
2112: #if !defined(PETSC_USE_COMPLEX)
2113: case MAX_VALUES:
2114: for (i=0; i<n; i++) {
2115: idy = *indicesy++;
2116: for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
2117: x += bs;
2118: }
2119: #else
2120: case MAX_VALUES:
2121: #endif
2122: case NOT_SET_VALUES:
2123: break;
2124: default:
2125: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2126: }
2127: return(0);
2128: }
2132: PETSC_STATIC_INLINE PetscErrorCode Scatter_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2133: {
2134: PetscInt i,idx,idy,j;
2137: switch (addv) {
2138: case INSERT_VALUES:
2139: case INSERT_ALL_VALUES:
2140: for (i=0; i<n; i++) {
2141: idx = *indicesx++;
2142: idy = *indicesy++;
2143: PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
2144: }
2145: break;
2146: case ADD_VALUES:
2147: case ADD_ALL_VALUES:
2148: for (i=0; i<n; i++) {
2149: idx = *indicesx++;
2150: idy = *indicesy++;
2151: for (j=0; j<bs; j++ ) y[idy+j] += x[idx+j];
2152: }
2153: break;
2154: #if !defined(PETSC_USE_COMPLEX)
2155: case MAX_VALUES:
2156: for (i=0; i<n; i++) {
2157: idx = *indicesx++;
2158: idy = *indicesy++;
2159: for (j=0; j<bs; j++ ) y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
2160: }
2161: #else
2162: case MAX_VALUES:
2163: #endif
2164: case NOT_SET_VALUES:
2165: break;
2166: default:
2167: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2168: }
2169: return(0);
2170: }
2172: /* Create the VecScatterBegin/End_P for our chosen block sizes */
2173: #define BS 1
2174: #include <../src/vec/vec/utils/vpscat.h>
2175: #define BS 2
2176: #include <../src/vec/vec/utils/vpscat.h>
2177: #define BS 3
2178: #include <../src/vec/vec/utils/vpscat.h>
2179: #define BS 4
2180: #include <../src/vec/vec/utils/vpscat.h>
2181: #define BS 5
2182: #include <../src/vec/vec/utils/vpscat.h>
2183: #define BS 6
2184: #include <../src/vec/vec/utils/vpscat.h>
2185: #define BS 7
2186: #include <../src/vec/vec/utils/vpscat.h>
2187: #define BS 8
2188: #include <../src/vec/vec/utils/vpscat.h>
2189: #define BS 9
2190: #include <../src/vec/vec/utils/vpscat.h>
2191: #define BS 10
2192: #include <../src/vec/vec/utils/vpscat.h>
2193: #define BS 11
2194: #include <../src/vec/vec/utils/vpscat.h>
2195: #define BS 12
2196: #include <../src/vec/vec/utils/vpscat.h>
2197: #define BS bs
2198: #include <../src/vec/vec/utils/vpscat.h>
2200: /* ==========================================================================================*/
2202: /* create parallel to sequential scatter context */
2204: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
2208: /*@
2209: VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.
2211: Collective on VecScatter
2213: Input Parameters:
2214: + VecScatter - obtained with VecScatterCreateEmpty()
2215: . nsends -
2216: . sendSizes -
2217: . sendProcs -
2218: . sendIdx - indices where the sent entries are obtained from (in local, on process numbering), this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
2219: . nrecvs - number of receives to expect
2220: . recvSizes -
2221: . recvProcs - processes who are sending to me
2222: . recvIdx - indices of where received entries are to be put, (in local, on process numbering), this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
2223: - bs - size of block
2225: Notes: sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support having 0 entries you need
2226: to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.
2228: Probably does not handle sends to self properly. It should remove those from the counts that are used
2229: in allocating space inside of the from struct
2231: Level: intermediate
2233: @*/
2234: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
2235: {
2236: VecScatter_MPI_General *from, *to;
2237: PetscInt sendSize, recvSize;
2238: PetscInt n, i;
2239: PetscErrorCode ierr;
2241: /* allocate entire send scatter context */
2242: PetscNewLog(ctx,&to);
2243: to->n = nsends;
2244: for (n = 0, sendSize = 0; n < to->n; n++) sendSize += sendSizes[n];
2246: PetscMalloc1(to->n,&to->requests);
2247: PetscMalloc4(bs*sendSize,&to->values,sendSize,&to->indices,to->n+1,&to->starts,to->n,&to->procs);
2248: PetscMalloc2(PetscMax(to->n,nrecvs),&to->sstatus,PetscMax(to->n,nrecvs),&to->rstatus);
2250: to->starts[0] = 0;
2251: for (n = 0; n < to->n; n++) {
2252: if (sendSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
2253: to->starts[n+1] = to->starts[n] + sendSizes[n];
2254: to->procs[n] = sendProcs[n];
2255: for (i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) to->indices[i] = sendIdx[i];
2256: }
2257: ctx->todata = (void*) to;
2259: /* allocate entire receive scatter context */
2260: PetscNewLog(ctx,&from);
2261: from->n = nrecvs;
2262: for (n = 0, recvSize = 0; n < from->n; n++) recvSize += recvSizes[n];
2264: PetscMalloc1(from->n,&from->requests);
2265: PetscMalloc4(bs*recvSize,&from->values,recvSize,&from->indices,from->n+1,&from->starts,from->n,&from->procs);
2267: from->starts[0] = 0;
2268: for (n = 0; n < from->n; n++) {
2269: if (recvSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
2270: from->starts[n+1] = from->starts[n] + recvSizes[n];
2271: from->procs[n] = recvProcs[n];
2272: for (i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) from->indices[i] = recvIdx[i];
2273: }
2274: ctx->fromdata = (void*)from;
2276: /* No local scatter optimization */
2277: from->local.n = 0;
2278: from->local.vslots = 0;
2279: to->local.n = 0;
2280: to->local.vslots = 0;
2281: from->local.nonmatching_computed = PETSC_FALSE;
2282: from->local.n_nonmatching = 0;
2283: from->local.slots_nonmatching = 0;
2284: to->local.nonmatching_computed = PETSC_FALSE;
2285: to->local.n_nonmatching = 0;
2286: to->local.slots_nonmatching = 0;
2288: from->type = VEC_SCATTER_MPI_GENERAL;
2289: to->type = VEC_SCATTER_MPI_GENERAL;
2290: from->bs = bs;
2291: to->bs = bs;
2292: VecScatterCreateCommon_PtoS(from, to, ctx);
2294: /* mark lengths as negative so it won't check local vector lengths */
2295: ctx->from_n = ctx->to_n = -1;
2296: return(0);
2297: }
2299: /*
2300: bs indicates how many elements there are in each block. Normally this would be 1.
2302: contains check that PetscMPIInt can handle the sizes needed
2303: */
2306: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2307: {
2308: VecScatter_MPI_General *from,*to;
2309: PetscMPIInt size,rank,imdex,tag,n;
2310: PetscInt *source = NULL,*owners = NULL,nxr;
2311: PetscInt *lowner = NULL,*start = NULL,lengthy,lengthx;
2312: PetscMPIInt *nprocs = NULL,nrecvs;
2313: PetscInt i,j,idx,nsends;
2314: PetscMPIInt *owner = NULL;
2315: PetscInt *starts = NULL,count,slen;
2316: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2317: PetscMPIInt *onodes1,*olengths1;
2318: MPI_Comm comm;
2319: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2320: MPI_Status recv_status,*send_status;
2321: PetscErrorCode ierr;
2324: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2325: PetscObjectGetComm((PetscObject)xin,&comm);
2326: MPI_Comm_rank(comm,&rank);
2327: MPI_Comm_size(comm,&size);
2328: owners = xin->map->range;
2329: VecGetSize(yin,&lengthy);
2330: VecGetSize(xin,&lengthx);
2332: /* first count number of contributors to each processor */
2333: PetscMalloc2(size,&nprocs,nx,&owner);
2334: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2336: j = 0;
2337: nsends = 0;
2338: for (i=0; i<nx; i++) {
2339: idx = bs*inidx[i];
2340: if (idx < owners[j]) j = 0;
2341: for (; j<size; j++) {
2342: if (idx < owners[j+1]) {
2343: if (!nprocs[j]++) nsends++;
2344: owner[i] = j;
2345: break;
2346: }
2347: }
2348: if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2349: }
2351: nprocslocal = nprocs[rank];
2352: nprocs[rank] = 0;
2353: if (nprocslocal) nsends--;
2354: /* inform other processors of number of messages and max length*/
2355: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2356: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2357: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2358: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2360: /* post receives: */
2361: PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2362: count = 0;
2363: for (i=0; i<nrecvs; i++) {
2364: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2365: count += olengths1[i];
2366: }
2368: /* do sends:
2369: 1) starts[i] gives the starting index in svalues for stuff going to
2370: the ith processor
2371: */
2372: nxr = 0;
2373: for (i=0; i<nx; i++) {
2374: if (owner[i] != rank) nxr++;
2375: }
2376: PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);
2378: starts[0] = 0;
2379: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2380: for (i=0; i<nx; i++) {
2381: if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2382: }
2383: starts[0] = 0;
2384: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2385: count = 0;
2386: for (i=0; i<size; i++) {
2387: if (nprocs[i]) {
2388: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2389: }
2390: }
2392: /* wait on receives */
2393: count = nrecvs;
2394: slen = 0;
2395: while (count) {
2396: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2397: /* unpack receives into our local space */
2398: MPI_Get_count(&recv_status,MPIU_INT,&n);
2399: slen += n;
2400: count--;
2401: }
2403: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
2405: /* allocate entire send scatter context */
2406: PetscNewLog(ctx,&to);
2407: to->n = nrecvs;
2409: PetscMalloc1(nrecvs,&to->requests);
2410: PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);
2411: PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);
2413: ctx->todata = (void*)to;
2414: to->starts[0] = 0;
2416: if (nrecvs) {
2417: /* move the data into the send scatter */
2418: base = owners[rank];
2419: rsvalues = rvalues;
2420: for (i=0; i<nrecvs; i++) {
2421: to->starts[i+1] = to->starts[i] + olengths1[i];
2422: to->procs[i] = onodes1[i];
2423: values = rsvalues;
2424: rsvalues += olengths1[i];
2425: for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
2426: }
2427: }
2428: PetscFree(olengths1);
2429: PetscFree(onodes1);
2430: PetscFree3(rvalues,source,recv_waits);
2432: /* allocate entire receive scatter context */
2433: PetscNewLog(ctx,&from);
2434: from->n = nsends;
2436: PetscMalloc1(nsends,&from->requests);
2437: PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2438: ctx->fromdata = (void*)from;
2440: /* move data into receive scatter */
2441: PetscMalloc2(size,&lowner,nsends+1,&start);
2442: count = 0; from->starts[0] = start[0] = 0;
2443: for (i=0; i<size; i++) {
2444: if (nprocs[i]) {
2445: lowner[i] = count;
2446: from->procs[count++] = i;
2447: from->starts[count] = start[count] = start[count-1] + nprocs[i];
2448: }
2449: }
2451: for (i=0; i<nx; i++) {
2452: if (owner[i] != rank) {
2453: from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2454: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2455: }
2456: }
2457: PetscFree2(lowner,start);
2458: PetscFree2(nprocs,owner);
2460: /* wait on sends */
2461: if (nsends) {
2462: PetscMalloc1(nsends,&send_status);
2463: MPI_Waitall(nsends,send_waits,send_status);
2464: PetscFree(send_status);
2465: }
2466: PetscFree3(svalues,send_waits,starts);
2468: if (nprocslocal) {
2469: PetscInt nt = from->local.n = to->local.n = nprocslocal;
2470: /* we have a scatter to ourselves */
2471: PetscMalloc1(nt,&to->local.vslots);
2472: PetscMalloc1(nt,&from->local.vslots);
2473: nt = 0;
2474: for (i=0; i<nx; i++) {
2475: idx = bs*inidx[i];
2476: if (idx >= owners[rank] && idx < owners[rank+1]) {
2477: to->local.vslots[nt] = idx - owners[rank];
2478: from->local.vslots[nt++] = bs*inidy[i];
2479: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2480: }
2481: }
2482: PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2483: } else {
2484: from->local.n = 0;
2485: from->local.vslots = 0;
2486: to->local.n = 0;
2487: to->local.vslots = 0;
2488: }
2490: from->local.nonmatching_computed = PETSC_FALSE;
2491: from->local.n_nonmatching = 0;
2492: from->local.slots_nonmatching = 0;
2493: to->local.nonmatching_computed = PETSC_FALSE;
2494: to->local.n_nonmatching = 0;
2495: to->local.slots_nonmatching = 0;
2497: from->type = VEC_SCATTER_MPI_GENERAL;
2498: to->type = VEC_SCATTER_MPI_GENERAL;
2499: from->bs = bs;
2500: to->bs = bs;
2502: VecScatterCreateCommon_PtoS(from,to,ctx);
2503: return(0);
2504: }
2506: /*
2507: bs indicates how many elements there are in each block. Normally this would be 1.
2508: */
2511: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2512: {
2513: MPI_Comm comm;
2514: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
2515: PetscInt bs = to->bs;
2516: PetscMPIInt size;
2517: PetscInt i, n;
2521: PetscObjectGetComm((PetscObject)ctx,&comm);
2522: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2523: ctx->ops->destroy = VecScatterDestroy_PtoP;
2525: ctx->reproduce = PETSC_FALSE;
2526: to->sendfirst = PETSC_FALSE;
2527: PetscOptionsGetBool(NULL,NULL,"-vecscatter_reproduce",&ctx->reproduce,NULL);
2528: PetscOptionsGetBool(NULL,NULL,"-vecscatter_sendfirst",&to->sendfirst,NULL);
2529: from->sendfirst = to->sendfirst;
2531: MPI_Comm_size(comm,&size);
2532: /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2533: to->contiq = PETSC_FALSE;
2534: n = from->starts[from->n];
2535: from->contiq = PETSC_TRUE;
2536: for (i=1; i<n; i++) {
2537: if (from->indices[i] != from->indices[i-1] + bs) {
2538: from->contiq = PETSC_FALSE;
2539: break;
2540: }
2541: }
2543: to->use_alltoallv = PETSC_FALSE;
2544: PetscOptionsGetBool(NULL,NULL,"-vecscatter_alltoall",&to->use_alltoallv,NULL);
2545: from->use_alltoallv = to->use_alltoallv;
2546: if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
2547: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
2548: if (to->use_alltoallv) {
2549: to->use_alltoallw = PETSC_FALSE;
2550: PetscOptionsGetBool(NULL,NULL,"-vecscatter_nopack",&to->use_alltoallw,NULL);
2551: }
2552: from->use_alltoallw = to->use_alltoallw;
2553: if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
2554: #endif
2556: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2557: to->use_window = PETSC_FALSE;
2558: PetscOptionsGetBool(NULL,NULL,"-vecscatter_window",&to->use_window,NULL);
2559: from->use_window = to->use_window;
2560: #endif
2562: if (to->use_alltoallv) {
2564: PetscMalloc2(size,&to->counts,size,&to->displs);
2565: PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
2566: for (i=0; i<to->n; i++) to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);
2568: to->displs[0] = 0;
2569: for (i=1; i<size; i++) to->displs[i] = to->displs[i-1] + to->counts[i-1];
2571: PetscMalloc2(size,&from->counts,size,&from->displs);
2572: PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
2573: for (i=0; i<from->n; i++) from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2574: from->displs[0] = 0;
2575: for (i=1; i<size; i++) from->displs[i] = from->displs[i-1] + from->counts[i-1];
2577: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
2578: if (to->use_alltoallw) {
2579: PetscMPIInt mpibs, mpilen;
2581: ctx->packtogether = PETSC_FALSE;
2582: PetscMPIIntCast(bs,&mpibs);
2583: PetscMalloc3(size,&to->wcounts,size,&to->wdispls,size,&to->types);
2584: PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
2585: PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
2586: for (i=0; i<size; i++) to->types[i] = MPIU_SCALAR;
2588: for (i=0; i<to->n; i++) {
2589: to->wcounts[to->procs[i]] = 1;
2590: PetscMPIIntCast(to->starts[i+1]-to->starts[i],&mpilen);
2591: MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
2592: MPI_Type_commit(to->types+to->procs[i]);
2593: }
2594: PetscMalloc3(size,&from->wcounts,size,&from->wdispls,size,&from->types);
2595: PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
2596: PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
2597: for (i=0; i<size; i++) from->types[i] = MPIU_SCALAR;
2599: if (from->contiq) {
2600: PetscInfo(ctx,"Scattered vector entries are stored contiquously, taking advantage of this with -vecscatter_alltoall\n");
2601: for (i=0; i<from->n; i++) from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2603: if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
2604: for (i=1; i<from->n; i++) from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];
2606: } else {
2607: for (i=0; i<from->n; i++) {
2608: from->wcounts[from->procs[i]] = 1;
2609: PetscMPIIntCast(from->starts[i+1]-from->starts[i],&mpilen);
2610: MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
2611: MPI_Type_commit(from->types+from->procs[i]);
2612: }
2613: }
2614: } else ctx->ops->copy = VecScatterCopy_PtoP_AllToAll;
2616: #else
2617: to->use_alltoallw = PETSC_FALSE;
2618: from->use_alltoallw = PETSC_FALSE;
2619: ctx->ops->copy = VecScatterCopy_PtoP_AllToAll;
2620: #endif
2621: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2622: } else if (to->use_window) {
2623: PetscMPIInt temptag,winsize;
2624: MPI_Request *request;
2625: MPI_Status *status;
2627: PetscObjectGetNewTag((PetscObject)ctx,&temptag);
2628: winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
2629: MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
2630: PetscMalloc1(to->n,&to->winstarts);
2631: PetscMalloc2(to->n,&request,to->n,&status);
2632: for (i=0; i<to->n; i++) {
2633: MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
2634: }
2635: for (i=0; i<from->n; i++) {
2636: MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
2637: }
2638: MPI_Waitall(to->n,request,status);
2639: PetscFree2(request,status);
2641: winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
2642: MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
2643: PetscMalloc1(from->n,&from->winstarts);
2644: PetscMalloc2(from->n,&request,from->n,&status);
2645: for (i=0; i<from->n; i++) {
2646: MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
2647: }
2648: for (i=0; i<to->n; i++) {
2649: MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
2650: }
2651: MPI_Waitall(from->n,request,status);
2652: PetscFree2(request,status);
2653: #endif
2654: } else {
2655: PetscBool use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
2656: PetscInt *sstarts = to->starts, *rstarts = from->starts;
2657: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2658: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2659: MPI_Request *rev_swaits,*rev_rwaits;
2660: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2662: /* allocate additional wait variables for the "reverse" scatter */
2663: PetscMalloc1(to->n,&rev_rwaits);
2664: PetscMalloc1(from->n,&rev_swaits);
2665: to->rev_requests = rev_rwaits;
2666: from->rev_requests = rev_swaits;
2668: /* Register the receives that you will use later (sends for scatter reverse) */
2669: PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&use_rsend,NULL);
2670: PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&use_ssend,NULL);
2671: if (use_rsend) {
2672: PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
2673: to->use_readyreceiver = PETSC_TRUE;
2674: from->use_readyreceiver = PETSC_TRUE;
2675: } else {
2676: to->use_readyreceiver = PETSC_FALSE;
2677: from->use_readyreceiver = PETSC_FALSE;
2678: }
2679: if (use_ssend) {
2680: PetscInfo(ctx,"Using VecScatter Ssend mode\n");
2681: }
2683: for (i=0; i<from->n; i++) {
2684: if (use_rsend) {
2685: MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2686: } else if (use_ssend) {
2687: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2688: } else {
2689: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2690: }
2691: }
2693: for (i=0; i<to->n; i++) {
2694: if (use_rsend) {
2695: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2696: } else if (use_ssend) {
2697: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2698: } else {
2699: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2700: }
2701: }
2702: /* Register receives for scatter and reverse */
2703: for (i=0; i<from->n; i++) {
2704: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2705: }
2706: for (i=0; i<to->n; i++) {
2707: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2708: }
2709: if (use_rsend) {
2710: if (to->n) {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
2711: if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
2712: MPI_Barrier(comm);
2713: }
2715: ctx->ops->copy = VecScatterCopy_PtoP_X;
2716: }
2717: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
2719: #if defined(PETSC_USE_DEBUG)
2720: MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2721: MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2722: if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Blocks size %D != %D or %D",bs,i,n);
2723: #endif
2725: switch (bs) {
2726: case 12:
2727: ctx->ops->begin = VecScatterBegin_12;
2728: ctx->ops->end = VecScatterEnd_12;
2729: break;
2730: case 11:
2731: ctx->ops->begin = VecScatterBegin_11;
2732: ctx->ops->end = VecScatterEnd_11;
2733: break;
2734: case 10:
2735: ctx->ops->begin = VecScatterBegin_10;
2736: ctx->ops->end = VecScatterEnd_10;
2737: break;
2738: case 9:
2739: ctx->ops->begin = VecScatterBegin_9;
2740: ctx->ops->end = VecScatterEnd_9;
2741: break;
2742: case 8:
2743: ctx->ops->begin = VecScatterBegin_8;
2744: ctx->ops->end = VecScatterEnd_8;
2745: break;
2746: case 7:
2747: ctx->ops->begin = VecScatterBegin_7;
2748: ctx->ops->end = VecScatterEnd_7;
2749: break;
2750: case 6:
2751: ctx->ops->begin = VecScatterBegin_6;
2752: ctx->ops->end = VecScatterEnd_6;
2753: break;
2754: case 5:
2755: ctx->ops->begin = VecScatterBegin_5;
2756: ctx->ops->end = VecScatterEnd_5;
2757: break;
2758: case 4:
2759: ctx->ops->begin = VecScatterBegin_4;
2760: ctx->ops->end = VecScatterEnd_4;
2761: break;
2762: case 3:
2763: ctx->ops->begin = VecScatterBegin_3;
2764: ctx->ops->end = VecScatterEnd_3;
2765: break;
2766: case 2:
2767: ctx->ops->begin = VecScatterBegin_2;
2768: ctx->ops->end = VecScatterEnd_2;
2769: break;
2770: case 1:
2771: ctx->ops->begin = VecScatterBegin_1;
2772: ctx->ops->end = VecScatterEnd_1;
2773: break;
2774: default:
2775: ctx->ops->begin = VecScatterBegin_bs;
2776: ctx->ops->end = VecScatterEnd_bs;
2778: }
2779: ctx->ops->view = VecScatterView_MPI;
2780: /* Check if the local scatter is actually a copy; important special case */
2781: if (to->local.n) {
2782: VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2783: }
2784: return(0);
2785: }
2789: /* ------------------------------------------------------------------------------------*/
2790: /*
2791: Scatter from local Seq vectors to a parallel vector.
2792: Reverses the order of the arguments, calls VecScatterCreate_PtoS() then
2793: reverses the result.
2794: */
2797: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2798: {
2799: PetscErrorCode ierr;
2800: MPI_Request *waits;
2801: VecScatter_MPI_General *to,*from;
2804: VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2805: to = (VecScatter_MPI_General*)ctx->fromdata;
2806: from = (VecScatter_MPI_General*)ctx->todata;
2807: ctx->todata = (void*)to;
2808: ctx->fromdata = (void*)from;
2809: /* these two are special, they are ALWAYS stored in to struct */
2810: to->sstatus = from->sstatus;
2811: to->rstatus = from->rstatus;
2813: from->sstatus = 0;
2814: from->rstatus = 0;
2816: waits = from->rev_requests;
2817: from->rev_requests = from->requests;
2818: from->requests = waits;
2819: waits = to->rev_requests;
2820: to->rev_requests = to->requests;
2821: to->requests = waits;
2822: return(0);
2823: }
2825: /* ---------------------------------------------------------------------------------*/
2828: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2829: {
2831: PetscMPIInt size,rank,tag,imdex,n;
2832: PetscInt *owners = xin->map->range;
2833: PetscMPIInt *nprocs = NULL;
2834: PetscInt i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2835: PetscMPIInt *owner = NULL;
2836: PetscInt *starts = NULL,count,slen;
2837: PetscInt *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2838: PetscMPIInt *onodes1,*olengths1,nrecvs;
2839: MPI_Comm comm;
2840: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2841: MPI_Status recv_status,*send_status = NULL;
2842: PetscBool duplicate = PETSC_FALSE;
2843: #if defined(PETSC_USE_DEBUG)
2844: PetscBool found = PETSC_FALSE;
2845: #endif
2848: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2849: PetscObjectGetComm((PetscObject)xin,&comm);
2850: MPI_Comm_size(comm,&size);
2851: MPI_Comm_rank(comm,&rank);
2852: if (size == 1) {
2853: VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2854: return(0);
2855: }
2857: /*
2858: Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2859: They then call the StoPScatterCreate()
2860: */
2861: /* first count number of contributors to each processor */
2862: PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2863: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2865: lastidx = -1;
2866: j = 0;
2867: for (i=0; i<nx; i++) {
2868: /* if indices are NOT locally sorted, need to start search at the beginning */
2869: if (lastidx > (idx = bs*inidx[i])) j = 0;
2870: lastidx = idx;
2871: for (; j<size; j++) {
2872: if (idx >= owners[j] && idx < owners[j+1]) {
2873: nprocs[j]++;
2874: owner[i] = j;
2875: #if defined(PETSC_USE_DEBUG)
2876: found = PETSC_TRUE;
2877: #endif
2878: break;
2879: }
2880: }
2881: #if defined(PETSC_USE_DEBUG)
2882: if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2883: found = PETSC_FALSE;
2884: #endif
2885: }
2886: nsends = 0;
2887: for (i=0; i<size; i++) nsends += (nprocs[i] > 0);
2889: /* inform other processors of number of messages and max length*/
2890: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2891: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2892: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2893: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2895: /* post receives: */
2896: PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);
2898: count = 0;
2899: for (i=0; i<nrecvs; i++) {
2900: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2901: count += olengths1[i];
2902: }
2903: PetscFree(onodes1);
2905: /* do sends:
2906: 1) starts[i] gives the starting index in svalues for stuff going to
2907: the ith processor
2908: */
2909: starts[0]= 0;
2910: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2911: for (i=0; i<nx; i++) {
2912: svalues[2*starts[owner[i]]] = bs*inidx[i];
2913: svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2914: }
2916: starts[0] = 0;
2917: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2918: count = 0;
2919: for (i=0; i<size; i++) {
2920: if (nprocs[i]) {
2921: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2922: count++;
2923: }
2924: }
2925: PetscFree3(nprocs,owner,starts);
2927: /* wait on receives */
2928: count = nrecvs;
2929: slen = 0;
2930: while (count) {
2931: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2932: /* unpack receives into our local space */
2933: MPI_Get_count(&recv_status,MPIU_INT,&n);
2934: slen += n/2;
2935: count--;
2936: }
2937: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2939: PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2940: base = owners[rank];
2941: count = 0;
2942: rsvalues = rvalues;
2943: for (i=0; i<nrecvs; i++) {
2944: values = rsvalues;
2945: rsvalues += 2*olengths1[i];
2946: for (j=0; j<olengths1[i]; j++) {
2947: local_inidx[count] = values[2*j] - base;
2948: local_inidy[count++] = values[2*j+1];
2949: }
2950: }
2951: PetscFree(olengths1);
2953: /* wait on sends */
2954: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2955: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2957: /*
2958: should sort and remove duplicates from local_inidx,local_inidy
2959: */
2961: #if defined(do_it_slow)
2962: /* sort on the from index */
2963: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2964: start = 0;
2965: while (start < slen) {
2966: count = start+1;
2967: last = local_inidx[start];
2968: while (count < slen && last == local_inidx[count]) count++;
2969: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2970: /* sort on to index */
2971: PetscSortInt(count-start,local_inidy+start);
2972: }
2973: /* remove duplicates; not most efficient way, but probably good enough */
2974: i = start;
2975: while (i < count-1) {
2976: if (local_inidy[i] != local_inidy[i+1]) i++;
2977: else { /* found a duplicate */
2978: duplicate = PETSC_TRUE;
2979: for (j=i; j<slen-1; j++) {
2980: local_inidx[j] = local_inidx[j+1];
2981: local_inidy[j] = local_inidy[j+1];
2982: }
2983: slen--;
2984: count--;
2985: }
2986: }
2987: start = count;
2988: }
2989: #endif
2990: if (duplicate) {
2991: PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2992: }
2993: VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2994: PetscFree2(local_inidx,local_inidy);
2995: return(0);
2996: }
3000: /*@
3001: PetscSFCreateFromZero - Create a PetscSF that maps a Vec from sequential to distributed
3003: Input Parameters:
3004: . gv - A distributed Vec
3006: Output Parameters:
3007: . sf - The SF created mapping a sequential Vec to gv
3009: Level: developer
3011: .seealso: DMPlexDistributedToSequential()
3012: @*/
3013: PetscErrorCode PetscSFCreateFromZero(MPI_Comm comm, Vec gv, PetscSF *sf)
3014: {
3015: PetscSFNode *remotenodes;
3016: PetscInt *localnodes;
3017: PetscInt N, n, start, numroots, l;
3018: PetscMPIInt rank;
3022: PetscSFCreate(comm, sf);
3023: VecGetSize(gv, &N);
3024: VecGetLocalSize(gv, &n);
3025: VecGetOwnershipRange(gv, &start, NULL);
3026: MPI_Comm_rank(comm, &rank);
3027: PetscMalloc1(n, &localnodes);
3028: PetscMalloc1(n, &remotenodes);
3029: if (!rank) numroots = N;
3030: else numroots = 0;
3031: for (l = 0; l < n; ++l) {
3032: localnodes[l] = l;
3033: remotenodes[l].rank = 0;
3034: remotenodes[l].index = l+start;
3035: }
3036: PetscSFSetGraph(*sf, numroots, n, localnodes, PETSC_OWN_POINTER, remotenodes, PETSC_OWN_POINTER);
3037: return(0);
3038: }