Actual source code: gasm.c

petsc-3.7.5 2017-01-01
  1: /*
  2:   This file defines a "generalized" additive Schwarz preconditioner for any Mat implementation.
  3:   In this version each processor may intersect multiple subdomains and any subdomain may
  4:   intersect multiple processors.  Intersections of subdomains with processors are called *local
  5:   subdomains*.

  7:        N    - total number of distinct global subdomains          (set explicitly in PCGASMSetTotalSubdomains() or implicitly in PCGASMSetSubdomains(), then calculated in PCSetUp_GASM())
  8:        n    - actual number of local subdomains on this processor (set in PCGASMSetSubdomains() or calculated in PCGASMSetTotalSubdomains())
  9:        nmax - maximum number of local subdomains per processor    (calculated in PCSetUp_GASM())
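
       For example (an illustrative scenario, not from the original comment): with 4 MPI processes
       and N = 2 subdomains, each straddling 2 processes, every process has n = 1 local subdomain
       and nmax = 1.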
 10: */
 11: #include <petsc/private/pcimpl.h>     /*I "petscpc.h" I*/
 12: #include <petscdm.h>

 14: typedef struct {
 15:   PetscInt    N,n,nmax;
 16:   PetscInt    overlap;                  /* overlap requested by user */
 17:   PCGASMType  type;                     /* use reduced interpolation, restriction or both */
 18:   PetscBool   type_set;                 /* if user set this value (so won't change it for symmetric problems) */
 19:   PetscBool   same_subdomain_solvers;   /* flag indicating whether all local solvers are same */
 20:   PetscBool   sort_indices;             /* flag to sort subdomain indices */
 21:   PetscBool   user_subdomains;          /* whether the user set explicit subdomain index sets -- keep them on PCReset() */
 22:   PetscBool   dm_subdomains;            /* whether DM is allowed to define subdomains */
 23:   PetscBool   hierarchicalpartitioning;
 24:   IS          *ois;                     /* index sets that define the outer (conceptually, overlapping) subdomains */
 25:   IS          *iis;                     /* index sets that define the inner (conceptually, nonoverlapping) subdomains */
 26:   KSP         *ksp;                     /* linear solvers for each subdomain */
 27:   Mat         *pmat;                    /* subdomain block matrices */
 28:   Vec         gx,gy;                    /* Merged work vectors */
 29:   Vec         *x,*y;                    /* Split work vectors; storage aliases pieces of storage of the above merged vectors. */
 30:   VecScatter  gorestriction;            /* merged restriction to disjoint union of outer subdomains */
 31:   VecScatter  girestriction;            /* merged restriction to disjoint union of inner subdomains */
 32:   VecScatter  pctoouter;
 33:   IS          permutationIS;
 34:   Mat         permutationP;
 35:   Mat         pcmat;
 36:   Vec         pcx,pcy;
 37: } PC_GASM;

 41: static PetscErrorCode  PCGASMComputeGlobalSubdomainNumbering_Private(PC pc,PetscInt **numbering,PetscInt **permutation)
 42: {
 43:   PC_GASM        *osm = (PC_GASM*)pc->data;
 44:   PetscInt       i;

 48:   /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
 49:   PetscMalloc2(osm->n,numbering,osm->n,permutation);
 50:   PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->iis,NULL,*numbering);
 51:   for (i = 0; i < osm->n; ++i) (*permutation)[i] = i;
 52:   PetscSortIntWithPermutation(osm->n,*numbering,*permutation);
 53:   return(0);
 54: }

 58: static PetscErrorCode  PCGASMSubdomainView_Private(PC pc, PetscInt i, PetscViewer viewer)
 59: {
 60:   PC_GASM        *osm = (PC_GASM*)pc->data;
 61:   PetscInt       j,nidx;
 62:   const PetscInt *idx;
 63:   PetscViewer    sviewer;
 64:   char           *cidx;

 68:   if (i < 0 || i >= osm->n) SETERRQ2(PetscObjectComm((PetscObject)viewer), PETSC_ERR_ARG_WRONG, "Invalid subdomain %D: must be nonnegative and less than %D", i, osm->n);
 69:   /* Inner subdomains. */
 70:   ISGetLocalSize(osm->iis[i], &nidx);
 71:   /*
 72:    No more than 15 characters per index plus a space.
 73:    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
 74:    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
 75:    For nidx == 0, the whole string consists of 16 '\0' characters.
 76:    */
 77:   PetscMalloc1(16*(nidx+1)+1, &cidx);
 78:   PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);
 79:   ISGetIndices(osm->iis[i], &idx);
 80:   for (j = 0; j < nidx; ++j) {
 81:     PetscViewerStringSPrintf(sviewer, "%D ", idx[j]);
 82:   }
 83:   ISRestoreIndices(osm->iis[i],&idx);
 84:   PetscViewerDestroy(&sviewer);
 85:   PetscViewerASCIIPrintf(viewer, "Inner subdomain:\n");
 86:   PetscViewerFlush(viewer);
 87:   PetscViewerASCIIPushSynchronized(viewer);
 88:   PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
 89:   PetscViewerFlush(viewer);
 90:   PetscViewerASCIIPopSynchronized(viewer);
 91:   PetscViewerASCIIPrintf(viewer, "\n");
 92:   PetscViewerFlush(viewer);
 93:   PetscFree(cidx);
 94:   /* Outer subdomains. */
 95:   ISGetLocalSize(osm->ois[i], &nidx);
 96:   /*
 97:    No more than 15 characters per index plus a space.
 98:    PetscViewerStringSPrintf requires a string of size at least 2, so use (nidx+1) instead of nidx,
 99:    in case nidx == 0. That will take care of the space for the trailing '\0' as well.
100:    For nidx == 0, the whole string consists of 16 '\0' characters.
101:    */
102:   PetscMalloc1(16*(nidx+1)+1, &cidx);
103:   PetscViewerStringOpen(PETSC_COMM_SELF, cidx, 16*(nidx+1)+1, &sviewer);
104:   ISGetIndices(osm->ois[i], &idx);
105:   for (j = 0; j < nidx; ++j) {
106:     PetscViewerStringSPrintf(sviewer,"%D ", idx[j]);
107:   }
108:   PetscViewerDestroy(&sviewer);
109:   ISRestoreIndices(osm->ois[i],&idx);
110:   PetscViewerASCIIPrintf(viewer, "Outer subdomain:\n");
111:   PetscViewerFlush(viewer);
112:   PetscViewerASCIIPushSynchronized(viewer);
113:   PetscViewerASCIISynchronizedPrintf(viewer, "%s", cidx);
114:   PetscViewerFlush(viewer);
115:   PetscViewerASCIIPopSynchronized(viewer);
116:   PetscViewerASCIIPrintf(viewer, "\n");
117:   PetscViewerFlush(viewer);
118:   PetscFree(cidx);
119:   return(0);
120: }

124: static PetscErrorCode  PCGASMPrintSubdomains(PC pc)
125: {
126:   PC_GASM        *osm = (PC_GASM*)pc->data;
127:   const char     *prefix;
128:   char           fname[PETSC_MAX_PATH_LEN+1];
129:   PetscInt       l, d, count;
130:   PetscBool      doprint,found;
131:   PetscViewer    viewer, sviewer = NULL;
132:   PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

136:   PCGetOptionsPrefix(pc,&prefix);
137:   doprint  = PETSC_FALSE;
138:   PetscOptionsGetBool(NULL,prefix,"-pc_gasm_print_subdomains",&doprint,NULL);
139:   if (!doprint) return(0);
140:   PetscOptionsGetString(NULL,prefix,"-pc_gasm_print_subdomains",fname,PETSC_MAX_PATH_LEN,&found);
141:   if (!found) { PetscStrcpy(fname,"stdout"); }
142:   PetscViewerASCIIOpen(PetscObjectComm((PetscObject)pc),fname,&viewer);
143:   /*
144:    Make sure the viewer has a name. Otherwise this may cause a deadlock or other weird errors when creating a subcomm viewer:
145:    the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed collectively on the comm.
146:   */
147:   PetscObjectName((PetscObject)viewer);
148:   l    = 0;
149:   PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
150:   for (count = 0; count < osm->N; ++count) {
151:     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
152:     if (l<osm->n) {
153:       d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
154:       if (numbering[d] == count) {
155:         PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
156:         PCGASMSubdomainView_Private(pc,d,sviewer);
157:         PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
158:         ++l;
159:       }
160:     }
161:     MPI_Barrier(PetscObjectComm((PetscObject)pc));
162:   }
163:   PetscFree2(numbering,permutation);
164:   PetscViewerDestroy(&viewer);
165:   return(0);
166: }


171: static PetscErrorCode PCView_GASM(PC pc,PetscViewer viewer)
172: {
173:   PC_GASM        *osm = (PC_GASM*)pc->data;
174:   const char     *prefix;
176:   PetscMPIInt    rank, size;
177:   PetscInt       bsz;
178:   PetscBool      iascii,view_subdomains=PETSC_FALSE;
179:   PetscViewer    sviewer;
180:   PetscInt       count, l;
181:   char           overlap[256]     = "user-defined overlap";
182:   char           gsubdomains[256] = "unknown total number of subdomains";
183:   char           msubdomains[256] = "unknown max number of local subdomains";
184:   PetscInt       *numbering,*permutation;/* global numbering of locally-supported subdomains and the permutation from the local ordering */

187:   MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size);
188:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank);

190:   if (osm->overlap >= 0) {
191:     PetscSNPrintf(overlap,sizeof(overlap),"requested amount of overlap = %D",osm->overlap);
192:   }
193:   if (osm->N != PETSC_DETERMINE) {
194:     PetscSNPrintf(gsubdomains, sizeof(gsubdomains), "total number of subdomains = %D",osm->N);
195:   }
196:   if (osm->nmax != PETSC_DETERMINE) {
197:     PetscSNPrintf(msubdomains,sizeof(msubdomains),"max number of local subdomains = %D",osm->nmax);
198:   }

200:   PCGetOptionsPrefix(pc,&prefix);
201:   PetscOptionsGetBool(NULL,prefix,"-pc_gasm_view_subdomains",&view_subdomains,NULL);

203:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
204:   if (iascii) {
205:     /*
206:      Make sure the viewer has a name. Otherwise this may cause a deadlock when creating a subcomm viewer:
207:      the subcomm viewer will attempt to inherit the viewer's name, which, if not set, will be constructed
208:      collectively on the comm.
209:      */
210:     PetscObjectName((PetscObject)viewer);
211:     PetscViewerASCIIPrintf(viewer,"Generalized additive Schwarz:\n");
212:     PetscViewerASCIIPrintf(viewer,"Restriction/interpolation type: %s\n",PCGASMTypes[osm->type]);
213:     PetscViewerASCIIPrintf(viewer,"%s\n",overlap);
214:     PetscViewerASCIIPrintf(viewer,"%s\n",gsubdomains);
215:     PetscViewerASCIIPrintf(viewer,"%s\n",msubdomains);
216:     PetscViewerASCIIPushSynchronized(viewer);
217:     PetscViewerASCIISynchronizedPrintf(viewer,"[%d|%d] number of locally-supported subdomains = %D\n",rank,size,osm->n);
218:     PetscViewerFlush(viewer);
219:     PetscViewerASCIIPopSynchronized(viewer);
220:     /* Cannot take advantage of osm->same_subdomain_solvers without a global numbering of subdomains. */
221:     PetscViewerASCIIPrintf(viewer,"Subdomain solver info is as follows:\n");
222:     PetscViewerASCIIPushTab(viewer);
223:     PetscViewerASCIIPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");
224:     /* Make sure that everybody waits for the banner to be printed. */
225:     MPI_Barrier(PetscObjectComm((PetscObject)viewer));
226:     /* Now let subdomains go one at a time in the global numbering order and print their subdomain/solver info. */
227:     PCGASMComputeGlobalSubdomainNumbering_Private(pc,&numbering,&permutation);
228:     l = 0;
229:     for (count = 0; count < osm->N; ++count) {
230:       PetscMPIInt srank, ssize;
231:       if (l<osm->n) {
232:         PetscInt d = permutation[l]; /* d is the local number of the l-th smallest (in the global ordering) among the locally supported subdomains */
233:         if (numbering[d] == count) {
234:           MPI_Comm_size(((PetscObject)osm->ois[d])->comm, &ssize);
235:           MPI_Comm_rank(((PetscObject)osm->ois[d])->comm, &srank);
236:           PetscViewerGetSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
237:           ISGetLocalSize(osm->ois[d],&bsz);
238:           PetscViewerASCIISynchronizedPrintf(sviewer,"[%d|%d] (subcomm [%d|%d]) local subdomain number %D, local size = %D\n",rank,size,srank,ssize,d,bsz);
239:           PetscViewerFlush(sviewer);
240:           if (view_subdomains) {
241:             PCGASMSubdomainView_Private(pc,d,sviewer);
242:           }
243:           if (!pc->setupcalled) {
244:             PetscViewerASCIIPrintf(sviewer, "Solver not set up yet: PCSetUp() not yet called\n");
245:           } else {
246:             KSPView(osm->ksp[d],sviewer);
247:           }
248:           PetscViewerASCIIPrintf(sviewer,"- - - - - - - - - - - - - - - - - -\n");
249:           PetscViewerFlush(sviewer);
250:           PetscViewerRestoreSubViewer(viewer,((PetscObject)osm->ois[d])->comm, &sviewer);
251:           ++l;
252:         }
253:       }
254:       MPI_Barrier(PetscObjectComm((PetscObject)pc));
255:     }
256:     PetscFree2(numbering,permutation);
257:     PetscViewerASCIIPopTab(viewer);
258:   }
259:   PetscViewerFlush(viewer);
260:   PetscViewerASCIIPopSynchronized(viewer);
261:   return(0);
262: }

264: PETSC_INTERN PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[]);



270: PetscErrorCode PCGASMSetHierarchicalPartitioning(PC pc)
271: {
272:    PC_GASM              *osm = (PC_GASM*)pc->data;
273:    MatPartitioning       part;
274:    MPI_Comm              comm;
275:    PetscMPIInt           size;
276:    PetscInt              nlocalsubdomains,fromrows_localsize;
277:    IS                    partitioning,fromrows,isn;
278:    Vec                   outervec;
279:    PetscErrorCode        ierr;

282:    PetscObjectGetComm((PetscObject)pc,&comm);
283:    MPI_Comm_size(comm,&size);
284:    /* Hierarchical partitioning (HP) is needed only when the total number of
285:     * subdomains is smaller than the number of MPI processes, in which case each
286:     * subdomain must be split further among the processes that share it.
287:     * In the following cases HP is unnecessary, so return early.
288:     */
289:    if(osm->N==PETSC_DETERMINE || osm->N>=size || osm->N==1){
290:           return(0);
291:    }
292:    if(size%osm->N != 0){
293:      SETERRQ2(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"The total number of subdomains %D must be a factor of the number of processors %d",osm->N,size);
294:    }
295:    nlocalsubdomains = size/osm->N;
296:    osm->n           = 1;
297:    MatPartitioningCreate(comm,&part);
298:    MatPartitioningSetAdjacency(part,pc->pmat);
299:    MatPartitioningSetType(part,MATPARTITIONINGHIERARCH);
300:    MatPartitioningHierarchicalSetNcoarseparts(part,osm->N);
301:    MatPartitioningHierarchicalSetNfineparts(part,nlocalsubdomains);
302:    MatPartitioningSetFromOptions(part);
303:    /* get the new owning process of each vertex */
304:    MatPartitioningApply(part,&partitioning);
305:    ISBuildTwoSided(partitioning,NULL,&fromrows);
306:    ISPartitioningToNumbering(partitioning,&isn);
307:    ISDestroy(&isn);
308:    ISGetLocalSize(fromrows,&fromrows_localsize);
309:    MatPartitioningDestroy(&part);
310:    MatCreateVecs(pc->pmat,&outervec,NULL);
311:    VecCreateMPI(comm,fromrows_localsize,PETSC_DETERMINE,&(osm->pcx));
312:    VecDuplicate(osm->pcx,&(osm->pcy));
313:    VecScatterCreate(osm->pcx,NULL,outervec,fromrows,&(osm->pctoouter));
314:    MatGetSubMatrix(pc->pmat,fromrows,fromrows,MAT_INITIAL_MATRIX,&(osm->permutationP));
315:    PetscObjectReference((PetscObject)fromrows);
316:    osm->permutationIS = fromrows;
317:    osm->pcmat =  pc->pmat;
318:    PetscObjectReference((PetscObject)osm->permutationP);
319:    pc->pmat = osm->permutationP;
320:    VecDestroy(&outervec);
321:    ISDestroy(&fromrows);
322:    ISDestroy(&partitioning);
323:    osm->n           = PETSC_DETERMINE;
324:    return(0);
325: }



331: static PetscErrorCode PCSetUp_GASM(PC pc)
332: {
333:   PC_GASM        *osm = (PC_GASM*)pc->data;
335:   PetscBool      symset,flg;
336:   PetscInt       i,nInnerIndices,nTotalInnerIndices;
337:   PetscMPIInt    rank, size;
338:   MatReuse       scall = MAT_REUSE_MATRIX;
339:   KSP            ksp;
340:   PC             subpc;
341:   const char     *prefix,*pprefix;
342:   Vec            x,y;
343:   PetscInt       oni;       /* Number of indices in the i-th local outer subdomain.               */
344:   const PetscInt *oidxi;    /* Indices of the i-th local outer subdomain.                          */
345:   PetscInt       on;        /* Number of indices in the disjoint union of local outer subdomains. */
346:   PetscInt       *oidx;     /* Indices in the disjoint union of local outer subdomains. */
347:   IS             gois;      /* Disjoint union of the global indices of the outer subdomains.      */
348:   IS             goid;      /* Identity IS of the size of the disjoint union of outer subdomains. */
349:   PetscScalar    *gxarray, *gyarray;
350:   PetscInt       gostart;   /* Start of locally-owned indices in the vectors -- osm->gx,osm->gy -- over the disjoint union of outer subdomains. */
351:   PetscInt       num_subdomains    = 0;
352:   DM             *subdomain_dm     = NULL;
353:   char           **subdomain_names = NULL;
354:   PetscInt       *numbering;


358:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
359:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
360:   if (!pc->setupcalled) {
361:     /* use hierarchical partitioning, if requested */
362:     if (osm->hierarchicalpartitioning) {
363:       PCGASMSetHierarchicalPartitioning(pc);
364:     }
365:     if (!osm->type_set) {
366:       MatIsSymmetricKnown(pc->pmat,&symset,&flg);
367:       if (symset && flg) osm->type = PC_GASM_BASIC;
368:     }

370:     if (osm->n == PETSC_DETERMINE) {
371:       if (osm->N != PETSC_DETERMINE) {
372:            /* No local subdomains given, but the desired number of total subdomains is known, so construct them accordingly. */
373:            PCGASMCreateSubdomains(pc->pmat,osm->N,&osm->n,&osm->iis);
374:       } else if (osm->dm_subdomains && pc->dm) {
375:         /* try pc->dm next, if allowed */
376:         PetscInt  d;
377:         IS       *inner_subdomain_is, *outer_subdomain_is;
378:         DMCreateDomainDecomposition(pc->dm, &num_subdomains, &subdomain_names, &inner_subdomain_is, &outer_subdomain_is, &subdomain_dm);
379:         if (num_subdomains) {
380:           PCGASMSetSubdomains(pc, num_subdomains, inner_subdomain_is, outer_subdomain_is);
381:         }
382:         for (d = 0; d < num_subdomains; ++d) {
383:           if (inner_subdomain_is) {ISDestroy(&inner_subdomain_is[d]);}
384:           if (outer_subdomain_is) {ISDestroy(&outer_subdomain_is[d]);}
385:         }
386:         PetscFree(inner_subdomain_is);
387:         PetscFree(outer_subdomain_is);
388:       } else {
389:         /* still no subdomains; use one per processor */
390:         osm->nmax = osm->n = 1;
391:         MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
392:         osm->N    = size;
393:         PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
394:       }
395:     }
396:     if (!osm->iis) {
397:       /*
398:        osm->n was set in PCGASMSetSubdomains(), but the actual subdomains have not been supplied.
399:        We create the requisite number of local inner subdomains and then expand them into
400:        out subdomains, if necessary.
401:        */
402:       PCGASMCreateLocalSubdomains(pc->pmat,osm->n,&osm->iis);
403:     }
404:     if (!osm->ois) {
405:       /*
406:             Initially make outer subdomains the same as inner subdomains. If nonzero additional overlap
407:             has been requested, copy the inner subdomains over so they can be modified.
408:       */
409:       PetscMalloc1(osm->n,&osm->ois);
410:       for (i=0; i<osm->n; ++i) {
411:         if (osm->overlap > 0 && osm->N>1) { /* With positive overlap, osm->ois[i] will be modified by MatIncreaseOverlapSplit(), so copy osm->iis[i] rather than referencing it */
412:           ISDuplicate(osm->iis[i],(osm->ois)+i);
413:           ISCopy(osm->iis[i],osm->ois[i]);
414:         } else {
415:           PetscObjectReference((PetscObject)((osm->iis)[i]));
416:           osm->ois[i] = osm->iis[i];
417:         }
418:       }
419:       if (osm->overlap>0 && osm->N>1) {
420:            /* Extend the "overlapping" regions by a number of steps */
421:            MatIncreaseOverlapSplit(pc->pmat,osm->n,osm->ois,osm->overlap);
422:       }
423:     }

425:     /* Now the subdomains are defined.  Determine their global and max local numbers, if necessary. */
426:     if (osm->nmax == PETSC_DETERMINE) {
427:       PetscMPIInt inwork,outwork;
428:       /* determine the maximum number of local subdomains over all processes */
429:       inwork = osm->n;
430:       MPIU_Allreduce(&inwork,&outwork,1,MPI_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
431:       osm->nmax  = outwork;
432:     }
433:     if (osm->N == PETSC_DETERMINE) {
434:       /* Determine the number of globally-distinct subdomains and compute a global numbering for them. */
435:       PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,&osm->N,NULL);
436:     }


439:     if (osm->sort_indices) {
440:       for (i=0; i<osm->n; i++) {
441:         ISSort(osm->ois[i]);
442:         ISSort(osm->iis[i]);
443:       }
444:     }
445:     PCGetOptionsPrefix(pc,&prefix);
446:     PCGASMPrintSubdomains(pc);

448:     /*
449:        Merge the ISs, create merged vectors and restrictions.
450:      */
451:     /* Merge outer subdomain ISs and construct a restriction onto the disjoint union of local outer subdomains. */
452:     on = 0;
453:     for (i=0; i<osm->n; i++) {
454:       ISGetLocalSize(osm->ois[i],&oni);
455:       on  += oni;
456:     }
457:     PetscMalloc1(on, &oidx);
458:     on   = 0;
459:     /* Merge local indices together */
460:     for (i=0; i<osm->n; i++) {
461:       ISGetLocalSize(osm->ois[i],&oni);
462:       ISGetIndices(osm->ois[i],&oidxi);
463:       PetscMemcpy(oidx+on,oidxi,sizeof(PetscInt)*oni);
464:       ISRestoreIndices(osm->ois[i],&oidxi);
465:       on  += oni;
466:     }
467:     ISCreateGeneral(((PetscObject)(pc))->comm,on,oidx,PETSC_OWN_POINTER,&gois);
468:     nTotalInnerIndices = 0;
469:     for (i=0; i<osm->n; i++) {
470:       ISGetLocalSize(osm->iis[i],&nInnerIndices);
471:       nTotalInnerIndices += nInnerIndices;
472:     }
473:     VecCreateMPI(((PetscObject)(pc))->comm,nTotalInnerIndices,PETSC_DETERMINE,&x);
474:     VecDuplicate(x,&y);

476:     VecCreateMPI(PetscObjectComm((PetscObject)pc),on,PETSC_DECIDE,&osm->gx);
477:     VecDuplicate(osm->gx,&osm->gy);
478:     VecGetOwnershipRange(osm->gx, &gostart, NULL);
479:     ISCreateStride(PetscObjectComm((PetscObject)pc),on,gostart,1, &goid);
480:     /* gois may contain indices that are not locally owned */
481:     VecScatterCreate(x,gois,osm->gx,goid, &(osm->gorestriction));
482:     PetscMalloc1(osm->n,&numbering);
483:     PetscObjectsListGetGlobalNumbering(PetscObjectComm((PetscObject)pc),osm->n,(PetscObject*)osm->ois,NULL,numbering);
484:     VecDestroy(&x);
485:     ISDestroy(&gois);

487:     /* Merge inner subdomain ISs and construct a restriction onto the disjoint union of local inner subdomains. */
488:     {
489:       PetscInt        ini;           /* Number of indices in the i-th local inner subdomain. */
490:       PetscInt        in;            /* Number of indices in the disjoint union of local inner subdomains. */
491:       PetscInt       *iidx;          /* Global indices in the merged local inner subdomain. */
492:       PetscInt       *ioidx;         /* Global indices of the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
493:       IS              giis;          /* IS for the disjoint union of inner subdomains. */
494:       IS              giois;         /* IS for the disjoint union of inner subdomains within the disjoint union of outer subdomains. */
495:       PetscScalar    *array;
496:       const PetscInt *indices;
497:       PetscInt        k;
498:       on = 0;
499:       for (i=0; i<osm->n; i++) {
500:         ISGetLocalSize(osm->ois[i],&oni);
501:         on  += oni;
502:       }
503:       PetscMalloc1(on, &iidx);
504:       PetscMalloc1(on, &ioidx);
505:       VecGetArray(y,&array);
506:       /* tag each inner-subdomain entry with its global subdomain number; after scattering, outer-subdomain entries whose tag differs identify the overlap */
507:       in   = 0;
508:       for (i=0; i<osm->n; i++) {
509:         ISGetLocalSize(osm->iis[i],&ini);
510:         for (k = 0; k < ini; ++k){
511:           array[in+k] = numbering[i];
512:         }
513:         in += ini;
514:       }
515:       VecRestoreArray(y,&array);
516:       VecScatterBegin(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
517:       VecScatterEnd(osm->gorestriction,y,osm->gy,INSERT_VALUES,SCATTER_FORWARD);
518:       VecGetOwnershipRange(osm->gy,&gostart, NULL);
519:       VecGetArray(osm->gy,&array);
520:       on  = 0;
521:       in  = 0;
522:       for (i=0; i<osm->n; i++) {
523:         ISGetLocalSize(osm->ois[i],&oni);
524:         ISGetIndices(osm->ois[i],&indices);
525:         for (k=0; k<oni; k++) {
526:           /* skip overlapping indices to obtain the inner subdomain */
527:           if (PetscRealPart(array[on+k]) != numbering[i]) continue;
528:           iidx[in]    = indices[k];
529:           ioidx[in++] = gostart+on+k;
530:         }
531:         ISRestoreIndices(osm->ois[i],&indices);
532:         on += oni;
533:       }
534:       VecRestoreArray(osm->gy,&array);
535:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,iidx,PETSC_OWN_POINTER,&giis);
536:       ISCreateGeneral(PetscObjectComm((PetscObject)pc),in,ioidx,PETSC_OWN_POINTER,&giois);
537:       VecScatterCreate(y,giis,osm->gy,giois,&osm->girestriction);
538:       VecDestroy(&y);
539:       ISDestroy(&giis);
540:       ISDestroy(&giois);
541:     }
542:     ISDestroy(&goid);
543:     PetscFree(numbering);

545:     /* Create the subdomain work vectors. */
546:     PetscMalloc1(osm->n,&osm->x);
547:     PetscMalloc1(osm->n,&osm->y);
548:     VecGetArray(osm->gx, &gxarray);
549:     VecGetArray(osm->gy, &gyarray);
550:     for (i=0, on=0; i<osm->n; ++i, on += oni) {
551:       PetscInt oNi;
552:       ISGetLocalSize(osm->ois[i],&oni);
553:       /* the outer subdomain IS lives on a sub-communicator; get its global size there */
554:       ISGetSize(osm->ois[i],&oNi);
555:       VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gxarray+on,&osm->x[i]);
556:       VecCreateMPIWithArray(((PetscObject)(osm->ois[i]))->comm,1,oni,oNi,gyarray+on,&osm->y[i]);
557:     }
558:     VecRestoreArray(osm->gx, &gxarray);
559:     VecRestoreArray(osm->gy, &gyarray);
560:     /* Create the subdomain solvers */
561:     PetscMalloc1(osm->n,&osm->ksp);
562:     for (i=0; i<osm->n; i++) {
563:       char subprefix[PETSC_MAX_PATH_LEN+1];
564:       KSPCreate(((PetscObject)(osm->ois[i]))->comm,&ksp);
565:       PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
566:       PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
567:       KSPSetType(ksp,KSPPREONLY);
568:       KSPGetPC(ksp,&subpc); /* Why do we need this here? */
569:       if (subdomain_dm) {
570:         KSPSetDM(ksp,subdomain_dm[i]);
571:         DMDestroy(subdomain_dm+i);
572:       }
573:       PCGetOptionsPrefix(pc,&prefix);
574:       KSPSetOptionsPrefix(ksp,prefix);
575:       if (subdomain_names && subdomain_names[i]) {
576:         PetscSNPrintf(subprefix,PETSC_MAX_PATH_LEN,"sub_%s_",subdomain_names[i]);
577:         KSPAppendOptionsPrefix(ksp,subprefix);
578:         PetscFree(subdomain_names[i]);
579:       }
580:       KSPAppendOptionsPrefix(ksp,"sub_");
581:       osm->ksp[i] = ksp;
582:     }
583:     PetscFree(subdomain_dm);
584:     PetscFree(subdomain_names);
585:     scall = MAT_INITIAL_MATRIX;

587:   } else { /* if (pc->setupcalled) */
588:     /*
589:        Destroy the submatrices from the previous iteration
590:     */
591:     if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
592:       MatDestroyMatrices(osm->n,&osm->pmat);
593:       scall = MAT_INITIAL_MATRIX;
594:     }
595:     if (osm->permutationIS) {
596:       MatGetSubMatrix(pc->pmat,osm->permutationIS,osm->permutationIS,scall,&osm->permutationP);
597:       PetscObjectReference((PetscObject)osm->permutationP);
598:       osm->pcmat = pc->pmat;
599:       pc->pmat   = osm->permutationP;
600:     }

602:   }


605:   /*
606:      Extract out the submatrices.
607:   */
608:   if (size > 1) {
609:     MatGetSubMatricesMPI(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
610:   } else {
611:     MatGetSubMatrices(pc->pmat,osm->n,osm->ois,osm->ois,scall,&osm->pmat);
612:   }
613:   if (scall == MAT_INITIAL_MATRIX) {
614:     PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
615:     for (i=0; i<osm->n; i++) {
616:       PetscLogObjectParent((PetscObject)pc,(PetscObject)osm->pmat[i]);
617:       PetscObjectSetOptionsPrefix((PetscObject)osm->pmat[i],pprefix);
618:     }
619:   }

621:   /* Return control to the user so that the submatrices can be modified (e.g., to apply
622:      different boundary conditions for the submatrices than for the global problem) */
623:   PCModifySubMatrices(pc,osm->n,osm->ois,osm->ois,osm->pmat,pc->modifysubmatricesP);

625:   /*
626:      Loop over submatrices putting them into local ksps
627:   */
628:   for (i=0; i<osm->n; i++) {
629:     KSPSetOperators(osm->ksp[i],osm->pmat[i],osm->pmat[i]);
630:     if (!pc->setupcalled) {
631:       KSPSetFromOptions(osm->ksp[i]);
632:     }
633:   }
634:   if (osm->pcmat) {
635:     MatDestroy(&pc->pmat);
636:     pc->pmat   = osm->pcmat;
637:     osm->pcmat = 0;
638:   }
639:   return(0);
640: }

644: static PetscErrorCode PCSetUpOnBlocks_GASM(PC pc)
645: {
646:   PC_GASM        *osm = (PC_GASM*)pc->data;
648:   PetscInt       i;

651:   for (i=0; i<osm->n; i++) {
652:     KSPSetUp(osm->ksp[i]);
653:   }
654:   return(0);
655: }

659: static PetscErrorCode PCApply_GASM(PC pc,Vec xin,Vec yout)
660: {
661:   PC_GASM        *osm = (PC_GASM*)pc->data;
663:   PetscInt       i;
664:   Vec            x,y;
665:   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

668:   if (osm->pctoouter) {
669:     VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
670:     VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
671:     x = osm->pcx;
672:     y = osm->pcy;
673:   } else {
674:     x = xin;
675:     y = yout;
676:   }
677:   /*
678:      Support for limiting the restriction or interpolation only to the inner
679:      subdomain values (leaving the other values 0).
680:   */
681:   if (!(osm->type & PC_GASM_RESTRICT)) {
682:     /* have to zero the work RHS since scatter may leave some slots empty */
683:     VecZeroEntries(osm->gx);
684:     VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
685:   } else {
686:     VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
687:   }
688:   VecZeroEntries(osm->gy);
689:   if (!(osm->type & PC_GASM_RESTRICT)) {
690:     VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
691:   } else {
692:     VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
693:   }
694:   /* do the subdomain solves */
695:   for (i=0; i<osm->n; ++i) {
696:     KSPSolve(osm->ksp[i],osm->x[i],osm->y[i]);
697:   }
698:   /* Do we need to zero y ?? */
699:   VecZeroEntries(y);
700:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
701:     VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
702:     VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
703:   } else {
704:     VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
705:     VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
706:   }
707:   if (osm->pctoouter) {
708:     VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
709:     VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
710:   }
711:   return(0);
712: }

716: static PetscErrorCode PCApplyTranspose_GASM(PC pc,Vec xin,Vec yout)
717: {
718:   PC_GASM        *osm = (PC_GASM*)pc->data;
720:   PetscInt       i;
721:   Vec            x,y;
722:   ScatterMode    forward = SCATTER_FORWARD,reverse = SCATTER_REVERSE;

725:   if (osm->pctoouter) {
726:     VecScatterBegin(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
727:     VecScatterEnd(osm->pctoouter,xin,osm->pcx,INSERT_VALUES,SCATTER_REVERSE);
728:     x = osm->pcx;
729:     y = osm->pcy;
730:   } else {
731:     x = xin;
732:     y = yout;
733:   }
734:   /*
735:      Support for limiting the restriction or interpolation to only local
736:      subdomain values (leaving the other values 0).

738:      Note: these are reversed relative to PCApply_GASM() because we apply the
739:      transpose of the three factors, which swaps the roles of restriction and
740:      interpolation. */
741:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
742:     /* have to zero the work RHS since scatter may leave some slots empty */
743:     VecZeroEntries(osm->gx);
744:     VecScatterBegin(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
745:   } else {
746:     VecScatterBegin(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
747:   }
748:   VecZeroEntries(osm->gy);
749:   if (!(osm->type & PC_GASM_INTERPOLATE)) {
750:     VecScatterEnd(osm->girestriction,x,osm->gx,INSERT_VALUES,forward);
751:   } else {
752:     VecScatterEnd(osm->gorestriction,x,osm->gx,INSERT_VALUES,forward);
753:   }
754:   /* do the local solves */
755:   for (i=0; i<osm->n; ++i) { /* Note that the solves are local, so we can go to osm->n, rather than osm->nmax. */
756:     KSPSolveTranspose(osm->ksp[i],osm->x[i],osm->y[i]);
757:   }
758:   VecZeroEntries(y);
759:   if (!(osm->type & PC_GASM_RESTRICT)) {
760:     VecScatterBegin(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
761:     VecScatterEnd(osm->girestriction,osm->gy,y,ADD_VALUES,reverse);
762:   } else {
763:     VecScatterBegin(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
764:     VecScatterEnd(osm->gorestriction,osm->gy,y,ADD_VALUES,reverse);
765:   }
766:   if (osm->pctoouter) {
767:     VecScatterBegin(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
768:     VecScatterEnd(osm->pctoouter,y,yout,INSERT_VALUES,SCATTER_FORWARD);
769:   }
770:   return(0);
771: }

775: static PetscErrorCode PCReset_GASM(PC pc)
776: {
777:   PC_GASM        *osm = (PC_GASM*)pc->data;
779:   PetscInt       i;

782:   if (osm->ksp) {
783:     for (i=0; i<osm->n; i++) {
784:       KSPReset(osm->ksp[i]);
785:     }
786:   }
787:   if (osm->pmat) {
788:     if (osm->n > 0) {
789:       MatDestroyMatrices(osm->n,&osm->pmat);
790:     }
791:   }
792:   if (osm->x) {
793:     for (i=0; i<osm->n; i++) {
794:       VecDestroy(&osm->x[i]);
795:       VecDestroy(&osm->y[i]);
796:     }
797:   }
798:   VecDestroy(&osm->gx);
799:   VecDestroy(&osm->gy);

801:   VecScatterDestroy(&osm->gorestriction);
802:   VecScatterDestroy(&osm->girestriction);
803:   if (!osm->user_subdomains) {
804:     PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
805:     osm->N    = PETSC_DETERMINE;
806:     osm->nmax = PETSC_DETERMINE;
807:   }
808:   if (osm->pctoouter) {
809:     VecScatterDestroy(&(osm->pctoouter));
810:   }
811:   if (osm->permutationIS) {
812:     ISDestroy(&(osm->permutationIS));
813:   }
814:   if (osm->pcx) {
815:     VecDestroy(&(osm->pcx));
816:   }
817:   if (osm->pcy) {
818:     VecDestroy(&(osm->pcy));
819:   }
820:   if (osm->permutationP) {
821:     MatDestroy(&(osm->permutationP));
822:   }
823:   if (osm->pcmat) {
824:     MatDestroy(&osm->pcmat);
825:   }
826:   return(0);
827: }

831: static PetscErrorCode PCDestroy_GASM(PC pc)
832: {
833:   PC_GASM        *osm = (PC_GASM*)pc->data;
835:   PetscInt       i;

838:   PCReset_GASM(pc);
839:   /* PCReset_GASM() does not destroy the subdomains if user_subdomains is true, so destroy them here. */
840:   PCGASMDestroySubdomains(osm->n,&osm->ois,&osm->iis);
841:   if (osm->ksp) {
842:     for (i=0; i<osm->n; i++) {
843:       KSPDestroy(&osm->ksp[i]);
844:     }
845:     PetscFree(osm->ksp);
846:   }
847:   PetscFree(osm->x);
848:   PetscFree(osm->y);
849:   PetscFree(pc->data);
850:   return(0);
851: }

855: static PetscErrorCode PCSetFromOptions_GASM(PetscOptionItems *PetscOptionsObject,PC pc)
856: {
857:   PC_GASM        *osm = (PC_GASM*)pc->data;
859:   PetscInt       blocks,ovl;
860:   PetscBool      symset,flg;
861:   PCGASMType     gasmtype;

864:   /* use the symmetric variant (PC_GASM_BASIC) if the matrix is known to be symmetric */
865:   if (!osm->type_set && pc->pmat) {
866:     MatIsSymmetricKnown(pc->pmat,&symset,&flg);
867:     if (symset && flg) osm->type = PC_GASM_BASIC;
868:   }
869:   PetscOptionsHead(PetscOptionsObject,"Generalized additive Schwarz options");
870:   PetscOptionsBool("-pc_gasm_use_dm_subdomains","If subdomains aren't set, use DMCreateDomainDecomposition() to define subdomains.","PCGASMSetUseDMSubdomains",osm->dm_subdomains,&osm->dm_subdomains,&flg);
871:   PetscOptionsInt("-pc_gasm_total_subdomains","Total number of subdomains across communicator","PCGASMSetTotalSubdomains",osm->N,&blocks,&flg);
872:   if (flg) {
873:     PCGASMSetTotalSubdomains(pc,blocks);
874:   }
875:   PetscOptionsInt("-pc_gasm_overlap","Number of overlapping degrees of freedom","PCGASMSetOverlap",osm->overlap,&ovl,&flg);
876:   if (flg) {
877:     PCGASMSetOverlap(pc,ovl);
878:     osm->dm_subdomains = PETSC_FALSE;
879:   }
880:   flg  = PETSC_FALSE;
881:   PetscOptionsEnum("-pc_gasm_type","Type of restriction/extension","PCGASMSetType",PCGASMTypes,(PetscEnum)osm->type,(PetscEnum*)&gasmtype,&flg);
882:   if (flg) {PCGASMSetType(pc,gasmtype);}
883:   PetscOptionsBool("-pc_gasm_use_hierachical_partitioning","use hierarchical partitioning",NULL,osm->hierarchicalpartitioning,&osm->hierarchicalpartitioning,&flg);
884:   PetscOptionsTail();
885:   return(0);
886: }

888: /*------------------------------------------------------------------------------------*/

892: /*@
893:     PCGASMSetTotalSubdomains - sets the total number of subdomains to use across the
894:                                communicator.
895:     Logically collective on pc

897:     Input Parameters:
898: +   pc  - the preconditioner
899: -   N   - total number of subdomains


902:     Level: beginner

904: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz

906: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap()
907:           PCGASMCreateSubdomains2D()
908: @*/
909: PetscErrorCode  PCGASMSetTotalSubdomains(PC pc,PetscInt N)
910: {
911:   PC_GASM        *osm = (PC_GASM*)pc->data;
912:   PetscMPIInt    size,rank;

916:   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Total number of subdomains must be 1 or more, got N = %D",N);
917:   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetTotalSubdomains() should be called before calling PCSetUp().");

919:   PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
920:   osm->ois = osm->iis = NULL;

922:   MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
923:   MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
924:   osm->N    = N;
925:   osm->n    = PETSC_DETERMINE;
926:   osm->nmax = PETSC_DETERMINE;
927:   osm->dm_subdomains = PETSC_FALSE;
928:   return(0);
929: }

933: static PetscErrorCode  PCGASMSetSubdomains_GASM(PC pc,PetscInt n,IS iis[],IS ois[])
934: {
935:   PC_GASM        *osm = (PC_GASM*)pc->data;
937:   PetscInt       i;

940:   if (n < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Each process must have 1 or more subdomains, got n = %D",n);
941:   if (pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetSubdomains() should be called before calling PCSetUp().");

943:   PCGASMDestroySubdomains(osm->n,&osm->iis,&osm->ois);
944:   osm->iis  = osm->ois = NULL;
945:   osm->n    = n;
946:   osm->N    = PETSC_DETERMINE;
947:   osm->nmax = PETSC_DETERMINE;
948:   if (ois) {
949:     PetscMalloc1(n,&osm->ois);
950:     for (i=0; i<n; i++) {
951:       PetscObjectReference((PetscObject)ois[i]);
952:       osm->ois[i] = ois[i];
953:     }
954:     /*
955:        Since the user set the outer subdomains, even if nontrivial overlap was requested via PCGASMSetOverlap(),
956:        it will be ignored.  To avoid confusion later on (e.g., when viewing the PC), the overlap size is set to -1.
957:     */
958:     osm->overlap = -1;
959:     if (!iis) {
960:       PetscMalloc1(n,&osm->iis);
961:       for (i=0; i<n; i++) {
962:         PetscObjectReference((PetscObject)ois[i]);
963:         osm->iis[i] = ois[i];
964:       }
967:     }
968:   }
969:   if (iis) {
970:     PetscMalloc1(n,&osm->iis);
971:     for (i=0; i<n; i++) {
972:       PetscObjectReference((PetscObject)iis[i]);
973:       osm->iis[i] = iis[i];
974:     }
975:     if (!ois) {
976:       PetscMalloc1(n,&osm->ois);
977:       for (i=0; i<n; i++) {
978:         PetscObjectReference((PetscObject)iis[i]);
979:         osm->ois[i] = iis[i];
980:       }
983:       /* If necessary, osm->ois[i] will be expanded using the requested overlap size in PCSetUp_GASM(). */
984:     }
985:   }
986:   if (iis)  osm->user_subdomains = PETSC_TRUE;
987:   return(0);
988: }


993: static PetscErrorCode  PCGASMSetOverlap_GASM(PC pc,PetscInt ovl)
994: {
995:   PC_GASM *osm = (PC_GASM*)pc->data;

998:   if (ovl < 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap value requested");
999:   if (pc->setupcalled && ovl != osm->overlap) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"PCGASMSetOverlap() should be called before PCSetUp().");
1000:   if (!pc->setupcalled) osm->overlap = ovl;
1001:   return(0);
1002: }

1006: static PetscErrorCode  PCGASMSetType_GASM(PC pc,PCGASMType type)
1007: {
1008:   PC_GASM *osm = (PC_GASM*)pc->data;

1011:   osm->type     = type;
1012:   osm->type_set = PETSC_TRUE;
1013:   return(0);
1014: }

1018: static PetscErrorCode  PCGASMSetSortIndices_GASM(PC pc,PetscBool doSort)
1019: {
1020:   PC_GASM *osm = (PC_GASM*)pc->data;

1023:   osm->sort_indices = doSort;
1024:   return(0);
1025: }

1029: /*
1030:    FIXME: This routine might need to be modified now that multiple ranks per subdomain are allowed.
1031:         In particular, it would upset the global subdomain number calculation.
1032: */
1033: static PetscErrorCode  PCGASMGetSubKSP_GASM(PC pc,PetscInt *n,PetscInt *first,KSP **ksp)
1034: {
1035:   PC_GASM        *osm = (PC_GASM*)pc->data;

1039:   if (osm->n < 1) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Need to call PCSetUP() on PC (or KSPSetUp() on the outer KSP object) before calling here");

1041:   if (n) *n = osm->n;
1042:   if (first) {
1043:     MPI_Scan(&osm->n,first,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
1044:     *first -= osm->n;
1045:   }
1046:   if (ksp) {
1047:     /* Assume that local solves are now different; not necessarily
1048:        true, though!  This flag is used only for PCView_GASM() */
1049:     *ksp                        = osm->ksp;
1050:     osm->same_subdomain_solvers = PETSC_FALSE;
1051:   }
1052:   return(0);
1053: } /* PCGASMGetSubKSP_GASM() */

1057: /*@C
1058:     PCGASMSetSubdomains - Sets the subdomains for this processor
1059:     for the additive Schwarz preconditioner.

1061:     Collective on pc

1063:     Input Parameters:
1064: +   pc  - the preconditioner object
1065: .   n   - the number of subdomains for this processor
1066: .   iis - the index sets that define the inner subdomains (or NULL for PETSc to determine subdomains)
1067: -   ois - the index sets that define the outer subdomains (or NULL to use the same as iis, or to construct by expanding iis by the requested overlap)

1069:     Notes:
1070:     The IS indices use the parallel, global numbering of the vector entries.
1071:     Inner subdomains are those where the correction is applied.
1072:     Outer subdomains are those where the residual necessary to obtain the
1073:     corrections is obtained (see PCGASMType for the use of inner/outer subdomains).
1074:     Both inner and outer subdomains can extend over several processors.
1075:     This processor's portion of a subdomain is known as a local subdomain.

1077:     By default the GASM preconditioner uses 1 (local) subdomain per processor.
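
    Example:
    A minimal sketch (assumes a PC pc of type PCGASM and index sets is0 and is1, built by the
    application in the global numbering, describing two inner subdomains owned by this process):
.vb
      IS inner[2];
      inner[0] = is0; inner[1] = is1;
      PCGASMSetSubdomains(pc,2,inner,NULL);  /* outer subdomains will be built from the inner ones */
      PCGASMSetOverlap(pc,1);                /* expand each inner subdomain by one layer            */
.ve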


1080:     Level: advanced

1082: .keywords: PC, GASM, set, subdomains, additive Schwarz

1084: .seealso: PCGASMSetNumSubdomains(), PCGASMSetOverlap(), PCGASMGetSubKSP(),
1085:           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1086: @*/
1087: PetscErrorCode  PCGASMSetSubdomains(PC pc,PetscInt n,IS iis[],IS ois[])
1088: {
1089:   PC_GASM *osm = (PC_GASM*)pc->data;

1094:   PetscTryMethod(pc,"PCGASMSetSubdomains_C",(PC,PetscInt,IS[],IS[]),(pc,n,iis,ois));
1095:   osm->dm_subdomains = PETSC_FALSE;
1096:   return(0);
1097: }


1102: /*@
1103:     PCGASMSetOverlap - Sets the overlap between a pair of subdomains for the
1104:     additive Schwarz preconditioner.  Either all or no processors in the
1105:     pc communicator must call this routine.

1107:     Logically Collective on pc

1109:     Input Parameters:
1110: +   pc  - the preconditioner context
1111: -   ovl - the amount of overlap between subdomains (ovl >= 0, default value = 0)

1113:     Options Database Key:
1114: .   -pc_gasm_overlap <overlap> - Sets overlap

1116:     Notes:
1117:     By default the GASM preconditioner uses 1 subdomain per processor.  To use
1118:     multiple subdomains per processor or "straddling" subdomains that intersect
1119:     multiple processors, use PCGASMSetSubdomains() (or option -pc_gasm_total_subdomains <n>).

1121:     The overlap defaults to 0, so if no additional overlap is desired beyond what
1122:     may have been set with a call to PCGASMSetSubdomains(), then ovl must be set
1123:     to 0.  In particular, if the subdomains are not set explicitly in the application
1124:     code, then all overlap is computed internally by PETSc, and using an overlap
1125:     of 0 results in a GASM variant that is equivalent to the block Jacobi
1126:     preconditioner.

1128:     Note that one can define initial index sets with any overlap via
1129:     PCGASMSetSubdomains(); the routine PCGASMSetOverlap() merely allows
1130:     PETSc to extend that overlap further, if desired.
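
    Example:
    A small sketch (assumes a PC pc of type PCGASM with PETSc-generated subdomains):
.vb
      PCGASMSetOverlap(pc,2);   /* extend each subdomain by two layers of matrix couplings */
.ve
    or, equivalently, use the command-line option -pc_gasm_overlap 2.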

1132:     Level: intermediate

1134: .keywords: PC, GASM, set, overlap

1136: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1137:           PCGASMCreateSubdomains2D(), PCGASMGetSubdomains()
1138: @*/
1139: PetscErrorCode  PCGASMSetOverlap(PC pc,PetscInt ovl)
1140: {
1142:   PC_GASM *osm = (PC_GASM*)pc->data;

1147:   PetscTryMethod(pc,"PCGASMSetOverlap_C",(PC,PetscInt),(pc,ovl));
1148:   osm->dm_subdomains = PETSC_FALSE;
1149:   return(0);
1150: }

1154: /*@
1155:     PCGASMSetType - Sets the type of restriction and interpolation used
1156:     for local problems in the additive Schwarz method.

1158:     Logically Collective on PC

1160:     Input Parameters:
1161: +   pc  - the preconditioner context
1162: -   type - variant of GASM, one of
1163: .vb
1164:       PC_GASM_BASIC       - full interpolation and restriction
1165:       PC_GASM_RESTRICT    - full restriction, local processor interpolation
1166:       PC_GASM_INTERPOLATE - full interpolation, local processor restriction
1167:       PC_GASM_NONE        - local processor restriction and interpolation
1168: .ve

1170:     Options Database Key:
1171: .   -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type
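
    Example:
    An illustrative sketch (assumes a PC pc of type PCGASM):
.vb
      PCGASMSetType(pc,PC_GASM_BASIC);   /* full restriction and interpolation */
.ve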

1173:     Level: intermediate

1175: .keywords: PC, GASM, set, type

1177: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1178:           PCGASMCreateSubdomains2D()
1179: @*/
1180: PetscErrorCode  PCGASMSetType(PC pc,PCGASMType type)
1181: {

1187:   PetscTryMethod(pc,"PCGASMSetType_C",(PC,PCGASMType),(pc,type));
1188:   return(0);
1189: }

1193: /*@
1194:     PCGASMSetSortIndices - Determines whether subdomain indices are sorted.

1196:     Logically Collective on PC

1198:     Input Parameters:
1199: +   pc  - the preconditioner context
1200: -   doSort - sort the subdomain indices

1202:     Level: intermediate

1204: .keywords: PC, GASM, set, type

1206: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(),
1207:           PCGASMCreateSubdomains2D()
1208: @*/
1209: PetscErrorCode  PCGASMSetSortIndices(PC pc,PetscBool doSort)
1210: {

1216:   PetscTryMethod(pc,"PCGASMSetSortIndices_C",(PC,PetscBool),(pc,doSort));
1217:   return(0);
1218: }

1222: /*@C
1223:    PCGASMGetSubKSP - Gets the local KSP contexts for all blocks on
1224:    this processor.

1226:    Collective on PC iff first_local is requested

1228:    Input Parameter:
1229: .  pc - the preconditioner context

1231:    Output Parameters:
1232: +  n_local - the number of blocks on this processor or NULL
1233: .  first_local - the global number of the first block on this processor or NULL,
1234:                  all processors must request or all must pass NULL
1235: -  ksp - the array of KSP contexts

1237:    Note:
1238:    After PCGASMGetSubKSP() the array of KSPs should not be freed by the caller.

1240:    Currently for some matrix implementations only 1 block per processor
1241:    is supported.

1243:    You must call KSPSetUp() before calling PCGASMGetSubKSP().
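
   Example:
   A sketch of customizing each local block solver (assumes the outer KSP ksp already uses a
   PCGASM preconditioner):
.vb
      KSP      *subksp;
      PC       pc,subpc;
      PetscInt i,nlocal,first;
      KSPGetPC(ksp,&pc);
      KSPSetUp(ksp);                              /* must precede PCGASMGetSubKSP() */
      PCGASMGetSubKSP(pc,&nlocal,&first,&subksp);
      for (i=0; i<nlocal; i++) {
        KSPGetPC(subksp[i],&subpc);
        PCSetType(subpc,PCILU);                   /* e.g., ILU on each block */
      }
.ve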

1245:    Level: advanced

1247: .keywords: PC, GASM, additive Schwarz, get, sub, KSP, context

1249: .seealso: PCGASMSetSubdomains(), PCGASMSetOverlap(),
1250:           PCGASMCreateSubdomains2D(),
1251: @*/
1252: PetscErrorCode  PCGASMGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
1253: {

1258:   PetscUseMethod(pc,"PCGASMGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
1259:   return(0);
1260: }

1262: /* -------------------------------------------------------------------------------------*/
1263: /*MC
1264:    PCGASM - Use the (restricted) additive Schwarz method, in which each block is (approximately) solved
1265:            with its own KSP object.

1267:    Options Database Keys:
1268: +  -pc_gasm_total_subdomains <n>  - Sets the total number of subdomains to be distributed among the processors
1269: .  -pc_gasm_view_subdomains       - activates the printing of subdomain indices in PCView(), -ksp_view or -snes_view
1270: .  -pc_gasm_print_subdomains      - activates the printing of subdomain indices in PCSetUp()
1271: .  -pc_gasm_overlap <ovl>         - Sets overlap by which to (automatically) extend local subdomains
1272: -  -pc_gasm_type [basic,restrict,interpolate,none] - Sets GASM type

1274:      IMPORTANT: If you run with, for example, 3 blocks on 1 processor or 3 blocks on 3 processors, you
1275:       will get different convergence rates due to the default option -pc_gasm_type restrict. Use
1276:       -pc_gasm_type basic to get the standard (non-restricted) GASM.

1278:    Notes: Each processor can have one or more blocks, and, unlike in PCASM, a block can be shared by
1279:      more than one processor. Defaults to one block per processor.

1281:      To set options on the solvers for each block append -sub_ to all the KSP, and PC
1282:         options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly

1284:      To set the options on the solvers separate for each block call PCGASMGetSubKSP()
1285:          and set the options directly on the resulting KSP object (you can access its PC
1286:          with KSPGetPC())
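
     An illustrative usage sketch (assumes a Mat A and Vec b,x on PETSC_COMM_WORLD):
.vb
      KSP ksp;
      PC  pc;
      KSPCreate(PETSC_COMM_WORLD,&ksp);
      KSPSetOperators(ksp,A,A);
      KSPGetPC(ksp,&pc);
      PCSetType(pc,PCGASM);
      PCGASMSetOverlap(pc,1);
      KSPSetFromOptions(ksp);   /* picks up -pc_gasm_* and -sub_* options */
      KSPSolve(ksp,b,x);
.ve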


1289:    Level: beginner

1291:    Concepts: additive Schwarz method

1293:     References:
1294: +   1. - M Dryja, OB Widlund, An additive variant of the Schwarz alternating method for the case of many subregions
1295:      Courant Institute, New York University Technical report
1296: -   2. - Barry Smith, Petter Bjorstad, and William Gropp, Domain Decompositions: Parallel Multilevel Methods for Elliptic Partial Differential Equations,
1297:     Cambridge University Press.

1299: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
1300:            PCBJACOBI,  PCGASMGetSubKSP(), PCGASMSetSubdomains(),
1301:            PCSetModifySubmatrices(), PCGASMSetOverlap(), PCGASMSetType()

1303: M*/

1307: PETSC_EXTERN PetscErrorCode PCCreate_GASM(PC pc)
1308: {
1310:   PC_GASM        *osm;

1313:   PetscNewLog(pc,&osm);

1315:   osm->N                        = PETSC_DETERMINE;
1316:   osm->n                        = PETSC_DECIDE;
1317:   osm->nmax                     = PETSC_DETERMINE;
1318:   osm->overlap                  = 0;
1319:   osm->ksp                      = 0;
1320:   osm->gorestriction            = 0;
1321:   osm->girestriction            = 0;
1322:   osm->pctoouter                = 0;
1323:   osm->gx                       = 0;
1324:   osm->gy                       = 0;
1325:   osm->x                        = 0;
1326:   osm->y                        = 0;
1327:   osm->pcx                      = 0;
1328:   osm->pcy                      = 0;
1329:   osm->permutationIS            = 0;
1330:   osm->permutationP             = 0;
1331:   osm->pcmat                    = 0;
1332:   osm->ois                      = 0;
1333:   osm->iis                      = 0;
1334:   osm->pmat                     = 0;
1335:   osm->type                     = PC_GASM_RESTRICT;
1336:   osm->same_subdomain_solvers   = PETSC_TRUE;
1337:   osm->sort_indices             = PETSC_TRUE;
1338:   osm->dm_subdomains            = PETSC_FALSE;
1339:   osm->hierarchicalpartitioning = PETSC_FALSE;

1341:   pc->data                 = (void*)osm;
1342:   pc->ops->apply           = PCApply_GASM;
1343:   pc->ops->applytranspose  = PCApplyTranspose_GASM;
1344:   pc->ops->setup           = PCSetUp_GASM;
1345:   pc->ops->reset           = PCReset_GASM;
1346:   pc->ops->destroy         = PCDestroy_GASM;
1347:   pc->ops->setfromoptions  = PCSetFromOptions_GASM;
1348:   pc->ops->setuponblocks   = PCSetUpOnBlocks_GASM;
1349:   pc->ops->view            = PCView_GASM;
1350:   pc->ops->applyrichardson = 0;

1352:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSubdomains_C",PCGASMSetSubdomains_GASM);
1353:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetOverlap_C",PCGASMSetOverlap_GASM);
1354:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetType_C",PCGASMSetType_GASM);
1355:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMSetSortIndices_C",PCGASMSetSortIndices_GASM);
1356:   PetscObjectComposeFunction((PetscObject)pc,"PCGASMGetSubKSP_C",PCGASMGetSubKSP_GASM);
1357:   return(0);
1358: }


1363: PetscErrorCode  PCGASMCreateLocalSubdomains(Mat A, PetscInt nloc, IS *iis[])
1364: {
1365:   MatPartitioning mpart;
1366:   const char      *prefix;
1367:   PetscErrorCode  (*f)(Mat,MatReuse,Mat*);
1368:   PetscMPIInt     size;
1369:   PetscInt        i,j,rstart,rend,bs;
1370:   PetscBool       isbaij = PETSC_FALSE,foundpart = PETSC_FALSE;
1371:   Mat             Ad     = NULL, adj;
1372:   IS              ispart,isnumb,*is;
1373:   PetscErrorCode  ierr;

1376:   if (nloc < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of local subdomains must be > 0, got nloc = %D",nloc);

1378:   /* Get prefix, row distribution, and block size */
1379:   MatGetOptionsPrefix(A,&prefix);
1380:   MatGetOwnershipRange(A,&rstart,&rend);
1381:   MatGetBlockSize(A,&bs);
1382:   if (rstart/bs*bs != rstart || rend/bs*bs != rend) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"bad row distribution [%D,%D) for matrix block size %D",rstart,rend,bs);

1384:   /* Get diagonal block from matrix if possible */
1385:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1386:   PetscObjectQueryFunction((PetscObject)A,"MatGetDiagonalBlock_C",&f);
1387:   if (f) {
1388:     MatGetDiagonalBlock(A,&Ad);
1389:   } else if (size == 1) {
1390:     Ad = A;
1391:   }
1392:   if (Ad) {
1393:     PetscObjectTypeCompare((PetscObject)Ad,MATSEQBAIJ,&isbaij);
1394:     if (!isbaij) {PetscObjectTypeCompare((PetscObject)Ad,MATSEQSBAIJ,&isbaij);}
1395:   }
1396:   if (Ad && nloc > 1) {
1397:     PetscBool  match,done;
1398:     /* Try to setup a good matrix partitioning if available */
1399:     MatPartitioningCreate(PETSC_COMM_SELF,&mpart);
1400:     PetscObjectSetOptionsPrefix((PetscObject)mpart,prefix);
1401:     MatPartitioningSetFromOptions(mpart);
1402:     PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGCURRENT,&match);
1403:     if (!match) {
1404:       PetscObjectTypeCompare((PetscObject)mpart,MATPARTITIONINGSQUARE,&match);
1405:     }
1406:     if (!match) { /* assume a "good" partitioner is available */
1407:       PetscInt       na;
1408:       const PetscInt *ia,*ja;
1409:       MatGetRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1410:       if (done) {
1411:         /* Build adjacency matrix by hand. Unfortunately a call to
1412:            MatConvert(Ad,MATMPIADJ,MAT_INITIAL_MATRIX,&adj) will
1413:            remove the block-aij structure and we cannot expect
1414:            MatPartitioning to split vertices as we need */
1415:         PetscInt       i,j,len,nnz,cnt,*iia=0,*jja=0;
1416:         const PetscInt *row;
1417:         nnz = 0;
1418:         for (i=0; i<na; i++) { /* count number of nonzeros */
1419:           len = ia[i+1] - ia[i];
1420:           row = ja + ia[i];
1421:           for (j=0; j<len; j++) {
1422:             if (row[j] == i) { /* don't count diagonal */
1423:               len--; break;
1424:             }
1425:           }
1426:           nnz += len;
1427:         }
1428:         PetscMalloc1(na+1,&iia);
1429:         PetscMalloc1(nnz,&jja);
1430:         nnz    = 0;
1431:         iia[0] = 0;
1432:         for (i=0; i<na; i++) { /* fill adjacency */
1433:           cnt = 0;
1434:           len = ia[i+1] - ia[i];
1435:           row = ja + ia[i];
1436:           for (j=0; j<len; j++) {
1437:             if (row[j] != i) jja[nnz+cnt++] = row[j]; /* if not diagonal */
1438:           }
1439:           nnz += cnt;
1440:           iia[i+1] = nnz;
1441:         }
1442:         /* Partitioning of the adjacency matrix */
1443:         MatCreateMPIAdj(PETSC_COMM_SELF,na,na,iia,jja,NULL,&adj);
1444:         MatPartitioningSetAdjacency(mpart,adj);
1445:         MatPartitioningSetNParts(mpart,nloc);
1446:         MatPartitioningApply(mpart,&ispart);
1447:         ISPartitioningToNumbering(ispart,&isnumb);
1448:         MatDestroy(&adj);
1449:         foundpart = PETSC_TRUE;
1450:       }
1451:       MatRestoreRowIJ(Ad,0,PETSC_TRUE,isbaij,&na,&ia,&ja,&done);
1452:     }
1453:     MatPartitioningDestroy(&mpart);
1454:   }
1455:   PetscMalloc1(nloc,&is);
1456:   if (!foundpart) {

1458:     /* Partitioning by contiguous chunks of rows */

1460:     PetscInt mbs   = (rend-rstart)/bs;
1461:     PetscInt start = rstart;
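    /* Illustration only (not in the original source): the loop below spreads the remainder
       mbs % nloc over the first subdomains, e.g. with mbs = 10 block rows and nloc = 3
       the block counts are 4, 3, 3 (each multiplied by bs rows). */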
1462:     for (i=0; i<nloc; i++) {
1463:       PetscInt count = (mbs/nloc + ((mbs % nloc) > i)) * bs;
1464:       ISCreateStride(PETSC_COMM_SELF,count,start,1,&is[i]);
1465:       start += count;
1466:     }

1468:   } else {

1470:     /* Partitioning by adjacency of diagonal block  */

1472:     const PetscInt *numbering;
1473:     PetscInt       *count,nidx,*indices,*newidx,start=0;
1474:     /* Get node count in each partition */
1475:     PetscMalloc1(nloc,&count);
1476:     ISPartitioningCount(ispart,nloc,count);
1477:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1478:       for (i=0; i<nloc; i++) count[i] *= bs;
1479:     }
1480:     /* Build indices from node numbering */
1481:     ISGetLocalSize(isnumb,&nidx);
1482:     PetscMalloc1(nidx,&indices);
1483:     for (i=0; i<nidx; i++) indices[i] = i; /* needs to be initialized */
1484:     ISGetIndices(isnumb,&numbering);
1485:     PetscSortIntWithPermutation(nidx,numbering,indices);
1486:     ISRestoreIndices(isnumb,&numbering);
1487:     if (isbaij && bs > 1) { /* adjust for the block-aij case */
1488:       PetscMalloc1(nidx*bs,&newidx);
1489:       for (i=0; i<nidx; i++) {
1490:         for (j=0; j<bs; j++) newidx[i*bs+j] = indices[i]*bs + j;
1491:       }
1492:       PetscFree(indices);
1493:       nidx   *= bs;
1494:       indices = newidx;
1495:     }
1496:     /* Shift to get global indices */
1497:     for (i=0; i<nidx; i++) indices[i] += rstart;

1499:     /* Build the index sets for each block */
1500:     for (i=0; i<nloc; i++) {
1501:       ISCreateGeneral(PETSC_COMM_SELF,count[i],&indices[start],PETSC_COPY_VALUES,&is[i]);
1502:       ISSort(is[i]);
1503:       start += count[i];
1504:     }

1506:     PetscFree(count);
1507:     PetscFree(indices);
1508:     ISDestroy(&isnumb);
1509:     ISDestroy(&ispart);
1510:   }
1511:   *iis = is;
1512:   return(0);
1513: }
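/*
   Example (a sketch, assuming an assembled Mat A and a PC of type PCGASM named pc):
   create two nonoverlapping local subdomains per process and hand them to the PC;
   the overlapping outer subdomains are then generated in PCSetUp_GASM() according to
   the overlap requested with PCGASMSetOverlap().

     IS *iis;
     PCGASMCreateLocalSubdomains(A,2,&iis);
     PCGASMSetSubdomains(pc,2,iis,NULL);
     PCGASMDestroySubdomains(2,&iis,NULL);  /* per the PCGASMDestroySubdomains() notes,
                                               call this after PCGASMSetSubdomains() */
*/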

1517: PETSC_INTERN PetscErrorCode  PCGASMCreateStraddlingSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1518: {
1519:   PetscErrorCode  ierr;

1522:   MatSubdomainsCreateCoalesce(A,N,n,iis);
1523:   return(0);
1524: }



1530: /*@C
1531:    PCGASMCreateSubdomains - Creates n index sets defining n nonoverlapping subdomains for the additive
1532:    Schwarz preconditioner for any problem based on its matrix.

1534:    Collective

1536:    Input Parameters:
1537: +  A       - The global matrix operator
1538: -  N       - the number of global subdomains requested

1540:    Output Parameters:
1541: +  n   - the number of subdomains created on this processor
1542: -  iis - the array of index sets defining the local inner subdomains (on which the correction is applied)

1544:    Level: advanced

1546:    Note: When N >= A's communicator size, each subdomain is local -- contained within a single processor.
1547:          When N < size, the subdomains are 'straddling', i.e., they straddle processor boundaries and are no longer local.
1548:          The resulting subdomains can be used in PCGASMSetSubdomains(pc,n,iis,NULL).  The overlapping
1549:          outer subdomains will be automatically generated from these according to the requested amount of
1550:          overlap; this is currently supported only with local subdomains.


1553: .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid

1555: .seealso: PCGASMSetSubdomains(), PCGASMDestroySubdomains()
1556: @*/
1557: PetscErrorCode  PCGASMCreateSubdomains(Mat A,PetscInt N,PetscInt *n,IS *iis[])
1558: {
1559:   PetscMPIInt     size;
1560:   PetscErrorCode  ierr;


1566:   if (N < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Number of subdomains must be > 0, N = %D",N);
1567:   MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
1568:   if (N >= size) {
1569:     *n = N/size + (N%size);
1570:     PCGASMCreateLocalSubdomains(A,*n,iis);
1571:   } else {
1572:     PCGASMCreateStraddlingSubdomains(A,N,n,iis);
1573:   }
1574:   return(0);
1575: }
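/*
   Example (a sketch, assuming an assembled Mat A and a PCGASM pc): request a total of
   16 global subdomains; the routine returns in n the number of index sets owned by this
   process.  With N >= the communicator size the subdomains are local; with N < size they
   straddle process boundaries, as described in the Note above.

     PetscInt n;
     IS       *iis;
     PCGASMCreateSubdomains(A,16,&n,&iis);
     PCGASMSetSubdomains(pc,n,iis,NULL);
     PCGASMDestroySubdomains(n,&iis,NULL);
*/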

1579: /*@C
1580:    PCGASMDestroySubdomains - Destroys the index sets created with
1581:    PCGASMCreateSubdomains() or PCGASMCreateSubdomains2D(). Should be
1582:    called after setting subdomains with PCGASMSetSubdomains().

1584:    Collective

1586:    Input Parameters:
1587: +  n   - the number of index sets
1588: .  iis - the array of inner subdomains
1589: -  ois - the array of outer subdomains, can be NULL

1591:    Level: intermediate

1593:    Notes: this is merely a convenience subroutine that walks each list,
1594:    destroys each IS on the list, and then frees the list. At the end the
1595:    list pointers are set to NULL.

1597: .keywords: PC, GASM, additive Schwarz, create, subdomains, unstructured grid

1599: .seealso: PCGASMCreateSubdomains(), PCGASMSetSubdomains()
1600: @*/
1601: PetscErrorCode  PCGASMDestroySubdomains(PetscInt n,IS **iis,IS **ois)
1602: {
1603:   PetscInt       i;

1607:   if (n <= 0) return(0);
1608:   if (ois) {
1610:     if (*ois) {
1612:       for (i=0; i<n; i++) {
1613:         ISDestroy(&(*ois)[i]);
1614:       }
1615:       PetscFree((*ois));
1616:     }
1617:   }
1618:   if (iis) {
1620:     if (*iis) {
1622:       for (i=0; i<n; i++) {
1623:         ISDestroy(&(*iis)[i]);
1624:       }
1625:       PetscFree((*iis));
1626:     }
1627:   }
1628:   return(0);
1629: }


1632: #define PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,xleft_loc,ylow_loc,xright_loc,yhigh_loc,n) \
1633:   {                                                                                                       \
1634:     PetscInt first_row = first/M, last_row = last/M+1;                                                     \
1635:     /*                                                                                                    \
1636:      Compute ylow_loc and yhigh_loc so that (ylow_loc,xleft) and (yhigh_loc,xright) are the corners       \
1637:      of the bounding box of the intersection of the subdomain with the local ownership range (local       \
1638:      subdomain).                                                                                          \
1639:      Also compute xleft_loc and xright_loc as the lower and upper bounds on the first and last rows       \
1640:      of the intersection.                                                                                 \
1641:     */                                                                                                    \
1642:     /* ylow_loc is the grid row containing the first element of the local subdomain */                    \
1643:     *ylow_loc = PetscMax(first_row,ylow);                                                                    \
1644:     /* xleft_loc is the offset of first element of the local subdomain within its grid row (might actually be outside the local subdomain) */ \
1645:     *xleft_loc = *ylow_loc==first_row ? PetscMax(first%M,xleft) : xleft;                                                                            \
1646:     /* yhigh_loc is the grid row above the last local subdomain element */                                                                    \
1647:     *yhigh_loc = PetscMin(last_row,yhigh);                                                                                                     \
1648:     /* xright_loc is the offset of the end of the local subdomain within its grid row (might actually be outside the local subdomain) */   \
1649:     *xright_loc = *yhigh_loc==last_row ? PetscMin(xright,last%M) : xright;                                                                          \
1650:     /* Now compute the size of the local subdomain n. */ \
1651:     *n = 0;                                               \
1652:     if (*ylow_loc < *yhigh_loc) {                           \
1653:       PetscInt width = xright-xleft;                     \
1654:       *n += width*(*yhigh_loc-*ylow_loc-1);                 \
1655:       *n += PetscMin(PetscMax(*xright_loc-xleft,0),width); \
1656:       *n -= PetscMin(PetscMax(*xleft_loc-xleft,0), width); \
1657:     } \
1658:   }
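/*
   Worked example for the macro above (illustration only): take a grid with M = 10, a
   subdomain with xleft = 2, xright = 9, ylow = 1, yhigh = 6, and a local ownership range
   [first,last) = [30,70), so first_row = 3 and last_row = 8.  Then
   ylow_loc = max(3,1) = 3, xleft_loc = max(30%10,2) = 2, yhigh_loc = min(8,6) = 6, and
   xright_loc = 9 (since yhigh_loc != last_row).  The count is
   n = 7*(6-3-1) + min(9-2,7) - min(2-2,7) = 14 + 7 - 0 = 21, i.e. grid rows 3..5,
   columns 2..8, all of whose indices indeed lie in [30,70).
*/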



1664: /*@
1665:    PCGASMCreateSubdomains2D - Creates the index sets for the overlapping Schwarz
1666:    preconditioner for a two-dimensional problem on a regular grid.

1668:    Collective

1670:    Input Parameters:
1671: +  M, N               - the global number of grid points in the x and y directions
1672: .  Mdomains, Ndomains - the global number of subdomains in the x and y directions
1673: .  dof                - degrees of freedom per node
1674: -  overlap            - overlap in mesh lines

1676:    Output Parameters:
1677: +  Nsub - the number of local subdomains created
1678: .  iis  - array of index sets defining inner (nonoverlapping) subdomains
1679: -  ois  - array of index sets defining outer (overlapping, if overlap > 0) subdomains


1682:    Level: advanced

1684: .keywords: PC, GASM, additive Schwarz, create, subdomains, 2D, regular grid

1686: .seealso: PCGASMSetSubdomains(), PCGASMGetSubKSP(), PCGASMSetOverlap()
1687: @*/
1688: PetscErrorCode  PCGASMCreateSubdomains2D(PC pc,PetscInt M,PetscInt N,PetscInt Mdomains,PetscInt Ndomains,PetscInt dof,PetscInt overlap,PetscInt *nsub,IS **iis,IS **ois)
1689: {
1691:   PetscMPIInt    size, rank;
1692:   PetscInt       i, j;
1693:   PetscInt       maxheight, maxwidth;
1694:   PetscInt       xstart, xleft, xright, xleft_loc, xright_loc;
1695:   PetscInt       ystart, ylow,  yhigh,  ylow_loc,  yhigh_loc;
1696:   PetscInt       x[2][2], y[2][2], n[2];
1697:   PetscInt       first, last;
1698:   PetscInt       nidx, *idx;
1699:   PetscInt       ii,jj,s,q,d;
1700:   PetscInt       k,kk;
1701:   PetscMPIInt    color;
1702:   MPI_Comm       comm, subcomm;
1703:   IS             **xis = 0, **is = ois, **is_local = iis;

1706:   PetscObjectGetComm((PetscObject)pc, &comm);
1707:   MPI_Comm_size(comm, &size);
1708:   MPI_Comm_rank(comm, &rank);
1709:   MatGetOwnershipRange(pc->pmat, &first, &last);
1710:   if (first%dof || last%dof) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Matrix row partitioning unsuitable for domain decomposition: local row range (%D,%D) "
1711:                                       "does not respect the number of degrees of freedom per grid point %D", first, last, dof);

1713:   /* Determine the number of domains with nonzero intersections with the local ownership range. */
1714:   s      = 0;
1715:   ystart = 0;
1716:   for (j=0; j<Ndomains; ++j) {
1717:     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1718:     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1719:     /* Vertical domain limits with an overlap. */
1720:     ylow   = PetscMax(ystart - overlap,0);
1721:     yhigh  = PetscMin(ystart + maxheight + overlap,N);
1722:     xstart = 0;
1723:     for (i=0; i<Mdomains; ++i) {
1724:       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1725:       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1726:       /* Horizontal domain limits with an overlap. */
1727:       xleft  = PetscMax(xstart - overlap,0);
1728:       xright = PetscMin(xstart + maxwidth + overlap,M);
1729:       /*
1730:          Determine whether this subdomain intersects this processor's ownership range of pc->pmat.
1731:       */
1732:       PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1733:       if (nidx) ++s;
1734:       xstart += maxwidth;
1735:     } /* for (i = 0; i < Mdomains; ++i) */
1736:     ystart += maxheight;
1737:   } /* for (j = 0; j < Ndomains; ++j) */

1739:   /* Now we can allocate the necessary number of ISs. */
1740:   *nsub  = s;
1741:   PetscMalloc1(*nsub,is);
1742:   PetscMalloc1(*nsub,is_local);
1743:   s      = 0;
1744:   ystart = 0;
1745:   for (j=0; j<Ndomains; ++j) {
1746:     maxheight = N/Ndomains + ((N % Ndomains) > j); /* Maximal height of subdomain */
1747:     if (maxheight < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the vertical direction for mesh height %D", Ndomains, N);
1748:     /* Vertical domain limits with an overlap. */
1749:     y[0][0] = PetscMax(ystart - overlap,0);
1750:     y[0][1] = PetscMin(ystart + maxheight + overlap,N);
1751:     /* Vertical domain limits without an overlap. */
1752:     y[1][0] = ystart;
1753:     y[1][1] = PetscMin(ystart + maxheight,N);
1754:     xstart  = 0;
1755:     for (i=0; i<Mdomains; ++i) {
1756:       maxwidth = M/Mdomains + ((M % Mdomains) > i); /* Maximal width of subdomain */
1757:       if (maxwidth < 2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Too many %D subdomains in the horizontal direction for mesh width %D", Mdomains, M);
1758:       /* Horizontal domain limits with an overlap. */
1759:       x[0][0] = PetscMax(xstart - overlap,0);
1760:       x[0][1] = PetscMin(xstart + maxwidth + overlap,M);
1761:       /* Horizontal domain limits without an overlap. */
1762:       x[1][0] = xstart;
1763:       x[1][1] = PetscMin(xstart+maxwidth,M);
1764:       /*
1765:          Determine whether this domain intersects this processor's ownership range of pc->pmat.
1766:          Do this twice: first for the domains with overlaps, and once without.
1767:          During the first pass create the subcommunicators, and use them on the second pass as well.
1768:       */
1769:       for (q = 0; q < 2; ++q) {
1770:         PetscBool split = PETSC_FALSE;
1771:         /*
1772:           The domain limits (xleft, xright) and (ylow, yhigh) are adjusted
1773:           according to whether the domain with or without an overlap is considered.
1774:         */
1775:         xleft = x[q][0]; xright = x[q][1];
1776:         ylow  = y[q][0]; yhigh  = y[q][1];
1777:         PCGASMLocalSubdomainBounds2D(M,N,xleft,ylow,xright,yhigh,first,last,(&xleft_loc),(&ylow_loc),(&xright_loc),(&yhigh_loc),(&nidx));
1778:         nidx *= dof;
1779:         n[q]  = nidx;
1780:         /*
1781:          Based on the counted number of indices in the local domain *with an overlap*,
1782:          construct a subcommunicator of all the processors supporting this domain.
1783:          Observe that a domain with an overlap might have nontrivial local support,
1784:          while the domain without an overlap might not.  Hence, the decision to participate
1785:          in the subcommunicator must be based on the domain with an overlap.
1786:          */
1787:         if (q == 0) {
1788:           if (nidx) color = 1;
1789:           else color = MPI_UNDEFINED;
1790:           MPI_Comm_split(comm, color, rank, &subcomm);
1791:           split = PETSC_TRUE;
1792:         }
1793:         /*
1794:          Proceed only if the number of local indices *with an overlap* is nonzero.
1795:          */
1796:         if (n[0]) {
1797:           if (q == 0) xis = is;
1798:           if (q == 1) {
1799:             /*
1800:              The IS for the no-overlap subdomain shares a communicator with the overlapping domain.
1801:              Moreover, if the overlap is zero, the two ISs are identical.
1802:              */
1803:             if (overlap == 0) {
1804:               (*is_local)[s] = (*is)[s];
1805:               PetscObjectReference((PetscObject)(*is)[s]);
1806:               continue;
1807:             } else {
1808:               xis     = is_local;
1809:               subcomm = ((PetscObject)(*is)[s])->comm;
1810:             }
1811:           } /* if (q == 1) */
1812:           idx  = NULL;
1813:           PetscMalloc1(nidx,&idx);
1814:           if (nidx) {
1815:             k = 0;
1816:             for (jj=ylow_loc; jj<yhigh_loc; ++jj) {
1817:               PetscInt x0 = (jj==ylow_loc) ? xleft_loc : xleft;
1818:               PetscInt x1 = (jj==yhigh_loc-1) ? xright_loc : xright;
1819:               kk = dof*(M*jj + x0);
1820:               for (ii=x0; ii<x1; ++ii) {
1821:                 for (d = 0; d < dof; ++d) {
1822:                   idx[k++] = kk++;
1823:                 }
1824:               }
1825:             }
1826:           }
1827:           ISCreateGeneral(subcomm,nidx,idx,PETSC_OWN_POINTER,(*xis)+s);
1828:           if (split) {
1829:             MPI_Comm_free(&subcomm);
1830:           }
1831:         }/* if (n[0]) */
1832:       }/* for (q = 0; q < 2; ++q) */
1833:       if (n[0]) ++s;
1834:       xstart += maxwidth;
1835:     } /* for (i = 0; i < Mdomains; ++i) */
1836:     ystart += maxheight;
1837:   } /* for (j = 0; j < Ndomains; ++j) */
1838:   return(0);
1839: }
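/*
   Example (a sketch, assuming pc is a PCGASM whose pc->pmat comes from a structured
   M x N grid with dof unknowns per node; M, N, and dof are placeholders here): build a
   4 x 2 array of subdomains with one mesh line of overlap and pass both the inner and
   outer index sets to the PC.

     PetscInt nsub;
     IS       *iis,*ois;
     PCGASMCreateSubdomains2D(pc,M,N,4,2,dof,1,&nsub,&iis,&ois);
     PCGASMSetSubdomains(pc,nsub,iis,ois);
     PCGASMDestroySubdomains(nsub,&iis,&ois);
*/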

1843: /*@C
1844:     PCGASMGetSubdomains - Gets the subdomains supported on this processor
1845:     for the additive Schwarz preconditioner.

1847:     Not Collective

1849:     Input Parameter:
1850: .   pc - the preconditioner context

1852:     Output Parameters:
1853: +   n   - the number of subdomains for this processor (default value = 1)
1854: .   iis - the index sets that define the inner subdomains (without overlap) supported on this processor (can be NULL)
1855: -   ois - the index sets that define the outer subdomains (with overlap) supported on this processor (can be NULL)


1858:     Notes:
1859:     The user is responsible for destroying the ISs and freeing the returned arrays.
1860:     The IS numbering is in the parallel, global numbering of the vector.

1862:     Level: advanced

1864: .keywords: PC, GASM, get, subdomains, additive Schwarz

1866: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(), PCGASMCreateSubdomains2D(),
1867:           PCGASMSetSubdomains(), PCGASMGetSubmatrices()
1868: @*/
1869: PetscErrorCode  PCGASMGetSubdomains(PC pc,PetscInt *n,IS *iis[],IS *ois[])
1870: {
1871:   PC_GASM        *osm;
1873:   PetscBool      match;
1874:   PetscInt       i;

1878:   PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1879:   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Incorrect object type: expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1880:   osm = (PC_GASM*)pc->data;
1881:   if (n) *n = osm->n;
1882:   if (iis) {
1883:     PetscMalloc1(osm->n, iis);
1884:   }
1885:   if (ois) {
1886:     PetscMalloc1(osm->n, ois);
1887:   }
1888:   if (iis || ois) {
1889:     for (i = 0; i < osm->n; ++i) {
1890:       if (iis) (*iis)[i] = osm->iis[i];
1891:       if (ois) (*ois)[i] = osm->ois[i];
1892:     }
1893:   }
1894:   return(0);
1895: }
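/*
   Example (a sketch): query the subdomains of a set-up PCGASM pc and release the arrays
   allocated by this routine.  Per the Notes above, the caller owns the returned arrays
   (and is responsible for the ISs as well).

     PetscInt n,len;
     IS       *iis,*ois;
     PCGASMGetSubdomains(pc,&n,&iis,&ois);
     ISGetLocalSize(iis[0],&len);   /* inspect the first inner subdomain */
     PetscFree(iis);
     PetscFree(ois);
*/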

1899: /*@C
1900:     PCGASMGetSubmatrices - Gets the local submatrices (for this processor
1901:     only) for the additive Schwarz preconditioner.

1903:     Not Collective

1905:     Input Parameter:
1906: .   pc - the preconditioner context

1908:     Output Parameters:
1909: +   n   - the number of matrices for this processor (default value = 1)
1910: -   mat - the matrices

1912:     Notes: matrices returned by this routine have the same communicators as the index sets (IS)
1913:            used to define subdomains in PCGASMSetSubdomains()
1914:     Level: advanced

1916: .keywords: PC, GASM, set, local, subdomains, additive Schwarz, block Jacobi

1918: .seealso: PCGASMSetOverlap(), PCGASMGetSubKSP(),
1919:           PCGASMCreateSubdomains2D(), PCGASMSetSubdomains(), PCGASMGetSubdomains()
1920: @*/
1921: PetscErrorCode  PCGASMGetSubmatrices(PC pc,PetscInt *n,Mat *mat[])
1922: {
1923:   PC_GASM        *osm;
1925:   PetscBool      match;

1931:   if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call after KSPSetUp() or PCSetUp().");
1932:   PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1933:   if (!match) SETERRQ2(PetscObjectComm((PetscObject)pc), PETSC_ERR_ARG_WRONG, "Expected %s, got %s instead", PCGASM, ((PetscObject)pc)->type_name);
1934:   osm = (PC_GASM*)pc->data;
1935:   if (n) *n = osm->n;
1936:   if (mat) *mat = osm->pmat;
1937:   return(0);
1938: }
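/*
   Example (a sketch): after PCSetUp() (or KSPSetUp()) the subdomain matrices can be
   examined; as noted above, they share communicators with the subdomain index sets.

     PetscInt n,m,nn;
     Mat      *submats;
     PCSetUp(pc);
     PCGASMGetSubmatrices(pc,&n,&submats);
     MatGetSize(submats[0],&m,&nn);   /* dimensions of the first subdomain block */
*/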

1942: /*@
1943:     PCGASMSetUseDMSubdomains - Indicates whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1944:     Logically Collective

1946:     Input Parameter:
1947: +   pc  - the preconditioner
1948: -   flg - boolean indicating whether to use subdomains defined by the DM

1950:     Options Database Key:
1951: .   -pc_gasm_dm_subdomains <bool> - use subdomains defined by the DM (overridden by -pc_gasm_total_subdomains or -pc_gasm_overlap)

1953:     Level: intermediate

1955:     Notes:
1956:     PCGASMSetSubdomains(), PCGASMSetTotalSubdomains() and PCGASMSetOverlap() take precedence over PCGASMSetUseDMSubdomains(),
1957:     so calling PCGASMSetSubdomains() with nontrivial subdomain ISs, or calling either PCGASMSetTotalSubdomains() or PCGASMSetOverlap(),
1958:     automatically turns the use of DM-defined subdomains off.

1960: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz

1962: .seealso: PCGASMGetUseDMSubdomains(), PCGASMSetSubdomains(), PCGASMSetOverlap()
1963:           PCGASMCreateSubdomains2D()
1964: @*/
1965: PetscErrorCode  PCGASMSetUseDMSubdomains(PC pc,PetscBool flg)
1966: {
1967:   PC_GASM        *osm = (PC_GASM*)pc->data;
1969:   PetscBool      match;

1974:   if (pc->setupcalled) SETERRQ(((PetscObject)pc)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot be called after the PC has been set up.");
1975:   PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
1976:   if (match) {
1977:     if (!osm->user_subdomains && osm->N == PETSC_DETERMINE && osm->overlap < 0) {
1978:       osm->dm_subdomains = flg;
1979:     }
1980:   }
1981:   return(0);
1982: }
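/*
   Example (a sketch, assuming the PC has been given a DM that supports
   DMCreateDomainDecomposition()): request DM-defined subdomains before setup; as noted
   above, explicitly setting subdomains or the overlap instead overrides this choice.

     PCGASMSetUseDMSubdomains(pc,PETSC_TRUE);   /* or -pc_gasm_dm_subdomains */
     PCSetUp(pc);
*/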

1986: /*@
1987:     PCGASMGetUseDMSubdomains - Returns flag indicating whether to use DMCreateDomainDecomposition() to define the subdomains, whenever possible.
1988:     Not Collective

1990:     Input Parameter:
1991: .   pc  - the preconditioner

1993:     Output Parameter:
1994: .   flg - boolean indicating whether to use subdomains defined by the DM

1996:     Level: intermediate

1998: .keywords: PC, ASM, DM, set, subdomains, additive Schwarz

2000: .seealso: PCGASMSetUseDMSubdomains(), PCGASMSetOverlap()
2001:           PCGASMCreateSubdomains2D()
2002: @*/
2003: PetscErrorCode  PCGASMGetUseDMSubdomains(PC pc,PetscBool* flg)
2004: {
2005:   PC_GASM        *osm = (PC_GASM*)pc->data;
2007:   PetscBool      match;

2012:   PetscObjectTypeCompare((PetscObject)pc,PCGASM,&match);
2013:   if (match) {
2014:     if (flg) *flg = osm->dm_subdomains;
2015:   }
2016:   return(0);
2017: }