Actual source code: ml.c

  1: #ifdef PETSC_RCS_HEADER
  2: static char vcid[] = "$Id: mlNew.c,v 1.18 2001/01/27 21:32:36 knepley Exp $";
  3: #endif
  4: /*
  5:    Defines a multilevel preconditioner due to Vivek Sarin for AIJ matrices
  6: */
 7:  #include "src/sles/pc/pcimpl.h"
 8:  #include "src/mesh/impls/triangular/triimpl.h"
 9:  #include "src/grid/gridimpl.h"
 10: #include <fcntl.h>
 11: #if defined(PETSC_HAVE_UNISTD_H)
 12: #include <unistd.h>
 13: #endif
 14: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
 15:   #include <sys/procfs.h>
 16:   #include <sys/hwperftypes.h>
 17:   #include <sys/hwperfmacros.h>
 18: #endif
 19:  #include "ml.h"

 21: int PC_MLEvents[PC_ML_MAX_EVENTS];

 23: #undef  __FUNCT__
 24: #define __FUNCT__ "PCMultiLevelInitializePackage"
 25: /*@C
 26:   PCMultiLevelInitializePackage - This function initializes everything in the ML package. It is called
 27:   from PetscDLLibraryRegister() when using dynamic libraries, and on the first call to PCCreate()
 28:   when using static libraries.

 30:   Input Parameter:
 31:   path - The dynamic library path, or PETSC_NULL

 33:   Level: developer

 35: .keywords: PC, multilevel, initialize, package
 36: .seealso: PetscInitialize()
 37: @*/
 38: int PCMultiLevelInitializePackage(char *path) {
 39:   static PetscTruth initialized = PETSC_FALSE;
 40:   int               ierr;

 43:   if (initialized == PETSC_TRUE) return(0);
 44:   initialized = PETSC_TRUE;
 45:   /* Register Classes */
 46:   /* Register Constructors and Serializers */
 47:   /* Register Events */
 48:   PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpInit],                        "MLSetUpInit",      PC_COOKIE);
 49:   PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpConstrained],                 "MLSetUpConstr",    PC_COOKIE);
 50:   PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpConstrainedBd],               "MLSetUpConstrBd",  PC_COOKIE);
 51:   PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpParallel],                    "MLSetUpParallel",  PC_COOKIE);
 52:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReducePartitionMesh],              "MLRdPartMesh",     PC_COOKIE);
 53:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReducePartitionRowCol],            "MLRdPartRowCol",   PC_COOKIE);
 54:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceFactor],                     "MLRdFactor",       PC_COOKIE);
 55:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceBdGrad],                     "MLRdBdGrad",       PC_COOKIE);
 56:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceBdGradExtract],              "MLRdBdGradExtrct", PC_COOKIE);
 57:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceBdGradRowPartLocalToGlobal], "MLRdBdGrdRwPtL2G", PC_COOKIE);
 58:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceShrinkMesh],                 "MLRdShrinkMesh",   PC_COOKIE);
 59:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ApplySymmetricLeftParallel],       "MLApplySymmLeftP", PC_COOKIE);
 60:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ApplySymmetricRightParallel],      "MLApplySymmRghtP", PC_COOKIE);
 61:   PetscLogEventRegister(&PC_MLEvents[PC_ML_QRFactorization],                  "MLQRFact",         PC_COOKIE);
 62:   PetscLogEventRegister(&PC_MLEvents[PC_ML_ApplyQR],                          "MLQRApply",        PC_COOKIE);
 63:   PetscLogEventRegister(&PC_MLEvents[PC_ML_CreateBdGrad],                     "MLCreateBdGrad",   PC_COOKIE);
 64:   /* Process info exclusions */
 65:   /* Process summary exclusions */
 66:   /* Add options checkers */
 67:   return(0);
 68: }
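/* A minimal usage sketch: the helper name below is illustrative only, and
   PC_MLLogEventBegin/PC_MLLogEventEnd (used throughout this file) are assumed to be
   thin wrappers over PetscLogEventBegin/PetscLogEventEnd driven by the PC_MLEvents[]
   table registered above, so each phase is reported under its registered name by
   -log_summary. */
static int PCMultiLevelTimedPhase_Sketch(PC pc)
{
  PC_MLLogEventBegin(PC_ML_SetUpInit, pc, 0, 0, 0);
  /* ... work belonging to the "MLSetUpInit" phase ... */
  PC_MLLogEventEnd(PC_ML_SetUpInit, pc, 0, 0, 0);
  return(0);
}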

 70: #undef  __FUNCT__
 71: #define __FUNCT__ "PCConvert_Multilevel"
 72: int PCConvert_Multilevel(PC pc)
 73: {
 74:   PC_Multilevel *ml = (PC_Multilevel *) pc->data;
 75: #ifdef PETSC_HAVE_MATHEMATICA
 76:   int            ierr;
 77: #endif

 80:   /* Convert a Mathematica MLData object into a PC_Multilevel object */
 81:   if (ml->mathViewer != PETSC_NULL) {
 82: #ifdef PETSC_HAVE_MATHEMATICA
 83:     PetscViewerMathematicaMultiLevelConvert(ml->mathViewer, pc);
 84: #else
 85:     SETERRQ(PETSC_ERR_SUP, " ");
 86: #endif
 87:   }
 88:   return(0);
 89: }

 91: #undef __FUNCT__
 92: #define __FUNCT__ "PCDestroyStructures_Multilevel"
 93: static int PCDestroyStructures_Multilevel(PC pc)
 94: {
 95:   PC_Multilevel *ml = (PC_Multilevel *) pc->data;
 96:   int            numProcs, numRows, numCols;
 97:   int            level, part;
 98:   int            ierr;

101:   /* Destroy gradient structures */
102:   MPI_Comm_size(pc->comm, &numProcs);
103:   VarOrderingDestroy(ml->sOrder);
104:   VarOrderingDestroy(ml->tOrder);
105:   LocalVarOrderingDestroy(ml->sLocOrder);
106:   LocalVarOrderingDestroy(ml->tLocOrder);
107:   GMatDestroy(ml->B);
108:   if (ml->reduceSol != PETSC_NULL)
109:     {GVecDestroy(ml->reduceSol);                                                                   }
110:   if (ml->diag      != PETSC_NULL)
111:     {GVecDestroy(ml->diag);                                                                        }

113:   /* Destroy mesh structures */
114:   if (ml->useMath == PETSC_FALSE) {
115:     PetscFree(ml->numElements);
116:     PetscFree(ml->numVertices);
117:     for(level = 0; level <= ml->numLevels; level++) {
118:       PetscFree(ml->meshes[level][MESH_OFFSETS]);
119:       PetscFree(ml->meshes[level][MESH_ADJ]);
120:       PetscFree(ml->meshes[level][MESH_ELEM]);
121:       PetscFree(ml->meshes[level]);
122:     }
123:     PetscFree(ml->vertices);
124:     PetscFree(ml->meshes);
125:   }

127:   /* Destroy factorization structures */
128:   if (ml->useMath == PETSC_FALSE)
129:   {
130:     for(level = 0; level < ml->numLevels; level++)
131:     {
132:       if (ml->numPartitions[level] == 0)
133:         continue;
134:       for(part = 0; part < ml->numPartitions[level]; part++)
135:       {
136:         numRows = ml->numPartitionRows[level][part];
137:         numCols = ml->numPartitionCols[level][part];
138:         if (numRows > 0)
139:         {
140:           PetscFree(ml->factors[level][part][FACT_U]);
141:           if (PCMultiLevelDoQR_Private(pc, numRows, numCols) == PETSC_TRUE)
142:           {
143:             PetscFree(ml->factors[level][part][FACT_QR]);
144:             PetscFree(ml->factors[level][part][FACT_TAU]);
145:           }
146:         }
147:         if (numCols > 0)
148:         {
149:           PetscFree(ml->factors[level][part][FACT_DINV]);
150:           PetscFree(ml->factors[level][part][FACT_V]);
151:         }
152:         PetscFree(ml->factors[level][part]);
153:       }
154:       PetscFree(ml->factors[level]);
155:       MatDestroy(ml->grads[level]);
156:       VecDestroy(ml->bdReduceVecs[level]);
157:       VecDestroy(ml->colReduceVecs[level]);
158:       VecDestroy(ml->colReduceVecs2[level]);
159:     }
160:     PetscFree(ml->factors);
161:     PetscFree(ml->grads);
162:     PetscFree(ml->bdReduceVecs);
163:     PetscFree(ml->colReduceVecs);
164:     PetscFree(ml->colReduceVecs2);
165:     PetscFree(ml->interiorWork);
166:     PetscFree(ml->interiorWork2);
167:     if (ml->svdWork != PETSC_NULL)
168:       {PetscFree(ml->svdWork);                                                                     }
169:   }

171:   /* Destroy numbering structures */
172:   if (ml->useMath == PETSC_FALSE)
173:   {
174:     for(level = 0; level < ml->numLevels; level++)
175:     {
176:       if (ml->numPartitions[level] == 0)
177:         continue;
178:       for(part = 0; part < ml->numPartitions[level]; part++)
179:       {
180:         if (ml->numPartitionCols[level][part] > 0)
181:           {PetscFree(ml->colPartition[level][part]);                                               }
182:         if (ml->numPartitionRows[level][part] > 0)
183:           {PetscFree(ml->rowPartition[level][PART_ROW_INT][part]);                                 }
184:       }
185:       if (ml->numPartitionRows[level][ml->numPartitions[level]] > 0)
186:         {PetscFree(ml->rowPartition[level][PART_ROW_BD][0]);                                       }
187:       if (ml->numPartitionRows[level][ml->numPartitions[level]+1] > 0)
188:         {PetscFree(ml->rowPartition[level][PART_ROW_RES][0]);                                      }
189:       PetscFree(ml->rowPartition[level][PART_ROW_INT]);
190:       PetscFree(ml->rowPartition[level][PART_ROW_BD]);
191:       PetscFree(ml->rowPartition[level][PART_ROW_RES]);
192:       PetscFree(ml->numPartitionCols[level]);
193:       PetscFree(ml->colPartition[level]);
194:       PetscFree(ml->numPartitionRows[level]);
195:       PetscFree(ml->rowPartition[level]);
196:     }
197:     PetscFree(ml->numPartitions);
198:     PetscFree(ml->numPartitionCols);
199:     PetscFree(ml->numPartitionRows);
200:     PetscFree(ml->colPartition);
201:     PetscFree(ml->rowPartition);
202:   }
203:   if (ml->range    != PETSC_NULL) {
204:     PetscFree(ml->range);
205:   }
206:   VecScatterDestroy(ml->rangeScatter);
207:   if (ml->nullCols != PETSC_NULL) {
208:     PetscFree(ml->nullCols);
209:   }

211:   /* Destroy parallel structures */
212:   if (numProcs > 1)
213:   {
214:     MatDestroy(ml->locB);
215:     MatDestroy(ml->interfaceB);
216:     VecDestroy(ml->interiorRhs);
217:     VecScatterDestroy(ml->interiorScatter);
218:     VecDestroy(ml->interfaceRhs);
219:     VecScatterDestroy(ml->interfaceScatter);
220: #ifndef PETSC_HAVE_PLAPACK
221:     VecDestroy(ml->locInterfaceRhs);
222:     VecScatterDestroy(ml->locInterfaceScatter);
223: #endif
224:     VecDestroy(ml->interfaceColRhs);
225:     VecScatterDestroy(ml->interfaceColScatter);
226:     VecDestroy(ml->colWorkVec);
227:     PetscFree(ml->interfaceRows);
228:     PetscFree(ml->firstInterfaceRow);
229:     PetscFree(ml->firstNullCol);
230: #ifdef PETSC_HAVE_PLAPACK
231:     PLA_Obj_free(&ml->interfaceQR);
232:     PLA_Obj_free(&ml->interfaceTAU);
233:     PLA_Obj_free(&ml->PLArhsD);
234:     PLA_Obj_free(&ml->PLArhsP);
235:     PLA_Temp_free(&ml->PLAlayout);
236: #else
237:     PetscFree(ml->interfaceQR);
238:     PetscFree(ml->interfaceTAU);
239:     PetscFree(ml->work);
240: #endif
241:   }
242:   return(0);
243: }

245: #undef __FUNCT__
246: #define __FUNCT__ "PCSetUp_Multilevel"
247: static int PCSetUp_Multilevel(PC pc)
248: {
249:   PC_Multilevel   *ml = (PC_Multilevel *) pc->data;
250:   GMat             A  = pc->mat;
251:   Grid             grid;
252:   Mesh             mesh;
253:   Mesh_Triangular *tri;
254:   Partition        p;
255:   /* Mesh support */
256:   char            *typeName;
257:   int              numVertices, numNodes, numEdges, numElements;
258:   int             *bcNodes;
259:   /* Parallel support */
260:   PetscTruth       isInterfaceRow;
261:   int             *diagInterfaceRows;
262:   int             *offdiagInterfaceRows;
263:   int             *interfaceRows;
264:   int             *interfaceCols;
265:   int             *interiorRows;
266:   int              numNewRange;
267:   int             *newRange;
268:   int             *rangeRows;
269:   int             *nullCols;
270:   int              rowLen;
271:   int             *rowLens;
272:   int             *cols;
273:   PetscScalar     *vals;
274:   Vec              tempV, tempB;
275:   PetscScalar     *arrayB;
276:   IS               localIS, globalIS;
277:   int             *recvCounts;
278:   int              numProcs, rank, proc;
279:   PetscScalar      one  = 1.0;
280:   int              elem, newElem, corner, row, row2, col, nullCol, rangeCol, bc, count, lossCount;
281: #ifdef PETSC_HAVE_PLAPACK
282:   double          *locB_I;
283: #else
284:   int              minSize;
285: #endif
286: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
287:   int              pid, fd;
288:   char             procfile[64];
289:   int              event, event0, event1;
290:   int              gen_start, gen_read;
291:   long long        count0, globalCount0;
292:   long long        count1, globalCount1;
293:   hwperf_profevctrarg_t evctr_args;
294:   hwperf_cntr_t         counts;
295: #endif
296:   PetscTruth       opt, match;
297:   int              ierr;

300:   /* We need this so that setup is not called recursively by PCMultiLevelApplyXXX */
301:   if (pc->setupcalled > 0) {
302:     return(0);
303:   }
304:   pc->setupcalled = 2;

306:   /* Destroy old structures -- This should not happen anymore */
307:   if (ml->numLevels >= 0) {
308:     SETERRQ(PETSC_ERR_ARG_WRONG, "Should not reform an existing decomposition");
309:     /* PCDestroyStructures_Multilevel(pc);                                                          */
310:   }

312:   PC_MLLogEventBegin(PC_ML_SetUpInit, pc, 0, 0, 0);
313:   /* Get the grid */
314:   GMatGetGrid(A, &grid);
315:   ml->grid = grid;

317:   /* Get mesh information */
318:   GridGetMesh(grid, &mesh);
319:   MeshGetType(mesh, &typeName);
320:   PetscTypeCompare((PetscObject) mesh, MESH_TRIANGULAR_2D, &match);
321:   if (match == PETSC_FALSE) SETERRQ(PETSC_ERR_SUP, "Multilevel preconditioner only supports 2D triangular meshes");
322:   tri  = (Mesh_Triangular *) mesh->data;
323:   /* Set target partition size */
324:   ml->partSize  = 10;
325:   PetscOptionsGetInt(pc->prefix, "-pc_ml_partition_size", &ml->partSize, &opt);

327:   /* Setup parallel information */
328:   p        = mesh->part;
329:   numProcs = p->numProcs;
330:   rank     = p->rank;

332:   /* Enable Mathematica support */
333:   PetscOptionsHasName(pc->prefix, "-pc_ml_math", &opt);
334:   if (opt == PETSC_TRUE) {
335:     ml->useMath = PETSC_TRUE;
336:     PetscViewerMathematicaOpen(pc->comm, PETSC_DECIDE, PETSC_NULL, PETSC_NULL, &ml->mathViewer);
337:   }

339:   PetscOptionsHasName(pc->prefix, "-pc_ml_mesh_view_draw", &opt);
340:   if (opt == PETSC_TRUE) {
341:     PetscViewerDrawOpen(pc->comm, 0, 0, 0, 0, 300, 300, &ml->factorizationViewer);
342:   }

344:   /* Setup allocation */
345:   PetscOptionsGetInt(pc->prefix, "-pc_ml_max_levels", &ml->maxLevels, &opt);
346:   if (ml->maxLevels < 0)
347:     ml->maxLevels = 25;

349:   /* Set the threshold for a final QR factorization */
350:   PetscOptionsGetInt(pc->prefix, "-pc_ml_qr_thresh", &ml->QRthresh, &opt);
351:   PetscOptionsGetReal(pc->prefix, "-pc_ml_qr_factor", &ml->DoQRFactor, &opt);

353:   /* Allocate mesh storage */
354:   ml->numMeshes = 1;
355:   MeshGetInfo(mesh, &numVertices, &numNodes, &numEdges, &numElements);
356:   PetscMalloc((ml->maxLevels+1) * sizeof(int),    &ml->numElements);
357:   PetscMalloc((ml->maxLevels+1) * sizeof(int),    &ml->numVertices);
358:   PetscMalloc((ml->maxLevels+1) * sizeof(int **), &ml->meshes);
359:   PetscMalloc(NUM_MESH_DIV      * sizeof(int *),  &ml->meshes[0]);
360:   PetscMalloc((numElements*3)   * sizeof(int),    &ml->meshes[0][MESH_ELEM]);
361:   PetscMalloc(grid->numPointBC  * sizeof(int),    &bcNodes);
362:   PetscLogObjectMemory(pc, (ml->maxLevels+1)*2 * sizeof(int) + (ml->maxLevels+1) * sizeof(int **) + NUM_MESH_DIV * sizeof(int *) +
363:                        (numElements*3) * sizeof(int) + grid->numPointBC * sizeof(int));

365:   /* Create the local graph */
366:   for(bc = 0; bc < grid->numPointBC; bc++) {
367:     bcNodes[bc] = grid->pointBC[bc].node;
368:   }
369:   MeshCreateLocalCSR(mesh, &ml->numVertices[0], &ml->numEdges, &ml->meshes[0][MESH_OFFSETS],
370:                             &ml->meshes[0][MESH_ADJ], &ml->vertices, grid->numPointBC, bcNodes, PETSC_FALSE);
371: 

373:   /* Create the element descriptions */
374:   ml->numElements[0] = 0;
375:   for(elem = 0; elem < numElements; elem++) {
376:     /* Remove all elements containing a constrained node */
377:     for(bc = 0; bc < grid->numPointBC; bc++)
378:       if ((tri->faces[elem*mesh->numCorners+0] == bcNodes[bc]) ||
379:           (tri->faces[elem*mesh->numCorners+1] == bcNodes[bc]) ||
380:           (tri->faces[elem*mesh->numCorners+2] == bcNodes[bc]))
381:         break;
382:     if (bc < grid->numPointBC)
383:       continue;

385:     /* Remove elements with off-processor nodes */
386:     if ((tri->faces[elem*mesh->numCorners+0] >= numNodes) ||
387:         (tri->faces[elem*mesh->numCorners+1] >= numNodes) ||
388:         (tri->faces[elem*mesh->numCorners+2] >= numNodes))
389:       continue;

391:     /* Add the element */
392:     for(corner = 0; corner < 3; corner++) {
393:       /* Decrement node numbers to account for constrained nodes */
394:       newElem = tri->faces[elem*mesh->numCorners+corner];
395:       for(bc = 0; bc < grid->numPointBC; bc++)
396:         if ((bcNodes[bc] >= 0) && (tri->faces[elem*mesh->numCorners+corner] > bcNodes[bc]))
397:           newElem--;
398:       ml->meshes[0][MESH_ELEM][ml->numElements[0]*3+corner] = newElem+1;
399:     }
400:     ml->numElements[0]++;
401:   }
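  /* A worked example of the renumbering above, with assumed data: if grid->numPointBC = 2
     and bcNodes = {2, 5}, every element touching node 2 or 5 (or an off-processor node)
     is skipped.  For a surviving element with nodes {1, 4, 7}:
       node 1 -> 1  (no constrained node below it)
       node 4 -> 3  (one constrained node, 2, below it)
       node 7 -> 5  (two constrained nodes, 2 and 5, below it)
     and MESH_ELEM stores the one-based values {2, 4, 6}. */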

403:   if (ml->factorizationViewer != PETSC_NULL) {
404:     PetscTruth isPeriodic;
405:     PetscDraw  draw;

407:     MeshView(mesh, ml->factorizationViewer);
408:     PetscViewerFlush(ml->factorizationViewer);
409:     PetscViewerDrawGetDraw(ml->factorizationViewer, 0, &draw);
410:     MeshIsPeriodic(mesh, &isPeriodic);
411:     if (isPeriodic == PETSC_FALSE) {
412:       /* Does not work in periodic case */
413:       PCMultiLevelView_Private(pc, ml->factorizationViewer, 0, ml->numVertices[0], ml->numElements[0], ml->meshes[0]);
414: 
415:     }
416:   }

418:   /* Cleanup */
419:   PetscFree(bcNodes);
420: #ifdef PETSC_USE_BOPT_g
421:   PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);
422: #endif

424:   if ((ml->sField < 0) || (ml->tField < 0)) {
425:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Shape and test function fields must be set up.");
426:   }
427:   /* Create shape function variable orderings */
428:   VarOrderingCreateGeneral(grid, 1, &ml->sField, &ml->sOrder);
429:   LocalVarOrderingCreate(grid, 1, &ml->sField, &ml->sLocOrder);
430:   /* Create test function variable orderings */
431:   VarOrderingCreateGeneral(grid, 1, &ml->tField, &ml->tOrder);
432:   LocalVarOrderingCreate(grid, 1, &ml->tField, &ml->tLocOrder);
433:   /* Check orderings */
434:   if (ml->numVertices[0] != ml->sOrder->numLocVars) {
435:     SETERRQ2(PETSC_ERR_ARG_CORRUPT, "Invalid local graph: %d vertices != %d vars", ml->numVertices[0], ml->sOrder->numLocVars);
436:   }
437:   /* Allocate the operator */
438:   GMatCreateRectangular(grid, ml->sOrder, ml->tOrder, &ml->B);
439:   /* Create the multiplier solution vector */
440:   GVecCreateRectangular(ml->grid, ml->tOrder, &ml->reduceSol);
441:   /* Construct the operator */
442:   MatSetOption(ml->B, MAT_NEW_NONZERO_ALLOCATION_ERR);
443:   GMatEvaluateOperatorGalerkin(ml->B, PETSC_NULL, 1, &ml->sField, &ml->tField, ml->gradOp,
444:                                       ml->gradAlpha, MAT_FINAL_ASSEMBLY, ml);
445: 

447:   /* Precondition the initial system with its diagonal */
448:   PetscOptionsHasName(pc->prefix, "-pc_ml_jacobi", &opt);
449:   if (opt == PETSC_TRUE) {
450:     GVecCreateConstrained(ml->grid, &ml->diag);
451:     GMatGetDiagonalConstrained(pc->mat, ml->diag);
452:     VecReciprocal(ml->diag);
453:     VecSqrt(ml->diag);
454:     MatDiagonalScale(ml->B, ml->diag, PETSC_NULL);
455:   }

457:   /* Parallel Support */
458:   if (numProcs > 1) {
459:     PC_MLLogEventBegin(PC_ML_SetUpParallel, pc, 0, 0, 0);
460:     /* Separate interior and interface rows */
461:     PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &interiorRows);
462:     PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &ml->interfaceRows);
463:     PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &diagInterfaceRows);
464:     PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &offdiagInterfaceRows);
465:     PetscMemzero(diagInterfaceRows,    ml->tOrder->numLocVars * sizeof(int));
466:     PetscMemzero(offdiagInterfaceRows, ml->tOrder->numLocVars * sizeof(int));
467:     PetscLogObjectMemory(pc, ml->tOrder->numLocVars * sizeof(int));
468:     ml->numInteriorRows     = 0;
469:     ml->numLocInterfaceRows = 0;
470:     for(row = ml->tOrder->firstVar[rank]; row < ml->tOrder->firstVar[rank+1]; row++) {
471:       MatGetRow(ml->B, row, &rowLen, &cols, PETSC_NULL);
472: #ifdef PETSC_USE_BOPT_g
473:       if (rowLen <= 0) {
474:         PetscPrintf(PETSC_COMM_SELF, "row %d is null\n", row);
475:         SETERRQ(PETSC_ERR_ARG_CORRUPT, "Null row in gradient");
476:       }
477: #endif
478:       /* Find rows on domain boundaries */
479:       for(col = 0, isInterfaceRow = PETSC_FALSE; col < rowLen; col++) {
480:         if ((cols[col] < ml->sOrder->firstVar[rank]) || (cols[col] >= ml->sOrder->firstVar[rank+1])) {
481:           isInterfaceRow = PETSC_TRUE;
482:           offdiagInterfaceRows[ml->numLocInterfaceRows]++;
483:         }
484:       }
485:       if (isInterfaceRow == PETSC_FALSE) {
486:         interiorRows[ml->numInteriorRows++] = row;
487:       } else {
488:         ml->interfaceRows[ml->numLocInterfaceRows]   = row;
489:         diagInterfaceRows[ml->numLocInterfaceRows]   = rowLen - offdiagInterfaceRows[ml->numLocInterfaceRows]; ml->numLocInterfaceRows++;
490:       }

492:       MatRestoreRow(ml->B, row, &rowLen, &cols, PETSC_NULL);
493:     }
494:     if (ml->numInteriorRows + ml->numLocInterfaceRows != ml->tOrder->numLocVars) {
495:       SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
496:     }
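    /* A small example of the classification above, with assumed sizes: on two processors
       with ml->sOrder->firstVar = {0, 100, 180} and rank 0, a row of B with column
       indices {3, 7, 102} touches column 102 >= 100, so it becomes an interface row with
       offdiagInterfaceRows[] = 1 and diagInterfaceRows[] = 3 - 1 = 2, while a row with
       columns {3, 7, 12} is recorded in interiorRows[]. */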

498:     /* Place interior rows after interface rows */
499:     ml->interiorRows = ml->interfaceRows + ml->numLocInterfaceRows;
500:     PetscMemcpy(ml->interiorRows, interiorRows, ml->numInteriorRows * sizeof(int));
501:     PetscFree(interiorRows);

503:     /* Calculate global size of the interface matrix */
504:     PetscMalloc((numProcs+1) * sizeof(int), &ml->firstInterfaceRow);
505:     PetscLogObjectMemory(pc, (numProcs+1) * sizeof(int));
506:     MPI_Allgather(&ml->numLocInterfaceRows, 1, MPI_INT, &ml->firstInterfaceRow[1], 1, MPI_INT, pc->comm);
507: 
508:     ml->firstInterfaceRow[0] = 0;
509:     for(proc = 1; proc < numProcs+1; proc++)
510:       ml->firstInterfaceRow[proc] += ml->firstInterfaceRow[proc-1];
511:     ml->numInterfaceRows = ml->firstInterfaceRow[numProcs];
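    /* Example of the offset computation above, with assumed counts: if three processors
       own 4, 2 and 5 interface rows, the Allgather fills firstInterfaceRow[1..3] with
       {4, 2, 5}, the running sum turns the array into {0, 4, 6, 11}, and
       ml->numInterfaceRows = firstInterfaceRow[3] = 11. */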

513:     /* Setup allocation */
514:     PetscMalloc((ml->numInteriorRows+1) * sizeof(int), &rowLens);
515:     for(row = ml->tOrder->firstVar[rank], count = 0, lossCount = 0; row < ml->tOrder->firstVar[rank+1]; row++) {
516:       if ((lossCount < ml->numLocInterfaceRows) && (ml->interfaceRows[lossCount] == row)) {
517:         lossCount++;
518:         continue;
519:       } else {
520:         MatGetRow(ml->B, row, &rowLen, PETSC_NULL, PETSC_NULL);
521:         rowLens[count] = rowLen;
522:         MatRestoreRow(ml->B, row, &rowLen, PETSC_NULL, PETSC_NULL);
523:         count++;
524:       }
525:     }
526:     if (count != ml->numInteriorRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
527:     if (lossCount != ml->numLocInterfaceRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");

529:     /* Create the gradient local to this processor */
530:     MatCreateSeqAIJ(PETSC_COMM_SELF, ml->numInteriorRows, ml->sOrder->numLocVars, 0, rowLens, &ml->locB);
531: 
532:     PetscFree(rowLens);
533:     for(row = ml->tOrder->firstVar[rank], count = 0, lossCount = 0; row < ml->tOrder->firstVar[rank+1]; row++)
534:     {
535:       if ((lossCount < ml->numLocInterfaceRows) && (ml->interfaceRows[lossCount] == row))
536:       {
537:         lossCount++;
538:         continue;
539:       }
540:       else
541:       {
542:         MatGetRow(ml->B, row, &rowLen, &cols, &vals);
543:         /* This is definitely cheating */
544:         for(col = 0; col < rowLen; col++)
545:           cols[col] -= ml->sOrder->firstVar[rank];
546:         MatSetValues(ml->locB, 1, &count, rowLen, cols, vals, INSERT_VALUES);
547:         MatRestoreRow(ml->B, row, &rowLen, &cols, &vals);
548:         count++;
549:       }
550:     }
551:     MatAssemblyBegin(ml->locB, MAT_FINAL_ASSEMBLY);
552:     MatAssemblyEnd(ml->locB, MAT_FINAL_ASSEMBLY);
553:     if (count != ml->numInteriorRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
554:     if (lossCount != ml->numLocInterfaceRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
555:     PC_MLLogEventEnd(PC_ML_SetUpParallel, pc, 0, 0, 0);
556:   } else {
557:     ml->locB = ml->B;
558:   }
559:   MatGetLocalSize(ml->locB, &ml->numRows, &ml->numCols);
560: #ifdef PETSC_USE_BOPT_g
561:   PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);
562: #endif

564: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
565:   /* Events for the SGI R10000 hardware counters. Add 16 to each event for Counter 1.

567:      Event Counter 0                                            Counter 1
568:      0     Cycles                                               Cycles
569:      1     Instructions Issued                                  Instructions Graduated
570:      2     Load/prefetch/sync Issued                            Load/prefetch/sync Graduated
571:      3     Stores Issued                                        Stores Graduated
572:      4     Store conditional Issued                             Store conditional Graduated
573:      5     Failed store conditional                             Floating point instructions Graduated
574:      6     Branches decoded                                     Write back from data cache to secondary cache
575:      7     Write back from secondary cache to system interface  TLB refill exceptions
576:      8     Single-bit ECC errors on secondary cache data        Branches mispredicted
577:      9     Instruction cache misses                             Data cache misses
578:      10    Secondary instruction cache misses                   Secondary data cache misses
579:      11    Secondary instruction cache way mispredicted         Secondary data cache way mispredicted
580:      12    External intervention requests                       External intervention hits
581:      13    External invalidation requests                       External invalidation hits
582:      14    Virtual coherency                                    Upgrade requests on clean secondary cache lines
583:      15    Instructions graduated                               Upgrade requests on shared secondary cache lines
584:    */
585:   /* Setup events */
586:   event0 = 0;
587:   event1 = 21;
588:   PetscOptionsHasName(pc->prefix, "-pc_ml_flops_monitor", &opt);
589:   if (opt == PETSC_TRUE)
590:     event1 = 21;
591:   PetscOptionsHasName(pc->prefix, "-pc_ml_cache_monitor", &opt);
592:   if (opt == PETSC_TRUE)
593:     event1 = 25;
594:   PetscOptionsHasName(pc->prefix, "-pc_ml_sec_cache_monitor", &opt);
595:   if (opt == PETSC_TRUE)
596:     event1 = 26;

598:   /* Open the proc file for ioctl() */
599:   pid = getpid();
600:   sprintf(procfile, "/proc/%010d", pid);
601:   if ((fd  = open(procfile, O_RDWR)) < 0) SETERRQ(PETSC_ERR_FILE_OPEN, "Could not open proc file");
602:   /* Set the event for counter 0 */
603:   for (event = 0; event < HWPERF_EVENTMAX; event++) {
604:     if (event == event0) {
605:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_mode = HWPERF_CNTEN_U;
606:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ie   = 1;
607:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ev   = event;
608:       evctr_args.hwp_ovflw_freq[event]                                = 0;
609:     } else {
610:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_spec = 0;
611:       evctr_args.hwp_ovflw_freq[event]                       = 0;
612:     }
613:   }
614:   /* Set the event for counter 1 */
615:   for (event = HWPERF_CNT1BASE; event < HWPERF_EVENTMAX; event++) {
616:     if (event == event1) {
617:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_mode = HWPERF_CNTEN_U;
618:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ie   = 1;
619:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ev   = event - HWPERF_CNT1BASE;
620:       evctr_args.hwp_ovflw_freq[event]                                = 0;
621:     } else {
622:       evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_spec = 0;
623:       evctr_args.hwp_ovflw_freq[event]                       = 0;
624:     }
625:   }
626:   evctr_args.hwp_ovflw_sig = 0;
627:   /* Start SGI hardware counters */
628:   if ((gen_start = ioctl(fd, PIOCENEVCTRS, (void *) &evctr_args)) < 0) {
629:     SETERRQ(PETSC_ERR_LIB, "Could not access SGI hardware counters");
630:   }
631: #endif
632:   PC_MLLogEventEnd(PC_ML_SetUpInit, pc, 0, 0, 0);

634:   /* Factorize the gradient matrix */
635:   PCMultiLevelReduce(pc);

637:   /* Parallel support */
638:   if (numProcs > 1) {
639:     PC_MLLogEventBegin(PC_ML_SetUpParallel, pc, 0, 0, 0);
640:     /* Get size information */
641:     PetscMalloc(numProcs     * sizeof(int), &nullCols);
642:     PetscMalloc((numProcs+1) * sizeof(int), &ml->firstNullCol);
643:     PetscLogObjectMemory(pc, (numProcs+1) * sizeof(int));
644:     MPI_Allreduce(&ml->numLocNullCols, &ml->numNullCols, 1, MPI_INT, MPI_SUM, pc->comm);
645:     MPI_Allgather(&ml->numLocNullCols, 1, MPI_INT, nullCols, 1, MPI_INT, pc->comm);
646:     for(proc = 1, ml->firstNullCol[0] = 0; proc <= numProcs; proc++) {
647:       ml->firstNullCol[proc] = nullCols[proc-1] + ml->firstNullCol[proc-1];
648:     }
649:     PetscLogInfo(pc, "Interface rows: %d Total rows: %d NullCols: %d\n", ml->numInterfaceRows, ml->tOrder->numVars, ml->numNullCols);

651:     /* Get all interface rows */
652:     PetscMalloc(ml->numInterfaceRows * sizeof(int), &interfaceRows);
653:     PetscMalloc(numProcs             * sizeof(int), &recvCounts);
654:     for(proc = 0; proc < numProcs; proc++)
655:       recvCounts[proc] = ml->firstInterfaceRow[proc+1] - ml->firstInterfaceRow[proc];
656:     MPI_Allgatherv(ml->interfaceRows, ml->numLocInterfaceRows,           MPI_INT,
657:                    interfaceRows,     recvCounts, ml->firstInterfaceRow, MPI_INT, pc->comm);
658:     PetscFree(recvCounts);

660:     /* Get all the interface columns */
661:     PetscMalloc(ml->numNullCols    * sizeof(int), &interfaceCols);
662:     if (ml->numLocNullCols > 0) {
663:       PetscMalloc(ml->numLocNullCols * sizeof(int), &cols);
664:     }
665:     for(col = 0; col < ml->numLocNullCols; col++) {
666:       cols[col] = ml->nullCols[col] + ml->sOrder->firstVar[rank];
667:     }
668:     MPI_Allgatherv(cols,          ml->numLocNullCols,     MPI_INT,
669:                    interfaceCols, nullCols, ml->firstNullCol, MPI_INT, pc->comm);
670:     if (ml->numLocNullCols > 0) {
671:       PetscFree(cols);
672:     }

674:     /* Get the rows for the range split onto processors like a column vector
675:          Here we are looping over all local columns (ml->sOrder->numLocVars)
676:        using the local column number (col). The number of null columns discovered
677:        (nullCol) should equal the number found in the serial factorization
678:        (ml->numLocNullCols), just as the number of range columns (rangeCol) should
679:        agree with the local rank (ml->rank).
680:      */
681:     PetscMalloc(ml->sOrder->numLocVars * sizeof(int), &rangeRows);
682:     for(col = 0, nullCol = 0, rangeCol = 0; col < ml->sOrder->numLocVars; col++) {
683:       if ((nullCol < ml->numLocNullCols) && (ml->nullCols[nullCol] == col)) {
684:         rangeRows[col] = interfaceRows[ml->firstNullCol[rank]+nullCol++];
685:       } else {
686:         rangeRows[col] = ml->interiorRows[ml->range[rangeCol++]];
687:       }
688:     }
689:     if ((rangeCol != ml->rank) || (nullCol != ml->numLocNullCols)) {
690:       PetscPrintf(PETSC_COMM_SELF, "[%d]rank: %d rangeCol: %d null: %d nullCol: %d\n",
691:                   rank, ml->rank, rangeCol, ml->numLocNullCols, nullCol);
692:       SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid range space");
693:     }
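    /* A worked example of the loop above, with assumed local data: suppose
       ml->sOrder->numLocVars = 4, ml->nullCols = {2} (so ml->numLocNullCols = 1),
       ml->range = {5, 0, 3} (so ml->rank = 3), and ml->firstNullCol[rank] = 1.  Then
         col 0 -> rangeRows[0] = interiorRows[range[0]] = interiorRows[5]
         col 1 -> rangeRows[1] = interiorRows[range[1]] = interiorRows[0]
         col 2 -> rangeRows[2] = interfaceRows[1 + 0]          (the null column)
         col 3 -> rangeRows[3] = interiorRows[range[2]] = interiorRows[3]
       leaving rangeCol = 3 = ml->rank and nullCol = 1 = ml->numLocNullCols, which is
       exactly what the consistency check above verifies. */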

695:     /* Renumber and enlarge range[] to account for interface rows */
696:     ml->localRank = ml->rank;
697:     if (ml->numNullCols < ml->firstInterfaceRow[rank])
698:       numNewRange = 0;
699:     else
700:       numNewRange = PetscMin(ml->firstInterfaceRow[rank+1], ml->numNullCols) - ml->firstInterfaceRow[rank];
701:     if (ml->rank + numNewRange > 0)
702:     {
703:       /* We maintain the range[] in local row numbers */
704:       PetscMalloc((ml->rank + numNewRange) * sizeof(int), &newRange);
705:       for(row = 0; row < ml->rank; row++)
706:         newRange[row] = ml->interiorRows[ml->range[row]] - ml->tOrder->firstVar[rank];
707:       for(row = 0; row < numNewRange; row++)
708:         newRange[ml->rank+row] = ml->interfaceRows[row] - ml->tOrder->firstVar[rank];
709:       ml->rank += numNewRange;
710:       if (ml->range != PETSC_NULL)
711:         {PetscFree(ml->range);                                                                     }
712:       ml->range = newRange;
713:     }
714:     MPI_Allreduce(&ml->rank, &ml->globalRank, 1, MPI_INT, MPI_SUM, pc->comm);

716:     /* Create work vectors for applying P in parallel */
717:     GVecCreateRectangular(grid, ml->sOrder, &ml->colWorkVec);

719:     /* Create scatter from the solution vector to a local interior vector */
720:     ISCreateStride(PETSC_COMM_SELF, ml->numInteriorRows, 0, 1, &localIS);
721:     ISCreateGeneral(PETSC_COMM_SELF, ml->numInteriorRows, ml->interiorRows, &globalIS);
722:     VecCreateSeq(PETSC_COMM_SELF, ml->numInteriorRows, &ml->interiorRhs);
723:     VecScatterCreate(pc->vec, globalIS, ml->interiorRhs, localIS, &ml->interiorScatter);
724:     ISDestroy(localIS);
725:     ISDestroy(globalIS);

727:     /* Create scatter from the solution vector to a local interface vector */
728:     ISCreateStride(PETSC_COMM_SELF, ml->numLocInterfaceRows, ml->firstInterfaceRow[rank], 1, &localIS);
729:     ISCreateGeneral(PETSC_COMM_SELF, ml->numLocInterfaceRows, ml->interfaceRows, &globalIS);
730:     VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &ml->interfaceRhs);
731:     VecScatterCreate(pc->vec, globalIS, ml->interfaceRhs, localIS, &ml->interfaceScatter);
732:     ISDestroy(localIS);
733:     ISDestroy(globalIS);

735: #ifndef PETSC_HAVE_PLAPACK
736:     /* Create scatter from the solution vector to an interface vector */
737:     if (rank == 0) {
738:       ISCreateStride(PETSC_COMM_SELF, ml->numInterfaceRows, 0, 1, &localIS);
739:       ISCreateGeneral(PETSC_COMM_SELF, ml->numInterfaceRows, interfaceRows, &globalIS);
740:       VecCreateSeq(PETSC_COMM_SELF, ml->numInterfaceRows, &ml->locInterfaceRhs);
741:       VecScatterCreate(pc->vec, globalIS, ml->locInterfaceRhs, localIS, &ml->locInterfaceScatter);
742:     } else {
743:       ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &localIS);
744:       ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &globalIS);
745:       VecCreateSeq(PETSC_COMM_SELF, 0, &ml->locInterfaceRhs);
746:       VecScatterCreate(pc->vec, globalIS, ml->locInterfaceRhs, localIS, &ml->locInterfaceScatter);
747:     }
748:     ISDestroy(localIS);
749:     ISDestroy(globalIS);
750: #endif

752:     /* Create scatter from a column vector to a column interface vector */
753: #ifdef PETSC_HAVE_PLAPACK
754:     ISCreateStride(PETSC_COMM_SELF, ml->numLocNullCols, ml->firstNullCol[rank], 1, &localIS);
755:     ISCreateGeneral(PETSC_COMM_SELF, ml->numLocNullCols, &interfaceCols[ml->firstNullCol[rank]], &globalIS);
756: 
757:     VecCreateMPI(pc->comm, ml->numLocNullCols, ml->numNullCols, &ml->interfaceColRhs);
758:     VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
759: 
760:     ISDestroy(localIS);
761:     ISDestroy(globalIS);
762: #else
763:     if (rank == 0) {
764:       ISCreateStride(PETSC_COMM_SELF, ml->numNullCols, 0, 1, &localIS);
765:       ISCreateGeneral(PETSC_COMM_SELF, ml->numNullCols, interfaceCols, &globalIS);
766:       VecCreateSeq(PETSC_COMM_SELF, ml->numNullCols, &ml->interfaceColRhs);
767:       VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
768: 
769:     } else {
770:       ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &localIS);
771:       ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &globalIS);
772:       VecCreateSeq(PETSC_COMM_SELF, 0, &ml->interfaceColRhs);
773:       VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
774: 
775:     }
776:     ISDestroy(localIS);
777:     ISDestroy(globalIS);
778: #endif

780:     /* Create scatter from a solution vector to a column vector */
781:     ISCreateStride(PETSC_COMM_SELF, ml->sOrder->numLocVars, ml->sOrder->firstVar[rank], 1, &localIS);
782:     ISCreateGeneral(PETSC_COMM_SELF, ml->sOrder->numLocVars, rangeRows, &globalIS);
783:     VecScatterCreate(pc->vec, globalIS, ml->colWorkVec, localIS, &ml->rangeScatter);
784:     ISDestroy(localIS);
785:     ISDestroy(globalIS);

787:     /* Create sparse B_I */
788:     MatCreateMPIAIJ(pc->comm, ml->numLocInterfaceRows, ml->sOrder->numLocVars, ml->numInterfaceRows,
789:                            ml->sOrder->numVars, 0, diagInterfaceRows, 0, offdiagInterfaceRows, &ml->interfaceB);
790: 
791:     PetscFree(diagInterfaceRows);
792:     PetscFree(offdiagInterfaceRows);

794:     for(row = 0; row < ml->numLocInterfaceRows; row++) {
795:       /* Copy row into B^p_I */
796:       row2 = row + ml->firstInterfaceRow[rank];
797:       MatGetRow(ml->B, ml->interfaceRows[row], &rowLen, &cols, &vals);
798:       MatSetValues(ml->interfaceB, 1, &row2, rowLen, cols, vals, INSERT_VALUES);
799:       MatRestoreRow(ml->B, ml->interfaceRows[row], &rowLen, &cols, &vals);
800:     }
801:     MatAssemblyBegin(ml->interfaceB, MAT_FINAL_ASSEMBLY);
802:     MatAssemblyEnd(ml->interfaceB, MAT_FINAL_ASSEMBLY);

804:     /* Get V_I: The rows of V corresponding to the zero singular values for each domain */
805:     VecDuplicateVecs(ml->colWorkVec, ml->numNullCols, &ml->interfaceV);
806:     for(col = 0; col < ml->numNullCols; col++) {
807:       tempV   = ml->interfaceV[col];
808:       if ((col >= ml->firstNullCol[rank]) && (col < ml->firstNullCol[rank+1])) {
809:         nullCol = ml->nullCols[col-ml->firstNullCol[rank]]+ml->sOrder->firstVar[rank];
810:         ierr    = VecSetValues(tempV, 1, &nullCol, &one, INSERT_VALUES);
811:       }
812:       ierr      = VecAssemblyBegin(tempV);
813:       ierr      = VecAssemblyEnd(tempV);
814:       if ((col >= ml->firstNullCol[rank]) && (col < ml->firstNullCol[rank+1])) {
815:         ierr    = PCMultiLevelApplyV(pc, tempV, tempV);
816:       }
817:     }
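    /* Each pass of the loop above builds one column of V_I: tempV is set to the unit
       vector for the owning process's null column and then overwritten in place by
       PCMultiLevelApplyV, so ml->interfaceV[col] ends up holding V e_{nullCol}. */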

819: #ifdef PETSC_HAVE_PLAPACK
820:     /* You MUST initialize all PLAPACK variables to PETSC_NULL */
821:     ml->PLAlayout    = PETSC_NULL;
822:     ml->interfaceQR  = PETSC_NULL;
823:     ml->interfaceTAU = PETSC_NULL;
824:     ml->PLArhsD      = PETSC_NULL;
825:     ml->PLArhsP      = PETSC_NULL;
826:     /* Create the template for vector and matrix alignment */
827:     PLA_Temp_create(ml->numNullCols, 0, &ml->PLAlayout);
828:     /* Create the interface matrix B_Gamma */
829:     PLA_Matrix_create(MPI_DOUBLE, ml->numInterfaceRows, ml->numNullCols, ml->PLAlayout, 0, 0, &ml->interfaceQR);
830: 
831:     /* Create the vector of scaling factors Tau */
832:     PLA_Vector_create(MPI_DOUBLE, ml->numNullCols, ml->PLAlayout, 0, &ml->interfaceTAU);
833:     /* Create the storage vectors for application of D and P */
834:     PLA_Vector_create(MPI_DOUBLE, ml->numNullCols,      ml->PLAlayout, 0, &ml->PLArhsD);
835:     PLA_Vector_create(MPI_DOUBLE, ml->numInterfaceRows, ml->PLAlayout, 0, &ml->PLArhsP);
836:     /* Create temporary space for the local rows of the interface matrix */
837:     PetscMalloc(ml->numLocInterfaceRows*ml->numNullCols * sizeof(double), &locB_I);

839:     /* Construct B_I V_I */
840:     ierr  = VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &tempB);
841:     for(col = 0; col < ml->numNullCols; col++) {
842:       MatMult(ml->interfaceB, ml->interfaceV[col], tempB);
843:       /* Copy into storage -- column-major order */
844:       ierr  = VecGetArray(tempB, &arrayB);
845:       PetscMemcpy(&locB_I[ml->numLocInterfaceRows*col], arrayB, ml->numLocInterfaceRows * sizeof(double));
846: 
847:       VecRestoreArray(tempB, &arrayB);
848:     }
849:     VecDestroy(tempB);
850:     VecDestroyVecs(ml->interfaceV, ml->numNullCols);

852:     /* Set the matrix entries */
853:     PLA_Obj_set_to_zero(ml->interfaceQR);
854:     PLA_API_begin();
855:     PLA_Obj_API_open(ml->interfaceQR);
856:     PLA_API_axpy_matrix_to_global(ml->numLocInterfaceRows, ml->numNullCols, &one, locB_I,
857:                                          ml->numLocInterfaceRows, ml->interfaceQR, ml->firstInterfaceRow[rank], 0);
858: 
859:     PLA_Obj_API_close(ml->interfaceQR);
860:     PLA_API_end();
861:     PetscFree(locB_I);
862: #else
863:     /* Allocate storage for the QR factorization */
864:     minSize = PetscMin(ml->numInterfaceRows, ml->numNullCols);
865:     PetscMalloc(ml->numInterfaceRows*ml->numNullCols * sizeof(double), &ml->interfaceQR);
866:     PetscMalloc(minSize                              * sizeof(double), &ml->interfaceTAU);
867:     PetscLogObjectMemory(pc, (ml->numInterfaceRows*ml->numNullCols + minSize)*sizeof(double));
868:     PetscMemzero(ml->interfaceQR,  ml->numInterfaceRows*ml->numNullCols * sizeof(double));
869:     PetscMemzero(ml->interfaceTAU, minSize                              * sizeof(double));

871:     /* Construct B_I V_I */
872:     VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &tempB);
873:     for(col = 0; col < ml->numNullCols; col++) {
874:       MatMult(ml->interfaceB, ml->interfaceV[col], tempB);
875:       /* Copy into storage -- column-major order */
876:       VecGetArray(tempB, &arrayB);
877:       PetscMemcpy(&ml->interfaceQR[ml->numInterfaceRows*col+ml->firstInterfaceRow[rank]], arrayB,
878:                          ml->numLocInterfaceRows * sizeof(double));
879: 
880:       VecRestoreArray(tempB, &arrayB);
881:     }
882:     VecDestroy(tempB);
883:     VecDestroyVecs(ml->interfaceV, ml->numNullCols);
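    /* Storage convention used above and in the Gatherv below: entry (i, j) of the
       gathered matrix B_Gamma V_I lives at ml->interfaceQR[ml->numInterfaceRows*j + i],
       i.e. column-major (Fortran) order with leading dimension ml->numInterfaceRows,
       which is the layout LAgeqrf_ expects further down. */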

885:     /* Gather matrix / B^1_I V^1_I | ... | B^1_I V^p_I \
886:                      | B^2_I V^1_I | ... | B^2_I V^p_I |
887:                                      ...
888:                      \ B^p_I V^1_I | ... | B^p_I V^p_I /

890:        Note: It has already been put in column-major order
891:     */
892:     PetscMalloc(numProcs * sizeof(int), &recvCounts);
893:     for(proc = 0; proc < numProcs; proc++)
894:       recvCounts[proc] = ml->firstInterfaceRow[proc+1] - ml->firstInterfaceRow[proc];
895:     for(col = 0; col < ml->numNullCols; col++) {
896:       MPI_Gatherv(&ml->interfaceQR[ml->numInterfaceRows*col+ml->firstInterfaceRow[rank]], ml->numLocInterfaceRows,
897:                          MPI_DOUBLE, &ml->interfaceQR[ml->numInterfaceRows*col], recvCounts, ml->firstInterfaceRow,
898:                          MPI_DOUBLE, 0, pc->comm);
899: 
900:     }
901:     PetscFree(recvCounts);

903:     /* Allocate workspace -- Also used in applying P */
904:     ml->workLen = ml->numInterfaceRows;
905:     PetscMalloc(ml->workLen * sizeof(double), &ml->work);
906:     PetscLogObjectMemory(pc, ml->workLen * sizeof(double));
907: #endif

909:     /* Do QR factorization on one processor */
910:     PC_MLLogEventBegin(PC_ML_QRFactorization, pc, 0, 0, 0);
911: #ifdef PETSC_HAVE_PLAPACK
912:     PLA_QR(ml->interfaceQR, ml->interfaceTAU);
913: #else
914:     if (rank == 0) {
915:       LAgeqrf_(&ml->numInterfaceRows, &ml->numNullCols, ml->interfaceQR,
916:              &ml->numInterfaceRows, ml->interfaceTAU, ml->work, &ml->workLen, &ierr);
917:       if (ierr) SETERRQ(PETSC_ERR_LIB, "Invalid interface QR");
918:     }
919: #endif
920:     PC_MLLogEventEnd(PC_ML_QRFactorization, pc, 0, 0, 0);

922: #ifdef PETSC_USE_BOPT_g
923:     /* Diagnostics */
924:     PetscOptionsHasName(PETSC_NULL, "-pc_ml_debug_qr", &opt);
925:     if (opt == PETSC_TRUE) {
926:       PetscReal *intQR;
927:       PetscReal *intTAU;
928:       int     r, c;
929: #ifdef PETSC_HAVE_PLAPACK
930:       PLA_Obj tau = PETSC_NULL;
931:       PLA_Obj qr  = PETSC_NULL;
932:       int     length, width, stride, ldim;

934:       PLA_Mscalar_create(MPI_DOUBLE, 0, 0, ml->numNullCols, 1, ml->PLAlayout, &tau);
935:       PLA_Mscalar_create(MPI_DOUBLE, 0, 0, ml->numInterfaceRows, ml->numNullCols, ml->PLAlayout, &qr);
936:       PLA_Copy(ml->interfaceTAU, tau);
937:       PLA_Copy(ml->interfaceQR,  qr);
938:       PLA_Obj_local_info(tau, &length, &width, (void **) &intTAU, &stride, &ldim);
939:       if ((rank == 0) && ((length != ml->numNullCols) || (width != 1) || (stride != 1)))
940:         SETERRQ(PETSC_ERR_LIB, "PLAPACK returned a bad interface tau vector");
941:       PLA_Obj_local_info(qr,  &length, &width, (void **) &intQR,  &stride, &ldim);
942:       if ((rank == 0) && ((length != ml->numInterfaceRows) || (width != ml->numNullCols) || (ldim != ml->numInterfaceRows)))
943:         SETERRQ(PETSC_ERR_LIB, "PLAPACK returned a bad interface qr matrix");
944: #else
945:       intTAU = ml->interfaceTAU;
946:       intQR  = ml->interfaceQR;
947: #endif
948:       if (rank == 0) {
949:         for(c = 0; c < ml->numNullCols; c++)
950:           PetscPrintf(pc->comm, "%.4g ", intTAU[c]);
951:         PetscPrintf(pc->comm, "\n");
952:         for(r = 0; r < ml->numInterfaceRows; r++) {
953:           for(c = 0; c < ml->numNullCols; c++)
954:             PetscPrintf(pc->comm, "%.4g ", intQR[ml->numInterfaceRows*c+r]);
955:           PetscPrintf(pc->comm, "\n");
956:         }
957:       }
958: #ifdef PETSC_HAVE_PLAPACK
959:       PLA_Obj_free(&tau);
960:       PLA_Obj_free(&qr);
961: #endif
962:     }
963: #endif

965:     /* Scatter Cleanup */
966:     PetscFree(rangeRows);
967:     PetscFree(nullCols);
968:     PetscFree(interfaceRows);
969:     PetscFree(interfaceCols);
970:     PC_MLLogEventEnd(PC_ML_SetUpParallel, pc, 0, 0, 0);
971:   } else {
972:     /* Get the rows for the range split onto processors like a column vector */
973:     PetscMalloc(ml->rank * sizeof(int), &rangeRows);
974:     for(row = 0; row < ml->rank; row++)
975:       rangeRows[row] = ml->range[row];
976:     GVecCreateRectangular(grid, ml->sOrder, &ml->colWorkVec);

978:     /* Create scatter from a solution vector to a column vector */
979:     ISCreateStride(PETSC_COMM_SELF, ml->sOrder->numLocVars, ml->sOrder->firstVar[rank], 1, &localIS);
980:     ISCreateGeneral(PETSC_COMM_SELF, ml->sOrder->numLocVars, rangeRows, &globalIS);
981:     VecScatterCreate(pc->vec, globalIS, ml->colWorkVec, localIS, &ml->rangeScatter);
982:     ISDestroy(localIS);
983:     ISDestroy(globalIS);

985:     /* Cleanup */
986:     PetscFree(rangeRows);
987:     VecDestroy(ml->colWorkVec);
988:   }

990:   if (ml->diag != PETSC_NULL) {
991:     /* Recover original B */
992:     VecReciprocal(ml->diag);
993:     MatDiagonalScale(ml->B, ml->diag, PETSC_NULL);
994:     VecReciprocal(ml->diag);
995:   }

997: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
998:   /* Read the hardware counter values */
999:   if ((gen_read = ioctl(fd, PIOCGETEVCTRS, (void *) &counts)) < 0) {
1000:     SETERRQ(PETSC_ERR_LIB, "Could not access SGI hardware counters");
1001:   }
1002:   /* Generation number should be the same */
1003:   if (gen_start != gen_read) SETERRQ(PETSC_ERR_LIB, "Lost SGI hardware counters");
1004:   /* Release the counters */
1005:   if ((ioctl(fd, PIOCRELEVCTRS)) < 0) SETERRQ(PETSC_ERR_LIB, "Could not free SGI hardware counters");
1006:   close(fd);
1007:   count0 = counts.hwp_evctr[event0];
1008:   count1 = counts.hwp_evctr[event1];
1009:   MPI_Reduce(&count0, &globalCount0, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, pc->comm);
1010:   MPI_Reduce(&count1, &globalCount1, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, pc->comm);
1011:   /* Use hardware counter data */
1012:   switch (event1) {
1013:   case 21:
1014:     PetscPrintf(pc->comm, "Flops: %ld\n", globalCount1);
1015:     break;
1016:   case 25:
1017:     PetscPrintf(pc->comm, "Data cache misses: %ld\n", globalCount1);
1018:     break;
1019:   case 26:
1020:     PetscPrintf(pc->comm, "Secondary data cache misses: %ld\n", globalCount1);
1021:     break;
1022:   }
1023: #endif

1025:   PetscOptionsHasName(PETSC_NULL, "-pc_ml_debug", &opt);
1026:   if (opt == PETSC_TRUE) {
1027:     PCDebug_Multilevel(pc);
1028:   }

1030:   return(0);
1031: }
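/* A minimal sketch of how QR factors like those formed above are typically applied,
   y <- Q^T y, using LAPACK dormqr on the output of dgeqrf/LAgeqrf_.  The raw LAPACK
   symbol, its prototype (no hidden character-length arguments, 32-bit integers) and
   the helper name are assumptions for illustration; they are not the wrappers actually
   used by PCMultiLevelApplyP/PCMultiLevelApplyV. */
extern void dormqr_(const char *side, const char *trans, int *m, int *n, int *k,
                    double *a, int *lda, double *tau, double *c, int *ldc,
                    double *work, int *lwork, int *info);

static int PCMultiLevelApplyQTrans_Sketch(int m, int k, double *qr, double *tau,
                                          double *y, double *work, int lwork)
{
  int info, nrhs = 1;

  /* qr (m x k, column-major, leading dimension m) and tau hold the Householder
     reflectors exactly as dgeqrf/LAgeqrf_ left them; y (length m) is overwritten
     with Q^T y */
  dormqr_("L", "T", &m, &nrhs, &k, qr, &m, tau, y, &m, work, &lwork, &info);
  return info;
}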

1033: #undef __FUNCT__
1034: #define __FUNCT__ "PCPreSolve_Multilevel"
1035: static int PCPreSolve_Multilevel(PC pc, KSP ksp, Vec x, Vec rhs)
1036: {
1037:   PC_Multilevel *ml   = (PC_Multilevel *) pc->data;
1038:   Grid           grid = ml->grid;
1039:   GVec           bdReduceVec, bdReduceVec2;
1040:   GVec           reduceVec;
1041:   GMat           bdReduceMat;
1042:   VarOrdering    sOrder;
1043:   PetscScalar    one = 1.0, reduceAlpha;
1044:   int            bc;
1045:   int            ierr;

1048:   /* Create -g = B^T u + B^T_Gamma f_Gamma */
1049:   GVecCreateRectangular(grid, ml->sOrder, &bdReduceVec);
1050:   VarOrderingCreateSubset(grid->reduceOrder, 1, &ml->tField, PETSC_FALSE, &sOrder);
1051: #define OLD_REDUCTION
1052: #ifdef OLD_REDUCTION
1053:   GMatCreateRectangular(grid, sOrder, ml->sOrder, &bdReduceMat);
1054:   MatSetOption(bdReduceMat, MAT_NEW_NONZERO_ALLOCATION_ERR);
1055:   for(bc = 0; bc < grid->numBC; bc++) {
1056:     /* Conditions on A */
1057:     if (ml->tField == grid->bc[bc].field) {
1058:       GMatEvaluateOperatorGalerkin(bdReduceMat, PETSC_NULL, 1, &ml->tField, &ml->sField, ml->divOp,
1059:                                           1.0, MAT_FINAL_ASSEMBLY, ml);
1060: 
1061:     }
1062:     /* Conditions on B^T only show up in the u equations, so they are already handled */
1063:   }
1064:   for(bc = 0; bc < grid->numPointBC; bc++) {
1065:     /* Conditions on A */
1066:     if (ml->tField == grid->pointBC[bc].field) {
1067:       GVecEvaluateOperatorGalerkin(bdReduceVec, grid->bdReduceVec, grid->bdReduceVec, ml->tField, ml->sField,
1068:                                           ml->divOp, 1.0, ml);
1069: 
1070:     }
1071:     /* Conditions on B^T only show up in the u equations, so they are already handled */
1072: #ifdef PETSC_USE_BOPT_g
1073:     PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);
1074: #endif
1075:   }
1076: #else
1077:   GVecEvaluateOperatorGalerkin(bdReduceVec, bdReduceVec, grid->bdReduceVec, ml->tField, ml->sField,
1078:                                       ml->divOp, 1.0, ml);
1079: 
1080: #endif
1081:   VarOrderingDestroy(sOrder);
1082: #ifdef OLD_REDUCTION
1083:   /* Should be taken out when I fix the MF version */
1084:   MatMult(bdReduceMat, grid->bdReduceVec, bdReduceVec);
1085:   GMatDestroy(bdReduceMat);
1086: #endif

1088:   /* Add in the B^T u part */
1089:   if (ml->nonlinearIterate != PETSC_NULL) {
1090:     GVecCreateRectangular(grid, ml->sOrder, &bdReduceVec2);
1091:     PCMultiLevelApplyGradientTrans(pc, ml->nonlinearIterate, bdReduceVec2);
1092:     VecAXPY(&one, bdReduceVec2, bdReduceVec);
1093:     GVecDestroy(bdReduceVec2);
1094:   }

1096:   /* Calculate -D^{-T} Z^T g = D^{-T} Z^T B^T_Gamma f_Gamma */
1097:   PCMultiLevelApplyVTrans(pc, bdReduceVec, bdReduceVec);
1098:   PCMultiLevelApplyDInvTrans(pc, bdReduceVec, bdReduceVec);

1100:   /* Calculate -P_1 D^{-1} Z^T g */
1101:   PCMultiLevelApplyP1(pc, bdReduceVec, ml->reduceSol);

1103:   /* Calculate -A P_1 D^{-1} Z^T g -- This does not seem like good design, but I will deal with it later */
1104:   GVecCreateRectangular(grid, ml->tOrder, &reduceVec);
1105:   MatMult(pc->mat, ml->reduceSol, reduceVec);

1107:   /* f --> f - A P_1 D^{-1} Z^T g */
1108:   GridGetBCMultiplier(grid, &reduceAlpha);
1109:   VecAXPY(&reduceAlpha, reduceVec, rhs);

1111:   /* Cleanup */
1112:   GVecDestroy(reduceVec);
1113:   GVecDestroy(bdReduceVec);
1114:   return(0);
1115: }
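/* The reduction above, restated in one chain (following the in-code comments; the
   factor written Z^T there is applied through PCMultiLevelApplyVTrans):
     -g          = B^T u + B^T_Gamma f_Gamma           (bdReduceVec)
     bdReduceVec = -D^{-T} Z^T g                       (ApplyVTrans, ApplyDInvTrans)
     reduceSol   = -P_1 D^{-T} Z^T g                   (ApplyP1)
     rhs         = rhs + alpha * A * reduceSol         (alpha from GridGetBCMultiplier)
   i.e. f --> f - A P_1 D^{-1} Z^T g, folding the boundary-condition contribution into
   the right-hand side before the solve. */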

1117: #undef __FUNCT__
1118: #define __FUNCT__ "PCPostSolve_Multilevel"
1119: static int PCPostSolve_Multilevel(PC pc, KSP ksp, Vec x, Vec rhs)
1120: {
1122:   return(0);
1123: }

1125: #undef __FUNCT__
1126: #define __FUNCT__ "PCDestroy_Multilevel"
1127: static int PCDestroy_Multilevel(PC pc)
1128: {
1129:   PC_Multilevel *ml = (PC_Multilevel *) pc->data;
1130:   int            ierr;

1133:   if (ml->numLevels >= 0) {
1134:     PCDestroyStructures_Multilevel(pc);
1135:   }
1136:   if (ml->mathViewer != PETSC_NULL) {
1137:     PetscViewerDestroy(ml->mathViewer);
1138:   }
1139:   if (ml->factorizationViewer != PETSC_NULL) {
1140:     PetscViewerDestroy(ml->factorizationViewer);
1141:   }
1142:   PetscFree(ml);
1143:   return(0);
1144: }

1146: #undef __FUNCT__
1147: #define __FUNCT__ "PCApply_Multilevel"
1148: int PCApply_Multilevel(PC pc, Vec x, Vec y)
1149: {

1153:   PCMultiLevelApplyP(pc, x, y);
1154:   return(0);
1155: }

1157: #undef __FUNCT__
1158: #define __FUNCT__ "PCApplyTrans_Multilevel"
1159: int PCApplyTrans_Multilevel(PC pc, Vec x, Vec y)
1160: {

1164:   PCMultiLevelApplyPTrans(pc, x, y);
1165:   return(0);
1166: }

1168: #undef __FUNCT__
1169: #define __FUNCT__ "PCApplySymmetricLeft_Multilevel"
1170: int PCApplySymmetricLeft_Multilevel(PC pc, Vec x, Vec y)
1171: {

1175:   PCMultiLevelApplyP2Trans(pc, x, y);
1176:   return(0);
1177: }

1179: #undef __FUNCT__
1180: #define __FUNCT__ "PCApplySymmetricRight_Multilevel"
1181: int PCApplySymmetricRight_Multilevel(PC pc, Vec x, Vec y)
1182: {

1186:   PCMultiLevelApplyP2(pc, x, y);
1187:   return(0);
1188: }

1190: EXTERN_C_BEGIN
1191: #undef __FUNCT__
1192: #define __FUNCT__ "PCCreate_Multilevel"
1193: int PCCreate_Multilevel(PC pc)
1194: {
1195:   PC_Multilevel *ml;
1196:   int            ierr;

1199:   PCMultiLevelInitializePackage(PETSC_NULL);

1201:   PetscNew(PC_Multilevel, &ml);
1202:   PetscLogObjectMemory(pc, sizeof(PC_Multilevel));
1203:   pc->data                     = (void *) ml;

1205:   pc->ops->setup               = PCSetUp_Multilevel;
1206:   pc->ops->apply               = PCApply_Multilevel;
1207:   pc->ops->applyrichardson     = PETSC_NULL;
1208:   pc->ops->applyBA             = PETSC_NULL;
1209:   pc->ops->applytranspose      = PCApplyTrans_Multilevel;
1210:   pc->ops->applyBAtranspose    = PETSC_NULL;
1211:   pc->ops->setfromoptions      = PETSC_NULL;
1212:   pc->ops->presolve            = PCPreSolve_Multilevel;
1213:   pc->ops->postsolve           = PCPostSolve_Multilevel;
1214:   pc->ops->getfactoredmatrix   = PETSC_NULL;
1215:   pc->ops->applysymmetricleft  = PCApplySymmetricLeft_Multilevel;
1216:   pc->ops->applysymmetricright = PCApplySymmetricRight_Multilevel;
1217:   pc->ops->setuponblocks       = PETSC_NULL;
1218:   pc->ops->destroy             = PCDestroy_Multilevel;
1219:   pc->ops->view                = PETSC_NULL;
1220:   pc->modifysubmatrices        = PETSC_NULL;

1222:   ml->grid                = PETSC_NULL;
1223:   ml->useMath             = PETSC_FALSE;
1224:   ml->mathViewer          = PETSC_NULL;
1225:   ml->factorizationViewer = PETSC_NULL;

1227:   ml->gradOp              = GRADIENT;
1228:   ml->divOp               = DIVERGENCE;
1229:   ml->sField              = -1;
1230:   ml->tField              = -1;
1231:   ml->sOrder              = PETSC_NULL;
1232:   ml->sLocOrder           = PETSC_NULL;
1233:   ml->tOrder              = PETSC_NULL;
1234:   ml->tLocOrder           = PETSC_NULL;
1235:   ml->gradAlpha           = 1.0;
1236:   ml->B                   = PETSC_NULL;
1237:   ml->locB                = PETSC_NULL;
1238:   ml->reduceSol           = PETSC_NULL;
1239:   ml->nonlinearIterate    = PETSC_NULL;
1240:   ml->diag                = PETSC_NULL;

1242:   ml->numMeshes           = -1;
1243:   ml->numElements         = PETSC_NULL;
1244:   ml->numVertices         = PETSC_NULL;
1245:   ml->vertices            = PETSC_NULL;
1246:   ml->numEdges            = -1;
1247:   ml->meshes              = PETSC_NULL;

1249:   ml->numPartitions       = PETSC_NULL;
1250:   ml->numPartitionCols    = PETSC_NULL;
1251:   ml->colPartition        = PETSC_NULL;
1252:   ml->numPartitionRows    = PETSC_NULL;
1253:   ml->rowPartition        = PETSC_NULL;
1254:   ml->rank                = -1;
1255:   ml->localRank           = -1;
1256:   ml->range               = PETSC_NULL;
1257:   ml->rangeScatter        = PETSC_NULL;
1258:   ml->numLocNullCols      = -1;
1259:   ml->nullCols            = PETSC_NULL;

1261:   ml->numRows             = -1;
1262:   ml->numCols             = -1;
1263:   ml->numLevels           = -1;
1264:   ml->maxLevels           = -1;
1265:   ml->QRthresh            = 0;
1266:   ml->DoQRFactor          = 2;
1267:   ml->zeroTol             = 1.0e-10;
1268:   ml->factors             = PETSC_NULL;
1269:   ml->grads               = PETSC_NULL;

1271:   ml->globalRank          = -1;
1272:   ml->numNullCols         = -1;
1273:   ml->numLocInterfaceRows = -1;
1274:   ml->numInterfaceRows    = -1;
1275:   ml->interfaceRows       = PETSC_NULL;
1276:   ml->firstInterfaceRow   = PETSC_NULL;
1277:   ml->numInteriorRows     = -1;
1278:   ml->interiorRows        = PETSC_NULL;
1279:   ml->interfaceQR         = PETSC_NULL;
1280:   ml->interfaceTAU        = PETSC_NULL;
1281:   ml->interfaceV          = PETSC_NULL;
1282:   ml->interfaceB          = PETSC_NULL;
1283:   ml->interiorRhs         = PETSC_NULL;
1284:   ml->interiorScatter     = PETSC_NULL;
1285:   ml->interfaceRhs        = PETSC_NULL;
1286:   ml->interfaceScatter    = PETSC_NULL;
1287: #ifndef PETSC_HAVE_PLAPACK
1288:   ml->locInterfaceRhs     = PETSC_NULL;
1289:   ml->locInterfaceScatter = PETSC_NULL;
1290: #endif
1291:   ml->interfaceColRhs     = PETSC_NULL;
1292:   ml->interfaceColScatter = PETSC_NULL;
1293:   ml->reduceScatter       = PETSC_NULL;
1294:   ml->colWorkVec          = PETSC_NULL;
1295:   ml->globalColWorkVec    = PETSC_NULL;

1297:   ml->workLen             = -1;
1298:   ml->work                = PETSC_NULL;
1299:   ml->svdWorkLen          = -1;
1300:   ml->svdWork             = PETSC_NULL;
1301:   return(0);
1302: }
1303: EXTERN_C_END
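/* Runtime options recognized by this preconditioner, collected from the PetscOptions*
   calls in PCSetUp_Multilevel above.  The PC type name "multilevel" and the example
   command line are illustrative assumptions, not taken from this file:

     mpirun -np 2 ./app -pc_type multilevel
       -pc_ml_partition_size <n>   target partition size (default 10)
       -pc_ml_max_levels <n>       maximum number of levels (default 25)
       -pc_ml_qr_thresh <n>        threshold for a final QR factorization
       -pc_ml_qr_factor <r>        QR factor (default 2)
       -pc_ml_jacobi               precondition the initial system with its diagonal
       -pc_ml_math                 enable Mathematica support
       -pc_ml_mesh_view_draw       draw the mesh and the factorization
       -pc_ml_flops_monitor        SGI R10000 hardware flop counter (IRIX64, optimized builds)
       -pc_ml_cache_monitor        SGI R10000 data cache miss counter
       -pc_ml_sec_cache_monitor    SGI R10000 secondary data cache miss counter
       -pc_ml_debug                run PCDebug_Multilevel after setup
       -pc_ml_debug_qr             print the interface QR factors (BOPT=g builds) */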