Actual source code: ml.c
1: #ifdef PETSC_RCS_HEADER
2: static char vcid[] = "$Id: mlNew.c,v 1.18 2001/01/27 21:32:36 knepley Exp $";
3: #endif
4: /*
5: Defines a multilevel preconditioner due to Vivek Sarin for AIJ matrices
6: */
7: #include "src/sles/pc/pcimpl.h"
8: #include "src/mesh/impls/triangular/triimpl.h"
9: #include "src/grid/gridimpl.h"
10: #include <fcntl.h>
11: #if defined(PETSC_HAVE_UNISTD_H)
12: #include <unistd.h>
13: #endif
14: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
15: #include <sys/procfs.h>
16: #include <sys/hwperftypes.h>
17: #include <sys/hwperfmacros.h>
18: #endif
19: #include "ml.h"
21: int PC_MLEvents[PC_ML_MAX_EVENTS];
23: #undef __FUNCT__
24: #define __FUNCT__ "PCMultiLevelInitializePackage"
25: /*@C
26: PCMultiLevelInitializePackage - This function initializes everything in the ML package. It is called
27: from PetscDLLibraryRegister() when using dynamic libraries, and on the first call to PCCreate()
28: when using static libraries.
30: Input Parameter:
31: . path - The dynamic library path, or PETSC_NULL
33: Level: developer
35: .keywords: PC, multilevel, initialize, package
36: .seealso: PetscInitialize()
37: @*/
38: int PCMultiLevelInitializePackage(char *path) {
39: static PetscTruth initialized = PETSC_FALSE;
40: int ierr;
43: if (initialized == PETSC_TRUE) return(0);
44: initialized = PETSC_TRUE;
45: /* Register Classes */
46: /* Register Constructors and Serializers */
47: /* Register Events */
48: PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpInit], "MLSetUpInit", PC_COOKIE);
49: PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpConstrained], "MLSetUpConstr", PC_COOKIE);
50: PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpConstrainedBd], "MLSetUpConstrBd", PC_COOKIE);
51: PetscLogEventRegister(&PC_MLEvents[PC_ML_SetUpParallel], "MLSetUpParallel", PC_COOKIE);
52: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReducePartitionMesh], "MLRdPartMesh", PC_COOKIE);
53: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReducePartitionRowCol], "MLRdPartRowCol", PC_COOKIE);
54: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceFactor], "MLRdFactor", PC_COOKIE);
55: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceBdGrad], "MLRdBdGrad", PC_COOKIE);
56: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceBdGradExtract], "MLRdBdGradExtrct", PC_COOKIE);
57: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceBdGradRowPartLocalToGlobal], "MLRdBdGrdRwPtL2G", PC_COOKIE);
58: PetscLogEventRegister(&PC_MLEvents[PC_ML_ReduceShrinkMesh], "MLRdShrinkMesh", PC_COOKIE);
59: PetscLogEventRegister(&PC_MLEvents[PC_ML_ApplySymmetricLeftParallel], "MLApplySymmLeftP", PC_COOKIE);
60: PetscLogEventRegister(&PC_MLEvents[PC_ML_ApplySymmetricRightParallel], "MLApplySymmRghtP", PC_COOKIE);
61: PetscLogEventRegister(&PC_MLEvents[PC_ML_QRFactorization], "MLQRFact", PC_COOKIE);
62: PetscLogEventRegister(&PC_MLEvents[PC_ML_ApplyQR], "MLQRApply", PC_COOKIE);
63: PetscLogEventRegister(&PC_MLEvents[PC_ML_CreateBdGrad], "MLCreateBdGrad", PC_COOKIE);
64: /* Process info exclusions */
65: /* Process summary exclusions */
66: /* Add options checkers */
67: return(0);
68: }
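/* A minimal usage sketch: with static libraries the package is initialized
   implicitly on the first PCCreate(), but it can also be called explicitly,
   passing PETSC_NULL for the dynamic library path as described above:

       ierr = PCMultiLevelInitializePackage(PETSC_NULL);CHKERRQ(ierr);
*/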
70: #undef __FUNCT__
71: #define __FUNCT__ "PCConvert_Multilevel"
72: int PCConvert_Multilevel(PC pc)
73: {
74: PC_Multilevel *ml = (PC_Multilevel *) pc->data;
75: #ifdef PETSC_HAVE_MATHEMATICA
76: int ierr;
77: #endif
80: /* Convert a Mathematica MLData object into a PC_Multilevel object */
81: if (ml->mathViewer != PETSC_NULL) {
82: #ifdef PETSC_HAVE_MATHEMATICA
83: PetscViewerMathematicaMultiLevelConvert(ml->mathViewer, pc);
84: #else
85: SETERRQ(PETSC_ERR_SUP, " ");
86: #endif
87: }
88: return(0);
89: }
91: #undef __FUNCT__
92: #define __FUNCT__ "PCDestroyStructures_Multilevel"
93: static int PCDestroyStructures_Multilevel(PC pc)
94: {
95: PC_Multilevel *ml = (PC_Multilevel *) pc->data;
96: int numProcs, numRows, numCols;
97: int level, part;
98: int ierr;
101: /* Destroy gradient structures */
102: MPI_Comm_size(pc->comm, &numProcs);
103: VarOrderingDestroy(ml->sOrder);
104: VarOrderingDestroy(ml->tOrder);
105: LocalVarOrderingDestroy(ml->sLocOrder);
106: LocalVarOrderingDestroy(ml->tLocOrder);
107: GMatDestroy(ml->B);
108: if (ml->reduceSol != PETSC_NULL)
109: {GVecDestroy(ml->reduceSol); }
110: if (ml->diag != PETSC_NULL)
111: {GVecDestroy(ml->diag); }
113: /* Destroy mesh structures */
114: if (ml->useMath == PETSC_FALSE) {
115: PetscFree(ml->numElements);
116: PetscFree(ml->numVertices);
117: for(level = 0; level <= ml->numLevels; level++) {
118: PetscFree(ml->meshes[level][MESH_OFFSETS]);
119: PetscFree(ml->meshes[level][MESH_ADJ]);
120: PetscFree(ml->meshes[level][MESH_ELEM]);
121: PetscFree(ml->meshes[level]);
122: }
123: PetscFree(ml->vertices);
124: PetscFree(ml->meshes);
125: }
127: /* Destroy factorization structures */
128: if (ml->useMath == PETSC_FALSE)
129: {
130: for(level = 0; level < ml->numLevels; level++)
131: {
132: if (ml->numPartitions[level] == 0)
133: continue;
134: for(part = 0; part < ml->numPartitions[level]; part++)
135: {
136: numRows = ml->numPartitionRows[level][part];
137: numCols = ml->numPartitionCols[level][part];
138: if (numRows > 0)
139: {
140: PetscFree(ml->factors[level][part][FACT_U]);
141: if (PCMultiLevelDoQR_Private(pc, numRows, numCols) == PETSC_TRUE)
142: {
143: PetscFree(ml->factors[level][part][FACT_QR]);
144: PetscFree(ml->factors[level][part][FACT_TAU]);
145: }
146: }
147: if (numCols > 0)
148: {
149: PetscFree(ml->factors[level][part][FACT_DINV]);
150: PetscFree(ml->factors[level][part][FACT_V]);
151: }
152: PetscFree(ml->factors[level][part]);
153: }
154: PetscFree(ml->factors[level]);
155: MatDestroy(ml->grads[level]);
156: VecDestroy(ml->bdReduceVecs[level]);
157: VecDestroy(ml->colReduceVecs[level]);
158: VecDestroy(ml->colReduceVecs2[level]);
159: }
160: PetscFree(ml->factors);
161: PetscFree(ml->grads);
162: PetscFree(ml->bdReduceVecs);
163: PetscFree(ml->colReduceVecs);
164: PetscFree(ml->colReduceVecs2);
165: PetscFree(ml->interiorWork);
166: PetscFree(ml->interiorWork2);
167: if (ml->svdWork != PETSC_NULL)
168: {PetscFree(ml->svdWork); }
169: }
171: /* Destroy numbering structures */
172: if (ml->useMath == PETSC_FALSE)
173: {
174: for(level = 0; level < ml->numLevels; level++)
175: {
176: if (ml->numPartitions[level] == 0)
177: continue;
178: for(part = 0; part < ml->numPartitions[level]; part++)
179: {
180: if (ml->numPartitionCols[level][part] > 0)
181: {PetscFree(ml->colPartition[level][part]); }
182: if (ml->numPartitionRows[level][part] > 0)
183: {PetscFree(ml->rowPartition[level][PART_ROW_INT][part]); }
184: }
185: if (ml->numPartitionRows[level][ml->numPartitions[level]] > 0)
186: {PetscFree(ml->rowPartition[level][PART_ROW_BD][0]); }
187: if (ml->numPartitionRows[level][ml->numPartitions[level]+1] > 0)
188: {PetscFree(ml->rowPartition[level][PART_ROW_RES][0]); }
189: PetscFree(ml->rowPartition[level][PART_ROW_INT]);
190: PetscFree(ml->rowPartition[level][PART_ROW_BD]);
191: PetscFree(ml->rowPartition[level][PART_ROW_RES]);
192: PetscFree(ml->numPartitionCols[level]);
193: PetscFree(ml->colPartition[level]);
194: PetscFree(ml->numPartitionRows[level]);
195: PetscFree(ml->rowPartition[level]);
196: }
197: PetscFree(ml->numPartitions);
198: PetscFree(ml->numPartitionCols);
199: PetscFree(ml->numPartitionRows);
200: PetscFree(ml->colPartition);
201: PetscFree(ml->rowPartition);
202: }
203: if (ml->range != PETSC_NULL) {
204: PetscFree(ml->range);
205: }
206: VecScatterDestroy(ml->rangeScatter);
207: if (ml->nullCols != PETSC_NULL) {
208: PetscFree(ml->nullCols);
209: }
211: /* Destroy parallel structures */
212: if (numProcs > 1)
213: {
214: MatDestroy(ml->locB);
215: MatDestroy(ml->interfaceB);
216: VecDestroy(ml->interiorRhs);
217: VecScatterDestroy(ml->interiorScatter);
218: VecDestroy(ml->interfaceRhs);
219: VecScatterDestroy(ml->interfaceScatter);
220: #ifndef PETSC_HAVE_PLAPACK
221: VecDestroy(ml->locInterfaceRhs);
222: VecScatterDestroy(ml->locInterfaceScatter);
223: #endif
224: VecDestroy(ml->interfaceColRhs);
225: VecScatterDestroy(ml->interfaceColScatter);
226: VecDestroy(ml->colWorkVec);
227: PetscFree(ml->interfaceRows);
228: PetscFree(ml->firstInterfaceRow);
229: PetscFree(ml->firstNullCol);
230: #ifdef PETSC_HAVE_PLAPACK
231: PLA_Obj_free(&ml->interfaceQR);
232: PLA_Obj_free(&ml->interfaceTAU);
233: PLA_Obj_free(&ml->PLArhsD);
234: PLA_Obj_free(&ml->PLArhsP);
235: PLA_Temp_free(&ml->PLAlayout);
236: #else
237: PetscFree(ml->interfaceQR);
238: PetscFree(ml->interfaceTAU);
239: PetscFree(ml->work);
240: #endif
241: }
242: return(0);
243: }
245: #undef __FUNCT__
246: #define __FUNCT__ "PCSetUp_Multilevel"
247: static int PCSetUp_Multilevel(PC pc)
248: {
249: PC_Multilevel *ml = (PC_Multilevel *) pc->data;
250: GMat A = pc->mat;
251: Grid grid;
252: Mesh mesh;
253: Mesh_Triangular *tri;
254: Partition p;
255: /* Mesh support */
256: char *typeName;
257: int numVertices, numNodes, numEdges, numElements;
258: int *bcNodes;
259: /* Parallel support */
260: PetscTruth isInterfaceRow;
261: int *diagInterfaceRows;
262: int *offdiagInterfaceRows;
263: int *interfaceRows;
264: int *interfaceCols;
265: int *interiorRows;
266: int numNewRange;
267: int *newRange;
268: int *rangeRows;
269: int *nullCols;
270: int rowLen;
271: int *rowLens;
272: int *cols;
273: PetscScalar *vals;
274: Vec tempV, tempB;
275: PetscScalar *arrayB;
276: IS localIS, globalIS;
277: int *recvCounts;
278: int numProcs, rank, proc;
279: PetscScalar one = 1.0;
280: int elem, newElem, corner, row, row2, col, nullCol, rangeCol, bc, count, lossCount;
281: #ifdef PETSC_HAVE_PLAPACK
282: double *locB_I;
283: #else
284: int minSize;
285: #endif
286: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
287: int pid, fd;
288: char procfile[64];
289: int event, event0, event1;
290: int gen_start, gen_read;
291: long long count0, globalCount0;
292: long long count1, globalCount1;
293: hwperf_profevctrarg_t evctr_args;
294: hwperf_cntr_t counts;
295: #endif
296: PetscTruth opt, match;
297: int ierr;
300: /* We need this so that setup is not called recursively by PCMultiLevelApplyXXX */
301: if (pc->setupcalled > 0) {
302: return(0);
303: }
304: pc->setupcalled = 2;
306: /* Destroy old structures -- This should not happen anymore */
307: if (ml->numLevels >= 0) {
308: SETERRQ(PETSC_ERR_ARG_WRONG, "Should not reform an existing decomposition");
309: /* PCDestroyStructures_Multilevel(pc); */
310: }
312: PC_MLLogEventBegin(PC_ML_SetUpInit, pc, 0, 0, 0);
313: /* Get the grid */
314: GMatGetGrid(A, &grid);
315: ml->grid = grid;
317: /* Get mesh information */
318: GridGetMesh(grid, &mesh);
319: MeshGetType(mesh, &typeName);
320: PetscTypeCompare((PetscObject) mesh, MESH_TRIANGULAR_2D, &match);
321: if (match == PETSC_FALSE) SETERRQ(PETSC_ERR_SUP, "Multilevel preconditioner only supports 2D triangular meshes");
322: tri = (Mesh_Triangular *) mesh->data;
323: /* Set target partition size */
324: ml->partSize = 10;
325: PetscOptionsGetInt(pc->prefix, "-pc_ml_partition_size", &ml->partSize, &opt);
327: /* Setup parallel information */
328: p = mesh->part;
329: numProcs = p->numProcs;
330: rank = p->rank;
332: /* Enable Mathematica support */
333: PetscOptionsHasName(pc->prefix, "-pc_ml_math", &opt);
334: if (opt == PETSC_TRUE) {
335: ml->useMath = PETSC_TRUE;
336: PetscViewerMathematicaOpen(pc->comm, PETSC_DECIDE, PETSC_NULL, PETSC_NULL, &ml->mathViewer);
337: }
339: PetscOptionsHasName(pc->prefix, "-pc_ml_mesh_view_draw", &opt);
340: if (opt == PETSC_TRUE) {
341: PetscViewerDrawOpen(pc->comm, 0, 0, 0, 0, 300, 300, &ml->factorizationViewer);
342: }
344: /* Setup allocation */
345: PetscOptionsGetInt(pc->prefix, "-pc_ml_max_levels", &ml->maxLevels, &opt);
346: if (ml->maxLevels < 0)
347: ml->maxLevels = 25;
349: /* Set the threshold for a final QR factorization */
350: PetscOptionsGetInt(pc->prefix, "-pc_ml_qr_thresh", &ml->QRthresh, &opt);
351: PetscOptionsGetReal(pc->prefix, "-pc_ml_qr_factor", &ml->DoQRFactor, &opt);
353: /* Allocate mesh storage */
354: ml->numMeshes = 1;
355: MeshGetInfo(mesh, &numVertices, &numNodes, &numEdges, &numElements);
356: PetscMalloc((ml->maxLevels+1) * sizeof(int), &ml->numElements);
357: PetscMalloc((ml->maxLevels+1) * sizeof(int), &ml->numVertices);
358: PetscMalloc((ml->maxLevels+1) * sizeof(int **), &ml->meshes);
359: PetscMalloc(NUM_MESH_DIV * sizeof(int *), &ml->meshes[0]);
360: PetscMalloc((numElements*3) * sizeof(int), &ml->meshes[0][MESH_ELEM]);
361: PetscMalloc(grid->numPointBC * sizeof(int), &bcNodes);
362: PetscLogObjectMemory(pc, (ml->maxLevels+1)*2 * sizeof(int) + (ml->maxLevels+1) * sizeof(int **) + NUM_MESH_DIV * sizeof(int *) +
363: (numElements*3) * sizeof(int) + grid->numPointBC * sizeof(int));
365: /* Create the local graph */
366: for(bc = 0; bc < grid->numPointBC; bc++) {
367: bcNodes[bc] = grid->pointBC[bc].node;
368: }
369: MeshCreateLocalCSR(mesh, &ml->numVertices[0], &ml->numEdges, &ml->meshes[0][MESH_OFFSETS],
370: &ml->meshes[0][MESH_ADJ], &ml->vertices, grid->numPointBC, bcNodes, PETSC_FALSE);
371:
373: /* Create the element descriptions */
374: ml->numElements[0] = 0;
375: for(elem = 0; elem < numElements; elem++) {
376: /* Remove all elements containing a constrained node */
377: for(bc = 0; bc < grid->numPointBC; bc++)
378: if ((tri->faces[elem*mesh->numCorners+0] == bcNodes[bc]) ||
379: (tri->faces[elem*mesh->numCorners+1] == bcNodes[bc]) ||
380: (tri->faces[elem*mesh->numCorners+2] == bcNodes[bc]))
381: break;
382: if (bc < grid->numPointBC)
383: continue;
385: /* Remove elements with off-processor nodes */
386: if ((tri->faces[elem*mesh->numCorners+0] >= numNodes) ||
387: (tri->faces[elem*mesh->numCorners+1] >= numNodes) ||
388: (tri->faces[elem*mesh->numCorners+2] >= numNodes))
389: continue;
391: /* Add the element */
392: for(corner = 0; corner < 3; corner++) {
393: /* Decrement node numbers to account for constrained nodes */
394: newElem = tri->faces[elem*mesh->numCorners+corner];
395: for(bc = 0; bc < grid->numPointBC; bc++)
396: if ((bcNodes[bc] >= 0) && (tri->faces[elem*mesh->numCorners+corner] > bcNodes[bc]))
397: newElem--;
398: ml->meshes[0][MESH_ELEM][ml->numElements[0]*3+corner] = newElem+1;
399: }
400: ml->numElements[0]++;
401: }
403: if (ml->factorizationViewer != PETSC_NULL) {
404: PetscTruth isPeriodic;
405: PetscDraw draw;
407: MeshView(mesh, ml->factorizationViewer);
408: PetscViewerFlush(ml->factorizationViewer);
409: PetscViewerDrawGetDraw(ml->factorizationViewer, 0, &draw);
410: MeshIsPeriodic(mesh, &isPeriodic);
411: if (isPeriodic == PETSC_FALSE) {
412: /* Does not work in periodic case */
413: PCMultiLevelView_Private(pc, ml->factorizationViewer, 0, ml->numVertices[0], ml->numElements[0], ml->meshes[0]);
414:
415: }
416: }
418: /* Cleanup */
419: PetscFree(bcNodes);
420: #ifdef PETSC_USE_BOPT_g
421: PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);
422: #endif
424: if ((ml->sField < 0) || (ml->tField < 0)) {
425: SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Shape and test function fields must be setup.");
426: }
427: /* Create shape function variable orderings */
428: VarOrderingCreateGeneral(grid, 1, &ml->sField, &ml->sOrder);
429: LocalVarOrderingCreate(grid, 1, &ml->sField, &ml->sLocOrder);
430: /* Create test function variable orderings */
431: VarOrderingCreateGeneral(grid, 1, &ml->tField, &ml->tOrder);
432: LocalVarOrderingCreate(grid, 1, &ml->tField, &ml->tLocOrder);
433: /* Check orderings */
434: if (ml->numVertices[0] != ml->sOrder->numLocVars) {
435: SETERRQ2(PETSC_ERR_ARG_CORRUPT, "Invalid local graph: %d vertices != %d vars", ml->numVertices[0], ml->sOrder->numLocVars);
436: }
437: /* Allocate the operator */
438: GMatCreateRectangular(grid, ml->sOrder, ml->tOrder, &ml->B);
439: /* Create the multiplier solution vector */
440: GVecCreateRectangular(ml->grid, ml->tOrder, &ml->reduceSol);
441: /* Construct the operator */
442: MatSetOption(ml->B, MAT_NEW_NONZERO_ALLOCATION_ERR);
443: GMatEvaluateOperatorGalerkin(ml->B, PETSC_NULL, 1, &ml->sField, &ml->tField, ml->gradOp,
444: ml->gradAlpha, MAT_FINAL_ASSEMBLY, ml);
445:
447: /* Precondition the initial system with its diagonal */
448: PetscOptionsHasName(pc->prefix, "-pc_ml_jacobi", &opt);
449: if (opt == PETSC_TRUE) {
450: GVecCreateConstrained(ml->grid, &ml->diag);
451: GMatGetDiagonalConstrained(pc->mat, ml->diag);
452: VecReciprocal(ml->diag);
453: VecSqrt(ml->diag);
454: MatDiagonalScale(ml->B, ml->diag, PETSC_NULL);
455: }
457: /* Parallel Support */
458: if (numProcs > 1) {
459: PC_MLLogEventBegin(PC_ML_SetUpParallel, pc, 0, 0, 0);
460: /* Separate interior and interface rows */
461: PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &interiorRows);
462: PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &ml->interfaceRows);
463: PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &diagInterfaceRows);
464: PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &offdiagInterfaceRows);
465: PetscMemzero(diagInterfaceRows, ml->tOrder->numLocVars * sizeof(int));
466: PetscMemzero(offdiagInterfaceRows, ml->tOrder->numLocVars * sizeof(int));
467: PetscLogObjectMemory(pc, ml->tOrder->numLocVars * sizeof(int));
468: ml->numInteriorRows = 0;
469: ml->numLocInterfaceRows = 0;
470: for(row = ml->tOrder->firstVar[rank]; row < ml->tOrder->firstVar[rank+1]; row++) {
471: MatGetRow(ml->B, row, &rowLen, &cols, PETSC_NULL);
472: #ifdef PETSC_USE_BOPT_g
473: if (rowLen <= 0) {
474: PetscPrintf(PETSC_COMM_SELF, "row %d is null\n", row);
475: SETERRQ(PETSC_ERR_ARG_CORRUPT, "Null row in gradient");
476: }
477: #endif
478: /* Find rows on domain boundaries */
479: for(col = 0, isInterfaceRow = PETSC_FALSE; col < rowLen; col++) {
480: if ((cols[col] < ml->sOrder->firstVar[rank]) || (cols[col] >= ml->sOrder->firstVar[rank+1])) {
481: isInterfaceRow = PETSC_TRUE;
482: offdiagInterfaceRows[ml->numLocInterfaceRows]++;
483: }
484: }
485: if (isInterfaceRow == PETSC_FALSE) {
486: interiorRows[ml->numInteriorRows++] = row;
487: } else {
488: ml->interfaceRows[ml->numLocInterfaceRows] = row;
489: diagInterfaceRows[ml->numLocInterfaceRows] = rowLen - offdiagInterfaceRows[ml->numLocInterfaceRows]; ml->numLocInterfaceRows++;
490: }
492: MatRestoreRow(ml->B, row, &rowLen, &cols, PETSC_NULL);
493: }
494: if (ml->numInteriorRows + ml->numLocInterfaceRows != ml->tOrder->numLocVars) {
495: SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
496: }
498: /* Place interior rows after interface rows */
499: ml->interiorRows = ml->interfaceRows + ml->numLocInterfaceRows;
500: PetscMemcpy(ml->interiorRows, interiorRows, ml->numInteriorRows * sizeof(int));
501: PetscFree(interiorRows);
503: /* Calculate global size of the interface matrix */
504: PetscMalloc((numProcs+1) * sizeof(int), &ml->firstInterfaceRow);
505: PetscLogObjectMemory(pc, (numProcs+1) * sizeof(int));
506: MPI_Allgather(&ml->numLocInterfaceRows, 1, MPI_INT, &ml->firstInterfaceRow[1], 1, MPI_INT, pc->comm);
507:
508: ml->firstInterfaceRow[0] = 0;
509: for(proc = 1; proc < numProcs+1; proc++)
510: ml->firstInterfaceRow[proc] += ml->firstInterfaceRow[proc-1];
511: ml->numInterfaceRows = ml->firstInterfaceRow[numProcs];
513: /* Setup allocation */
514: PetscMalloc((ml->numInteriorRows+1) * sizeof(int), &rowLens);
515: for(row = ml->tOrder->firstVar[rank], count = 0, lossCount = 0; row < ml->tOrder->firstVar[rank+1]; row++) {
516: if ((lossCount < ml->numLocInterfaceRows) && (ml->interfaceRows[lossCount] == row)) {
517: lossCount++;
518: continue;
519: } else {
520: MatGetRow(ml->B, row, &rowLen, PETSC_NULL, PETSC_NULL);
521: rowLens[count] = rowLen;
522: MatRestoreRow(ml->B, row, &rowLen, PETSC_NULL, PETSC_NULL);
523: count++;
524: }
525: }
526: if (count != ml->numInteriorRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
527: if (lossCount != ml->numLocInterfaceRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
529: /* Create the gradient local to this processor */
530: MatCreateSeqAIJ(PETSC_COMM_SELF, ml->numInteriorRows, ml->sOrder->numLocVars, 0, rowLens, &ml->locB);
531:
532: PetscFree(rowLens);
533: for(row = ml->tOrder->firstVar[rank], count = 0, lossCount = 0; row < ml->tOrder->firstVar[rank+1]; row++)
534: {
535: if ((lossCount < ml->numLocInterfaceRows) && (ml->interfaceRows[lossCount] == row))
536: {
537: lossCount++;
538: continue;
539: }
540: else
541: {
542: MatGetRow(ml->B, row, &rowLen, &cols, &vals);
543: /* This is definitely cheating */
544: for(col = 0; col < rowLen; col++)
545: cols[col] -= ml->sOrder->firstVar[rank];
546: MatSetValues(ml->locB, 1, &count, rowLen, cols, vals, INSERT_VALUES);
547: MatRestoreRow(ml->B, row, &rowLen, &cols, &vals);
548: count++;
549: }
550: }
551: MatAssemblyBegin(ml->locB, MAT_FINAL_ASSEMBLY);
552: MatAssemblyEnd(ml->locB, MAT_FINAL_ASSEMBLY);
553: if (count != ml->numInteriorRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
554: if (lossCount != ml->numLocInterfaceRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
555: PC_MLLogEventEnd(PC_ML_SetUpParallel, pc, 0, 0, 0);
556: } else {
557: ml->locB = ml->B;
558: }
559: MatGetLocalSize(ml->locB, &ml->numRows, &ml->numCols);
560: #ifdef PETSC_USE_BOPT_g
561: PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);
562: #endif
564: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
565: /* Events for the SGI R10000 hardware counters; add 16 to each event for Counter 1.
567:    Event | Counter 0                                            | Counter 1
568:      0   | Cycles                                               | Cycles
569:      1   | Instructions Issued                                  | Instructions Graduated
570:      2   | Load/prefetch/sync Issued                            | Load/prefetch/sync Graduated
571:      3   | Stores Issued                                        | Stores Graduated
572:      4   | Store conditional Issued                             | Store conditional Graduated
573:      5   | Failed store conditional                             | Floating point instructions Graduated
574:      6   | Branches decoded                                     | Write back from data cache to secondary cache
575:      7   | Write back from secondary cache to system interface  | TLB refill exceptions
576:      8   | Single-bit ECC errors on secondary cache data        | Branches mispredicted
577:      9   | Instruction cache misses                             | Data cache misses
578:     10   | Secondary instruction cache misses                   | Secondary data cache misses
579:     11   | Secondary instruction cache way mispredicted         | Secondary data cache way mispredicted
580:     12   | External intervention requests                       | External intervention hits
581:     13   | External invalidation requests                       | External invalidation hits
582:     14   | Virtual coherency                                    | Upgrade requests on clean secondary cache lines
583:     15   | Instructions graduated                               | Upgrade requests on shared secondary cache lines
584: */
585: /* Setup events */
586: event0 = 0;
587: event1 = 21;
588: PetscOptionsHasName(pc->prefix, "-pc_ml_flops_monitor", &opt);
589: if (opt == PETSC_TRUE)
590: event1 = 21;
591: PetscOptionsHasName(pc->prefix, "-pc_ml_cache_monitor", &opt);
592: if (opt == PETSC_TRUE)
593: event1 = 25;
594: PetscOptionsHasName(pc->prefix, "-pc_ml_sec_cache_monitor", &opt);
595: if (opt == PETSC_TRUE)
596: event1 = 26;
598: /* Open the proc file for iotctl() */
599: pid = getpid();
600: sprintf(procfile, "/proc/%010d", pid);
601: if ((fd = open(procfile, O_RDWR)) < 0) SETERRQ(PETSC_ERR_FILE_OPEN, "Could not open proc file");
602: /* Set the event for counter 0 */
603: for (event = 0; event < HWPERF_EVENTMAX; event++) {
604: if (event == event0) {
605: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_mode = HWPERF_CNTEN_U;
606: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ie = 1;
607: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ev = event;
608: evctr_args.hwp_ovflw_freq[event] = 0;
609: } else {
610: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_spec = 0;
611: evctr_args.hwp_ovflw_freq[event] = 0;
612: }
613: }
614: /* Set the event for counter 1 */
615: for (event = HWPERF_CNT1BASE; event < HWPERF_EVENTMAX; event++) {
616: if (event == event1) {
617: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_mode = HWPERF_CNTEN_U;
618: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ie = 1;
619: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ev = event - HWPERF_CNT1BASE;
620: evctr_args.hwp_ovflw_freq[event] = 0;
621: } else {
622: evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_spec = 0;
623: evctr_args.hwp_ovflw_freq[event] = 0;
624: }
625: }
626: evctr_args.hwp_ovflw_sig = 0;
627: /* Start SGI hardware counters */
628: if ((gen_start = ioctl(fd, PIOCENEVCTRS, (void *) &evctr_args)) < 0) {
629: SETERRQ(PETSC_ERR_LIB, "Could not access SGI hardware counters");
630: }
631: #endif
632: PC_MLLogEventEnd(PC_ML_SetUpInit, pc, 0, 0, 0);
634: /* Factorize the gradient matrix */
635: PCMultiLevelReduce(pc);
637: /* Parallel support */
638: if (numProcs > 1) {
639: PC_MLLogEventBegin(PC_ML_SetUpParallel, pc, 0, 0, 0);
640: /* Get size information */
641: PetscMalloc(numProcs * sizeof(int), &nullCols);
642: PetscMalloc((numProcs+1) * sizeof(int), &ml->firstNullCol);
643: PetscLogObjectMemory(pc, (numProcs+1) * sizeof(int));
644: MPI_Allreduce(&ml->numLocNullCols, &ml->numNullCols, 1, MPI_INT, MPI_SUM, pc->comm);
645: MPI_Allgather(&ml->numLocNullCols, 1, MPI_INT, nullCols, 1, MPI_INT, pc->comm);
646: for(proc = 1, ml->firstNullCol[0] = 0; proc <= numProcs; proc++) {
647: ml->firstNullCol[proc] = nullCols[proc-1] + ml->firstNullCol[proc-1];
648: }
649: PetscLogInfo(pc, "Interface rows: %d Total rows: %d NullCols: %d\n", ml->numInterfaceRows, ml->tOrder->numVars, ml->numNullCols);
651: /* Get all interface rows */
652: PetscMalloc(ml->numInterfaceRows * sizeof(int), &interfaceRows);
653: PetscMalloc(numProcs * sizeof(int), &recvCounts);
654: for(proc = 0; proc < numProcs; proc++)
655: recvCounts[proc] = ml->firstInterfaceRow[proc+1] - ml->firstInterfaceRow[proc];
656: MPI_Allgatherv(ml->interfaceRows, ml->numLocInterfaceRows, MPI_INT,
657: interfaceRows, recvCounts, ml->firstInterfaceRow, MPI_INT, pc->comm);
658: PetscFree(recvCounts);
660: /* Get all the interface columns */
661: PetscMalloc(ml->numNullCols * sizeof(int), &interfaceCols);
662: if (ml->numLocNullCols > 0) {
663: PetscMalloc(ml->numLocNullCols * sizeof(int), &cols);
664: }
665: for(col = 0; col < ml->numLocNullCols; col++) {
666: cols[col] = ml->nullCols[col] + ml->sOrder->firstVar[rank];
667: }
668: MPI_Allgatherv(cols, ml->numLocNullCols, MPI_INT,
669: interfaceCols, nullCols, ml->firstNullCol, MPI_INT, pc->comm);
670: if (ml->numLocNullCols > 0) {
671: PetscFree(cols);
672: }
674: /* Get the rows for the range split onto processors like a column vector
675: Here we are looping over all local columns (ml->sOrder->numLocVars)
676: using the local column number (col). The number of null columns discovered
677: (nullCol) should equal the number found in the serial factorization
678: (ml->numLocNullCols), as the number of range columns (rangeCol) should
679: agree with the local rank (ml->rank).
680: */
681: PetscMalloc(ml->sOrder->numLocVars * sizeof(int), &rangeRows);
682: for(col = 0, nullCol = 0, rangeCol = 0; col < ml->sOrder->numLocVars; col++) {
683: if ((nullCol < ml->numLocNullCols) && (ml->nullCols[nullCol] == col)) {
684: rangeRows[col] = interfaceRows[ml->firstNullCol[rank]+nullCol++];
685: } else {
686: rangeRows[col] = ml->interiorRows[ml->range[rangeCol++]];
687: }
688: }
689: if ((rangeCol != ml->rank) || (nullCol != ml->numLocNullCols)) {
690: PetscPrintf(PETSC_COMM_SELF, "[%d]rank: %d rangeCol: %d null: %d nullCol: %d\n",
691: rank, ml->rank, rangeCol, ml->numLocNullCols, nullCol);
692: SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid range space");
693: }
695: /* Renumber and enlarge range[] to account for interface rows */
696: ml->localRank = ml->rank;
697: if (ml->numNullCols < ml->firstInterfaceRow[rank])
698: numNewRange = 0;
699: else
700: numNewRange = PetscMin(ml->firstInterfaceRow[rank+1], ml->numNullCols) - ml->firstInterfaceRow[rank];
701: if (ml->rank + numNewRange > 0)
702: {
703: /* We maintain the range[] in local row numbers */
704: PetscMalloc((ml->rank + numNewRange) * sizeof(int), &newRange);
705: for(row = 0; row < ml->rank; row++)
706: newRange[row] = ml->interiorRows[ml->range[row]] - ml->tOrder->firstVar[rank];
707: for(row = 0; row < numNewRange; row++)
708: newRange[ml->rank+row] = ml->interfaceRows[row] - ml->tOrder->firstVar[rank];
709: ml->rank += numNewRange;
710: if (ml->range != PETSC_NULL)
711: {PetscFree(ml->range); }
712: ml->range = newRange;
713: }
714: MPI_Allreduce(&ml->rank, &ml->globalRank, 1, MPI_INT, MPI_SUM, pc->comm);
716: /* Create work vectors for applying P in parallel */
717: GVecCreateRectangular(grid, ml->sOrder, &ml->colWorkVec);
719: /* Create scatter from the solution vector to a local interior vector */
720: ISCreateStride(PETSC_COMM_SELF, ml->numInteriorRows, 0, 1, &localIS);
721: ISCreateGeneral(PETSC_COMM_SELF, ml->numInteriorRows, ml->interiorRows, &globalIS);
722: VecCreateSeq(PETSC_COMM_SELF, ml->numInteriorRows, &ml->interiorRhs);
723: VecScatterCreate(pc->vec, globalIS, ml->interiorRhs, localIS, &ml->interiorScatter);
724: ISDestroy(localIS);
725: ISDestroy(globalIS);
727: /* Create scatter from the solution vector to a local interface vector */
728: ISCreateStride(PETSC_COMM_SELF, ml->numLocInterfaceRows, ml->firstInterfaceRow[rank], 1, &localIS);
729: ISCreateGeneral(PETSC_COMM_SELF, ml->numLocInterfaceRows, ml->interfaceRows, &globalIS);
730: VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &ml->interfaceRhs);
731: VecScatterCreate(pc->vec, globalIS, ml->interfaceRhs, localIS, &ml->interfaceScatter);
732: ISDestroy(localIS);
733: ISDestroy(globalIS);
735: #ifndef PETSC_HAVE_PLAPACK
736: /* Create scatter from the solution vector to an interface vector */
737: if (rank == 0) {
738: ISCreateStride(PETSC_COMM_SELF, ml->numInterfaceRows, 0, 1, &localIS);
739: ISCreateGeneral(PETSC_COMM_SELF, ml->numInterfaceRows, interfaceRows, &globalIS);
740: VecCreateSeq(PETSC_COMM_SELF, ml->numInterfaceRows, &ml->locInterfaceRhs);
741: VecScatterCreate(pc->vec, globalIS, ml->locInterfaceRhs, localIS, &ml->locInterfaceScatter);
742: } else {
743: ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &localIS);
744: ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &globalIS);
745: VecCreateSeq(PETSC_COMM_SELF, 0, &ml->locInterfaceRhs);
746: VecScatterCreate(pc->vec, globalIS, ml->locInterfaceRhs, localIS, &ml->locInterfaceScatter);
747: }
748: ISDestroy(localIS);
749: ISDestroy(globalIS);
750: #endif
752: /* Create scatter from a column vector to a column interface vector */
753: #ifdef PETSC_HAVE_PLAPACK
754: ISCreateStride(PETSC_COMM_SELF, ml->numLocNullCols, ml->firstNullCol[rank], 1, &localIS);
755: ISCreateGeneral(PETSC_COMM_SELF, ml->numLocNullCols, &interfaceCols[ml->firstNullCol[rank]], &globalIS);
756:
757: VecCreateMPI(pc->comm, ml->numLocNullCols, ml->numNullCols, &ml->interfaceColRhs);
758: VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
759:
760: ISDestroy(localIS);
761: ISDestroy(globalIS);
762: #else
763: if (rank == 0) {
764: ISCreateStride(PETSC_COMM_SELF, ml->numNullCols, 0, 1, &localIS);
765: ISCreateGeneral(PETSC_COMM_SELF, ml->numNullCols, interfaceCols, &globalIS);
766: VecCreateSeq(PETSC_COMM_SELF, ml->numNullCols, &ml->interfaceColRhs);
767: VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
768:
769: } else {
770: ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &localIS);
771: ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &globalIS);
772: VecCreateSeq(PETSC_COMM_SELF, 0, &ml->interfaceColRhs);
773: VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
774:
775: }
776: ISDestroy(localIS);
777: ISDestroy(globalIS);
778: #endif
780: /* Create scatter from a solution vector to a column vector */
781: ISCreateStride(PETSC_COMM_SELF, ml->sOrder->numLocVars, ml->sOrder->firstVar[rank], 1, &localIS);
782: ISCreateGeneral(PETSC_COMM_SELF, ml->sOrder->numLocVars, rangeRows, &globalIS);
783: VecScatterCreate(pc->vec, globalIS, ml->colWorkVec, localIS, &ml->rangeScatter);
784: ISDestroy(localIS);
785: ISDestroy(globalIS);
787: /* Create sparse B_I */
788: MatCreateMPIAIJ(pc->comm, ml->numLocInterfaceRows, ml->sOrder->numLocVars, ml->numInterfaceRows,
789: ml->sOrder->numVars, 0, diagInterfaceRows, 0, offdiagInterfaceRows, &ml->interfaceB);
790:
791: PetscFree(diagInterfaceRows);
792: PetscFree(offdiagInterfaceRows);
794: for(row = 0; row < ml->numLocInterfaceRows; row++) {
795: /* Copy row into B^p_I */
796: row2 = row + ml->firstInterfaceRow[rank];
797: MatGetRow(ml->B, ml->interfaceRows[row], &rowLen, &cols, &vals);
798: MatSetValues(ml->interfaceB, 1, &row2, rowLen, cols, vals, INSERT_VALUES);
799: MatRestoreRow(ml->B, ml->interfaceRows[row], &rowLen, &cols, &vals);
800: }
801: MatAssemblyBegin(ml->interfaceB, MAT_FINAL_ASSEMBLY);
802: MatAssemblyEnd(ml->interfaceB, MAT_FINAL_ASSEMBLY);
804: /* Get V_I: The rows of V corresponding to the zero singular values for each domain */
805: VecDuplicateVecs(ml->colWorkVec, ml->numNullCols, &ml->interfaceV);
806: for(col = 0; col < ml->numNullCols; col++) {
807: tempV = ml->interfaceV[col];
808: if ((col >= ml->firstNullCol[rank]) && (col < ml->firstNullCol[rank+1])) {
809: nullCol = ml->nullCols[col-ml->firstNullCol[rank]]+ml->sOrder->firstVar[rank];
810: ierr = VecSetValues(tempV, 1, &nullCol, &one, INSERT_VALUES);
811: }
812: ierr = VecAssemblyBegin(tempV);
813: ierr = VecAssemblyEnd(tempV);
814: if ((col >= ml->firstNullCol[rank]) && (col < ml->firstNullCol[rank+1])) {
815: ierr = PCMultiLevelApplyV(pc, tempV, tempV);
816: }
817: }
819: #ifdef PETSC_HAVE_PLAPACK
820: /* You MUST initialize all PLAPACK variables to PETSC_NULL */
821: ml->PLAlayout = PETSC_NULL;
822: ml->interfaceQR = PETSC_NULL;
823: ml->interfaceTAU = PETSC_NULL;
824: ml->PLArhsD = PETSC_NULL;
825: ml->PLArhsP = PETSC_NULL;
826: /* Create the template for vector and matrix alignment */
827: PLA_Temp_create(ml->numNullCols, 0, &ml->PLAlayout);
828: /* Create the interface matrix B_Gamma */
829: PLA_Matrix_create(MPI_DOUBLE, ml->numInterfaceRows, ml->numNullCols, ml->PLAlayout, 0, 0, &ml->interfaceQR);
830:
831: /* Create the vector of scaling factors Tau */
832: PLA_Vector_create(MPI_DOUBLE, ml->numNullCols, ml->PLAlayout, 0, &ml->interfaceTAU);
833: /* Create the storage vectors for application of D and P */
834: PLA_Vector_create(MPI_DOUBLE, ml->numNullCols, ml->PLAlayout, 0, &ml->PLArhsD);
835: PLA_Vector_create(MPI_DOUBLE, ml->numInterfaceRows, ml->PLAlayout, 0, &ml->PLArhsP);
836: /* Create temporary space for the local rows of the interface matrix */
837: PetscMalloc(ml->numLocInterfaceRows*ml->numNullCols * sizeof(double), &locB_I);
839: /* Construct B_I V_I */
840: ierr = VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &tempB);
841: for(col = 0; col < ml->numNullCols; col++) {
842: MatMult(ml->interfaceB, ml->interfaceV[col], tempB);
843: /* Copy into storage -- column-major order */
844: ierr = VecGetArray(tempB, &arrayB);
845: PetscMemcpy(&locB_I[ml->numLocInterfaceRows*col], arrayB, ml->numLocInterfaceRows * sizeof(double));
846:
847: VecRestoreArray(tempB, &arrayB);
848: }
849: VecDestroy(tempB);
850: VecDestroyVecs(ml->interfaceV, ml->numNullCols);
852: /* Set the matrix entries */
853: PLA_Obj_set_to_zero(ml->interfaceQR);
854: PLA_API_begin();
855: PLA_Obj_API_open(ml->interfaceQR);
856: PLA_API_axpy_matrix_to_global(ml->numLocInterfaceRows, ml->numNullCols, &one, locB_I,
857: ml->numLocInterfaceRows, ml->interfaceQR, ml->firstInterfaceRow[rank], 0);
858:
859: PLA_Obj_API_close(ml->interfaceQR);
860: PLA_API_end();
861: PetscFree(locB_I);
862: #else
863: /* Allocate storage for the QR factorization */
864: minSize = PetscMin(ml->numInterfaceRows, ml->numNullCols);
865: PetscMalloc(ml->numInterfaceRows*ml->numNullCols * sizeof(double), &ml->interfaceQR);
866: PetscMalloc(minSize * sizeof(double), &ml->interfaceTAU);
867: PetscLogObjectMemory(pc, (ml->numInterfaceRows*ml->numNullCols + minSize)*sizeof(double));
868: PetscMemzero(ml->interfaceQR, ml->numInterfaceRows*ml->numNullCols * sizeof(double));
869: PetscMemzero(ml->interfaceTAU, minSize * sizeof(double));
871: /* Construct B_I V_I */
872: VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &tempB);
873: for(col = 0; col < ml->numNullCols; col++) {
874: MatMult(ml->interfaceB, ml->interfaceV[col], tempB);
875: /* Copy into storage -- column-major order */
876: VecGetArray(tempB, &arrayB);
877: PetscMemcpy(&ml->interfaceQR[ml->numInterfaceRows*col+ml->firstInterfaceRow[rank]], arrayB,
878: ml->numLocInterfaceRows * sizeof(double));
879:
880: VecRestoreArray(tempB, &arrayB);
881: }
882: VecDestroy(tempB);
883: VecDestroyVecs(ml->interfaceV, ml->numNullCols);
885: /* Gather matrix / B^1_I V^1_I | ... | B^1_I V^p_I
886: | B^2_I V^1_I | ... | B^2_I V^p_I |
887: ...
888: B^p_I V^1_I | ... | B^p_I V^p_I /
890: Note: It has already been put in column-major order
891: */
892: PetscMalloc(numProcs * sizeof(int), &recvCounts);
893: for(proc = 0; proc < numProcs; proc++)
894: recvCounts[proc] = ml->firstInterfaceRow[proc+1] - ml->firstInterfaceRow[proc];
895: for(col = 0; col < ml->numNullCols; col++) {
896: MPI_Gatherv(&ml->interfaceQR[ml->numInterfaceRows*col+ml->firstInterfaceRow[rank]], ml->numLocInterfaceRows,
897: MPI_DOUBLE, &ml->interfaceQR[ml->numInterfaceRows*col], recvCounts, ml->firstInterfaceRow,
898: MPI_DOUBLE, 0, pc->comm);
899:
900: }
901: PetscFree(recvCounts);
903: /* Allocate workspace -- Also used in applying P */
904: ml->workLen = ml->numInterfaceRows;
905: PetscMalloc(ml->workLen * sizeof(double), &ml->work);
906: PetscLogObjectMemory(pc, ml->workLen * sizeof(double));
907: #endif
909: /* Do QR factorization on one processor */
910: PC_MLLogEventBegin(PC_ML_QRFactorization, pc, 0, 0, 0);
911: #ifdef PETSC_HAVE_PLAPACK
912: PLA_QR(ml->interfaceQR, ml->interfaceTAU);
913: #else
914: if (rank == 0) {
915: LAgeqrf_(&ml->numInterfaceRows, &ml->numNullCols, ml->interfaceQR,
916: &ml->numInterfaceRows, ml->interfaceTAU, ml->work, &ml->workLen, &ierr);
917: if (ierr) SETERRQ(PETSC_ERR_LIB, "Invalid interface QR");
918: }
919: #endif
920: PC_MLLogEventEnd(PC_ML_QRFactorization, pc, 0, 0, 0);
922: #ifdef PETSC_USE_BOPT_g
923: /* Diagnostics */
924: PetscOptionsHasName(PETSC_NULL, "-pc_ml_debug_qr", &opt);
925: if (opt == PETSC_TRUE) {
926: PetscReal *intQR;
927: PetscReal *intTAU;
928: int r, c;
929: #ifdef PETSC_HAVE_PLAPACK
930: PLA_Obj tau = PETSC_NULL;
931: PLA_Obj qr = PETSC_NULL;
932: int length, width, stride, ldim;
934: PLA_Mscalar_create(MPI_DOUBLE, 0, 0, ml->numNullCols, 1, ml->PLAlayout, &tau);
935: PLA_Mscalar_create(MPI_DOUBLE, 0, 0, ml->numInterfaceRows, ml->numNullCols, ml->PLAlayout, &qr);
936: PLA_Copy(ml->interfaceTAU, tau);
937: PLA_Copy(ml->interfaceQR, qr);
938: PLA_Obj_local_info(tau, &length, &width, (void **) &intTAU, &stride, &ldim);
939: if ((rank == 0) && ((length != ml->numNullCols) || (width != 1) || (stride != 1)))
940: SETERRQ(PETSC_ERR_LIB, "PLAPACK returned a bad interface tau vector");
941: PLA_Obj_local_info(qr, &length, &width, (void **) &intQR, &stride, &ldim);
942: if ((rank == 0) && ((length != ml->numInterfaceRows) || (width != ml->numNullCols) || (ldim != ml->numInterfaceRows)))
943: SETERRQ(PETSC_ERR_LIB, "PLAPACK returned a bad interface qr matrix");
944: #else
945: intTAU = ml->interfaceTAU;
946: intQR = ml->interfaceQR;
947: #endif
948: if (rank == 0) {
949: for(c = 0; c < ml->numNullCols; c++)
950: PetscPrintf(pc->comm, "%.4g ", intTAU[c]);
951: PetscPrintf(pc->comm, "\n");
952: for(r = 0; r < ml->numInterfaceRows; r++) {
953: for(c = 0; c < ml->numNullCols; c++)
954: PetscPrintf(pc->comm, "%.4g ", intQR[ml->numInterfaceRows*c+r]);
955: PetscPrintf(pc->comm, "\n");
956: }
957: }
958: #ifdef PETSC_HAVE_PLAPACK
959: PLA_Obj_free(&tau);
960: PLA_Obj_free(&qr);
961: #endif
962: }
963: #endif
965: /* Scatter Cleanup */
966: PetscFree(rangeRows);
967: PetscFree(nullCols);
968: PetscFree(interfaceRows);
969: PetscFree(interfaceCols);
970: PC_MLLogEventEnd(PC_ML_SetUpParallel, pc, 0, 0, 0);
971: } else {
972: /* Get the rows for the range split onto processors like a column vector */
973: PetscMalloc(ml->rank * sizeof(int), &rangeRows);
974: for(row = 0; row < ml->rank; row++)
975: rangeRows[row] = ml->range[row];
976: GVecCreateRectangular(grid, ml->sOrder, &ml->colWorkVec);
978: /* Create scatter from a solution vector to a column vector */
979: ISCreateStride(PETSC_COMM_SELF, ml->sOrder->numLocVars, ml->sOrder->firstVar[rank], 1, &localIS);
980: ISCreateGeneral(PETSC_COMM_SELF, ml->sOrder->numLocVars, rangeRows, &globalIS);
981: VecScatterCreate(pc->vec, globalIS, ml->colWorkVec, localIS, &ml->rangeScatter);
982: ISDestroy(localIS);
983: ISDestroy(globalIS);
985: /* Cleanup */
986: PetscFree(rangeRows);
987: VecDestroy(ml->colWorkVec);
988: }
990: if (ml->diag != PETSC_NULL) {
991: /* Recover original B */
992: VecReciprocal(ml->diag);
993: MatDiagonalScale(ml->B, ml->diag, PETSC_NULL);
994: VecReciprocal(ml->diag);
995: }
997: #if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
998: /* Read the hardware counter values */
999: if ((gen_read = ioctl(fd, PIOCGETEVCTRS, (void *) &counts)) < 0) {
1000: SETERRQ(PETSC_ERR_LIB, "Could not access SGI hardware counters");
1001: }
1002: /* Generation number should be the same */
1003: if (gen_start != gen_read) SETERRQ(PETSC_ERR_LIB, "Lost SGI hardware counters");
1004: /* Release the counters */
1005: if ((ioctl(fd, PIOCRELEVCTRS)) < 0) SETERRQ(PETSC_ERR_LIB, "Could not free SGI hardware counters");
1006: close(fd);
1007: count0 = counts.hwp_evctr[event0];
1008: count1 = counts.hwp_evctr[event1];
1009: MPI_Reduce(&count0, &globalCount0, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, pc->comm);
1010: MPI_Reduce(&count1, &globalCount1, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, pc->comm);
1011: /* Use hardware counter data */
1012: switch (event1) {
1013: case 21:
1014: PetscPrintf(pc->comm, "Flops: %ld\n", globalCount1);
1015: break;
1016: case 25:
1017: PetscPrintf(pc->comm, "Data cache misses: %ld\n", globalCount1);
1018: break;
1019: case 26:
1020: PetscPrintf(pc->comm, "Secondary data cache misses: %ld\n", globalCount1);
1021: break;
1022: }
1023: #endif
1025: PetscOptionsHasName(PETSC_NULL, "-pc_ml_debug", &opt);
1026: if (opt == PETSC_TRUE) {
1027: PCDebug_Multilevel(pc);
1028: }
1030: return(0);
1031: }
1033: #undef __FUNCT__
1034: #define __FUNCT__ "PCPreSolve_Multilevel"
1035: static int PCPreSolve_Multilevel(PC pc, KSP ksp, Vec x, Vec rhs)
1036: {
1037: PC_Multilevel *ml = (PC_Multilevel *) pc->data;
1038: Grid grid = ml->grid;
1039: GVec bdReduceVec, bdReduceVec2;
1040: GVec reduceVec;
1041: GMat bdReduceMat;
1042: VarOrdering sOrder;
1043: PetscScalar one = 1.0, reduceAlpha;
1044: int bc;
1045: int ierr;
1048: /* Create -g = B^T u + B^T_Gamma f_Gamma */
1049: GVecCreateRectangular(grid, ml->sOrder, &bdReduceVec);
1050: VarOrderingCreateSubset(grid->reduceOrder, 1, &ml->tField, PETSC_FALSE, &sOrder);
1051: #define OLD_REDUCTION
1052: #ifdef OLD_REDUCTION
1053: GMatCreateRectangular(grid, sOrder, ml->sOrder, &bdReduceMat);
1054: MatSetOption(bdReduceMat, MAT_NEW_NONZERO_ALLOCATION_ERR);
1055: for(bc = 0; bc < grid->numBC; bc++) {
1056: /* Conditions on A */
1057: if (ml->tField == grid->bc[bc].field) {
1058: GMatEvaluateOperatorGalerkin(bdReduceMat, PETSC_NULL, 1, &ml->tField, &ml->sField, ml->divOp,
1059: 1.0, MAT_FINAL_ASSEMBLY, ml);
1060:
1061: }
1062: /* Conditions on B^T only show up in the u equations, so they are already handled */
1063: }
1064: for(bc = 0; bc < grid->numPointBC; bc++) {
1065: /* Conditions on A */
1066: if (ml->tField == grid->pointBC[bc].field) {
1067: GVecEvaluateOperatorGalerkin(bdReduceVec, grid->bdReduceVec, grid->bdReduceVec, ml->tField, ml->sField,
1068: ml->divOp, 1.0, ml);
1069:
1070: }
1071: /* Conditions on B^T only show up in the u equations, so they are already handled */
1072: #ifdef PETSC_USE_BOPT_g
1073: PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);
1074: #endif
1075: }
1076: #else
1077: GVecEvaluateOperatorGalerkin(bdReduceVec, bdReduceVec, grid->bdReduceVec, ml->tField, ml->sField,
1078: ml->divOp, 1.0, ml);
1079:
1080: #endif
1081: VarOrderingDestroy(sOrder);
1082: #ifdef OLD_REDUCTION
1083: /* Should be taken out when I fix the MF version */
1084: MatMult(bdReduceMat, grid->bdReduceVec, bdReduceVec);
1085: GMatDestroy(bdReduceMat);
1086: #endif
1088: /* Add in the B^T u part */
1089: if (ml->nonlinearIterate != PETSC_NULL) {
1090: GVecCreateRectangular(grid, ml->sOrder, &bdReduceVec2);
1091: PCMultiLevelApplyGradientTrans(pc, ml->nonlinearIterate, bdReduceVec2);
1092: VecAXPY(&one, bdReduceVec2, bdReduceVec);
1093: GVecDestroy(bdReduceVec2);
1094: }
1096: /* Calculate -D^{-T} Z^T g = D^{-T} Z^T B^T_Gamma f_Gamma */
1097: PCMultiLevelApplyVTrans(pc, bdReduceVec, bdReduceVec);
1098: PCMultiLevelApplyDInvTrans(pc, bdReduceVec, bdReduceVec);
1100: /* Calculate -P_1 D^{-1} Z^T g */
1101: PCMultiLevelApplyP1(pc, bdReduceVec, ml->reduceSol);
1103: /* Calculate -A P_1 D^{-1} Z^T g -- This does not seem like good design, but I will deal with it later */
1104: GVecCreateRectangular(grid, ml->tOrder, &reduceVec);
1105: MatMult(pc->mat, ml->reduceSol, reduceVec);
1107: /* f --> f - A P_1 D^{-1} Z^T g */
1108: GridGetBCMultiplier(grid, &reduceAlpha);
1109: VecAXPY(&reduceAlpha, reduceVec, rhs);
1111: /* Cleanup */
1112: GVecDestroy(reduceVec);
1113: GVecDestroy(bdReduceVec);
1114: return(0);
1115: }
1117: #undef __FUNCT__
1118: #define __FUNCT__ "PCPostSolve_Multilevel"
1119: static int PCPostSolve_Multilevel(PC pc, KSP ksp, Vec x, Vec rhs)
1120: {
1122: return(0);
1123: }
1125: #undef __FUNCT__
1126: #define __FUNCT__ "PCDestroy_Multilevel"
1127: static int PCDestroy_Multilevel(PC pc)
1128: {
1129: PC_Multilevel *ml = (PC_Multilevel *) pc->data;
1130: int ierr;
1133: if (ml->numLevels >= 0) {
1134: PCDestroyStructures_Multilevel(pc);
1135: }
1136: if (ml->mathViewer != PETSC_NULL) {
1137: PetscViewerDestroy(ml->mathViewer);
1138: }
1139: if (ml->factorizationViewer != PETSC_NULL) {
1140: PetscViewerDestroy(ml->factorizationViewer);
1141: }
1142: PetscFree(ml);
1143: return(0);
1144: }
1146: #undef __FUNCT__
1147: #define __FUNCT__ "PCApply_Multilevel"
1148: int PCApply_Multilevel(PC pc, Vec x, Vec y)
1149: {
1153: PCMultiLevelApplyP(pc, x, y);
1154: return(0);
1155: }
1157: #undef __FUNCT__
1158: #define __FUNCT__ "PCApplyTrans_Multilevel"
1159: int PCApplyTrans_Multilevel(PC pc, Vec x, Vec y)
1160: {
1164: PCMultiLevelApplyPTrans(pc, x, y);
1165: return(0);
1166: }
1168: #undef __FUNCT__
1169: #define __FUNCT__ "PCApplySymmetricLeft_Multilevel"
1170: int PCApplySymmetricLeft_Multilevel(PC pc, Vec x, Vec y)
1171: {
1175: PCMultiLevelApplyP2Trans(pc, x, y);
1176: return(0);
1177: }
1179: #undef __FUNCT__
1180: #define __FUNCT__ "PCApplySymmetricRight_Multilevel"
1181: int PCApplySymmetricRight_Multilevel(PC pc, Vec x, Vec y)
1182: {
1186: PCMultiLevelApplyP2(pc, x, y);
1187: return(0);
1188: }
1190: EXTERN_C_BEGIN
1191: #undef __FUNCT__
1192: #define __FUNCT__ "PCCreate_Multilevel"
1193: int PCCreate_Multilevel(PC pc)
1194: {
1195: PC_Multilevel *ml;
1196: int ierr;
1199: PCMultiLevelInitializePackage(PETSC_NULL);
1201: PetscNew(PC_Multilevel, &ml);
1202: PetscLogObjectMemory(pc, sizeof(PC_Multilevel));
1203: pc->data = (void *) ml;
1205: pc->ops->setup = PCSetUp_Multilevel;
1206: pc->ops->apply = PCApply_Multilevel;
1207: pc->ops->applyrichardson = PETSC_NULL;
1208: pc->ops->applyBA = PETSC_NULL;
1209: pc->ops->applytranspose = PCApplyTrans_Multilevel;
1210: pc->ops->applyBAtranspose = PETSC_NULL;
1211: pc->ops->setfromoptions = PETSC_NULL;
1212: pc->ops->presolve = PCPreSolve_Multilevel;
1213: pc->ops->postsolve = PCPostSolve_Multilevel;
1214: pc->ops->getfactoredmatrix = PETSC_NULL;
1215: pc->ops->applysymmetricleft = PCApplySymmetricLeft_Multilevel;
1216: pc->ops->applysymmetricright = PCApplySymmetricRight_Multilevel;
1217: pc->ops->setuponblocks = PETSC_NULL;
1218: pc->ops->destroy = PCDestroy_Multilevel;
1219: pc->ops->view = PETSC_NULL;
1220: pc->modifysubmatrices = PETSC_NULL;
1222: ml->grid = PETSC_NULL;
1223: ml->useMath = PETSC_FALSE;
1224: ml->mathViewer = PETSC_NULL;
1225: ml->factorizationViewer = PETSC_NULL;
1227: ml->gradOp = GRADIENT;
1228: ml->divOp = DIVERGENCE;
1229: ml->sField = -1;
1230: ml->tField = -1;
1231: ml->sOrder = PETSC_NULL;
1232: ml->sLocOrder = PETSC_NULL;
1233: ml->tOrder = PETSC_NULL;
1234: ml->tLocOrder = PETSC_NULL;
1235: ml->gradAlpha = 1.0;
1236: ml->B = PETSC_NULL;
1237: ml->locB = PETSC_NULL;
1238: ml->reduceSol = PETSC_NULL;
1239: ml->nonlinearIterate = PETSC_NULL;
1240: ml->diag = PETSC_NULL;
1242: ml->numMeshes = -1;
1243: ml->numElements = PETSC_NULL;
1244: ml->numVertices = PETSC_NULL;
1245: ml->vertices = PETSC_NULL;
1246: ml->numEdges = -1;
1247: ml->meshes = PETSC_NULL;
1249: ml->numPartitions = PETSC_NULL;
1250: ml->numPartitionCols = PETSC_NULL;
1251: ml->colPartition = PETSC_NULL;
1252: ml->numPartitionRows = PETSC_NULL;
1253: ml->rowPartition = PETSC_NULL;
1254: ml->rank = -1;
1255: ml->localRank = -1;
1256: ml->range = PETSC_NULL;
1257: ml->rangeScatter = PETSC_NULL;
1258: ml->numLocNullCols = -1;
1259: ml->nullCols = PETSC_NULL;
1261: ml->numRows = -1;
1262: ml->numCols = -1;
1263: ml->numLevels = -1;
1264: ml->maxLevels = -1;
1265: ml->QRthresh = 0;
1266: ml->DoQRFactor = 2;
1267: ml->zeroTol = 1.0e-10;
1268: ml->factors = PETSC_NULL;
1269: ml->grads = PETSC_NULL;
1271: ml->globalRank = -1;
1272: ml->numNullCols = -1;
1273: ml->numLocInterfaceRows = -1;
1274: ml->numInterfaceRows = -1;
1275: ml->interfaceRows = PETSC_NULL;
1276: ml->firstInterfaceRow = PETSC_NULL;
1277: ml->numInteriorRows = -1;
1278: ml->interiorRows = PETSC_NULL;
1279: ml->interfaceQR = PETSC_NULL;
1280: ml->interfaceTAU = PETSC_NULL;
1281: ml->interfaceV = PETSC_NULL;
1282: ml->interfaceB = PETSC_NULL;
1283: ml->interiorRhs = PETSC_NULL;
1284: ml->interiorScatter = PETSC_NULL;
1285: ml->interfaceRhs = PETSC_NULL;
1286: ml->interfaceScatter = PETSC_NULL;
1287: #ifndef PETSC_HAVE_PLAPACK
1288: ml->locInterfaceRhs = PETSC_NULL;
1289: ml->locInterfaceScatter = PETSC_NULL;
1290: #endif
1291: ml->interfaceColRhs = PETSC_NULL;
1292: ml->interfaceColScatter = PETSC_NULL;
1293: ml->reduceScatter = PETSC_NULL;
1294: ml->colWorkVec = PETSC_NULL;
1295: ml->globalColWorkVec = PETSC_NULL;
1297: ml->workLen = -1;
1298: ml->work = PETSC_NULL;
1299: ml->svdWorkLen = -1;
1300: ml->svdWork = PETSC_NULL;
1301: return(0);
1302: }
1303: EXTERN_C_END
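/* A minimal usage sketch, assuming the constructor above is registered under the
   type name "multilevel" (the registration itself is not in this file) and using
   the PETSc 2.x SLES interface:

       SLES sles;
       PC   pc;
       ierr = SLESCreate(PETSC_COMM_WORLD, &sles);CHKERRQ(ierr);
       ierr = SLESGetPC(sles, &pc);CHKERRQ(ierr);
       ierr = PCSetType(pc, "multilevel");CHKERRQ(ierr);

   The shape and test function fields (ml->sField, ml->tField) must also be set
   before setup, or PCSetUp_Multilevel() raises an error; runtime behavior is
   then controlled by the options read there, e.g. -pc_ml_partition_size,
   -pc_ml_max_levels, -pc_ml_qr_thresh, -pc_ml_qr_factor, -pc_ml_jacobi,
   and -pc_ml_debug. */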