#ifdef PETSC_RCS_HEADER static char vcid[] = "$Id: mlSerial.c,v 1.12 2000/10/22 20:27:52 knepley Exp $"; #endif /* Defines the serial multilevel preconditioner routines */ #include "src/sles/pc/pcimpl.h" /*I "pc.h" I*/ #include "ml.h" #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelDoQR_Private" PetscTruth PCMultiLevelDoQR_Private(PC pc, int numRows, int numCols) { /* This functions decides whether to QR factorize the matrix U of an SVD. This is a memory saving device when B = UDV is long and skinny. */ PC_Multilevel *ml = (PC_Multilevel *) pc->data; PetscFunctionBegin; if (numRows > (int) numCols*ml->DoQRFactor) { PetscFunctionReturn(PETSC_TRUE); } else { PetscFunctionReturn(PETSC_FALSE); } } #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelPartitionMesh_Private" int PCMultiLevelPartitionMesh_Private(PC pc, int level, int **mesh, int *numParts, int *colPartition) { PC_Multilevel *ml = (PC_Multilevel *) pc->data; PetscTruth change; int numCols, freeCols; int numAdj, newNumAdj, numFound; int *offsets; int *adj; int *sizes; int part, col, newCol, adjCol, newAdjCol; int ierr; PetscFunctionBegin; ierr = PC_MLLogEventBegin(PC_ML_ReducePartitionMesh, pc, 0, 0, 0); CHKERRQ(ierr); if (ml->useMath == PETSC_FALSE) { offsets = mesh[MESH_OFFSETS]; adj = mesh[MESH_ADJ]; /* Set initial number of partitions */ numCols = ml->numVertices[level]; *numParts = numCols/ml->partSize; /* If the graph is small enough, just return the whole thing */ if (*numParts == 0) { *numParts = 1; ierr = PetscMemzero(colPartition, numCols * sizeof(int)); CHKERRQ(ierr); ierr = PC_MLLogEventEnd(PC_ML_ReducePartitionMesh, pc, 0, 0, 0); CHKERRQ(ierr); PetscFunctionReturn(0); } /* Get connectivity information */ /* Initialize partition and partition sizes */ ierr = PetscMalloc((*numParts) * sizeof(int), &sizes); CHKERRQ(ierr); for(col = 0; col < numCols; col++) colPartition[col] = -1; /* Pick numParts starting points -- Conforms to the Mathematica */ for(part = 1; part <= *numParts; part++) { colPartition[ml->partSize*part-1] = part-1; sizes[part-1] = 1; } /* Start enlarging partitions */ freeCols = numCols - (*numParts); change = PETSC_TRUE; while(freeCols > 0) { /* Quit if nothing more can be done */ if (change == PETSC_FALSE) break; change = PETSC_FALSE; /* Add a node to each partition if possible */ for(part = 0; part < (*numParts); part++) { /* Look for the column adjacent to the most columns in this partition */ newCol = -1; numAdj = 0; for(col = 0, numFound = 0; (col < numCols) && (numFound < sizes[part]); col++) { if (colPartition[col] == part) { numFound++; /* Look for adjacent columns ignoring self edge */ for(adjCol = offsets[col]+1; adjCol < offsets[col+1]; adjCol++) { /* Ignore columns already in a partition */ if (colPartition[adj[adjCol]] >= 0) continue; newNumAdj = 0; /* Count the number of columns adjacent to this one that are already in the partition */ for(newAdjCol = offsets[adj[adjCol]]+1; newAdjCol < offsets[adj[adjCol]+1]; newAdjCol++) if (colPartition[adj[newAdjCol]] == part) newNumAdj++; if ((newNumAdj > numAdj) || ((newNumAdj == numAdj) && (adj[adjCol] < newCol))) { newCol = adj[adjCol]; numAdj = newNumAdj; } } } } if (newCol >= 0) { colPartition[newCol] = part; sizes[part]++; freeCols--; change = PETSC_TRUE; } } } /* Take care of extra columns */ if (change == PETSC_FALSE) { for(col = 0; col < numCols; col++) if (colPartition[col] < 0) colPartition[col] = (*numParts)++; } ierr = PetscFree(sizes); CHKERRQ(ierr); } else { #ifdef PETSC_HAVE_MATHEMATICA ierr = ViewerMathematicaPartitionMesh(ml->mathViewer, ml->numElements[level], ml->numVertices[level], ml->vertices, mesh, numParts, colPartition); CHKERRQ(ierr); #else SETERRQ(PETSC_ERR_SUP, " "); #endif } ierr = PC_MLLogEventEnd(PC_ML_ReducePartitionMesh, pc, 0, 0, 0); CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelPartitionCols_Private" int PCMultiLevelPartitionCols_Private(PC pc, int level, int numParts, int numCols, int *colPartition, int *globalCols) { PC_Multilevel *ml = (PC_Multilevel *) pc->data; int numLocCols; int part, col; int ierr; PetscFunctionBegin; ierr = PetscMalloc(numParts * sizeof(int), &ml->numPartitionCols[level]); CHKERRQ(ierr); PetscLogObjectMemory(pc, numParts * sizeof(int)); ierr = PetscMemzero(ml->numPartitionCols[level], numParts * sizeof(int)); CHKERRQ(ierr); for(col = 0; col < numCols; col++) { ml->numPartitionCols[level][colPartition[col]]++; } ierr = PetscMalloc(numParts * sizeof(int *), &ml->colPartition[level]); CHKERRQ(ierr); PetscLogObjectMemory(pc, numParts * sizeof(int *)); for(part = 0; part < numParts; part++) { numLocCols = ml->numPartitionCols[level][part]; ierr = PetscMalloc(numLocCols * sizeof(int), &ml->colPartition[level][part]); CHKERRQ(ierr); PetscLogObjectMemory(pc, numLocCols * sizeof(int)); ml->numPartitionCols[level][part] = 0; } for(col = 0; col < numCols; col++) { ml->colPartition[level][colPartition[col]][ml->numPartitionCols[level][colPartition[col]]++] = globalCols[col]; } for(part = 0, col = 0; part < numParts; part++) { col += ml->numPartitionCols[level][part]; } if (col != numCols) { int rank; ierr = MPI_Comm_rank(pc->comm, &rank); CHKERRQ(ierr); PetscPrintf(PETSC_COMM_SELF, "[%d] %d does not equal the number of columns %d\n", rank, col, numCols); for(col = 0; col < numCols; col++) { PetscPrintf(PETSC_COMM_SELF, "[%d] part: %d col: %d\n", rank, part, colPartition[col]); } SETERRQ(PETSC_ERR_PLIB, "Invalid column partition"); } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelPartitionRows_Private" int PCMultiLevelPartitionRows_Private(PC pc, int level, int numParts, int numCols, int numRows, int *colPartition, int *rowPartition, int *globalCols, Mat grad) { PC_Multilevel *ml = (PC_Multilevel *) pc->data; int numLocRows, numLocCols, ncols, numSlots; int *cols; PetscScalar *vals; int part, col, locCol, row; int ierr; PetscFunctionBegin; /* Allocation */ numSlots = numParts + NUM_PART_ROW_DIV - 1; ierr = PetscMalloc(numSlots * sizeof(int), &ml->numPartitionRows[level]); CHKERRQ(ierr); ierr = PetscMalloc(NUM_PART_ROW_DIV * sizeof(int **), &ml->rowPartition[level]); CHKERRQ(ierr); ierr = PetscMalloc(numParts * sizeof(int *), &ml->rowPartition[level][PART_ROW_INT]); CHKERRQ(ierr); ierr = PetscMalloc(1 * sizeof(int *), &ml->rowPartition[level][PART_ROW_BD]); CHKERRQ(ierr); ierr = PetscMalloc(1 * sizeof(int *), &ml->rowPartition[level][PART_ROW_RES]); CHKERRQ(ierr); PetscLogObjectMemory(pc, numSlots*sizeof(int) + NUM_PART_ROW_DIV*sizeof(int **) + numSlots*sizeof(int *)); /* Create row partition */ for(row = 0; row < numRows; row++) { /* Find partitions adjacent to this row */ ierr = MatGetRow(grad, row, &ncols, &cols, &vals); CHKERRQ(ierr); ierr = PetscMemzero(ml->numPartitionRows[level], numParts * sizeof(int)); CHKERRQ(ierr); for(col = 0; col < ncols; col++) { /* Could invert this mapping explicitly or do bisection */ for(locCol = 0; locCol < numCols; locCol++) if (globalCols[locCol] == cols[col]) break; if (locCol == numCols) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid boundary gradient"); ml->numPartitionRows[level][colPartition[locCol]] = 1; } ierr = MatRestoreRow(grad, row, &ncols, &cols, &vals); CHKERRQ(ierr); /* Classify row */ if (ncols == 0) { /* This is caused by rows with nonzeros only in full rank partitions, so that B_\Gamma (I - D^{-1} D) has a null row. */ rowPartition[row] = 0; continue; } for(part = 0, numLocCols = 0; part < numParts; part++) { if (ml->numPartitionRows[level][part] > 0) { numLocCols++; rowPartition[row] = part; } } if (numLocCols == 2) { rowPartition[row] = numParts + PART_ROW_BD - 1; } else if (numLocCols > 2) { rowPartition[row] = numParts + PART_ROW_RES - 1; } else if (numLocCols < 1) { SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid boundary gradient"); } } /* Copy rows */ ierr = PetscMemzero(ml->numPartitionRows[level], (numParts + NUM_PART_ROW_DIV - 1) * sizeof(int)); CHKERRQ(ierr); for(row = 0; row < numRows; row++) { ml->numPartitionRows[level][rowPartition[row]]++; } for(part = 0; part < numParts; part++) { numLocRows = ml->numPartitionRows[level][part]; if (numLocRows) { ierr = PetscMalloc(numLocRows * sizeof(int), &ml->rowPartition[level][PART_ROW_INT][part]); CHKERRQ(ierr); PetscLogObjectMemory(pc, numLocRows * sizeof(int)); } ml->numPartitionRows[level][part] = 0; } numLocRows = ml->numPartitionRows[level][numParts]; if (numLocRows) { ierr = PetscMalloc(numLocRows * sizeof(int), &ml->rowPartition[level][PART_ROW_BD][0]); CHKERRQ(ierr); PetscLogObjectMemory(pc, numLocRows * sizeof(int)); } ml->numPartitionRows[level][numParts] = 0; numLocRows = ml->numPartitionRows[level][numParts+1]; if (numLocRows) { ierr = PetscMalloc(numLocRows * sizeof(int), &ml->rowPartition[level][PART_ROW_RES][0]); CHKERRQ(ierr); PetscLogObjectMemory(pc, numLocRows * sizeof(int)); } ml->numPartitionRows[level][numParts+1] = 0; for(row = 0; row < numRows; row++) { if (rowPartition[row] < numParts) { ml->rowPartition[level][PART_ROW_INT][rowPartition[row]][ml->numPartitionRows[level][rowPartition[row]]++] = row; } else if (rowPartition[row] == numParts) { ml->rowPartition[level][PART_ROW_BD][0][ml->numPartitionRows[level][rowPartition[row]]++] = row; } else if (rowPartition[row] > numParts) { ml->rowPartition[level][PART_ROW_RES][0][ml->numPartitionRows[level][rowPartition[row]]++] = row; } } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelFactorPartitions_Private" int PCMultiLevelFactorPartitions_Private(PC pc, int level, int numParts, Mat grad) { PC_Multilevel *ml = (PC_Multilevel *) pc->data; int ncols; int *cols; PetscScalar *vals; PetscReal *B; int maxSize; int numRows, numCols; int part, row, col, locCol; int ierr; PetscFunctionBegin; ierr = PC_MLLogEventBegin(PC_ML_ReduceFactor, pc, grad, 0, 0); CHKERRQ(ierr); /* Allocation */ for(part = 0, maxSize = 0; part < numParts; part++) { numRows = ml->numPartitionRows[level][part]; numCols = ml->numPartitionCols[level][part]; maxSize = PetscMax(maxSize, numRows*numCols); } if (maxSize == 0) { PetscFunctionReturn(2); /* SETERRQ(1, "Empty interior (increase partition size)"); */ } ierr = PetscMalloc(numParts * sizeof(PetscReal **), &ml->factors[level]); CHKERRQ(ierr); PetscLogObjectMemory(pc, numParts * sizeof(PetscReal **)); ierr = PetscMalloc(maxSize * sizeof(PetscReal), &B); CHKERRQ(ierr); for(part = 0; part < numParts; part++) { /* Allocation */ numRows = ml->numPartitionRows[level][part]; numCols = ml->numPartitionCols[level][part]; ierr = PetscMalloc(NUM_FACT_DIV * sizeof(PetscReal *), &ml->factors[level][part]); CHKERRQ(ierr); ierr = PetscMalloc(numCols * sizeof(PetscReal), &ml->factors[level][part][FACT_DINV]); CHKERRQ(ierr); ierr = PetscMalloc(numCols*numCols * sizeof(PetscReal), &ml->factors[level][part][FACT_V]); CHKERRQ(ierr); PetscLogObjectMemory(pc, NUM_FACT_DIV * sizeof(double *) + (numCols + numCols*numCols)*sizeof(PetscReal)); if (numRows > 0) { /* If numCols < numRows, LAPACK will leave a bogus values in the end of D^{-1} */ if (numCols > numRows) { ierr = PetscMemzero(ml->factors[level][part][FACT_DINV], numCols * sizeof(PetscReal)); CHKERRQ(ierr); } if (PCMultiLevelDoQR_Private(pc, numRows, numCols) == PETSC_TRUE) { /* Allocation */ ierr = PetscMalloc(numRows*numCols * sizeof(PetscReal), &ml->factors[level][part][FACT_QR]); CHKERRQ(ierr); ierr = PetscMalloc(numCols * sizeof(PetscReal), &ml->factors[level][part][FACT_TAU]); CHKERRQ(ierr); ierr = PetscMalloc(numCols*numCols * sizeof(PetscReal), &ml->factors[level][part][FACT_U]); CHKERRQ(ierr); PetscLogObjectMemory(pc, (numRows*numCols + numCols + numCols*numCols)*sizeof(PetscReal)); /* Extract the interior gradient for this partition */ ierr = PetscMemzero(ml->factors[level][part][FACT_QR], numRows*numCols * sizeof(PetscReal)); CHKERRQ(ierr); for(row = 0; row < numRows; row++) { ierr = MatGetRow(grad, ml->rowPartition[level][PART_ROW_INT][part][row], &ncols, &cols, &vals); CHKERRQ(ierr); for(col = 0; col < ncols; col++) { for(locCol = 0; locCol < numCols; locCol++) { if (cols[col] == ml->colPartition[level][part][locCol]) break; } if (locCol < numCols) { ml->factors[level][part][FACT_QR][numRows*locCol+row] = vals[col]; } } ierr = MatRestoreRow(grad, ml->rowPartition[level][PART_ROW_INT][part][row], &ncols, &cols, &vals); CHKERRQ(ierr); } /* Do QR of B */ LAgeqrf_(&numRows, &numCols, ml->factors[level][part][FACT_QR], &numRows, ml->factors[level][part][FACT_TAU], ml->svdWork, &ml->svdWorkLen, &ierr); CHKERRQ(ierr); /* Put R in B */ ierr = PetscMemzero(B, numCols*numCols * sizeof(PetscReal)); CHKERRQ(ierr); for(col = 0; col < numCols; col++) for(row = 0; row <= col; row++) B[col*numCols+row] = ml->factors[level][part][FACT_QR][col*numRows+row]; /* Do SVD of R */ #if defined(PETSC_MISSING_LAPACK_GESVD) SETERRQ(PETSC_ERR_SUP,"GESVD - Lapack routine is unavilable."); #else LAgesvd_("A", "A", &numCols, &numCols, B, &numCols, ml->factors[level][part][FACT_DINV], ml->factors[level][part][FACT_U], &numCols, ml->factors[level][part][FACT_V], &numCols, ml->svdWork, &ml->svdWorkLen, &ierr); CHKERRQ(ierr); #endif } else { /* Allocation */ ierr = PetscMalloc(numRows*numRows * sizeof(PetscReal), &ml->factors[level][part][FACT_U]); CHKERRQ(ierr); PetscLogObjectMemory(pc, numRows*numRows * sizeof(PetscReal)); /* Extract the interior gradient for this partition */ ierr = PetscMemzero(B, numRows*numCols * sizeof(PetscReal)); CHKERRQ(ierr); for(row = 0; row < numRows; row++) { ierr = MatGetRow(grad, ml->rowPartition[level][PART_ROW_INT][part][row], &ncols, &cols, &vals); CHKERRQ(ierr); for(col = 0; col < ncols; col++) { for(locCol = 0; locCol < numCols; locCol++) { if (cols[col] == ml->colPartition[level][part][locCol]) break; } if (locCol < numCols) { B[numRows*locCol+row] = vals[col]; } } ierr = MatRestoreRow(grad, ml->rowPartition[level][PART_ROW_INT][part][row], &ncols, &cols, &vals); CHKERRQ(ierr); } /* Factor the gradient */ #if defined(PETSC_MISSING_LAPACK_GESVD) SETERRQ(PETSC_ERR_SUP,"GESVD - Lapack routine is unavilable."); #else LAgesvd_("A", "A", &numRows, &numCols, B, &numRows, ml->factors[level][part][FACT_DINV], ml->factors[level][part][FACT_U], &numRows, ml->factors[level][part][FACT_V], &numCols, ml->svdWork, &ml->svdWorkLen, &ierr); #endif CHKERRQ(ierr); } /* Invert the singular values */ for(col = 0; col < numCols; col++) { if (ml->factors[level][part][FACT_DINV][col] > ml->zeroTol) ml->factors[level][part][FACT_DINV][col] = 1.0/ml->factors[level][part][FACT_DINV][col]; } } else { /* Create null singular values for D^{-1} and the identity for V */ ierr = PetscMemzero(ml->factors[level][part][FACT_DINV], numCols * sizeof(PetscReal)); CHKERRQ(ierr); ierr = PetscMemzero(ml->factors[level][part][FACT_V], numCols*numCols * sizeof(PetscReal)); CHKERRQ(ierr); for(col = 0; col < numCols; col++) ml->factors[level][part][FACT_V][col*numCols+col] = 1.0; } } /* Cleanup */ ierr = PetscFree(B); CHKERRQ(ierr); ierr = PC_MLLogEventEnd(PC_ML_ReduceFactor, pc, grad, 0, 0); CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelExtractBoundaryGradient_Private" int PCMultiLevelExtractBoundaryGradient_Private(PC pc, int level, int numParts, int colsRemain, int *newCols, PetscScalar *newVals, Mat grad) { PC_Multilevel *ml = (PC_Multilevel *) pc->data; int ncols; int *cols; PetscScalar *vals; PetscScalar *temp; int *rowLens; int numBdRows, numResRows, rowsRemain, numCols, numVals, prevCols; int part, row, newRow, col, locCol = 0; int ierr; PetscFunctionBegin; ierr = PC_MLLogEventBegin(PC_ML_ReduceBdGradExtract, pc, grad, 0, 0); CHKERRQ(ierr); /* Initialization */ numBdRows = ml->numPartitionRows[level][numParts]; numResRows = ml->numPartitionRows[level][numParts+1]; rowsRemain = numBdRows + numResRows; /* Get the nonzero structure */ ierr = VecGetArray(ml->bdReduceVecs[level], &temp); CHKERRQ(ierr); rowLens = (int *) temp; for(row = 0; row < numBdRows; row++) { ierr = MatGetRow(grad, ml->rowPartition[level][PART_ROW_BD][0][row], &ncols, &cols, PETSC_NULL); CHKERRQ(ierr); for(col = 0, numVals = 0; col < ncols; col++) { for(part = 0, prevCols = 0; part < numParts; part++) { numCols = ml->numPartitionCols[level][part]; for(locCol = 0; locCol < numCols; locCol++) if (cols[col] == ml->colPartition[level][part][locCol]) break; if (locCol < numCols) break; prevCols += numCols; } if (part < numParts) numVals++; } if (numVals == 0) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Null boundary row in boundary gradient"); rowLens[row] = numVals; ierr = MatRestoreRow(grad, ml->rowPartition[level][PART_ROW_BD][0][row], &ncols, &cols, PETSC_NULL); CHKERRQ(ierr); } for(row = 0; row < numResRows; row++) { ierr = MatGetRow(grad, ml->rowPartition[level][PART_ROW_RES][0][row], &ncols, &cols, PETSC_NULL); CHKERRQ(ierr); for(col = 0, numVals = 0; col < ncols; col++) { for(part = 0, prevCols = 0; part < numParts; part++) { numCols = ml->numPartitionCols[level][part]; for(locCol = 0; locCol < numCols; locCol++) if (cols[col] == ml->colPartition[level][part][locCol]) break; if (locCol < numCols) break; prevCols += numCols; } if (part < numParts) numVals++; } if (numVals == 0) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Null residual row in boundary gradient"); rowLens[row+numBdRows] = numVals; ierr = MatRestoreRow(grad, ml->rowPartition[level][PART_ROW_RES][0][row], &ncols, &cols, PETSC_NULL); CHKERRQ(ierr); } /* Create the boundary gradient B_\Gamma */ ierr = MatCreateSeqAIJ(PETSC_COMM_SELF, rowsRemain, colsRemain, 0, rowLens, &ml->grads[level]); CHKERRQ(ierr); ierr = MatSetOption(ml->grads[level], MAT_NEW_NONZERO_ALLOCATION_ERR); CHKERRQ(ierr); ierr = VecRestoreArray(ml->bdReduceVecs[level], &temp); CHKERRQ(ierr); /* Extract boundary rows */ for(row = 0; row < numBdRows; row++) { ierr = MatGetRow(grad, ml->rowPartition[level][PART_ROW_BD][0][row], &ncols, &cols, &vals); CHKERRQ(ierr); for(col = 0, numVals = 0; col < ncols; col++) { for(part = 0, prevCols = 0; part < numParts; part++) { numCols = ml->numPartitionCols[level][part]; for(locCol = 0; locCol < numCols; locCol++) if (cols[col] == ml->colPartition[level][part][locCol]) break; if (locCol < numCols) break; prevCols += numCols; } if (part < numParts) { newCols[numVals] = locCol + prevCols; newVals[numVals] = vals[col]; numVals++; } } ierr = MatSetValues(ml->grads[level], 1, &row, numVals, newCols, newVals, INSERT_VALUES); CHKERRQ(ierr); ierr = MatRestoreRow(grad, ml->rowPartition[level][PART_ROW_BD][0][row], &ncols, &cols, &vals); CHKERRQ(ierr); } /* Extract residual rows */ for(row = 0, newRow = numBdRows; row < numResRows; row++, newRow++) { ierr = MatGetRow(grad, ml->rowPartition[level][PART_ROW_RES][0][row], &ncols, &cols, &vals); CHKERRQ(ierr); for(col = 0, numVals = 0; col < ncols; col++) { for(part = 0, prevCols = 0; part < numParts; part++) { numCols = ml->numPartitionCols[level][part]; for(locCol = 0; locCol < numCols; locCol++) if (cols[col] == ml->colPartition[level][part][locCol]) break; if (locCol < numCols) break; prevCols += numCols; } if (part < numParts) { newCols[numVals] = locCol + prevCols; newVals[numVals] = vals[col]; numVals++; } } ierr = MatSetValues(ml->grads[level], 1, &newRow, numVals, newCols, newVals, INSERT_VALUES); CHKERRQ(ierr); ierr = MatRestoreRow(grad, ml->rowPartition[level][PART_ROW_RES][0][row], &ncols, &cols, &vals); CHKERRQ(ierr); } ierr = MatAssemblyBegin(ml->grads[level], MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatAssemblyEnd(ml->grads[level], MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = PC_MLLogEventEnd(PC_ML_ReduceBdGradExtract, pc, grad, 0, 0); CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelShrinkMesh_Private" int PCMultiLevelShrinkMesh_Private(PC pc, int level, int numNewCols, int *colPartition, int numElements, int **oldMesh, int *numNewElements, int ***newMesh) { PC_Multilevel *ml = (PC_Multilevel *) pc->data; int numCols = ml->numVertices[level]; int *adjCols; int maxSize; int col, oldCol, newCol, adjCol, count; int elem; int ierr; PetscFunctionBegin; ierr = PC_MLLogEventBegin(PC_ML_ReduceShrinkMesh, pc, 0, 0, 0); CHKERRQ(ierr); ml->numMeshes++; /* Allocation */ maxSize = PetscMax(PetscMin(oldMesh[MESH_OFFSETS][numCols], numNewCols*numNewCols + 1), 1); ierr = PetscMalloc(NUM_MESH_DIV * sizeof(int *), newMesh); CHKERRQ(ierr); ierr = PetscMalloc((numNewCols+1) * sizeof(int), &(*newMesh)[MESH_OFFSETS]); CHKERRQ(ierr); ierr = PetscMalloc(maxSize * sizeof(int), &(*newMesh)[MESH_ADJ]); CHKERRQ(ierr); ierr = PetscMalloc((numNewCols+1) * sizeof(int), &adjCols); CHKERRQ(ierr); PetscLogObjectMemory(pc, NUM_MESH_DIV * sizeof(int *) + (numNewCols+1 + maxSize)*sizeof(int)); /* Shrink adjacency lists */ (*newMesh)[MESH_OFFSETS][0] = 0; for(col = 0, count = 0; col < numNewCols; col++) { ierr = PetscMemzero(adjCols, numNewCols * sizeof(int)); CHKERRQ(ierr); /* Put in self edge */ (*newMesh)[MESH_ADJ][count++] = col; adjCols[col] = 1; /* Get all columns in this partition */ for(oldCol = 0; oldCol < numCols; oldCol++) { if (colPartition[oldCol] == col) { for(adjCol = oldMesh[MESH_OFFSETS][oldCol]; adjCol < oldMesh[MESH_OFFSETS][oldCol+1]; adjCol++) { /* Check for adjacent column */ newCol = colPartition[oldMesh[MESH_ADJ][adjCol]]; /* Null partitions have number -1 */ if ((newCol < 0) || (adjCols[newCol])) continue; (*newMesh)[MESH_ADJ][count++] = newCol; adjCols[newCol] = 1; } } } (*newMesh)[MESH_OFFSETS][col+1] = count; } ierr = PetscFree(adjCols); CHKERRQ(ierr); /* Eliminate redundant elements */ maxSize = PetscMax(PetscMin(numElements, PetscMax(numCols-3, 0)*2 + 1), 1); ierr = PetscMalloc(maxSize*3 * sizeof(int), &(*newMesh)[MESH_ELEM]); CHKERRQ(ierr); PetscLogObjectMemory(pc, maxSize*3 * sizeof(int)); for(elem = 0, *numNewElements = 0; elem < numElements; elem++) { if ((colPartition[oldMesh[MESH_ELEM][elem*3]-1] != colPartition[oldMesh[MESH_ELEM][elem*3+1]-1]) && (colPartition[oldMesh[MESH_ELEM][elem*3+1]-1] != colPartition[oldMesh[MESH_ELEM][elem*3+2]-1]) && (colPartition[oldMesh[MESH_ELEM][elem*3+2]-1] != colPartition[oldMesh[MESH_ELEM][elem*3]-1]) && (colPartition[oldMesh[MESH_ELEM][elem*3]-1] >= 0) && (colPartition[oldMesh[MESH_ELEM][elem*3+1]-1] >= 0) && (colPartition[oldMesh[MESH_ELEM][elem*3+2]-1] >= 0)) { (*newMesh)[MESH_ELEM][(*numNewElements)*3] = colPartition[oldMesh[MESH_ELEM][elem*3]-1] + 1; (*newMesh)[MESH_ELEM][(*numNewElements)*3+1] = colPartition[oldMesh[MESH_ELEM][elem*3+1]-1] + 1; (*newMesh)[MESH_ELEM][(*numNewElements)*3+2] = colPartition[oldMesh[MESH_ELEM][elem*3+2]-1] + 1; (*numNewElements)++; } } ierr = PC_MLLogEventEnd(PC_ML_ReduceShrinkMesh, pc, 0, 0, 0); CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelRowPartitionLocalToGlobal_Private" int PCMultiLevelRowPartitionLocalToGlobal_Private(PC pc, int level) { PC_Multilevel *ml = (PC_Multilevel *) pc->data; PetscScalar *bdArray; int *prevIndices; int *rowIndices; int numParts, numIntRows, numBdRows, numResRows; int part, row; int ierr; PetscFunctionBegin; if (level <= 0) PetscFunctionReturn(0); ierr = PC_MLLogEventBegin(PC_ML_ReduceBdGradRowPartLocalToGlobal, pc, 0, 0, 0); CHKERRQ(ierr); ierr = VecGetArray(ml->bdReduceVecs[level-1], &bdArray); CHKERRQ(ierr); /* Load unreduced rows from last level into a work vector */ prevIndices = (int *) bdArray; numParts = ml->numPartitions[level-1]; numBdRows = ml->numPartitionRows[level-1][numParts]; ierr = PetscMemcpy(prevIndices, ml->rowPartition[level-1][PART_ROW_BD][0], numBdRows * sizeof(int)); CHKERRQ(ierr); numResRows = ml->numPartitionRows[level-1][numParts+1]; ierr = PetscMemcpy(prevIndices+numBdRows, ml->rowPartition[level-1][PART_ROW_RES][0], numResRows * sizeof(int)); CHKERRQ(ierr); /* Interior edges */ numParts = ml->numPartitions[level]; for(part = 0; part < numParts; part++) { rowIndices = ml->rowPartition[level][PART_ROW_INT][part]; numIntRows = ml->numPartitionRows[level][part]; for(row = 0; row < numIntRows; row++) rowIndices[row] = prevIndices[rowIndices[row]]; } /* Boundary edges */ rowIndices = ml->rowPartition[level][PART_ROW_BD][0]; numBdRows = ml->numPartitionRows[level][numParts]; for(row = 0; row < numBdRows; row++) rowIndices[row] = prevIndices[rowIndices[row]]; /* Residual edges */ rowIndices = ml->rowPartition[level][PART_ROW_RES][0]; numResRows = ml->numPartitionRows[level][numParts+1]; for(row = 0; row < numResRows; row++) rowIndices[row] = prevIndices[rowIndices[row]]; ierr = VecRestoreArray(ml->bdReduceVecs[level-1], &bdArray); CHKERRQ(ierr); ierr = PC_MLLogEventEnd(PC_ML_ReduceBdGradRowPartLocalToGlobal, pc, 0, 0, 0); CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelReduce" /*@C PCMultiLevelReduce This function creates a multilevel factorization of the gradient matrix which may be used for preconditioning a linear solve. This is called by PCSetUp() when the PC_MULTILEVEL type is selected. Input Parameters: . pc - The preconditioner context Level: intermediate .keywords multilevel .seealso @*/ int PCMultiLevelReduce(PC pc) { PC_Multilevel *ml; PetscTruth adj; Mat grad; int ncols; int *cols; int *newCols; PetscScalar *vals; PetscScalar *newVals; PetscScalar *colReduceArray; PetscScalar *colReduceArray2; int *colPartition; int *rowPartition; int *globalCols; int *localCols; int *globalPart; int rowsRemain, colsRemain, newColsRemain; int numParts, numNewParts, numCols, numNullCols, numVals; int numBdRows, numResRows; int rankDef; PetscScalar val; int size, sizeMax; int invalidNull; int level, part, newPart, row, col, col2, locCol, nullCol, vCol, loop, anyAgain; #define NEW_BD_GRAD #ifdef NEW_BD_GRAD int *firstCol, *colPart; #ifdef NEW_BD_GRAD_CHECK int oldPart, oldLocCol; #endif #endif int ierr, ierr2; PetscFunctionBegin; PetscValidHeaderSpecific(pc, PC_COOKIE); ml = (PC_Multilevel *) pc->data; /* Create P and Z such that P^T B Z = / D \ \ 0 / which is almost and SVD */ if (ml->useMath == PETSC_FALSE) { /* Initialization */ /* ierr = MatConvert(ml->locB, MATSAME, &grad); CHKERRQ(ierr); */ grad = ml->locB; colsRemain = ml->numCols; rowsRemain = ml->numRows; ml->rank = 0; sizeMax = PetscMax(ml->numRows, ml->numCols); ml->svdWorkLen = PetscMax(3*PetscMin(ml->numRows,ml->numCols) + PetscMax(ml->numRows,ml->numCols), 5*PetscMin(ml->numRows,ml->numCols) - 4); /* Allocation */ ierr = PetscMalloc(ml->maxLevels * sizeof(int), &ml->numPartitions); CHKERRQ(ierr); ierr = PetscMalloc(ml->maxLevels * sizeof(int *), &ml->numPartitionCols); CHKERRQ(ierr); ierr = PetscMalloc(ml->maxLevels * sizeof(int **), &ml->colPartition); CHKERRQ(ierr); ierr = PetscMalloc(ml->maxLevels * sizeof(int *), &ml->numPartitionRows); CHKERRQ(ierr); ierr = PetscMalloc(ml->maxLevels * sizeof(int ***), &ml->rowPartition); CHKERRQ(ierr); ierr = PetscMalloc(ml->maxLevels * sizeof(double ***), &ml->factors); CHKERRQ(ierr); ierr = PetscMalloc(ml->maxLevels * sizeof(Mat), &ml->grads); CHKERRQ(ierr); ierr = PetscMalloc(ml->maxLevels * sizeof(Vec), &ml->bdReduceVecs); CHKERRQ(ierr); ierr = PetscMalloc(ml->maxLevels * sizeof(Vec), &ml->colReduceVecs); CHKERRQ(ierr); ierr = PetscMalloc(ml->maxLevels * sizeof(Vec), &ml->colReduceVecs2); CHKERRQ(ierr); ierr = PetscMalloc(ml->svdWorkLen * sizeof(double), &ml->svdWork); CHKERRQ(ierr); ierr = PetscMalloc(ml->numCols * sizeof(int), &ml->range); CHKERRQ(ierr); PetscLogObjectMemory(pc, ml->maxLevels*(sizeof(int) + sizeof(int *) + sizeof(int **) + sizeof(int *) + sizeof(int ***) + sizeof(double ***) + sizeof(Mat) + sizeof(Vec)*3) + ml->svdWorkLen * sizeof(double) + ml->numCols * sizeof(int)); ierr = PetscMalloc(ml->numCols * sizeof(int), &colPartition); CHKERRQ(ierr); ierr = PetscMalloc(ml->numCols * sizeof(int), &globalCols); CHKERRQ(ierr); ierr = PetscMalloc(sizeMax * sizeof(int), &rowPartition); CHKERRQ(ierr); for(col = 0; col < ml->numCols; col++) { ml->range[col] = -1; globalCols[col] = col; } /* Loop until the graph has ml->QRthresh nodes or all rows have been reduced */ for(level = 0, anyAgain = 0; (colsRemain > ml->QRthresh) && (rowsRemain > 0); level++) { if (level >= ml->maxLevels) SETERRQ(PETSC_ERR_ARG_SIZ, "Exceeded maximum level"); loop = 2; while(loop) { /* Partition the columns (nodes): Give a list with the partition number of each column */ ierr = PCMultiLevelPartitionMesh_Private(pc, level, ml->meshes[level], &numParts, colPartition); CHKERRQ(ierr); ml->numPartitions[level] = numParts; /* Get the global columns in each partition */ ierr = PC_MLLogEventBegin(PC_ML_ReducePartitionRowCol, pc, 0, 0, 0); CHKERRQ(ierr); ierr = PCMultiLevelPartitionCols_Private(pc, level, numParts, colsRemain, colPartition, globalCols); CHKERRQ(ierr); /* Partition rows (edges): Give the local rows in each partition, then boundary and residual rows */ ierr = PCMultiLevelPartitionRows_Private(pc, level, numParts, colsRemain, rowsRemain, colPartition, rowPartition, globalCols, grad); CHKERRQ(ierr); ierr = PC_MLLogEventEnd(PC_ML_ReducePartitionRowCol, pc, 0, 0, 0); CHKERRQ(ierr); /* Factor the interior of each partition */ loop = PCMultiLevelFactorPartitions_Private(pc, level, numParts, grad); CHKERRQ(ierr); if (loop == 2) { /* Free memory from bad partition */ for(part = 0; part < numParts; part++) { if (ml->numPartitionCols[level][part] > 0) {ierr = PetscFree(ml->colPartition[level][part]); CHKERRQ(ierr);} if (ml->numPartitionRows[level][part] > 0) {ierr = PetscFree(ml->rowPartition[level][PART_ROW_INT][part]); CHKERRQ(ierr);} } if (ml->numPartitionRows[level][ml->numPartitions[level]] > 0) {ierr = PetscFree(ml->rowPartition[level][PART_ROW_BD][0]); CHKERRQ(ierr);} if (ml->numPartitionRows[level][ml->numPartitions[level]+1] > 0) {ierr = PetscFree(ml->rowPartition[level][PART_ROW_RES][0]); CHKERRQ(ierr);} ierr = PetscFree(ml->rowPartition[level][PART_ROW_INT]); CHKERRQ(ierr); ierr = PetscFree(ml->rowPartition[level][PART_ROW_BD]); CHKERRQ(ierr); ierr = PetscFree(ml->rowPartition[level][PART_ROW_RES]); CHKERRQ(ierr); ierr = PetscFree(ml->colPartition[level]); CHKERRQ(ierr); ierr = PetscFree(ml->rowPartition[level]); CHKERRQ(ierr); ierr = PetscFree(ml->numPartitionCols[level]); CHKERRQ(ierr); ierr = PetscFree(ml->numPartitionRows[level]); CHKERRQ(ierr); /* Increase partition size */ PetscLogInfo(pc, "PCMultiLevelReduce: Increasing partition size from %d to %d\n", ml->partSize, ml->partSize*2); if (ml->partSize > ml->numCols) SETERRQ(PETSC_ERR_PLIB, "Partition has grown too large."); ml->partSize *= 2; } else if (loop != 0) { CHKERRQ(loop); } } /* QR factor boundary matrices */ /* Create the boundary gradient B_\Gamma and workspace for applying it */ ierr = PC_MLLogEventBegin(PC_ML_ReduceBdGrad, pc, 0, 0, 0); CHKERRQ(ierr); numBdRows = ml->numPartitionRows[level][numParts]; numResRows = ml->numPartitionRows[level][numParts+1]; rowsRemain = numBdRows + numResRows; ierr = VecCreateSeq(PETSC_COMM_SELF, rowsRemain, &ml->bdReduceVecs[level]); CHKERRQ(ierr); ierr = VecCreateSeq(PETSC_COMM_SELF, colsRemain, &ml->colReduceVecs[level]); CHKERRQ(ierr); ierr = VecDuplicate(ml->colReduceVecs[level], &ml->colReduceVecs2[level]); CHKERRQ(ierr); /* Workspace for matrix reduction */ ierr = VecGetArray(ml->colReduceVecs[0], &colReduceArray); CHKERRQ(ierr); ierr = VecGetArray(ml->colReduceVecs2[0], &colReduceArray2); CHKERRQ(ierr); newCols = (int *) colReduceArray; newVals = ml->svdWork; /* Extract the boundary gradient B_\Gamma in CSR format */ ierr = PCMultiLevelExtractBoundaryGradient_Private(pc, level, numParts, colsRemain, newCols, newVals, grad); CHKERRQ(ierr); /* Convert the local (per level) numbering to the global (original) numbering */ ierr = PCMultiLevelRowPartitionLocalToGlobal_Private(pc, level); CHKERRQ(ierr); /* Determine the columns active at the next level and fix up colPartition[] */ globalPart = (int *) colReduceArray2; localCols = rowPartition; for(part = 0; part < numParts; part++) globalCols[part] = -1; for(part = 0, newPart = 0, newColsRemain = 0, numNewParts = numParts; part < numParts; part++, newPart++) { for(col = 0, rankDef = 0; col < ml->numPartitionCols[level][part]; col++) { if (ml->factors[level][part][FACT_DINV][col] < ml->zeroTol) { newColsRemain++; rankDef++; if (rankDef == 1) { globalCols[newPart] = ml->colPartition[level][part][col]; globalPart[newPart] = part; localCols[newPart] = col; } else { globalCols[numNewParts] = ml->colPartition[level][part][col]; globalPart[numNewParts] = part; localCols[numNewParts] = col; /* Put the column in its own partition */ numCols = ml->numPartitionCols[level][part] - (col+1); for(col2 = colsRemain-1; numCols >= 0; col2--) { if (colPartition[col2] == newPart) { numCols--; if (numCols < 0) colPartition[col2] = numNewParts++; } } } } else { ml->rank++; #ifdef PETSC_USE_BOPT_g if (ml->range[ml->colPartition[level][part][col]] != -1) { PetscLogInfo(pc, "Level %d partition %d col %d: column %d has already been reduced\n", level, part, col, ml->colPartition[level][part][col]); SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid reduction"); } if ((ml->rowPartition[level][PART_ROW_INT][part][col] < 0) || (ml->rowPartition[level][PART_ROW_INT][part][col] > ml->numRows)) { SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid row index in range"); } #endif ml->range[ml->colPartition[level][part][col]] = ml->rowPartition[level][PART_ROW_INT][part][col]; } } #ifdef PETSC_USE_BOPT_g PetscLogInfo(pc, "Lvl: %d Partition: %d has rank deficiency %d\n", level, part, rankDef); #endif if (rankDef == 0) { /* Eliminate this partition */ for(col = 0; col < colsRemain; col++) { if (colPartition[col] == newPart) { colPartition[col] = -1; } else if (colPartition[col] > newPart) { colPartition[col]--; } } /* Shift everything down */ size = PetscMax(numNewParts, numParts) - part; ierr = PetscMemmove(&globalCols[newPart], &globalCols[newPart+1], size * sizeof(int)); CHKERRQ(ierr); ierr = PetscMemmove(&globalPart[newPart], &globalPart[newPart+1], size * sizeof(int)); CHKERRQ(ierr); ierr = PetscMemmove(&localCols[newPart], &localCols[newPart+1], size * sizeof(int)); CHKERRQ(ierr); newPart--; numNewParts--; } } #ifdef PETSC_USE_BOPT_g for(col = 0, row = 0; col < ml->numCols; col++) { if (ml->range[col] == -1) { row++; } else if (ml->range[col] < 0) { SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid range space"); } } if (row != newColsRemain) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid range space"); for(vCol = 0; vCol < newColsRemain; vCol++) { if (globalCols[vCol] < 0) { PetscPrintf(PETSC_COMM_SELF, "Invalid column in reduced boundary gradient\n"); PetscPrintf(PETSC_COMM_SELF, "Lvl: %d newRows: %d newCols: %d\n", level, rowsRemain, newColsRemain); for(col = 0; col < newColsRemain; col++) { PetscPrintf(PETSC_COMM_SELF, " globalCols[%d]: %d\n", col, globalCols[col]); } SETERRQ(PETSC_ERR_ARG_CORRUPT, "Negative column"); } } #endif ierr = PC_MLLogEventEnd(PC_ML_ReduceBdGrad, pc, 0, 0, 0); CHKERRQ(ierr); /* Transform B_\Gamma to B_\Gamma V (I - D^{-1} D) */ ierr = PC_MLLogEventBegin(PC_ML_CreateBdGrad, pc, 0, 0, 0); CHKERRQ(ierr); if (level > 0) { ierr = MatDestroy(grad); CHKERRQ(ierr); } ierr = MatCreateSeqAIJ(PETSC_COMM_SELF, rowsRemain, ml->numCols, newColsRemain, PETSC_NULL, &grad); if (ierr) { PetscPrintf(PETSC_COMM_SELF, "Interface Gradient failure:"); PetscPrintf(PETSC_COMM_SELF, " level: %d", level); PetscPrintf(PETSC_COMM_SELF, " rows: %d cols: %d\n", rowsRemain, ml->numCols); PetscPrintf(PETSC_COMM_SELF, " nonzeroes per row: %d\n", newColsRemain); CHKERRQ(ierr); } #ifdef NEW_BD_GRAD ierr = PetscMalloc((numParts+1) * sizeof(int), &firstCol); CHKERRQ(ierr); ierr = PetscMalloc(ml->numCols * sizeof(int), &colPart); CHKERRQ(ierr); for(part = 1, firstCol[0] = 0; part <= numParts; part++) { firstCol[part] = firstCol[part-1] + ml->numPartitionCols[level][part-1]; for(col = 0; col < ml->numPartitionCols[level][part-1]; col++) { colPart[col+firstCol[part-1]] = part-1; } } #endif for(row = 0; row < rowsRemain; row++) { ierr = MatGetRow(ml->grads[level], row, &ncols, &cols, &vals); CHKERRQ(ierr); /* Sparse VecDot with the active columns of V */ for(vCol = 0, numVals = 0; vCol < newColsRemain; vCol++) { /* Calculate the dot product */ for(col = 0, adj = PETSC_FALSE, val = 0.0; col < ncols; col++) { /* Get the local column number */ #ifndef NEW_BD_GRAD locCol = cols[col]; for(part = 0; part < PetscMin(numParts, globalPart[vCol]+2); part++) { numCols = ml->numPartitionCols[level][part]; if (locCol >= numCols) { locCol -= numCols; continue; } break; } #else part = colPart[cols[col]]; locCol = cols[col] - firstCol[part]; numCols = ml->numPartitionCols[level][part]; #ifdef NEW_BD_GRAD_CHECK /***** CHECK THE RESULT ***/ oldLocCol = cols[col]; for(oldPart = 0; oldPart < PetscMin(numParts, globalPart[vCol]+2); oldPart++) { numCols = ml->numPartitionCols[level][oldPart]; if (oldLocCol >= numCols) { oldLocCol -= numCols; continue; } break; } if ((oldPart == globalPart[vCol]) && (oldPart != part)) SETERRQ2(PETSC_ERR_PLIB, "Invalid partition %d should be %d", part, oldPart); if ((oldPart == globalPart[vCol]) && (oldLocCol != locCol)) SETERRQ2(PETSC_ERR_PLIB, "Invalid local column %d should be %d", locCol, oldLocCol); #endif #endif /* Multiply by the correct element of V (I - D^{-1} D) */ if (part == globalPart[vCol]) { val += vals[col]*ml->factors[level][part][FACT_V][locCol*numCols+localCols[vCol]]; adj = PETSC_TRUE; } } /* Store the dot product in B_\Gamma V (I - D^{-1} D) */ if (adj == PETSC_TRUE) { newCols[numVals] = globalCols[vCol]; newVals[numVals] = val; numVals++; } } /* This is an overestimate */ PetscLogFlops(numVals*ncols); #if 0 #ifdef PETSC_USE_BOPT_g if (numVals == 0) PetscLogInfo(pc, "Null row %d in reduced boundary gradient matrix\n", row); #endif #endif ierr = MatSetValues(grad, 1, &row, numVals, newCols, newVals, INSERT_VALUES); CHKERRQ(ierr); ierr = MatRestoreRow(ml->grads[level], row, &ncols, &cols, &vals); CHKERRQ(ierr); } ierr = MatAssemblyBegin(grad, MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatAssemblyEnd(grad, MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); #ifdef NEW_BD_GRAD ierr = PetscFree(firstCol); CHKERRQ(ierr); ierr = PetscFree(colPart); CHKERRQ(ierr); #endif ierr = PC_MLLogEventEnd(PC_ML_CreateBdGrad, pc, 0, 0, 0); CHKERRQ(ierr); /* Construct the coarse graph */ ml->numVertices[level+1] = newColsRemain; ierr = PCMultiLevelShrinkMesh_Private(pc, level, newColsRemain, colPartition, ml->numElements[level], ml->meshes[level], &ml->numElements[level+1], &ml->meshes[level+1]); CHKERRQ(ierr); /* Update variables */ colsRemain = newColsRemain; ierr = VecRestoreArray(ml->colReduceVecs[0], &colReduceArray); CHKERRQ(ierr); ierr = VecRestoreArray(ml->colReduceVecs2[0], &colReduceArray2); CHKERRQ(ierr); /* Visualize factorization */ ierr = PCMultiLevelReduceView(pc, level, colsRemain, colPartition, ((colsRemain > ml->QRthresh) && (rowsRemain > 0)), &anyAgain); CHKERRQ(ierr); } while(anyAgain) { ierr = PCMultiLevelReduceView(pc, -level, 0, colPartition, 0, &anyAgain); CHKERRQ(ierr); } ml->numLevels = level; /* Cleanup */ ierr = PetscFree(colPartition); CHKERRQ(ierr); ierr = PetscFree(rowPartition); CHKERRQ(ierr); ierr = PetscFree(globalCols); CHKERRQ(ierr); if (ml->numLevels > 0) { ierr = MatDestroy(grad); CHKERRQ(ierr); } /* Workspace allocation */ ml->interiorWorkLen = 1; for(level = 0; level < ml->numLevels; level++) for(part = 0; part < ml->numPartitions[level]; part++) ml->interiorWorkLen = PetscMax(ml->interiorWorkLen, ml->numPartitionRows[level][part]); ierr = PetscMalloc(ml->interiorWorkLen * sizeof(double), &ml->interiorWork); CHKERRQ(ierr); ierr = PetscMalloc(ml->interiorWorkLen * sizeof(double), &ml->interiorWork2); CHKERRQ(ierr); PetscLogObjectMemory(pc, ml->interiorWorkLen*2 * sizeof(double)); /* Get the null space rows and compress the range */ ml->globalRank = ml->rank; ml->numLocNullCols = ml->numCols - ml->rank; ierr = PetscMalloc(PetscMax(ml->numLocNullCols, 1) * sizeof(int), &ml->nullCols); CHKERRQ(ierr); PetscLogObjectMemory(pc, PetscMax(ml->numLocNullCols, 1) * sizeof(int)); ml->nullCols[0] = -1; for(col = 0, nullCol = 0, numNullCols = 0, ierr2 = 0; nullCol < ml->numCols; col++, nullCol++) { if (ml->range[col] < 0) { if (numNullCols == ml->numLocNullCols) { int rank; MPI_Comm_rank(pc->comm, &rank); PetscPrintf(PETSC_COMM_SELF, "[%d]Range %d, Null %d:\n", rank, ml->rank, ml->numLocNullCols); for(col = 0; col < ml->numCols; col++) PetscPrintf(PETSC_COMM_SELF, " %d", ml->range[col]); PetscPrintf(PETSC_COMM_SELF, "\n"); ierr2 = 1; break; } ml->nullCols[numNullCols++] = nullCol; ierr = PetscMemmove(&ml->range[col], &ml->range[col+1], (ml->numCols - (col+1)) * sizeof(int)); CHKERRQ(ierr); col--; } } ierr = MPI_Allreduce(&ierr2, &invalidNull, 1, MPI_INT, MPI_LOR, pc->comm); CHKERRQ(ierr); if (invalidNull) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid null space"); if (numNullCols != ml->numLocNullCols) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid number of null space columns"); if (ml->numLocNullCols + ml->rank != ml->numCols) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid space decomposition"); #ifdef PETSC_USE_BOPT_g ierr = PCValidQ_Multilevel(pc); CHKERRQ(ierr); #endif } else { Mesh mesh; /* Cleanup */ ierr = GridGetMesh(ml->grid, &mesh); CHKERRQ(ierr); ierr = MeshDestroyLocalCSR(mesh, ml->meshes[0][MESH_OFFSETS], ml->meshes[0][MESH_ADJ], ml->vertices); CHKERRQ(ierr); #ifdef PETSC_HAVE_MATHEMATICA ierr = ViewerMathematicaReduce(ml->mathViewer, pc, ml->QRthresh); CHKERRQ(ierr); ierr = PetscFree(ml->meshes[0][MESH_ELEM]); CHKERRQ(ierr); ierr = PetscFree(ml->meshes[0]); CHKERRQ(ierr); ierr = PetscFree(ml->meshes); CHKERRQ(ierr); ierr = PetscFree(ml->numElements); CHKERRQ(ierr); ierr = PetscFree(ml->numVertices); CHKERRQ(ierr); /* ierr = PCConvert_Multilevel(pc); CHKERRQ(ierr); */ #else SETERRQ(PETSC_ERR_SUP, " "); #endif } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelFilterVertexCoords_Tutte" int PCMultiLevelFilterVertexCoords_Tutte(PC pc, int numCols, double *vertices, int **mesh) { PetscRandom gen; double residual = 1.0; double tol = 1.0e-8; int iter = 0; int maxIter = 10000; int xminNode, xmaxNode, yminNode, ymaxNode; double xmin, xmax, ymin, ymax; PetscReal x, y; int node, neighbor; int ierr; PetscFunctionBegin; if (numCols < 3) PetscFunctionReturn(0); /* Fix boundary */ ierr = PetscRandomCreate(PETSC_COMM_SELF, RANDOM_DEFAULT, &gen); CHKERRQ(ierr); ierr = PetscRandomSetInterval(gen, 0, numCols); CHKERRQ(ierr); xmin = xmax = vertices[0]; ymin = ymax = vertices[1]; xminNode = xmaxNode = yminNode = ymaxNode = 0; for(node = 0; node < numCols; node++) { if (vertices[node*2] < xmin) { xmin = vertices[node*2]; xminNode = node; } if (vertices[node*2] > xmax) { xmax = vertices[node*2]; xmaxNode = node; } if (vertices[node*2+1] < ymin) { ymin = vertices[node*2+1]; yminNode = node; } if (vertices[node*2+1] > ymax) { ymax = vertices[node*2+1]; ymaxNode = node; } } if ((xmaxNode == yminNode) && (xminNode == ymaxNode)) { while((xmaxNode == yminNode) || (xmaxNode == xminNode)) { ierr = PetscRandomGetValue(gen, &x); CHKERRQ(ierr); xmaxNode = (int) floor(x); } } if ((xmaxNode == ymaxNode) && (xminNode == yminNode)) { while((xmaxNode == ymaxNode) || (xmaxNode == xminNode)) { ierr = PetscRandomGetValue(gen, &x); CHKERRQ(ierr); xmaxNode = (int) floor(x); } } ierr = PetscRandomDestroy(gen); CHKERRQ(ierr); /* First pass: Jacobi Relaxation */ while((residual > tol) && (iter < maxIter)) { for(node = 0; node < numCols; node++) { if ((node == xminNode) || (node == xmaxNode) || (node == yminNode) || (node == ymaxNode)) continue; vertices[node*2] = 0.0; vertices[node*2+1] = 0.0; for(neighbor = mesh[MESH_OFFSETS][node]; neighbor < mesh[MESH_OFFSETS][node+1]; neighbor++) { if (mesh[MESH_ADJ][neighbor] != node) { vertices[node*2] += vertices[mesh[MESH_ADJ][neighbor]*2]; vertices[node*2+1] += vertices[mesh[MESH_ADJ][neighbor]*2+1]; } } /* Remember to get rid of self edge */ vertices[node*2] /= mesh[MESH_OFFSETS][node+1] - mesh[MESH_OFFSETS][node] - 1; vertices[node*2+1] /= mesh[MESH_OFFSETS][node+1] - mesh[MESH_OFFSETS][node] - 1; } for(node = 0, residual = 0.0; node < numCols; node++) { if ((node == xminNode) || (node == xmaxNode) || (node == yminNode) || (node == ymaxNode)) continue; x = 0.0; y = 0.0; for(neighbor = mesh[MESH_OFFSETS][node]; neighbor < mesh[MESH_OFFSETS][node+1]; neighbor++) { if (mesh[MESH_ADJ][neighbor] != node) { x += vertices[mesh[MESH_ADJ][neighbor]*2]; y += vertices[mesh[MESH_ADJ][neighbor]*2+1]; } } x /= mesh[MESH_OFFSETS][node+1] - mesh[MESH_OFFSETS][node] - 1; y /= mesh[MESH_OFFSETS][node+1] - mesh[MESH_OFFSETS][node] - 1; residual += PetscAbsReal(x - vertices[node*2]) + PetscAbsReal(y - vertices[node*2+1]); } residual /= numCols; iter++; } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelCalcVertexCoords_Private" int PCMultiLevelCalcVertexCoords_Private(PC pc, int numCols, int numNewCols, int *colPartition) { PC_Multilevel *ml = (PC_Multilevel *) pc->data; double *vertices; int *counts; int node; int ierr; PetscFunctionBegin; if (numNewCols > 0) { ierr = PetscMalloc(numNewCols*2 * sizeof(double), &vertices); CHKERRQ(ierr); ierr = PetscMalloc(numNewCols * sizeof(int), &counts); CHKERRQ(ierr); ierr = PetscMemzero(vertices, numNewCols*2 * sizeof(double)); CHKERRQ(ierr); ierr = PetscMemzero(counts, numNewCols * sizeof(int)); CHKERRQ(ierr); for(node = 0; node < numCols; node++) { if (colPartition[node] >= 0) { vertices[colPartition[node]*2] += ml->vertices[node*2]; vertices[colPartition[node]*2+1] += ml->vertices[node*2+1]; counts[colPartition[node]]++; } } for(node = 0; node < numNewCols; node++) { vertices[node*2] /= counts[node]; vertices[node*2+1] /= counts[node]; } ierr = PetscFree(ml->vertices); CHKERRQ(ierr); ierr = PetscFree(counts); CHKERRQ(ierr); ml->vertices = vertices; } PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelView_Mesh" int PCMultiLevelView_Mesh(PC pc, int numCols, int numElements, int **mesh) { PC_Multilevel *ml = (PC_Multilevel *) pc->data; PetscViewer viewer = ml->factorizationViewer; Mesh smallMesh; int *faces; int face; int ierr; PetscFunctionBegin; if (numElements > 0) { ierr = PetscMalloc(numElements*3 * sizeof(int), &faces); CHKERRQ(ierr); ierr = PetscMemcpy(faces, mesh[MESH_ELEM], numElements*3 * sizeof(int)); CHKERRQ(ierr); for(face = 0; face < numElements*3; face++) { faces[face]--; } } ierr = MeshCreateTriangular2DCSR(pc->comm, numCols, numElements, ml->vertices, mesh[MESH_OFFSETS], mesh[MESH_ADJ], faces, &smallMesh); CHKERRQ(ierr); if (numElements > 0) { ierr = PetscFree(faces); CHKERRQ(ierr); } ierr = MeshView(smallMesh, viewer); CHKERRQ(ierr); ierr = MeshDestroy(smallMesh); CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelView_Private" int PCMultiLevelView_Private(PC pc, PetscViewer viewer, int level, int numCols, int numElements, int **mesh) { PetscDraw draw; char title[256]; int maxLevel; int ierr; PetscFunctionBegin; ierr = PetscViewerDrawClear(viewer); CHKERRQ(ierr); ierr = PetscViewerDrawGetDraw(viewer, 0, &draw); CHKERRQ(ierr); ierr = MPI_Allreduce(&level, &maxLevel, 1, MPI_INT, MPI_MAX, pc->comm); CHKERRQ(ierr); sprintf(title, "ML Mesh, Level %d", maxLevel); ierr = PetscDrawSetTitle(draw, title); CHKERRQ(ierr); ierr = PCMultiLevelView_Mesh(pc, numCols, numElements, mesh); CHKERRQ(ierr); PetscFunctionReturn(0); } #undef __FUNCT__ #define __FUNCT__ "PCMultiLevelView_Private" int PCMultiLevelReduceView(PC pc, int level, int numNewCols, int *colPartition, int again, int *anyAgain) { PC_Multilevel *ml = (PC_Multilevel *) pc->data; PetscViewer viewer = ml->factorizationViewer; int **newMesh; int numCols, numNewElements; int ierr; PetscFunctionBegin; if (viewer != PETSC_NULL) { if (level < 0) { ierr = PCMultiLevelView_Private(pc, viewer, -1, numNewCols, 0, ml->meshes[-level]); CHKERRQ(ierr); } else { numCols = ml->numVertices[level]; numNewElements = ml->numElements[level+1]; newMesh = ml->meshes[level+1]; ierr = PCMultiLevelCalcVertexCoords_Private(pc, numCols, numNewCols, colPartition); CHKERRQ(ierr); ierr = PCMultiLevelFilterVertexCoords_Tutte(pc, numNewCols, ml->vertices, newMesh); CHKERRQ(ierr); ierr = PCMultiLevelView_Private(pc, viewer, level+1, numNewCols, numNewElements, newMesh); CHKERRQ(ierr); } ierr = PetscViewerFlush(viewer); CHKERRQ(ierr); ierr = MPI_Allreduce(&again, anyAgain, 1, MPI_INT, MPI_LOR, pc->comm); CHKERRQ(ierr); } PetscFunctionReturn(0); }