Actual source code: ex4.c
petsc-3.7.5 2017-01-01
static char help[] = "Test MatSetValuesBatch: setting batches of elements using the GPU.\n\
This works with SeqAIJCUSP and MPIAIJCUSP matrices.\n\n";

#include <petscdm.h>
#include <petscdmda.h>
#include <petscksp.h>
/* We will use a structured mesh for this assembly test. Each square will be divided into two triangles:
  C       D
   _______
  |\      |  The matrix for 0 and 1 is  /  1   -0.5 -0.5 \
  | \   1 |                             | -0.5  0.5  0.0 |
  |  \    |                             \ -0.5  0.0  0.5 /
  |   \   |
  |    \  |
  |  0  \ |
  |      \|
  ---------
  A       B
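
  This is the standard P1 (piecewise-linear) element stiffness matrix for the Laplacian on a right
  triangle, with the right-angle vertex ordered first (A for triangle 0, D for triangle 1). A quick
  check with legs of length h: the hat-function gradients are (-1/h,-1/h), (1/h,0), and (0,1/h), and
  the element area is h^2/2, so in two dimensions the entries are independent of h.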

TO ADD:
  DONE 1) Build and run on baconost
    - Gather data for CPU/GPU up to da_grid_x 1300
      - Looks 6x faster than CPU
    - Make plot

  DONE 2) Solve the Neumann Poisson problem

  3) Multi-GPU assembly
    - MPIAIJCUSP: just have two SEQAIJCUSP matrices, nothing else special
    a) Filter rows to be sent to other procs (normally stashed)
    b) Send/recv rows, might as well do it with a VecScatter
    c) Potential to overlap this computation with the GPU (talk to Nathan)
    c') Just shove these rows in after the local ones
    d) Have an implicit representation of the COO from repeated/tiled_range
    e) Do a filtered copy, decrementing rows and remapping columns, which splits into two sets
    f) Make two COO matrices and do separate aggregation on each one

  4) Solve the Neumann Poisson problem in parallel
    - Try it on the GPU machine at Brown (they need another GNU install)

  5) GPU FEM integration
    - Move the launch code to PETSc, or try again now that assembly is in PETSc
    - Move the build code to PETSc

  6) Try out CUSP PCs
*/
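
/* Example command lines (the -gpu, -cpu, -solve, and -view options are read in main();
   -da_grid_x and -da_grid_y are the usual DMDA sizing options):

     ./ex4 -da_grid_x 100 -da_grid_y 100        assemble with both the GPU and the CPU stage (the defaults)
     ./ex4 -gpu -cpu 0                          GPU assembly only
     mpiexec -n 2 ./ex4 -gpu 0 -solve           CPU assembly on two ranks, then a KSP solve with a random rhs

   Run -solve together with the CPU stage: the matrix built in the GPU stage is destroyed at the end of
   that stage, so the solve uses the CPU-assembled matrix. The GPU stage requires a PETSc build configured
   with CUSP support (MATSEQAIJCUSP/MATMPIAIJCUSP). */

/* IntegrateCells - For every locally owned triangle, record its three global row indices and its element
   stiffness matrix, concatenated into the output arrays elemRows and elemMats; the caller frees both
   arrays with PetscFree2(). */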
PetscErrorCode IntegrateCells(DM dm, PetscInt *Ne, PetscInt *Nl, PetscInt **elemRows, PetscScalar **elemMats)
{
  DMDALocalInfo info;
  PetscInt      *er;
  PetscScalar   *em;
  PetscInt      X, Y, dof;
  PetscInt      nl, nxe, nye, ne;
  PetscInt      k = 0, m = 0;
  PetscInt      i, j;
#if defined(PETSC_USE_LOG)
  PetscLogEvent integrationEvent;
#endif

  PetscLogEventRegister("ElemIntegration", DM_CLASSID, &integrationEvent);
  PetscLogEventBegin(integrationEvent,0,0,0,0);
  DMDAGetInfo(dm, 0, &X, &Y,0,0,0,0, &dof,0,0,0,0,0);
  DMDAGetLocalInfo(dm, &info);
  nl  = dof*3;
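  /* Count the locally owned cells: a local vertex starts a square only if it is not in the last global
     column/row, and every square contributes two triangular elements. */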
  nxe = info.xm; if (info.xs+info.xm == X) nxe--;
  nye = info.ym; if (info.ys+info.ym == Y) nye--;
  ne  = 2 * nxe * nye;
  *Ne = ne;
  *Nl = nl;
  PetscMalloc2(ne*nl, elemRows, ne*nl*nl, elemMats);
  er  = *elemRows;
  em  = *elemMats;
  /*  Proc 0              Proc 1            */
  /*  xs: 0  xm: 3        xs: 0  xm: 3      */
  /*  ys: 0  ym: 2        ys: 2  ym: 1      */
  /* 8 elements x 3 vertices = 24 element matrix rows and 72 entries */
  /*  6 offproc rows containing 18 element matrix entries */
  /* 18  onproc rows containing 54 element matrix entries */
  /*  3 offproc columns in 8 element matrix entries */
  /* so we should have 46 diagonal matrix entries */
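  /* (These counts are for the default 3x3 vertex grid created in main() run on two ranks: proc 0 owns all
     eight triangles, and the three vertices in the top row belong to proc 1.) */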
  for (j = info.ys; j < info.ys+nye; ++j) {
    for (i = info.xs; i < info.xs+nxe; ++i) {
      PetscInt rowA = j*X     + i, rowB = j*X     + i+1;
      PetscInt rowC = (j+1)*X + i, rowD = (j+1)*X + i+1;

      /* Lower triangle */
      er[k+0] = rowA; em[m+0*nl+0] =  1.0; em[m+0*nl+1] = -0.5; em[m+0*nl+2] = -0.5;
      er[k+1] = rowB; em[m+1*nl+0] = -0.5; em[m+1*nl+1] =  0.5; em[m+1*nl+2] =  0.0;
      er[k+2] = rowC; em[m+2*nl+0] = -0.5; em[m+2*nl+1] =  0.0; em[m+2*nl+2] =  0.5;
      k += nl; m += nl*nl;
      /* Upper triangle */
      er[k+0] = rowD; em[m+0*nl+0] =  1.0; em[m+0*nl+1] = -0.5; em[m+0*nl+2] = -0.5;
      er[k+1] = rowC; em[m+1*nl+0] = -0.5; em[m+1*nl+1] =  0.5; em[m+1*nl+2] =  0.0;
      er[k+2] = rowB; em[m+2*nl+0] = -0.5; em[m+2*nl+1] =  0.0; em[m+2*nl+2] =  0.5;
      k += nl; m += nl*nl;
    }
  }
  PetscLogEventEnd(integrationEvent,0,0,0,0);
  return 0;
}

int main(int argc, char **argv)
{
  KSP           ksp;
  MatNullSpace  nullsp;
  DM            dm;
  Mat           A;
  Vec           x, b;
  PetscViewer   viewer;
  PetscInt      Nl, Ne;
  PetscInt      *elemRows;
  PetscScalar   *elemMats;
  PetscBool     doGPU = PETSC_TRUE, doCPU = PETSC_TRUE, doSolve = PETSC_FALSE, doView = PETSC_TRUE;
#if defined(PETSC_USE_LOG)
  PetscLogStage gpuStage, cpuStage;
#endif

  PetscInitialize(&argc, &argv, 0, help);
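  /* A negative size passed to DMDACreate2d() is taken as a default (here a 3x3 vertex grid) that can be
     overridden at run time with -da_grid_x and -da_grid_y. */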
  DMDACreate2d(PETSC_COMM_WORLD, DM_BOUNDARY_NONE, DM_BOUNDARY_NONE, DMDA_STENCIL_BOX, -3, -3, PETSC_DECIDE, PETSC_DECIDE, 1, 1, NULL, NULL, &dm);
  IntegrateCells(dm, &Ne, &Nl, &elemRows, &elemMats);
  PetscOptionsGetBool(NULL,NULL, "-view", &doView, NULL);
  /* Construct matrix using GPU */
  PetscOptionsGetBool(NULL,NULL, "-gpu", &doGPU, NULL);
  if (doGPU) {
    PetscLogStageRegister("GPU Stage", &gpuStage);
    PetscLogStagePush(gpuStage);
    DMSetMatType(dm,MATAIJ);
    DMCreateMatrix(dm, &A);
    MatSetType(A, MATAIJCUSP);
    MatSeqAIJSetPreallocation(A, 0, NULL);
    MatMPIAIJSetPreallocation(A, 0, NULL, 0, NULL);
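    /* MatSetValuesBatch() adds all Ne element matrices (each Nl x Nl, stored back to back in elemMats) in
       a single call, using elemRows as both the row and the column indices. For the AIJCUSP types the
       repeated (row, col) entries are aggregated on the GPU, which is the path this stage is meant to time. */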
    MatSetValuesBatch(A, Ne, Nl, elemRows, elemMats);
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
    if (doView) {
      PetscViewerASCIIOpen(PETSC_COMM_WORLD, NULL, &viewer);
      if (Ne > 500) {PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);}
      MatView(A, viewer);
      if (Ne > 500) {PetscViewerPopFormat(viewer);}
      PetscViewerDestroy(&viewer);
    }
    PetscLogStagePop();
    MatDestroy(&A);
  }
  /* Construct matrix using CPU */
  PetscOptionsGetBool(NULL,NULL, "-cpu", &doCPU, NULL);
  if (doCPU) {
    PetscLogStageRegister("CPU Stage", &cpuStage);
    PetscLogStagePush(cpuStage);
    DMSetMatType(dm,MATAIJ);
    DMCreateMatrix(dm, &A);
    MatZeroEntries(A);
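    /* The same batched call on a plain AIJ matrix exercises the default MatSetValuesBatch() path, i.e. the
       conventional CPU insertion, so the two log stages can be compared directly. */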
    MatSetValuesBatch(A, Ne, Nl, elemRows, elemMats);
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
    if (doView) {
      PetscViewerASCIIOpen(PETSC_COMM_WORLD, NULL, &viewer);
      if (Ne > 500) {PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);}
      MatView(A, viewer);
      if (Ne > 500) {PetscViewerPopFormat(viewer);}
      PetscViewerDestroy(&viewer);
    }
    PetscLogStagePop();
  }
  /* Solve simple system with random rhs */
  PetscOptionsGetBool(NULL,NULL, "-solve", &doSolve, NULL);
  if (doSolve) {
    MatCreateVecs(A, &x, &b);
    VecSetRandom(b, NULL);
    KSPCreate(PETSC_COMM_WORLD, &ksp);
    KSPSetOperators(ksp, A, A);
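    /* The assembled operator is a pure Neumann Laplacian, so its null space is the constant vector; attach
       it to A so the Krylov method projects it out and the singular system remains solvable. */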
    MatNullSpaceCreate(PETSC_COMM_WORLD, PETSC_TRUE, 0, NULL, &nullsp);
    MatSetNullSpace(A, nullsp);
    MatNullSpaceDestroy(&nullsp);
    KSPSetFromOptions(ksp);
    KSPSolve(ksp, b, x);
    KSPDestroy(&ksp);
    VecDestroy(&x);
    VecDestroy(&b);
    /* Solve physical system:

         -\Delta u = -6 (x + y - 1)

       where u = x^3 - 3/2 x^2 + y^3 - 3/2 y^2 + 1/2,
       so \Delta u = 6 x - 3 + 6 y - 3,
       and \frac{\partial u}{\partial n} = {3x (x - 1), 3y (y - 1)} \cdot n
                                         = \pm 3x (x - 1) at x=0,1 = 0
                                         = \pm 3y (y - 1) at y=0,1 = 0
    */
  }
  /* Cleanup */
  MatDestroy(&A);
  PetscFree2(elemRows, elemMats);
  DMDestroy(&dm);
  PetscFinalize();
  return 0;
}