Actual source code: axpy.c
petsc-3.7.5 2017-01-01
2: #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/
6: /*@
7: MatAXPY - Computes Y = a*X + Y.
9: Logically Collective on Mat
11: Input Parameters:
12: + a - the scalar multiplier
13: . X - the first matrix
14: . Y - the second matrix
15: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN
16: or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
18: Level: intermediate
20: .keywords: matrix, add
22: .seealso: MatAYPX()
23: @*/
24: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
25: {
27: PetscInt m1,m2,n1,n2;
33: MatGetSize(X,&m1,&n1);
34: MatGetSize(Y,&m2,&n2);
35: if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2);
37: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
38: if (Y->ops->axpy) {
39: (*Y->ops->axpy)(Y,a,X,str);
40: } else {
41: MatAXPY_Basic(Y,a,X,str);
42: }
43: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
44: #if defined(PETSC_HAVE_CUSP)
45: if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
46: Y->valid_GPU_matrix = PETSC_CUSP_CPU;
47: }
48: #elif defined(PETSC_HAVE_VIENNACL)
49: if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
50: Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
51: }
52: #elif defined(PETSC_HAVE_VECCUDA)
53: if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
54: Y->valid_GPU_matrix = PETSC_CUDA_CPU;
55: }
56: #endif
57: return(0);
58: }
62: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
63: {
64: PetscInt i,start,end,j,ncols,m,n;
65: PetscErrorCode ierr;
66: const PetscInt *row;
67: PetscScalar *val;
68: const PetscScalar *vals;
71: MatGetSize(X,&m,&n);
72: MatGetOwnershipRange(X,&start,&end);
73: if (a == 1.0) {
74: for (i = start; i < end; i++) {
75: MatGetRow(X,i,&ncols,&row,&vals);
76: MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
77: MatRestoreRow(X,i,&ncols,&row,&vals);
78: }
79: } else {
80: PetscMalloc1(n+1,&val);
81: for (i=start; i<end; i++) {
82: MatGetRow(X,i,&ncols,&row,&vals);
83: for (j=0; j<ncols; j++) {
84: val[j] = a*vals[j];
85: }
86: MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
87: MatRestoreRow(X,i,&ncols,&row,&vals);
88: }
89: PetscFree(val);
90: }
91: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
92: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
93: return(0);
94: }
98: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
99: {
100: PetscInt i,start,end,j,ncols,m,n;
101: PetscErrorCode ierr;
102: const PetscInt *row;
103: PetscScalar *val;
104: const PetscScalar *vals;
107: MatGetSize(X,&m,&n);
108: MatGetOwnershipRange(X,&start,&end);
109: if (a == 1.0) {
110: for (i = start; i < end; i++) {
111: MatGetRow(Y,i,&ncols,&row,&vals);
112: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
113: MatRestoreRow(Y,i,&ncols,&row,&vals);
115: MatGetRow(X,i,&ncols,&row,&vals);
116: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
117: MatRestoreRow(X,i,&ncols,&row,&vals);
118: }
119: } else {
120: PetscMalloc1(n+1,&val);
121: for (i=start; i<end; i++) {
122: MatGetRow(Y,i,&ncols,&row,&vals);
123: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
124: MatRestoreRow(Y,i,&ncols,&row,&vals);
126: MatGetRow(X,i,&ncols,&row,&vals);
127: for (j=0; j<ncols; j++) {
128: val[j] = a*vals[j];
129: }
130: MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
131: MatRestoreRow(X,i,&ncols,&row,&vals);
132: }
133: PetscFree(val);
134: }
135: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
136: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
137: return(0);
138: }
142: /*@
143: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
145: Neighbor-wise Collective on Mat
147: Input Parameters:
148: + Y - the matrices
149: - a - the PetscScalar
151: Level: intermediate
153: Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
154: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
155: entry).
157: Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is
158: preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix.
160: .keywords: matrix, add, shift
162: .seealso: MatDiagonalSet()
163: @*/
164: PetscErrorCode MatShift(Mat Y,PetscScalar a)
165: {
170: if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
171: if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
172: MatCheckPreallocated(Y,1);
174: if (Y->ops->shift) {
175: (*Y->ops->shift)(Y,a);
176: } else {
177: MatShift_Basic(Y,a);
178: }
180: #if defined(PETSC_HAVE_CUSP)
181: if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) {
182: Y->valid_GPU_matrix = PETSC_CUSP_CPU;
183: }
184: #elif defined(PETSC_HAVE_VIENNACL)
185: if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) {
186: Y->valid_GPU_matrix = PETSC_VIENNACL_CPU;
187: }
188: #elif defined(PETSC_HAVE_VECCUDA)
189: if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) {
190: Y->valid_GPU_matrix = PETSC_CUDA_CPU;
191: }
192: #endif
193: return(0);
194: }
198: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
199: {
201: PetscInt i,start,end;
202: PetscScalar *v;
205: MatGetOwnershipRange(Y,&start,&end);
206: VecGetArray(D,&v);
207: for (i=start; i<end; i++) {
208: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
209: }
210: VecRestoreArray(D,&v);
211: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
212: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
213: return(0);
214: }
218: /*@
219: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
220: that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
221: INSERT_VALUES.
223: Input Parameters:
224: + Y - the input matrix
225: . D - the diagonal matrix, represented as a vector
226: - i - INSERT_VALUES or ADD_VALUES
228: Neighbor-wise Collective on Mat and Vec
230: Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
231: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
232: entry).
234: Level: intermediate
236: .keywords: matrix, add, shift, diagonal
238: .seealso: MatShift()
239: @*/
240: PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
241: {
243: PetscInt matlocal,veclocal;
248: MatGetLocalSize(Y,&matlocal,NULL);
249: VecGetLocalSize(D,&veclocal);
250: if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal);
251: if (Y->ops->diagonalset) {
252: (*Y->ops->diagonalset)(Y,D,is);
253: } else {
254: MatDiagonalSet_Default(Y,D,is);
255: }
256: return(0);
257: }
261: /*@
262: MatAYPX - Computes Y = a*Y + X.
264: Logically on Mat
266: Input Parameters:
267: + a - the PetscScalar multiplier
268: . Y - the first matrix
269: . X - the second matrix
270: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN
272: Level: intermediate
274: .keywords: matrix, add
276: .seealso: MatAXPY()
277: @*/
278: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
279: {
280: PetscScalar one = 1.0;
282: PetscInt mX,mY,nX,nY;
288: MatGetSize(X,&mX,&nX);
289: MatGetSize(X,&mY,&nY);
290: if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY);
292: MatScale(Y,a);
293: MatAXPY(Y,one,X,str);
294: return(0);
295: }
299: /*@
300: MatComputeExplicitOperator - Computes the explicit matrix
302: Collective on Mat
304: Input Parameter:
305: . inmat - the matrix
307: Output Parameter:
308: . mat - the explict preconditioned operator
310: Notes:
311: This computation is done by applying the operators to columns of the
312: identity matrix.
314: Currently, this routine uses a dense matrix format when 1 processor
315: is used and a sparse format otherwise. This routine is costly in general,
316: and is recommended for use only with relatively small systems.
318: Level: advanced
320: .keywords: Mat, compute, explicit, operator
321: @*/
322: PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat)
323: {
324: Vec in,out;
326: PetscInt i,m,n,M,N,*rows,start,end;
327: MPI_Comm comm;
328: PetscScalar *array,zero = 0.0,one = 1.0;
329: PetscMPIInt size;
335: PetscObjectGetComm((PetscObject)inmat,&comm);
336: MPI_Comm_size(comm,&size);
338: MatGetLocalSize(inmat,&m,&n);
339: MatGetSize(inmat,&M,&N);
340: MatCreateVecs(inmat,&in,&out);
341: VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);
342: VecGetOwnershipRange(out,&start,&end);
343: PetscMalloc1(m,&rows);
344: for (i=0; i<m; i++) rows[i] = start + i;
346: MatCreate(comm,mat);
347: MatSetSizes(*mat,m,n,M,N);
348: if (size == 1) {
349: MatSetType(*mat,MATSEQDENSE);
350: MatSeqDenseSetPreallocation(*mat,NULL);
351: } else {
352: MatSetType(*mat,MATMPIAIJ);
353: MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);
354: }
356: for (i=0; i<N; i++) {
358: VecSet(in,zero);
359: VecSetValues(in,1,&i,&one,INSERT_VALUES);
360: VecAssemblyBegin(in);
361: VecAssemblyEnd(in);
363: MatMult(inmat,in,out);
365: VecGetArray(out,&array);
366: MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);
367: VecRestoreArray(out,&array);
369: }
370: PetscFree(rows);
371: VecDestroy(&out);
372: VecDestroy(&in);
373: MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);
374: MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);
375: return(0);
376: }
380: /*@
381: MatChop - Set all values in the matrix less than the tolerance to zero
383: Input Parameters:
384: + A - The matrix
385: - tol - The zero tolerance
387: Output Parameters:
388: . A - The chopped matrix
390: Level: intermediate
392: .seealso: MatCreate(), MatZeroEntries()
393: @*/
394: PetscErrorCode MatChop(Mat A, PetscReal tol)
395: {
396: PetscScalar *newVals;
397: PetscInt *newCols;
398: PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0;
402: MatGetOwnershipRange(A, &rStart, &rEnd);
403: for (r = rStart; r < rEnd; ++r) {
404: PetscInt ncols;
406: MatGetRow(A, r, &ncols, NULL, NULL);
407: colMax = PetscMax(colMax, ncols);
408: MatRestoreRow(A, r, &ncols, NULL, NULL);
409: }
410: numRows = rEnd - rStart;
411: MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
412: PetscMalloc2(colMax,&newCols,colMax,&newVals);
413: for (r = rStart; r < rStart+maxRows; ++r) {
414: const PetscScalar *vals;
415: const PetscInt *cols;
416: PetscInt ncols, newcols, c;
418: if (r < rEnd) {
419: MatGetRow(A, r, &ncols, &cols, &vals);
420: for (c = 0; c < ncols; ++c) {
421: newCols[c] = cols[c];
422: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
423: }
424: newcols = ncols;
425: MatRestoreRow(A, r, &ncols, &cols, &vals);
426: MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
427: }
428: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
429: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
430: }
431: PetscFree2(newCols,newVals);
432: return(0);
433: }