Actual source code: mpirowbs.c
1: /* $Id: mpirowbs.c,v 2.9 2001/08/10 03:30:53 bsmith Exp $*/
3: #include "src/mat/impls/rowbs/mpi/mpirowbs.h"
4: #include "src/vec/vecimpl.h"
6: #define CHUNCKSIZE_LOCAL 10
8: #undef __FUNCT__
10: static int MatFreeRowbs_Private(Mat A,int n,int *i,PetscScalar *v)
11: {
15: if (v) {
16: #if defined(PETSC_USE_LOG)
17: int len = -n*(sizeof(int)+sizeof(PetscScalar));
18: #endif
19: PetscFree(v);
20: PetscLogObjectMemory(A,len);
21: }
22: return(0);
23: }
25: #undef __FUNCT__
27: static int MatMallocRowbs_Private(Mat A,int n,int **i,PetscScalar **v)
28: {
29: int len,ierr;
32: if (!n) {
33: *i = 0; *v = 0;
34: } else {
35: len = n*(sizeof(int) + sizeof(PetscScalar));
36: PetscMalloc(len,v);
37: PetscLogObjectMemory(A,len);
38: *i = (int *)(*v + n);
39: }
40: return(0);
41: }
43: #undef __FUNCT__
45: int MatScale_MPIRowbs(PetscScalar *alphain,Mat inA)
46: {
47: Mat_MPIRowbs *a = (Mat_MPIRowbs*)inA->data;
48: BSspmat *A = a->A;
49: BSsprow *vs;
50: PetscScalar *ap,alpha = *alphain;
51: int i,m = inA->m,nrow,j;
54: for (i=0; i<m; i++) {
55: vs = A->rows[i];
56: nrow = vs->length;
57: ap = vs->nz;
58: for (j=0; j<nrow; j++) {
59: ap[j] *= alpha;
60: }
61: }
62: PetscLogFlops(a->nz);
63: return(0);
64: }
66: /* ----------------------------------------------------------------- */
67: #undef __FUNCT__
69: static int MatCreateMPIRowbs_local(Mat A,int nz,int *nnz)
70: {
71: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)A->data;
72: int ierr,i,len,nzalloc = 0,m = A->m;
73: BSspmat *bsmat;
74: BSsprow *vs;
77: if (!nnz) {
78: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
79: if (nz <= 0) nz = 1;
80: nzalloc = 1;
81: PetscMalloc((m+1)*sizeof(int),&nnz);
82: for (i=0; i<m; i++) nnz[i] = nz;
83: nz = nz*m;
84: } else {
85: nz = 0;
86: for (i=0; i<m; i++) {
87: if (nnz[i] <= 0) nnz[i] = 1;
88: nz += nnz[i];
89: }
90: }
92: /* Allocate BlockSolve matrix context */
93: ierr = PetscNew(BSspmat,&bsif->A);
94: bsmat = bsif->A;
95: BSset_mat_icc_storage(bsmat,PETSC_FALSE);
96: BSset_mat_symmetric(bsmat,PETSC_FALSE);
97: len = m*(sizeof(BSsprow*)+ sizeof(BSsprow)) + 1;
98: ierr = PetscMalloc(len,&bsmat->rows);
99: bsmat->num_rows = m;
100: bsmat->global_num_rows = A->M;
101: bsmat->map = bsif->bsmap;
102: vs = (BSsprow*)(bsmat->rows + m);
103: for (i=0; i<m; i++) {
104: bsmat->rows[i] = vs;
105: bsif->imax[i] = nnz[i];
106: vs->diag_ind = -1;
107: if (nnz[i] > 0) {
108: MatMallocRowbs_Private(A,nnz[i],&(vs->col),&(vs->nz));
109: } else {
110: vs->col = 0; vs->nz = 0;
111: }
112: /* put zero on diagonal */
113: /*vs->length = 1;
114: vs->col[0] = i + bsif->rstart;
115: vs->nz[0] = 0.0;*/
116: vs->length = 0;
117: vs++;
118: }
119: PetscLogObjectMemory(A,sizeof(BSspmat) + len);
120: bsif->nz = 0;
121: bsif->maxnz = nz;
122: bsif->sorted = 0;
123: bsif->roworiented = PETSC_TRUE;
124: bsif->nonew = 0;
125: bsif->bs_color_single = 0;
127: if (nzalloc) {PetscFree(nnz);}
128: return(0);
129: }
131: #undef __FUNCT__
133: static int MatSetValues_MPIRowbs_local(Mat AA,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
134: {
135: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
136: BSspmat *A = mat->A;
137: BSsprow *vs;
138: int *rp,k,a,b,t,ii,row,nrow,i,col,l,rmax,ierr;
139: int *imax = mat->imax,nonew = mat->nonew,sorted = mat->sorted;
140: PetscScalar *ap,value;
143: for (k=0; k<m; k++) { /* loop over added rows */
144: row = im[k];
145: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
146: if (row >= AA->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
147: vs = A->rows[row];
148: ap = vs->nz; rp = vs->col;
149: rmax = imax[row]; nrow = vs->length;
150: a = 0;
151: for (l=0; l<n; l++) { /* loop over added columns */
152: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative col");
153: if (in[l] >= AA->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
154: col = in[l]; value = *v++;
155: if (!sorted) a = 0; b = nrow;
156: while (b-a > 5) {
157: t = (b+a)/2;
158: if (rp[t] > col) b = t;
159: else a = t;
160: }
161: for (i=a; i<b; i++) {
162: if (rp[i] > col) break;
163: if (rp[i] == col) {
164: if (addv == ADD_VALUES) ap[i] += value;
165: else ap[i] = value;
166: goto noinsert;
167: }
168: }
169: if (nonew) goto noinsert;
170: if (nrow >= rmax) {
171: /* there is no extra room in row, therefore enlarge */
172: int *itemp,*iout,*iin = vs->col;
173: PetscScalar *vout,*vin = vs->nz,*vtemp;
175: /* malloc new storage space */
176: imax[row] += CHUNCKSIZE_LOCAL;
177: MatMallocRowbs_Private(AA,imax[row],&itemp,&vtemp);
178: vout = vtemp; iout = itemp;
179: for (ii=0; ii<i; ii++) {
180: vout[ii] = vin[ii];
181: iout[ii] = iin[ii];
182: }
183: vout[i] = value;
184: iout[i] = col;
185: for (ii=i+1; ii<=nrow; ii++) {
186: vout[ii] = vin[ii-1];
187: iout[ii] = iin[ii-1];
188: }
189: /* free old row storage */
190: if (rmax > 0) {
191: MatFreeRowbs_Private(AA,rmax,vs->col,vs->nz);
192: }
193: vs->col = iout; vs->nz = vout;
194: rmax = imax[row];
195: mat->maxnz += CHUNCKSIZE_LOCAL;
196: mat->reallocs++;
197: } else {
198: /* shift higher columns over to make room for newie */
199: for (ii=nrow-1; ii>=i; ii--) {
200: rp[ii+1] = rp[ii];
201: ap[ii+1] = ap[ii];
202: }
203: rp[i] = col;
204: ap[i] = value;
205: }
206: nrow++;
207: mat->nz++;
208: AA->same_nonzero = PETSC_FALSE;
209: noinsert:;
210: a = i + 1;
211: }
212: vs->length = nrow;
213: }
214: return(0);
215: }
218: #undef __FUNCT__
220: static int MatAssemblyBegin_MPIRowbs_local(Mat A,MatAssemblyType mode)
221: {
223: return(0);
224: }
226: #undef __FUNCT__
228: static int MatAssemblyEnd_MPIRowbs_local(Mat AA,MatAssemblyType mode)
229: {
230: Mat_MPIRowbs *a = (Mat_MPIRowbs*)AA->data;
231: BSspmat *A = a->A;
232: BSsprow *vs;
233: int i,j,rstart = a->rstart;
236: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
238: /* Mark location of diagonal */
239: for (i=0; i<AA->m; i++) {
240: vs = A->rows[i];
241: for (j=0; j<vs->length; j++) {
242: if (vs->col[j] == i + rstart) {
243: vs->diag_ind = j;
244: break;
245: }
246: }
247: if (vs->diag_ind == -1) {
248: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"no diagonal entry");
249: }
250: }
251: return(0);
252: }
254: #undef __FUNCT__
256: static int MatZeroRows_MPIRowbs_local(Mat A,IS is,PetscScalar *diag)
257: {
258: Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
259: BSspmat *l = a->A;
260: int i,ierr,N,*rz,m = A->m - 1;
263: ISGetLocalSize(is,&N);
264: ISGetIndices(is,&rz);
265: if (diag) {
266: for (i=0; i<N; i++) {
267: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
268: if (l->rows[rz[i]]->length > 0) { /* in case row was completely empty */
269: l->rows[rz[i]]->length = 1;
270: l->rows[rz[i]]->nz[0] = *diag;
271: l->rows[rz[i]]->col[0] = a->rstart + rz[i];
272: } else {
273: MatSetValues(A,1,&rz[i],1,&rz[i],diag,INSERT_VALUES);
274: }
275: }
276: } else {
277: for (i=0; i<N; i++) {
278: if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
279: l->rows[rz[i]]->length = 0;
280: }
281: }
282: A->same_nonzero = PETSC_FALSE;
283: ISRestoreIndices(is,&rz);
284: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
285: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
286: return(0);
287: }
289: #undef __FUNCT__
291: static int MatNorm_MPIRowbs_local(Mat A,NormType type,PetscReal *norm)
292: {
293: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
294: BSsprow *vs,**rs;
295: PetscScalar *xv;
296: PetscReal sum = 0.0;
297: int *xi,nz,i,j,ierr;
300: rs = mat->A->rows;
301: if (type == NORM_FROBENIUS) {
302: for (i=0; i<A->m; i++) {
303: vs = *rs++;
304: nz = vs->length;
305: xv = vs->nz;
306: while (nz--) {
307: #if defined(PETSC_USE_COMPLEX)
308: sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
309: #else
310: sum += (*xv)*(*xv); xv++;
311: #endif
312: }
313: }
314: *norm = sqrt(sum);
315: } else if (type == NORM_1) { /* max column norm */
316: PetscReal *tmp;
317: ierr = PetscMalloc(A->n*sizeof(PetscReal),&tmp);
318: ierr = PetscMemzero(tmp,A->n*sizeof(PetscReal));
319: *norm = 0.0;
320: for (i=0; i<A->m; i++) {
321: vs = *rs++;
322: nz = vs->length;
323: xi = vs->col;
324: xv = vs->nz;
325: while (nz--) {
326: tmp[*xi] += PetscAbsScalar(*xv);
327: xi++; xv++;
328: }
329: }
330: for (j=0; j<A->n; j++) {
331: if (tmp[j] > *norm) *norm = tmp[j];
332: }
333: PetscFree(tmp);
334: } else if (type == NORM_INFINITY) { /* max row norm */
335: *norm = 0.0;
336: for (i=0; i<A->m; i++) {
337: vs = *rs++;
338: nz = vs->length;
339: xv = vs->nz;
340: sum = 0.0;
341: while (nz--) {
342: sum += PetscAbsScalar(*xv); xv++;
343: }
344: if (sum > *norm) *norm = sum;
345: }
346: } else {
347: SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
348: }
349: return(0);
350: }
352: /* ----------------------------------------------------------------- */
354: #undef __FUNCT__
356: int MatSetValues_MPIRowbs(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode av)
357: {
358: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
359: int ierr,i,j,row,col,rstart = a->rstart,rend = a->rend;
360: PetscTruth roworiented = a->roworiented;
363: /* Note: There's no need to "unscale" the matrix, since scaling is
364: confined to a->pA, and we're working with a->A here */
365: for (i=0; i<m; i++) {
366: if (im[i] < 0) continue;
367: if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
368: if (im[i] >= rstart && im[i] < rend) {
369: row = im[i] - rstart;
370: for (j=0; j<n; j++) {
371: if (in[j] < 0) continue;
372: if (in[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
373: if (in[j] >= 0 && in[j] < mat->N){
374: col = in[j];
375: if (roworiented) {
376: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i*n+j,av);
377: } else {
378: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i+j*m,av);
379: }
380: } else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid column");}
381: }
382: } else {
383: if (!a->donotstash) {
384: if (roworiented) {
385: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
386: } else {
387: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
388: }
389: }
390: }
391: }
392: return(0);
393: }
395: #undef __FUNCT__
397: int MatAssemblyBegin_MPIRowbs(Mat mat,MatAssemblyType mode)
398: {
399: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
400: MPI_Comm comm = mat->comm;
401: int ierr,nstash,reallocs;
402: InsertMode addv;
405: /* Note: There's no need to "unscale" the matrix, since scaling is
406: confined to a->pA, and we're working with a->A here */
408: /* make sure all processors are either in INSERTMODE or ADDMODE */
409: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
410: if (addv == (ADD_VALUES|INSERT_VALUES)) {
411: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some procs inserted; others added");
412: }
413: mat->insertmode = addv; /* in case this processor had no cache */
415: MatStashScatterBegin_Private(&mat->stash,a->rowners);
416: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
417: PetscLogInfo(0,"MatAssemblyBegin_MPIRowbs:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
418: return(0);
419: }
421: #include "petscviewer.h"
423: #undef __FUNCT__
425: static int MatView_MPIRowbs_ASCII(Mat mat,PetscViewer viewer)
426: {
427: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
428: int ierr,i,j;
429: PetscTruth isascii;
430: BSspmat *A = a->A;
431: BSsprow **rs = A->rows;
432: PetscViewerFormat format;
435: PetscViewerGetFormat(viewer,&format);
436: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
438: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
439: int ind_l,ind_g,clq_l,clq_g,color;
440: ind_l = BSlocal_num_inodes(a->pA);CHKERRBS(0);
441: ind_g = BSglobal_num_inodes(a->pA);CHKERRBS(0);
442: clq_l = BSlocal_num_cliques(a->pA);CHKERRBS(0);
443: clq_g = BSglobal_num_cliques(a->pA);CHKERRBS(0);
444: color = BSnum_colors(a->pA);CHKERRBS(0);
445: PetscViewerASCIIPrintf(viewer," %d global inode(s), %d global clique(s), %d color(s)\n",ind_g,clq_g,color);
446: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d local inode(s), %d local clique(s)\n",a->rank,ind_l,clq_l);
447: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
448: for (i=0; i<A->num_rows; i++) {
449: PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
450: for (j=0; j<rs[i]->length; j++) {
451: if (rs[i]->nz[j]) {PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);}
452: }
453: PetscViewerASCIISynchronizedPrintf(viewer,"\n");
454: }
455: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
456: SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
457: } else {
458: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
459: for (i=0; i<A->num_rows; i++) {
460: PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
461: for (j=0; j<rs[i]->length; j++) {
462: PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);
463: }
464: PetscViewerASCIISynchronizedPrintf(viewer,"\n");
465: }
466: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
467: }
468: PetscViewerFlush(viewer);
469: return(0);
470: }
472: #undef __FUNCT__
474: static int MatView_MPIRowbs_Binary(Mat mat,PetscViewer viewer)
475: {
476: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
477: int ierr,i,M,m,rank,size,*sbuff,*rowlengths;
478: int *recvcts,*recvdisp,fd,*cols,maxnz,nz,j;
479: BSspmat *A = a->A;
480: BSsprow **rs = A->rows;
481: MPI_Comm comm = mat->comm;
482: MPI_Status status;
483: PetscScalar *vals;
484: MatInfo info;
487: MPI_Comm_size(comm,&size);
488: MPI_Comm_rank(comm,&rank);
490: M = mat->M; m = mat->m;
491: /* First gather together on the first processor the lengths of
492: each row, and write them out to the file */
493: PetscMalloc(m*sizeof(int),&sbuff);
494: for (i=0; i<A->num_rows; i++) {
495: sbuff[i] = rs[i]->length;
496: }
497: MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
498: if (!rank) {
499: PetscViewerBinaryGetDescriptor(viewer,&fd);
500: PetscMalloc((4+M)*sizeof(int),&rowlengths);
501: PetscMalloc(size*sizeof(int),&recvcts);
502: recvdisp = a->rowners;
503: for (i=0; i<size; i++) {
504: recvcts[i] = recvdisp[i+1] - recvdisp[i];
505: }
506: /* first four elements of rowlength are the header */
507: rowlengths[0] = mat->cookie;
508: rowlengths[1] = mat->M;
509: rowlengths[2] = mat->N;
510: rowlengths[3] = (int)info.nz_used;
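    /* Illustrative sketch of the binary layout produced by the writes below
       (inferred from this routine only, not an authoritative statement of the
       general PETSc binary format):
           [ cookie, M, N, nz_total ]            header: 4 ints
           [ len(row 0) ... len(row M-1) ]       M ints
           [ column indices, row by row ]        nz_total ints
           [ values, row by row ]                nz_total scalars              */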
511: MPI_Gatherv(sbuff,m,MPI_INT,rowlengths+4,recvcts,recvdisp,MPI_INT,0,comm);
512: PetscFree(sbuff);
513: PetscBinaryWrite(fd,rowlengths,4+M,PETSC_INT,0);
514: /* count the number of nonzeros on each processor */
515: PetscMemzero(recvcts,size*sizeof(int));
516: for (i=0; i<size; i++) {
517: for (j=recvdisp[i]; j<recvdisp[i+1]; j++) {
518: recvcts[i] += rowlengths[j+4];
519: }
520: }
521: /* allocate buffer long enough to hold largest one */
522: maxnz = 0;
523: for (i=0; i<size; i++) {
524: maxnz = PetscMax(maxnz,recvcts[i]);
525: }
526: PetscFree(rowlengths);
527: PetscFree(recvcts);
528: PetscMalloc(maxnz*sizeof(int),&cols);
530: /* binary store column indices for 0th processor */
531: nz = 0;
532: for (i=0; i<A->num_rows; i++) {
533: for (j=0; j<rs[i]->length; j++) {
534: cols[nz++] = rs[i]->col[j];
535: }
536: }
537: PetscBinaryWrite(fd,cols,nz,PETSC_INT,0);
539: /* receive and store column indices for all other processors */
540: for (i=1; i<size; i++) {
541: /* should tell processor that I am now ready and to begin the send */
542: MPI_Recv(cols,maxnz,MPI_INT,i,mat->tag,comm,&status);
543: MPI_Get_count(&status,MPI_INT,&nz);
544: PetscBinaryWrite(fd,cols,nz,PETSC_INT,0);
545: }
546: PetscFree(cols);
547: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
549: /* binary store values for 0th processor */
550: nz = 0;
551: for (i=0; i<A->num_rows; i++) {
552: for (j=0; j<rs[i]->length; j++) {
553: vals[nz++] = rs[i]->nz[j];
554: }
555: }
556: PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,0);
558: /* receive and store nonzeros for all other processors */
559: for (i=1; i<size; i++) {
560: /* should tell processor that I am now ready and to begin the send */
561: MPI_Recv(vals,maxnz,MPIU_SCALAR,i,mat->tag,comm,&status);
562: MPI_Get_count(&status,MPIU_SCALAR,&nz);
563: PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,0);
564: }
565: PetscFree(vals);
566: } else {
567: MPI_Gatherv(sbuff,m,MPI_INT,0,0,0,MPI_INT,0,comm);
568: PetscFree(sbuff);
570: /* count local nonzeros */
571: nz = 0;
572: for (i=0; i<A->num_rows; i++) {
573: for (j=0; j<rs[i]->length; j++) {
574: nz++;
575: }
576: }
577: /* copy into buffer column indices */
578: PetscMalloc(nz*sizeof(int),&cols);
579: nz = 0;
580: for (i=0; i<A->num_rows; i++) {
581: for (j=0; j<rs[i]->length; j++) {
582: cols[nz++] = rs[i]->col[j];
583: }
584: }
585: /* send */ /* should wait until processor zero tells me to go */
586: MPI_Send(cols,nz,MPI_INT,0,mat->tag,comm);
587: PetscFree(cols);
589: /* copy into buffer column values */
590: PetscMalloc(nz*sizeof(PetscScalar),&vals);
591: nz = 0;
592: for (i=0; i<A->num_rows; i++) {
593: for (j=0; j<rs[i]->length; j++) {
594: vals[nz++] = rs[i]->nz[j];
595: }
596: }
597: /* send */ /* should wait until processor zero tells me to go */
598: MPI_Send(vals,nz,MPIU_SCALAR,0,mat->tag,comm);
599: PetscFree(vals);
600: }
602: return(0);
603: }
605: #undef __FUNCT__
607: int MatView_MPIRowbs(Mat mat,PetscViewer viewer)
608: {
609: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
610: int ierr;
611: PetscTruth isascii,isbinary;
614: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
615: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
616: if (!bsif->blocksolveassembly) {
617: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
618: }
619: if (isascii) {
620: MatView_MPIRowbs_ASCII(mat,viewer);
621: } else if (isbinary) {
622: MatView_MPIRowbs_Binary(mat,viewer);
623: } else {
624: SETERRQ1(1,"Viewer type %s not supported by MPIRowbs matrices",((PetscObject)viewer)->type_name);
625: }
626: return(0);
627: }
628:
629: #undef __FUNCT__
631: static int MatAssemblyEnd_MPIRowbs_MakeSymmetric(Mat mat)
632: {
633: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
634: BSspmat *A = a->A;
635: BSsprow *vs;
636: int size,rank,M,rstart,tag,i,j,*rtable,*w1,*w3,*w4,len,proc,nrqs;
637: int msz,*pa,bsz,nrqr,**rbuf1,**sbuf1,**ptr,*tmp,*ctr,col,index,row;
638: int ctr_j,*sbuf1_j,k,ierr;
639: PetscScalar val=0.0;
640: MPI_Comm comm;
641: MPI_Request *s_waits1,*r_waits1;
642: MPI_Status *s_status,*r_status;
645: comm = mat->comm;
646: tag = mat->tag;
647: size = a->size;
648: rank = a->rank;
649: M = mat->M;
650: rstart = a->rstart;
652: PetscMalloc(M*sizeof(int),&rtable);
653: /* Create hash table for the mapping :row -> proc */
654: for (i=0,j=0; i<size; i++) {
655: len = a->rowners[i+1];
656: for (; j<len; j++) {
657: rtable[j] = i;
658: }
659: }
661: /* Evaluate communication - mesg to whom, length of mesg, and buffer space
662: required. Based on this, buffers are allocated, and data copied into them. */
663: PetscMalloc(size*4*sizeof(int),&w1);/* mesg size */
664: w3 = w1 + 2*size; /* no of IS that needs to be sent to proc i */
665: w4 = w3 + size; /* temp work space used in determining w1, w3 */
666: PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector */
668: for (i=0; i<mat->m; i++) {
669: PetscMemzero(w4,size*sizeof(int)); /* initialize work vector */
670: vs = A->rows[i];
671: for (j=0; j<vs->length; j++) {
672: proc = rtable[vs->col[j]];
673: w4[proc]++;
674: }
675: for (j=0; j<size; j++) {
676: if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
677: }
678: }
679:
680: nrqs = 0; /* number of outgoing messages */
681: msz = 0; /* total mesg length (for all proc */
682: w1[2*rank] = 0; /* no mesg sent to itself */
683: w3[rank] = 0;
684: for (i=0; i<size; i++) {
685: if (w1[2*i]) {w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
686: }
687: /* pa - is list of processors to communicate with */
688: PetscMalloc((nrqs+1)*sizeof(int),&pa);
689: for (i=0,j=0; i<size; i++) {
690: if (w1[2*i]) {pa[j] = i; j++;}
691: }
693: /* Each message would have a header = 1 + 2*(no of ROWS) + data */
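    /* For example (illustrative only, not in the original source): a message to
       proc p carrying row r1 with 2 off-processor columns c11,c12 and row r2
       with 3 columns c21,c22,c23 is laid out as
           [ 2,  r1,2,  r2,3,  c11,c12,  c21,c22,c23 ]
       i.e. the row count, then (row,count) pairs, then the column data; the
       receiver inserts the transposed entries to symmetrize the structure.     */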
694: for (i=0; i<nrqs; i++) {
695: j = pa[i];
696: w1[2*j] += w1[2*j+1] + 2*w3[j];
697: msz += w1[2*j];
698: }
699:
700: /* Do a global reduction to determine how many messages to expect */
701: PetscMaxSum(comm,w1,&bsz,&nrqr);
703: /* Allocate memory for recv buffers . Prob none if nrqr = 0 ???? */
704: len = (nrqr+1)*sizeof(int*) + nrqr*bsz*sizeof(int);
705: ierr = PetscMalloc(len,&rbuf1);
706: rbuf1[0] = (int*)(rbuf1 + nrqr);
707: for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;
709: /* Post the receives */
710: PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);
711: for (i=0; i<nrqr; ++i){
712: MPI_Irecv(rbuf1[i],bsz,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits1+i);
713: }
714:
715: /* Allocate Memory for outgoing messages */
716: len = 2*size*sizeof(int*) + (size+msz)*sizeof(int);
717: ierr = PetscMalloc(len,&sbuf1);
718: ptr = sbuf1 + size; /* Pointers to the data in outgoing buffers */
719: ierr = PetscMemzero(sbuf1,2*size*sizeof(int*));
720: tmp = (int*)(sbuf1 + 2*size);
721: ctr = tmp + msz;
723: {
724: int *iptr = tmp,ict = 0;
725: for (i=0; i<nrqs; i++) {
726: j = pa[i];
727: iptr += ict;
728: sbuf1[j] = iptr;
729: ict = w1[2*j];
730: }
731: }
733: /* Form the outgoing messages */
734: /* Clean up the header space */
735: for (i=0; i<nrqs; i++) {
736: j = pa[i];
737: sbuf1[j][0] = 0;
738: ierr = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
739: ptr[j] = sbuf1[j] + 2*w3[j] + 1;
740: }
742: /* Parse the matrix and copy the data into sbuf1 */
743: for (i=0; i<mat->m; i++) {
744: PetscMemzero(ctr,size*sizeof(int));
745: vs = A->rows[i];
746: for (j=0; j<vs->length; j++) {
747: col = vs->col[j];
748: proc = rtable[col];
749: if (proc != rank) { /* copy to the outgoing buffer */
750: ctr[proc]++;
751: *ptr[proc] = col;
752: ptr[proc]++;
753: } else {
754: row = col - rstart;
755: col = i + rstart;
756: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
757: }
758: }
759: /* Update the headers for the current row */
760: for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
761: if ((ctr_j = ctr[j])) {
762: sbuf1_j = sbuf1[j];
763: k = ++sbuf1_j[0];
764: sbuf1_j[2*k] = ctr_j;
765: sbuf1_j[2*k-1] = i + rstart;
766: }
767: }
768: }
769: /* Check Validity of the outgoing messages */
770: {
771: int sum;
772: for (i=0 ; i<nrqs ; i++) {
773: j = pa[i];
774: if (w3[j] != sbuf1[j][0]) {SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[1] mismatch!\n"); }
775: }
777: for (i=0 ; i<nrqs ; i++) {
778: j = pa[i];
779: sum = 1;
780: for (k = 1; k <= w3[j]; k++) sum += sbuf1[j][2*k]+2;
781: if (sum != w1[2*j]) { SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[2-n] mismatch!\n"); }
782: }
783: }
784:
785: /* Now post the sends */
786: PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
787: for (i=0; i<nrqs; ++i) {
788: j = pa[i];
789: MPI_Isend(sbuf1[j],w1[2*j],MPI_INT,j,tag,comm,s_waits1+i);
790: }
791:
792: /* Receive messages*/
793: PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status);
794: for (i=0; i<nrqr; ++i) {
795: MPI_Waitany(nrqr,r_waits1,&index,r_status+i);
796: /* Process the Message */
797: {
798: int *rbuf1_i,n_row,ct1;
800: rbuf1_i = rbuf1[index];
801: n_row = rbuf1_i[0];
802: ct1 = 2*n_row+1;
803: val = 0.0;
804: /* Optimise this later */
805: for (j=1; j<=n_row; j++) {
806: col = rbuf1_i[2*j-1];
807: for (k=0; k<rbuf1_i[2*j]; k++,ct1++) {
808: row = rbuf1_i[ct1] - rstart;
809: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
810: }
811: }
812: }
813: }
815: PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
816: MPI_Waitall(nrqs,s_waits1,s_status);
818: PetscFree(rtable);
819: PetscFree(w1);
820: PetscFree(pa);
821: PetscFree(rbuf1);
822: PetscFree(sbuf1);
823: PetscFree(r_waits1);
824: PetscFree(s_waits1);
825: PetscFree(r_status);
826: PetscFree(s_status);
827: return(0);
828: }
830: /*
831: This does the BlockSolve portion of the matrix assembly.
832: It is provided in a separate routine so that users can
833: operate on the matrix (using MatScale(), MatShift() etc.) after
834: the matrix has been assembled but before BlockSolve has sucked it
835: in and devoured it.
836: */
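/*
   For illustration only (a minimal sketch, not part of the original file;
   error checking is omitted, A is assumed to be an assembled MATMPIROWBS
   matrix, and the vectors x,y are assumed to already exist):

      PetscScalar two = 2.0;
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      ierr = MatScale(&two,A);      // still operates on the unpermuted a->A
      ierr = MatMult(A,x,y);        // first use triggers MatAssemblyEnd_MPIRowbs_ForBlockSolve()
*/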
837: #undef __FUNCT__
839: int MatAssemblyEnd_MPIRowbs_ForBlockSolve(Mat mat)
840: {
841: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
842: int ierr,ldim,low,high,i;
843: PetscScalar *diag;
846: if ((mat->was_assembled) && (!mat->same_nonzero)) { /* Free the old info */
847: if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);}
848: if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);}
849: }
851: if ((!mat->same_nonzero) || (!mat->was_assembled)) {
852: /* Indicates bypassing cliques in coloring */
853: if (a->bs_color_single) {
854: BSctx_set_si(a->procinfo,100);
855: }
856: /* Form permuted matrix for efficient parallel execution */
857: a->pA = BSmain_perm(a->procinfo,a->A);CHKERRBS(0);
858: /* Set up the communication */
859: a->comm_pA = BSsetup_forward(a->pA,a->procinfo);CHKERRBS(0);
860: } else {
861: /* Repermute the matrix */
862: BSmain_reperm(a->procinfo,a->A,a->pA);CHKERRBS(0);
863: }
865: /* Symmetrically scale the matrix by the diagonal */
866: BSscale_diag(a->pA,a->pA->diag,a->procinfo);CHKERRBS(0);
868: /* Store inverse of square root of permuted diagonal scaling matrix */
869: VecGetLocalSize(a->diag,&ldim);
870: VecGetOwnershipRange(a->diag,&low,&high);
871: VecGetArray(a->diag,&diag);
872: for (i=0; i<ldim; i++) {
873: if (a->pA->scale_diag[i] != 0.0) {
874: diag[i] = 1.0/sqrt(PetscAbsScalar(a->pA->scale_diag[i]));
875: } else {
876: diag[i] = 1.0;
877: }
878: }
879: VecRestoreArray(a->diag,&diag);
880: a->blocksolveassembly = 1;
881: mat->was_assembled = PETSC_TRUE;
882: mat->same_nonzero = PETSC_TRUE;
883: PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs_ForBlockSolve:Completed BlockSolve95 matrix assembly\n");
884: return(0);
885: }
887: #undef __FUNCT__
889: int MatAssemblyEnd_MPIRowbs(Mat mat,MatAssemblyType mode)
890: {
891: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
892: int i,n,row,col,*rows,*cols,ierr,rstart,nzcount,flg,j,ncols;
893: PetscScalar *vals,val;
894: InsertMode addv = mat->insertmode;
897: while (1) {
898: MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
899: if (!flg) break;
900:
901: for (i=0; i<n;) {
902: /* Now identify the consecutive vals belonging to the same row */
903: for (j=i,rstart=rows[j]; j<n; j++) { if (rows[j] != rstart) break; }
904: if (j < n) ncols = j-i;
905: else ncols = n-i;
906: /* Now assemble all these values with a single function call */
907: MatSetValues_MPIRowbs(mat,1,rows+i,ncols,cols+i,vals+i,addv);
908: i = j;
909: }
910: }
911: MatStashScatterEnd_Private(&mat->stash);
913: rstart = a->rstart;
914: nzcount = a->nz; /* This is the number of nonzeros entered by the user */
915: /* BlockSolve requires that the matrix is structurally symmetric */
916: if (mode == MAT_FINAL_ASSEMBLY && !mat->structurally_symmetric) {
917: MatAssemblyEnd_MPIRowbs_MakeSymmetric(mat);
918: }
919:
920: /* BlockSolve requires that all the diagonal elements are set */
921: val = 0.0;
922: for (i=0; i<mat->m; i++) {
923: row = i; col = i + rstart;
924: MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
925: }
926:
927: MatAssemblyBegin_MPIRowbs_local(mat,mode);
928: MatAssemblyEnd_MPIRowbs_local(mat,mode);
929:
930: a->blocksolveassembly = 0;
931: PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs:Matrix size: %d X %d; storage space: %d unneeded,%d used\n",mat->m,mat->n,a->maxnz-a->nz,a->nz);
932: PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs: User entered %d nonzeros, PETSc added %d\n",nzcount,a->nz-nzcount);
933: PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs:Number of mallocs during MatSetValues is %d\n",a->reallocs);
934: return(0);
935: }
937: #undef __FUNCT__
939: int MatZeroEntries_MPIRowbs(Mat mat)
940: {
941: Mat_MPIRowbs *l = (Mat_MPIRowbs*)mat->data;
942: BSspmat *A = l->A;
943: BSsprow *vs;
944: int i,j;
947: for (i=0; i <mat->m; i++) {
948: vs = A->rows[i];
949: for (j=0; j< vs->length; j++) vs->nz[j] = 0.0;
950: }
951: return(0);
952: }
954: /* the code does not do the diagonal entries correctly unless the
955: matrix is square and the column and row ownerships are identical.
956: This is a BUG.
957: */
959: #undef __FUNCT__
961: int MatZeroRows_MPIRowbs(Mat A,IS is,PetscScalar *diag)
962: {
963: Mat_MPIRowbs *l = (Mat_MPIRowbs*)A->data;
964: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
965: int *nprocs,j,idx,nsends;
966: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
967: int *rvalues,tag = A->tag,count,base,slen,n,*source;
968: int *lens,imdex,*lrows,*values;
969: MPI_Comm comm = A->comm;
970: MPI_Request *send_waits,*recv_waits;
971: MPI_Status recv_status,*send_status;
972: IS istmp;
973: PetscTruth found;
976: ISGetLocalSize(is,&N);
977: ISGetIndices(is,&rows);
979: /* first count number of contributors to each processor */
980: ierr = PetscMalloc(2*size*sizeof(int),&nprocs);
981: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
982: ierr = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
983: for (i=0; i<N; i++) {
984: idx = rows[i];
985: found = PETSC_FALSE;
986: for (j=0; j<size; j++) {
987: if (idx >= owners[j] && idx < owners[j+1]) {
988: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
989: }
990: }
991: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
992: }
993: nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}
995: /* inform other processors of number of messages and max length*/
996: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
998: /* post receives: */
999: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1000: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1001: for (i=0; i<nrecvs; i++) {
1002: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1003: }
1005: /* do sends:
1006: 1) starts[i] gives the starting index in svalues for stuff going to
1007: the ith processor
1008: */
1009: PetscMalloc((N+1)*sizeof(int),&svalues);
1010: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1011: PetscMalloc((size+1)*sizeof(int),&starts);
1012: starts[0] = 0;
1013: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1014: for (i=0; i<N; i++) {
1015: svalues[starts[owner[i]]++] = rows[i];
1016: }
1017: ISRestoreIndices(is,&rows);
1019: starts[0] = 0;
1020: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1021: count = 0;
1022: for (i=0; i<size; i++) {
1023: if (nprocs[2*i+1]) {
1024: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1025: }
1026: }
1027: PetscFree(starts);
1029: base = owners[rank];
1031: /* wait on receives */
1032: PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1033: source = lens + nrecvs;
1034: count = nrecvs; slen = 0;
1035: while (count) {
1036: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1037: /* unpack receives into our local space */
1038: MPI_Get_count(&recv_status,MPI_INT,&n);
1039: source[imdex] = recv_status.MPI_SOURCE;
1040: lens[imdex] = n;
1041: slen += n;
1042: count--;
1043: }
1044: PetscFree(recv_waits);
1045:
1046: /* move the data into the send scatter */
1047: PetscMalloc((slen+1)*sizeof(int),&lrows);
1048: count = 0;
1049: for (i=0; i<nrecvs; i++) {
1050: values = rvalues + i*nmax;
1051: for (j=0; j<lens[i]; j++) {
1052: lrows[count++] = values[j] - base;
1053: }
1054: }
1055: PetscFree(rvalues);
1056: PetscFree(lens);
1057: PetscFree(owner);
1058: PetscFree(nprocs);
1059:
1060: /* actually zap the local rows */
1061: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
1062: PetscLogObjectParent(A,istmp);
1063: PetscFree(lrows);
1064: MatZeroRows_MPIRowbs_local(A,istmp,diag);
1065: ISDestroy(istmp);
1067: /* wait on sends */
1068: if (nsends) {
1069: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1070: MPI_Waitall(nsends,send_waits,send_status);
1071: PetscFree(send_status);
1072: }
1073: PetscFree(send_waits);
1074: PetscFree(svalues);
1076: return(0);
1077: }
1079: #undef __FUNCT__
1081: int MatNorm_MPIRowbs(Mat mat,NormType type,PetscReal *norm)
1082: {
1083: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1084: BSsprow *vs,**rs;
1085: PetscScalar *xv;
1086: PetscReal sum = 0.0;
1087: int *xi,nz,i,j,ierr;
1090: if (a->size == 1) {
1091: MatNorm_MPIRowbs_local(mat,type,norm);
1092: } else {
1093: rs = a->A->rows;
1094: if (type == NORM_FROBENIUS) {
1095: for (i=0; i<mat->m; i++) {
1096: vs = *rs++;
1097: nz = vs->length;
1098: xv = vs->nz;
1099: while (nz--) {
1100: #if defined(PETSC_USE_COMPLEX)
1101: sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
1102: #else
1103: sum += (*xv)*(*xv); xv++;
1104: #endif
1105: }
1106: }
1107: ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1108: *norm = sqrt(*norm);
1109: } else if (type == NORM_1) { /* max column norm */
1110: PetscReal *tmp,*tmp2;
1111: ierr = PetscMalloc(mat->n*sizeof(PetscReal),&tmp);
1112: ierr = PetscMalloc(mat->n*sizeof(PetscReal),&tmp2);
1113: ierr = PetscMemzero(tmp,mat->n*sizeof(PetscReal));
1114: *norm = 0.0;
1115: for (i=0; i<mat->m; i++) {
1116: vs = *rs++;
1117: nz = vs->length;
1118: xi = vs->col;
1119: xv = vs->nz;
1120: while (nz--) {
1121: tmp[*xi] += PetscAbsScalar(*xv);
1122: xi++; xv++;
1123: }
1124: }
1125: MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1126: for (j=0; j<mat->n; j++) {
1127: if (tmp2[j] > *norm) *norm = tmp2[j];
1128: }
1129: PetscFree(tmp);
1130: PetscFree(tmp2);
1131: } else if (type == NORM_INFINITY) { /* max row norm */
1132: PetscReal ntemp = 0.0;
1133: for (i=0; i<mat->m; i++) {
1134: vs = *rs++;
1135: nz = vs->length;
1136: xv = vs->nz;
1137: sum = 0.0;
1138: while (nz--) {
1139: sum += PetscAbsScalar(*xv); xv++;
1140: }
1141: if (sum > ntemp) ntemp = sum;
1142: }
1143: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1144: } else {
1145: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1146: }
1147: }
1148: return(0);
1149: }
1151: #undef __FUNCT__
1153: int MatMult_MPIRowbs(Mat mat,Vec xx,Vec yy)
1154: {
1155: Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
1156: BSprocinfo *bspinfo = bsif->procinfo;
1157: PetscScalar *xxa,*xworka,*yya;
1158: int ierr;
1161: if (!bsif->blocksolveassembly) {
1162: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1163: }
1165: /* Permute and apply diagonal scaling: [ xwork = D^{1/2} * x ] */
1166: if (!bsif->vecs_permscale) {
1167: VecGetArray(bsif->xwork,&xworka);
1168: VecGetArray(xx,&xxa);
1169: BSperm_dvec(xxa,xworka,bsif->pA->perm);CHKERRBS(0);
1170: VecRestoreArray(bsif->xwork,&xworka);
1171: VecRestoreArray(xx,&xxa);
1172: VecPointwiseDivide(bsif->xwork,bsif->diag,xx);
1173: }
1175: VecGetArray(xx,&xxa);
1176: VecGetArray(yy,&yya);
1177: /* Do lower triangular multiplication: [ y = L * xwork ] */
1178: if (bspinfo->single) {
1179: BSforward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1180: } else {
1181: BSforward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1182: }
1183:
1184: /* Do upper triangular multiplication: [ y = y + L^{T} * xwork ] */
1185: if (mat->symmetric) {
1186: if (bspinfo->single){
1187: BSbackward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1188: } else {
1189: BSbackward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1190: }
1191: }
1192: /* not needed for ILU version since forward does it all */
1193: VecRestoreArray(xx,&xxa);
1194: VecRestoreArray(yy,&yya);
1196: /* Apply diagonal scaling to vector: [ y = D^{1/2} * y ] */
1197: if (!bsif->vecs_permscale) {
1198: VecGetArray(bsif->xwork,&xworka);
1199: VecGetArray(xx,&xxa);
1200: BSiperm_dvec(xworka,xxa,bsif->pA->perm);CHKERRBS(0);
1201: VecRestoreArray(bsif->xwork,&xworka);
1202: VecRestoreArray(xx,&xxa);
1203: VecPointwiseDivide(yy,bsif->diag,bsif->xwork);
1204: VecGetArray(bsif->xwork,&xworka);
1205: VecGetArray(yy,&yya);
1206: BSiperm_dvec(xworka,yya,bsif->pA->perm);CHKERRBS(0);
1207: VecRestoreArray(bsif->xwork,&xworka);
1208: VecRestoreArray(yy,&yya);
1209: }
1210: PetscLogFlops(2*bsif->nz - mat->m);
1212: return(0);
1213: }
1215: #undef __FUNCT__
1217: int MatMultAdd_MPIRowbs(Mat mat,Vec xx,Vec yy,Vec zz)
1218: {
1219: int ierr;
1220: PetscScalar one = 1.0;
1223: (*mat->ops->mult)(mat,xx,zz);
1224: VecAXPY(&one,yy,zz);
1225: return(0);
1226: }
1228: #undef __FUNCT__
1230: int MatGetInfo_MPIRowbs(Mat A,MatInfoType flag,MatInfo *info)
1231: {
1232: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
1233: PetscReal isend[5],irecv[5];
1234: int ierr;
1237: info->rows_global = (double)A->M;
1238: info->columns_global = (double)A->N;
1239: info->rows_local = (double)A->m;
1240: info->columns_local = (double)A->N;
1241: info->block_size = 1.0;
1242: info->mallocs = (double)mat->reallocs;
1243: isend[0] = mat->nz; isend[1] = mat->maxnz; isend[2] = mat->maxnz - mat->nz;
1244: isend[3] = A->mem; isend[4] = info->mallocs;
1246: if (flag == MAT_LOCAL) {
1247: info->nz_used = isend[0];
1248: info->nz_allocated = isend[1];
1249: info->nz_unneeded = isend[2];
1250: info->memory = isend[3];
1251: info->mallocs = isend[4];
1252: } else if (flag == MAT_GLOBAL_MAX) {
1253: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
1254: info->nz_used = irecv[0];
1255: info->nz_allocated = irecv[1];
1256: info->nz_unneeded = irecv[2];
1257: info->memory = irecv[3];
1258: info->mallocs = irecv[4];
1259: } else if (flag == MAT_GLOBAL_SUM) {
1260: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
1261: info->nz_used = irecv[0];
1262: info->nz_allocated = irecv[1];
1263: info->nz_unneeded = irecv[2];
1264: info->memory = irecv[3];
1265: info->mallocs = irecv[4];
1266: }
1267: return(0);
1268: }
1270: #undef __FUNCT__
1272: int MatGetDiagonal_MPIRowbs(Mat mat,Vec v)
1273: {
1274: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1275: BSsprow **rs = a->A->rows;
1276: int i,n,ierr;
1277: PetscScalar *x,zero = 0.0;
1280: if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1281: if (!a->blocksolveassembly) {
1282: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1283: }
1285: VecSet(&zero,v);
1286: VecGetLocalSize(v,&n);
1287: if (n != mat->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
1288: VecGetArray(v,&x);
1289: for (i=0; i<mat->m; i++) {
1290: x[i] = rs[i]->nz[rs[i]->diag_ind];
1291: }
1292: VecRestoreArray(v,&x);
1293: return(0);
1294: }
1296: #undef __FUNCT__
1298: int MatDestroy_MPIRowbs(Mat mat)
1299: {
1300: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1301: BSspmat *A = a->A;
1302: BSsprow *vs;
1303: int i,ierr;
1306: #if defined(PETSC_USE_LOG)
1307: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1308: #endif
1309: PetscFree(a->rowners);
1310: MatStashDestroy_Private(&mat->stash);
1311: if (a->bsmap) {
1312: if (a->bsmap->vlocal2global) {PetscFree(a->bsmap->vlocal2global);}
1313: if (a->bsmap->vglobal2local) {PetscFree(a->bsmap->vglobal2local);}
1314: if (a->bsmap->vglobal2proc) (*a->bsmap->free_g2p)(a->bsmap->vglobal2proc);
1315: PetscFree(a->bsmap);
1316: }
1318: if (A) {
1319: for (i=0; i<mat->m; i++) {
1320: vs = A->rows[i];
1321: MatFreeRowbs_Private(mat,vs->length,vs->col,vs->nz);
1322: }
1323: /* Note: A->map = a->bsmap is freed above */
1324: PetscFree(A->rows);
1325: PetscFree(A);
1326: }
1327: if (a->procinfo) {BSfree_ctx(a->procinfo);CHKERRBS(0);}
1328: if (a->diag) {VecDestroy(a->diag);}
1329: if (a->xwork) {VecDestroy(a->xwork);}
1330: if (a->pA) {BSfree_par_mat(a->pA);CHKERRBS(0);}
1331: if (a->fpA) {BSfree_copy_par_mat(a->fpA);CHKERRBS(0);}
1332: if (a->comm_pA) {BSfree_comm(a->comm_pA);CHKERRBS(0);}
1333: if (a->comm_fpA) {BSfree_comm(a->comm_fpA);CHKERRBS(0);}
1334: if (a->imax) {PetscFree(a->imax);}
1335: PetscFree(a);
1336: return(0);
1337: }
1339: #undef __FUNCT__
1341: int MatSetOption_MPIRowbs(Mat A,MatOption op)
1342: {
1343: Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
1346: switch (op) {
1347: case MAT_ROW_ORIENTED:
1348: a->roworiented = PETSC_TRUE;
1349: break;
1350: case MAT_COLUMN_ORIENTED:
1351: a->roworiented = PETSC_FALSE;
1352: break;
1353: case MAT_COLUMNS_SORTED:
1354: a->sorted = 1;
1355: break;
1356: case MAT_COLUMNS_UNSORTED:
1357: a->sorted = 0;
1358: break;
1359: case MAT_NO_NEW_NONZERO_LOCATIONS:
1360: a->nonew = 1;
1361: break;
1362: case MAT_YES_NEW_NONZERO_LOCATIONS:
1363: a->nonew = 0;
1364: break;
1365: case MAT_DO_NOT_USE_INODES:
1366: a->bs_color_single = 1;
1367: break;
1368: case MAT_YES_NEW_DIAGONALS:
1369: case MAT_ROWS_SORTED:
1370: case MAT_NEW_NONZERO_LOCATION_ERR:
1371: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1372: case MAT_ROWS_UNSORTED:
1373: case MAT_USE_HASH_TABLE:
1374: PetscLogInfo(A,"MatSetOption_MPIRowbs:Option ignored\n");
1375: break;
1376: case MAT_IGNORE_OFF_PROC_ENTRIES:
1377: a->donotstash = PETSC_TRUE;
1378: break;
1379: case MAT_NO_NEW_DIAGONALS:
1380: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1381: break;
1382: case MAT_KEEP_ZEROED_ROWS:
1383: SETERRQ(PETSC_ERR_SUP,"MAT_KEEP_ZEROED_ROWS");
1384: break;
1385: default:
1386: SETERRQ(PETSC_ERR_SUP,"unknown option");
1387: break;
1388: }
1389: return(0);
1390: }
1392: #undef __FUNCT__
1394: int MatGetRow_MPIRowbs(Mat AA,int row,int *nz,int **idx,PetscScalar **v)
1395: {
1396: Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
1397: BSspmat *A = mat->A;
1398: BSsprow *rs;
1399:
1401: if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1403: rs = A->rows[row - mat->rstart];
1404: *nz = rs->length;
1405: if (v) *v = rs->nz;
1406: if (idx) *idx = rs->col;
1407: return(0);
1408: }
1410: #undef __FUNCT__
1412: int MatRestoreRow_MPIRowbs(Mat A,int row,int *nz,int **idx,PetscScalar **v)
1413: {
1415: return(0);
1416: }
1418: /* ------------------------------------------------------------------ */
1420: #undef __FUNCT__
1422: int MatPrintHelp_MPIRowbs(Mat A)
1423: {
1424: static PetscTruth called = PETSC_FALSE;
1425: MPI_Comm comm = A->comm;
1426: int ierr;
1429: if (called) {return(0);} else called = PETSC_TRUE;
1430: (*PetscHelpPrintf)(comm," Options for MATMPIROWBS matrix format (needed for BlockSolve):\n");
1431: (*PetscHelpPrintf)(comm," -mat_rowbs_no_inode - Do not use inodes\n");
1432: return(0);
1433: }
1435: #undef __FUNCT__
1437: int MatSetUpPreallocation_MPIRowbs(Mat A)
1438: {
1439: int ierr;
1442: MatMPIRowbsSetPreallocation(A,PETSC_DEFAULT,0);
1443: return(0);
1444: }
1446: /* -------------------------------------------------------------------*/
1447: EXTERN int MatCholeskyFactorNumeric_MPIRowbs(Mat,Mat*);
1448: EXTERN int MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat,IS,PetscReal,int,Mat *);
1449: EXTERN int MatLUFactorNumeric_MPIRowbs(Mat,Mat*);
1450: EXTERN int MatILUFactorSymbolic_MPIRowbs(Mat,IS,IS,MatILUInfo*,Mat *);
1451: EXTERN int MatSolve_MPIRowbs(Mat,Vec,Vec);
1452: EXTERN int MatForwardSolve_MPIRowbs(Mat,Vec,Vec);
1453: EXTERN int MatBackwardSolve_MPIRowbs(Mat,Vec,Vec);
1454: EXTERN int MatScaleSystem_MPIRowbs(Mat,Vec,Vec);
1455: EXTERN int MatUnScaleSystem_MPIRowbs(Mat,Vec,Vec);
1456: EXTERN int MatUseScaledForm_MPIRowbs(Mat,PetscTruth);
1458: static struct _MatOps MatOps_Values = {MatSetValues_MPIRowbs,
1459: MatGetRow_MPIRowbs,
1460: MatRestoreRow_MPIRowbs,
1461: MatMult_MPIRowbs,
1462: MatMultAdd_MPIRowbs,
1463: MatMult_MPIRowbs,
1464: MatMultAdd_MPIRowbs,
1465: MatSolve_MPIRowbs,
1466: 0,
1467: 0,
1468: 0,
1469: 0,
1470: 0,
1471: 0,
1472: 0,
1473: MatGetInfo_MPIRowbs,
1474: 0,
1475: MatGetDiagonal_MPIRowbs,
1476: 0,MatNorm_MPIRowbs,
1477: MatAssemblyBegin_MPIRowbs,
1478: MatAssemblyEnd_MPIRowbs,
1479: 0,
1480: MatSetOption_MPIRowbs,
1481: MatZeroEntries_MPIRowbs,
1482: MatZeroRows_MPIRowbs,
1483: 0,
1484: MatLUFactorNumeric_MPIRowbs,
1485: 0,
1486: MatCholeskyFactorNumeric_MPIRowbs,
1487: MatSetUpPreallocation_MPIRowbs,
1488: MatILUFactorSymbolic_MPIRowbs,
1489: MatIncompleteCholeskyFactorSymbolic_MPIRowbs,
1490: 0,
1491: 0,
1492: 0,
1493: MatForwardSolve_MPIRowbs,
1494: MatBackwardSolve_MPIRowbs,
1495: 0,
1496: 0,
1497: 0,
1498: 0,
1499: 0,
1500: 0,
1501: 0,
1502: MatPrintHelp_MPIRowbs,
1503: MatScale_MPIRowbs,
1504: 0,
1505: 0,
1506: 0,
1507: 0,
1508: 0,
1509: 0,
1510: 0,
1511: 0,
1512: 0,
1513: 0,
1514: 0,
1515: 0,
1516: 0,
1517: 0,
1518: MatDestroy_MPIRowbs,
1519: MatView_MPIRowbs,
1520: MatGetPetscMaps_Petsc,
1521: MatUseScaledForm_MPIRowbs,
1522: MatScaleSystem_MPIRowbs,
1523: MatUnScaleSystem_MPIRowbs};
1525: /* ------------------------------------------------------------------- */
1528: EXTERN_C_BEGIN
1529: #undef __FUNCT__
1531: int MatCreate_MPIRowbs(Mat A)
1532: {
1533: Mat_MPIRowbs *a;
1534: BSmapping *bsmap;
1535: BSoff_map *bsoff;
1536: int i,ierr,*offset,m,M;
1537: PetscTruth flg1,flg2,flg3;
1538: BSprocinfo *bspinfo;
1539: MPI_Comm comm;
1540:
1542: comm = A->comm;
1543: m = A->m;
1544: M = A->M;
1546: ierr = PetscNew(Mat_MPIRowbs,&a);
1547: A->data = (void*)a;
1548: ierr = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1549: A->factor = 0;
1550: A->mapping = 0;
1551: a->vecs_permscale = PETSC_FALSE;
1552: A->insertmode = NOT_SET_VALUES;
1553: a->blocksolveassembly = 0;
1554: MPI_Comm_rank(comm,&a->rank);
1555: MPI_Comm_size(comm,&a->size);
1557: PetscSplitOwnership(comm,&m,&M);
1559: A->N = M;
1560: A->M = M;
1561: A->m = m;
1562: A->n = A->N; /* each row stores all columns */
1563: ierr = PetscMalloc((A->m+1)*sizeof(int),&a->imax);
1564: a->reallocs = 0;
1566: /* the information in the maps duplicates the information computed below, eventually
1567: we should remove the duplicate information that is not contained in the maps */
1568: PetscMapCreateMPI(comm,m,M,&A->rmap);
1569: PetscMapCreateMPI(comm,m,M,&A->cmap);
1571: /* build local table of row ownerships */
1572: ierr = PetscMalloc((a->size+2)*sizeof(int),&a->rowners);
1573: ierr = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1574: a->rowners[0] = 0;
1575: for (i=2; i<=a->size; i++) {
1576: a->rowners[i] += a->rowners[i-1];
1577: }
1578: a->rstart = a->rowners[a->rank];
1579: a->rend = a->rowners[a->rank+1];
1580: PetscLogObjectMemory(A,(A->m+a->size+3)*sizeof(int));
1582: /* build cache for off array entries formed */
1583: MatStashCreate_Private(A->comm,1,&A->stash);
1584: a->donotstash = PETSC_FALSE;
1586: /* Initialize BlockSolve information */
1587: a->A = 0;
1588: a->pA = 0;
1589: a->comm_pA = 0;
1590: a->fpA = 0;
1591: a->comm_fpA = 0;
1592: a->alpha = 1.0;
1593: a->ierr = 0;
1594: a->failures = 0;
1595: VecCreateMPI(A->comm,A->m,A->M,&(a->diag));
1596: VecDuplicate(a->diag,&(a->xwork));
1597: PetscLogObjectParent(A,a->diag); PetscLogObjectParent(A,a->xwork);
1598: PetscLogObjectMemory(A,(A->m+1)*sizeof(PetscScalar));
1599: bspinfo = BScreate_ctx();CHKERRBS(0);
1600: a->procinfo = bspinfo;
1601: BSctx_set_id(bspinfo,a->rank);CHKERRBS(0);
1602: BSctx_set_np(bspinfo,a->size);CHKERRBS(0);
1603: BSctx_set_ps(bspinfo,comm);CHKERRBS(0);
1604: BSctx_set_cs(bspinfo,INT_MAX);CHKERRBS(0);
1605: BSctx_set_is(bspinfo,INT_MAX);CHKERRBS(0);
1606: BSctx_set_ct(bspinfo,IDO);CHKERRBS(0);
1607: #if defined(PETSC_USE_DEBUG)
1608: BSctx_set_err(bspinfo,1);CHKERRBS(0); /* BS error checking */
1609: #endif
1610: BSctx_set_rt(bspinfo,1);CHKERRBS(0);
1611: PetscOptionsHasName(PETSC_NULL,"-log_info",&flg1);
1612: if (flg1) {
1613: BSctx_set_pr(bspinfo,1);CHKERRBS(0);
1614: }
1615: PetscOptionsHasName(PETSC_NULL,"-pc_ilu_factorpointwise",&flg1);
1616: PetscOptionsHasName(PETSC_NULL,"-pc_icc_factorpointwise",&flg2);
1617: PetscOptionsHasName(PETSC_NULL,"-mat_rowbs_no_inode",&flg3);
1618: if (flg1 || flg2 || flg3) {
1619: BSctx_set_si(bspinfo,1);CHKERRBS(0);
1620: } else {
1621: BSctx_set_si(bspinfo,0);CHKERRBS(0);
1622: }
1623: #if defined(PETSC_USE_LOG)
1624: MLOG_INIT(); /* Initialize logging */
1625: #endif
1627: /* Compute global offsets */
1628: offset = &a->rstart;
1630: PetscNew(BSmapping,&a->bsmap);
1631: PetscLogObjectMemory(A,sizeof(BSmapping));
1632: bsmap = a->bsmap;
1633: ierr = PetscMalloc(sizeof(int),&bsmap->vlocal2global);
1634: *((int *)bsmap->vlocal2global) = (*offset);
1635: bsmap->flocal2global = BSloc2glob;
1636: bsmap->free_l2g = 0;
1637: ierr = PetscMalloc(sizeof(int),&bsmap->vglobal2local);
1638: *((int *)bsmap->vglobal2local) = (*offset);
1639: bsmap->fglobal2local = BSglob2loc;
1640: bsmap->free_g2l = 0;
1641: bsoff = BSmake_off_map(*offset,bspinfo,A->M);
1642: bsmap->vglobal2proc = (void *)bsoff;
1643: bsmap->fglobal2proc = BSglob2proc;
1644: bsmap->free_g2p = (void(*)(void*)) BSfree_off_map;
1645: return(0);
1646: }
1647: EXTERN_C_END
1649: #undef __FUNCT__
1651: /* @
1652: MatMPIRowbsSetPreallocation - Sets the number of expected nonzeros
1653: per row in the matrix.
1655: Input Parameter:
1656: + mat - matrix
1657: . nz - maximum expected for any row
1658: . nnz - number expected in each row
1660: Note:
1661: This routine is valid only for matrices stored in the MATMPIROWBS
1662: format.
1663: @ */
1664: int MatMPIRowbsSetPreallocation(Mat mat,int nz,int *nnz)
1665: {
1666: PetscTruth ismpirowbs;
1667: int ierr;
1670: PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);
1671: if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type");
1672: mat->preallocated = PETSC_TRUE;
1673: MatCreateMPIRowbs_local(mat,nz,nnz);
1674: return(0);
1675: }
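/*
   Illustrative usage (a sketch, not from the original source; error checking
   is omitted, and m,M are assumed to hold the local and global row counts):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,m,m,M,M,&A);
      ierr = MatSetType(A,MATMPIROWBS);
      ierr = MatMPIRowbsSetPreallocation(A,5,PETSC_NULL);   // about 5 nonzeros expected per row
*/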
1679: /* --------------- extra BlockSolve-specific routines -------------- */
1680: #undef __FUNCT__
1682: /* @
1683: MatGetBSProcinfo - Gets the BlockSolve BSprocinfo context, which the
1684: user can then manipulate to alter the default parameters.
1686: Input Parameter:
1687: mat - matrix
1689: Output Parameter:
1690: procinfo - processor information context
1692: Note:
1693: This routine is valid only for matrices stored in the MATMPIROWBS
1694: format.
1695: @ */
1696: int MatGetBSProcinfo(Mat mat,BSprocinfo *procinfo)
1697: {
1698: Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1699: PetscTruth ismpirowbs;
1700: int ierr;
1703: PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);
1704: if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type");
1705: procinfo = a->procinfo;
1706: return(0);
1707: }
1709: EXTERN_C_BEGIN
1710: #undef __FUNCT__
1712: int MatLoad_MPIRowbs(PetscViewer viewer,MatType type,Mat *newmat)
1713: {
1714: Mat_MPIRowbs *a;
1715: BSspmat *A;
1716: BSsprow **rs;
1717: Mat mat;
1718: int i,nz,ierr,j,rstart,rend,fd,*ourlens,*sndcounts = 0,*procsnz;
1719: int header[4],rank,size,*rowlengths = 0,M,m,*rowners,maxnz,*cols;
1720: PetscScalar *vals;
1721: MPI_Comm comm = ((PetscObject)viewer)->comm;
1722: MPI_Status status;
1725: MPI_Comm_size(comm,&size);
1726: MPI_Comm_rank(comm,&rank);
1727: if (!rank) {
1728: PetscViewerBinaryGetDescriptor(viewer,&fd);
1729: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1730: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not matrix object");
1731: if (header[3] < 0) {
1732: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format,cannot load as MPIRowbs");
1733: }
1734: }
1736: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1737: M = header[1];
1738: /* determine ownership of all rows */
1739: m = M/size + ((M % size) > rank);
1740: ierr = PetscMalloc((size+2)*sizeof(int),&rowners);
1741: ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1742: rowners[0] = 0;
1743: for (i=2; i<=size; i++) {
1744: rowners[i] += rowners[i-1];
1745: }
1746: rstart = rowners[rank];
1747: rend = rowners[rank+1];
1749: /* distribute row lengths to all processors */
1750: PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1751: if (!rank) {
1752: PetscMalloc(M*sizeof(int),&rowlengths);
1753: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1754: PetscMalloc(size*sizeof(int),&sndcounts);
1755: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1756: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1757: PetscFree(sndcounts);
1758: } else {
1759: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1760: }
1762: /* create our matrix */
1763: MatCreateMPIRowbs(comm,m,M,0,ourlens,newmat);
1764: mat = *newmat;
1765: PetscFree(ourlens);
1767: a = (Mat_MPIRowbs*)mat->data;
1768: A = a->A;
1769: rs = A->rows;
1771: if (!rank) {
1772: /* calculate the number of nonzeros on each processor */
1773: PetscMalloc(size*sizeof(int),&procsnz);
1774: PetscMemzero(procsnz,size*sizeof(int));
1775: for (i=0; i<size; i++) {
1776: for (j=rowners[i]; j< rowners[i+1]; j++) {
1777: procsnz[i] += rowlengths[j];
1778: }
1779: }
1780: PetscFree(rowlengths);
1782: /* determine max buffer needed and allocate it */
1783: maxnz = 0;
1784: for (i=0; i<size; i++) {
1785: maxnz = PetscMax(maxnz,procsnz[i]);
1786: }
1787: PetscMalloc(maxnz*sizeof(int),&cols);
1789: /* read in my part of the matrix column indices */
1790: nz = procsnz[0];
1791: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1792:
1793: /* insert it into my part of matrix */
1794: nz = 0;
1795: for (i=0; i<A->num_rows; i++) {
1796: for (j=0; j<a->imax[i]; j++) {
1797: rs[i]->col[j] = cols[nz++];
1798: }
1799: rs[i]->length = a->imax[i];
1800: }
1801: /* read in parts for all other processors */
1802: for (i=1; i<size; i++) {
1803: nz = procsnz[i];
1804: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1805: MPI_Send(cols,nz,MPI_INT,i,mat->tag,comm);
1806: }
1807: PetscFree(cols);
1808: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1810: /* read in my part of the matrix numerical values */
1811: nz = procsnz[0];
1812: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1813:
1814: /* insert it into my part of matrix */
1815: nz = 0;
1816: for (i=0; i<A->num_rows; i++) {
1817: for (j=0; j<a->imax[i]; j++) {
1818: rs[i]->nz[j] = vals[nz++];
1819: }
1820: }
1821: /* read in parts for all other processors */
1822: for (i=1; i<size; i++) {
1823: nz = procsnz[i];
1824: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1825: MPI_Send(vals,nz,MPIU_SCALAR,i,mat->tag,comm);
1826: }
1827: PetscFree(vals);
1828: PetscFree(procsnz);
1829: } else {
1830: /* determine buffer space needed for message */
1831: nz = 0;
1832: for (i=0; i<A->num_rows; i++) {
1833: nz += a->imax[i];
1834: }
1835: PetscMalloc(nz*sizeof(int),&cols);
1837: /* receive message of column indices*/
1838: MPI_Recv(cols,nz,MPI_INT,0,mat->tag,comm,&status);
1839: MPI_Get_count(&status,MPI_INT,&maxnz);
1840: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");
1842: /* insert it into my part of matrix */
1843: nz = 0;
1844: for (i=0; i<A->num_rows; i++) {
1845: for (j=0; j<a->imax[i]; j++) {
1846: rs[i]->col[j] = cols[nz++];
1847: }
1848: rs[i]->length = a->imax[i];
1849: }
1850: PetscFree(cols);
1851: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1853: /* receive message of values*/
1854: MPI_Recv(vals,nz,MPIU_SCALAR,0,mat->tag,comm,&status);
1855: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1856: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong");
1858: /* insert it into my part of matrix */
1859: nz = 0;
1860: for (i=0; i<A->num_rows; i++) {
1861: for (j=0; j<a->imax[i]; j++) {
1862: rs[i]->nz[j] = vals[nz++];
1863: }
1864: rs[i]->length = a->imax[i];
1865: }
1866: PetscFree(vals);
1867: }
1868: PetscFree(rowners);
1869: a->nz = a->maxnz;
1870: MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
1871: MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
1872: return(0);
1873: }
1874: EXTERN_C_END
1876: /*
1877: Special destroy and view routines for factored matrices
1878: */
1879: #undef __FUNCT__
1881: static int MatDestroy_MPIRowbs_Factored(Mat mat)
1882: {
1884: #if defined(PETSC_USE_LOG)
1885: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1886: #endif
1887: return(0);
1888: }
1890: #undef __FUNCT__
1892: static int MatView_MPIRowbs_Factored(Mat mat,PetscViewer viewer)
1893: {
1897: MatView((Mat) mat->data,viewer);
1898: return(0);
1899: }
1901: #undef __FUNCT__
1903: int MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat mat,IS isrow,PetscReal f,int fill,Mat *newfact)
1904: {
1905: /* Note: f is not currently used in BlockSolve */
1906: Mat newmat;
1907: Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1908: int ierr;
1909: PetscTruth idn;
1912: if (isrow) {
1913: ISIdentity(isrow,&idn);
1914: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
1915: }
1917: if (!mat->symmetric) {
1918: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use incomplete Cholesky n
1919: preconditioning with a MATMPIROWBS matrix you must declare it to be n
1920: symmetric using the option MatSetOption(A,MAT_SYMMETRIC)");
1921: }
1923: if (!mbs->blocksolveassembly) {
1924: BSset_mat_icc_storage(mbs->A,PETSC_TRUE);CHKERRBS(0);
1925: BSset_mat_symmetric(mbs->A,PETSC_TRUE);CHKERRBS(0);
1926: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1927: }
1929: /* Copy permuted matrix */
1930: if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
1931: mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);
1933: /* Set up the communication for factorization */
1934: if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
1935: mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);
1937: /*
1938: Create a new Mat structure to hold the "factored" matrix,
1939: note that this merely contains a pointer to the original matrix, since
1940: the original matrix contains the factor information.
1941: */
1942: PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
1943: PetscLogObjectCreate(newmat);
1944: PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));
1946: newmat->data = (void*)mat;
1947: ierr = PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
1948: newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
1949: newmat->ops->view = MatView_MPIRowbs_Factored;
1950: newmat->factor = 1;
1951: newmat->preallocated = PETSC_TRUE;
1952: newmat->M = mat->M;
1953: newmat->N = mat->N;
1954: newmat->m = mat->m;
1955: newmat->n = mat->n;
1956: PetscStrallocpy(MATMPIROWBS,&newmat->type_name);
1958: *newfact = newmat;
1959: return(0);
1960: }
1962: #undef __FUNCT__
1964: int MatILUFactorSymbolic_MPIRowbs(Mat mat,IS isrow,IS iscol,MatILUInfo* info,Mat *newfact)
1965: {
1966: Mat newmat;
1967: Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1968: int ierr;
1969: PetscTruth idn;
1972: if (info->levels != 0) SETERRQ(1,"Blocksolve ILU only supports 0 fill");
1973: if (isrow) {
1974: ISIdentity(isrow,&idn);
1975: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
1976: }
1977: if (iscol) {
1978: ISIdentity(iscol,&idn);
1979: if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
1980: }
1982: if (!mbs->blocksolveassembly) {
1983: MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1984: }
1985:
1986: if (mat->symmetric) {
1987: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use ILU preconditioner with n
1988: MatCreateMPIRowbs() matrix you CANNOT declare it to be a symmetric matrixn
1989: using the option MatSetOption(A,MAT_SYMMETRIC)");
1990: }
1992: /* Copy permuted matrix */
1993: if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
1994: mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);
1996: /* Set up the communication for factorization */
1997: if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
1998: mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);
2000: /*
2001: Create a new Mat structure to hold the "factored" matrix,
2002: note that this merely contains a pointer to the original matrix, since
2003: the original matrix contains the factor information.
2004: */
2005: PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2006: PetscLogObjectCreate(newmat);
2007: PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));
2009: newmat->data = (void*)mat;
2010: ierr = PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2011: newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2012: newmat->ops->view = MatView_MPIRowbs_Factored;
2013: newmat->factor = 1;
2014: newmat->preallocated = PETSC_TRUE;
2015: newmat->M = mat->M;
2016: newmat->N = mat->N;
2017: newmat->m = mat->m;
2018: newmat->n = mat->n;
2019: PetscStrallocpy(MATMPIROWBS,&newmat->type_name);
2021: *newfact = newmat;
2022: return(0);
2023: }
2025: #undef __FUNCT__
2027: int MatMPIRowbsGetColor(Mat mat,ISColoring *coloring)
2028: {
2029: int ierr;
2033: if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2034: ISColoringCreate(mat->comm,mat->m,0,coloring);
2036: return(0);
2037: }
2039: #undef __FUNCT__
2041: /*@C
2042: MatCreateMPIRowbs - Creates a sparse parallel matrix in the MATMPIROWBS
2043: format. This format is intended primarily as an interface for BlockSolve95.
2045: Collective on MPI_Comm
2047: Input Parameters:
2048: + comm - MPI communicator
2049: . m - number of local rows (or PETSC_DECIDE to have calculated)
2050: . M - number of global rows (or PETSC_DECIDE to have calculated)
2051: . nz - number of nonzeros per row (same for all local rows)
2052: - nnz - number of nonzeros per row (possibly different for each row).
2054: Output Parameter:
2055: . newA - the matrix
2057: Notes:
2058: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2059: then it must be used on all processors that share the object for that argument.
2061: The user MUST specify either the local or global matrix dimensions
2062: (possibly both).
2064: Specify the preallocated storage with either nz or nnz (not both). Set
2065: nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2066: allocation.
2068: Notes:
2069: By default, the matrix is assumed to be nonsymmetric; the user can
2070: take advantage of special optimizations for symmetric matrices by calling
2071: $ MatSetOption(mat,MAT_SYMMETRIC)
2072: BEFORE calling the routine MatAssemblyBegin().
2074: Internally, the MATMPIROWBS format inserts zero elements into the
2075: matrix if necessary, so that nonsymmetric matrices are considered
2076: to be symmetric in terms of their sparsity structure; this format
2077: is required for use of the parallel communication routines within
2078: BlockSolve95. In particular, if the matrix element A[i,j] exists,
2079: then PETSc will internally allocate a 0 value for the element
2080: A[j,i] during MatAssemblyEnd() if the user has not already set
2081: a value for the matrix element A[j,i].
2083: Options Database Keys:
2084: . -mat_rowbs_no_inode - Do not use inodes.
2086: Level: intermediate
2087:
2088: .keywords: matrix, row, symmetric, sparse, parallel, BlockSolve
2090: .seealso: MatCreate(), MatSetValues()
2091: @*/
2092: int MatCreateMPIRowbs(MPI_Comm comm,int m,int M,int nz,int *nnz,Mat *newA)
2093: {
2095:
2097: MatCreate(comm,m,m,M,M,newA);
2098: MatSetType(*newA,MATMPIROWBS);
2099: MatMPIRowbsSetPreallocation(*newA,nz,nnz);
2100: return(0);
2101: }
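/*
   Illustrative usage of MatCreateMPIRowbs() (a minimal sketch, not part of the
   original file; error checking is omitted and the MatSetValues() loop is left
   abstract):

      Mat A;
      ierr = MatCreateMPIRowbs(PETSC_COMM_WORLD,PETSC_DECIDE,M,5,PETSC_NULL,&A);
      ierr = MatSetOption(A,MAT_SYMMETRIC);         // optional, must precede MatAssemblyBegin()
      // ... MatSetValues(A,1,&row,1,&col,&value,INSERT_VALUES) calls ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/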