Actual source code: mpibdiag.c
1: /*$Id: mpibdiag.c,v 1.205 2001/08/10 03:31:02 bsmith Exp $*/
2: /*
3: The basic matrix operations for the Block diagonal parallel
4: matrices.
5: */
6: #include "src/mat/impls/bdiag/mpi/mpibdiag.h"
7: #include "src/vec/vecimpl.h"
9: #undef __FUNCT__
10: #define __FUNCT__ "MatSetValues_MPIBDiag"
11: int MatSetValues_MPIBDiag(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
12: {
13: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
14: int ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;
15: PetscTruth roworiented = mbd->roworiented;
18: for (i=0; i<m; i++) {
19: if (idxm[i] < 0) continue;
20: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
21: if (idxm[i] >= rstart && idxm[i] < rend) {
22: row = idxm[i] - rstart;
23: for (j=0; j<n; j++) {
24: if (idxn[j] < 0) continue;
25: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
26: if (roworiented) {
27: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
28: } else {
29: MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
30: }
31: }
32: } else {
33: if (!mbd->donotstash) {
34: if (roworiented) {
35: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
36: } else {
37: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
38: }
39: }
40: }
41: }
42: return(0);
43: }
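/*
   Editorial sketch (not part of the original source): how the row- vs column-oriented
   value layouts handled above are laid out by the caller.  For a 2x2 logical block with
   rows {0,1} and columns {0,1} owned locally, the same entries can be passed as

      int         rows[2] = {0,1},cols[2] = {0,1};
      PetscScalar vrow[4] = {11,12,21,22};    row-oriented:    v[i*n+j] = A(rows[i],cols[j])
      PetscScalar vcol[4] = {11,21,12,22};    column-oriented: v[i+j*m] = A(rows[i],cols[j])
      ierr = MatSetValues(mat,2,rows,2,cols,vrow,INSERT_VALUES);CHKERRQ(ierr);

   Here mat is assumed to be an already created MATMPIBDIAG matrix owning global rows 0
   and 1, and the numeric values are arbitrary.
*/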
45: #undef __FUNCT__
46: #define __FUNCT__ "MatGetValues_MPIBDiag"
47: int MatGetValues_MPIBDiag(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
48: {
49: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
50: int ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;
53: for (i=0; i<m; i++) {
54: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
55: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
56: if (idxm[i] >= rstart && idxm[i] < rend) {
57: row = idxm[i] - rstart;
58: for (j=0; j<n; j++) {
59: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
60: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
61: MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
62: }
63: } else {
64: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
65: }
66: }
67: return(0);
68: }
70: #undef __FUNCT__
71: #define __FUNCT__ "MatAssemblyBegin_MPIBDiag"
72: int MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
73: {
74: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
75: MPI_Comm comm = mat->comm;
76: int ierr,nstash,reallocs;
77: InsertMode addv;
80: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
81: if (addv == (ADD_VALUES|INSERT_VALUES)) {
82: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
83: }
84: mat->insertmode = addv; /* in case this processor had no cache */
85: MatStashScatterBegin_Private(&mat->stash,mbd->rowners);
86: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
87: PetscLogInfo(0,"MatAssemblyBegin_MPIBDiag:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
88: return(0);
89: }
90: EXTERN int MatSetUpMultiply_MPIBDiag(Mat);
92: #undef __FUNCT__
93: #define __FUNCT__ "MatAssemblyEnd_MPIBDiag"
94: int MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
95: {
96: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
97: Mat_SeqBDiag *mlocal;
98: int i,n,*row,*col;
99: int *tmp1,*tmp2,ierr,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
100: PetscScalar *val;
101: InsertMode addv = mat->insertmode;
105: while (1) {
106: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
107: if (!flg) break;
108:
109: for (i=0; i<n;) {
110: /* Now identify the consecutive vals belonging to the same row */
111: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
112: if (j < n) ncols = j-i;
113: else ncols = n-i;
114: /* Now assemble all these values with a single function call */
115: MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
116: i = j;
117: }
118: }
119: MatStashScatterEnd_Private(&mat->stash);
121: MatAssemblyBegin(mbd->A,mode);
122: MatAssemblyEnd(mbd->A,mode);
124: /* Fix main diagonal location and determine global diagonals */
125: mlocal = (Mat_SeqBDiag*)mbd->A->data;
126: Mblock = mat->M/mlocal->bs; Nblock = mat->N/mlocal->bs;
127: len = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
128: ierr = PetscMalloc(2*len*sizeof(int),&tmp1);
129: tmp2 = tmp1 + len;
130: ierr = PetscMemzero(tmp1,2*len*sizeof(int));
131: mlocal->mainbd = -1;
132: for (i=0; i<mlocal->nd; i++) {
133: if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
134: tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
135: }
136: MPI_Allreduce(tmp1,tmp2,len,MPI_INT,MPI_SUM,mat->comm);
137: ict = 0;
138: for (i=0; i<len; i++) {
139: if (tmp2[i]) {
140: mbd->gdiag[ict] = i - Mblock;
141: ict++;
142: }
143: }
144: mbd->gnd = ict;
145: PetscFree(tmp1);
147: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
148: MatSetUpMultiply_MPIBDiag(mat);
149: }
150: return(0);
151: }
153: #undef __FUNCT__
154: #define __FUNCT__ "MatGetBlockSize_MPIBDiag"
155: int MatGetBlockSize_MPIBDiag(Mat mat,int *bs)
156: {
157: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
158: Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mbd->A->data;
161: *bs = dmat->bs;
162: return(0);
163: }
165: #undef __FUNCT__
166: #define __FUNCT__ "MatZeroEntries_MPIBDiag"
167: int MatZeroEntries_MPIBDiag(Mat A)
168: {
169: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
170: int ierr;
173: MatZeroEntries(l->A);
174: return(0);
175: }
177: /* again this uses the same basic strategy as in the assembly and
178: scatter create routines; we should try to do it systematically
179: if we can figure out the proper level of generality. */
181: /* the code does not do the diagonal entries correctly unless the
182: matrix is square and the column and row ownerships are identical.
183: This is a BUG. The only way to fix it seems to be to access
184: aij->A and aij->B directly and not through the MatZeroRows()
185: routine.
186: */
188: #undef __FUNCT__
189: #define __FUNCT__ "MatZeroRows_MPIBDiag"
190: int MatZeroRows_MPIBDiag(Mat A,IS is,PetscScalar *diag)
191: {
192: Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
193: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
194: int *nprocs,j,idx,nsends;
195: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
196: int *rvalues,tag = A->tag,count,base,slen,n,*source;
197: int *lens,imdex,*lrows,*values;
198: MPI_Comm comm = A->comm;
199: MPI_Request *send_waits,*recv_waits;
200: MPI_Status recv_status,*send_status;
201: IS istmp;
202: PetscTruth found;
205: ISGetLocalSize(is,&N);
206: ISGetIndices(is,&rows);
208: /* first count number of contributors to each processor */
209: ierr = PetscMalloc(2*size*sizeof(int),&nprocs);
210: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
211: ierr = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
212: for (i=0; i<N; i++) {
213: idx = rows[i];
214: found = PETSC_FALSE;
215: for (j=0; j<size; j++) {
216: if (idx >= owners[j] && idx < owners[j+1]) {
217: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
218: }
219: }
220: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
221: }
222: nsends = 0; for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}
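/* illustrative note (added): nprocs is laid out as {count_0,flag_0,count_1,flag_1,...},
   where count_j is the number of rows in is owned by process j and flag_j marks that a
   message must be sent there.  For example, with size=3, three rows bound for process 0
   and one for process 2 give nprocs = {3,1, 0,0, 1,1} and hence nsends = 2. */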
224: /* inform other processors of number of messages and max length*/
225: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
227: /* post receives: */
228: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
229: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
230: for (i=0; i<nrecvs; i++) {
231: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
232: }
234: /* do sends:
235: 1) starts[i] gives the starting index in svalues for stuff going to
236: the ith processor
237: */
238: PetscMalloc((N+1)*sizeof(int),&svalues);
239: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
240: PetscMalloc((size+1)*sizeof(int),&starts);
241: starts[0] = 0;
242: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
243: for (i=0; i<N; i++) {
244: svalues[starts[owner[i]]++] = rows[i];
245: }
246: ISRestoreIndices(is,&rows);
248: starts[0] = 0;
249: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
250: count = 0;
251: for (i=0; i<size; i++) {
252: if (nprocs[2*i+1]) {
253: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
254: }
255: }
256: PetscFree(starts);
258: base = owners[rank];
260: /* wait on receives */
261: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
262: source = lens + nrecvs;
263: count = nrecvs;
264: slen = 0;
265: while (count) {
266: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
267: /* unpack receives into our local space */
268: MPI_Get_count(&recv_status,MPI_INT,&n);
269: source[imdex] = recv_status.MPI_SOURCE;
270: lens[imdex] = n;
271: slen += n;
272: count--;
273: }
274: PetscFree(recv_waits);
275:
276: /* move the data into the send scatter */
277: ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);
278: count = 0;
279: for (i=0; i<nrecvs; i++) {
280: values = rvalues + i*nmax;
281: for (j=0; j<lens[i]; j++) {
282: lrows[count++] = values[j] - base;
283: }
284: }
285: PetscFree(rvalues);
286: PetscFree(lens);
287: PetscFree(owner);
288: PetscFree(nprocs);
289:
290: /* actually zap the local rows */
291: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
292: PetscLogObjectParent(A,istmp);
293: PetscFree(lrows);
294: MatZeroRows(l->A,istmp,diag);
295: ISDestroy(istmp);
297: /* wait on sends */
298: if (nsends) {
299: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
300: MPI_Waitall(nsends,send_waits,send_status);
301: PetscFree(send_status);
302: }
303: PetscFree(send_waits);
304: PetscFree(svalues);
306: return(0);
307: }
309: #undef __FUNCT__
310: #define __FUNCT__ "MatMult_MPIBDiag"
311: int MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
312: {
313: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
314: int ierr;
317: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
318: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
319: (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
320: return(0);
321: }
323: #undef __FUNCT__
324: #define __FUNCT__ "MatMultAdd_MPIBDiag"
325: int MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
326: {
327: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
328: int ierr;
331: VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
332: VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
333: (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
334: return(0);
335: }
337: #undef __FUNCT__
338: #define __FUNCT__ "MatMultTranspose_MPIBDiag"
339: int MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
340: {
341: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
342: int ierr;
343: PetscScalar zero = 0.0;
346: VecSet(&zero,yy);
347: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
348: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
349: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
350: return(0);
351: }
353: #undef __FUNCT__
354: #define __FUNCT__ "MatMultTransposeAdd_MPIBDiag"
355: int MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
356: {
357: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
358: int ierr;
361: VecCopy(yy,zz);
362: (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
363: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
364: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
365: return(0);
366: }
368: #undef __FUNCT__
369: #define __FUNCT__ "MatGetInfo_MPIBDiag"
370: int MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
371: {
372: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
373: Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mat->A->data;
374: int ierr;
375: PetscReal isend[5],irecv[5];
378: info->block_size = (PetscReal)dmat->bs;
379: MatGetInfo(mat->A,MAT_LOCAL,info);
380: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
381: isend[3] = info->memory; isend[4] = info->mallocs;
382: if (flag == MAT_LOCAL) {
383: info->nz_used = isend[0];
384: info->nz_allocated = isend[1];
385: info->nz_unneeded = isend[2];
386: info->memory = isend[3];
387: info->mallocs = isend[4];
388: } else if (flag == MAT_GLOBAL_MAX) {
389: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
390: info->nz_used = irecv[0];
391: info->nz_allocated = irecv[1];
392: info->nz_unneeded = irecv[2];
393: info->memory = irecv[3];
394: info->mallocs = irecv[4];
395: } else if (flag == MAT_GLOBAL_SUM) {
396: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
397: info->nz_used = irecv[0];
398: info->nz_allocated = irecv[1];
399: info->nz_unneeded = irecv[2];
400: info->memory = irecv[3];
401: info->mallocs = irecv[4];
402: }
403: info->rows_global = (double)matin->M;
404: info->columns_global = (double)matin->N;
405: info->rows_local = (double)matin->m;
406: info->columns_local = (double)matin->N;
407: return(0);
408: }
410: #undef __FUNCT__
411: #define __FUNCT__ "MatGetDiagonal_MPIBDiag"
412: int MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
413: {
414: int ierr;
415: Mat_MPIBDiag *A = (Mat_MPIBDiag*)mat->data;
418: MatGetDiagonal(A->A,v);
419: return(0);
420: }
422: #undef __FUNCT__
423: #define __FUNCT__ "MatDestroy_MPIBDiag"
424: int MatDestroy_MPIBDiag(Mat mat)
425: {
426: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
427: int ierr;
428: #if defined(PETSC_USE_LOG)
429: Mat_SeqBDiag *ms = (Mat_SeqBDiag*)mbd->A->data;
432: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d, BSize=%d, NDiag=%d",mat->M,mat->N,ms->bs,ms->nd);
433: #else
435: #endif
436: MatStashDestroy_Private(&mat->stash);
437: PetscFree(mbd->rowners);
438: PetscFree(mbd->gdiag);
439: MatDestroy(mbd->A);
440: if (mbd->lvec) {VecDestroy(mbd->lvec);}
441: if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
442: PetscFree(mbd);
443: return(0);
444: }
447: #undef __FUNCT__
448: #define __FUNCT__ "MatView_MPIBDiag_Binary"
449: static int MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
450: {
451: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
452: int ierr;
455: if (mbd->size == 1) {
456: MatView(mbd->A,viewer);
457: } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
458: return(0);
459: }
461: #undef __FUNCT__
462: #define __FUNCT__ "MatView_MPIBDiag_ASCIIorDraw"
463: static int MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
464: {
465: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
466: Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mbd->A->data;
467: int ierr,i,size = mbd->size,rank = mbd->rank;
468: PetscTruth isascii,isdraw;
469: PetscViewer sviewer;
470: PetscViewerFormat format;
473: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
474: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
475: if (isascii) {
476: PetscViewerGetFormat(viewer,&format);
477: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
478: int nline = PetscMin(10,mbd->gnd),k,nk,np;
479: PetscViewerASCIIPrintf(viewer," block size=%d, total number of diagonals=%d\n",dmat->bs,mbd->gnd);
480: nk = (mbd->gnd-1)/nline + 1;
481: for (k=0; k<nk; k++) {
482: PetscViewerASCIIPrintf(viewer," global diag numbers:");
483: np = PetscMin(nline,mbd->gnd - nline*k);
484: for (i=0; i<np; i++) {
485: PetscViewerASCIIPrintf(viewer," %d",mbd->gdiag[i+nline*k]);
486: }
487: PetscViewerASCIIPrintf(viewer,"\n");
488: }
489: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
490: MatInfo info;
491: MPI_Comm_rank(mat->comm,&rank);
492: MatGetInfo(mat,MAT_LOCAL,&info);
493: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
494: (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
495: PetscViewerFlush(viewer);
496: VecScatterView(mbd->Mvctx,viewer);
497: }
498: return(0);
499: }
500: }
502: if (isdraw) {
503: PetscDraw draw;
504: PetscTruth isnull;
505: PetscViewerDrawGetDraw(viewer,0,&draw);
506: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
507: }
509: if (size == 1) {
510: MatView(mbd->A,viewer);
511: } else {
512: /* assemble the entire matrix onto first processor. */
513: Mat A;
514: int M = mat->M,N = mat->N,m,row,nz,*cols;
515: PetscScalar *vals;
516: Mat_SeqBDiag *Ambd = (Mat_SeqBDiag*)mbd->A->data;
518: if (!rank) {
519: MatCreateMPIBDiag(mat->comm,M,M,N,mbd->gnd,Ambd->bs,mbd->gdiag,PETSC_NULL,&A);
520: } else {
521: MatCreateMPIBDiag(mat->comm,0,M,N,0,Ambd->bs,PETSC_NULL,PETSC_NULL,&A);
522: }
523: PetscLogObjectParent(mat,A);
525: /* Copy the matrix ... This isn't the most efficient means,
526: but it's quick for now */
527: row = mbd->rstart;
528: m = mbd->A->m;
529: for (i=0; i<m; i++) {
530: MatGetRow(mat,row,&nz,&cols,&vals);
531: MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
532: MatRestoreRow(mat,row,&nz,&cols,&vals);
533: row++;
534: }
535: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
536: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
537: PetscViewerGetSingleton(viewer,&sviewer);
538: if (!rank) {
539: MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
540: }
541: PetscViewerRestoreSingleton(viewer,&sviewer);
542: PetscViewerFlush(viewer);
543: MatDestroy(A);
544: }
545: return(0);
546: }
548: #undef __FUNCT__
549: #define __FUNCT__ "MatView_MPIBDiag"
550: int MatView_MPIBDiag(Mat mat,PetscViewer viewer)
551: {
552: int ierr;
553: PetscTruth isascii,isdraw,isbinary;
556: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
557: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
558: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
559: if (isascii || isdraw) {
560: MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
561: } else if (isbinary) {
562: MatView_MPIBDiag_Binary(mat,viewer);
563: } else {
564: SETERRQ1(1,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
565: }
566: return(0);
567: }
569: #undef __FUNCT__
570: #define __FUNCT__ "MatSetOption_MPIBDiag"
571: int MatSetOption_MPIBDiag(Mat A,MatOption op)
572: {
573: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
574: int ierr;
576: switch (op) {
577: case MAT_NO_NEW_NONZERO_LOCATIONS:
578: case MAT_YES_NEW_NONZERO_LOCATIONS:
579: case MAT_NEW_NONZERO_LOCATION_ERR:
580: case MAT_NEW_NONZERO_ALLOCATION_ERR:
581: case MAT_NO_NEW_DIAGONALS:
582: case MAT_YES_NEW_DIAGONALS:
583: MatSetOption(mbd->A,op);
584: break;
585: case MAT_ROW_ORIENTED:
586: mbd->roworiented = PETSC_TRUE;
587: MatSetOption(mbd->A,op);
588: break;
589: case MAT_COLUMN_ORIENTED:
590: mbd->roworiented = PETSC_FALSE;
591: MatSetOption(mbd->A,op);
592: break;
593: case MAT_IGNORE_OFF_PROC_ENTRIES:
594: mbd->donotstash = PETSC_TRUE;
595: break;
596: case MAT_ROWS_SORTED:
597: case MAT_ROWS_UNSORTED:
598: case MAT_COLUMNS_SORTED:
599: case MAT_COLUMNS_UNSORTED:
600: PetscLogInfo(A,"MatSetOption_MPIBDiag:Option ignored\n");
601: break;
602: default:
603: SETERRQ(PETSC_ERR_SUP,"unknown option");
604: }
605: return(0);
606: }
608: #undef __FUNCT__
609: #define __FUNCT__ "MatGetRow_MPIBDiag"
610: int MatGetRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
611: {
612: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
613: int lrow,ierr;
616: if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_SUP,"only for local rows")
617: lrow = row - mat->rstart;
618: MatGetRow(mat->A,lrow,nz,idx,v);
619: return(0);
620: }
622: #undef __FUNCT__
623: #define __FUNCT__ "MatRestoreRow_MPIBDiag"
624: int MatRestoreRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,
625: PetscScalar **v)
626: {
627: Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
628: int lrow,ierr;
631: lrow = row - mat->rstart;
632: MatRestoreRow(mat->A,lrow,nz,idx,v);
633: return(0);
634: }
637: #undef __FUNCT__
638: #define __FUNCT__ "MatNorm_MPIBDiag"
639: int MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *nrm)
640: {
641: Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
642: Mat_SeqBDiag *a = (Mat_SeqBDiag*)mbd->A->data;
643: PetscReal sum = 0.0;
644: int ierr,d,i,nd = a->nd,bs = a->bs,len;
645: PetscScalar *dv;
648: if (type == NORM_FROBENIUS) {
649: for (d=0; d<nd; d++) {
650: dv = a->diagv[d];
651: len = a->bdlen[d]*bs*bs;
652: for (i=0; i<len; i++) {
653: #if defined(PETSC_USE_COMPLEX)
654: sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
655: #else
656: sum += dv[i]*dv[i];
657: #endif
658: }
659: }
660: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
661: *nrm = sqrt(*nrm);
662: PetscLogFlops(2*A->n*A->m);
663: } else if (type == NORM_1) { /* max column norm */
664: PetscReal *tmp,*tmp2;
665: int j;
666: PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp);
667: PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp2);
668: MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->n);
669: *nrm = 0.0;
670: MPI_Allreduce(tmp,tmp2,mbd->A->n,MPIU_REAL,MPI_SUM,A->comm);
671: for (j=0; j<mbd->A->n; j++) {
672: if (tmp2[j] > *nrm) *nrm = tmp2[j];
673: }
674: PetscFree(tmp);
675: PetscFree(tmp2);
676: } else if (type == NORM_INFINITY) { /* max row norm */
677: PetscReal normtemp;
678: MatNorm(mbd->A,type,&normtemp);
679: MPI_Allreduce(&normtemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
680: }
681: return(0);
682: }
684: EXTERN int MatPrintHelp_SeqBDiag(Mat);
685: #undef __FUNCT__
686: #define __FUNCT__ "MatPrintHelp_MPIBDiag"
687: int MatPrintHelp_MPIBDiag(Mat A)
688: {
689: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
690: int ierr;
693: if (!a->rank) {
694: MatPrintHelp_SeqBDiag(a->A);
695: }
696: return(0);
697: }
699: EXTERN int MatScale_SeqBDiag(PetscScalar*,Mat);
700: #undef __FUNCT__
701: #define __FUNCT__ "MatScale_MPIBDiag"
702: int MatScale_MPIBDiag(PetscScalar *alpha,Mat A)
703: {
704: int ierr;
705: Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
708: MatScale_SeqBDiag(alpha,a->A);
709: return(0);
710: }
712: #undef __FUNCT__
713: #define __FUNCT__ "MatSetUpPreallocation_MPIBDiag"
714: int MatSetUpPreallocation_MPIBDiag(Mat A)
715: {
716: int ierr;
719: MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
720: return(0);
721: }
723: /* -------------------------------------------------------------------*/
725: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
726: MatGetRow_MPIBDiag,
727: MatRestoreRow_MPIBDiag,
728: MatMult_MPIBDiag,
729: MatMultAdd_MPIBDiag,
730: MatMultTranspose_MPIBDiag,
731: MatMultTransposeAdd_MPIBDiag,
732: 0,
733: 0,
734: 0,
735: 0,
736: 0,
737: 0,
738: 0,
739: 0,
740: MatGetInfo_MPIBDiag,0,
741: MatGetDiagonal_MPIBDiag,
742: 0,
743: MatNorm_MPIBDiag,
744: MatAssemblyBegin_MPIBDiag,
745: MatAssemblyEnd_MPIBDiag,
746: 0,
747: MatSetOption_MPIBDiag,
748: MatZeroEntries_MPIBDiag,
749: MatZeroRows_MPIBDiag,
750: 0,
751: 0,
752: 0,
753: 0,
754: MatSetUpPreallocation_MPIBDiag,
755: 0,
756: 0,
757: 0,
758: 0,
759: 0,
760: 0,
761: 0,
762: 0,
763: 0,
764: 0,
765: 0,
766: 0,
767: MatGetValues_MPIBDiag,
768: 0,
769: MatPrintHelp_MPIBDiag,
770: MatScale_MPIBDiag,
771: 0,
772: 0,
773: 0,
774: MatGetBlockSize_MPIBDiag,
775: 0,
776: 0,
777: 0,
778: 0,
779: 0,
780: 0,
781: 0,
782: 0,
783: 0,
784: 0,
785: MatDestroy_MPIBDiag,
786: MatView_MPIBDiag,
787: MatGetPetscMaps_Petsc};
789: EXTERN_C_BEGIN
790: #undef __FUNCT__
791: #define __FUNCT__ "MatGetDiagonalBlock_MPIBDiag"
792: int MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
793: {
794: Mat_MPIBDiag *matin = (Mat_MPIBDiag *)A->data;
795: int ierr,lrows,lcols,rstart,rend;
796: IS localc,localr;
799: MatGetLocalSize(A,&lrows,&lcols);
800: MatGetOwnershipRange(A,&rstart,&rend);
801: ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
802: ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
803: MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
804: ISDestroy(localr);
805: ISDestroy(localc);
807: *iscopy = PETSC_TRUE;
808: return(0);
809: }
810: EXTERN_C_END
812: EXTERN_C_BEGIN
813: #undef __FUNCT__
814: #define __FUNCT__ "MatCreate_MPIBDiag"
815: int MatCreate_MPIBDiag(Mat B)
816: {
817: Mat_MPIBDiag *b;
818: int ierr;
821: ierr = PetscNew(Mat_MPIBDiag,&b);
822: B->data = (void*)b;
823: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
824: B->factor = 0;
825: B->mapping = 0;
827: B->insertmode = NOT_SET_VALUES;
828: MPI_Comm_rank(B->comm,&b->rank);
829: MPI_Comm_size(B->comm,&b->size);
831: /* build local table of row ownerships */
832: PetscMalloc((b->size+2)*sizeof(int),&b->rowners);
834: /* build cache for off array entries formed */
835: MatStashCreate_Private(B->comm,1,&B->stash);
836: b->donotstash = PETSC_FALSE;
838: /* stuff used for matrix-vector multiply */
839: b->lvec = 0;
840: b->Mvctx = 0;
842: /* used for MatSetValues() input */
843: b->roworiented = PETSC_TRUE;
845: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
846: "MatGetDiagonalBlock_MPIBDiag",
847: MatGetDiagonalBlock_MPIBDiag);
848: return(0);
849: }
850: EXTERN_C_END
852: #undef __FUNCT__
853: #define __FUNCT__ "MatMPIBDiagSetPreallocation"
854: /*@C
855: MatMPIBDiagSetPreallocation - Sets preallocation information for a sparse parallel matrix in MPIBDiag (block diagonal) format.
857: Collective on Mat
859: Input Parameters:
860: + A - the matrix
861: . nd - number of block diagonals (global) (optional)
862: . bs - each element of a diagonal is a bs x bs dense matrix
863: . diag - optional array of block diagonal numbers (length nd).
864: For a matrix element A[i,j], where i=row and j=column, the
865: diagonal number is
866: $ diag = i/bs - j/bs (integer division)
867: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
868: needed (expensive).
869: - diagv - pointer to actual diagonals (in same order as diag array),
870: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
871: to control memory allocation.
874: Options Database Keys:
875: . -mat_block_size <bs> - Sets blocksize
876: . -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
878: Notes:
879: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
880: then it must be used on all processors that share the object for that argument.
882: The parallel matrix is partitioned across the processors by rows, where
883: each local rectangular matrix is stored in the uniprocessor block
884: diagonal format. See the users manual for further details.
886: The user MUST specify either the local or global numbers of rows
887: (possibly both).
889: The case bs=1 (conventional diagonal storage) is implemented as
890: a special case.
892: Fortran Notes:
893: Fortran programmers cannot set diagv; this variable is ignored.
895: Level: intermediate
897: .keywords: matrix, block, diagonal, parallel, sparse
899: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
900: @*/
901: int MatMPIBDiagSetPreallocation(Mat B,int nd,int bs,int *diag,PetscScalar **diagv)
902: {
903: Mat_MPIBDiag *b;
904: int ierr,i,k,*ldiag,len,nd2;
905: PetscScalar **ldiagv = 0;
906: PetscTruth flg2;
909: PetscTypeCompare((PetscObject)B,MATMPIBDIAG,&flg2);
910: if (!flg2) return(0);
911: B->preallocated = PETSC_TRUE;
912: if (bs == PETSC_DEFAULT) bs = 1;
913: if (nd == PETSC_DEFAULT) nd = 0;
914: PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
915: PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
916: PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
917: if (nd && !diag) {
918: PetscMalloc(nd*sizeof(int),&diag);
919: nd2 = nd;
920: PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
921: if (nd2 != nd) {
922: SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
923: }
924: } else if (flg2) {
925: SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
926: }
928: if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");
930: PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
931: B->n = B->N = PetscMax(B->n,B->N);
933: if ((B->N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
934: if ((B->m%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
935: if ((B->M%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");
937: /* the information in the maps duplicates the information computed below, eventually
938: we should remove the duplicate information that is not contained in the maps */
939: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
940: PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);
943: b = (Mat_MPIBDiag*)B->data;
944: b->gnd = nd;
946: ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
947: b->rowners[0] = 0;
948: for (i=2; i<=b->size; i++) {
949: b->rowners[i] += b->rowners[i-1];
950: }
951: b->rstart = b->rowners[b->rank];
952: b->rend = b->rowners[b->rank+1];
953: b->brstart = (b->rstart)/bs;
954: b->brend = (b->rend)/bs;
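/* worked example (added for illustration): with three processes owning 4, 4 and 2 rows,
   the Allgather/prefix sum above gives rowners = {0,4,8,10}; process 1 then has
   rstart=4, rend=8 and, for bs=2, brstart=2 and brend=4. */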
957: /* Determine local diagonals; for now, assume global rows = global cols */
958: /* These are sorted in MatCreateSeqBDiag */
959: PetscMalloc((nd+1)*sizeof(int),&ldiag);
960: len = B->M/bs + B->N/bs + 1;
961: PetscMalloc(len*sizeof(int),&b->gdiag);
962: k = 0;
963: PetscLogObjectMemory(B,(nd+1)*sizeof(int) + (b->size+2)*sizeof(int)
964: + sizeof(struct _p_Mat) + sizeof(Mat_MPIBDiag));
965: if (diagv) {
966: PetscMalloc((nd+1)*sizeof(PetscScalar*),&ldiagv);
967: }
968: for (i=0; i<nd; i++) {
969: b->gdiag[i] = diag[i];
970: if (diag[i] > 0) { /* lower triangular */
971: if (diag[i] < b->brend) {
972: ldiag[k] = diag[i] - b->brstart;
973: if (diagv) ldiagv[k] = diagv[i];
974: k++;
975: }
976: } else { /* upper triangular */
977: if (B->M/bs - diag[i] > B->N/bs) {
978: if (B->M/bs + diag[i] > b->brstart) {
979: ldiag[k] = diag[i] - b->brstart;
980: if (diagv) ldiagv[k] = diagv[i];
981: k++;
982: }
983: } else {
984: if (B->M/bs > b->brstart) {
985: ldiag[k] = diag[i] - b->brstart;
986: if (diagv) ldiagv[k] = diagv[i];
987: k++;
988: }
989: }
990: }
991: }
993: /* Form local matrix */
994: MatCreateSeqBDiag(PETSC_COMM_SELF,B->m,B->n,k,bs,ldiag,ldiagv,&b->A);
995: PetscLogObjectParent(B,b->A);
996: PetscFree(ldiag);
997: if (ldiagv) {PetscFree(ldiagv);}
999: return(0);
1000: }
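/*
   Usage sketch (editorial addition, not part of the original file): creating a
   MATMPIBDIAG matrix and preallocating three block diagonals of block size 2,
   mirroring the MatCreate()/MatSetType() sequence used by MatCreateMPIBDiag()
   below.  comm, M and ierr are assumed to be provided by the caller.

      Mat A;
      int diag[3] = {-1,0,1};
      ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,M,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBDIAG);CHKERRQ(ierr);
      ierr = MatMPIBDiagSetPreallocation(A,3,2,diag,PETSC_NULL);CHKERRQ(ierr);
*/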
1002: #undef __FUNCT__
1003: #define __FUNCT__ "MatCreateMPIBDiag"
1004: /*@C
1005: MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.
1007: Collective on MPI_Comm
1009: Input Parameters:
1010: + comm - MPI communicator
1011: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1012: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1013: . N - number of columns (local and global)
1014: . nd - number of block diagonals (global) (optional)
1015: . bs - each element of a diagonal is a bs x bs dense matrix
1016: . diag - optional array of block diagonal numbers (length nd).
1017: For a matrix element A[i,j], where i=row and j=column, the
1018: diagonal number is
1019: $ diag = i/bs - j/bs (integer division)
1020: Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as
1021: needed (expensive).
1022: - diagv - pointer to actual diagonals (in same order as diag array),
1023: if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1024: to control memory allocation.
1026: Output Parameter:
1027: . A - the matrix
1029: Options Database Keys:
1030: . -mat_block_size <bs> - Sets blocksize
1031: . -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers
1033: Notes:
1034: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1035: then it must be used on all processors that share the object for that argument.
1037: The parallel matrix is partitioned across the processors by rows, where
1038: each local rectangular matrix is stored in the uniprocessor block
1039: diagonal format. See the users manual for further details.
1041: The user MUST specify either the local or global numbers of rows
1042: (possibly both).
1044: The case bs=1 (conventional diagonal storage) is implemented as
1045: a special case.
1047: Fortran Notes:
1048: Fortran programmers cannot set diagv; this variable is ignored.
1050: Level: intermediate
1052: .keywords: matrix, block, diagonal, parallel, sparse
1054: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1055: @*/
1056: int MatCreateMPIBDiag(MPI_Comm comm,int m,int M,int N,int nd,int bs,int *diag,PetscScalar **diagv,Mat *A)
1057: {
1058: int ierr,size;
1061: MatCreate(comm,m,m,M,N,A);
1062: MPI_Comm_size(comm,&size);
1063: if (size > 1) {
1064: MatSetType(*A,MATMPIBDIAG);
1065: MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1066: } else {
1067: MatSetType(*A,MATSEQBDIAG);
1068: MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1069: }
1070: return(0);
1071: }
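/*
   Usage sketch (editorial addition): building a block-tridiagonal parallel matrix
   directly with MatCreateMPIBDiag(); comm, M and ierr are assumed to exist in the
   caller, and bs=1 gives conventional (point) tridiagonal storage.

      Mat A;
      int diag[3] = {-1,0,1};
      ierr = MatCreateMPIBDiag(comm,PETSC_DECIDE,M,M,3,1,diag,PETSC_NULL,&A);CHKERRQ(ierr);
*/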
1073: #undef __FUNCT__
1074: #define __FUNCT__ "MatBDiagGetData"
1075: /*@C
1076: MatBDiagGetData - Gets the data for the block diagonal matrix format.
1077: For the parallel case, this returns information for the local submatrix.
1079: Input Parameters:
1080: . mat - the matrix, stored in block diagonal format.
1082: Not Collective
1084: Output Parameters:
1087: + nd - number of block diagonals
1088: . bs - each element of a diagonal is a bs x bs dense matrix
1089: . bdlen - array of total block lengths of block diagonals
1090: . diag - optional array of block diagonal numbers (length nd).
1091: For a matrix element A[i,j], where i=row and j=column, the
1092: diagonal number is
1093: $ diag = i/bs - j/bs (integer division)
1096: - diagv - pointer to the actual diagonals (in same order as the diag array)
1098: Level: advanced
1100: Notes:
1101: See the users manual for further details regarding this storage format.
1103: .keywords: matrix, block, diagonal, get, data
1105: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1106: @*/
1107: int MatBDiagGetData(Mat mat,int *nd,int *bs,int **diag,int **bdlen,PetscScalar ***diagv)
1108: {
1109: Mat_MPIBDiag *pdmat;
1110: Mat_SeqBDiag *dmat = 0;
1111: PetscTruth isseq,ismpi;
1112: int ierr;
1116: PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1117: PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1118: if (isseq) {
1119: dmat = (Mat_SeqBDiag*)mat->data;
1120: } else if (ismpi) {
1121: pdmat = (Mat_MPIBDiag*)mat->data;
1122: dmat = (Mat_SeqBDiag*)pdmat->A->data;
1123: } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1124: *nd = dmat->nd;
1125: *bs = dmat->bs;
1126: *diag = dmat->diag;
1127: *bdlen = dmat->bdlen;
1128: *diagv = dmat->diagv;
1129: return(0);
1130: }
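/*
   Usage sketch (editorial addition): querying the diagonal layout of an assembled
   MATSEQBDIAG or MATMPIBDIAG matrix A created as in the examples above.

      int          nd,bs,*diag,*bdlen;
      PetscScalar  **diagv;
      ierr = MatBDiagGetData(A,&nd,&bs,&diag,&bdlen,&diagv);CHKERRQ(ierr);
*/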
1132: #include "petscsys.h"
1134: EXTERN_C_BEGIN
1135: #undef __FUNCT__
1136: #define __FUNCT__ "MatLoad_MPIBDiag"
1137: int MatLoad_MPIBDiag(PetscViewer viewer,MatType type,Mat *newmat)
1138: {
1139: Mat A;
1140: PetscScalar *vals,*svals;
1141: MPI_Comm comm = ((PetscObject)viewer)->comm;
1142: MPI_Status status;
1143: int bs,i,nz,ierr,j,rstart,rend,fd,*rowners,maxnz,*cols;
1144: int header[4],rank,size,*rowlengths = 0,M,N,m,Mbs;
1145: int *ourlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*smycols;
1146: int tag = ((PetscObject)viewer)->tag,extra_rows;
1149: MPI_Comm_size(comm,&size);
1150: MPI_Comm_rank(comm,&rank);
1151: if (!rank) {
1152: PetscViewerBinaryGetDescriptor(viewer,&fd);
1153: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1154: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1155: if (header[3] < 0) {
1156: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBDiag");
1157: }
1158: }
1159: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1160: M = header[1]; N = header[2];
1162: bs = 1; /* uses a block size of 1 by default; */
1163: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1165: /*
1166: This code adds extra rows to make sure the number of rows is
1167: divisible by the blocksize
1168: */
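/* e.g. (added for illustration): for M=10 and bs=4, Mbs=10/4=2 and extra_rows=4-10+8=2;
   since extra_rows != bs, Mbs is bumped to 3 and two padding rows with unit diagonal
   entries are appended below. */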
1169: Mbs = M/bs;
1170: extra_rows = bs - M + bs*(Mbs);
1171: if (extra_rows == bs) extra_rows = 0;
1172: else Mbs++;
1173: if (extra_rows && !rank) {
1174: PetscLogInfo(0,"MatLoad_MPIBDiag:Padding loaded matrix to match blocksize\n");
1175: }
1177: /* determine ownership of all rows */
1178: m = bs*(Mbs/size + ((Mbs % size) > rank));
1179: ierr = PetscMalloc((size+2)*sizeof(int),&rowners);
1180: ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1181: rowners[0] = 0;
1182: for (i=2; i<=size; i++) {
1183: rowners[i] += rowners[i-1];
1184: }
1185: rstart = rowners[rank];
1186: rend = rowners[rank+1];
1188: /* distribute row lengths to all processors */
1189: PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1190: if (!rank) {
1191: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1192: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1193: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1194: PetscMalloc(size*sizeof(int),&sndcounts);
1195: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1196: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1197: PetscFree(sndcounts);
1198: } else {
1199: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1200: }
1202: if (!rank) {
1203: /* calculate the number of nonzeros on each processor */
1204: PetscMalloc(size*sizeof(int),&procsnz);
1205: PetscMemzero(procsnz,size*sizeof(int));
1206: for (i=0; i<size; i++) {
1207: for (j=rowners[i]; j<rowners[i+1]; j++) {
1208: procsnz[i] += rowlengths[j];
1209: }
1210: }
1211: PetscFree(rowlengths);
1213: /* determine max buffer needed and allocate it */
1214: maxnz = 0;
1215: for (i=0; i<size; i++) {
1216: maxnz = PetscMax(maxnz,procsnz[i]);
1217: }
1218: PetscMalloc(maxnz*sizeof(int),&cols);
1220: /* read in my part of the matrix column indices */
1221: nz = procsnz[0];
1222: PetscMalloc(nz*sizeof(int),&mycols);
1223: if (size == 1) nz -= extra_rows;
1224: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1225: if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }
1227: /* read in everyone else's and ship off */
1228: for (i=1; i<size-1; i++) {
1229: nz = procsnz[i];
1230: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1231: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1232: }
1233: /* read in the stuff for the last proc */
1234: if (size != 1) {
1235: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
1236: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1237: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1238: MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
1239: }
1240: PetscFree(cols);
1241: } else {
1242: /* determine buffer space needed for message */
1243: nz = 0;
1244: for (i=0; i<m; i++) {
1245: nz += ourlens[i];
1246: }
1247: PetscMalloc(nz*sizeof(int),&mycols);
1249: /* receive message of column indices*/
1250: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1251: MPI_Get_count(&status,MPI_INT,&maxnz);
1252: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1253: }
1255: MatCreateMPIBDiag(comm,m,M+extra_rows,N+extra_rows,PETSC_NULL,bs,PETSC_NULL,PETSC_NULL,newmat);
1256: A = *newmat;
1258: if (!rank) {
1259: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1261: /* read in my part of the matrix numerical values */
1262: nz = procsnz[0];
1263: if (size == 1) nz -= extra_rows;
1264: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1265: if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }
1267: /* insert into matrix */
1268: jj = rstart;
1269: smycols = mycols;
1270: svals = vals;
1271: for (i=0; i<m; i++) {
1272: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1273: smycols += ourlens[i];
1274: svals += ourlens[i];
1275: jj++;
1276: }
1278: /* read in other processors (except the last one) and ship out */
1279: for (i=1; i<size-1; i++) {
1280: nz = procsnz[i];
1281: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1282: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1283: }
1284: /* the last proc */
1285: if (size != 1){
1286: nz = procsnz[i] - extra_rows;
1287: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1288: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1289: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1290: }
1291: PetscFree(procsnz);
1292: } else {
1293: /* receive numeric values */
1294: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1296: /* receive message of values*/
1297: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1298: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1299: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1301: /* insert into matrix */
1302: jj = rstart;
1303: smycols = mycols;
1304: svals = vals;
1305: for (i=0; i<m; i++) {
1306: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1307: smycols += ourlens[i];
1308: svals += ourlens[i];
1309: jj++;
1310: }
1311: }
1312: PetscFree(ourlens);
1313: PetscFree(vals);
1314: PetscFree(mycols);
1315: PetscFree(rowners);
1317: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1318: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1319: return(0);
1320: }
1321: EXTERN_C_END