Actual source code: mpidense.c
1: /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/
3: /*
4: Basic functions for parallel dense matrices.
5: */
6:
7: #include "src/mat/impls/dense/mpi/mpidense.h"
8: #include "src/vec/vecimpl.h"
10: EXTERN_C_BEGIN
11: #undef __FUNCT__
13: int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
14: {
15: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
16: int m = A->m,rstart = mdn->rstart,ierr;
17: PetscScalar *array;
18: MPI_Comm comm;
21: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");
23: /* The reuse aspect is not implemented efficiently */
24: if (reuse) { MatDestroy(*B);}
26: PetscObjectGetComm((PetscObject)(mdn->A),&comm);
27: MatGetArray(mdn->A,&array);
28: MatCreateSeqDense(comm,m,m,array+m*rstart,B);
29: MatRestoreArray(mdn->A,&array);
30: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
31: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
32:
33: *iscopy = PETSC_TRUE;
34: return(0);
35: }
36: EXTERN_C_END
38: EXTERN int MatSetUpMultiply_MPIDense(Mat);
40: #undef __FUNCT__
42: int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
43: {
44: Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
45: int ierr,i,j,rstart = A->rstart,rend = A->rend,row;
46: PetscTruth roworiented = A->roworiented;
49: for (i=0; i<m; i++) {
50: if (idxm[i] < 0) continue;
51: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
52: if (idxm[i] >= rstart && idxm[i] < rend) {
53: row = idxm[i] - rstart;
54: if (roworiented) {
55: MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
56: } else {
57: for (j=0; j<n; j++) {
58: if (idxn[j] < 0) continue;
59: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
60: MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
61: }
62: }
63: } else {
64: if (!A->donotstash) {
65: if (roworiented) {
66: MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
67: } else {
68: MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
69: }
70: }
71: }
72: }
73: return(0);
74: }
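
Illustrative usage (not part of mpidense.c): application code inserts entries through the
generic MatSetValues() interface; rows owned locally are set directly in the local dense
block, while rows owned elsewhere are cached in mat->stash by the routine above and shipped
to their owners during assembly. A minimal sketch, assuming a previously created parallel
dense matrix A and the calling sequences used in this file:

   int         ierr,i,col = 0,rstart,rend;
   PetscScalar one = 1.0;

   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
   for (i=rstart; i<rend; i++) {
     /* locally owned rows go straight into the local Mat_SeqDense */
     ierr = MatSetValues(A,1,&i,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
   }
   /* entries for rows owned by other processes may also be set here; they are
      stashed and communicated by MatAssemblyBegin/End */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);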
76: #undef __FUNCT__
78: int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
79: {
80: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
81: int ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;
84: for (i=0; i<m; i++) {
85: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
86: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
87: if (idxm[i] >= rstart && idxm[i] < rend) {
88: row = idxm[i] - rstart;
89: for (j=0; j<n; j++) {
90: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
91: if (idxn[j] >= mat->N) {
92: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
93: }
94: MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
95: }
96: } else {
97: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
98: }
99: }
100: return(0);
101: }
103: #undef __FUNCT__
105: int MatGetArray_MPIDense(Mat A,PetscScalar **array)
106: {
107: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
108: int ierr;
111: MatGetArray(a->A,array);
112: return(0);
113: }
115: #undef __FUNCT__
117: static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
118: {
119: Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
120: Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
121: int i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
122: PetscScalar *av,*bv,*v = lmat->v;
123: Mat newmat;
126: ISGetIndices(isrow,&irow);
127: ISGetIndices(iscol,&icol);
128: ISGetLocalSize(isrow,&nrows);
129: ISGetLocalSize(iscol,&ncols);
131: /* No parallel redistribution currently supported! Should really check each index set
132:      to confirm that it is OK. ... Currently supports only a submatrix with the same
133:      partitioning as the original matrix! */
135: MatGetLocalSize(A,&nlrows,&nlcols);
136: MatGetOwnershipRange(A,&rstart,&rend);
137:
138: /* Check submatrix call */
139: if (scall == MAT_REUSE_MATRIX) {
140: /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
141: /* Really need to test rows and column sizes! */
142: newmat = *B;
143: } else {
144: /* Create and fill new matrix */
145: MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);
146: }
148: /* Now extract the data pointers and do the copy, column at a time */
149: newmatd = (Mat_MPIDense*)newmat->data;
150: bv = ((Mat_SeqDense *)newmatd->A->data)->v;
151:
152: for (i=0; i<ncols; i++) {
153: av = v + nlrows*icol[i];
154: for (j=0; j<nrows; j++) {
155: *bv++ = av[irow[j] - rstart];
156: }
157: }
159: /* Assemble the matrices so that the correct flags are set */
160: MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
161: MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);
163: /* Free work space */
164: ISRestoreIndices(isrow,&irow);
165: ISRestoreIndices(iscol,&icol);
166: *B = newmat;
167: return(0);
168: }
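
Illustrative usage (not part of mpidense.c): as the comment above notes, the extracted
submatrix must keep the row partitioning of the original matrix, so each process passes an
index set covering only rows it owns. A minimal sketch, assuming the public MatGetSubMatrix()
calling sequence mirrors the internal routine above and that A is an assembled MPIDENSE matrix:

   int ierr,rstart,rend,ncols = 3;   /* first three global columns, chosen for illustration */
   IS  isrow,iscol;
   Mat B;

   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
   ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
   ierr = ISCreateStride(PETSC_COMM_SELF,ncols,0,1,&iscol);CHKERRQ(ierr);
   ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
   ierr = ISDestroy(isrow);CHKERRQ(ierr);
   ierr = ISDestroy(iscol);CHKERRQ(ierr);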
170: #undef __FUNCT__
172: int MatRestoreArray_MPIDense(Mat A,PetscScalar **array)
173: {
175: return(0);
176: }
178: #undef __FUNCT__
180: int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
181: {
182: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
183: MPI_Comm comm = mat->comm;
184: int ierr,nstash,reallocs;
185: InsertMode addv;
188: /* make sure all processors are either in INSERTMODE or ADDMODE */
189: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
190: if (addv == (ADD_VALUES|INSERT_VALUES)) {
191: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
192: }
193: mat->insertmode = addv; /* in case this processor had no cache */
195: MatStashScatterBegin_Private(&mat->stash,mdn->rowners);
196: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
197: PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
198: return(0);
199: }
201: #undef __FUNCT__
203: int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
204: {
205: Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
206: int i,n,ierr,*row,*col,flg,j,rstart,ncols;
207: PetscScalar *val;
208: InsertMode addv=mat->insertmode;
211: /* wait on receives */
212: while (1) {
213: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
214: if (!flg) break;
215:
216: for (i=0; i<n;) {
217: /* Now identify the consecutive vals belonging to the same row */
218: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
219: if (j < n) ncols = j-i;
220: else ncols = n-i;
221: /* Now assemble all these values with a single function call */
222: MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
223: i = j;
224: }
225: }
226: MatStashScatterEnd_Private(&mat->stash);
227:
228: MatAssemblyBegin(mdn->A,mode);
229: MatAssemblyEnd(mdn->A,mode);
231: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
232: MatSetUpMultiply_MPIDense(mat);
233: }
234: return(0);
235: }
237: #undef __FUNCT__
239: int MatZeroEntries_MPIDense(Mat A)
240: {
241: int ierr;
242: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
245: MatZeroEntries(l->A);
246: return(0);
247: }
249: #undef __FUNCT__
251: int MatGetBlockSize_MPIDense(Mat A,int *bs)
252: {
254: *bs = 1;
255: return(0);
256: }
258: /* the code does not do the diagonal entries correctly unless the
259: matrix is square and the column and row ownerships are identical.
260: This is a BUG. The only way to fix it seems to be to access
261: mdn->A and mdn->B directly and not through the MatZeroRows()
262: routine.
263: */
264: #undef __FUNCT__
266: int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag)
267: {
268: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
269: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
270: int *nprocs,j,idx,nsends;
271: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
272: int *rvalues,tag = A->tag,count,base,slen,n,*source;
273: int *lens,imdex,*lrows,*values;
274: MPI_Comm comm = A->comm;
275: MPI_Request *send_waits,*recv_waits;
276: MPI_Status recv_status,*send_status;
277: IS istmp;
278: PetscTruth found;
281: ISGetLocalSize(is,&N);
282: ISGetIndices(is,&rows);
284: /* first count number of contributors to each processor */
285: ierr = PetscMalloc(2*size*sizeof(int),&nprocs);
286: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
287: ierr = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
288: for (i=0; i<N; i++) {
289: idx = rows[i];
290: found = PETSC_FALSE;
291: for (j=0; j<size; j++) {
292: if (idx >= owners[j] && idx < owners[j+1]) {
293: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
294: }
295: }
296: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
297: }
298: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
300: /* inform other processors of number of messages and max length*/
301: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
303: /* post receives: */
304: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
305: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
306: for (i=0; i<nrecvs; i++) {
307: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
308: }
310: /* do sends:
311: 1) starts[i] gives the starting index in svalues for stuff going to
312: the ith processor
313: */
314: PetscMalloc((N+1)*sizeof(int),&svalues);
315: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
316: PetscMalloc((size+1)*sizeof(int),&starts);
317: starts[0] = 0;
318: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
319: for (i=0; i<N; i++) {
320: svalues[starts[owner[i]]++] = rows[i];
321: }
322: ISRestoreIndices(is,&rows);
324: starts[0] = 0;
325: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
326: count = 0;
327: for (i=0; i<size; i++) {
328: if (nprocs[2*i+1]) {
329: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
330: }
331: }
332: PetscFree(starts);
334: base = owners[rank];
336: /* wait on receives */
337: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
338: source = lens + nrecvs;
339: count = nrecvs; slen = 0;
340: while (count) {
341: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
342: /* unpack receives into our local space */
343: MPI_Get_count(&recv_status,MPI_INT,&n);
344: source[imdex] = recv_status.MPI_SOURCE;
345: lens[imdex] = n;
346: slen += n;
347: count--;
348: }
349: PetscFree(recv_waits);
350:
351: /* move the data into the send scatter */
352: PetscMalloc((slen+1)*sizeof(int),&lrows);
353: count = 0;
354: for (i=0; i<nrecvs; i++) {
355: values = rvalues + i*nmax;
356: for (j=0; j<lens[i]; j++) {
357: lrows[count++] = values[j] - base;
358: }
359: }
360: PetscFree(rvalues);
361: PetscFree(lens);
362: PetscFree(owner);
363: PetscFree(nprocs);
364:
365: /* actually zap the local rows */
366: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
367: PetscLogObjectParent(A,istmp);
368: PetscFree(lrows);
369: MatZeroRows(l->A,istmp,diag);
370: ISDestroy(istmp);
372: /* wait on sends */
373: if (nsends) {
374: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
375: MPI_Waitall(nsends,send_waits,send_status);
376: PetscFree(send_status);
377: }
378: PetscFree(send_waits);
379: PetscFree(svalues);
381: return(0);
382: }
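
Illustrative usage (not part of mpidense.c): MatZeroRows() takes an index set of global row
numbers; as the routine above shows, each process may list rows it does not own, and they are
routed to their owners before the local rows are zeroed. A minimal sketch, assuming an
assembled parallel dense matrix A:

   int         ierr,rows[2] = {0,5};   /* global row numbers, chosen for illustration */
   PetscScalar diag = 1.0;
   IS          is;

   ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
   /* pass PETSC_NULL instead of &diag to zero the diagonal entries as well */
   ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
   ierr = ISDestroy(is);CHKERRQ(ierr);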
384: #undef __FUNCT__
386: int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
387: {
388: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
389: int ierr;
392: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
393: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
394: MatMult_SeqDense(mdn->A,mdn->lvec,yy);
395: return(0);
396: }
398: #undef __FUNCT__
400: int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
401: {
402: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
403: int ierr;
406: VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
407: VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
408: MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
409: return(0);
410: }
412: #undef __FUNCT__
414: int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
415: {
416: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
417: int ierr;
418: PetscScalar zero = 0.0;
421: VecSet(&zero,yy);
422: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
423: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
424: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
425: return(0);
426: }
428: #undef __FUNCT__
430: int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
431: {
432: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
433: int ierr;
436: VecCopy(yy,zz);
437: MatMultTranspose_SeqDense(a->A,xx,a->lvec);
438: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
439: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
440: return(0);
441: }
443: #undef __FUNCT__
445: int MatGetDiagonal_MPIDense(Mat A,Vec v)
446: {
447: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
448: Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
449: int ierr,len,i,n,m = A->m,radd;
450: PetscScalar *x,zero = 0.0;
451:
453: VecSet(&zero,v);
454: VecGetArray(v,&x);
455: VecGetSize(v,&n);
456: if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
457: len = PetscMin(a->A->m,a->A->n);
458: radd = a->rstart*m;
459: for (i=0; i<len; i++) {
460: x[i] = aloc->v[radd + i*m + i];
461: }
462: VecRestoreArray(v,&x);
463: return(0);
464: }
466: #undef __FUNCT__
468: int MatDestroy_MPIDense(Mat mat)
469: {
470: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
471: int ierr;
475: #if defined(PETSC_USE_LOG)
476: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
477: #endif
478: MatStashDestroy_Private(&mat->stash);
479: PetscFree(mdn->rowners);
480: MatDestroy(mdn->A);
481: if (mdn->lvec) VecDestroy(mdn->lvec);
482: if (mdn->Mvctx) VecScatterDestroy(mdn->Mvctx);
483: if (mdn->factor) {
484: if (mdn->factor->temp) {PetscFree(mdn->factor->temp);}
485: if (mdn->factor->tag) {PetscFree(mdn->factor->tag);}
486: if (mdn->factor->pivots) {PetscFree(mdn->factor->pivots);}
487: PetscFree(mdn->factor);
488: }
489: PetscFree(mdn);
490: return(0);
491: }
493: #undef __FUNCT__
495: static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
496: {
497: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
498: int ierr;
501: if (mdn->size == 1) {
502: MatView(mdn->A,viewer);
503: }
504: else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
505: return(0);
506: }
508: #undef __FUNCT__
510: static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
511: {
512: Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
513: int ierr,size = mdn->size,rank = mdn->rank;
514: PetscViewerType vtype;
515: PetscTruth isascii,isdraw;
516: PetscViewer sviewer;
517: PetscViewerFormat format;
520: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
521: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
522: if (isascii) {
523: PetscViewerGetType(viewer,&vtype);
524: PetscViewerGetFormat(viewer,&format);
525: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
526: MatInfo info;
527: MatGetInfo(mat,MAT_LOCAL,&info);
528: PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
529: (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
530: PetscViewerFlush(viewer);
531: VecScatterView(mdn->Mvctx,viewer);
532: return(0);
533: } else if (format == PETSC_VIEWER_ASCII_INFO) {
534: return(0);
535: }
536: } else if (isdraw) {
537: PetscDraw draw;
538: PetscTruth isnull;
540: PetscViewerDrawGetDraw(viewer,0,&draw);
541: PetscDrawIsNull(draw,&isnull);
542: if (isnull) return(0);
543: }
545: if (size == 1) {
546: MatView(mdn->A,viewer);
547: } else {
548: /* assemble the entire matrix onto first processor. */
549: Mat A;
550: int M = mat->M,N = mat->N,m,row,i,nz,*cols;
551: PetscScalar *vals;
553: if (!rank) {
554: MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);
555: } else {
556: MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);
557: }
558: PetscLogObjectParent(mat,A);
560: /* Copy the matrix ... This isn't the most efficient means,
561: but it's quick for now */
562: row = mdn->rstart; m = mdn->A->m;
563: for (i=0; i<m; i++) {
564: MatGetRow(mat,row,&nz,&cols,&vals);
565: MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
566: MatRestoreRow(mat,row,&nz,&cols,&vals);
567: row++;
568: }
570: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
571: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
572: PetscViewerGetSingleton(viewer,&sviewer);
573: if (!rank) {
574: MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
575: }
576: PetscViewerRestoreSingleton(viewer,&sviewer);
577: PetscViewerFlush(viewer);
578: MatDestroy(A);
579: }
580: return(0);
581: }
583: #undef __FUNCT__
585: int MatView_MPIDense(Mat mat,PetscViewer viewer)
586: {
587: int ierr;
588: PetscTruth isascii,isbinary,isdraw,issocket;
589:
591:
592: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
593: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
594: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
595: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
597: if (isascii || issocket || isdraw) {
598: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
599: } else if (isbinary) {
600: MatView_MPIDense_Binary(mat,viewer);
601: } else {
602: SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
603: }
604: return(0);
605: }
607: #undef __FUNCT__
609: int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
610: {
611: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
612: Mat mdn = mat->A;
613: int ierr;
614: PetscReal isend[5],irecv[5];
617: info->rows_global = (double)A->M;
618: info->columns_global = (double)A->N;
619: info->rows_local = (double)A->m;
620: info->columns_local = (double)A->N;
621: info->block_size = 1.0;
622: MatGetInfo(mdn,MAT_LOCAL,info);
623: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
624: isend[3] = info->memory; isend[4] = info->mallocs;
625: if (flag == MAT_LOCAL) {
626: info->nz_used = isend[0];
627: info->nz_allocated = isend[1];
628: info->nz_unneeded = isend[2];
629: info->memory = isend[3];
630: info->mallocs = isend[4];
631: } else if (flag == MAT_GLOBAL_MAX) {
632: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
633: info->nz_used = irecv[0];
634: info->nz_allocated = irecv[1];
635: info->nz_unneeded = irecv[2];
636: info->memory = irecv[3];
637: info->mallocs = irecv[4];
638: } else if (flag == MAT_GLOBAL_SUM) {
639: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
640: info->nz_used = irecv[0];
641: info->nz_allocated = irecv[1];
642: info->nz_unneeded = irecv[2];
643: info->memory = irecv[3];
644: info->mallocs = irecv[4];
645: }
646: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
647: info->fill_ratio_needed = 0;
648: info->factor_mallocs = 0;
649: return(0);
650: }
652: #undef __FUNCT__
654: int MatSetOption_MPIDense(Mat A,MatOption op)
655: {
656: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
657: int ierr;
660: switch (op) {
661: case MAT_NO_NEW_NONZERO_LOCATIONS:
662: case MAT_YES_NEW_NONZERO_LOCATIONS:
663: case MAT_NEW_NONZERO_LOCATION_ERR:
664: case MAT_NEW_NONZERO_ALLOCATION_ERR:
665: case MAT_COLUMNS_SORTED:
666: case MAT_COLUMNS_UNSORTED:
667: MatSetOption(a->A,op);
668: break;
669: case MAT_ROW_ORIENTED:
670: a->roworiented = PETSC_TRUE;
671: MatSetOption(a->A,op);
672: break;
673: case MAT_ROWS_SORTED:
674: case MAT_ROWS_UNSORTED:
675: case MAT_YES_NEW_DIAGONALS:
676: case MAT_USE_HASH_TABLE:
677: PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
678: break;
679: case MAT_COLUMN_ORIENTED:
680: a->roworiented = PETSC_FALSE;
681: MatSetOption(a->A,op);
682: break;
683: case MAT_IGNORE_OFF_PROC_ENTRIES:
684: a->donotstash = PETSC_TRUE;
685: break;
686: case MAT_NO_NEW_DIAGONALS:
687: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
688: default:
689: SETERRQ(PETSC_ERR_SUP,"unknown option");
690: }
691: return(0);
692: }
694: #undef __FUNCT__
696: int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
697: {
698: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
699: int lrow,rstart = mat->rstart,rend = mat->rend,ierr;
702: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows")
703: lrow = row - rstart;
704: MatGetRow(mat->A,lrow,nz,idx,v);
705: return(0);
706: }
708: #undef __FUNCT__
710: int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
711: {
715: if (idx) {PetscFree(*idx);}
716: if (v) {PetscFree(*v);}
717: return(0);
718: }
720: #undef __FUNCT__
722: int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
723: {
724: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
725: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
726: PetscScalar *l,*r,x,*v;
727: int ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;
730: MatGetLocalSize(A,&s2,&s3);
731: if (ll) {
732: VecGetLocalSize(ll,&s2a);
733: if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
734: VecGetArray(ll,&l);
735: for (i=0; i<m; i++) {
736: x = l[i];
737: v = mat->v + i;
738: for (j=0; j<n; j++) { (*v) *= x; v+= m;}
739: }
740: VecRestoreArray(ll,&l);
741: PetscLogFlops(n*m);
742: }
743: if (rr) {
744: VecGetSize(rr,&s3a);
745: if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
746: VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
747: VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
748: VecGetArray(mdn->lvec,&r);
749: for (i=0; i<n; i++) {
750: x = r[i];
751: v = mat->v + i*m;
752: for (j=0; j<m; j++) { (*v++) *= x;}
753: }
754: VecRestoreArray(mdn->lvec,&r);
755: PetscLogFlops(n*m);
756: }
757: return(0);
758: }
760: #undef __FUNCT__
762: int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
763: {
764: Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
765: Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
766: int ierr,i,j;
767: PetscReal sum = 0.0;
768: PetscScalar *v = mat->v;
771: if (mdn->size == 1) {
772: MatNorm(mdn->A,type,nrm);
773: } else {
774: if (type == NORM_FROBENIUS) {
775: for (i=0; i<mdn->A->n*mdn->A->m; i++) {
776: #if defined(PETSC_USE_COMPLEX)
777: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
778: #else
779: sum += (*v)*(*v); v++;
780: #endif
781: }
782: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
783: *nrm = sqrt(*nrm);
784: PetscLogFlops(2*mdn->A->n*mdn->A->m);
785: } else if (type == NORM_1) {
786: PetscReal *tmp,*tmp2;
787: PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);
788: tmp2 = tmp + A->N;
789: PetscMemzero(tmp,2*A->N*sizeof(PetscReal));
790: *nrm = 0.0;
791: v = mat->v;
792: for (j=0; j<mdn->A->n; j++) {
793: for (i=0; i<mdn->A->m; i++) {
794: tmp[j] += PetscAbsScalar(*v); v++;
795: }
796: }
797: MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);
798: for (j=0; j<A->N; j++) {
799: if (tmp2[j] > *nrm) *nrm = tmp2[j];
800: }
801: PetscFree(tmp);
802: PetscLogFlops(A->n*A->m);
803: } else if (type == NORM_INFINITY) { /* max row norm */
804: PetscReal ntemp;
805: MatNorm(mdn->A,type,&ntemp);
806: MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
807: } else {
808: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
809: }
810: }
811: return(0);
812: }
814: #undef __FUNCT__
816: int MatTranspose_MPIDense(Mat A,Mat *matout)
817: {
818: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
819: Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
820: Mat B;
821: int M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
822: int j,i,ierr;
823: PetscScalar *v;
826: if (!matout && M != N) {
827: SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
828: }
829: MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);
831: m = a->A->m; n = a->A->n; v = Aloc->v;
832: PetscMalloc(n*sizeof(int),&rwork);
833: for (j=0; j<n; j++) {
834: for (i=0; i<m; i++) rwork[i] = rstart + i;
835: MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
836: v += m;
837: }
838: PetscFree(rwork);
839: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
840: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
841: if (matout) {
842: *matout = B;
843: } else {
844: MatHeaderCopy(A,B);
845: }
846: return(0);
847: }
849: #include "petscblaslapack.h"
850: #undef __FUNCT__
852: int MatScale_MPIDense(PetscScalar *alpha,Mat inA)
853: {
854: Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
855: Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
856: int one = 1,nz;
859: nz = inA->m*inA->N;
860: BLscal_(&nz,alpha,a->v,&one);
861: PetscLogFlops(nz);
862: return(0);
863: }
865: static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
866: EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);
868: #undef __FUNCT__
870: int MatSetUpPreallocation_MPIDense(Mat A)
871: {
872: int ierr;
875: MatMPIDenseSetPreallocation(A,0);
876: return(0);
877: }
879: /* -------------------------------------------------------------------*/
880: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
881: MatGetRow_MPIDense,
882: MatRestoreRow_MPIDense,
883: MatMult_MPIDense,
884: MatMultAdd_MPIDense,
885: MatMultTranspose_MPIDense,
886: MatMultTransposeAdd_MPIDense,
887: 0,
888: 0,
889: 0,
890: 0,
891: 0,
892: 0,
893: 0,
894: MatTranspose_MPIDense,
895: MatGetInfo_MPIDense,0,
896: MatGetDiagonal_MPIDense,
897: MatDiagonalScale_MPIDense,
898: MatNorm_MPIDense,
899: MatAssemblyBegin_MPIDense,
900: MatAssemblyEnd_MPIDense,
901: 0,
902: MatSetOption_MPIDense,
903: MatZeroEntries_MPIDense,
904: MatZeroRows_MPIDense,
905: 0,
906: 0,
907: 0,
908: 0,
909: MatSetUpPreallocation_MPIDense,
910: 0,
911: 0,
912: MatGetArray_MPIDense,
913: MatRestoreArray_MPIDense,
914: MatDuplicate_MPIDense,
915: 0,
916: 0,
917: 0,
918: 0,
919: 0,
920: MatGetSubMatrices_MPIDense,
921: 0,
922: MatGetValues_MPIDense,
923: 0,
924: 0,
925: MatScale_MPIDense,
926: 0,
927: 0,
928: 0,
929: MatGetBlockSize_MPIDense,
930: 0,
931: 0,
932: 0,
933: 0,
934: 0,
935: 0,
936: 0,
937: 0,
938: 0,
939: MatGetSubMatrix_MPIDense,
940: MatDestroy_MPIDense,
941: MatView_MPIDense,
942: MatGetPetscMaps_Petsc};
944: EXTERN_C_BEGIN
945: #undef __FUNCT__
947: int MatCreate_MPIDense(Mat mat)
948: {
949: Mat_MPIDense *a;
950: int ierr,i;
953: ierr = PetscNew(Mat_MPIDense,&a);
954: mat->data = (void*)a;
955: ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
956: mat->factor = 0;
957: mat->mapping = 0;
959: a->factor = 0;
960: mat->insertmode = NOT_SET_VALUES;
961: MPI_Comm_rank(mat->comm,&a->rank);
962: MPI_Comm_size(mat->comm,&a->size);
964: PetscSplitOwnership(mat->comm,&mat->m,&mat->M);
965: PetscSplitOwnership(mat->comm,&mat->n,&mat->N);
966: a->nvec = mat->n;
968: /* the information in the maps duplicates the information computed below, eventually
969: we should remove the duplicate information that is not contained in the maps */
970: PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);
971: PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);
973: /* build local table of row and column ownerships */
974: ierr = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);
975: a->cowners = a->rowners + a->size + 1;
976: PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
977: MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);
978: a->rowners[0] = 0;
979: for (i=2; i<=a->size; i++) {
980: a->rowners[i] += a->rowners[i-1];
981: }
982: a->rstart = a->rowners[a->rank];
983: a->rend = a->rowners[a->rank+1];
984: ierr = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);
985: a->cowners[0] = 0;
986: for (i=2; i<=a->size; i++) {
987: a->cowners[i] += a->cowners[i-1];
988: }
990: /* build cache (stash) for off-processor entries */
991: a->donotstash = PETSC_FALSE;
992: MatStashCreate_Private(mat->comm,1,&mat->stash);
994: /* stuff used for matrix vector multiply */
995: a->lvec = 0;
996: a->Mvctx = 0;
997: a->roworiented = PETSC_TRUE;
999: PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1000: "MatGetDiagonalBlock_MPIDense",
1001: MatGetDiagonalBlock_MPIDense);
1002: return(0);
1003: }
1004: EXTERN_C_END
1006: #undef __FUNCT__
1008: /*@C
1009: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1011: Not collective
1013: Input Parameters:
1014: + A - the matrix
1015: - data - optional location of matrix data. Set data=PETSC_NULL for PETSc
1016: to control all matrix memory allocation.
1018: Notes:
1019: The dense format is fully compatible with standard Fortran 77
1020: storage by columns.
1022: The data input variable is intended primarily for Fortran programmers
1023: who wish to allocate their own matrix memory space. Most users should
1024: set data=PETSC_NULL.
1026: Level: intermediate
1028: .keywords: matrix,dense, parallel
1030: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1031: @*/
1032: int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1033: {
1034: Mat_MPIDense *a;
1035: int ierr;
1036: PetscTruth flg2;
1039: PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);
1040: if (!flg2) return(0);
1041: mat->preallocated = PETSC_TRUE;
1042: /* Note: For now, when data is specified above, this assumes the user correctly
1043: allocates the local dense storage space. We should add error checking. */
1045: a = (Mat_MPIDense*)mat->data;
1046: MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);
1047: PetscLogObjectParent(mat,a->A);
1048: return(0);
1049: }
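
Illustrative usage (not part of mpidense.c): the preallocation routine above is normally
called right after the matrix type is set. A minimal sketch using the MatCreate() calling
sequence that appears later in this file, with data = PETSC_NULL so PETSc allocates the
local column-major array itself (M and N are global sizes supplied by the caller):

   Mat A;
   int ierr;

   ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
   ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);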
1051: #undef __FUNCT__
1053: /*@C
1054: MatCreateMPIDense - Creates a parallel matrix in dense format.
1056: Collective on MPI_Comm
1058: Input Parameters:
1059: + comm - MPI communicator
1060: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1061: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1062: . M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1063: . N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1064: - data - optional location of matrix data. Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1065: to control all matrix memory allocation.
1067: Output Parameter:
1068: . A - the matrix
1070: Notes:
1071: The dense format is fully compatible with standard Fortran 77
1072: storage by columns.
1074: The data input variable is intended primarily for Fortran programmers
1075: who wish to allocate their own matrix memory space. Most users should
1076: set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).
1078: The user MUST specify either the local or global matrix dimensions
1079: (possibly both).
1081: Level: intermediate
1083: .keywords: matrix,dense, parallel
1085: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1086: @*/
1087: int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1088: {
1089: int ierr,size;
1092: MatCreate(comm,m,n,M,N,A);
1093: MPI_Comm_size(comm,&size);
1094: if (size > 1) {
1095: MatSetType(*A,MATMPIDENSE);
1096: MatMPIDenseSetPreallocation(*A,data);
1097: } else {
1098: MatSetType(*A,MATSEQDENSE);
1099: MatSeqDenseSetPreallocation(*A,data);
1100: }
1101: return(0);
1102: }
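
Illustrative usage (not part of mpidense.c): the convenience routine above bundles the
MatCreate/MatSetType/preallocation sequence and falls back to SEQDENSE on one process. A
minimal sketch creating a 100 x 10 matrix with a PETSc-chosen row distribution:

   Mat A;
   int ierr;

   ierr = MatCreateMPIDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,10,PETSC_NULL,&A);CHKERRQ(ierr);
   /* ... set values, assemble, use the matrix ... */
   ierr = MatDestroy(A);CHKERRQ(ierr);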
1104: #undef __FUNCT__
1106: static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1107: {
1108: Mat mat;
1109: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1110: int ierr;
1113: *newmat = 0;
1114: MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);
1115: MatSetType(mat,MATMPIDENSE);
1116: ierr = PetscNew(Mat_MPIDense,&a);
1117: mat->data = (void*)a;
1118: ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1119: mat->factor = A->factor;
1120: mat->assembled = PETSC_TRUE;
1121: mat->preallocated = PETSC_TRUE;
1123: a->rstart = oldmat->rstart;
1124: a->rend = oldmat->rend;
1125: a->size = oldmat->size;
1126: a->rank = oldmat->rank;
1127: mat->insertmode = NOT_SET_VALUES;
1128: a->nvec = oldmat->nvec;
1129: a->donotstash = oldmat->donotstash;
1130: ierr = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);
1131: PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1132: PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1133: MatStashCreate_Private(A->comm,1,&mat->stash);
1135: MatSetUpMultiply_MPIDense(mat);
1136: MatDuplicate(oldmat->A,cpvalues,&a->A);
1137: PetscLogObjectParent(mat,a->A);
1138: *newmat = mat;
1139: return(0);
1140: }
1142: #include "petscsys.h"
1144: #undef __FUNCT__
1146: int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1147: {
1148: int *rowners,i,size,rank,m,ierr,nz,j;
1149: PetscScalar *array,*vals,*vals_ptr;
1150: MPI_Status status;
1153: MPI_Comm_rank(comm,&rank);
1154: MPI_Comm_size(comm,&size);
1156: /* determine ownership of all rows */
1157: m = M/size + ((M % size) > rank);
1158: ierr = PetscMalloc((size+2)*sizeof(int),&rowners);
1159: ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1160: rowners[0] = 0;
1161: for (i=2; i<=size; i++) {
1162: rowners[i] += rowners[i-1];
1163: }
1165: MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1166: MatGetArray(*newmat,&array);
1168: if (!rank) {
1169: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1171: /* read in my part of the matrix numerical values */
1172: PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1173:
1174: /* insert into matrix by row (this is why we cannot read directly into array) */
1175: vals_ptr = vals;
1176: for (i=0; i<m; i++) {
1177: for (j=0; j<N; j++) {
1178: array[i + j*m] = *vals_ptr++;
1179: }
1180: }
1182: /* read in other processors and ship out */
1183: for (i=1; i<size; i++) {
1184: nz = (rowners[i+1] - rowners[i])*N;
1185: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1186: MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1187: }
1188: } else {
1189: /* receive numeric values */
1190: PetscMalloc(m*N*sizeof(PetscScalar),&vals);
1192: /* receive message of values*/
1193: MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);
1195: /* insert into matrix by row (this is why we cannot read directly into array) */
1196: vals_ptr = vals;
1197: for (i=0; i<m; i++) {
1198: for (j=0; j<N; j++) {
1199: array[i + j*m] = *vals_ptr++;
1200: }
1201: }
1202: }
1203: PetscFree(rowners);
1204: PetscFree(vals);
1205: MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1206: MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1207: return(0);
1208: }
1210: EXTERN_C_BEGIN
1211: #undef __FUNCT__
1213: int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1214: {
1215: Mat A;
1216: PetscScalar *vals,*svals;
1217: MPI_Comm comm = ((PetscObject)viewer)->comm;
1218: MPI_Status status;
1219: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1220: int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1221: int tag = ((PetscObject)viewer)->tag;
1222: int i,nz,ierr,j,rstart,rend,fd;
1225: MPI_Comm_size(comm,&size);
1226: MPI_Comm_rank(comm,&rank);
1227: if (!rank) {
1228: PetscViewerBinaryGetDescriptor(viewer,&fd);
1229: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1230: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1231: }
1233: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1234: M = header[1]; N = header[2]; nz = header[3];
1236: /*
1237: Handle case where matrix is stored on disk as a dense matrix
1238: */
1239: if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1240: MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1241: return(0);
1242: }
1244: /* determine ownership of all rows */
1245: m = M/size + ((M % size) > rank);
1246: ierr = PetscMalloc((size+2)*sizeof(int),&rowners);
1247: ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1248: rowners[0] = 0;
1249: for (i=2; i<=size; i++) {
1250: rowners[i] += rowners[i-1];
1251: }
1252: rstart = rowners[rank];
1253: rend = rowners[rank+1];
1255: /* distribute row lengths to all processors */
1256: ierr = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);
1257: offlens = ourlens + (rend-rstart);
1258: if (!rank) {
1259: PetscMalloc(M*sizeof(int),&rowlengths);
1260: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1261: PetscMalloc(size*sizeof(int),&sndcounts);
1262: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1263: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1264: PetscFree(sndcounts);
1265: } else {
1266: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1267: }
1269: if (!rank) {
1270: /* calculate the number of nonzeros on each processor */
1271: PetscMalloc(size*sizeof(int),&procsnz);
1272: PetscMemzero(procsnz,size*sizeof(int));
1273: for (i=0; i<size; i++) {
1274: for (j=rowners[i]; j< rowners[i+1]; j++) {
1275: procsnz[i] += rowlengths[j];
1276: }
1277: }
1278: PetscFree(rowlengths);
1280: /* determine max buffer needed and allocate it */
1281: maxnz = 0;
1282: for (i=0; i<size; i++) {
1283: maxnz = PetscMax(maxnz,procsnz[i]);
1284: }
1285: PetscMalloc(maxnz*sizeof(int),&cols);
1287: /* read in my part of the matrix column indices */
1288: nz = procsnz[0];
1289: PetscMalloc(nz*sizeof(int),&mycols);
1290: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1292: /* read in everyone else's part and ship it off */
1293: for (i=1; i<size; i++) {
1294: nz = procsnz[i];
1295: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1296: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1297: }
1298: PetscFree(cols);
1299: } else {
1300: /* determine buffer space needed for message */
1301: nz = 0;
1302: for (i=0; i<m; i++) {
1303: nz += ourlens[i];
1304: }
1305: PetscMalloc(nz*sizeof(int),&mycols);
1307: /* receive message of column indices*/
1308: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1309: MPI_Get_count(&status,MPI_INT,&maxnz);
1310: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1311: }
1313: /* loop over local rows, determining number of off diagonal entries */
1314: PetscMemzero(offlens,m*sizeof(int));
1315: jj = 0;
1316: for (i=0; i<m; i++) {
1317: for (j=0; j<ourlens[i]; j++) {
1318: if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1319: jj++;
1320: }
1321: }
1323: /* create our matrix */
1324: for (i=0; i<m; i++) {
1325: ourlens[i] -= offlens[i];
1326: }
1327: MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1328: A = *newmat;
1329: for (i=0; i<m; i++) {
1330: ourlens[i] += offlens[i];
1331: }
1333: if (!rank) {
1334: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1336: /* read in my part of the matrix numerical values */
1337: nz = procsnz[0];
1338: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1339:
1340: /* insert into matrix */
1341: jj = rstart;
1342: smycols = mycols;
1343: svals = vals;
1344: for (i=0; i<m; i++) {
1345: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1346: smycols += ourlens[i];
1347: svals += ourlens[i];
1348: jj++;
1349: }
1351: /* read in other processors and ship out */
1352: for (i=1; i<size; i++) {
1353: nz = procsnz[i];
1354: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1355: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1356: }
1357: PetscFree(procsnz);
1358: } else {
1359: /* receive numeric values */
1360: PetscMalloc(nz*sizeof(PetscScalar),&vals);
1362: /* receive message of values*/
1363: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1364: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1365: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1367: /* insert into matrix */
1368: jj = rstart;
1369: smycols = mycols;
1370: svals = vals;
1371: for (i=0; i<m; i++) {
1372: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1373: smycols += ourlens[i];
1374: svals += ourlens[i];
1375: jj++;
1376: }
1377: }
1378: PetscFree(ourlens);
1379: PetscFree(vals);
1380: PetscFree(mycols);
1381: PetscFree(rowners);
1383: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1384: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1385: return(0);
1386: }
1387: EXTERN_C_END