Actual source code: mpidense.c

  1: /*$Id: mpidense.c,v 1.159 2001/08/10 03:30:41 bsmith Exp $*/

  3: /*
  4:    Basic functions for parallel dense matrices.
  5: */
  6: 
 7:  #include "src/mat/impls/dense/mpi/mpidense.h"
 8:  #include "src/vec/vecimpl.h"

 10: EXTERN_C_BEGIN
 11: #undef __FUNCT__  
 13: int MatGetDiagonalBlock_MPIDense(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *B)
 14: {
 15:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
 16:   int          m = A->m,rstart = mdn->rstart,ierr;
 17:   PetscScalar  *array;
 18:   MPI_Comm     comm;

 21:   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Only square matrices supported.");

 23:   /* The reuse aspect is not implemented efficiently */
 24:   if (reuse) { MatDestroy(*B);}

 26:   PetscObjectGetComm((PetscObject)(mdn->A),&comm);
 27:   MatGetArray(mdn->A,&array);
 28:   MatCreateSeqDense(comm,m,m,array+m*rstart,B);
 29:   MatRestoreArray(mdn->A,&array);
 30:   MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
 31:   MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
 32: 
 33:   *iscopy = PETSC_TRUE;
 34:   return(0);
 35: }
 36: EXTERN_C_END

 38: EXTERN int MatSetUpMultiply_MPIDense(Mat);

 40: #undef __FUNCT__  
 42: int MatSetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
 43: {
 44:   Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
 45:   int          ierr,i,j,rstart = A->rstart,rend = A->rend,row;
 46:   PetscTruth   roworiented = A->roworiented;

 49:   for (i=0; i<m; i++) {
 50:     if (idxm[i] < 0) continue;
 51:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 52:     if (idxm[i] >= rstart && idxm[i] < rend) {
 53:       row = idxm[i] - rstart;
 54:       if (roworiented) {
 55:         MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
 56:       } else {
 57:         for (j=0; j<n; j++) {
 58:           if (idxn[j] < 0) continue;
 59:           if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 60:           MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
 61:         }
 62:       }
 63:     } else {
 64:       if (!A->donotstash) {
 65:         if (roworiented) {
 66:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
 67:         } else {
 68:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
 69:         }
 70:       }
 71:     }
 72:   }
 73:   return(0);
 74: }
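
/*
   Illustrative usage sketch (not part of mpidense.c): how the two value
   layouts handled by MatSetValues_MPIDense() look from the caller's side.
   A is assumed to be a previously created and preallocated MPI dense
   matrix; the index and value arrays are hypothetical.  With the default
   MAT_ROW_ORIENTED option the entry (rows[i],cols[j]) is read from
   v[i*n+j]; after MatSetOption(A,MAT_COLUMN_ORIENTED) it is read from
   v[i+j*m], matching the two branches above.
*/
#if 0
{
  int         rows[2] = {0,1},cols[2] = {0,1},ierr;
  PetscScalar v[4]    = {1.0,2.0,3.0,4.0};   /* row-major: [[1 2],[3 4]] */

  ierr = MatSetValues(A,2,rows,2,cols,v,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
}
#endif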

 76: #undef __FUNCT__  
 78: int MatGetValues_MPIDense(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
 79: {
 80:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
 81:   int          ierr,i,j,rstart = mdn->rstart,rend = mdn->rend,row;

 84:   for (i=0; i<m; i++) {
 85:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
 86:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 87:     if (idxm[i] >= rstart && idxm[i] < rend) {
 88:       row = idxm[i] - rstart;
 89:       for (j=0; j<n; j++) {
 90:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
 91:         if (idxn[j] >= mat->N) {
 92:           SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 93:         }
 94:         MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
 95:       }
 96:     } else {
 97:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
 98:     }
 99:   }
100:   return(0);
101: }

103: #undef __FUNCT__  
105: int MatGetArray_MPIDense(Mat A,PetscScalar **array)
106: {
107:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
108:   int          ierr;

111:   MatGetArray(a->A,array);
112:   return(0);
113: }

115: #undef __FUNCT__  
117: static int MatGetSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,int cs,MatReuse scall,Mat *B)
118: {
119:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
120:   Mat_SeqDense *lmat = (Mat_SeqDense*)mat->A->data;
121:   int          i,j,ierr,*irow,*icol,rstart,rend,nrows,ncols,nlrows,nlcols;
122:   PetscScalar  *av,*bv,*v = lmat->v;
123:   Mat          newmat;

126:   ISGetIndices(isrow,&irow);
127:   ISGetIndices(iscol,&icol);
128:   ISGetLocalSize(isrow,&nrows);
129:   ISGetLocalSize(iscol,&ncols);

 131:   /* No parallel redistribution currently supported! Should really check each index set
 132:      to confirm that it is OK.  ... Currently supports only submatrices with the same
 133:      partitioning as the original matrix! */

135:   MatGetLocalSize(A,&nlrows,&nlcols);
136:   MatGetOwnershipRange(A,&rstart,&rend);
137: 
138:   /* Check submatrix call */
139:   if (scall == MAT_REUSE_MATRIX) {
140:     /* SETERRQ(PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
141:     /* Really need to test rows and column sizes! */
142:     newmat = *B;
143:   } else {
144:     /* Create and fill new matrix */
145:     MatCreateMPIDense(A->comm,nrows,cs,PETSC_DECIDE,ncols,PETSC_NULL,&newmat);
146:   }

148:   /* Now extract the data pointers and do the copy, column at a time */
149:   newmatd = (Mat_MPIDense*)newmat->data;
150:   bv      = ((Mat_SeqDense *)newmatd->A->data)->v;
151: 
152:   for (i=0; i<ncols; i++) {
153:     av = v + nlrows*icol[i];
154:     for (j=0; j<nrows; j++) {
155:       *bv++ = av[irow[j] - rstart];
156:     }
157:   }

159:   /* Assemble the matrices so that the correct flags are set */
160:   MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
161:   MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

163:   /* Free work space */
164:   ISRestoreIndices(isrow,&irow);
165:   ISRestoreIndices(iscol,&icol);
166:   *B = newmat;
167:   return(0);
168: }

170: #undef __FUNCT__  
172: int MatRestoreArray_MPIDense(Mat A,PetscScalar **array)
173: {
175:   return(0);
176: }

178: #undef __FUNCT__  
180: int MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
181: {
182:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
183:   MPI_Comm     comm = mat->comm;
184:   int          ierr,nstash,reallocs;
185:   InsertMode   addv;

 188:   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
189:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
190:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
191:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
192:   }
193:   mat->insertmode = addv; /* in case this processor had no cache */

195:   MatStashScatterBegin_Private(&mat->stash,mdn->rowners);
196:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
 197:   PetscLogInfo(mdn->A,"MatAssemblyBegin_MPIDense:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
198:   return(0);
199: }

201: #undef __FUNCT__  
203: int MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
204: {
205:   Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
206:   int          i,n,ierr,*row,*col,flg,j,rstart,ncols;
207:   PetscScalar  *val;
208:   InsertMode   addv=mat->insertmode;

211:   /*  wait on receives */
212:   while (1) {
213:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
214:     if (!flg) break;
215: 
216:     for (i=0; i<n;) {
217:       /* Now identify the consecutive vals belonging to the same row */
218:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
219:       if (j < n) ncols = j-i;
220:       else       ncols = n-i;
221:       /* Now assemble all these values with a single function call */
222:       MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,addv);
223:       i = j;
224:     }
225:   }
226:   MatStashScatterEnd_Private(&mat->stash);
227: 
228:   MatAssemblyBegin(mdn->A,mode);
229:   MatAssemblyEnd(mdn->A,mode);

231:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
232:     MatSetUpMultiply_MPIDense(mat);
233:   }
234:   return(0);
235: }

237: #undef __FUNCT__  
239: int MatZeroEntries_MPIDense(Mat A)
240: {
241:   int          ierr;
242:   Mat_MPIDense *l = (Mat_MPIDense*)A->data;

245:   MatZeroEntries(l->A);
246:   return(0);
247: }

249: #undef __FUNCT__  
251: int MatGetBlockSize_MPIDense(Mat A,int *bs)
252: {
254:   *bs = 1;
255:   return(0);
256: }

258: /* the code does not do the diagonal entries correctly unless the 
 259:    matrix is square and the column and row ownerships are identical.
260:    This is a BUG. The only way to fix it seems to be to access 
 261:    mdn->A directly and not through the MatZeroRows() 
262:    routine. 
263: */
264: #undef __FUNCT__  
266: int MatZeroRows_MPIDense(Mat A,IS is,PetscScalar *diag)
267: {
268:   Mat_MPIDense   *l = (Mat_MPIDense*)A->data;
269:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
270:   int            *nprocs,j,idx,nsends;
271:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
272:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
273:   int            *lens,imdex,*lrows,*values;
274:   MPI_Comm       comm = A->comm;
275:   MPI_Request    *send_waits,*recv_waits;
276:   MPI_Status     recv_status,*send_status;
277:   IS             istmp;
278:   PetscTruth     found;

281:   ISGetLocalSize(is,&N);
282:   ISGetIndices(is,&rows);

284:   /*  first count number of contributors to each processor */
285:   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);
286:   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));
287:   ierr  = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
288:   for (i=0; i<N; i++) {
289:     idx = rows[i];
290:     found = PETSC_FALSE;
291:     for (j=0; j<size; j++) {
292:       if (idx >= owners[j] && idx < owners[j+1]) {
293:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
294:       }
295:     }
296:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
297:   }
298:   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

300:   /* inform other processors of number of messages and max length*/
301:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

303:   /* post receives:   */
304:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
305:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
306:   for (i=0; i<nrecvs; i++) {
307:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
308:   }

310:   /* do sends:
311:       1) starts[i] gives the starting index in svalues for stuff going to 
312:          the ith processor
313:   */
314:   PetscMalloc((N+1)*sizeof(int),&svalues);
315:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
316:   PetscMalloc((size+1)*sizeof(int),&starts);
317:   starts[0]  = 0;
318:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
319:   for (i=0; i<N; i++) {
320:     svalues[starts[owner[i]]++] = rows[i];
321:   }
322:   ISRestoreIndices(is,&rows);

324:   starts[0] = 0;
325:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
326:   count = 0;
327:   for (i=0; i<size; i++) {
328:     if (nprocs[2*i+1]) {
329:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
330:     }
331:   }
332:   PetscFree(starts);

334:   base = owners[rank];

336:   /*  wait on receives */
337:   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
338:   source = lens + nrecvs;
339:   count  = nrecvs; slen = 0;
340:   while (count) {
341:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
342:     /* unpack receives into our local space */
343:     MPI_Get_count(&recv_status,MPI_INT,&n);
344:     source[imdex]  = recv_status.MPI_SOURCE;
345:     lens[imdex]  = n;
346:     slen += n;
347:     count--;
348:   }
349:   PetscFree(recv_waits);
350: 
351:   /* move the data into the send scatter */
352:   PetscMalloc((slen+1)*sizeof(int),&lrows);
353:   count = 0;
354:   for (i=0; i<nrecvs; i++) {
355:     values = rvalues + i*nmax;
356:     for (j=0; j<lens[i]; j++) {
357:       lrows[count++] = values[j] - base;
358:     }
359:   }
360:   PetscFree(rvalues);
361:   PetscFree(lens);
362:   PetscFree(owner);
363:   PetscFree(nprocs);
364: 
365:   /* actually zap the local rows */
366:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
367:   PetscLogObjectParent(A,istmp);
368:   PetscFree(lrows);
369:   MatZeroRows(l->A,istmp,diag);
370:   ISDestroy(istmp);

372:   /* wait on sends */
373:   if (nsends) {
374:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
375:     MPI_Waitall(nsends,send_waits,send_status);
376:     PetscFree(send_status);
377:   }
378:   PetscFree(send_waits);
379:   PetscFree(svalues);

381:   return(0);
382: }

384: #undef __FUNCT__  
386: int MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
387: {
388:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
389:   int          ierr;

392:   VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
393:   VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
394:   MatMult_SeqDense(mdn->A,mdn->lvec,yy);
395:   return(0);
396: }

398: #undef __FUNCT__  
400: int MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
401: {
402:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
403:   int          ierr;

406:   VecScatterBegin(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
407:   VecScatterEnd(xx,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
408:   MatMultAdd_SeqDense(mdn->A,mdn->lvec,yy,zz);
409:   return(0);
410: }

412: #undef __FUNCT__  
414: int MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
415: {
416:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
417:   int          ierr;
418:   PetscScalar  zero = 0.0;

421:   VecSet(&zero,yy);
422:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
423:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
424:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
425:   return(0);
426: }

428: #undef __FUNCT__  
430: int MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
431: {
432:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
433:   int          ierr;

436:   VecCopy(yy,zz);
437:   MatMultTranspose_SeqDense(a->A,xx,a->lvec);
438:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
439:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
440:   return(0);
441: }

443: #undef __FUNCT__  
445: int MatGetDiagonal_MPIDense(Mat A,Vec v)
446: {
447:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
448:   Mat_SeqDense *aloc = (Mat_SeqDense*)a->A->data;
449:   int          ierr,len,i,n,m = A->m,radd;
450:   PetscScalar  *x,zero = 0.0;
451: 
453:   VecSet(&zero,v);
454:   VecGetArray(v,&x);
455:   VecGetSize(v,&n);
456:   if (n != A->M) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
457:   len  = PetscMin(a->A->m,a->A->n);
458:   radd = a->rstart*m;
459:   for (i=0; i<len; i++) {
460:     x[i] = aloc->v[radd + i*m + i];
461:   }
462:   VecRestoreArray(v,&x);
463:   return(0);
464: }

466: #undef __FUNCT__  
468: int MatDestroy_MPIDense(Mat mat)
469: {
470:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
471:   int          ierr;


475: #if defined(PETSC_USE_LOG)
476:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
477: #endif
478:   MatStashDestroy_Private(&mat->stash);
479:   PetscFree(mdn->rowners);
480:   MatDestroy(mdn->A);
481:   if (mdn->lvec)   VecDestroy(mdn->lvec);
482:   if (mdn->Mvctx)  VecScatterDestroy(mdn->Mvctx);
483:   if (mdn->factor) {
484:     if (mdn->factor->temp)   {PetscFree(mdn->factor->temp);}
485:     if (mdn->factor->tag)    {PetscFree(mdn->factor->tag);}
486:     if (mdn->factor->pivots) {PetscFree(mdn->factor->pivots);}
487:     PetscFree(mdn->factor);
488:   }
489:   PetscFree(mdn);
490:   return(0);
491: }

493: #undef __FUNCT__  
495: static int MatView_MPIDense_Binary(Mat mat,PetscViewer viewer)
496: {
497:   Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
498:   int          ierr;

501:   if (mdn->size == 1) {
502:     MatView(mdn->A,viewer);
503:   }
504:   else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
505:   return(0);
506: }

508: #undef __FUNCT__  
510: static int MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
511: {
512:   Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
513:   int               ierr,size = mdn->size,rank = mdn->rank;
514:   PetscViewerType   vtype;
515:   PetscTruth        isascii,isdraw;
516:   PetscViewer       sviewer;
517:   PetscViewerFormat format;

520:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
521:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
522:   if (isascii) {
523:     PetscViewerGetType(viewer,&vtype);
524:     PetscViewerGetFormat(viewer,&format);
525:     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
526:       MatInfo info;
527:       MatGetInfo(mat,MAT_LOCAL,&info);
 528:       PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
529:                    (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
530:       PetscViewerFlush(viewer);
531:       VecScatterView(mdn->Mvctx,viewer);
532:       return(0);
533:     } else if (format == PETSC_VIEWER_ASCII_INFO) {
534:       return(0);
535:     }
536:   } else if (isdraw) {
537:     PetscDraw       draw;
538:     PetscTruth isnull;

540:     PetscViewerDrawGetDraw(viewer,0,&draw);
541:     PetscDrawIsNull(draw,&isnull);
542:     if (isnull) return(0);
543:   }

545:   if (size == 1) {
546:     MatView(mdn->A,viewer);
547:   } else {
548:     /* assemble the entire matrix onto first processor. */
549:     Mat          A;
550:     int          M = mat->M,N = mat->N,m,row,i,nz,*cols;
551:     PetscScalar  *vals;

553:     if (!rank) {
554:       MatCreateMPIDense(mat->comm,M,N,M,N,PETSC_NULL,&A);
555:     } else {
556:       MatCreateMPIDense(mat->comm,0,0,M,N,PETSC_NULL,&A);
557:     }
558:     PetscLogObjectParent(mat,A);

560:     /* Copy the matrix ... This isn't the most efficient means,
561:        but it's quick for now */
562:     row = mdn->rstart; m = mdn->A->m;
563:     for (i=0; i<m; i++) {
564:       MatGetRow(mat,row,&nz,&cols,&vals);
565:       MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
566:       MatRestoreRow(mat,row,&nz,&cols,&vals);
567:       row++;
568:     }

570:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
571:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
572:     PetscViewerGetSingleton(viewer,&sviewer);
573:     if (!rank) {
574:       MatView(((Mat_MPIDense*)(A->data))->A,sviewer);
575:     }
576:     PetscViewerRestoreSingleton(viewer,&sviewer);
577:     PetscViewerFlush(viewer);
578:     MatDestroy(A);
579:   }
580:   return(0);
581: }

583: #undef __FUNCT__  
585: int MatView_MPIDense(Mat mat,PetscViewer viewer)
586: {
587:   int        ierr;
588:   PetscTruth isascii,isbinary,isdraw,issocket;
589: 
591: 
592:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
593:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
594:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
595:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);

597:   if (isascii || issocket || isdraw) {
598:     MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
599:   } else if (isbinary) {
600:     MatView_MPIDense_Binary(mat,viewer);
601:   } else {
602:     SETERRQ1(1,"Viewer type %s not supported by MPI dense matrix",((PetscObject)viewer)->type_name);
603:   }
604:   return(0);
605: }

607: #undef __FUNCT__  
609: int MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
610: {
611:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
612:   Mat          mdn = mat->A;
613:   int          ierr;
614:   PetscReal    isend[5],irecv[5];

617:   info->rows_global    = (double)A->M;
618:   info->columns_global = (double)A->N;
619:   info->rows_local     = (double)A->m;
620:   info->columns_local  = (double)A->N;
621:   info->block_size     = 1.0;
622:   MatGetInfo(mdn,MAT_LOCAL,info);
623:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
624:   isend[3] = info->memory;  isend[4] = info->mallocs;
625:   if (flag == MAT_LOCAL) {
626:     info->nz_used      = isend[0];
627:     info->nz_allocated = isend[1];
628:     info->nz_unneeded  = isend[2];
629:     info->memory       = isend[3];
630:     info->mallocs      = isend[4];
631:   } else if (flag == MAT_GLOBAL_MAX) {
632:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
633:     info->nz_used      = irecv[0];
634:     info->nz_allocated = irecv[1];
635:     info->nz_unneeded  = irecv[2];
636:     info->memory       = irecv[3];
637:     info->mallocs      = irecv[4];
638:   } else if (flag == MAT_GLOBAL_SUM) {
639:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
640:     info->nz_used      = irecv[0];
641:     info->nz_allocated = irecv[1];
642:     info->nz_unneeded  = irecv[2];
643:     info->memory       = irecv[3];
644:     info->mallocs      = irecv[4];
645:   }
646:   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
647:   info->fill_ratio_needed = 0;
648:   info->factor_mallocs    = 0;
649:   return(0);
650: }

652: #undef __FUNCT__  
654: int MatSetOption_MPIDense(Mat A,MatOption op)
655: {
656:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
657:   int          ierr;

660:   switch (op) {
661:   case MAT_NO_NEW_NONZERO_LOCATIONS:
662:   case MAT_YES_NEW_NONZERO_LOCATIONS:
663:   case MAT_NEW_NONZERO_LOCATION_ERR:
664:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
665:   case MAT_COLUMNS_SORTED:
666:   case MAT_COLUMNS_UNSORTED:
667:     MatSetOption(a->A,op);
668:     break;
669:   case MAT_ROW_ORIENTED:
670:     a->roworiented = PETSC_TRUE;
671:     MatSetOption(a->A,op);
672:     break;
673:   case MAT_ROWS_SORTED:
674:   case MAT_ROWS_UNSORTED:
675:   case MAT_YES_NEW_DIAGONALS:
676:   case MAT_USE_HASH_TABLE:
 677:     PetscLogInfo(A,"MatSetOption_MPIDense:Option ignored\n");
678:     break;
679:   case MAT_COLUMN_ORIENTED:
680:     a->roworiented = PETSC_FALSE;
681:     MatSetOption(a->A,op);
682:     break;
683:   case MAT_IGNORE_OFF_PROC_ENTRIES:
684:     a->donotstash = PETSC_TRUE;
685:     break;
686:   case MAT_NO_NEW_DIAGONALS:
687:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
688:   default:
689:     SETERRQ(PETSC_ERR_SUP,"unknown option");
690:   }
691:   return(0);
692: }

694: #undef __FUNCT__  
696: int MatGetRow_MPIDense(Mat A,int row,int *nz,int **idx,PetscScalar **v)
697: {
698:   Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
699:   int          lrow,rstart = mat->rstart,rend = mat->rend,ierr;

 702:   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_SUP,"only local rows");
703:   lrow = row - rstart;
704:   MatGetRow(mat->A,lrow,nz,idx,v);
705:   return(0);
706: }

708: #undef __FUNCT__  
710: int MatRestoreRow_MPIDense(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
711: {

715:   if (idx) {PetscFree(*idx);}
716:   if (v) {PetscFree(*v);}
717:   return(0);
718: }

720: #undef __FUNCT__  
722: int MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
723: {
724:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
725:   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
726:   PetscScalar  *l,*r,x,*v;
727:   int          ierr,i,j,s2a,s3a,s2,s3,m=mdn->A->m,n=mdn->A->n;

730:   MatGetLocalSize(A,&s2,&s3);
731:   if (ll) {
732:     VecGetLocalSize(ll,&s2a);
733:     if (s2a != s2) SETERRQ(PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size");
734:     VecGetArray(ll,&l);
735:     for (i=0; i<m; i++) {
736:       x = l[i];
737:       v = mat->v + i;
738:       for (j=0; j<n; j++) { (*v) *= x; v+= m;}
739:     }
740:     VecRestoreArray(ll,&l);
741:     PetscLogFlops(n*m);
742:   }
743:   if (rr) {
744:     VecGetSize(rr,&s3a);
745:     if (s3a != s3) SETERRQ(PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size");
746:     VecScatterBegin(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
747:     VecScatterEnd(rr,mdn->lvec,INSERT_VALUES,SCATTER_FORWARD,mdn->Mvctx);
748:     VecGetArray(mdn->lvec,&r);
749:     for (i=0; i<n; i++) {
750:       x = r[i];
751:       v = mat->v + i*m;
752:       for (j=0; j<m; j++) { (*v++) *= x;}
753:     }
754:     VecRestoreArray(mdn->lvec,&r);
755:     PetscLogFlops(n*m);
756:   }
757:   return(0);
758: }

760: #undef __FUNCT__  
762: int MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
763: {
764:   Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
765:   Mat_SeqDense *mat = (Mat_SeqDense*)mdn->A->data;
766:   int          ierr,i,j;
767:   PetscReal    sum = 0.0;
768:   PetscScalar  *v = mat->v;

771:   if (mdn->size == 1) {
772:      MatNorm(mdn->A,type,nrm);
773:   } else {
774:     if (type == NORM_FROBENIUS) {
775:       for (i=0; i<mdn->A->n*mdn->A->m; i++) {
776: #if defined(PETSC_USE_COMPLEX)
777:         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
778: #else
779:         sum += (*v)*(*v); v++;
780: #endif
781:       }
782:       MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
783:       *nrm = sqrt(*nrm);
784:       PetscLogFlops(2*mdn->A->n*mdn->A->m);
785:     } else if (type == NORM_1) {
786:       PetscReal *tmp,*tmp2;
787:       PetscMalloc(2*A->N*sizeof(PetscReal),&tmp);
788:       tmp2 = tmp + A->N;
789:       PetscMemzero(tmp,2*A->N*sizeof(PetscReal));
790:       *nrm = 0.0;
791:       v = mat->v;
792:       for (j=0; j<mdn->A->n; j++) {
793:         for (i=0; i<mdn->A->m; i++) {
794:           tmp[j] += PetscAbsScalar(*v);  v++;
795:         }
796:       }
797:       MPI_Allreduce(tmp,tmp2,A->N,MPIU_REAL,MPI_SUM,A->comm);
798:       for (j=0; j<A->N; j++) {
799:         if (tmp2[j] > *nrm) *nrm = tmp2[j];
800:       }
801:       PetscFree(tmp);
802:       PetscLogFlops(A->n*A->m);
803:     } else if (type == NORM_INFINITY) { /* max row norm */
804:       PetscReal ntemp;
805:       MatNorm(mdn->A,type,&ntemp);
806:       MPI_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
807:     } else {
808:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
809:     }
810:   }
811:   return(0);
812: }

814: #undef __FUNCT__  
816: int MatTranspose_MPIDense(Mat A,Mat *matout)
817: {
818:   Mat_MPIDense *a = (Mat_MPIDense*)A->data;
819:   Mat_SeqDense *Aloc = (Mat_SeqDense*)a->A->data;
820:   Mat          B;
821:   int          M = A->M,N = A->N,m,n,*rwork,rstart = a->rstart;
822:   int          j,i,ierr;
823:   PetscScalar  *v;

826:   if (!matout && M != N) {
827:     SETERRQ(PETSC_ERR_SUP,"Supports square matrix only in-place");
828:   }
829:   MatCreateMPIDense(A->comm,PETSC_DECIDE,PETSC_DECIDE,N,M,PETSC_NULL,&B);

831:   m = a->A->m; n = a->A->n; v = Aloc->v;
832:   PetscMalloc(n*sizeof(int),&rwork);
833:   for (j=0; j<n; j++) {
834:     for (i=0; i<m; i++) rwork[i] = rstart + i;
835:     MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
836:     v   += m;
837:   }
838:   PetscFree(rwork);
839:   MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
840:   MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
841:   if (matout) {
842:     *matout = B;
843:   } else {
844:     MatHeaderCopy(A,B);
845:   }
846:   return(0);
847: }

 849:  #include "petscblaslapack.h"
850: #undef __FUNCT__  
852: int MatScale_MPIDense(PetscScalar *alpha,Mat inA)
853: {
854:   Mat_MPIDense *A = (Mat_MPIDense*)inA->data;
855:   Mat_SeqDense *a = (Mat_SeqDense*)A->A->data;
856:   int          one = 1,nz;

859:   nz = inA->m*inA->N;
860:   BLscal_(&nz,alpha,a->v,&one);
861:   PetscLogFlops(nz);
862:   return(0);
863: }

865: static int MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat *);
866: EXTERN int MatGetSubMatrices_MPIDense(Mat,int,IS *,IS *,MatReuse,Mat **);

868: #undef __FUNCT__  
870: int MatSetUpPreallocation_MPIDense(Mat A)
871: {
872:   int        ierr;

875:    MatMPIDenseSetPreallocation(A,0);
876:   return(0);
877: }

879: /* -------------------------------------------------------------------*/
880: static struct _MatOps MatOps_Values = {MatSetValues_MPIDense,
881:        MatGetRow_MPIDense,
882:        MatRestoreRow_MPIDense,
883:        MatMult_MPIDense,
884:        MatMultAdd_MPIDense,
885:        MatMultTranspose_MPIDense,
886:        MatMultTransposeAdd_MPIDense,
887:        0,
888:        0,
889:        0,
890:        0,
891:        0,
892:        0,
893:        0,
894:        MatTranspose_MPIDense,
895:        MatGetInfo_MPIDense,0,
896:        MatGetDiagonal_MPIDense,
897:        MatDiagonalScale_MPIDense,
898:        MatNorm_MPIDense,
899:        MatAssemblyBegin_MPIDense,
900:        MatAssemblyEnd_MPIDense,
901:        0,
902:        MatSetOption_MPIDense,
903:        MatZeroEntries_MPIDense,
904:        MatZeroRows_MPIDense,
905:        0,
906:        0,
907:        0,
908:        0,
909:        MatSetUpPreallocation_MPIDense,
910:        0,
911:        0,
912:        MatGetArray_MPIDense,
913:        MatRestoreArray_MPIDense,
914:        MatDuplicate_MPIDense,
915:        0,
916:        0,
917:        0,
918:        0,
919:        0,
920:        MatGetSubMatrices_MPIDense,
921:        0,
922:        MatGetValues_MPIDense,
923:        0,
924:        0,
925:        MatScale_MPIDense,
926:        0,
927:        0,
928:        0,
929:        MatGetBlockSize_MPIDense,
930:        0,
931:        0,
932:        0,
933:        0,
934:        0,
935:        0,
936:        0,
937:        0,
938:        0,
939:        MatGetSubMatrix_MPIDense,
940:        MatDestroy_MPIDense,
941:        MatView_MPIDense,
942:        MatGetPetscMaps_Petsc};

944: EXTERN_C_BEGIN
945: #undef __FUNCT__  
947: int MatCreate_MPIDense(Mat mat)
948: {
949:   Mat_MPIDense *a;
950:   int          ierr,i;

953:   ierr              = PetscNew(Mat_MPIDense,&a);
954:   mat->data         = (void*)a;
955:   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
956:   mat->factor       = 0;
957:   mat->mapping      = 0;

959:   a->factor       = 0;
960:   mat->insertmode = NOT_SET_VALUES;
961:   MPI_Comm_rank(mat->comm,&a->rank);
962:   MPI_Comm_size(mat->comm,&a->size);

964:   PetscSplitOwnership(mat->comm,&mat->m,&mat->M);
965:   PetscSplitOwnership(mat->comm,&mat->n,&mat->N);
966:   a->nvec = mat->n;

968:   /* the information in the maps duplicates the information computed below, eventually 
969:      we should remove the duplicate information that is not contained in the maps */
970:   PetscMapCreateMPI(mat->comm,mat->m,mat->M,&mat->rmap);
971:   PetscMapCreateMPI(mat->comm,mat->n,mat->N,&mat->cmap);

973:   /* build local table of row and column ownerships */
974:   ierr       = PetscMalloc(2*(a->size+2)*sizeof(int),&a->rowners);
975:   a->cowners = a->rowners + a->size + 1;
976:   PetscLogObjectMemory(mat,2*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
977:   MPI_Allgather(&mat->m,1,MPI_INT,a->rowners+1,1,MPI_INT,mat->comm);
978:   a->rowners[0] = 0;
979:   for (i=2; i<=a->size; i++) {
980:     a->rowners[i] += a->rowners[i-1];
981:   }
982:   a->rstart = a->rowners[a->rank];
983:   a->rend   = a->rowners[a->rank+1];
984:   ierr      = MPI_Allgather(&mat->n,1,MPI_INT,a->cowners+1,1,MPI_INT,mat->comm);
985:   a->cowners[0] = 0;
986:   for (i=2; i<=a->size; i++) {
987:     a->cowners[i] += a->cowners[i-1];
988:   }

990:   /* build cache for off array entries formed */
991:   a->donotstash = PETSC_FALSE;
992:   MatStashCreate_Private(mat->comm,1,&mat->stash);

994:   /* stuff used for matrix vector multiply */
995:   a->lvec        = 0;
996:   a->Mvctx       = 0;
997:   a->roworiented = PETSC_TRUE;

999:   PetscObjectComposeFunctionDynamic((PetscObject)mat,"MatGetDiagonalBlock_C",
1000:                                      "MatGetDiagonalBlock_MPIDense",
1001:                                      MatGetDiagonalBlock_MPIDense);
1002:   return(0);
1003: }
1004: EXTERN_C_END

1006: #undef __FUNCT__  
1008: /*@C
1009:    MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

1011:    Not collective

1013:    Input Parameters:
1014: .  A - the matrix
1015: -  data - optional location of matrix data.  Set data=PETSC_NULL for PETSc
1016:    to control all matrix memory allocation.

1018:    Notes:
1019:    The dense format is fully compatible with standard Fortran 77
1020:    storage by columns.

1022:    The data input variable is intended primarily for Fortran programmers
1023:    who wish to allocate their own matrix memory space.  Most users should
1024:    set data=PETSC_NULL.

1026:    Level: intermediate

1028: .keywords: matrix,dense, parallel

1030: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1031: @*/
1032: int MatMPIDenseSetPreallocation(Mat mat,PetscScalar *data)
1033: {
1034:   Mat_MPIDense *a;
1035:   int          ierr;
1036:   PetscTruth   flg2;

1039:   PetscTypeCompare((PetscObject)mat,MATMPIDENSE,&flg2);
1040:   if (!flg2) return(0);
1041:   mat->preallocated = PETSC_TRUE;
1042:   /* Note:  For now, when data is specified above, this assumes the user correctly
1043:    allocates the local dense storage space.  We should add error checking. */

1045:   a    = (Mat_MPIDense*)mat->data;
1046:   MatCreateSeqDense(PETSC_COMM_SELF,mat->m,mat->N,data,&a->A);
1047:   PetscLogObjectParent(mat,a->A);
1048:   return(0);
1049: }
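
/*
   Illustrative calling sketch (not part of mpidense.c): the usual sequence
   is to create the matrix, set its type to MATMPIDENSE, and then call this
   routine; passing data=PETSC_NULL lets PETSc allocate the local dense
   storage.  comm, M and N are assumed to be supplied by the caller.
*/
#if 0
{
  Mat A;
  int ierr;

  ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(A,PETSC_NULL);CHKERRQ(ierr);
}
#endif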

1051: #undef __FUNCT__  
1053: /*@C
 1054:    MatCreateMPIDense - Creates a parallel matrix in dense format.

1056:    Collective on MPI_Comm

1058:    Input Parameters:
1059: +  comm - MPI communicator
1060: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1061: .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1062: .  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
1063: .  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
1064: -  data - optional location of matrix data.  Set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
1065:    to control all matrix memory allocation.

1067:    Output Parameter:
1068: .  A - the matrix

1070:    Notes:
1071:    The dense format is fully compatible with standard Fortran 77
1072:    storage by columns.

1074:    The data input variable is intended primarily for Fortran programmers
1075:    who wish to allocate their own matrix memory space.  Most users should
1076:    set data=PETSC_NULL (PETSC_NULL_SCALAR for Fortran users).

1078:    The user MUST specify either the local or global matrix dimensions
1079:    (possibly both).

1081:    Level: intermediate

1083: .keywords: matrix,dense, parallel

1085: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1086: @*/
1087: int MatCreateMPIDense(MPI_Comm comm,int m,int n,int M,int N,PetscScalar *data,Mat *A)
1088: {
1089:   int ierr,size;

1092:   MatCreate(comm,m,n,M,N,A);
1093:   MPI_Comm_size(comm,&size);
1094:   if (size > 1) {
1095:     MatSetType(*A,MATMPIDENSE);
1096:     MatMPIDenseSetPreallocation(*A,data);
1097:   } else {
1098:     MatSetType(*A,MATSEQDENSE);
1099:     MatSeqDenseSetPreallocation(*A,data);
1100:   }
1101:   return(0);
1102: }
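
/*
   Illustrative usage sketch (not part of mpidense.c): each process fills
   the entries of the rows it owns and the matrix is then assembled.
   comm, M and N are assumed to be supplied by the caller.
*/
#if 0
{
  Mat         A;
  int         i,j,rstart,rend,ierr;
  PetscScalar one = 1.0;

  ierr = MatCreateMPIDense(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,PETSC_NULL,&A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    for (j=0; j<N; j++) {
      ierr = MatSetValues(A,1,&i,1,&j,&one,INSERT_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(A);CHKERRQ(ierr);
}
#endif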

1104: #undef __FUNCT__  
1106: static int MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
1107: {
1108:   Mat          mat;
1109:   Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
1110:   int          ierr;

1113:   *newmat       = 0;
1114:   MatCreate(A->comm,A->m,A->n,A->M,A->N,&mat);
1115:   MatSetType(mat,MATMPIDENSE);
1116:   ierr              = PetscNew(Mat_MPIDense,&a);
1117:   mat->data         = (void*)a;
1118:   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1119:   mat->factor       = A->factor;
1120:   mat->assembled    = PETSC_TRUE;
1121:   mat->preallocated = PETSC_TRUE;

1123:   a->rstart       = oldmat->rstart;
1124:   a->rend         = oldmat->rend;
1125:   a->size         = oldmat->size;
1126:   a->rank         = oldmat->rank;
1127:   mat->insertmode = NOT_SET_VALUES;
1128:   a->nvec         = oldmat->nvec;
1129:   a->donotstash   = oldmat->donotstash;
1130:   ierr            = PetscMalloc((a->size+1)*sizeof(int),&a->rowners);
1131:   PetscLogObjectMemory(mat,(a->size+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIDense));
1132:   PetscMemcpy(a->rowners,oldmat->rowners,(a->size+1)*sizeof(int));
1133:   MatStashCreate_Private(A->comm,1,&mat->stash);

1135:   MatSetUpMultiply_MPIDense(mat);
1136:   MatDuplicate(oldmat->A,cpvalues,&a->A);
1137:   PetscLogObjectParent(mat,a->A);
1138:   *newmat = mat;
1139:   return(0);
1140: }

 1142:  #include "petscsys.h"

1144: #undef __FUNCT__  
1146: int MatLoad_MPIDense_DenseInFile(MPI_Comm comm,int fd,int M,int N,Mat *newmat)
1147: {
1148:   int          *rowners,i,size,rank,m,ierr,nz,j;
1149:   PetscScalar  *array,*vals,*vals_ptr;
1150:   MPI_Status   status;

1153:   MPI_Comm_rank(comm,&rank);
1154:   MPI_Comm_size(comm,&size);

1156:   /* determine ownership of all rows */
1157:   m          = M/size + ((M % size) > rank);
1158:   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);
1159:   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1160:   rowners[0] = 0;
1161:   for (i=2; i<=size; i++) {
1162:     rowners[i] += rowners[i-1];
1163:   }

1165:   MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1166:   MatGetArray(*newmat,&array);

1168:   if (!rank) {
1169:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1171:     /* read in my part of the matrix numerical values  */
1172:     PetscBinaryRead(fd,vals,m*N,PETSC_SCALAR);
1173: 
 1174:     /* insert into matrix by row (this is why we cannot read directly into array) */
1175:     vals_ptr = vals;
1176:     for (i=0; i<m; i++) {
1177:       for (j=0; j<N; j++) {
1178:         array[i + j*m] = *vals_ptr++;
1179:       }
1180:     }

1182:     /* read in other processors and ship out */
1183:     for (i=1; i<size; i++) {
1184:       nz   = (rowners[i+1] - rowners[i])*N;
1185:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1186:       MPI_Send(vals,nz,MPIU_SCALAR,i,(*newmat)->tag,comm);
1187:     }
1188:   } else {
1189:     /* receive numeric values */
1190:     PetscMalloc(m*N*sizeof(PetscScalar),&vals);

1192:     /* receive message of values*/
1193:     MPI_Recv(vals,m*N,MPIU_SCALAR,0,(*newmat)->tag,comm,&status);

 1195:     /* insert into matrix by row (this is why we cannot read directly into array) */
1196:     vals_ptr = vals;
1197:     for (i=0; i<m; i++) {
1198:       for (j=0; j<N; j++) {
1199:         array[i + j*m] = *vals_ptr++;
1200:       }
1201:     }
1202:   }
1203:   PetscFree(rowners);
1204:   PetscFree(vals);
1205:   MatAssemblyBegin(*newmat,MAT_FINAL_ASSEMBLY);
1206:   MatAssemblyEnd(*newmat,MAT_FINAL_ASSEMBLY);
1207:   return(0);
1208: }

1210: EXTERN_C_BEGIN
1211: #undef __FUNCT__  
1213: int MatLoad_MPIDense(PetscViewer viewer,MatType type,Mat *newmat)
1214: {
1215:   Mat          A;
1216:   PetscScalar  *vals,*svals;
1217:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1218:   MPI_Status   status;
1219:   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1220:   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1221:   int          tag = ((PetscObject)viewer)->tag;
1222:   int          i,nz,ierr,j,rstart,rend,fd;

1225:   MPI_Comm_size(comm,&size);
1226:   MPI_Comm_rank(comm,&rank);
1227:   if (!rank) {
1228:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1229:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1230:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1231:   }

1233:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1234:   M = header[1]; N = header[2]; nz = header[3];

1236:   /*
1237:        Handle case where matrix is stored on disk as a dense matrix 
1238:   */
1239:   if (nz == MATRIX_BINARY_FORMAT_DENSE) {
1240:     MatLoad_MPIDense_DenseInFile(comm,fd,M,N,newmat);
1241:     return(0);
1242:   }

1244:   /* determine ownership of all rows */
1245:   m          = M/size + ((M % size) > rank);
1246:   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);
1247:   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1248:   rowners[0] = 0;
1249:   for (i=2; i<=size; i++) {
1250:     rowners[i] += rowners[i-1];
1251:   }
1252:   rstart = rowners[rank];
1253:   rend   = rowners[rank+1];

1255:   /* distribute row lengths to all processors */
1256:   ierr    = PetscMalloc(2*(rend-rstart)*sizeof(int),&ourlens);
1257:   offlens = ourlens + (rend-rstart);
1258:   if (!rank) {
1259:     PetscMalloc(M*sizeof(int),&rowlengths);
1260:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1261:     PetscMalloc(size*sizeof(int),&sndcounts);
1262:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1263:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1264:     PetscFree(sndcounts);
1265:   } else {
1266:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1267:   }

1269:   if (!rank) {
1270:     /* calculate the number of nonzeros on each processor */
1271:     PetscMalloc(size*sizeof(int),&procsnz);
1272:     PetscMemzero(procsnz,size*sizeof(int));
1273:     for (i=0; i<size; i++) {
1274:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1275:         procsnz[i] += rowlengths[j];
1276:       }
1277:     }
1278:     PetscFree(rowlengths);

1280:     /* determine max buffer needed and allocate it */
1281:     maxnz = 0;
1282:     for (i=0; i<size; i++) {
1283:       maxnz = PetscMax(maxnz,procsnz[i]);
1284:     }
1285:     PetscMalloc(maxnz*sizeof(int),&cols);

1287:     /* read in my part of the matrix column indices  */
1288:     nz = procsnz[0];
1289:     PetscMalloc(nz*sizeof(int),&mycols);
1290:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);

 1292:     /* read in everyone else's part and ship it off */
1293:     for (i=1; i<size; i++) {
1294:       nz   = procsnz[i];
1295:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1296:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1297:     }
1298:     PetscFree(cols);
1299:   } else {
1300:     /* determine buffer space needed for message */
1301:     nz = 0;
1302:     for (i=0; i<m; i++) {
1303:       nz += ourlens[i];
1304:     }
1305:     PetscMalloc(nz*sizeof(int),&mycols);

1307:     /* receive message of column indices*/
1308:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1309:     MPI_Get_count(&status,MPI_INT,&maxnz);
1310:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1311:   }

1313:   /* loop over local rows, determining number of off diagonal entries */
1314:   PetscMemzero(offlens,m*sizeof(int));
1315:   jj = 0;
1316:   for (i=0; i<m; i++) {
1317:     for (j=0; j<ourlens[i]; j++) {
1318:       if (mycols[jj] < rstart || mycols[jj] >= rend) offlens[i]++;
1319:       jj++;
1320:     }
1321:   }

1323:   /* create our matrix */
1324:   for (i=0; i<m; i++) {
1325:     ourlens[i] -= offlens[i];
1326:   }
1327:   MatCreateMPIDense(comm,m,PETSC_DECIDE,M,N,PETSC_NULL,newmat);
1328:   A = *newmat;
1329:   for (i=0; i<m; i++) {
1330:     ourlens[i] += offlens[i];
1331:   }

1333:   if (!rank) {
1334:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1336:     /* read in my part of the matrix numerical values  */
1337:     nz = procsnz[0];
1338:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1339: 
1340:     /* insert into matrix */
1341:     jj      = rstart;
1342:     smycols = mycols;
1343:     svals   = vals;
1344:     for (i=0; i<m; i++) {
1345:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1346:       smycols += ourlens[i];
1347:       svals   += ourlens[i];
1348:       jj++;
1349:     }

1351:     /* read in other processors and ship out */
1352:     for (i=1; i<size; i++) {
1353:       nz   = procsnz[i];
1354:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1355:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1356:     }
1357:     PetscFree(procsnz);
1358:   } else {
1359:     /* receive numeric values */
1360:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1362:     /* receive message of values*/
1363:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1364:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1365:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1367:     /* insert into matrix */
1368:     jj      = rstart;
1369:     smycols = mycols;
1370:     svals   = vals;
1371:     for (i=0; i<m; i++) {
1372:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1373:       smycols += ourlens[i];
1374:       svals   += ourlens[i];
1375:       jj++;
1376:     }
1377:   }
1378:   PetscFree(ourlens);
1379:   PetscFree(vals);
1380:   PetscFree(mycols);
1381:   PetscFree(rowners);

1383:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1384:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1385:   return(0);
1386: }
1387: EXTERN_C_END