Actual source code: mpibdiag.c

  1: /*$Id: mpibdiag.c,v 1.205 2001/08/10 03:31:02 bsmith Exp $*/
  2: /*
  3:    The basic matrix operations for the Block diagonal parallel 
  4:   matrices.
  5: */
  6:  #include "src/mat/impls/bdiag/mpi/mpibdiag.h"
  7:  #include "src/vec/vecimpl.h"

  9: #undef __FUNCT__  
 11: int MatSetValues_MPIBDiag(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v,InsertMode addv)
 12: {
 13:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 14:   int          ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;
 15:   PetscTruth   roworiented = mbd->roworiented;

 18:   for (i=0; i<m; i++) {
 19:     if (idxm[i] < 0) continue;
 20:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 21:     if (idxm[i] >= rstart && idxm[i] < rend) {
 22:       row = idxm[i] - rstart;
 23:       for (j=0; j<n; j++) {
 24:         if (idxn[j] < 0) continue;
 25:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 26:         if (roworiented) {
 27:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j,addv);
 28:         } else {
 29:           MatSetValues(mbd->A,1,&row,1,&idxn[j],v+i+j*m,addv);
 30:         }
 31:       }
 32:     } else {
 33:       if (!mbd->donotstash) {
 34:         if (roworiented) {
 35:           MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n);
 36:         } else {
 37:           MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m);
 38:         }
 39:       }
 40:     }
 41:   }
 42:   return(0);
 43: }
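/* ----------------------------------------------------------------------------
   Editor's note: illustrative sketch, not part of mpibdiag.c.  It shows the
   caller's side of MatSetValues_MPIBDiag(): entries are passed with GLOBAL
   indices, rows in [rstart,rend) go straight into the local matrix mbd->A, and
   any other rows are stashed until assembly.  Assumes "petscmat.h" is included
   and PETSc is initialized; error checking (ierr/CHKERRQ) is omitted, as it is
   throughout this listing. */
static int ExampleTridiagonalFill(Mat A)
{
  int         row,col,rstart,rend,M,N;
  PetscScalar v;

  MatGetSize(A,&M,&N);
  MatGetOwnershipRange(A,&rstart,&rend);
  for (row=rstart; row<rend; row++) {                 /* only locally owned rows here */
    if (row > 0)   {col = row-1; v = -1.0; MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);}
    col = row; v = 2.0; MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);
    if (row < N-1) {col = row+1; v = -1.0; MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);}
  }
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);             /* ships any stashed off-process entries */
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  return 0;
}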

 45: #undef __FUNCT__  
 47: int MatGetValues_MPIBDiag(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
 48: {
 49:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 50:   int          ierr,i,j,row,rstart = mbd->rstart,rend = mbd->rend;

 53:   for (i=0; i<m; i++) {
 54:     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
 55:     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
 56:     if (idxm[i] >= rstart && idxm[i] < rend) {
 57:       row = idxm[i] - rstart;
 58:       for (j=0; j<n; j++) {
 59:         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
 60:         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
 61:         MatGetValues(mbd->A,1,&row,1,&idxn[j],v+i*n+j);
 62:       }
 63:     } else {
 64:       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
 65:     }
 66:   }
 67:   return(0);
 68: }

 70: #undef __FUNCT__  
 72: int MatAssemblyBegin_MPIBDiag(Mat mat,MatAssemblyType mode)
 73: {
 74:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 75:   MPI_Comm     comm = mat->comm;
 76:   int          ierr,nstash,reallocs;
 77:   InsertMode   addv;

 80:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
 81:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
 82:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix adds/inserts on different procs");
 83:   }
 84:   mat->insertmode = addv; /* in case this processor had no cache */
 85:   MatStashScatterBegin_Private(&mat->stash,mbd->rowners);
 86:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
 87:   PetscLogInfo(0,"MatAssemblyBegin_MPIBDiag:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
 88:   return(0);
 89: }
 90: EXTERN int MatSetUpMultiply_MPIBDiag(Mat);

 92: #undef __FUNCT__  
 94: int MatAssemblyEnd_MPIBDiag(Mat mat,MatAssemblyType mode)
 95: {
 96:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
 97:   Mat_SeqBDiag *mlocal;
 98:   int          i,n,*row,*col;
 99:   int          *tmp1,*tmp2,ierr,len,ict,Mblock,Nblock,flg,j,rstart,ncols;
100:   PetscScalar  *val;
101:   InsertMode   addv = mat->insertmode;


105:   while (1) {
106:     MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
107:     if (!flg) break;
108: 
109:     for (i=0; i<n;) {
110:       /* Now identify the consecutive vals belonging to the same row */
111:       for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
112:       if (j < n) ncols = j-i;
113:       else       ncols = n-i;
114:       /* Now assemble all these values with a single function call */
115:       MatSetValues_MPIBDiag(mat,1,row+i,ncols,col+i,val+i,addv);
116:       i = j;
117:     }
118:   }
119:   MatStashScatterEnd_Private(&mat->stash);

121:   MatAssemblyBegin(mbd->A,mode);
122:   MatAssemblyEnd(mbd->A,mode);

124:   /* Fix main diagonal location and determine global diagonals */
125:   mlocal         = (Mat_SeqBDiag*)mbd->A->data;
126:   Mblock         = mat->M/mlocal->bs; Nblock = mat->N/mlocal->bs;
127:   len            = Mblock + Nblock + 1; /* add 1 to prevent 0 malloc */
128:   ierr           = PetscMalloc(2*len*sizeof(int),&tmp1);
129:   tmp2           = tmp1 + len;
130:   ierr           = PetscMemzero(tmp1,2*len*sizeof(int));
131:   mlocal->mainbd = -1;
132:   for (i=0; i<mlocal->nd; i++) {
133:     if (mlocal->diag[i] + mbd->brstart == 0) mlocal->mainbd = i;
134:     tmp1[mlocal->diag[i] + mbd->brstart + Mblock] = 1;
135:   }
136:   MPI_Allreduce(tmp1,tmp2,len,MPI_INT,MPI_SUM,mat->comm);
137:   ict  = 0;
138:   for (i=0; i<len; i++) {
139:     if (tmp2[i]) {
140:       mbd->gdiag[ict] = i - Mblock;
141:       ict++;
142:     }
143:   }
144:   mbd->gnd = ict;
145:   PetscFree(tmp1);

147:   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
148:     MatSetUpMultiply_MPIBDiag(mat);
149:   }
150:   return(0);
151: }
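/* ----------------------------------------------------------------------------
   Editor's note: illustrative sketch, not part of mpibdiag.c.  The stash built
   by MatSetValues_MPIBDiag() is communicated between MatAssemblyBegin() and
   MatAssemblyEnd() above, so purely local work can be overlapped with that
   message traffic.  Error checking omitted as in this listing. */
static int ExampleAssemblyOverlap(Mat A)
{
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);  /* starts shipping stashed off-process entries */
  /* ... purely local computation can be placed here ... */
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);    /* drains the stash, rebuilds the global diagonal list */
  return 0;
}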

153: #undef __FUNCT__  
155: int MatGetBlockSize_MPIBDiag(Mat mat,int *bs)
156: {
157:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
158:   Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mbd->A->data;

161:   *bs = dmat->bs;
162:   return(0);
163: }

165: #undef __FUNCT__  
167: int MatZeroEntries_MPIBDiag(Mat A)
168: {
169:   Mat_MPIBDiag *l = (Mat_MPIBDiag*)A->data;
170:   int          ierr;

173:   MatZeroEntries(l->A);
174:   return(0);
175: }

177: /* Again, this uses the same basic strategy as in the assembly and 
178:    scatter create routines; we should try to do it systematically 
179:    if we can figure out the proper level of generality. */

181: /* the code does not do the diagonal entries correctly unless the 
182:    matrix is square and the column and row ownerships are identical.
183:    This is a BUG. The only way to fix it seems to be to access 
184:    the local matrix (mbd->A) directly and not through the MatZeroRows() 
185:    routine. 
186: */

188: #undef __FUNCT__  
190: int MatZeroRows_MPIBDiag(Mat A,IS is,PetscScalar *diag)
191: {
192:   Mat_MPIBDiag   *l = (Mat_MPIBDiag*)A->data;
193:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
194:   int            *nprocs,j,idx,nsends;
195:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
196:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
197:   int            *lens,imdex,*lrows,*values;
198:   MPI_Comm       comm = A->comm;
199:   MPI_Request    *send_waits,*recv_waits;
200:   MPI_Status     recv_status,*send_status;
201:   IS             istmp;
202:   PetscTruth     found;

205:   ISGetLocalSize(is,&N);
206:   ISGetIndices(is,&rows);

208:   /*  first count number of contributors to each processor */
209:   ierr   = PetscMalloc(2*size*sizeof(int),&nprocs);
210:   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));
211:   ierr   = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
212:   for (i=0; i<N; i++) {
213:     idx = rows[i];
214:     found = PETSC_FALSE;
215:     for (j=0; j<size; j++) {
216:       if (idx >= owners[j] && idx < owners[j+1]) {
217:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
218:       }
219:     }
220:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"row out of range");
221:   }
222:   nsends = 0;  for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}

224:   /* inform other processors of number of messages and max length*/
225:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

227:   /* post receives:   */
228:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
229:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
230:   for (i=0; i<nrecvs; i++) {
231:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
232:   }

234:   /* do sends:
235:       1) starts[i] gives the starting index in svalues for stuff going to 
236:          the ith processor
237:   */
238:   PetscMalloc((N+1)*sizeof(int),&svalues);
239:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
240:   PetscMalloc((size+1)*sizeof(int),&starts);
241:   starts[0] = 0;
242:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
243:   for (i=0; i<N; i++) {
244:     svalues[starts[owner[i]]++] = rows[i];
245:   }
246:   ISRestoreIndices(is,&rows);

248:   starts[0] = 0;
249:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
250:   count = 0;
251:   for (i=0; i<size; i++) {
252:     if (nprocs[2*i+1]) {
253:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
254:     }
255:   }
256:   PetscFree(starts);

258:   base = owners[rank];

260:   /*  wait on receives */
261:   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
262:   source = lens + nrecvs;
263:   count  = nrecvs;
264:   slen   = 0;
265:   while (count) {
266:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
267:     /* unpack receives into our local space */
268:     MPI_Get_count(&recv_status,MPI_INT,&n);
269:     source[imdex]  = recv_status.MPI_SOURCE;
270:     lens[imdex]  = n;
271:     slen += n;
272:     count--;
273:   }
274:   PetscFree(recv_waits);
275: 
276:   /* move the data into the send scatter */
277:   ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);
278:   count = 0;
279:   for (i=0; i<nrecvs; i++) {
280:     values = rvalues + i*nmax;
281:     for (j=0; j<lens[i]; j++) {
282:       lrows[count++] = values[j] - base;
283:     }
284:   }
285:   PetscFree(rvalues);
286:   PetscFree(lens);
287:   PetscFree(owner);
288:   PetscFree(nprocs);
289: 
290:   /* actually zap the local rows */
291:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
292:   PetscLogObjectParent(A,istmp);
293:   PetscFree(lrows);
294:   MatZeroRows(l->A,istmp,diag);
295:   ISDestroy(istmp);

297:   /* wait on sends */
298:   if (nsends) {
299:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
300:     MPI_Waitall(nsends,send_waits,send_status);
301:     PetscFree(send_status);
302:   }
303:   PetscFree(send_waits);
304:   PetscFree(svalues);

306:   return(0);
307: }
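/* ----------------------------------------------------------------------------
   Editor's note: illustrative sketch, not part of mpibdiag.c.  Each process may
   list ANY global rows; MatZeroRows_MPIBDiag() above routes them to their
   owners before zeroing.  Uses the old-style MatZeroRows(Mat,IS,PetscScalar*)
   interface implemented here; as the comment above warns, the diagonal entry
   is only handled correctly for square matrices with matching row/column
   ownership.  Error checking omitted. */
static int ExampleZeroBoundaryRows(Mat A)
{
  int         rows[2],rstart,rend;
  IS          is;
  PetscScalar one = 1.0;

  MatGetOwnershipRange(A,&rstart,&rend);
  rows[0] = rstart;                          /* say, the first and last locally owned rows */
  rows[1] = rend - 1;
  ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);
  MatZeroRows(A,is,&one);                    /* zero the rows and place 1.0 on the diagonal */
  ISDestroy(is);
  return 0;
}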

309: #undef __FUNCT__  
311: int MatMult_MPIBDiag(Mat mat,Vec xx,Vec yy)
312: {
313:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
314:   int          ierr;

317:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
318:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
319:   (*mbd->A->ops->mult)(mbd->A,mbd->lvec,yy);
320:   return(0);
321: }

323: #undef __FUNCT__  
325: int MatMultAdd_MPIBDiag(Mat mat,Vec xx,Vec yy,Vec zz)
326: {
327:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
328:   int          ierr;

331:   VecScatterBegin(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
332:   VecScatterEnd(xx,mbd->lvec,INSERT_VALUES,SCATTER_FORWARD,mbd->Mvctx);
333:   (*mbd->A->ops->multadd)(mbd->A,mbd->lvec,yy,zz);
334:   return(0);
335: }

337: #undef __FUNCT__  
339: int MatMultTranspose_MPIBDiag(Mat A,Vec xx,Vec yy)
340: {
341:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
342:   int          ierr;
343:   PetscScalar  zero = 0.0;

346:   VecSet(&zero,yy);
347:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
348:   VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
349:   VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
350:   return(0);
351: }

353: #undef __FUNCT__  
355: int MatMultTransposeAdd_MPIBDiag(Mat A,Vec xx,Vec yy,Vec zz)
356: {
357:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
358:   int          ierr;

361:   VecCopy(yy,zz);
362:   (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
363:   VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
364:   VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
365:   return(0);
366: }

368: #undef __FUNCT__  
370: int MatGetInfo_MPIBDiag(Mat matin,MatInfoType flag,MatInfo *info)
371: {
372:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
373:   Mat_SeqBDiag *dmat = (Mat_SeqBDiag*)mat->A->data;
374:   int          ierr;
375:   PetscReal    isend[5],irecv[5];

378:   info->block_size     = (PetscReal)dmat->bs;
379:   MatGetInfo(mat->A,MAT_LOCAL,info);
380:   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
381:   isend[3] = info->memory;  isend[4] = info->mallocs;
382:   if (flag == MAT_LOCAL) {
383:     info->nz_used      = isend[0];
384:     info->nz_allocated = isend[1];
385:     info->nz_unneeded  = isend[2];
386:     info->memory       = isend[3];
387:     info->mallocs      = isend[4];
388:   } else if (flag == MAT_GLOBAL_MAX) {
389:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
390:     info->nz_used      = irecv[0];
391:     info->nz_allocated = irecv[1];
392:     info->nz_unneeded  = irecv[2];
393:     info->memory       = irecv[3];
394:     info->mallocs      = irecv[4];
395:   } else if (flag == MAT_GLOBAL_SUM) {
396:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
397:     info->nz_used      = irecv[0];
398:     info->nz_allocated = irecv[1];
399:     info->nz_unneeded  = irecv[2];
400:     info->memory       = irecv[3];
401:     info->mallocs      = irecv[4];
402:   }
403:   info->rows_global    = (double)matin->M;
404:   info->columns_global = (double)matin->N;
405:   info->rows_local     = (double)matin->m;
406:   info->columns_local  = (double)matin->N;
407:   return(0);
408: }
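/* ----------------------------------------------------------------------------
   Editor's note: illustrative sketch, not part of mpibdiag.c.  Shows how the
   MAT_LOCAL / MAT_GLOBAL_MAX / MAT_GLOBAL_SUM flags handled by
   MatGetInfo_MPIBDiag() above are used from application code.  Error checking
   omitted. */
static int ExampleReportInfo(Mat A)
{
  MatInfo info;

  MatGetInfo(A,MAT_GLOBAL_SUM,&info);        /* reductions are done inside MatGetInfo_MPIBDiag() */
  PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %d, allocated %d, mallocs %d\n",
              (int)info.nz_used,(int)info.nz_allocated,(int)info.mallocs);
  return 0;
}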

410: #undef __FUNCT__  
412: int MatGetDiagonal_MPIBDiag(Mat mat,Vec v)
413: {
414:   int          ierr;
415:   Mat_MPIBDiag *A = (Mat_MPIBDiag*)mat->data;

418:   MatGetDiagonal(A->A,v);
419:   return(0);
420: }

422: #undef __FUNCT__  
424: int MatDestroy_MPIBDiag(Mat mat)
425: {
426:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
427:   int          ierr;
428: #if defined(PETSC_USE_LOG)
429:   Mat_SeqBDiag *ms = (Mat_SeqBDiag*)mbd->A->data;

432:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d, BSize=%d, NDiag=%d",mat->M,mat->N,ms->bs,ms->nd);
433: #else
435: #endif
436:   MatStashDestroy_Private(&mat->stash);
437:   PetscFree(mbd->rowners);
438:   PetscFree(mbd->gdiag);
439:   MatDestroy(mbd->A);
440:   if (mbd->lvec) {VecDestroy(mbd->lvec);}
441:   if (mbd->Mvctx) {VecScatterDestroy(mbd->Mvctx);}
442:   PetscFree(mbd);
443:   return(0);
444: }


447: #undef __FUNCT__  
449: static int MatView_MPIBDiag_Binary(Mat mat,PetscViewer viewer)
450: {
451:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)mat->data;
452:   int          ierr;

455:   if (mbd->size == 1) {
456:     MatView(mbd->A,viewer);
457:   } else SETERRQ(PETSC_ERR_SUP,"Only uniprocessor output supported");
458:   return(0);
459: }

461: #undef __FUNCT__  
463: static int MatView_MPIBDiag_ASCIIorDraw(Mat mat,PetscViewer viewer)
464: {
465:   Mat_MPIBDiag      *mbd = (Mat_MPIBDiag*)mat->data;
466:   Mat_SeqBDiag      *dmat = (Mat_SeqBDiag*)mbd->A->data;
467:   int               ierr,i,size = mbd->size,rank = mbd->rank;
468:   PetscTruth        isascii,isdraw;
469:   PetscViewer       sviewer;
470:   PetscViewerFormat format;

473:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
474:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
475:   if (isascii) {
476:     PetscViewerGetFormat(viewer,&format);
477:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
478:       int nline = PetscMin(10,mbd->gnd),k,nk,np;
479:       PetscViewerASCIIPrintf(viewer,"  block size=%d, total number of diagonals=%d\n",dmat->bs,mbd->gnd);
480:       nk = (mbd->gnd-1)/nline + 1;
481:       for (k=0; k<nk; k++) {
482:         PetscViewerASCIIPrintf(viewer,"  global diag numbers:");
483:         np = PetscMin(nline,mbd->gnd - nline*k);
484:         for (i=0; i<np; i++) {
485:           PetscViewerASCIIPrintf(viewer,"  %d",mbd->gdiag[i+nline*k]);
486:         }
487:         PetscViewerASCIIPrintf(viewer,"\n");
488:       }
489:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
490:         MatInfo info;
491:         MPI_Comm_rank(mat->comm,&rank);
492:         MatGetInfo(mat,MAT_LOCAL,&info);
493:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local rows %d nz %d nz alloced %d mem %d \n",rank,mat->m,
494:             (int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
495:         PetscViewerFlush(viewer);
496:         VecScatterView(mbd->Mvctx,viewer);
497:       }
498:       return(0);
499:     }
500:   }

502:   if (isdraw) {
503:     PetscDraw       draw;
504:     PetscTruth isnull;
505:     PetscViewerDrawGetDraw(viewer,0,&draw);
506:     PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
507:   }

509:   if (size == 1) {
510:     MatView(mbd->A,viewer);
511:   } else {
512:     /* assemble the entire matrix onto first processor. */
513:     Mat          A;
514:     int          M = mat->M,N = mat->N,m,row,nz,*cols;
515:     PetscScalar  *vals;
516:     Mat_SeqBDiag *Ambd = (Mat_SeqBDiag*)mbd->A->data;

518:     if (!rank) {
519:       MatCreateMPIBDiag(mat->comm,M,M,N,mbd->gnd,Ambd->bs,mbd->gdiag,PETSC_NULL,&A);
520:     } else {
521:       MatCreateMPIBDiag(mat->comm,0,M,N,0,Ambd->bs,PETSC_NULL,PETSC_NULL,&A);
522:     }
523:     PetscLogObjectParent(mat,A);

525:     /* Copy the matrix ... This isn't the most efficient means,
526:        but it's quick for now */
527:     row = mbd->rstart;
528:     m = mbd->A->m;
529:     for (i=0; i<m; i++) {
530:       MatGetRow(mat,row,&nz,&cols,&vals);
531:       MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);
532:       MatRestoreRow(mat,row,&nz,&cols,&vals);
533:       row++;
534:     }
535:     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
536:     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
537:     PetscViewerGetSingleton(viewer,&sviewer);
538:     if (!rank) {
539:       MatView(((Mat_MPIBDiag*)(A->data))->A,sviewer);
540:     }
541:     PetscViewerRestoreSingleton(viewer,&sviewer);
542:     PetscViewerFlush(viewer);
543:     MatDestroy(A);
544:   }
545:   return(0);
546: }

548: #undef __FUNCT__  
550: int MatView_MPIBDiag(Mat mat,PetscViewer viewer)
551: {
552:   int        ierr;
553:   PetscTruth isascii,isdraw,isbinary;

556:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
557:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
558:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
559:   if (isascii || isdraw) {
560:     MatView_MPIBDiag_ASCIIorDraw(mat,viewer);
561:   } else if (isbinary) {
562:     MatView_MPIBDiag_Binary(mat,viewer);
563:   } else {
564:     SETERRQ1(1,"Viewer type %s not supported by MPIBdiag matrices",((PetscObject)viewer)->type_name);
565:   }
566:   return(0);
567: }

569: #undef __FUNCT__  
571: int MatSetOption_MPIBDiag(Mat A,MatOption op)
572: {
573:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
574:   int          ierr;

576:   switch (op) {
577:   case MAT_NO_NEW_NONZERO_LOCATIONS:
578:   case MAT_YES_NEW_NONZERO_LOCATIONS:
579:   case MAT_NEW_NONZERO_LOCATION_ERR:
580:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
581:   case MAT_NO_NEW_DIAGONALS:
582:   case MAT_YES_NEW_DIAGONALS:
583:     MatSetOption(mbd->A,op);
584:     break;
585:   case MAT_ROW_ORIENTED:
586:     mbd->roworiented = PETSC_TRUE;
587:     MatSetOption(mbd->A,op);
588:     break;
589:   case MAT_COLUMN_ORIENTED:
590:     mbd->roworiented = PETSC_FALSE;
591:     MatSetOption(mbd->A,op);
592:     break;
593:   case MAT_IGNORE_OFF_PROC_ENTRIES:
594:     mbd->donotstash = PETSC_TRUE;
595:     break;
596:   case MAT_ROWS_SORTED:
597:   case MAT_ROWS_UNSORTED:
598:   case MAT_COLUMNS_SORTED:
599:   case MAT_COLUMNS_UNSORTED:
600:     PetscLogInfo(A,"MatSetOption_MPIBDiag:Option ignored\n");
601:     break;
602:   default:
603:     SETERRQ(PETSC_ERR_SUP,"unknown option");
604:   }
605:   return(0);
606: }
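/* ----------------------------------------------------------------------------
   Editor's note: illustrative sketch, not part of mpibdiag.c.  Two of the
   options handled by MatSetOption_MPIBDiag() above: column-oriented input for
   MatSetValues() and discarding of off-process entries (no stash traffic).
   Error checking omitted. */
static int ExampleSetOptions(Mat A)
{
  MatSetOption(A,MAT_COLUMN_ORIENTED);           /* v[] is then read column by column */
  MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);   /* sets mbd->donotstash */
  return 0;
}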

608: #undef __FUNCT__  
610: int MatGetRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
611: {
612:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
613:   int          lrow,ierr;

616:   if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_SUP,"only for local rows")
617:   lrow = row - mat->rstart;
618:   MatGetRow(mat->A,lrow,nz,idx,v);
619:   return(0);
620: }

622: #undef __FUNCT__  
624: int MatRestoreRow_MPIBDiag(Mat matin,int row,int *nz,int **idx,
625:                                   PetscScalar **v)
626: {
627:   Mat_MPIBDiag *mat = (Mat_MPIBDiag*)matin->data;
628:   int          lrow,ierr;

631:   lrow = row - mat->rstart;
632:   MatRestoreRow(mat->A,lrow,nz,idx,v);
633:   return(0);
634: }


637: #undef __FUNCT__  
639: int MatNorm_MPIBDiag(Mat A,NormType type,PetscReal *nrm)
640: {
641:   Mat_MPIBDiag *mbd = (Mat_MPIBDiag*)A->data;
642:   Mat_SeqBDiag *a = (Mat_SeqBDiag*)mbd->A->data;
643:   PetscReal    sum = 0.0;
644:   int          ierr,d,i,nd = a->nd,bs = a->bs,len;
645:   PetscScalar  *dv;

648:   if (type == NORM_FROBENIUS) {
649:     for (d=0; d<nd; d++) {
650:       dv   = a->diagv[d];
651:       len  = a->bdlen[d]*bs*bs;
652:       for (i=0; i<len; i++) {
653: #if defined(PETSC_USE_COMPLEX)
654:         sum += PetscRealPart(PetscConj(dv[i])*dv[i]);
655: #else
656:         sum += dv[i]*dv[i];
657: #endif
658:       }
659:     }
660:     MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,A->comm);
661:     *nrm = sqrt(*nrm);
662:     PetscLogFlops(2*A->n*A->m);
663:   } else if (type == NORM_1) { /* max column norm */
664:     PetscReal *tmp,*tmp2;
665:     int    j;
666:     PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp);
667:     PetscMalloc((mbd->A->n+1)*sizeof(PetscReal),&tmp2);
668:     MatNorm_SeqBDiag_Columns(mbd->A,tmp,mbd->A->n);
669:     *nrm = 0.0;
670:     MPI_Allreduce(tmp,tmp2,mbd->A->n,MPIU_REAL,MPI_SUM,A->comm);
671:     for (j=0; j<mbd->A->n; j++) {
672:       if (tmp2[j] > *nrm) *nrm = tmp2[j];
673:     }
674:     PetscFree(tmp);
675:     PetscFree(tmp2);
676:   } else if (type == NORM_INFINITY) { /* max row norm */
677:     PetscReal normtemp;
678:     MatNorm(mbd->A,type,&normtemp);
679:     MPI_Allreduce(&normtemp,nrm,1,MPIU_REAL,MPI_MAX,A->comm);
680:   }
681:   return(0);
682: }
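/* ----------------------------------------------------------------------------
   Editor's note: illustrative sketch, not part of mpibdiag.c.  The three norm
   types supported by MatNorm_MPIBDiag() above.  Error checking omitted. */
static int ExampleNorms(Mat A)
{
  PetscReal nrm1,nrmf,nrmi;

  MatNorm(A,NORM_1,&nrm1);           /* max column sum, reduced across processes */
  MatNorm(A,NORM_FROBENIUS,&nrmf);
  MatNorm(A,NORM_INFINITY,&nrmi);    /* max row sum */
  PetscPrintf(PETSC_COMM_WORLD,"norms: one %g  frobenius %g  infinity %g\n",nrm1,nrmf,nrmi);
  return 0;
}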

684: EXTERN int MatPrintHelp_SeqBDiag(Mat);
685: #undef __FUNCT__  
687: int MatPrintHelp_MPIBDiag(Mat A)
688: {
689:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;
690:   int          ierr;

693:   if (!a->rank) {
694:     MatPrintHelp_SeqBDiag(a->A);
695:   }
696:   return(0);
697: }

699: EXTERN int MatScale_SeqBDiag(PetscScalar*,Mat);
700: #undef __FUNCT__  
702: int MatScale_MPIBDiag(PetscScalar *alpha,Mat A)
703: {
704:   int          ierr;
705:   Mat_MPIBDiag *a = (Mat_MPIBDiag*)A->data;

708:   MatScale_SeqBDiag(alpha,a->A);
709:   return(0);
710: }

712: #undef __FUNCT__  
714: int MatSetUpPreallocation_MPIBDiag(Mat A)
715: {
716:   int        ierr;

719:    MatMPIBDiagSetPreallocation(A,PETSC_DEFAULT,PETSC_DEFAULT,0,0);
720:   return(0);
721: }

723: /* -------------------------------------------------------------------*/

725: static struct _MatOps MatOps_Values = {MatSetValues_MPIBDiag,
726:        MatGetRow_MPIBDiag,
727:        MatRestoreRow_MPIBDiag,
728:        MatMult_MPIBDiag,
729:        MatMultAdd_MPIBDiag,
730:        MatMultTranspose_MPIBDiag,
731:        MatMultTransposeAdd_MPIBDiag,
732:        0,
733:        0,
734:        0,
735:        0,
736:        0,
737:        0,
738:        0,
739:        0,
740:        MatGetInfo_MPIBDiag,0,
741:        MatGetDiagonal_MPIBDiag,
742:        0,
743:        MatNorm_MPIBDiag,
744:        MatAssemblyBegin_MPIBDiag,
745:        MatAssemblyEnd_MPIBDiag,
746:        0,
747:        MatSetOption_MPIBDiag,
748:        MatZeroEntries_MPIBDiag,
749:        MatZeroRows_MPIBDiag,
750:        0,
751:        0,
752:        0,
753:        0,
754:        MatSetUpPreallocation_MPIBDiag,
755:        0,
756:        0,
757:        0,
758:        0,
759:        0,
760:        0,
761:        0,
762:        0,
763:        0,
764:        0,
765:        0,
766:        0,
767:        MatGetValues_MPIBDiag,
768:        0,
769:        MatPrintHelp_MPIBDiag,
770:        MatScale_MPIBDiag,
771:        0,
772:        0,
773:        0,
774:        MatGetBlockSize_MPIBDiag,
775:        0,
776:        0,
777:        0,
778:        0,
779:        0,
780:        0,
781:        0,
782:        0,
783:        0,
784:        0,
785:        MatDestroy_MPIBDiag,
786:        MatView_MPIBDiag,
787:        MatGetPetscMaps_Petsc};

789: EXTERN_C_BEGIN
790: #undef __FUNCT__  
792: int MatGetDiagonalBlock_MPIBDiag(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
793: {
794:   Mat_MPIBDiag *matin = (Mat_MPIBDiag *)A->data;
795:   int          ierr,lrows,lcols,rstart,rend;
796:   IS           localc,localr;

799:   MatGetLocalSize(A,&lrows,&lcols);
800:   MatGetOwnershipRange(A,&rstart,&rend);
801:   ISCreateStride(PETSC_COMM_SELF,lrows,rstart,1,&localc);
802:   ISCreateStride(PETSC_COMM_SELF,lrows,0,1,&localr);
803:   MatGetSubMatrix(matin->A,localr,localc,PETSC_DECIDE,reuse,a);
804:   ISDestroy(localr);
805:   ISDestroy(localc);

807:   *iscopy = PETSC_TRUE;
808:   return(0);
809: }
810: EXTERN_C_END

812: EXTERN_C_BEGIN
813: #undef __FUNCT__  
815: int MatCreate_MPIBDiag(Mat B)
816: {
817:   Mat_MPIBDiag *b;
818:   int          ierr;

821:   ierr            = PetscNew(Mat_MPIBDiag,&b);
822:   B->data         = (void*)b;
823:   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
824:   B->factor       = 0;
825:   B->mapping      = 0;

827:   B->insertmode = NOT_SET_VALUES;
828:   MPI_Comm_rank(B->comm,&b->rank);
829:   MPI_Comm_size(B->comm,&b->size);

831:   /* build local table of row ownerships */
832:   PetscMalloc((b->size+2)*sizeof(int),&b->rowners);

834:   /* build cache for off-process entries formed during MatSetValues() */
835:   MatStashCreate_Private(B->comm,1,&B->stash);
836:   b->donotstash = PETSC_FALSE;

838:   /* stuff used for matrix-vector multiply */
839:   b->lvec        = 0;
840:   b->Mvctx       = 0;

842:   /* used for MatSetValues() input */
843:   b->roworiented = PETSC_TRUE;

845:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
846:                                      "MatGetDiagonalBlock_MPIBDiag",
847:                                       MatGetDiagonalBlock_MPIBDiag);
848:   return(0);
849: }
850: EXTERN_C_END

852: #undef __FUNCT__  
854: /*@C
855:    MatMPIBDiagSetPreallocation - Sets the block diagonal storage information (number of diagonals, block size, and diagonal numbers) for a matrix of type MATMPIBDIAG.

857:    Collective on Mat

859:    Input Parameters:
860: +  A - the matrix 
861: .  nd - number of block diagonals (global) (optional)
862: .  bs - each element of a diagonal is a bs x bs dense matrix
863: .  diag - optional array of block diagonal numbers (length nd).
864:    For a matrix element A[i,j], where i=row and j=column, the
865:    diagonal number is
866: $     diag = i/bs - j/bs  (integer division)
867:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
868:    needed (expensive).
869: -  diagv  - pointer to actual diagonals (in same order as diag array), 
870:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
871:    to control memory allocation.


874:    Options Database Keys:
875: .  -mat_block_size <bs> - Sets blocksize
876: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

878:    Notes:
879:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
880:    then it must be used on all processors that share the object for that argument.

882:    The parallel matrix is partitioned across the processors by rows, where
883:    each local rectangular matrix is stored in the uniprocessor block 
884:    diagonal format.  See the users manual for further details.

886:    The user MUST specify either the local or global numbers of rows
887:    (possibly both).

889:    The case bs=1 (conventional diagonal storage) is implemented as
890:    a special case.

892:    Fortran Notes:
893:    Fortran programmers cannot set diagv; this variable is ignored.

895:    Level: intermediate

897: .keywords: matrix, block, diagonal, parallel, sparse

899: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
900: @*/
901: int MatMPIBDiagSetPreallocation(Mat B,int nd,int bs,int *diag,PetscScalar **diagv)
902: {
903:   Mat_MPIBDiag *b;
904:   int          ierr,i,k,*ldiag,len,nd2;
905:   PetscScalar  **ldiagv = 0;
906:   PetscTruth   flg2;

909:   PetscTypeCompare((PetscObject)B,MATMPIBDIAG,&flg2);
910:   if (!flg2) return(0);
911:   B->preallocated = PETSC_TRUE;
912:   if (bs == PETSC_DEFAULT) bs = 1;
913:   if (nd == PETSC_DEFAULT) nd = 0;
914:   PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
915:   PetscOptionsGetInt(PETSC_NULL,"-mat_bdiag_ndiag",&nd,PETSC_NULL);
916:   PetscOptionsHasName(PETSC_NULL,"-mat_bdiag_diags",&flg2);
917:   if (nd && !diag) {
918:     PetscMalloc(nd*sizeof(int),&diag);
919:     nd2  = nd;
920:     PetscOptionsGetIntArray(PETSC_NULL,"-mat_bdiag_dvals",diag,&nd2,PETSC_NULL);
921:     if (nd2 != nd) {
922:       SETERRQ(PETSC_ERR_ARG_INCOMP,"Incompatible number of diags and diagonal vals");
923:     }
924:   } else if (flg2) {
925:     SETERRQ(PETSC_ERR_ARG_WRONG,"Must specify number of diagonals with -mat_bdiag_ndiag");
926:   }

928:   if (bs <= 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Blocksize must be positive");

930:   PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
931:   B->n = B->N = PetscMax(B->n,B->N);

933:   if ((B->N%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad column number");
934:   if ((B->m%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad local row number");
935:   if ((B->M%bs)) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size - bad global row number");

937:   /* the information in the maps duplicates the information computed below, eventually 
938:      we should remove the duplicate information that is not contained in the maps */
939:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
940:   PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);


943:   b          = (Mat_MPIBDiag*)B->data;
944:   b->gnd     = nd;

946:   ierr          = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
947:   b->rowners[0] = 0;
948:   for (i=2; i<=b->size; i++) {
949:     b->rowners[i] += b->rowners[i-1];
950:   }
951:   b->rstart  = b->rowners[b->rank];
952:   b->rend    = b->rowners[b->rank+1];
953:   b->brstart = (b->rstart)/bs;
954:   b->brend   = (b->rend)/bs;


957:   /* Determine local diagonals; for now, assume global rows = global cols */
958:   /* These are sorted in MatCreateSeqBDiag */
959:   PetscMalloc((nd+1)*sizeof(int),&ldiag);
960:   len  = B->M/bs + B->N/bs + 1;
961:   PetscMalloc(len*sizeof(int),&b->gdiag);
962:   k    = 0;
963:   PetscLogObjectMemory(B,(nd+1)*sizeof(int) + (b->size+2)*sizeof(int)
964:                         + sizeof(struct _p_Mat) + sizeof(Mat_MPIBDiag));
965:   if (diagv) {
966:     PetscMalloc((nd+1)*sizeof(PetscScalar*),&ldiagv);
967:   }
968:   for (i=0; i<nd; i++) {
969:     b->gdiag[i] = diag[i];
970:     if (diag[i] > 0) { /* lower triangular */
971:       if (diag[i] < b->brend) {
972:         ldiag[k] = diag[i] - b->brstart;
973:         if (diagv) ldiagv[k] = diagv[i];
974:         k++;
975:       }
976:     } else { /* upper triangular */
977:       if (B->M/bs - diag[i] > B->N/bs) {
978:         if (B->M/bs + diag[i] > b->brstart) {
979:           ldiag[k] = diag[i] - b->brstart;
980:           if (diagv) ldiagv[k] = diagv[i];
981:           k++;
982:         }
983:       } else {
984:         if (B->M/bs > b->brstart) {
985:           ldiag[k] = diag[i] - b->brstart;
986:           if (diagv) ldiagv[k] = diagv[i];
987:           k++;
988:         }
989:       }
990:     }
991:   }

993:   /* Form local matrix */
994:   MatCreateSeqBDiag(PETSC_COMM_SELF,B->m,B->n,k,bs,ldiag,ldiagv,&b->A);
995:   PetscLogObjectParent(B,b->A);
996:   PetscFree(ldiag);
997:   if (ldiagv) {PetscFree(ldiagv);}

999:   return(0);
1000: }
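/* ----------------------------------------------------------------------------
   Editor's note: illustrative sketch, not part of mpibdiag.c.  The two-step
   creation path that ends in MatMPIBDiagSetPreallocation() above, here for a
   tridiagonal matrix stored with bs=1 and diagonal numbers -1,0,1.  The global
   size M is a placeholder supplied by the caller; error checking omitted. */
static int ExampleCreateByType(MPI_Comm comm,int M,Mat *A)
{
  int diag[3] = {-1,0,1};

  MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,M,A);   /* old six-argument MatCreate() */
  MatSetType(*A,MATMPIBDIAG);
  MatMPIBDiagSetPreallocation(*A,3,1,diag,PETSC_NULL);
  return 0;
}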

1002: #undef __FUNCT__  
1004: /*@C
1005:    MatCreateMPIBDiag - Creates a sparse parallel matrix in MPIBDiag format.

1007:    Collective on MPI_Comm

1009:    Input Parameters:
1010: +  comm - MPI communicator
1011: .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1012: .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1013: .  N - number of columns (local and global)
1014: .  nd - number of block diagonals (global) (optional)
1015: .  bs - each element of a diagonal is a bs x bs dense matrix
1016: .  diag - optional array of block diagonal numbers (length nd).
1017:    For a matrix element A[i,j], where i=row and j=column, the
1018:    diagonal number is
1019: $     diag = i/bs - j/bs  (integer division)
1020:    Set diag=PETSC_NULL on input for PETSc to dynamically allocate memory as 
1021:    needed (expensive).
1022: -  diagv  - pointer to actual diagonals (in same order as diag array), 
1023:    if allocated by user. Otherwise, set diagv=PETSC_NULL on input for PETSc
1024:    to control memory allocation.

1026:    Output Parameter:
1027: .  A - the matrix 

1029:    Options Database Keys:
1030: .  -mat_block_size <bs> - Sets blocksize
1031: .  -mat_bdiag_diags <s1,s2,s3,...> - Sets diagonal numbers

1033:    Notes:
1034:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1035:    then it must be used on all processors that share the object for that argument.

1037:    The parallel matrix is partitioned across the processors by rows, where
1038:    each local rectangular matrix is stored in the uniprocessor block 
1039:    diagonal format.  See the users manual for further details.

1041:    The user MUST specify either the local or global numbers of rows
1042:    (possibly both).

1044:    The case bs=1 (conventional diagonal storage) is implemented as
1045:    a special case.

1047:    Fortran Notes:
1048:    Fortran programmers cannot set diagv; this variable is ignored.

1050:    Level: intermediate

1052: .keywords: matrix, block, diagonal, parallel, sparse

1054: .seealso: MatCreate(), MatCreateSeqBDiag(), MatSetValues()
1055: @*/
1056: int MatCreateMPIBDiag(MPI_Comm comm,int m,int M,int N,int nd,int bs,int *diag,PetscScalar **diagv,Mat *A)
1057: {
1058:   int ierr,size;

1061:   MatCreate(comm,m,m,M,N,A);
1062:   MPI_Comm_size(comm,&size);
1063:   if (size > 1) {
1064:     MatSetType(*A,MATMPIBDIAG);
1065:     MatMPIBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1066:   } else {
1067:     MatSetType(*A,MATSEQBDIAG);
1068:     MatSeqBDiagSetPreallocation(*A,nd,bs,diag,diagv);
1069:   }
1070:   return(0);
1071: }
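/* ----------------------------------------------------------------------------
   Editor's note: illustrative sketch, not part of mpibdiag.c.  The one-call
   interface documented above, here for a block tridiagonal matrix with bs=2;
   Mb (the number of block rows) is a placeholder supplied by the caller.
   Error checking omitted. */
static int ExampleCreateBlockTridiagonal(MPI_Comm comm,int Mb,Mat *A)
{
  int M = 2*Mb;                  /* global rows = columns, divisible by bs */
  int diag[3] = {-1,0,1};        /* block diagonal numbers: i/bs - j/bs */

  MatCreateMPIBDiag(comm,PETSC_DECIDE,M,M,3,2,diag,PETSC_NULL,A);
  return 0;
}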

1073: #undef __FUNCT__  
1075: /*@C
1076:    MatBDiagGetData - Gets the data for the block diagonal matrix format.
1077:    For the parallel case, this returns information for the local submatrix.

1079:    Input Parameters:
1080: .  mat - the matrix, stored in block diagonal format.

1082:    Not Collective

1084:    Output Parameters:
1085: +  nd - number of block diagonals
1086: .  bs - each element of a diagonal is a bs x bs dense matrix
1087: .  diag - array of block diagonal numbers (length nd).
1088:    For a matrix element A[i,j], where i=row and j=column, the
1089:    diagonal number is
1090: $     diag = i/bs - j/bs  (integer division)
1091: .  bdlen - array of total block lengths of the block diagonals (length nd)
1092: -  diagv - pointer to the actual diagonals (in the same order as the diag array)

1098:    Level: advanced

1100:    Notes:
1101:    See the users manual for further details regarding this storage format.

1103: .keywords: matrix, block, diagonal, get, data

1105: .seealso: MatCreateSeqBDiag(), MatCreateMPIBDiag()
1106: @*/
1107: int MatBDiagGetData(Mat mat,int *nd,int *bs,int **diag,int **bdlen,PetscScalar ***diagv)
1108: {
1109:   Mat_MPIBDiag *pdmat;
1110:   Mat_SeqBDiag *dmat = 0;
1111:   PetscTruth   isseq,ismpi;
1112:   int          ierr;

1116:   PetscTypeCompare((PetscObject)mat,MATSEQBDIAG,&isseq);
1117:   PetscTypeCompare((PetscObject)mat,MATMPIBDIAG,&ismpi);
1118:   if (isseq) {
1119:     dmat = (Mat_SeqBDiag*)mat->data;
1120:   } else if (ismpi) {
1121:     pdmat = (Mat_MPIBDiag*)mat->data;
1122:     dmat = (Mat_SeqBDiag*)pdmat->A->data;
1123:   } else SETERRQ(PETSC_ERR_SUP,"Valid only for MATSEQBDIAG and MATMPIBDIAG formats");
1124:   *nd    = dmat->nd;
1125:   *bs    = dmat->bs;
1126:   *diag  = dmat->diag;
1127:   *bdlen = dmat->bdlen;
1128:   *diagv = dmat->diagv;
1129:   return(0);
1130: }
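/* ----------------------------------------------------------------------------
   Editor's note: illustrative sketch, not part of mpibdiag.c.  Querying the
   diagonal layout through MatBDiagGetData() above; for a parallel matrix the
   results describe the local submatrix.  Error checking omitted. */
static int ExampleInspectDiagonals(Mat A)
{
  int         i,nd,bs,*diag,*bdlen;
  PetscScalar **diagv;

  MatBDiagGetData(A,&nd,&bs,&diag,&bdlen,&diagv);
  for (i=0; i<nd; i++) {
    PetscPrintf(PETSC_COMM_SELF,"diagonal %d: number %d, %d blocks of %d x %d\n",
                i,diag[i],bdlen[i],bs,bs);
  }
  return 0;
}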

1132:  #include "petscsys.h"

1134: EXTERN_C_BEGIN
1135: #undef __FUNCT__  
1137: int MatLoad_MPIBDiag(PetscViewer viewer,MatType type,Mat *newmat)
1138: {
1139:   Mat          A;
1140:   PetscScalar  *vals,*svals;
1141:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1142:   MPI_Status   status;
1143:   int          bs,i,nz,ierr,j,rstart,rend,fd,*rowners,maxnz,*cols;
1144:   int          header[4],rank,size,*rowlengths = 0,M,N,m,Mbs;
1145:   int          *ourlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*smycols;
1146:   int          tag = ((PetscObject)viewer)->tag,extra_rows;

1149:   MPI_Comm_size(comm,&size);
1150:   MPI_Comm_rank(comm,&rank);
1151:   if (!rank) {
1152:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1153:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1154:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1155:     if (header[3] < 0) {
1156:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format,cannot load as MPIBDiag");
1157:     }
1158:   }
1159:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1160:   M = header[1]; N = header[2];

1162:   bs = 1;   /* uses a block size of 1 by default; */
1163:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);

1165:   /* 
1166:      This code adds extra rows to make sure the number of rows is 
1167:      divisible by the blocksize
1168:   */
1169:   Mbs        = M/bs;
1170:   extra_rows = bs - M + bs*(Mbs);
1171:   if (extra_rows == bs) extra_rows = 0;
1172:   else                  Mbs++;
1173:   if (extra_rows && !rank) {
1174:     PetscLogInfo(0,"MatLoad_MPIBDiag:Padding loaded matrix to match blocksize\n");
1175:   }

1177:   /* determine ownership of all rows */
1178:   m          = bs*(Mbs/size + ((Mbs % size) > rank));
1179:   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);
1180:   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1181:   rowners[0] = 0;
1182:   for (i=2; i<=size; i++) {
1183:     rowners[i] += rowners[i-1];
1184:   }
1185:   rstart = rowners[rank];
1186:   rend   = rowners[rank+1];

1188:   /* distribute row lengths to all processors */
1189:   PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1190:   if (!rank) {
1191:     PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1192:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1193:     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1194:     PetscMalloc(size*sizeof(int),&sndcounts);
1195:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1196:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1197:     PetscFree(sndcounts);
1198:   } else {
1199:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1200:   }

1202:   if (!rank) {
1203:     /* calculate the number of nonzeros on each processor */
1204:     PetscMalloc(size*sizeof(int),&procsnz);
1205:     PetscMemzero(procsnz,size*sizeof(int));
1206:     for (i=0; i<size; i++) {
1207:       for (j=rowners[i]; j<rowners[i+1]; j++) {
1208:         procsnz[i] += rowlengths[j];
1209:       }
1210:     }
1211:     PetscFree(rowlengths);

1213:     /* determine max buffer needed and allocate it */
1214:     maxnz = 0;
1215:     for (i=0; i<size; i++) {
1216:       maxnz = PetscMax(maxnz,procsnz[i]);
1217:     }
1218:     PetscMalloc(maxnz*sizeof(int),&cols);

1220:     /* read in my part of the matrix column indices  */
1221:     nz   = procsnz[0];
1222:     PetscMalloc(nz*sizeof(int),&mycols);
1223:     if (size == 1)  nz -= extra_rows;
1224:     PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1225:     if (size == 1)  for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }

1227:     /* read in everyone else's part and ship it off */
1228:     for (i=1; i<size-1; i++) {
1229:       nz   = procsnz[i];
1230:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1231:       MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1232:     }
1233:     /* read in the stuff for the last proc */
1234:     if (size != 1) {
1235:       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
1236:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1237:       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
1238:       MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
1239:     }
1240:     PetscFree(cols);
1241:   } else {
1242:     /* determine buffer space needed for message */
1243:     nz = 0;
1244:     for (i=0; i<m; i++) {
1245:       nz += ourlens[i];
1246:     }
1247:     PetscMalloc(nz*sizeof(int),&mycols);

1249:     /* receive message of column indices*/
1250:     MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1251:     MPI_Get_count(&status,MPI_INT,&maxnz);
1252:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1253:   }

1255:   MatCreateMPIBDiag(comm,m,M+extra_rows,N+extra_rows,PETSC_NULL,bs,PETSC_NULL,PETSC_NULL,newmat);
1256:   A = *newmat;

1258:   if (!rank) {
1259:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1261:     /* read in my part of the matrix numerical values  */
1262:     nz = procsnz[0];
1263:     if (size == 1)  nz -= extra_rows;
1264:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1265:     if (size == 1)  for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

1267:     /* insert into matrix */
1268:     jj      = rstart;
1269:     smycols = mycols;
1270:     svals   = vals;
1271:     for (i=0; i<m; i++) {
1272:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1273:       smycols += ourlens[i];
1274:       svals   += ourlens[i];
1275:       jj++;
1276:     }

1278:     /* read in other processors (except the last one) and ship out */
1279:     for (i=1; i<size-1; i++) {
1280:       nz   = procsnz[i];
1281:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1282:       MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1283:     }
1284:     /* the last proc */
1285:     if (size != 1){
1286:       nz   = procsnz[i] - extra_rows;
1287:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1288:       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
1289:       MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
1290:     }
1291:     PetscFree(procsnz);
1292:   } else {
1293:     /* receive numeric values */
1294:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1296:     /* receive message of values*/
1297:     MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1298:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1299:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

1301:     /* insert into matrix */
1302:     jj      = rstart;
1303:     smycols = mycols;
1304:     svals   = vals;
1305:     for (i=0; i<m; i++) {
1306:       MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1307:       smycols += ourlens[i];
1308:       svals   += ourlens[i];
1309:       jj++;
1310:     }
1311:   }
1312:   PetscFree(ourlens);
1313:   PetscFree(vals);
1314:   PetscFree(mycols);
1315:   PetscFree(rowners);

1317:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1318:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1319:   return(0);
1320: }
1321: EXTERN_C_END
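/* ----------------------------------------------------------------------------
   Editor's note: illustrative sketch, not part of mpibdiag.c.  Loading a matrix
   written with MatView() into the MPIBDiag format, which dispatches to
   MatLoad_MPIBDiag() above; the block size can be chosen at load time with
   -matload_block_size.  The binary-viewer open mode constant has varied between
   PETSc releases, so PETSC_BINARY_RDONLY below is an assumption for this
   vintage.  Error checking omitted. */
static int ExampleLoad(MPI_Comm comm,const char *file,Mat *A)
{
  PetscViewer viewer;

  PetscViewerBinaryOpen(comm,file,PETSC_BINARY_RDONLY,&viewer);
  MatLoad(viewer,MATMPIBDIAG,A);
  PetscViewerDestroy(viewer);
  return 0;
}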