Actual source code: mpirowbs.c

  1: /* $Id: mpirowbs.c,v 2.9 2001/08/10 03:30:53 bsmith Exp $*/

 3:  #include "src/mat/impls/rowbs/mpi/mpirowbs.h"
 4:  #include "src/vec/vecimpl.h"

  6: #define CHUNCKSIZE_LOCAL   10

  8: #undef __FUNCT__  
 10: static int MatFreeRowbs_Private(Mat A,int n,int *i,PetscScalar *v)
 11: {

 15:   if (v) {
 16: #if defined(PETSC_USE_LOG)
 17:     int len = -n*(sizeof(int)+sizeof(PetscScalar));
 18: #endif
 19:     PetscFree(v);
 20:     PetscLogObjectMemory(A,len);
 21:   }
 22:   return(0);
 23: }

 25: #undef __FUNCT__  
 27: static int MatMallocRowbs_Private(Mat A,int n,int **i,PetscScalar **v)
 28: {
 29:   int len,ierr;

 32:   if (!n) {
 33:     *i = 0; *v = 0;
 34:   } else {
 35:     len = n*(sizeof(int) + sizeof(PetscScalar));
 36:     PetscMalloc(len,v);
 37:     PetscLogObjectMemory(A,len);
 38:     *i = (int *)(*v + n);
 39:   }
 40:   return(0);
 41: }

 43: #undef __FUNCT__  
 45: int MatScale_MPIRowbs(PetscScalar *alphain,Mat inA)
 46: {
 47:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)inA->data;
 48:   BSspmat      *A = a->A;
 49:   BSsprow      *vs;
 50:   PetscScalar  *ap,alpha = *alphain;
 51:   int          i,m = inA->m,nrow,j;

 54:   for (i=0; i<m; i++) {
 55:     vs   = A->rows[i];
 56:     nrow = vs->length;
 57:     ap   = vs->nz;
 58:     for (j=0; j<nrow; j++) {
 59:       ap[j] *= alpha;
 60:     }
 61:   }
 62:   PetscLogFlops(a->nz);
 63:   return(0);
 64: }

 66: /* ----------------------------------------------------------------- */
 67: #undef __FUNCT__  
 69: static int MatCreateMPIRowbs_local(Mat A,int nz,int *nnz)
 70: {
 71:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)A->data;
 72:   int          ierr,i,len,nzalloc = 0,m = A->m;
 73:   BSspmat      *bsmat;
 74:   BSsprow      *vs;

 77:   if (!nnz) {
 78:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
 79:     if (nz <= 0)             nz = 1;
 80:     nzalloc = 1;
 81:     PetscMalloc((m+1)*sizeof(int),&nnz);
 82:     for (i=0; i<m; i++) nnz[i] = nz;
 83:     nz      = nz*m;
 84:   } else {
 85:     nz = 0;
 86:     for (i=0; i<m; i++) {
 87:       if (nnz[i] <= 0) nnz[i] = 1;
 88:       nz += nnz[i];
 89:     }
 90:   }

 92:   /* Allocate BlockSolve matrix context */
 93:   ierr  = PetscNew(BSspmat,&bsif->A);
 94:   bsmat = bsif->A;
 95:   BSset_mat_icc_storage(bsmat,PETSC_FALSE);
 96:   BSset_mat_symmetric(bsmat,PETSC_FALSE);
 97:   len                    = m*(sizeof(BSsprow*)+ sizeof(BSsprow)) + 1;
 98:   ierr                   = PetscMalloc(len,&bsmat->rows);
 99:   bsmat->num_rows        = m;
100:   bsmat->global_num_rows = A->M;
101:   bsmat->map             = bsif->bsmap;
102:   vs                     = (BSsprow*)(bsmat->rows + m);
103:   for (i=0; i<m; i++) {
104:     bsmat->rows[i]  = vs;
105:     bsif->imax[i]   = nnz[i];
106:     vs->diag_ind    = -1;
107:     if (nnz[i] > 0) {
108:       MatMallocRowbs_Private(A,nnz[i],&(vs->col),&(vs->nz));
109:     } else {
110:       vs->col = 0; vs->nz = 0;
111:     }
112:     /* put zero on diagonal */
113:     /*vs->length            = 1;
114:     vs->col[0]      = i + bsif->rstart;
115:     vs->nz[0]       = 0.0;*/
116:     vs->length = 0;
117:     vs++;
118:   }
119:   PetscLogObjectMemory(A,sizeof(BSspmat) + len);
120:   bsif->nz               = 0;
121:   bsif->maxnz            = nz;
122:   bsif->sorted           = 0;
123:   bsif->roworiented      = PETSC_TRUE;
124:   bsif->nonew            = 0;
125:   bsif->bs_color_single  = 0;

127:   if (nzalloc) {PetscFree(nnz);}
128:   return(0);
129: }

131: #undef __FUNCT__  
133: static int MatSetValues_MPIRowbs_local(Mat AA,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
134: {
135:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
136:   BSspmat      *A = mat->A;
137:   BSsprow      *vs;
138:   int          *rp,k,a,b,t,ii,row,nrow,i,col,l,rmax,ierr;
139:   int          *imax = mat->imax,nonew = mat->nonew,sorted = mat->sorted;
140:   PetscScalar  *ap,value;

143:   for (k=0; k<m; k++) { /* loop over added rows */
144:     row = im[k];
145:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
146:     if (row >= AA->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
147:     vs   = A->rows[row];
148:     ap   = vs->nz; rp = vs->col;
149:     rmax = imax[row]; nrow = vs->length;
150:     a    = 0;
151:     for (l=0; l<n; l++) { /* loop over added columns */
152:       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative col");
153:       if (in[l] >= AA->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
154:       col = in[l]; value = *v++;
155:       if (!sorted) a = 0; b = nrow;
156:       while (b-a > 5) {
157:         t = (b+a)/2;
158:         if (rp[t] > col) b = t;
159:         else             a = t;
160:       }
161:       for (i=a; i<b; i++) {
162:         if (rp[i] > col) break;
163:         if (rp[i] == col) {
164:           if (addv == ADD_VALUES) ap[i] += value;
165:           else                    ap[i] = value;
166:           goto noinsert;
167:         }
168:       }
169:       if (nonew) goto noinsert;
170:       if (nrow >= rmax) {
171:         /* there is no extra room in row, therefore enlarge */
172:         int    *itemp,*iout,*iin = vs->col;
173:         PetscScalar *vout,*vin = vs->nz,*vtemp;

175:         /* malloc new storage space */
176:         imax[row] += CHUNCKSIZE_LOCAL;
177:         MatMallocRowbs_Private(AA,imax[row],&itemp,&vtemp);
178:         vout = vtemp; iout = itemp;
179:         for (ii=0; ii<i; ii++) {
180:           vout[ii] = vin[ii];
181:           iout[ii] = iin[ii];
182:         }
183:         vout[i] = value;
184:         iout[i] = col;
185:         for (ii=i+1; ii<=nrow; ii++) {
186:           vout[ii] = vin[ii-1];
187:           iout[ii] = iin[ii-1];
188:         }
189:         /* free old row storage */
190:         if (rmax > 0) {
191:           MatFreeRowbs_Private(AA,rmax,vs->col,vs->nz);
192:         }
193:         vs->col           =  iout; vs->nz = vout;
194:         rmax              =  imax[row];
195:         mat->maxnz        += CHUNCKSIZE_LOCAL;
196:         mat->reallocs++;
197:       } else {
198:         /* shift higher columns over to make room for newie */
199:         for (ii=nrow-1; ii>=i; ii--) {
200:           rp[ii+1] = rp[ii];
201:           ap[ii+1] = ap[ii];
202:         }
203:         rp[i] = col;
204:         ap[i] = value;
205:       }
206:       nrow++;
207:       mat->nz++;
208:       AA->same_nonzero = PETSC_FALSE;
209:       noinsert:;
210:       a = i + 1;
211:     }
212:     vs->length = nrow;
213:   }
214:   return(0);
215: }


218: #undef __FUNCT__  
220: static int MatAssemblyBegin_MPIRowbs_local(Mat A,MatAssemblyType mode)
221: {
223:   return(0);
224: }

226: #undef __FUNCT__  
228: static int MatAssemblyEnd_MPIRowbs_local(Mat AA,MatAssemblyType mode)
229: {
230:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)AA->data;
231:   BSspmat      *A = a->A;
232:   BSsprow      *vs;
233:   int          i,j,rstart = a->rstart;

236:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

238:   /* Mark location of diagonal */
239:   for (i=0; i<AA->m; i++) {
240:     vs = A->rows[i];
241:     for (j=0; j<vs->length; j++) {
242:       if (vs->col[j] == i + rstart) {
243:         vs->diag_ind = j;
244:         break;
245:       }
246:     }
247:     if (vs->diag_ind == -1) {
248:       SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"no diagonal entry");
249:     }
250:   }
251:   return(0);
252: }

254: #undef __FUNCT__  
256: static int MatZeroRows_MPIRowbs_local(Mat A,IS is,PetscScalar *diag)
257: {
258:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;
259:   BSspmat      *l = a->A;
260:   int          i,ierr,N,*rz,m = A->m - 1;

263:   ISGetLocalSize(is,&N);
264:   ISGetIndices(is,&rz);
265:   if (diag) {
266:     for (i=0; i<N; i++) {
267:       if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
268:       if (l->rows[rz[i]]->length > 0) { /* in case row was completely empty */
269:         l->rows[rz[i]]->length = 1;
270:         l->rows[rz[i]]->nz[0]  = *diag;
271:         l->rows[rz[i]]->col[0] = a->rstart + rz[i];
272:       } else {
273:         MatSetValues(A,1,&rz[i],1,&rz[i],diag,INSERT_VALUES);
274:       }
275:     }
276:   } else {
277:     for (i=0; i<N; i++) {
278:       if (rz[i] < 0 || rz[i] > m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Out of range");
279:       l->rows[rz[i]]->length = 0;
280:     }
281:   }
282:   A->same_nonzero = PETSC_FALSE;
283:   ISRestoreIndices(is,&rz);
284:   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
285:   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
286:   return(0);
287: }

289: #undef __FUNCT__  
291: static int MatNorm_MPIRowbs_local(Mat A,NormType type,PetscReal *norm)
292: {
293:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
294:   BSsprow      *vs,**rs;
295:   PetscScalar  *xv;
296:   PetscReal    sum = 0.0;
297:   int          *xi,nz,i,j,ierr;

300:   rs = mat->A->rows;
301:   if (type == NORM_FROBENIUS) {
302:     for (i=0; i<A->m; i++) {
303:       vs = *rs++;
304:       nz = vs->length;
305:       xv = vs->nz;
306:       while (nz--) {
307: #if defined(PETSC_USE_COMPLEX)
308:         sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
309: #else
310:         sum += (*xv)*(*xv); xv++;
311: #endif
312:       }
313:     }
314:     *norm = sqrt(sum);
315:   } else if (type == NORM_1) { /* max column norm */
316:     PetscReal *tmp;
317:     ierr  = PetscMalloc(A->n*sizeof(PetscReal),&tmp);
318:     ierr  = PetscMemzero(tmp,A->n*sizeof(PetscReal));
319:     *norm = 0.0;
320:     for (i=0; i<A->m; i++) {
321:       vs = *rs++;
322:       nz = vs->length;
323:       xi = vs->col;
324:       xv = vs->nz;
325:       while (nz--) {
326:         tmp[*xi] += PetscAbsScalar(*xv);
327:         xi++; xv++;
328:       }
329:     }
330:     for (j=0; j<A->n; j++) {
331:       if (tmp[j] > *norm) *norm = tmp[j];
332:     }
333:     PetscFree(tmp);
334:   } else if (type == NORM_INFINITY) { /* max row norm */
335:     *norm = 0.0;
336:     for (i=0; i<A->m; i++) {
337:       vs = *rs++;
338:       nz = vs->length;
339:       xv = vs->nz;
340:       sum = 0.0;
341:       while (nz--) {
342:         sum += PetscAbsScalar(*xv); xv++;
343:       }
344:       if (sum > *norm) *norm = sum;
345:     }
346:   } else {
347:     SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
348:   }
349:   return(0);
350: }

352: /* ----------------------------------------------------------------- */

354: #undef __FUNCT__  
356: int MatSetValues_MPIRowbs(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode av)
357: {
358:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
359:   int          ierr,i,j,row,col,rstart = a->rstart,rend = a->rend;
360:   PetscTruth   roworiented = a->roworiented;

363:   /* Note:  There's no need to "unscale" the matrix, since scaling is
364:      confined to a->pA, and we're working with a->A here */
365:   for (i=0; i<m; i++) {
366:     if (im[i] < 0) continue;
367:     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
368:     if (im[i] >= rstart && im[i] < rend) {
369:       row = im[i] - rstart;
370:       for (j=0; j<n; j++) {
371:         if (in[j] < 0) continue;
372:         if (in[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
373:         if (in[j] >= 0 && in[j] < mat->N){
374:           col = in[j];
375:           if (roworiented) {
376:             MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i*n+j,av);
377:           } else {
378:             MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,v+i+j*m,av);
379:           }
380:         } else {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid column");}
381:       }
382:     } else {
383:       if (!a->donotstash) {
384:         if (roworiented) {
385:           MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
386:         } else {
387:           MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
388:         }
389:       }
390:     }
391:   }
392:   return(0);
393: }

395: #undef __FUNCT__  
397: int MatAssemblyBegin_MPIRowbs(Mat mat,MatAssemblyType mode)
398: {
399:   Mat_MPIRowbs  *a = (Mat_MPIRowbs*)mat->data;
400:   MPI_Comm      comm = mat->comm;
401:   int           ierr,nstash,reallocs;
402:   InsertMode    addv;

405:   /* Note:  There's no need to "unscale" the matrix, since scaling is
406:             confined to a->pA, and we're working with a->A here */

408:   /* make sure all processors are either in INSERTMODE or ADDMODE */
409:   MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,comm);
410:   if (addv == (ADD_VALUES|INSERT_VALUES)) {
411:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some procs inserted; others added");
412:   }
413:   mat->insertmode = addv; /* in case this processor had no cache */

415:   MatStashScatterBegin_Private(&mat->stash,a->rowners);
416:   MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
417:   PetscLogInfo(0,"MatAssemblyBegin_MPIRowbs:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
418:   return(0);
419: }

421:  #include "petscviewer.h"

423: #undef __FUNCT__  
425: static int MatView_MPIRowbs_ASCII(Mat mat,PetscViewer viewer)
426: {
427:   Mat_MPIRowbs      *a = (Mat_MPIRowbs*)mat->data;
428:   int               ierr,i,j;
429:   PetscTruth        isascii;
430:   BSspmat           *A = a->A;
431:   BSsprow           **rs = A->rows;
432:   PetscViewerFormat format;

435:   PetscViewerGetFormat(viewer,&format);
436:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);

438:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
439:     int ind_l,ind_g,clq_l,clq_g,color;
440:     ind_l = BSlocal_num_inodes(a->pA);CHKERRBS(0);
441:     ind_g = BSglobal_num_inodes(a->pA);CHKERRBS(0);
442:     clq_l = BSlocal_num_cliques(a->pA);CHKERRBS(0);
443:     clq_g = BSglobal_num_cliques(a->pA);CHKERRBS(0);
444:     color = BSnum_colors(a->pA);CHKERRBS(0);
445:     PetscViewerASCIIPrintf(viewer,"  %d global inode(s), %d global clique(s), %d color(s)\n",ind_g,clq_g,color);
446:     PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d local inode(s), %d local clique(s)\n",a->rank,ind_l,clq_l);
447:   } else  if (format == PETSC_VIEWER_ASCII_COMMON) {
448:     for (i=0; i<A->num_rows; i++) {
449:       PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
450:       for (j=0; j<rs[i]->length; j++) {
451:         if (rs[i]->nz[j]) {PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);}
452:       }
453:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
454:     }
455:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
456:     SETERRQ(PETSC_ERR_SUP,"Matlab format not supported");
457:   } else {
458:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
459:     for (i=0; i<A->num_rows; i++) {
460:       PetscViewerASCIISynchronizedPrintf(viewer,"row %d:",i+a->rstart);
461:       for (j=0; j<rs[i]->length; j++) {
462:         PetscViewerASCIISynchronizedPrintf(viewer," %d %g ",rs[i]->col[j],rs[i]->nz[j]);
463:       }
464:       PetscViewerASCIISynchronizedPrintf(viewer,"\n");
465:     }
466:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
467:   }
468:   PetscViewerFlush(viewer);
469:   return(0);
470: }

472: #undef __FUNCT__  
474: static int MatView_MPIRowbs_Binary(Mat mat,PetscViewer viewer)
475: {
476:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
477:   int          ierr,i,M,m,rank,size,*sbuff,*rowlengths;
478:   int          *recvcts,*recvdisp,fd,*cols,maxnz,nz,j;
479:   BSspmat      *A = a->A;
480:   BSsprow      **rs = A->rows;
481:   MPI_Comm     comm = mat->comm;
482:   MPI_Status   status;
483:   PetscScalar  *vals;
484:   MatInfo      info;

487:   MPI_Comm_size(comm,&size);
488:   MPI_Comm_rank(comm,&rank);

490:   M = mat->M; m = mat->m;
491:   /* First gather together on the first processor the lengths of 
492:      each row, and write them out to the file */
493:   PetscMalloc(m*sizeof(int),&sbuff);
494:   for (i=0; i<A->num_rows; i++) {
495:     sbuff[i] = rs[i]->length;
496:   }
497:   MatGetInfo(mat,MAT_GLOBAL_SUM,&info);
498:   if (!rank) {
499:     PetscViewerBinaryGetDescriptor(viewer,&fd);
500:     PetscMalloc((4+M)*sizeof(int),&rowlengths);
501:     PetscMalloc(size*sizeof(int),&recvcts);
502:     recvdisp = a->rowners;
503:     for (i=0; i<size; i++) {
504:       recvcts[i] = recvdisp[i+1] - recvdisp[i];
505:     }
506:     /* first four elements of rowlength are the header */
507:     rowlengths[0] = mat->cookie;
508:     rowlengths[1] = mat->M;
509:     rowlengths[2] = mat->N;
510:     rowlengths[3] = (int)info.nz_used;
511:     MPI_Gatherv(sbuff,m,MPI_INT,rowlengths+4,recvcts,recvdisp,MPI_INT,0,comm);
512:     PetscFree(sbuff);
513:     PetscBinaryWrite(fd,rowlengths,4+M,PETSC_INT,0);
514:     /* count the number of nonzeros on each processor */
515:     PetscMemzero(recvcts,size*sizeof(int));
516:     for (i=0; i<size; i++) {
517:       for (j=recvdisp[i]; j<recvdisp[i+1]; j++) {
518:         recvcts[i] += rowlengths[j+4];  /* row lengths start after the 4-int header */
519:       }
520:     }
521:     /* allocate buffer long enough to hold largest one */
522:     maxnz = 0;
523:     for (i=0; i<size; i++) {
524:       maxnz = PetscMax(maxnz,recvcts[i]);
525:     }
526:     PetscFree(rowlengths);
527:     PetscFree(recvcts);
528:     PetscMalloc(maxnz*sizeof(int),&cols);

530:     /* binary store column indices for 0th processor */
531:     nz = 0;
532:     for (i=0; i<A->num_rows; i++) {
533:       for (j=0; j<rs[i]->length; j++) {
534:         cols[nz++] = rs[i]->col[j];
535:       }
536:     }
537:     PetscBinaryWrite(fd,cols,nz,PETSC_INT,0);

539:     /* receive and store column indices for all other processors */
540:     for (i=1; i<size; i++) {
541:       /* should tell processor that I am now ready and to begin the send */
542:       MPI_Recv(cols,maxnz,MPI_INT,i,mat->tag,comm,&status);
543:       MPI_Get_count(&status,MPI_INT,&nz);
544:       PetscBinaryWrite(fd,cols,nz,PETSC_INT,0);
545:     }
546:     PetscFree(cols);
547:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

549:     /* binary store values for 0th processor */
550:     nz = 0;
551:     for (i=0; i<A->num_rows; i++) {
552:       for (j=0; j<rs[i]->length; j++) {
553:         vals[nz++] = rs[i]->nz[j];
554:       }
555:     }
556:     PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,0);

558:     /* receive and store nonzeros for all other processors */
559:     for (i=1; i<size; i++) {
560:       /* should tell processor that I am now ready and to begin the send */
561:       MPI_Recv(vals,maxnz,MPIU_SCALAR,i,mat->tag,comm,&status);
562:       MPI_Get_count(&status,MPIU_SCALAR,&nz);
563:       PetscBinaryWrite(fd,vals,nz,PETSC_SCALAR,0);
564:     }
565:     PetscFree(vals);
566:   } else {
567:     MPI_Gatherv(sbuff,m,MPI_INT,0,0,0,MPI_INT,0,comm);
568:     PetscFree(sbuff);

570:     /* count local nonzeros */
571:     nz = 0;
572:     for (i=0; i<A->num_rows; i++) {
573:       for (j=0; j<rs[i]->length; j++) {
574:         nz++;
575:       }
576:     }
577:     /* copy into buffer column indices */
578:     PetscMalloc(nz*sizeof(int),&cols);
579:     nz = 0;
580:     for (i=0; i<A->num_rows; i++) {
581:       for (j=0; j<rs[i]->length; j++) {
582:         cols[nz++] = rs[i]->col[j];
583:       }
584:     }
585:     /* send */  /* should wait until processor zero tells me to go */
586:     MPI_Send(cols,nz,MPI_INT,0,mat->tag,comm);
587:     PetscFree(cols);

589:     /* copy into buffer column values */
590:     PetscMalloc(nz*sizeof(PetscScalar),&vals);
591:     nz   = 0;
592:     for (i=0; i<A->num_rows; i++) {
593:       for (j=0; j<rs[i]->length; j++) {
594:         vals[nz++] = rs[i]->nz[j];
595:       }
596:     }
597:     /* send */  /* should wait until processor zero tells me to go */
598:     MPI_Send(vals,nz,MPIU_SCALAR,0,mat->tag,comm);
599:     PetscFree(vals);
600:   }

602:   return(0);
603: }

605: #undef __FUNCT__  
607: int MatView_MPIRowbs(Mat mat,PetscViewer viewer)
608: {
609:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
610:   int          ierr;
611:   PetscTruth   isascii,isbinary;

614:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
615:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
616:   if (!bsif->blocksolveassembly) {
617:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
618:   }
619:   if (isascii) {
620:     MatView_MPIRowbs_ASCII(mat,viewer);
621:   } else if (isbinary) {
622:     MatView_MPIRowbs_Binary(mat,viewer);
623:   } else {
624:     SETERRQ1(1,"Viewer type %s not supported by MPIRowbs matrices",((PetscObject)viewer)->type_name);
625:   }
626:   return(0);
627: }
628: 
629: #undef __FUNCT__  
631: static int MatAssemblyEnd_MPIRowbs_MakeSymmetric(Mat mat)
632: {
633:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
634:   BSspmat      *A = a->A;
635:   BSsprow      *vs;
636:   int          size,rank,M,rstart,tag,i,j,*rtable,*w1,*w3,*w4,len,proc,nrqs;
637:   int          msz,*pa,bsz,nrqr,**rbuf1,**sbuf1,**ptr,*tmp,*ctr,col,index,row;
638:   int          ctr_j,*sbuf1_j,k,ierr;
639:   PetscScalar  val=0.0;
640:   MPI_Comm     comm;
641:   MPI_Request  *s_waits1,*r_waits1;
642:   MPI_Status   *s_status,*r_status;

645:   comm   = mat->comm;
646:   tag    = mat->tag;
647:   size   = a->size;
648:   rank   = a->rank;
649:   M      = mat->M;
650:   rstart = a->rstart;

652:   PetscMalloc(M*sizeof(int),&rtable);
653:   /* Create hash table for the mapping :row -> proc */
654:   for (i=0,j=0; i<size; i++) {
655:     len = a->rowners[i+1];
656:     for (; j<len; j++) {
657:       rtable[j] = i;
658:     }
659:   }

661:   /* Evaluate communication - mesg to whom, length of mesg, and buffer space
662:      required. Based on this, buffers are allocated, and data copied into them. */
663:   PetscMalloc(size*4*sizeof(int),&w1);/*  mesg size */
664:   w3   = w1 + 2*size;       /* number of rows that contribute entries for proc i */
665:   w4   = w3 + size;       /* temp work space used in determining w1,  w3 */
666:   PetscMemzero(w1,size*3*sizeof(int)); /* initialize work vector */

668:   for (i=0;  i<mat->m; i++) {
669:     PetscMemzero(w4,size*sizeof(int)); /* initialize work vector */
670:     vs = A->rows[i];
671:     for (j=0; j<vs->length; j++) {
672:       proc = rtable[vs->col[j]];
673:       w4[proc]++;
674:     }
675:     for (j=0; j<size; j++) {
676:       if (w4[j]) { w1[2*j] += w4[j]; w3[j]++;}
677:     }
678:   }
679: 
680:   nrqs       = 0;              /* number of outgoing messages */
681:   msz        = 0;              /* total mesg length (for all procs) */
682:   w1[2*rank] = 0;              /* no mesg sent to itself */
683:   w3[rank]   = 0;
684:   for (i=0; i<size; i++) {
685:     if (w1[2*i])  {w1[2*i+1] = 1; nrqs++;} /* there exists a message to proc i */
686:   }
687:   /* pa - is list of processors to communicate with */
688:   PetscMalloc((nrqs+1)*sizeof(int),&pa);
689:   for (i=0,j=0; i<size; i++) {
690:     if (w1[2*i]) {pa[j] = i; j++;}
691:   }

693:   /* Each message would have a header = 1 + 2*(no of ROWS) + data */
694:   for (i=0; i<nrqs; i++) {
695:     j       = pa[i];
696:     w1[2*j] += w1[2*j+1] + 2*w3[j];
697:     msz     += w1[2*j];
698:   }
699: 
700:   /* Do a global reduction to determine how many messages to expect */
701:   PetscMaxSum(comm,w1,&bsz,&nrqr);

703:   /* Allocate memory for recv buffers . Prob none if nrqr = 0 ???? */
704:   len      = (nrqr+1)*sizeof(int*) + nrqr*bsz*sizeof(int);
705:   ierr     = PetscMalloc(len,&rbuf1);
706:   rbuf1[0] = (int*)(rbuf1 + nrqr);
707:   for (i=1; i<nrqr; ++i) rbuf1[i] = rbuf1[i-1] + bsz;

709:   /* Post the receives */
710:   PetscMalloc((nrqr+1)*sizeof(MPI_Request),&r_waits1);
711:   for (i=0; i<nrqr; ++i){
712:     MPI_Irecv(rbuf1[i],bsz,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits1+i);
713:   }
714: 
715:   /* Allocate Memory for outgoing messages */
716:   len   = 2*size*sizeof(int*) + (size+msz)*sizeof(int);
717:   ierr  = PetscMalloc(len,&sbuf1);
718:   ptr   = sbuf1 + size;     /* Pointers to the data in outgoing buffers */
719:   ierr  = PetscMemzero(sbuf1,2*size*sizeof(int*));
720:   tmp   = (int*)(sbuf1 + 2*size);
721:   ctr   = tmp + msz;

723:   {
724:     int *iptr = tmp,ict  = 0;
725:     for (i=0; i<nrqs; i++) {
726:       j        = pa[i];
727:       iptr    += ict;
728:       sbuf1[j] = iptr;
729:       ict      = w1[2*j];
730:     }
731:   }

733:   /* Form the outgoing messages */
734:   /* Clean up the header space */
735:   for (i=0; i<nrqs; i++) {
736:     j           = pa[i];
737:     sbuf1[j][0] = 0;
738:     ierr        = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(int));
739:     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
740:   }

742:   /* Parse the matrix and copy the data into sbuf1 */
743:   for (i=0; i<mat->m; i++) {
744:     PetscMemzero(ctr,size*sizeof(int));
745:     vs = A->rows[i];
746:     for (j=0; j<vs->length; j++) {
747:       col  = vs->col[j];
748:       proc = rtable[col];
749:       if (proc != rank) { /* copy to the outgoing buffer */
750:         ctr[proc]++;
751:         *ptr[proc] = col;
752:         ptr[proc]++;
753:       } else {
754:         row = col - rstart;
755:         col = i + rstart;
756:         MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
757:       }
758:     }
759:     /* Update the headers for the current row */
760:     for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
761:       if ((ctr_j = ctr[j])) {
762:         sbuf1_j        = sbuf1[j];
763:         k               = ++sbuf1_j[0];
764:         sbuf1_j[2*k]   = ctr_j;
765:         sbuf1_j[2*k-1] = i + rstart;
766:       }
767:     }
768:   }
769:    /* Check Validity of the outgoing messages */
770:   {
771:     int sum;
772:     for (i=0 ; i<nrqs ; i++) {
773:       j = pa[i];
774:       if (w3[j] != sbuf1[j][0]) {SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[1] mismatch!\n"); }
775:     }

777:     for (i=0 ; i<nrqs ; i++) {
778:       j = pa[i];
779:       sum = 1;
780:       for (k = 1; k <= w3[j]; k++) sum += sbuf1[j][2*k]+2;
781:       if (sum != w1[2*j]) { SETERRQ(PETSC_ERR_PLIB,"Blew it! Header[2-n] mismatch!\n"); }
782:     }
783:   }
784: 
785:   /* Now post the sends */
786:   PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);
787:   for (i=0; i<nrqs; ++i) {
788:     j    = pa[i];
789:     MPI_Isend(sbuf1[j],w1[2*j],MPI_INT,j,tag,comm,s_waits1+i);
790:   }
791: 
792:   /* Receive messages*/
793:   PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status);
794:   for (i=0; i<nrqr; ++i) {
795:     MPI_Waitany(nrqr,r_waits1,&index,r_status+i);
796:     /* Process the Message */
797:     {
798:       int    *rbuf1_i,n_row,ct1;

800:       rbuf1_i = rbuf1[index];
801:       n_row   = rbuf1_i[0];
802:       ct1     = 2*n_row+1;
803:       val     = 0.0;
804:       /* Optimise this later */
805:       for (j=1; j<=n_row; j++) {
806:         col = rbuf1_i[2*j-1];
807:         for (k=0; k<rbuf1_i[2*j]; k++,ct1++) {
808:           row = rbuf1_i[ct1] - rstart;
809:           MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
810:         }
811:       }
812:     }
813:   }

815:   PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);
816:   MPI_Waitall(nrqs,s_waits1,s_status);

818:   PetscFree(rtable);
819:   PetscFree(w1);
820:   PetscFree(pa);
821:   PetscFree(rbuf1);
822:   PetscFree(sbuf1);
823:   PetscFree(r_waits1);
824:   PetscFree(s_waits1);
825:   PetscFree(r_status);
826:   PetscFree(s_status);
827:   return(0);
828: }

830: /*
831:      This does the BlockSolve portion of the matrix assembly.
832:    It is provided in a separate routine so that users can
833:    operate on the matrix (using MatScale(), MatShift() etc.) after 
834:    the matrix has been assembled but before BlockSolve has sucked it
835:    in and devoured it.
836: */
837: #undef __FUNCT__  
839: int MatAssemblyEnd_MPIRowbs_ForBlockSolve(Mat mat)
840: {
841:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
842:   int          ierr,ldim,low,high,i;
843:   PetscScalar  *diag;

846:   if ((mat->was_assembled) && (!mat->same_nonzero)) {  /* Free the old info */
847:     if (a->pA)       {BSfree_par_mat(a->pA);CHKERRBS(0);}
848:     if (a->comm_pA)  {BSfree_comm(a->comm_pA);CHKERRBS(0);}
849:   }

851:   if ((!mat->same_nonzero) || (!mat->was_assembled)) {
852:     /* Indicates bypassing cliques in coloring */
853:     if (a->bs_color_single) {
854:       BSctx_set_si(a->procinfo,100);
855:     }
856:     /* Form permuted matrix for efficient parallel execution */
857:     a->pA = BSmain_perm(a->procinfo,a->A);CHKERRBS(0);
858:     /* Set up the communication */
859:     a->comm_pA = BSsetup_forward(a->pA,a->procinfo);CHKERRBS(0);
860:   } else {
861:     /* Repermute the matrix */
862:     BSmain_reperm(a->procinfo,a->A,a->pA);CHKERRBS(0);
863:   }

865:   /* Symmetrically scale the matrix by the diagonal */
866:   BSscale_diag(a->pA,a->pA->diag,a->procinfo);CHKERRBS(0);

868:   /* Store inverse of square root of permuted diagonal scaling matrix */
869:   VecGetLocalSize(a->diag,&ldim);
870:   VecGetOwnershipRange(a->diag,&low,&high);
871:   VecGetArray(a->diag,&diag);
872:   for (i=0; i<ldim; i++) {
873:     if (a->pA->scale_diag[i] != 0.0) {
874:       diag[i] = 1.0/sqrt(PetscAbsScalar(a->pA->scale_diag[i]));
875:     } else {
876:       diag[i] = 1.0;
877:     }
878:   }
879:   VecRestoreArray(a->diag,&diag);
880:   a->blocksolveassembly = 1;
881:   mat->was_assembled    = PETSC_TRUE;
882:   mat->same_nonzero     = PETSC_TRUE;
883:   PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs_ForBlockSolve:Completed BlockSolve95 matrix assembly\n");
884:   return(0);
885: }
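
/*
   Editorial usage sketch (not part of the original source): the comment above
   MatAssemblyEnd_MPIRowbs_ForBlockSolve() describes the intended ordering --
   finish the usual PETSc assembly, optionally operate on the matrix, and let
   the BlockSolve portion of the assembly happen lazily on first use (here via
   MatMult()).  The MatScale() calling sequence follows the PETSc 2.1-era
   interface and is an assumption; the function name below is illustrative only.
*/
#if 0
static int MatRowbsAssemblyOrdering_Sketch(Mat mat,Vec x,Vec y)
{
  PetscScalar alpha = 2.0;
  int         ierr;

  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);  /* standard PETSc assembly only */
  ierr = MatScale(&alpha,mat);CHKERRQ(ierr);                    /* still operates on the unpermuted a->A */
  ierr = MatMult(mat,x,y);CHKERRQ(ierr);                        /* triggers MatAssemblyEnd_MPIRowbs_ForBlockSolve() */
  return 0;
}
#endif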

887: #undef __FUNCT__  
889: int MatAssemblyEnd_MPIRowbs(Mat mat,MatAssemblyType mode)
890: {
891:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
892:   int          i,n,row,col,*rows,*cols,ierr,rstart,nzcount,flg,j,ncols;
893:   PetscScalar  *vals,val;
894:   InsertMode   addv = mat->insertmode;

897:   while (1) {
898:     MatStashScatterGetMesg_Private(&mat->stash,&n,&rows,&cols,&vals,&flg);
899:     if (!flg) break;
900: 
901:     for (i=0; i<n;) {
902:       /* Now identify the consecutive vals belonging to the same row */
903:       for (j=i,rstart=rows[j]; j<n; j++) { if (rows[j] != rstart) break; }
904:       if (j < n) ncols = j-i;
905:       else       ncols = n-i;
906:       /* Now assemble all these values with a single function call */
907:       MatSetValues_MPIRowbs(mat,1,rows+i,ncols,cols+i,vals+i,addv);
908:       i = j;
909:     }
910:   }
911:   MatStashScatterEnd_Private(&mat->stash);

913:   rstart = a->rstart;
914:   nzcount = a->nz; /* This is the number of nonzeros entered by the user */
915:   /* BlockSolve requires that the matrix is structurally symmetric */
916:   if (mode == MAT_FINAL_ASSEMBLY && !mat->structurally_symmetric) {
917:     MatAssemblyEnd_MPIRowbs_MakeSymmetric(mat);
918:   }
919: 
920:   /* BlockSolve requires that all the diagonal elements are set */
921:   val  = 0.0;
922:   for (i=0; i<mat->m; i++) {
923:     row = i; col = i + rstart;
924:     MatSetValues_MPIRowbs_local(mat,1,&row,1,&col,&val,ADD_VALUES);
925:   }
926: 
927:   MatAssemblyBegin_MPIRowbs_local(mat,mode);
928:   MatAssemblyEnd_MPIRowbs_local(mat,mode);
929: 
930:   a->blocksolveassembly = 0;
931:   PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs:Matrix size: %d X %d; storage space: %d unneeded,%d used\n",mat->m,mat->n,a->maxnz-a->nz,a->nz);
932:   PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs: User entered %d nonzeros, PETSc added %d\n",nzcount,a->nz-nzcount);
933:   PetscLogInfo(mat,"MatAssemblyEnd_MPIRowbs:Number of mallocs during MatSetValues is %d\n",a->reallocs);
934:   return(0);
935: }

937: #undef __FUNCT__  
939: int MatZeroEntries_MPIRowbs(Mat mat)
940: {
941:   Mat_MPIRowbs *l = (Mat_MPIRowbs*)mat->data;
942:   BSspmat      *A = l->A;
943:   BSsprow      *vs;
944:   int          i,j;

947:   for (i=0; i <mat->m; i++) {
948:     vs = A->rows[i];
949:     for (j=0; j< vs->length; j++) vs->nz[j] = 0.0;
950:   }
951:   return(0);
952: }

954: /* the code does not do the diagonal entries correctly unless the 
955:    matrix is square and the column and row ownerships are identical.
956:    This is a BUG.
957: */

959: #undef __FUNCT__  
961: int MatZeroRows_MPIRowbs(Mat A,IS is,PetscScalar *diag)
962: {
963:   Mat_MPIRowbs   *l = (Mat_MPIRowbs*)A->data;
964:   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
965:   int            *nprocs,j,idx,nsends;
966:   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
967:   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
968:   int            *lens,imdex,*lrows,*values;
969:   MPI_Comm       comm = A->comm;
970:   MPI_Request    *send_waits,*recv_waits;
971:   MPI_Status     recv_status,*send_status;
972:   IS             istmp;
973:   PetscTruth     found;

976:   ISGetLocalSize(is,&N);
977:   ISGetIndices(is,&rows);

979:   /*  first count number of contributors to each processor */
980:   ierr   = PetscMalloc(2*size*sizeof(int),&nprocs);
981:   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));
982:   ierr   = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
983:   for (i=0; i<N; i++) {
984:     idx = rows[i];
985:     found = PETSC_FALSE;
986:     for (j=0; j<size; j++) {
987:       if (idx >= owners[j] && idx < owners[j+1]) {
988:         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
989:       }
990:     }
991:     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
992:   }
993:   nsends = 0;  for (i=0; i<size; i++) {nsends += nprocs[2*i+1];}

995:   /* inform other processors of number of messages and max length*/
996:   PetscMaxSum(comm,nprocs,&nmax,&nrecvs);

998:   /* post receives:   */
999:   PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1000:   PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1001:   for (i=0; i<nrecvs; i++) {
1002:     MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1003:   }

1005:   /* do sends:
1006:       1) starts[i] gives the starting index in svalues for stuff going to 
1007:          the ith processor
1008:   */
1009:   PetscMalloc((N+1)*sizeof(int),&svalues);
1010:   PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1011:   PetscMalloc((size+1)*sizeof(int),&starts);
1012:   starts[0] = 0;
1013:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1014:   for (i=0; i<N; i++) {
1015:     svalues[starts[owner[i]]++] = rows[i];
1016:   }
1017:   ISRestoreIndices(is,&rows);

1019:   starts[0] = 0;
1020:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1021:   count = 0;
1022:   for (i=0; i<size; i++) {
1023:     if (nprocs[2*i+1]) {
1024:       MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1025:     }
1026:   }
1027:   PetscFree(starts);

1029:   base = owners[rank];

1031:   /*  wait on receives */
1032:   PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1033:   source = lens + nrecvs;
1034:   count = nrecvs; slen = 0;
1035:   while (count) {
1036:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1037:     /* unpack receives into our local space */
1038:     MPI_Get_count(&recv_status,MPI_INT,&n);
1039:     source[imdex]  = recv_status.MPI_SOURCE;
1040:     lens[imdex]    = n;
1041:     slen           += n;
1042:     count--;
1043:   }
1044:   PetscFree(recv_waits);
1045: 
1046:   /* move the data into the send scatter */
1047:   PetscMalloc((slen+1)*sizeof(int),&lrows);
1048:   count = 0;
1049:   for (i=0; i<nrecvs; i++) {
1050:     values = rvalues + i*nmax;
1051:     for (j=0; j<lens[i]; j++) {
1052:       lrows[count++] = values[j] - base;
1053:     }
1054:   }
1055:   PetscFree(rvalues);
1056:   PetscFree(lens);
1057:   PetscFree(owner);
1058:   PetscFree(nprocs);
1059: 
1060:   /* actually zap the local rows */
1061:   ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
1062:   PetscLogObjectParent(A,istmp);
1063:   PetscFree(lrows);
1064:   MatZeroRows_MPIRowbs_local(A,istmp,diag);
1065:   ISDestroy(istmp);

1067:   /* wait on sends */
1068:   if (nsends) {
1069:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1070:     MPI_Waitall(nsends,send_waits,send_status);
1071:     PetscFree(send_status);
1072:   }
1073:   PetscFree(send_waits);
1074:   PetscFree(svalues);

1076:   return(0);
1077: }

1079: #undef __FUNCT__  
1081: int MatNorm_MPIRowbs(Mat mat,NormType type,PetscReal *norm)
1082: {
1083:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1084:   BSsprow      *vs,**rs;
1085:   PetscScalar  *xv;
1086:   PetscReal    sum = 0.0;
1087:   int          *xi,nz,i,j,ierr;

1090:   if (a->size == 1) {
1091:     MatNorm_MPIRowbs_local(mat,type,norm);
1092:   } else {
1093:     rs = a->A->rows;
1094:     if (type == NORM_FROBENIUS) {
1095:       for (i=0; i<mat->m; i++) {
1096:         vs = *rs++;
1097:         nz = vs->length;
1098:         xv = vs->nz;
1099:         while (nz--) {
1100: #if defined(PETSC_USE_COMPLEX)
1101:           sum += PetscRealPart(PetscConj(*xv)*(*xv)); xv++;
1102: #else
1103:           sum += (*xv)*(*xv); xv++;
1104: #endif
1105:         }
1106:       }
1107:       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1108:       *norm = sqrt(*norm);
1109:     } else if (type == NORM_1) { /* max column norm */
1110:       PetscReal *tmp,*tmp2;
1111:       ierr  = PetscMalloc(mat->n*sizeof(PetscReal),&tmp);
1112:       ierr  = PetscMalloc(mat->n*sizeof(PetscReal),&tmp2);
1113:       ierr  = PetscMemzero(tmp,mat->n*sizeof(PetscReal));
1114:       *norm = 0.0;
1115:       for (i=0; i<mat->m; i++) {
1116:         vs = *rs++;
1117:         nz = vs->length;
1118:         xi = vs->col;
1119:         xv = vs->nz;
1120:         while (nz--) {
1121:           tmp[*xi] += PetscAbsScalar(*xv);
1122:           xi++; xv++;
1123:         }
1124:       }
1125:       MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1126:       for (j=0; j<mat->n; j++) {
1127:         if (tmp2[j] > *norm) *norm = tmp2[j];
1128:       }
1129:       PetscFree(tmp);
1130:       PetscFree(tmp2);
1131:     } else if (type == NORM_INFINITY) { /* max row norm */
1132:       PetscReal ntemp = 0.0;
1133:       for (i=0; i<mat->m; i++) {
1134:         vs = *rs++;
1135:         nz = vs->length;
1136:         xv = vs->nz;
1137:         sum = 0.0;
1138:         while (nz--) {
1139:           sum += PetscAbsScalar(*xv); xv++;
1140:         }
1141:         if (sum > ntemp) ntemp = sum;
1142:       }
1143:       MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1144:     } else {
1145:       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1146:     }
1147:   }
1148:   return(0);
1149: }

1151: #undef __FUNCT__  
1153: int MatMult_MPIRowbs(Mat mat,Vec xx,Vec yy)
1154: {
1155:   Mat_MPIRowbs *bsif = (Mat_MPIRowbs*)mat->data;
1156:   BSprocinfo   *bspinfo = bsif->procinfo;
1157:   PetscScalar  *xxa,*xworka,*yya;
1158:   int          ierr;

1161:   if (!bsif->blocksolveassembly) {
1162:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1163:   }

1165:   /* Permute and apply diagonal scaling:  [ xwork = D^{1/2} * x ] */
1166:   if (!bsif->vecs_permscale) {
1167:     VecGetArray(bsif->xwork,&xworka);
1168:     VecGetArray(xx,&xxa);
1169:     BSperm_dvec(xxa,xworka,bsif->pA->perm);CHKERRBS(0);
1170:     VecRestoreArray(bsif->xwork,&xworka);
1171:     VecRestoreArray(xx,&xxa);
1172:     VecPointwiseDivide(bsif->xwork,bsif->diag,xx);
1173:   }

1175:   VecGetArray(xx,&xxa);
1176:   VecGetArray(yy,&yya);
1177:   /* Do lower triangular multiplication:  [ y = L * xwork ] */
1178:   if (bspinfo->single) {
1179:     BSforward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1180:   }  else {
1181:     BSforward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1182:   }
1183: 
1184:   /* Do upper triangular multiplication:  [ y = y + L^{T} * xwork ] */
1185:   if (mat->symmetric) {
1186:     if (bspinfo->single){
1187:       BSbackward1(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1188:     } else {
1189:       BSbackward(bsif->pA,xxa,yya,bsif->comm_pA,bspinfo);CHKERRBS(0);
1190:     }
1191:   }
1192:   /* not needed for ILU version since forward does it all */
1193:   VecRestoreArray(xx,&xxa);
1194:   VecRestoreArray(yy,&yya);

1196:   /* Apply diagonal scaling to vector:  [  y = D^{1/2} * y ] */
1197:   if (!bsif->vecs_permscale) {
1198:     VecGetArray(bsif->xwork,&xworka);
1199:     VecGetArray(xx,&xxa);
1200:     BSiperm_dvec(xworka,xxa,bsif->pA->perm);CHKERRBS(0);
1201:     VecRestoreArray(bsif->xwork,&xworka);
1202:     VecRestoreArray(xx,&xxa);
1203:     VecPointwiseDivide(yy,bsif->diag,bsif->xwork);
1204:     VecGetArray(bsif->xwork,&xworka);
1205:     VecGetArray(yy,&yya);
1206:     BSiperm_dvec(xworka,yya,bsif->pA->perm);CHKERRBS(0);
1207:     VecRestoreArray(bsif->xwork,&xworka);
1208:     VecRestoreArray(yy,&yya);
1209:   }
1210:   PetscLogFlops(2*bsif->nz - mat->m);

1212:   return(0);
1213: }

1215: #undef __FUNCT__  
1217: int MatMultAdd_MPIRowbs(Mat mat,Vec xx,Vec yy,Vec zz)
1218: {
1219:   int          ierr;
1220:   PetscScalar  one = 1.0;

1223:   (*mat->ops->mult)(mat,xx,zz);
1224:   VecAXPY(&one,yy,zz);
1225:   return(0);
1226: }

1228: #undef __FUNCT__  
1230: int MatGetInfo_MPIRowbs(Mat A,MatInfoType flag,MatInfo *info)
1231: {
1232:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)A->data;
1233:   PetscReal    isend[5],irecv[5];
1234:   int          ierr;

1237:   info->rows_global    = (double)A->M;
1238:   info->columns_global = (double)A->N;
1239:   info->rows_local     = (double)A->m;
1240:   info->columns_local  = (double)A->N;
1241:   info->block_size     = 1.0;
1242:   info->mallocs        = (double)mat->reallocs;
1243:   isend[0] = mat->nz; isend[1] = mat->maxnz; isend[2] =  mat->maxnz -  mat->nz;
1244:   isend[3] = A->mem;  isend[4] = info->mallocs;

1246:   if (flag == MAT_LOCAL) {
1247:     info->nz_used      = isend[0];
1248:     info->nz_allocated = isend[1];
1249:     info->nz_unneeded  = isend[2];
1250:     info->memory       = isend[3];
1251:     info->mallocs      = isend[4];
1252:   } else if (flag == MAT_GLOBAL_MAX) {
1253:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,A->comm);
1254:     info->nz_used      = irecv[0];
1255:     info->nz_allocated = irecv[1];
1256:     info->nz_unneeded  = irecv[2];
1257:     info->memory       = irecv[3];
1258:     info->mallocs      = irecv[4];
1259:   } else if (flag == MAT_GLOBAL_SUM) {
1260:     MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,A->comm);
1261:     info->nz_used      = irecv[0];
1262:     info->nz_allocated = irecv[1];
1263:     info->nz_unneeded  = irecv[2];
1264:     info->memory       = irecv[3];
1265:     info->mallocs      = irecv[4];
1266:   }
1267:   return(0);
1268: }

1270: #undef __FUNCT__  
1272: int MatGetDiagonal_MPIRowbs(Mat mat,Vec v)
1273: {
1274:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1275:   BSsprow      **rs = a->A->rows;
1276:   int          i,n,ierr;
1277:   PetscScalar  *x,zero = 0.0;

1280:   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1281:   if (!a->blocksolveassembly) {
1282:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1283:   }

1285:   VecSet(&zero,v);
1286:   VecGetLocalSize(v,&n);
1287:   if (n != mat->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
1288:   VecGetArray(v,&x);
1289:   for (i=0; i<mat->m; i++) {
1290:     x[i] = rs[i]->nz[rs[i]->diag_ind];
1291:   }
1292:   VecRestoreArray(v,&x);
1293:   return(0);
1294: }

1296: #undef __FUNCT__  
1298: int MatDestroy_MPIRowbs(Mat mat)
1299: {
1300:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1301:   BSspmat      *A = a->A;
1302:   BSsprow      *vs;
1303:   int          i,ierr;

1306: #if defined(PETSC_USE_LOG)
1307:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1308: #endif
1309:   PetscFree(a->rowners);
1310:   MatStashDestroy_Private(&mat->stash);
1311:   if (a->bsmap) {
1312:     if (a->bsmap->vlocal2global) {PetscFree(a->bsmap->vlocal2global);}
1313:     if (a->bsmap->vglobal2local) {PetscFree(a->bsmap->vglobal2local);}
1314:     if (a->bsmap->vglobal2proc)  (*a->bsmap->free_g2p)(a->bsmap->vglobal2proc);
1315:     PetscFree(a->bsmap);
1316:   }

1318:   if (A) {
1319:     for (i=0; i<mat->m; i++) {
1320:       vs = A->rows[i];
1321:       MatFreeRowbs_Private(mat,vs->length,vs->col,vs->nz);
1322:     }
1323:     /* Note: A->map = a->bsmap is freed above */
1324:     PetscFree(A->rows);
1325:     PetscFree(A);
1326:   }
1327:   if (a->procinfo) {BSfree_ctx(a->procinfo);CHKERRBS(0);}
1328:   if (a->diag)     {VecDestroy(a->diag);}
1329:   if (a->xwork)    {VecDestroy(a->xwork);}
1330:   if (a->pA)       {BSfree_par_mat(a->pA);CHKERRBS(0);}
1331:   if (a->fpA)      {BSfree_copy_par_mat(a->fpA);CHKERRBS(0);}
1332:   if (a->comm_pA)  {BSfree_comm(a->comm_pA);CHKERRBS(0);}
1333:   if (a->comm_fpA) {BSfree_comm(a->comm_fpA);CHKERRBS(0);}
1334:   if (a->imax)     {PetscFree(a->imax);}
1335:   PetscFree(a);
1336:   return(0);
1337: }

1339: #undef __FUNCT__  
1341: int MatSetOption_MPIRowbs(Mat A,MatOption op)
1342: {
1343:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)A->data;

1346:   switch (op) {
1347:   case MAT_ROW_ORIENTED:
1348:     a->roworiented = PETSC_TRUE;
1349:     break;
1350:   case MAT_COLUMN_ORIENTED:
1351:     a->roworiented = PETSC_FALSE;
1352:     break;
1353:   case MAT_COLUMNS_SORTED:
1354:     a->sorted      = 1;
1355:     break;
1356:   case MAT_COLUMNS_UNSORTED:
1357:     a->sorted      = 0;
1358:     break;
1359:   case MAT_NO_NEW_NONZERO_LOCATIONS:
1360:     a->nonew       = 1;
1361:     break;
1362:   case MAT_YES_NEW_NONZERO_LOCATIONS:
1363:     a->nonew       = 0;
1364:     break;
1365:   case MAT_DO_NOT_USE_INODES:
1366:     a->bs_color_single = 1;
1367:     break;
1368:   case MAT_YES_NEW_DIAGONALS:
1369:   case MAT_ROWS_SORTED:
1370:   case MAT_NEW_NONZERO_LOCATION_ERR:
1371:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1372:   case MAT_ROWS_UNSORTED:
1373:   case MAT_USE_HASH_TABLE:
1374:     PetscLogInfo(A,"MatSetOption_MPIRowbs:Option ignored\n");
1375:     break;
1376:   case MAT_IGNORE_OFF_PROC_ENTRIES:
1377:     a->donotstash = PETSC_TRUE;
1378:     break;
1379:   case MAT_NO_NEW_DIAGONALS:
1380:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1381:     break;
1382:   case MAT_KEEP_ZEROED_ROWS:
1383:     SETERRQ(PETSC_ERR_SUP,"MAT_KEEP_ZEROED_ROWS");
1384:     break;
1385:   default:
1386:     SETERRQ(PETSC_ERR_SUP,"unknown option");
1387:     break;
1388:   }
1389:   return(0);
1390: }

1392: #undef __FUNCT__  
1394: int MatGetRow_MPIRowbs(Mat AA,int row,int *nz,int **idx,PetscScalar **v)
1395: {
1396:   Mat_MPIRowbs *mat = (Mat_MPIRowbs*)AA->data;
1397:   BSspmat      *A = mat->A;
1398:   BSsprow      *rs;
1399: 
1401:   if (row < mat->rstart || row >= mat->rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");

1403:   rs  = A->rows[row - mat->rstart];
1404:   *nz = rs->length;
1405:   if (v)   *v   = rs->nz;
1406:   if (idx) *idx = rs->col;
1407:   return(0);
1408: }

1410: #undef __FUNCT__  
1412: int MatRestoreRow_MPIRowbs(Mat A,int row,int *nz,int **idx,PetscScalar **v)
1413: {
1415:   return(0);
1416: }

1418: /* ------------------------------------------------------------------ */

1420: #undef __FUNCT__  
1422: int MatPrintHelp_MPIRowbs(Mat A)
1423: {
1424:   static PetscTruth called = PETSC_FALSE;
1425:   MPI_Comm          comm = A->comm;
1426:   int               ierr;

1429:   if (called) {return(0);} else called = PETSC_TRUE;
1430:   (*PetscHelpPrintf)(comm," Options for MATMPIROWBS matrix format (needed for BlockSolve):\n");
1431:   (*PetscHelpPrintf)(comm,"  -mat_rowbs_no_inode  - Do not use inodes\n");
1432:   return(0);
1433: }

1435: #undef __FUNCT__  
1437: int MatSetUpPreallocation_MPIRowbs(Mat A)
1438: {
1439:   int        ierr;

1442:    MatMPIRowbsSetPreallocation(A,PETSC_DEFAULT,0);
1443:   return(0);
1444: }

1446: /* -------------------------------------------------------------------*/
1447: EXTERN int MatCholeskyFactorNumeric_MPIRowbs(Mat,Mat*);
1448: EXTERN int MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat,IS,PetscReal,int,Mat *);
1449: EXTERN int MatLUFactorNumeric_MPIRowbs(Mat,Mat*);
1450: EXTERN int MatILUFactorSymbolic_MPIRowbs(Mat,IS,IS,MatILUInfo*,Mat *);
1451: EXTERN int MatSolve_MPIRowbs(Mat,Vec,Vec);
1452: EXTERN int MatForwardSolve_MPIRowbs(Mat,Vec,Vec);
1453: EXTERN int MatBackwardSolve_MPIRowbs(Mat,Vec,Vec);
1454: EXTERN int MatScaleSystem_MPIRowbs(Mat,Vec,Vec);
1455: EXTERN int MatUnScaleSystem_MPIRowbs(Mat,Vec,Vec);
1456: EXTERN int MatUseScaledForm_MPIRowbs(Mat,PetscTruth);

1458: static struct _MatOps MatOps_Values = {MatSetValues_MPIRowbs,
1459:        MatGetRow_MPIRowbs,
1460:        MatRestoreRow_MPIRowbs,
1461:        MatMult_MPIRowbs,
1462:        MatMultAdd_MPIRowbs,
1463:        MatMult_MPIRowbs,
1464:        MatMultAdd_MPIRowbs,
1465:        MatSolve_MPIRowbs,
1466:        0,
1467:        0,
1468:        0,
1469:        0,
1470:        0,
1471:        0,
1472:        0,
1473:        MatGetInfo_MPIRowbs,
1474:        0,
1475:        MatGetDiagonal_MPIRowbs,
1476:        0,MatNorm_MPIRowbs,
1477:        MatAssemblyBegin_MPIRowbs,
1478:        MatAssemblyEnd_MPIRowbs,
1479:        0,
1480:        MatSetOption_MPIRowbs,
1481:        MatZeroEntries_MPIRowbs,
1482:        MatZeroRows_MPIRowbs,
1483:        0,
1484:        MatLUFactorNumeric_MPIRowbs,
1485:        0,
1486:        MatCholeskyFactorNumeric_MPIRowbs,
1487:        MatSetUpPreallocation_MPIRowbs,
1488:        MatILUFactorSymbolic_MPIRowbs,
1489:        MatIncompleteCholeskyFactorSymbolic_MPIRowbs,
1490:        0,
1491:        0,
1492:        0,
1493:        MatForwardSolve_MPIRowbs,
1494:        MatBackwardSolve_MPIRowbs,
1495:        0,
1496:        0,
1497:        0,
1498:        0,
1499:        0,
1500:        0,
1501:        0,
1502:        MatPrintHelp_MPIRowbs,
1503:        MatScale_MPIRowbs,
1504:        0,
1505:        0,
1506:        0,
1507:        0,
1508:        0,
1509:        0,
1510:        0,
1511:        0,
1512:        0,
1513:        0,
1514:        0,
1515:        0,
1516:        0,
1517:        0,
1518:        MatDestroy_MPIRowbs,
1519:        MatView_MPIRowbs,
1520:        MatGetPetscMaps_Petsc,
1521:        MatUseScaledForm_MPIRowbs,
1522:        MatScaleSystem_MPIRowbs,
1523:        MatUnScaleSystem_MPIRowbs};

1525: /* ------------------------------------------------------------------- */


1528: EXTERN_C_BEGIN
1529: #undef __FUNCT__  
1531: int MatCreate_MPIRowbs(Mat A)
1532: {
1533:   Mat_MPIRowbs *a;
1534:   BSmapping    *bsmap;
1535:   BSoff_map    *bsoff;
1536:   int          i,ierr,*offset,m,M;
1537:   PetscTruth   flg1,flg2,flg3;
1538:   BSprocinfo   *bspinfo;
1539:   MPI_Comm     comm;
1540: 
1542:   comm = A->comm;
1543:   m    = A->m;
1544:   M    = A->M;

1546:   ierr                  = PetscNew(Mat_MPIRowbs,&a);
1547:   A->data               = (void*)a;
1548:   ierr                  = PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1549:   A->factor             = 0;
1550:   A->mapping            = 0;
1551:   a->vecs_permscale     = PETSC_FALSE;
1552:   A->insertmode         = NOT_SET_VALUES;
1553:   a->blocksolveassembly = 0;
1554:   MPI_Comm_rank(comm,&a->rank);
1555:   MPI_Comm_size(comm,&a->size);

1557:   PetscSplitOwnership(comm,&m,&M);

1559:   A->N = M;
1560:   A->M = M;
1561:   A->m = m;
1562:   A->n = A->N;  /* each row stores all columns */
1563:   ierr                             = PetscMalloc((A->m+1)*sizeof(int),&a->imax);
1564:   a->reallocs                      = 0;

1566:   /* the information in the maps duplicates the information computed below, eventually 
1567:      we should remove the duplicate information that is not contained in the maps */
1568:   PetscMapCreateMPI(comm,m,M,&A->rmap);
1569:   PetscMapCreateMPI(comm,m,M,&A->cmap);

1571:   /* build local table of row ownerships */
1572:   ierr          = PetscMalloc((a->size+2)*sizeof(int),&a->rowners);
1573:   ierr          = MPI_Allgather(&m,1,MPI_INT,a->rowners+1,1,MPI_INT,comm);
1574:   a->rowners[0] = 0;
1575:   for (i=2; i<=a->size; i++) {
1576:     a->rowners[i] += a->rowners[i-1];
1577:   }
1578:   a->rstart = a->rowners[a->rank];
1579:   a->rend   = a->rowners[a->rank+1];
1580:   PetscLogObjectMemory(A,(A->m+a->size+3)*sizeof(int));

1582:   /* build cache for off array entries formed */
1583:   MatStashCreate_Private(A->comm,1,&A->stash);
1584:   a->donotstash = PETSC_FALSE;

1586:   /* Initialize BlockSolve information */
1587:   a->A        = 0;
1588:   a->pA       = 0;
1589:   a->comm_pA  = 0;
1590:   a->fpA      = 0;
1591:   a->comm_fpA = 0;
1592:   a->alpha    = 1.0;
1593:   a->ierr     = 0;
1594:   a->failures = 0;
1595:   VecCreateMPI(A->comm,A->m,A->M,&(a->diag));
1596:   VecDuplicate(a->diag,&(a->xwork));
1597:   PetscLogObjectParent(A,a->diag);  PetscLogObjectParent(A,a->xwork);
1598:   PetscLogObjectMemory(A,(A->m+1)*sizeof(PetscScalar));
1599:   bspinfo = BScreate_ctx();CHKERRBS(0);
1600:   a->procinfo = bspinfo;
1601:   BSctx_set_id(bspinfo,a->rank);CHKERRBS(0);
1602:   BSctx_set_np(bspinfo,a->size);CHKERRBS(0);
1603:   BSctx_set_ps(bspinfo,comm);CHKERRBS(0);
1604:   BSctx_set_cs(bspinfo,INT_MAX);CHKERRBS(0);
1605:   BSctx_set_is(bspinfo,INT_MAX);CHKERRBS(0);
1606:   BSctx_set_ct(bspinfo,IDO);CHKERRBS(0);
1607: #if defined(PETSC_USE_DEBUG)
1608:   BSctx_set_err(bspinfo,1);CHKERRBS(0);  /* BS error checking */
1609: #endif
1610:   BSctx_set_rt(bspinfo,1);CHKERRBS(0);
1611:   PetscOptionsHasName(PETSC_NULL,"-log_info",&flg1);
1612:   if (flg1) {
1613:     BSctx_set_pr(bspinfo,1);CHKERRBS(0);
1614:   }
1615:   PetscOptionsHasName(PETSC_NULL,"-pc_ilu_factorpointwise",&flg1);
1616:   PetscOptionsHasName(PETSC_NULL,"-pc_icc_factorpointwise",&flg2);
1617:   PetscOptionsHasName(PETSC_NULL,"-mat_rowbs_no_inode",&flg3);
1618:   if (flg1 || flg2 || flg3) {
1619:     BSctx_set_si(bspinfo,1);CHKERRBS(0);
1620:   } else {
1621:     BSctx_set_si(bspinfo,0);CHKERRBS(0);
1622:   }
1623: #if defined(PETSC_USE_LOG)
1624:   MLOG_INIT();  /* Initialize logging */
1625: #endif

1627:   /* Compute global offsets */
1628:   offset = &a->rstart;

1630:   PetscNew(BSmapping,&a->bsmap);
1631:   PetscLogObjectMemory(A,sizeof(BSmapping));
1632:   bsmap = a->bsmap;
1633:   ierr                           = PetscMalloc(sizeof(int),&bsmap->vlocal2global);
1634:   *((int *)bsmap->vlocal2global) = (*offset);
1635:   bsmap->flocal2global           = BSloc2glob;
1636:   bsmap->free_l2g                = 0;
1637:   ierr                           = PetscMalloc(sizeof(int),&bsmap->vglobal2local);
1638:   *((int *)bsmap->vglobal2local) = (*offset);
1639:   bsmap->fglobal2local           = BSglob2loc;
1640:   bsmap->free_g2l                = 0;
1641:   bsoff                          = BSmake_off_map(*offset,bspinfo,A->M);
1642:   bsmap->vglobal2proc            = (void *)bsoff;
1643:   bsmap->fglobal2proc            = BSglob2proc;
1644:   bsmap->free_g2p                = (void(*)(void*)) BSfree_off_map;
1645:   return(0);
1646: }
1647: EXTERN_C_END

1649: #undef __FUNCT__  
1651: /* @
1652:   MatMPIRowbsSetPreallocation - Sets the number of expected nonzeros 
1653:   per row in the matrix.

1655:   Input Parameters:
1656: +  mat - matrix
1657: .  nz - maximum expected for any row
1658: -  nnz - number expected in each row

1660:   Note:
1661:   This routine is valid only for matrices stored in the MATMPIROWBS
1662:   format.
1663: @ */
1664: int MatMPIRowbsSetPreallocation(Mat mat,int nz,int *nnz)
1665: {
1666:   PetscTruth ismpirowbs;
1667:   int        ierr;

1670:   PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);
1671:   if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type");
1672:   mat->preallocated = PETSC_TRUE;
1673:   MatCreateMPIRowbs_local(mat,nz,nnz);
1674:   return(0);
1675: }
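
/*
   Editorial usage sketch (not part of the original source): preallocating a
   MATMPIROWBS matrix with roughly 5 nonzeros per row.  The MatCreateMPIRowbs()
   calling sequence matches the one used by MatLoad_MPIRowbs() below; passing
   PETSC_NULL for nnz lets every row default to nz entries.  The function name
   below is illustrative only.
*/
#if 0
static int MatRowbsPreallocation_Sketch(MPI_Comm comm,int m,int M,Mat *A)
{
  int ierr;

  /* create and preallocate in one call ... */
  ierr = MatCreateMPIRowbs(comm,m,M,5,PETSC_NULL,A);CHKERRQ(ierr);
  /* ... or, on an existing MATMPIROWBS matrix, call the routine above:
     ierr = MatMPIRowbsSetPreallocation(*A,5,PETSC_NULL);CHKERRQ(ierr); */
  return 0;
}
#endif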



1679: /* --------------- extra BlockSolve-specific routines -------------- */
1680: #undef __FUNCT__  
1682: /* @
1683:   MatGetBSProcinfo - Gets the BlockSolve BSprocinfo context, which the
1684:   user can then manipulate to alter the default parameters.

1686:   Input Parameter:
1687:   mat - matrix

1689:   Output Parameter:
1690:   procinfo - processor information context

1692:   Note:
1693:   This routine is valid only for matrices stored in the MATMPIROWBS
1694:   format.
1695: @ */
1696: int MatGetBSProcinfo(Mat mat,BSprocinfo *procinfo)
1697: {
1698:   Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;
1699:   PetscTruth   ismpirowbs;
1700:   int          ierr;

1703:   PetscTypeCompare((PetscObject)mat,MATMPIROWBS,&ismpirowbs);
1704:   if (!ismpirowbs) SETERRQ(PETSC_ERR_ARG_WRONG,"For MATMPIROWBS matrix type");
1705:   procinfo = a->procinfo;
1706:   return(0);
1707: }
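
/*
   Editorial sketch (not part of the original source): what "manipulating the
   default parameters" of the BSprocinfo context looks like in practice.  The
   BSctx_set_*() calls mirror those made in MatCreate_MPIRowbs() above, and the
   context is reached through the Mat_MPIRowbs data pointer exactly as the rest
   of this file does; the function name below is illustrative only.
*/
#if 0
static int MatRowbsTuneProcinfo_Sketch(Mat mat)
{
  Mat_MPIRowbs *a = (Mat_MPIRowbs*)mat->data;

  BSctx_set_pr(a->procinfo,1);CHKERRBS(0);  /* enable BlockSolve diagnostic printing */
  BSctx_set_si(a->procinfo,1);CHKERRBS(0);  /* bypass inode/clique detection in coloring */
  return 0;
}
#endif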

1709: EXTERN_C_BEGIN
1710: #undef __FUNCT__  
1712: int MatLoad_MPIRowbs(PetscViewer viewer,MatType type,Mat *newmat)
1713: {
1714:   Mat_MPIRowbs *a;
1715:   BSspmat      *A;
1716:   BSsprow      **rs;
1717:   Mat          mat;
1718:   int          i,nz,ierr,j,rstart,rend,fd,*ourlens,*sndcounts = 0,*procsnz;
1719:   int          header[4],rank,size,*rowlengths = 0,M,m,*rowners,maxnz,*cols;
1720:   PetscScalar  *vals;
1721:   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1722:   MPI_Status   status;

1725:   MPI_Comm_size(comm,&size);
1726:   MPI_Comm_rank(comm,&rank);
1727:   if (!rank) {
1728:     PetscViewerBinaryGetDescriptor(viewer,&fd);
1729:     PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1730:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
1731:     if (header[3] < 0) {
1732:       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIRowbs");
1733:     }
1734:   }

1736:   MPI_Bcast(header+1,3,MPI_INT,0,comm);
1737:   M = header[1];
1738:   /* determine ownership of all rows */
1739:   m          = M/size + ((M % size) > rank);
1740:   ierr       = PetscMalloc((size+2)*sizeof(int),&rowners);
1741:   ierr       = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1742:   rowners[0] = 0;
1743:   for (i=2; i<=size; i++) {
1744:     rowners[i] += rowners[i-1];
1745:   }
1746:   rstart = rowners[rank];
1747:   rend   = rowners[rank+1];
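  /*
     Example of the ownership computation above: with M = 10 rows and size = 3
     processes, the local sizes are m = 4, 3, 3 on ranks 0, 1, 2; the Allgather
     fills rowners[1..3] = {4,3,3}; and the running sum gives
     rowners = {0,4,7,10}, i.e. (rstart,rend) = (0,4), (4,7), (7,10).
  */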

1749:   /* distribute row lengths to all processors */
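  /* rowners, being the running sum of the local sizes, also serves as the
     displacement array for the MPI_Scatterv below */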
1750:   PetscMalloc((rend-rstart)*sizeof(int),&ourlens);
1751:   if (!rank) {
1752:     PetscMalloc(M*sizeof(int),&rowlengths);
1753:     PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1754:     PetscMalloc(size*sizeof(int),&sndcounts);
1755:     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1756:     MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1757:     PetscFree(sndcounts);
1758:   } else {
1759:     MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1760:   }

1762:   /* create our matrix */
1763:   MatCreateMPIRowbs(comm,m,M,0,ourlens,newmat);
1764:   mat = *newmat;
1765:   PetscFree(ourlens);

1767:   a = (Mat_MPIRowbs*)mat->data;
1768:   A = a->A;
1769:   rs = A->rows;

1771:   if (!rank) {
1772:     /* calculate the number of nonzeros on each processor */
1773:     PetscMalloc(size*sizeof(int),&procsnz);
1774:     PetscMemzero(procsnz,size*sizeof(int));
1775:     for (i=0; i<size; i++) {
1776:       for (j=rowners[i]; j< rowners[i+1]; j++) {
1777:         procsnz[i] += rowlengths[j];
1778:       }
1779:     }
1780:     PetscFree(rowlengths);

1782:     /* determine max buffer needed and allocate it */
1783:     maxnz = 0;
1784:     for (i=0; i<size; i++) {
1785:       maxnz = PetscMax(maxnz,procsnz[i]);
1786:     }
1787:     PetscMalloc(maxnz*sizeof(int),&cols);

1789:     /* read in my part of the matrix column indices  */
1790:     nz = procsnz[0];
1791:     PetscBinaryRead(fd,cols,nz,PETSC_INT);
1792: 
1793:     /* insert it into my part of matrix */
1794:     nz = 0;
1795:     for (i=0; i<A->num_rows; i++) {
1796:       for (j=0; j<a->imax[i]; j++) {
1797:         rs[i]->col[j] = cols[nz++];
1798:       }
1799:       rs[i]->length = a->imax[i];
1800:     }
1801:     /* read in parts for all other processors */
1802:     for (i=1; i<size; i++) {
1803:       nz   = procsnz[i];
1804:       PetscBinaryRead(fd,cols,nz,PETSC_INT);
1805:       MPI_Send(cols,nz,MPI_INT,i,mat->tag,comm);
1806:     }
1807:     PetscFree(cols);
1808:     PetscMalloc(maxnz*sizeof(PetscScalar),&vals);

1810:     /* read in my part of the matrix numerical values  */
1811:     nz   = procsnz[0];
1812:     PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1813: 
1814:     /* insert it into my part of matrix */
1815:     nz = 0;
1816:     for (i=0; i<A->num_rows; i++) {
1817:       for (j=0; j<a->imax[i]; j++) {
1818:         rs[i]->nz[j] = vals[nz++];
1819:       }
1820:     }
1821:     /* read in parts for all other processors */
1822:     for (i=1; i<size; i++) {
1823:       nz   = procsnz[i];
1824:       PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1825:       MPI_Send(vals,nz,MPIU_SCALAR,i,mat->tag,comm);
1826:     }
1827:     PetscFree(vals);
1828:     PetscFree(procsnz);
1829:   } else {
1830:     /* determine buffer space needed for message */
1831:     nz = 0;
1832:     for (i=0; i<A->num_rows; i++) {
1833:       nz += a->imax[i];
1834:     }
1835:     PetscMalloc(nz*sizeof(int),&cols);

1837:     /* receive message of column indices*/
1838:     MPI_Recv(cols,nz,MPI_INT,0,mat->tag,comm,&status);
1839:     MPI_Get_count(&status,MPI_INT,&maxnz);
1840:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Number of column indices received does not match expected number of nonzeros");

1842:     /* insert it into my part of matrix */
1843:     nz = 0;
1844:     for (i=0; i<A->num_rows; i++) {
1845:       for (j=0; j<a->imax[i]; j++) {
1846:         rs[i]->col[j] = cols[nz++];
1847:       }
1848:       rs[i]->length = a->imax[i];
1849:     }
1850:     PetscFree(cols);
1851:     PetscMalloc(nz*sizeof(PetscScalar),&vals);

1853:     /* receive message of values*/
1854:     MPI_Recv(vals,nz,MPIU_SCALAR,0,mat->tag,comm,&status);
1855:     MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1856:     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Number of values received does not match expected number of nonzeros");

1858:     /* insert it into my part of matrix */
1859:     nz = 0;
1860:     for (i=0; i<A->num_rows; i++) {
1861:       for (j=0; j<a->imax[i]; j++) {
1862:         rs[i]->nz[j] = vals[nz++];
1863:       }
1864:       rs[i]->length = a->imax[i];
1865:     }
1866:     PetscFree(vals);
1867:   }
1868:   PetscFree(rowners);
1869:   a->nz = a->maxnz;
1870:   MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
1871:   MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
1872:   return(0);
1873: }
1874: EXTERN_C_END
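/*
   Usage sketch for loading a MATMPIROWBS matrix from a PETSc binary file;
   the file name is illustrative, and the mode flag used when opening the
   binary viewer may differ between PETSc versions:

     PetscViewer viewer;
     Mat         A;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);
     MatLoad(viewer,MATMPIROWBS,&A);
     PetscViewerDestroy(viewer);
*/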

1876: /* 
1877:     Special destroy and view routines for factored matrices 
1878: */
1879: #undef __FUNCT__  
1881: static int MatDestroy_MPIRowbs_Factored(Mat mat)
1882: {
1884: #if defined(PETSC_USE_LOG)
1885:   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
1886: #endif
1887:   return(0);
1888: }

1890: #undef __FUNCT__  
1892: static int MatView_MPIRowbs_Factored(Mat mat,PetscViewer viewer)
1893: {

1897:   MatView((Mat) mat->data,viewer);
1898:   return(0);
1899: }

1901: #undef __FUNCT__  
1903: int MatIncompleteCholeskyFactorSymbolic_MPIRowbs(Mat mat,IS isrow,PetscReal f,int fill,Mat *newfact)
1904: {
1905:   /* Note:  f is not currently used in BlockSolve */
1906:   Mat          newmat;
1907:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1908:   int          ierr;
1909:   PetscTruth   idn;

1912:   if (isrow) {
1913:     ISIdentity(isrow,&idn);
1914:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
1915:   }

1917:   if (!mat->symmetric) {
1918:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use incomplete Cholesky \n\
1919:         preconditioning with a MATMPIROWBS matrix you must declare it to be \n\
1920:         symmetric using the option MatSetOption(A,MAT_SYMMETRIC)");
1921:   }

1923:   if (!mbs->blocksolveassembly) {
1924:     BSset_mat_icc_storage(mbs->A,PETSC_TRUE);CHKERRBS(0);
1925:     BSset_mat_symmetric(mbs->A,PETSC_TRUE);CHKERRBS(0);
1926:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1927:   }

1929:   /* Copy permuted matrix */
1930:   if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
1931:   mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);

1933:   /* Set up the communication for factorization */
1934:   if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
1935:   mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);

1937:   /* 
1938:       Create a new Mat structure to hold the "factored" matrix;
1939:     note that this merely contains a pointer to the original matrix, since
1940:     the original matrix itself holds the factor information.
1941:   */
1942:   PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
1943:   PetscLogObjectCreate(newmat);
1944:   PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));

1946:   newmat->data         = (void*)mat;
1947:   ierr                 = PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
1948:   newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
1949:   newmat->ops->view    = MatView_MPIRowbs_Factored;
1950:   newmat->factor       = 1;
1951:   newmat->preallocated = PETSC_TRUE;
1952:   newmat->M            = mat->M;
1953:   newmat->N            = mat->N;
1954:   newmat->m            = mat->m;
1955:   newmat->n            = mat->n;
1956:   PetscStrallocpy(MATMPIROWBS,&newmat->type_name);

1958:   *newfact = newmat;
1959:   return(0);
1960: }
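/*
   Typical call sequence for incomplete Cholesky with a MATMPIROWBS matrix
   (a sketch; as checked above, the matrix must be declared symmetric, and the
   MatCreateMPIRowbs() notes say this must be done before assembly):

     MatSetOption(A,MAT_SYMMETRIC);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);

   and then select ICC preconditioning, e.g. with the option -pc_type icc.
*/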

1962: #undef __FUNCT__  
1964: int MatILUFactorSymbolic_MPIRowbs(Mat mat,IS isrow,IS iscol,MatILUInfo* info,Mat *newfact)
1965: {
1966:   Mat          newmat;
1967:   Mat_MPIRowbs *mbs = (Mat_MPIRowbs*)mat->data;
1968:   int          ierr;
1969:   PetscTruth   idn;

1972:   if (info->levels != 0) SETERRQ(1,"BlockSolve95 ILU only supports 0 levels of fill");
1973:   if (isrow) {
1974:     ISIdentity(isrow,&idn);
1975:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity row permutation supported");
1976:   }
1977:   if (iscol) {
1978:     ISIdentity(iscol,&idn);
1979:     if (!idn) SETERRQ(PETSC_ERR_SUP,"Only identity column permutation supported");
1980:   }

1982:   if (!mbs->blocksolveassembly) {
1983:     MatAssemblyEnd_MPIRowbs_ForBlockSolve(mat);
1984:   }
1985: 
1986:   if (mat->symmetric) {
1987:     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"To use an ILU preconditioner with a \n\
1988:         MatCreateMPIRowbs() matrix you CANNOT declare it to be a symmetric matrix \n\
1989:         using the option MatSetOption(A,MAT_SYMMETRIC)");
1990:   }

1992:   /* Copy permuted matrix */
1993:   if (mbs->fpA) {BSfree_copy_par_mat(mbs->fpA);CHKERRBS(0);}
1994:   mbs->fpA = BScopy_par_mat(mbs->pA);CHKERRBS(0);

1996:   /* Set up the communication for factorization */
1997:   if (mbs->comm_fpA) {BSfree_comm(mbs->comm_fpA);CHKERRBS(0);}
1998:   mbs->comm_fpA = BSsetup_factor(mbs->fpA,mbs->procinfo);CHKERRBS(0);

2000:   /* 
2001:       Create a new Mat structure to hold the "factored" matrix;
2002:     note that this merely contains a pointer to the original matrix, since
2003:     the original matrix itself holds the factor information.
2004:   */
2005:   PetscHeaderCreate(newmat,_p_Mat,struct _MatOps,MAT_COOKIE,-1,"Mat",mat->comm,MatDestroy,MatView);
2006:   PetscLogObjectCreate(newmat);
2007:   PetscLogObjectMemory(newmat,sizeof(struct _p_Mat));

2009:   newmat->data         = (void*)mat;
2010:   ierr                 = PetscMemcpy(newmat->ops,&MatOps_Values,sizeof(struct _MatOps));
2011:   newmat->ops->destroy = MatDestroy_MPIRowbs_Factored;
2012:   newmat->ops->view    = MatView_MPIRowbs_Factored;
2013:   newmat->factor       = 1;
2014:   newmat->preallocated = PETSC_TRUE;
2015:   newmat->M            = mat->M;
2016:   newmat->N            = mat->N;
2017:   newmat->m            = mat->m;
2018:   newmat->n            = mat->n;
2019:   PetscStrallocpy(MATMPIROWBS,&newmat->type_name);

2021:   *newfact = newmat;
2022:   return(0);
2023: }

2025: #undef __FUNCT__  
2027: int MatMPIRowbsGetColor(Mat mat,ISColoring *coloring)
2028: {
2029:   int          ierr;

2033:   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
2034:   ISColoringCreate(mat->comm,mat->m,0,coloring);

2036:   return(0);
2037: }

2039: #undef __FUNCT__  
2041: /*@C
2042:    MatCreateMPIRowbs - Creates a sparse parallel matrix in the MATMPIROWBS
2043:    format.  This format is intended primarily as an interface for BlockSolve95.

2045:    Collective on MPI_Comm

2047:    Input Parameters:
2048: +  comm - MPI communicator
2049: .  m - number of local rows (or PETSC_DECIDE to have it calculated from M)
2050: .  M - number of global rows (or PETSC_DECIDE to have it calculated from m)
2051: .  nz - number of nonzeros per row (same for all local rows)
2052: -  nnz - number of nonzeros per row (possibly different for each row), or PETSC_NULL

2054:    Output Parameter:
2055: .  newA - the matrix 

2057:    Notes:
2058:    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
2059:    then it must be used on all processors that share the object for that argument.

2061:    The user MUST specify either the local or global matrix dimensions
2062:    (possibly both).

2064:    Specify the preallocated storage with either nz or nnz (not both).  Set 
2065:    nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
2066:    allocation.

2068:    Notes:
2069:    By default, the matrix is assumed to be nonsymmetric; the user can
2070:    take advantage of special optimizations for symmetric matrices by calling
2071: $     MatSetOption(mat,MAT_SYMMETRIC)
2072:    BEFORE calling the routine MatAssemblyBegin().

2074:    Internally, the MATMPIROWBS format inserts zero elements to the
2075:    matrix if necessary, so that nonsymmetric matrices are considered
2076:    to be symmetric in terms of their sparsity structure; this format
2077:    is required for use of the parallel communication routines within
2078:    BlockSolve95. In particular, if the matrix element A[i,j] exists,
2079:    then PETSc will internally allocate a 0 value for the element
2080:    A[j,i] during MatAssemblyEnd() if the user has not already set
2081:    a value for the matrix element A[j,i].

2083:    Options Database Keys:
2084: .  -mat_rowbs_no_inode - Do not use inodes.

2086:    Level: intermediate
2087:   
2088: .keywords: matrix, row, symmetric, sparse, parallel, BlockSolve

2090: .seealso: MatCreate(), MatSetValues()
2091: @*/
2092: int MatCreateMPIRowbs(MPI_Comm comm,int m,int M,int nz,int *nnz,Mat *newA)
2093: {
2095: 
2097:   MatCreate(comm,m,m,M,M,newA);
2098:   MatSetType(*newA,MATMPIROWBS);
2099:   MatMPIRowbsSetPreallocation(*newA,nz,nnz);
2100:   return(0);
2101: }
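/*
   Usage sketch (the global size, preallocation count, and entry insertion
   are illustrative only):

     Mat A;
     MatCreateMPIRowbs(PETSC_COMM_WORLD,PETSC_DECIDE,100,5,PETSC_NULL,&A);
     ... insert entries with MatSetValues() ...
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     ... use A, for example as the operator in a linear solve ...
     MatDestroy(A);
*/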