Actual source code: mpiaij.c
1: /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
3: #include "src/mat/impls/aij/mpi/mpiaij.h"
4: #include "src/vec/vecimpl.h"
5: #include "src/inline/spops.h"
7: EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8: EXTERN int DisAssemble_MPIAIJ(Mat);
9: EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10: EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11: EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12: EXTERN int MatPrintHelp_SeqAIJ(Mat);
14: /*
15: Local utility routine that creates a mapping from the global column
16: number to the local number in the off-diagonal part of the local
17: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
18: a slightly higher hash table cost; without it, it is not scalable (each processor
19: has an order N integer array) but is fast to access.
20: */
21: #undef __FUNCT__
23: int CreateColmap_MPIAIJ_Private(Mat mat)
24: {
25: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26: int n = aij->B->n,i,ierr;
29: #if defined (PETSC_USE_CTABLE)
30: PetscTableCreate(n,&aij->colmap);
31: for (i=0; i<n; i++){
32: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
33: }
34: #else
35: PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);
36: PetscLogObjectMemory(mat,mat->N*sizeof(int));
37: PetscMemzero(aij->colmap,mat->N*sizeof(int));
38: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39: #endif
40: return(0);
41: }
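/*
   Illustrative sketch (not part of this file): how the colmap built above is used to
   turn a global column number gcol into a local index lcol in the off-diagonal block,
   mirroring the lookup in MatSetValues_MPIAIJ() below; gcol and lcol are assumed names.
   The map stores indices shifted by one so that a zero lookup result means "not present".

     int gcol,lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/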
43: #define CHUNKSIZE 15
44: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv)
45: {
46:
47: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift;
48: rmax = aimax[row]; nrow = ailen[row];
49: col1 = col - shift;
50:
51: low = 0; high = nrow;
52: while (high-low > 5) {
53: t = (low+high)/2;
54: if (rp[t] > col) high = t;
55: else low = t;
56: }
57: for (_i=low; _i<high; _i++) {
58: if (rp[_i] > col1) break;
59: if (rp[_i] == col1) {
60: if (addv == ADD_VALUES) ap[_i] += value;
61: else ap[_i] = value;
62: goto a_noinsert;
63: }
64: }
65: if (nonew == 1) goto a_noinsert;
66: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix");
67: if (nrow >= rmax) {
68: /* there is no extra room in row, therefore enlarge */
69: int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j;
70: PetscScalar *new_a;
71:
72: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
73:
74: /* malloc new storage space */
75: len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int);
76: ierr = PetscMalloc(len,&new_a);
77: new_j = (int*)(new_a + new_nz);
78: new_i = new_j + new_nz;
79:
80: /* copy over old data into new slots */
81: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
82: for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
83: PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));
84: len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift);
85: PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow,
86: len*sizeof(int));
87: PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));
88: PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow,
89: len*sizeof(PetscScalar));
90: /* free up old matrix storage */
91:
92: PetscFree(a->a);
93: if (!a->singlemalloc) {
94: PetscFree(a->i);
95: PetscFree(a->j);
96: }
97: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
98: a->singlemalloc = PETSC_TRUE;
99:
100: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift;
101: rmax = aimax[row] = aimax[row] + CHUNKSIZE;
102: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar)));
103: a->maxnz += CHUNKSIZE;
104: a->reallocs++;
105: }
106: N = nrow++ - 1; a->nz++;
107: /* shift up all the later entries in this row */
108: for (ii=N; ii>=_i; ii--) {
109: rp[ii+1] = rp[ii];
110: ap[ii+1] = ap[ii];
111: }
112: rp[_i] = col1;
113: ap[_i] = value;
114: a_noinsert: ;
115: ailen[row] = nrow;
116: }
118: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv)
119: {
120:
121: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift;
122: rmax = bimax[row]; nrow = bilen[row];
123: col1 = col - shift;
124:
125: low = 0; high = nrow;
126: while (high-low > 5) {
127: t = (low+high)/2;
128: if (rp[t] > col) high = t;
129: else low = t;
130: }
131: for (_i=low; _i<high; _i++) {
132: if (rp[_i] > col1) break;
133: if (rp[_i] == col1) {
134: if (addv == ADD_VALUES) ap[_i] += value;
135: else ap[_i] = value;
136: goto b_noinsert;
137: }
138: }
139: if (nonew == 1) goto b_noinsert;
140: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix");
141: if (nrow >= rmax) {
142: /* there is no extra room in row, therefore enlarge */
143: int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j;
144: PetscScalar *new_a;
145:
146: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
147:
148: /* malloc new storage space */
149: len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int);
150: ierr = PetscMalloc(len,&new_a);
151: new_j = (int*)(new_a + new_nz);
152: new_i = new_j + new_nz;
153:
154: /* copy over old data into new slots */
155: for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];}
156: for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;}
157: PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));
158: len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift);
159: PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow,
160: len*sizeof(int));
161: PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));
162: PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow,
163: len*sizeof(PetscScalar));
164: /* free up old matrix storage */
165:
166: PetscFree(b->a);
167: if (!b->singlemalloc) {
168: PetscFree(b->i);
169: PetscFree(b->j);
170: }
171: ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;
172: b->singlemalloc = PETSC_TRUE;
173:
174: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift;
175: rmax = bimax[row] = bimax[row] + CHUNKSIZE;
176: PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar)));
177: b->maxnz += CHUNKSIZE;
178: b->reallocs++;
179: }
180: N = nrow++ - 1; b->nz++;
181: /* shift up all the later entries in this row */
182: for (ii=N; ii>=_i; ii--) {
183: rp[ii+1] = rp[ii];
184: ap[ii+1] = ap[ii];
185: }
186: rp[_i] = col1;
187: ap[_i] = value;
188: b_noinsert: ;
189: bilen[row] = nrow;
190: }
192: #undef __FUNCT__
194: int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195: {
196: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
197: PetscScalar value;
198: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199: int cstart = aij->cstart,cend = aij->cend,row,col;
200: PetscTruth roworiented = aij->roworiented;
202: /* Some Variables required in the macro */
203: Mat A = aij->A;
204: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
205: int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206: PetscScalar *aa = a->a;
207: PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208: Mat B = aij->B;
209: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
210: int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211: PetscScalar *ba = b->a;
213: int *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214: int nonew = a->nonew,shift = a->indexshift;
215: PetscScalar *ap;
218: for (i=0; i<m; i++) {
219: if (im[i] < 0) continue;
220: #if defined(PETSC_USE_BOPT_g)
221: if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222: #endif
223: if (im[i] >= rstart && im[i] < rend) {
224: row = im[i] - rstart;
225: for (j=0; j<n; j++) {
226: if (in[j] >= cstart && in[j] < cend){
227: col = in[j] - cstart;
228: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229: if (ignorezeroentries && value == 0.0) continue;
230: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231: /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
232: } else if (in[j] < 0) continue;
233: #if defined(PETSC_USE_BOPT_g)
234: else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235: #endif
236: else {
237: if (mat->was_assembled) {
238: if (!aij->colmap) {
239: CreateColmap_MPIAIJ_Private(mat);
240: }
241: #if defined (PETSC_USE_CTABLE)
242: PetscTableFind(aij->colmap,in[j]+1,&col);
243: col--;
244: #else
245: col = aij->colmap[in[j]] - 1;
246: #endif
247: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248: DisAssemble_MPIAIJ(mat);
249: col = in[j];
250: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251: B = aij->B;
252: b = (Mat_SeqAIJ*)B->data;
253: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254: ba = b->a;
255: }
256: } else col = in[j];
257: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258: if (ignorezeroentries && value == 0.0) continue;
259: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260: /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
261: }
262: }
263: } else {
264: if (!aij->donotstash) {
265: if (roworiented) {
266: if (ignorezeroentries && v[i*n] == 0.0) continue;
267: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
268: } else {
269: if (ignorezeroentries && v[i] == 0.0) continue;
270: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
271: }
272: }
273: }
274: }
275: return(0);
276: }
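/*
   Usage sketch (assumed caller code, not part of this file): entries whose row is owned
   locally go directly into aij->A or aij->B through the macros above; entries for rows
   owned by another processor are stashed here and shipped during assembly.

     int         row = 0,cols[2] = {0,5};
     PetscScalar vals[2] = {1.0,2.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/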
278: #undef __FUNCT__
280: int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281: {
282: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284: int cstart = aij->cstart,cend = aij->cend,row,col;
287: for (i=0; i<m; i++) {
288: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290: if (idxm[i] >= rstart && idxm[i] < rend) {
291: row = idxm[i] - rstart;
292: for (j=0; j<n; j++) {
293: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295: if (idxn[j] >= cstart && idxn[j] < cend){
296: col = idxn[j] - cstart;
297: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
298: } else {
299: if (!aij->colmap) {
300: CreateColmap_MPIAIJ_Private(mat);
301: }
302: #if defined (PETSC_USE_CTABLE)
303: PetscTableFind(aij->colmap,idxn[j]+1,&col);
304: col --;
305: #else
306: col = aij->colmap[idxn[j]] - 1;
307: #endif
308: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309: else {
310: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
311: }
312: }
313: }
314: } else {
315: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316: }
317: }
318: return(0);
319: }
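/*
   Usage sketch (assumed caller code): MatGetValues() on an MPIAIJ matrix may only ask
   for rows owned by the calling processor, as enforced above; the ownership range is
   obtained with MatGetOwnershipRange().

     int         rstart,rend,row,col = 0;
     PetscScalar value;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart;
     ierr = MatGetValues(mat,1,&row,1,&col,&value);CHKERRQ(ierr);
*/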
321: #undef __FUNCT__
323: int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324: {
325: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
326: int ierr,nstash,reallocs;
327: InsertMode addv;
330: if (aij->donotstash) {
331: return(0);
332: }
334: /* make sure all processors are either in INSERTMODE or ADDMODE */
335: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
336: if (addv == (ADD_VALUES|INSERT_VALUES)) {
337: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338: }
339: mat->insertmode = addv; /* in case this processor had no cache */
341: MatStashScatterBegin_Private(&mat->stash,aij->rowners);
342: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
343: PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344: return(0);
345: }
348: #undef __FUNCT__
350: int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351: {
352: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353: int i,j,rstart,ncols,n,ierr,flg;
354: int *row,*col,other_disassembled;
355: PetscScalar *val;
356: InsertMode addv = mat->insertmode;
357: #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
358: PetscTruth flag;
359: #endif
362: if (!aij->donotstash) {
363: while (1) {
364: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
365: if (!flg) break;
367: for (i=0; i<n;) {
368: /* Now identify the consecutive vals belonging to the same row */
369: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
370: if (j < n) ncols = j-i;
371: else ncols = n-i;
372: /* Now assemble all these values with a single function call */
373: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
374: i = j;
375: }
376: }
377: MatStashScatterEnd_Private(&mat->stash);
378: }
379:
380: MatAssemblyBegin(aij->A,mode);
381: MatAssemblyEnd(aij->A,mode);
383: /* determine if any processor has disassembled, if so we must
384: also disassemble ourselves, in order that we may reassemble. */
385: /*
386: if nonzero structure of submatrix B cannot change then we know that
387: no processor disassembled thus we can skip this stuff
388: */
389: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
390: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
391: if (mat->was_assembled && !other_disassembled) {
392: DisAssemble_MPIAIJ(mat);
393: }
394: }
396: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
397: MatSetUpMultiply_MPIAIJ(mat);
398: }
399: MatAssemblyBegin(aij->B,mode);
400: MatAssemblyEnd(aij->B,mode);
402: if (aij->rowvalues) {
403: PetscFree(aij->rowvalues);
404: aij->rowvalues = 0;
405: }
406: #if defined(PETSC_HAVE_SUPERLUDIST)
407: PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);
408: if (flag) { MatUseSuperLU_DIST_MPIAIJ(mat); }
409: #endif
411: #if defined(PETSC_HAVE_SPOOLES)
412: PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);
413: if (flag) { MatUseSpooles_MPIAIJ(mat); }
414: #endif
415: return(0);
416: }
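/*
   Usage sketch (assumed caller code): if a processor never generates entries for rows
   it does not own, the stash communication handled above can be skipped entirely.

     ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);
*/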
418: #undef __FUNCT__
420: int MatZeroEntries_MPIAIJ(Mat A)
421: {
422: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
423: int ierr;
426: MatZeroEntries(l->A);
427: MatZeroEntries(l->B);
428: return(0);
429: }
431: #undef __FUNCT__
433: int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
434: {
435: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
436: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
437: int *nprocs,j,idx,nsends,row;
438: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
439: int *rvalues,tag = A->tag,count,base,slen,n,*source;
440: int *lens,imdex,*lrows,*values,rstart=l->rstart;
441: MPI_Comm comm = A->comm;
442: MPI_Request *send_waits,*recv_waits;
443: MPI_Status recv_status,*send_status;
444: IS istmp;
445: PetscTruth found;
448: ISGetLocalSize(is,&N);
449: ISGetIndices(is,&rows);
451: /* first count number of contributors to each processor */
452: PetscMalloc(2*size*sizeof(int),&nprocs);
453: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
454: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
455: for (i=0; i<N; i++) {
456: idx = rows[i];
457: found = PETSC_FALSE;
458: for (j=0; j<size; j++) {
459: if (idx >= owners[j] && idx < owners[j+1]) {
460: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
461: }
462: }
463: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
464: }
465: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
467: /* inform other processors of number of messages and max length*/
468: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
470: /* post receives: */
471: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
472: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
473: for (i=0; i<nrecvs; i++) {
474: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
475: }
477: /* do sends:
478: 1) starts[i] gives the starting index in svalues for stuff going to
479: the ith processor
480: */
481: PetscMalloc((N+1)*sizeof(int),&svalues);
482: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
483: PetscMalloc((size+1)*sizeof(int),&starts);
484: starts[0] = 0;
485: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
486: for (i=0; i<N; i++) {
487: svalues[starts[owner[i]]++] = rows[i];
488: }
489: ISRestoreIndices(is,&rows);
491: starts[0] = 0;
492: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
493: count = 0;
494: for (i=0; i<size; i++) {
495: if (nprocs[2*i+1]) {
496: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
497: }
498: }
499: PetscFree(starts);
501: base = owners[rank];
503: /* wait on receives */
504: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
505: source = lens + nrecvs;
506: count = nrecvs; slen = 0;
507: while (count) {
508: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
509: /* unpack receives into our local space */
510: MPI_Get_count(&recv_status,MPI_INT,&n);
511: source[imdex] = recv_status.MPI_SOURCE;
512: lens[imdex] = n;
513: slen += n;
514: count--;
515: }
516: PetscFree(recv_waits);
517:
518: /* move the data into the send scatter */
519: PetscMalloc((slen+1)*sizeof(int),&lrows);
520: count = 0;
521: for (i=0; i<nrecvs; i++) {
522: values = rvalues + i*nmax;
523: for (j=0; j<lens[i]; j++) {
524: lrows[count++] = values[j] - base;
525: }
526: }
527: PetscFree(rvalues);
528: PetscFree(lens);
529: PetscFree(owner);
530: PetscFree(nprocs);
531:
532: /* actually zap the local rows */
533: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
534: PetscLogObjectParent(A,istmp);
536: /*
537: Zero the required rows. If the "diagonal block" of the matrix
538: is square and the user wishes to set the diagonal we use separate
539: code so that MatSetValues() is not called for each diagonal entry, allocating
540: new memory, thus calling lots of mallocs and slowing things down.
542: Contributed by: Mathew Knepley
543: */
544: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
545: MatZeroRows(l->B,istmp,0);
546: if (diag && (l->A->M == l->A->N)) {
547: ierr = MatZeroRows(l->A,istmp,diag);
548: } else if (diag) {
549: MatZeroRows(l->A,istmp,0);
550: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
551: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
552: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
553: }
554: for (i = 0; i < slen; i++) {
555: row = lrows[i] + rstart;
556: MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
557: }
558: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
559: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
560: } else {
561: MatZeroRows(l->A,istmp,0);
562: }
563: ISDestroy(istmp);
564: PetscFree(lrows);
566: /* wait on sends */
567: if (nsends) {
568: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
569: MPI_Waitall(nsends,send_waits,send_status);
570: PetscFree(send_status);
571: }
572: PetscFree(send_waits);
573: PetscFree(svalues);
575: return(0);
576: }
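/*
   Usage sketch (assumed caller code): each processor passes the global row numbers it
   wants zeroed (they need not be locally owned; they are routed above) and an optional
   value to place on the diagonal.

     IS          rows;
     int         myrows[1] = {0};
     PetscScalar diag = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,1,myrows,&rows);CHKERRQ(ierr);
     ierr = MatZeroRows(A,rows,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(rows);CHKERRQ(ierr);
*/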
578: #undef __FUNCT__
580: int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
581: {
582: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
583: int ierr,nt;
586: VecGetLocalSize(xx,&nt);
587: if (nt != A->n) {
588: SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
589: }
590: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
591: (*a->A->ops->mult)(a->A,xx,yy);
592: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
593: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
594: return(0);
595: }
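/*
   Usage sketch (assumed caller code): the input vector must match the matrix column
   layout (local size A->n) and the output vector the row layout (local size A->m),
   which is what the size check above enforces.

     Vec x,y;
     ierr = VecCreateMPI(A->comm,A->n,A->N,&x);CHKERRQ(ierr);
     ierr = VecCreateMPI(A->comm,A->m,A->M,&y);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/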
597: #undef __FUNCT__
599: int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600: {
601: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
602: int ierr;
605: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
606: (*a->A->ops->multadd)(a->A,xx,yy,zz);
607: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
608: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
609: return(0);
610: }
612: #undef __FUNCT__
614: int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
615: {
616: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
617: int ierr;
620: /* do nondiagonal part */
621: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
622: /* send it on its way */
623: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
624: /* do local part */
625: (*a->A->ops->multtranspose)(a->A,xx,yy);
626: /* receive remote parts: note this assumes the values are not actually */
627: /* inserted in yy until the next line, which is true for my implementation */
628: /* but may not always be true. */
629: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
630: return(0);
631: }
633: #undef __FUNCT__
635: int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
636: {
637: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
638: int ierr;
641: /* do nondiagonal part */
642: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
643: /* send it on its way */
644: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
645: /* do local part */
646: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
647: /* receive remote parts: note this assumes the values are not actually */
648: /* inserted in zz until the next line, which is true for my implementation */
649: /* but may not always be true. */
650: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
651: return(0);
652: }
654: /*
655: This only works correctly for square matrices where the subblock A->A is the
656: diagonal block
657: */
658: #undef __FUNCT__
660: int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
661: {
662: int ierr;
663: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
666: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
667: if (a->rstart != a->cstart || a->rend != a->cend) {
668: SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
669: }
670: MatGetDiagonal(a->A,v);
671: return(0);
672: }
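/*
   Usage sketch (assumed caller code): the vector must share the matrix row layout,
   since only the square "diagonal block" aij->A is consulted above.

     Vec d;
     ierr = VecCreateMPI(A->comm,A->m,A->M,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
*/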
674: #undef __FUNCT__
676: int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
677: {
678: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
679: int ierr;
682: MatScale(aa,a->A);
683: MatScale(aa,a->B);
684: return(0);
685: }
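/*
   Usage sketch (assumed caller code, using the old-style MatScale() calling sequence
   of this release): scale every stored entry of the parallel matrix.

     PetscScalar alpha = 2.0;
     ierr = MatScale(&alpha,A);CHKERRQ(ierr);
*/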
687: #undef __FUNCT__
689: int MatDestroy_MPIAIJ(Mat mat)
690: {
691: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
692: int ierr;
695: #if defined(PETSC_USE_LOG)
696: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
697: #endif
698: MatStashDestroy_Private(&mat->stash);
699: PetscFree(aij->rowners);
700: MatDestroy(aij->A);
701: MatDestroy(aij->B);
702: #if defined (PETSC_USE_CTABLE)
703: if (aij->colmap) {PetscTableDelete(aij->colmap);}
704: #else
705: if (aij->colmap) {PetscFree(aij->colmap);}
706: #endif
707: if (aij->garray) {PetscFree(aij->garray);}
708: if (aij->lvec) {VecDestroy(aij->lvec);}
709: if (aij->Mvctx) {VecScatterDestroy(aij->Mvctx);}
710: if (aij->rowvalues) {PetscFree(aij->rowvalues);}
711: PetscFree(aij);
712: return(0);
713: }
715: extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
716: extern int MatFactorInfo_Spooles(Mat,PetscViewer);
718: #undef __FUNCT__
720: int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
721: {
722: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
723: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
724: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
725: int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
726: int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
727: PetscScalar *column_values;
730: MPI_Comm_rank(mat->comm,&rank);
731: MPI_Comm_size(mat->comm,&size);
732: nz = A->nz + B->nz;
733: if (rank == 0) {
734: header[0] = MAT_FILE_COOKIE;
735: header[1] = mat->M;
736: header[2] = mat->N;
737: MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);
738: PetscViewerBinaryGetDescriptor(viewer,&fd);
739: PetscBinaryWrite(fd,header,4,PETSC_INT,1);
740: /* get largest number of rows any processor has */
741: rlen = mat->m;
742: PetscMapGetGlobalRange(mat->rmap,&range);
743: for (i=1; i<size; i++) {
744: rlen = PetscMax(rlen,range[i+1] - range[i]);
745: }
746: } else {
747: MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);
748: rlen = mat->m;
749: }
751: /* load up the local row counts */
752: PetscMalloc((rlen+1)*sizeof(int),&row_lengths);
753: for (i=0; i<mat->m; i++) {
754: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
755: }
757: /* store the row lengths to the file */
758: if (rank == 0) {
759: MPI_Status status;
760: PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);
761: for (i=1; i<size; i++) {
762: rlen = range[i+1] - range[i];
763: MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);
764: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);
765: }
766: } else {
767: MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);
768: }
769: PetscFree(row_lengths);
771: /* load up the local column indices */
772: nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
773: MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);
774: PetscMalloc((nzmax+1)*sizeof(int),&column_indices);
775: cnt = 0;
776: for (i=0; i<mat->m; i++) {
777: for (j=B->i[i]; j<B->i[i+1]; j++) {
778: if ( (col = garray[B->j[j]]) > cstart) break;
779: column_indices[cnt++] = col;
780: }
781: for (k=A->i[i]; k<A->i[i+1]; k++) {
782: column_indices[cnt++] = A->j[k] + cstart;
783: }
784: for (; j<B->i[i+1]; j++) {
785: column_indices[cnt++] = garray[B->j[j]];
786: }
787: }
788: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
790: /* store the column indices to the file */
791: if (rank == 0) {
792: MPI_Status status;
793: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);
794: for (i=1; i<size; i++) {
795: MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);
796: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
797: MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);
798: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);
799: }
800: } else {
801: MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);
802: MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);
803: }
804: PetscFree(column_indices);
806: /* load up the local column values */
807: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
808: cnt = 0;
809: for (i=0; i<mat->m; i++) {
810: for (j=B->i[i]; j<B->i[i+1]; j++) {
811: if ( garray[B->j[j]] > cstart) break;
812: column_values[cnt++] = B->a[j];
813: }
814: for (k=A->i[i]; k<A->i[i+1]; k++) {
815: column_values[cnt++] = A->a[k];
816: }
817: for (; j<B->i[i+1]; j++) {
818: column_values[cnt++] = B->a[j];
819: }
820: }
821: if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
823: /* store the column values to the file */
824: if (rank == 0) {
825: MPI_Status status;
826: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);
827: for (i=1; i<size; i++) {
828: MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);
829: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
830: MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);
831: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);
832: }
833: } else {
834: MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);
835: MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);
836: }
837: PetscFree(column_values);
838: return(0);
839: }
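/*
   Usage sketch (assumed caller code; the binary "create" mode name is an assumption
   for this release): writing the parallel matrix to a binary viewer dispatches to the
   routine above when more than one processor is used.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(mat->comm,"matrix.dat",PETSC_BINARY_CREATE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/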
841: #undef __FUNCT__
843: int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
844: {
845: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
846: Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
847: int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
848: PetscTruth isdraw,isascii,flg,isbinary;
849: PetscViewer sviewer;
850: PetscViewerFormat format;
853: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
854: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
855: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
856: if (isascii) {
857: PetscViewerGetFormat(viewer,&format);
858: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
859: MatInfo info;
860: MPI_Comm_rank(mat->comm,&rank);
861: MatGetInfo(mat,MAT_LOCAL,&info);
862: PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
863: if (flg) {
864: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
865: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
866: } else {
867: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
868: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
869: }
870: MatGetInfo(aij->A,MAT_LOCAL,&info);
871: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
872: MatGetInfo(aij->B,MAT_LOCAL,&info);
873: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
874: PetscViewerFlush(viewer);
875: VecScatterView(aij->Mvctx,viewer);
876: return(0);
877: } else if (format == PETSC_VIEWER_ASCII_INFO) {
878: return(0);
879: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
880: #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
881: MatMPIAIJFactorInfo_SuperLu(mat,viewer);
882: #endif
883: #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
884: MatFactorInfo_Spooles(mat,viewer);
885: #endif
887: return(0);
888: }
889: } else if (isbinary) {
890: if (size == 1) {
891: PetscObjectSetName((PetscObject)aij->A,mat->name);
892: MatView(aij->A,viewer);
893: } else {
894: MatView_MPIAIJ_Binary(mat,viewer);
895: }
896: return(0);
897: } else if (isdraw) {
898: PetscDraw draw;
899: PetscTruth isnull;
900: PetscViewerDrawGetDraw(viewer,0,&draw);
901: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
902: }
904: if (size == 1) {
905: PetscObjectSetName((PetscObject)aij->A,mat->name);
906: MatView(aij->A,viewer);
907: } else {
908: /* assemble the entire matrix onto first processor. */
909: Mat A;
910: Mat_SeqAIJ *Aloc;
911: int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
912: PetscScalar *a;
914: if (!rank) {
915: MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
916: } else {
917: MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
918: }
919: PetscLogObjectParent(mat,A);
921: /* copy over the A part */
922: Aloc = (Mat_SeqAIJ*)aij->A->data;
923: m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
924: row = aij->rstart;
925: for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
926: for (i=0; i<m; i++) {
927: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
928: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
929: }
930: aj = Aloc->j;
931: for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
933: /* copy over the B part */
934: Aloc = (Mat_SeqAIJ*)aij->B->data;
935: m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
936: row = aij->rstart;
937: PetscMalloc((ai[m]+1)*sizeof(int),&cols);
938: ct = cols;
939: for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
940: for (i=0; i<m; i++) {
941: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
942: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
943: }
944: PetscFree(ct);
945: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
946: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
947: /*
948: Everyone has to call to draw the matrix since the graphics waits are
949: synchronized across all processors that share the PetscDraw object
950: */
951: PetscViewerGetSingleton(viewer,&sviewer);
952: if (!rank) {
953: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
954: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
955: }
956: PetscViewerRestoreSingleton(viewer,&sviewer);
957: MatDestroy(A);
958: }
959: return(0);
960: }
962: #undef __FUNCT__
964: int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
965: {
966: int ierr;
967: PetscTruth isascii,isdraw,issocket,isbinary;
968:
970: ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
971: ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
972: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
973: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
974: if (isascii || isdraw || isbinary || issocket) {
975: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
976: } else {
977: SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
978: }
979: return(0);
980: }
984: #undef __FUNCT__
986: int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
987: {
988: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
989: int ierr;
990: Vec bb1;
991: PetscScalar mone=-1.0;
994: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
996: VecDuplicate(bb,&bb1);
998: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
999: if (flag & SOR_ZERO_INITIAL_GUESS) {
1000: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
1001: its--;
1002: }
1003:
1004: while (its--) {
1005: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1006: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1008: /* update rhs: bb1 = bb - B*x */
1009: VecScale(&mone,mat->lvec);
1010: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1012: /* local sweep */
1013: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1014:
1015: }
1016: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1017: if (flag & SOR_ZERO_INITIAL_GUESS) {
1018: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1019: its--;
1020: }
1021: while (its--) {
1022: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1023: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1025: /* update rhs: bb1 = bb - B*x */
1026: VecScale(&mone,mat->lvec);
1027: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1029: /* local sweep */
1030: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1031:
1032: }
1033: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1034: if (flag & SOR_ZERO_INITIAL_GUESS) {
1035: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1036: its--;
1037: }
1038: while (its--) {
1039: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1040: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1042: /* update rhs: bb1 = bb - B*x */
1043: VecScale(&mone,mat->lvec);
1044: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1046: /* local sweep */
1047: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1048:
1049: }
1050: } else {
1051: SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1052: }
1054: VecDestroy(bb1);
1055: return(0);
1056: }
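/*
   Usage sketch (assumed runtime options, not part of this file): the local sweeps
   implemented above are normally reached through the SOR preconditioner, e.g.

     -pc_type sor -pc_sor_local_symmetric
*/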
1058: #undef __FUNCT__
1060: int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1061: {
1062: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1063: Mat A = mat->A,B = mat->B;
1064: int ierr;
1065: PetscReal isend[5],irecv[5];
1068: info->block_size = 1.0;
1069: MatGetInfo(A,MAT_LOCAL,info);
1070: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1071: isend[3] = info->memory; isend[4] = info->mallocs;
1072: MatGetInfo(B,MAT_LOCAL,info);
1073: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1074: isend[3] += info->memory; isend[4] += info->mallocs;
1075: if (flag == MAT_LOCAL) {
1076: info->nz_used = isend[0];
1077: info->nz_allocated = isend[1];
1078: info->nz_unneeded = isend[2];
1079: info->memory = isend[3];
1080: info->mallocs = isend[4];
1081: } else if (flag == MAT_GLOBAL_MAX) {
1082: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1083: info->nz_used = irecv[0];
1084: info->nz_allocated = irecv[1];
1085: info->nz_unneeded = irecv[2];
1086: info->memory = irecv[3];
1087: info->mallocs = irecv[4];
1088: } else if (flag == MAT_GLOBAL_SUM) {
1089: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1090: info->nz_used = irecv[0];
1091: info->nz_allocated = irecv[1];
1092: info->nz_unneeded = irecv[2];
1093: info->memory = irecv[3];
1094: info->mallocs = irecv[4];
1095: }
1096: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1097: info->fill_ratio_needed = 0;
1098: info->factor_mallocs = 0;
1099: info->rows_global = (double)matin->M;
1100: info->columns_global = (double)matin->N;
1101: info->rows_local = (double)matin->m;
1102: info->columns_local = (double)matin->N;
1104: return(0);
1105: }
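/*
   Usage sketch (assumed caller code): collect storage statistics summed over all
   processors, as assembled by the reductions above.

     MatInfo info;
     ierr = MatGetInfo(matin,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(matin->comm,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/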
1107: #undef __FUNCT__
1109: int MatSetOption_MPIAIJ(Mat A,MatOption op)
1110: {
1111: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1112: int ierr;
1115: switch (op) {
1116: case MAT_NO_NEW_NONZERO_LOCATIONS:
1117: case MAT_YES_NEW_NONZERO_LOCATIONS:
1118: case MAT_COLUMNS_UNSORTED:
1119: case MAT_COLUMNS_SORTED:
1120: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1121: case MAT_KEEP_ZEROED_ROWS:
1122: case MAT_NEW_NONZERO_LOCATION_ERR:
1123: case MAT_USE_INODES:
1124: case MAT_DO_NOT_USE_INODES:
1125: case MAT_IGNORE_ZERO_ENTRIES:
1126: MatSetOption(a->A,op);
1127: MatSetOption(a->B,op);
1128: break;
1129: case MAT_ROW_ORIENTED:
1130: a->roworiented = PETSC_TRUE;
1131: MatSetOption(a->A,op);
1132: MatSetOption(a->B,op);
1133: break;
1134: case MAT_ROWS_SORTED:
1135: case MAT_ROWS_UNSORTED:
1136: case MAT_YES_NEW_DIAGONALS:
1137: PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1138: break;
1139: case MAT_COLUMN_ORIENTED:
1140: a->roworiented = PETSC_FALSE;
1141: MatSetOption(a->A,op);
1142: MatSetOption(a->B,op);
1143: break;
1144: case MAT_IGNORE_OFF_PROC_ENTRIES:
1145: a->donotstash = PETSC_TRUE;
1146: break;
1147: case MAT_NO_NEW_DIAGONALS:
1148: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1149: default:
1150: SETERRQ(PETSC_ERR_SUP,"unknown option");
1151: }
1152: return(0);
1153: }
1155: #undef __FUNCT__
1157: int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1158: {
1159: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1160: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1161: int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1162: int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1163: int *cmap,*idx_p;
1166: if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1167: mat->getrowactive = PETSC_TRUE;
1169: if (!mat->rowvalues && (idx || v)) {
1170: /*
1171: allocate enough space to hold information from the longest row.
1172: */
1173: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1174: int max = 1,tmp;
1175: for (i=0; i<matin->m; i++) {
1176: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1177: if (max < tmp) { max = tmp; }
1178: }
1179: PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);
1180: mat->rowindices = (int*)(mat->rowvalues + max);
1181: }
1183: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1184: lrow = row - rstart;
1186: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1187: if (!v) {pvA = 0; pvB = 0;}
1188: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1189: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1190: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1191: nztot = nzA + nzB;
1193: cmap = mat->garray;
1194: if (v || idx) {
1195: if (nztot) {
1196: /* Sort by increasing column numbers, assuming A and B already sorted */
1197: int imark = -1;
1198: if (v) {
1199: *v = v_p = mat->rowvalues;
1200: for (i=0; i<nzB; i++) {
1201: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1202: else break;
1203: }
1204: imark = i;
1205: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1206: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1207: }
1208: if (idx) {
1209: *idx = idx_p = mat->rowindices;
1210: if (imark > -1) {
1211: for (i=0; i<imark; i++) {
1212: idx_p[i] = cmap[cworkB[i]];
1213: }
1214: } else {
1215: for (i=0; i<nzB; i++) {
1216: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1217: else break;
1218: }
1219: imark = i;
1220: }
1221: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1222: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1223: }
1224: } else {
1225: if (idx) *idx = 0;
1226: if (v) *v = 0;
1227: }
1228: }
1229: *nz = nztot;
1230: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1231: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1232: return(0);
1233: }
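/*
   Usage sketch (assumed caller code): rows may only be requested on the owning
   processor, and each MatGetRow() must be matched by a MatRestoreRow().

     int         row,rstart,rend,ncols,*cols;
     PetscScalar *vals;
     ierr = MatGetOwnershipRange(matin,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(matin,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(matin,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/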
1235: #undef __FUNCT__
1237: int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1238: {
1239: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1242: if (aij->getrowactive == PETSC_FALSE) {
1243: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1244: }
1245: aij->getrowactive = PETSC_FALSE;
1246: return(0);
1247: }
1249: #undef __FUNCT__
1251: int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1252: {
1253: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1254: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1255: int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1256: PetscReal sum = 0.0;
1257: PetscScalar *v;
1260: if (aij->size == 1) {
1261: MatNorm(aij->A,type,norm);
1262: } else {
1263: if (type == NORM_FROBENIUS) {
1264: v = amat->a;
1265: for (i=0; i<amat->nz; i++) {
1266: #if defined(PETSC_USE_COMPLEX)
1267: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1268: #else
1269: sum += (*v)*(*v); v++;
1270: #endif
1271: }
1272: v = bmat->a;
1273: for (i=0; i<bmat->nz; i++) {
1274: #if defined(PETSC_USE_COMPLEX)
1275: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1276: #else
1277: sum += (*v)*(*v); v++;
1278: #endif
1279: }
1280: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1281: *norm = sqrt(*norm);
1282: } else if (type == NORM_1) { /* max column norm */
1283: PetscReal *tmp,*tmp2;
1284: int *jj,*garray = aij->garray;
1285: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1286: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1287: PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1288: *norm = 0.0;
1289: v = amat->a; jj = amat->j;
1290: for (j=0; j<amat->nz; j++) {
1291: tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
1292: }
1293: v = bmat->a; jj = bmat->j;
1294: for (j=0; j<bmat->nz; j++) {
1295: tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1296: }
1297: MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1298: for (j=0; j<mat->N; j++) {
1299: if (tmp2[j] > *norm) *norm = tmp2[j];
1300: }
1301: PetscFree(tmp);
1302: PetscFree(tmp2);
1303: } else if (type == NORM_INFINITY) { /* max row norm */
1304: PetscReal ntemp = 0.0;
1305: for (j=0; j<aij->A->m; j++) {
1306: v = amat->a + amat->i[j] + shift;
1307: sum = 0.0;
1308: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1309: sum += PetscAbsScalar(*v); v++;
1310: }
1311: v = bmat->a + bmat->i[j] + shift;
1312: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1313: sum += PetscAbsScalar(*v); v++;
1314: }
1315: if (sum > ntemp) ntemp = sum;
1316: }
1317: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1318: } else {
1319: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1320: }
1321: }
1322: return(0);
1323: }
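/*
   Usage sketch (assumed caller code): the three norms supported above.

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/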
1325: #undef __FUNCT__
1327: int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1328: {
1329: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1330: Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1331: int ierr,shift = Aloc->indexshift;
1332: int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1333: Mat B;
1334: PetscScalar *array;
1337: if (!matout && M != N) {
1338: SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1339: }
1341: MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1343: /* copy over the A part */
1344: Aloc = (Mat_SeqAIJ*)a->A->data;
1345: m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1346: row = a->rstart;
1347: for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1348: for (i=0; i<m; i++) {
1349: MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1350: row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1351: }
1352: aj = Aloc->j;
1353: for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1355: /* copy over the B part */
1356: Aloc = (Mat_SeqAIJ*)a->B->data;
1357: m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1358: row = a->rstart;
1359: PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);
1360: ct = cols;
1361: for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1362: for (i=0; i<m; i++) {
1363: MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1364: row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1365: }
1366: PetscFree(ct);
1367: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1368: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1369: if (matout) {
1370: *matout = B;
1371: } else {
1372: MatHeaderCopy(A,B);
1373: }
1374: return(0);
1375: }
1377: #undef __FUNCT__
1379: int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1380: {
1381: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1382: Mat a = aij->A,b = aij->B;
1383: int ierr,s1,s2,s3;
1386: MatGetLocalSize(mat,&s2,&s3);
1387: if (rr) {
1388: VecGetLocalSize(rr,&s1);
1389: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1390: /* Overlap communication with computation. */
1391: VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1392: }
1393: if (ll) {
1394: VecGetLocalSize(ll,&s1);
1395: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1396: (*b->ops->diagonalscale)(b,ll,0);
1397: }
1398: /* scale the diagonal block */
1399: (*a->ops->diagonalscale)(a,ll,rr);
1401: if (rr) {
1402: /* Do a scatter end and then right scale the off-diagonal block */
1403: VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1404: (*b->ops->diagonalscale)(b,0,aij->lvec);
1405: }
1406:
1407: return(0);
1408: }
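/*
   Usage sketch (assumed caller code): ll must match the row layout and rr the column
   layout, as checked above; either argument may be PETSC_NULL.

     Vec l,r;
     ierr = VecCreateMPI(mat->comm,mat->m,mat->M,&l);CHKERRQ(ierr);
     ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&r);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);
*/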
1411: #undef __FUNCT__
1413: int MatPrintHelp_MPIAIJ(Mat A)
1414: {
1415: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1416: int ierr;
1419: if (!a->rank) {
1420: MatPrintHelp_SeqAIJ(a->A);
1421: }
1422: return(0);
1423: }
1425: #undef __FUNCT__
1427: int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1428: {
1430: *bs = 1;
1431: return(0);
1432: }
1433: #undef __FUNCT__
1435: int MatSetUnfactored_MPIAIJ(Mat A)
1436: {
1437: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1438: int ierr;
1441: MatSetUnfactored(a->A);
1442: return(0);
1443: }
1445: #undef __FUNCT__
1447: int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1448: {
1449: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1450: Mat a,b,c,d;
1451: PetscTruth flg;
1452: int ierr;
1455: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1456: if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1457: a = matA->A; b = matA->B;
1458: c = matB->A; d = matB->B;
1460: MatEqual(a,c,&flg);
1461: if (flg == PETSC_TRUE) {
1462: MatEqual(b,d,&flg);
1463: }
1464: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1465: return(0);
1466: }
1468: #undef __FUNCT__
1470: int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1471: {
1472: int ierr;
1473: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1474: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1475: PetscTruth flg;
1478: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1479: if (str != SAME_NONZERO_PATTERN || !flg) {
1480: /* because of the column compression in the off-processor part of the matrix a->B,
1481: the number of columns in a->B and b->B may be different, hence we cannot call
1482: the MatCopy() directly on the two parts. If need be, we can provide a more
1483: efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1484: then copying the submatrices */
1485: MatCopy_Basic(A,B,str);
1486: } else {
1487: MatCopy(a->A,b->A,str);
1488: MatCopy(a->B,b->B,str);
1489: }
1490: return(0);
1491: }
1493: #undef __FUNCT__
1495: int MatSetUpPreallocation_MPIAIJ(Mat A)
1496: {
1497: int ierr;
1500: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1501: return(0);
1502: }
1504: EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1505: EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1506: EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1507: EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1508: EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1509: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1510: EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1511: #endif
1513: #include "petscblaslapack.h"
1515: #undef __FUNCT__
1517: int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1518: {
1519: int ierr,one = 1;
1520: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1521: Mat_SeqAIJ *x,*y;
1524: if (str == SAME_NONZERO_PATTERN) {
1525: x = (Mat_SeqAIJ *)xx->A->data;
1526: y = (Mat_SeqAIJ *)yy->A->data;
1527: BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1528: x = (Mat_SeqAIJ *)xx->B->data;
1529: y = (Mat_SeqAIJ *)yy->B->data;
1530: BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1531: } else {
1532: MatAXPY_Basic(a,X,Y,str);
1533: }
1534: return(0);
1535: }
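/*
   Usage sketch (assumed caller code, using the old-style MatAXPY() calling sequence
   of this release): compute Y = Y + a*X, taking the fast path above when the two
   matrices share a nonzero pattern.

     PetscScalar a = 0.5;
     ierr = MatAXPY(&a,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/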
1537: /* -------------------------------------------------------------------*/
1538: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1539: MatGetRow_MPIAIJ,
1540: MatRestoreRow_MPIAIJ,
1541: MatMult_MPIAIJ,
1542: MatMultAdd_MPIAIJ,
1543: MatMultTranspose_MPIAIJ,
1544: MatMultTransposeAdd_MPIAIJ,
1545: 0,
1546: 0,
1547: 0,
1548: 0,
1549: 0,
1550: 0,
1551: MatRelax_MPIAIJ,
1552: MatTranspose_MPIAIJ,
1553: MatGetInfo_MPIAIJ,
1554: MatEqual_MPIAIJ,
1555: MatGetDiagonal_MPIAIJ,
1556: MatDiagonalScale_MPIAIJ,
1557: MatNorm_MPIAIJ,
1558: MatAssemblyBegin_MPIAIJ,
1559: MatAssemblyEnd_MPIAIJ,
1560: 0,
1561: MatSetOption_MPIAIJ,
1562: MatZeroEntries_MPIAIJ,
1563: MatZeroRows_MPIAIJ,
1564: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1565: MatLUFactorSymbolic_MPIAIJ_TFS,
1566: #else
1567: 0,
1568: #endif
1569: 0,
1570: 0,
1571: 0,
1572: MatSetUpPreallocation_MPIAIJ,
1573: 0,
1574: 0,
1575: 0,
1576: 0,
1577: MatDuplicate_MPIAIJ,
1578: 0,
1579: 0,
1580: 0,
1581: 0,
1582: MatAXPY_MPIAIJ,
1583: MatGetSubMatrices_MPIAIJ,
1584: MatIncreaseOverlap_MPIAIJ,
1585: MatGetValues_MPIAIJ,
1586: MatCopy_MPIAIJ,
1587: MatPrintHelp_MPIAIJ,
1588: MatScale_MPIAIJ,
1589: 0,
1590: 0,
1591: 0,
1592: MatGetBlockSize_MPIAIJ,
1593: 0,
1594: 0,
1595: 0,
1596: 0,
1597: MatFDColoringCreate_MPIAIJ,
1598: 0,
1599: MatSetUnfactored_MPIAIJ,
1600: 0,
1601: 0,
1602: MatGetSubMatrix_MPIAIJ,
1603: MatDestroy_MPIAIJ,
1604: MatView_MPIAIJ,
1605: MatGetPetscMaps_Petsc,
1606: 0,
1607: 0,
1608: 0,
1609: 0,
1610: 0,
1611: 0,
1612: 0,
1613: 0,
1614: MatSetColoring_MPIAIJ,
1615: MatSetValuesAdic_MPIAIJ,
1616: MatSetValuesAdifor_MPIAIJ
1617: };
1619: /* ----------------------------------------------------------------------------------------*/
1621: EXTERN_C_BEGIN
1622: #undef __FUNCT__
1624: int MatStoreValues_MPIAIJ(Mat mat)
1625: {
1626: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1627: int ierr;
1630: MatStoreValues(aij->A);
1631: MatStoreValues(aij->B);
1632: return(0);
1633: }
1634: EXTERN_C_END
1636: EXTERN_C_BEGIN
1637: #undef __FUNCT__
1639: int MatRetrieveValues_MPIAIJ(Mat mat)
1640: {
1641: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1642: int ierr;
1645: MatRetrieveValues(aij->A);
1646: MatRetrieveValues(aij->B);
1647: return(0);
1648: }
1649: EXTERN_C_END
1651: #include "petscpc.h"
1652: EXTERN_C_BEGIN
1653: EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1654: EXTERN_C_END
1656: EXTERN_C_BEGIN
1657: #undef __FUNCT__
1659: int MatCreate_MPIAIJ(Mat B)
1660: {
1661: Mat_MPIAIJ *b;
1662: int ierr,i,size;
1665: MPI_Comm_size(B->comm,&size);
1667: ierr = PetscNew(Mat_MPIAIJ,&b);
1668: B->data = (void*)b;
1669: ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));
1670: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1671: B->factor = 0;
1672: B->assembled = PETSC_FALSE;
1673: B->mapping = 0;
1675: B->insertmode = NOT_SET_VALUES;
1676: b->size = size;
1677: MPI_Comm_rank(B->comm,&b->rank);
1679: PetscSplitOwnership(B->comm,&B->m,&B->M);
1680: PetscSplitOwnership(B->comm,&B->n,&B->N);
1682: /* the information in the maps duplicates the information computed below, eventually
1683: we should remove the duplicate information that is not contained in the maps */
1684: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1685: PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);
1687: /* build local table of row and column ownerships */
1688: PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);
1689: PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1690: b->cowners = b->rowners + b->size + 2;
1691: MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1692: b->rowners[0] = 0;
1693: for (i=2; i<=b->size; i++) {
1694: b->rowners[i] += b->rowners[i-1];
1695: }
1696: b->rstart = b->rowners[b->rank];
1697: b->rend = b->rowners[b->rank+1];
1698: MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
1699: b->cowners[0] = 0;
1700: for (i=2; i<=b->size; i++) {
1701: b->cowners[i] += b->cowners[i-1];
1702: }
1703: b->cstart = b->cowners[b->rank];
1704: b->cend = b->cowners[b->rank+1];
1706: /* build cache for off array entries formed */
1707: MatStashCreate_Private(B->comm,1,&B->stash);
1708: b->donotstash = PETSC_FALSE;
1709: b->colmap = 0;
1710: b->garray = 0;
1711: b->roworiented = PETSC_TRUE;
1713: /* stuff used for matrix vector multiply */
1714: b->lvec = PETSC_NULL;
1715: b->Mvctx = PETSC_NULL;
1717: /* stuff for MatGetRow() */
1718: b->rowindices = 0;
1719: b->rowvalues = 0;
1720: b->getrowactive = PETSC_FALSE;
1722: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1723: "MatStoreValues_MPIAIJ",
1724: MatStoreValues_MPIAIJ);
1725: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1726: "MatRetrieveValues_MPIAIJ",
1727: MatRetrieveValues_MPIAIJ);
1728: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1729: "MatGetDiagonalBlock_MPIAIJ",
1730: MatGetDiagonalBlock_MPIAIJ);
1732: return(0);
1733: }
1734: EXTERN_C_END
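/*
   Usage sketch (assumed caller code; M and N are global sizes chosen by the caller):
   create a parallel AIJ matrix with MatCreate()/MatSetType(), which reaches the
   constructor above, then preallocate.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/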
1736: #undef __FUNCT__
1738: int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1739: {
1740: Mat mat;
1741: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1742: int ierr;
1745: *newmat = 0;
1746: MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1747: MatSetType(mat,MATMPIAIJ);
1748: a = (Mat_MPIAIJ*)mat->data;
1749: ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1750: mat->factor = matin->factor;
1751: mat->assembled = PETSC_TRUE;
1752: mat->insertmode = NOT_SET_VALUES;
1753: mat->preallocated = PETSC_TRUE;
1755: a->rstart = oldmat->rstart;
1756: a->rend = oldmat->rend;
1757: a->cstart = oldmat->cstart;
1758: a->cend = oldmat->cend;
1759: a->size = oldmat->size;
1760: a->rank = oldmat->rank;
1761: a->donotstash = oldmat->donotstash;
1762: a->roworiented = oldmat->roworiented;
1763: a->rowindices = 0;
1764: a->rowvalues = 0;
1765: a->getrowactive = PETSC_FALSE;
1767: ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1768: ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);
1769: if (oldmat->colmap) {
1770: #if defined (PETSC_USE_CTABLE)
1771: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1772: #else
1773: PetscMalloc((mat->N)*sizeof(int),&a->colmap);
1774: PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1775: ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));
1776: #endif
1777: } else a->colmap = 0;
1778: if (oldmat->garray) {
1779: int len;
1780: len = oldmat->B->n;
1781: PetscMalloc((len+1)*sizeof(int),&a->garray);
1782: PetscLogObjectMemory(mat,len*sizeof(int));
1783: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); }
1784: } else a->garray = 0;
1785:
1786: VecDuplicate(oldmat->lvec,&a->lvec);
1787: PetscLogObjectParent(mat,a->lvec);
1788: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1789: PetscLogObjectParent(mat,a->Mvctx);
1790: MatDuplicate(oldmat->A,cpvalues,&a->A);
1791: PetscLogObjectParent(mat,a->A);
1792: MatDuplicate(oldmat->B,cpvalues,&a->B);
1793: PetscLogObjectParent(mat,a->B);
1794: PetscFListDuplicate(matin->qlist,&mat->qlist);
1795: *newmat = mat;
1796: return(0);
1797: }
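/*
   Usage sketch (assumed caller code): duplicate the parallel matrix, copying either
   the numerical values or only the nonzero structure.

     Mat C;
     ierr = MatDuplicate(matin,MAT_COPY_VALUES,&C);CHKERRQ(ierr);
*/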
1799: #include "petscsys.h"
1801: EXTERN_C_BEGIN
1802: #undef __FUNCT__
1804: int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1805: {
1806: Mat A;
1807: PetscScalar *vals,*svals;
1808: MPI_Comm comm = ((PetscObject)viewer)->comm;
1809: MPI_Status status;
1810: int i,nz,ierr,j,rstart,rend,fd;
1811: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1812: int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1813: int tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1816: MPI_Comm_size(comm,&size);
1817: MPI_Comm_rank(comm,&rank);
1818: if (!rank) {
1819: PetscViewerBinaryGetDescriptor(viewer,&fd);
1820: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1821: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1822: if (header[3] < 0) {
1823: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1824: }
1825: }
1827: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1828: M = header[1]; N = header[2];
1829: /* determine ownership of all rows */
1830: m = M/size + ((M % size) > rank);
1831: PetscMalloc((size+2)*sizeof(int),&rowners);
1832: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1833: rowners[0] = 0;
1834: for (i=2; i<=size; i++) {
1835: rowners[i] += rowners[i-1];
1836: }
1837: rstart = rowners[rank];
1838: rend = rowners[rank+1];
1840: /* distribute row lengths to all processors */
1841: ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1842: offlens = ourlens + (rend-rstart);
1843: if (!rank) {
1844: PetscMalloc(M*sizeof(int),&rowlengths);
1845: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1846: PetscMalloc(size*sizeof(int),&sndcounts);
1847: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1848: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1849: PetscFree(sndcounts);
1850: } else {
1851: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1852: }
1854: if (!rank) {
1855: /* calculate the number of nonzeros on each processor */
1856: PetscMalloc(size*sizeof(int),&procsnz);
1857: PetscMemzero(procsnz,size*sizeof(int));
1858: for (i=0; i<size; i++) {
1859: for (j=rowners[i]; j< rowners[i+1]; j++) {
1860: procsnz[i] += rowlengths[j];
1861: }
1862: }
1863: PetscFree(rowlengths);
1865: /* determine max buffer needed and allocate it */
1866: maxnz = 0;
1867: for (i=0; i<size; i++) {
1868: maxnz = PetscMax(maxnz,procsnz[i]);
1869: }
1870: PetscMalloc(maxnz*sizeof(int),&cols);
1872: /* read in my part of the matrix column indices */
1873: nz = procsnz[0];
1874: PetscMalloc(nz*sizeof(int),&mycols);
1875: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1877: /* read in everyone else's and ship it off */
1878: for (i=1; i<size; i++) {
1879: nz = procsnz[i];
1880: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1881: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1882: }
1883: PetscFree(cols);
1884: } else {
1885: /* determine buffer space needed for message */
1886: nz = 0;
1887: for (i=0; i<m; i++) {
1888: nz += ourlens[i];
1889: }
1890: PetscMalloc((nz+1)*sizeof(int),&mycols);
1892: /* receive message of column indices */
1893: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1894: MPI_Get_count(&status,MPI_INT,&maxnz);
1895: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1896: }
1898: /* determine column ownership if matrix is not square */
1899: if (N != M) {
1900: n = N/size + ((N % size) > rank);
1901: ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);
1902: cstart = cend - n;
1903: } else {
1904: cstart = rstart;
1905: cend = rend;
1906: n = cend - cstart;
1907: }
1909: /* loop over local rows, determining number of off diagonal entries */
1910: PetscMemzero(offlens,m*sizeof(int));
1911: jj = 0;
1912: for (i=0; i<m; i++) {
1913: for (j=0; j<ourlens[i]; j++) {
1914: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1915: jj++;
1916: }
1917: }
1919: /* create our matrix */
1920: for (i=0; i<m; i++) {
1921: ourlens[i] -= offlens[i];
1922: }
1923: MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);
1924: A = *newmat;
1925: MatSetOption(A,MAT_COLUMNS_SORTED);
1926: for (i=0; i<m; i++) {
1927: ourlens[i] += offlens[i];
1928: }
1930: if (!rank) {
1931: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1933: /* read in my part of the matrix numerical values */
1934: nz = procsnz[0];
1935: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1936:
1937: /* insert into matrix */
1938: jj = rstart;
1939: smycols = mycols;
1940: svals = vals;
1941: for (i=0; i<m; i++) {
1942: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1943: smycols += ourlens[i];
1944: svals += ourlens[i];
1945: jj++;
1946: }
1948: /* read in other processors and ship out */
1949: for (i=1; i<size; i++) {
1950: nz = procsnz[i];
1951: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1952: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1953: }
1954: PetscFree(procsnz);
1955: } else {
1956: /* receive numeric values */
1957: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
1959: /* receive message of values */
1960: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1961: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1962: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1964: /* insert into matrix */
1965: jj = rstart;
1966: smycols = mycols;
1967: svals = vals;
1968: for (i=0; i<m; i++) {
1969: ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1970: smycols += ourlens[i];
1971: svals += ourlens[i];
1972: jj++;
1973: }
1974: }
1975: PetscFree(ourlens);
1976: PetscFree(vals);
1977: PetscFree(mycols);
1978: PetscFree(rowners);
1980: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1981: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1982: return(0);
1983: }
1984: EXTERN_C_END
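/*
   A minimal usage sketch (not part of mpiaij.c): loading a matrix stored in
   PETSc binary format as an MPIAIJ matrix through the generic MatLoad()
   interface, which dispatches to MatLoad_MPIAIJ() above.  The file name
   "matrix.dat" is an assumption for illustration.
*/
   PetscViewer viewer;
   Mat         A;
   ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
   ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
   ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);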
1986: #undef __FUNCT__
1988: /*
1989: Not great since it makes two copies of the submatrix: first a local SeqAIJ copy,
1990: and then the end result formed by concatenating the local matrices.
1991: Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1992: */
1993: int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1994: {
1995: int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1996: int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1997: Mat *local,M,Mreuse;
1998: PetscScalar *vwork,*aa;
1999: MPI_Comm comm = mat->comm;
2000: Mat_SeqAIJ *aij;
2004: MPI_Comm_rank(comm,&rank);
2005: MPI_Comm_size(comm,&size);
2007: if (call == MAT_REUSE_MATRIX) {
2008: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
2009: if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2010: local = &Mreuse;
2011: ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
2012: } else {
2013: ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
2014: Mreuse = *local;
2015: ierr = PetscFree(local);
2016: }
2018: /*
2019: m - number of local rows
2020: n - number of columns (same on all processors)
2021: rstart - first row in new global matrix generated
2022: */
2023: MatGetSize(Mreuse,&m,&n);
2024: if (call == MAT_INITIAL_MATRIX) {
2025: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2026: if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2027: ii = aij->i;
2028: jj = aij->j;
2030: /*
2031: Determine the number of non-zeros in the diagonal and off-diagonal
2032: portions of the matrix in order to do correct preallocation
2033: */
2035: /* first get start and end of "diagonal" columns */
2036: if (csize == PETSC_DECIDE) {
2037: ISGetSize(isrow,&mglobal);
2038: if (mglobal == n) { /* square matrix */
2039: nlocal = m;
2040: } else {
2041: nlocal = n/size + ((n % size) > rank);
2042: }
2043: } else {
2044: nlocal = csize;
2045: }
2046: ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);
2047: rstart = rend - nlocal;
2048: if (rank == size - 1 && rend != n) {
2049: SETERRQ(1,"Local column sizes do not add up to total number of columns");
2050: }
2052: /* next, compute all the lengths */
2053: ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);
2054: olens = dlens + m;
2055: for (i=0; i<m; i++) {
2056: jend = ii[i+1] - ii[i];
2057: olen = 0;
2058: dlen = 0;
2059: for (j=0; j<jend; j++) {
2060: if (*jj < rstart || *jj >= rend) olen++;
2061: else dlen++;
2062: jj++;
2063: }
2064: olens[i] = olen;
2065: dlens[i] = dlen;
2066: }
2067: MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);
2068: PetscFree(dlens);
2069: } else {
2070: int ml,nl;
2072: M = *newmat;
2073: MatGetLocalSize(M,&ml,&nl);
2074: if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2075: MatZeroEntries(M);
2076: /*
2077: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2078: rather than the slower MatSetValues().
2079: */
2080: M->was_assembled = PETSC_TRUE;
2081: M->assembled = PETSC_FALSE;
2082: }
2083: MatGetOwnershipRange(M,&rstart,&rend);
2084: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2085: if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2086: ii = aij->i;
2087: jj = aij->j;
2088: aa = aij->a;
2089: for (i=0; i<m; i++) {
2090: row = rstart + i;
2091: nz = ii[i+1] - ii[i];
2092: cwork = jj; jj += nz;
2093: vwork = aa; aa += nz;
2094: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2095: }
2097: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2098: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2099: *newmat = M;
2101: /* save submatrix used in processor for next request */
2102: if (call == MAT_INITIAL_MATRIX) {
2103: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2104: PetscObjectDereference((PetscObject)Mreuse);
2105: }
2107: return(0);
2108: }
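/*
   A minimal usage sketch (not part of mpiaij.c): extracting a parallel
   submatrix through the generic MatGetSubMatrix() interface, which dispatches
   to MatGetSubMatrix_MPIAIJ() above.  Here each process requests its own rows
   rstart..rend-1 and the first ncols global columns; mat, rstart, rend, and
   ncols are assumptions for illustration.
*/
   IS  isrow,iscol;
   Mat sub;
   ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
   ierr = ISCreateStride(PETSC_COMM_SELF,ncols,0,1,&iscol);CHKERRQ(ierr);
   ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
   ierr = ISDestroy(isrow);CHKERRQ(ierr);
   ierr = ISDestroy(iscol);CHKERRQ(ierr);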
2110: #undef __FUNCT__
2112: /*@C
2113: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2114: (the default parallel PETSc format). For good matrix assembly performance
2115: the user should preallocate the matrix storage by setting the parameters
2116: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2117: performance can be increased by more than a factor of 50.
2119: Collective on MPI_Comm
2121: Input Parameters:
2122: + A - the matrix
2123: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2124: (same value is used for all local rows)
2125: . d_nnz - array containing the number of nonzeros in the various rows of the
2126: DIAGONAL portion of the local submatrix (possibly different for each row)
2127: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2128: The size of this array is equal to the number of local rows, i.e. 'm'.
2129: You must leave room for the diagonal entry even if it is zero.
2130: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2131: submatrix (same value is used for all local rows).
2132: - o_nnz - array containing the number of nonzeros in the various rows of the
2133: OFF-DIAGONAL portion of the local submatrix (possibly different for
2134: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2135: structure. The size of this array is equal to the number
2136: of local rows, i.e. 'm'.
2138: The AIJ format (also called the Yale sparse matrix format or
2139: compressed row storage), is fully compatible with standard Fortran 77
2140: storage. That is, the stored row and column indices can begin at
2141: either one (as in Fortran) or zero. See the users manual for details.
2143: The user MUST specify either the local or global matrix dimensions
2144: (possibly both).
2146: The parallel matrix is partitioned such that the first m0 rows belong to
2147: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2148: to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2150: The DIAGONAL portion of the local submatrix of a processor can be defined
2151: as the submatrix which is obtained by extracting the part corresponding
2152: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2153: first row that belongs to the processor, and r2 is the last row belonging
2154: to this processor. This is a square m x m matrix. The remaining portion
2155: of the local submatrix (m x N) constitutes the OFF-DIAGONAL portion.
2157: If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2159: By default, this format uses inodes (identical nodes) when possible.
2160: We search for consecutive rows with the same nonzero structure, thereby
2161: reusing matrix information to achieve increased efficiency.
2163: Options Database Keys:
2164: + -mat_aij_no_inode - Do not use inodes
2165: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2166: - -mat_aij_oneindex - Internally use indexing starting at 1
2167: rather than 0. Note that when calling MatSetValues(),
2168: the user still MUST index entries starting at 0!
2170: Example usage:
2171:
2172: Consider the following 8x8 matrix with 34 non-zero values, that is
2173: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2174: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2175: as follows:
2177: .vb
2178: 1 2 0 | 0 3 0 | 0 4
2179: Proc0 0 5 6 | 7 0 0 | 8 0
2180: 9 0 10 | 11 0 0 | 12 0
2181: -------------------------------------
2182: 13 0 14 | 15 16 17 | 0 0
2183: Proc1 0 18 0 | 19 20 21 | 0 0
2184: 0 0 0 | 22 23 0 | 24 0
2185: -------------------------------------
2186: Proc2 25 26 27 | 0 0 28 | 29 0
2187: 30 0 0 | 31 32 33 | 0 34
2188: .ve
2190: This can be represented as a collection of submatrices as:
2192: .vb
2193: A B C
2194: D E F
2195: G H I
2196: .ve
2198: Where the submatrices A,B,C are owned by proc0, D,E,F are
2199: owned by proc1, G,H,I are owned by proc2.
2201: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2202: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2203: The 'M','N' parameters are 8,8, and have the same values on all procs.
2205: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2206: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2207: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2208: Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2209: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2210: matrix, and [DF] as another SeqAIJ matrix.
2212: When d_nz, o_nz parameters are specified, d_nz storage elements are
2213: allocated for every row of the local diagonal submatrix, and o_nz
2214: storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2215: One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2216: the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2217: In this case, the values of d_nz,o_nz are:
2218: .vb
2219: proc0 : d_nz = 2, o_nz = 2
2220: proc1 : d_nz = 3, o_nz = 2
2221: proc2 : d_nz = 1, o_nz = 4
2222: .ve
2223: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2224: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2225: for proc2, i.e. we are using 12+15+10=37 storage locations to store
2226: 34 values.
2228: When d_nnz, o_nnz parameters are specified, the storage is specified
2229: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2230: In the above case the values for d_nnz,o_nnz are:
2231: .vb
2232: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2233: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2234: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2235: .ve
2236: Here the space allocated is the sum of all the above values, i.e. 34, and
2237: hence pre-allocation is perfect.
2239: Level: intermediate
2241: .keywords: matrix, aij, compressed row, sparse, parallel
2243: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2244: @*/
2245: int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2246: {
2247: Mat_MPIAIJ *b;
2248: int ierr,i;
2249: PetscTruth flg2;
2252: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);
2253: if (!flg2) return(0);
2254: B->preallocated = PETSC_TRUE;
2255: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2256: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2257: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2258: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2259: if (d_nnz) {
2260: for (i=0; i<B->m; i++) {
2261: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2262: }
2263: }
2264: if (o_nnz) {
2265: for (i=0; i<B->m; i++) {
2266: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2267: }
2268: }
2269: b = (Mat_MPIAIJ*)B->data;
2271: MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);
2272: PetscLogObjectParent(B,b->A);
2273: MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);
2274: PetscLogObjectParent(B,b->B);
2276: return(0);
2277: }
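/*
   A minimal usage sketch (not part of mpiaij.c) of the preallocation routine
   for the 8x8 example in the manual page above, as seen from proc1 on a
   3-process run (proc1 owns rows 3..5).  The d_nnz/o_nnz values come from the
   manual page; everything else is an assumption for illustration.
*/
   Mat B;
   int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
   ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&B);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); /* d_nz,o_nz are ignored when the arrays are given */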
2279: #undef __FUNCT__
2281: /*@C
2282: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2283: (the default parallel PETSc format). For good matrix assembly performance
2284: the user should preallocate the matrix storage by setting the parameters
2285: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2286: performance can be increased by more than a factor of 50.
2288: Collective on MPI_Comm
2290: Input Parameters:
2291: + comm - MPI communicator
2292: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2293: This value should be the same as the local size used in creating the
2294: y vector for the matrix-vector product y = Ax.
2295: . n - This value should be the same as the local size used in creating the
2296: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2297: calculated if N is given) For square matrices n is almost always m.
2298: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2299: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2300: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2301: (same value is used for all local rows)
2302: . d_nnz - array containing the number of nonzeros in the various rows of the
2303: DIAGONAL portion of the local submatrix (possibly different for each row)
2304: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2305: The size of this array is equal to the number of local rows, i.e. 'm'.
2306: You must leave room for the diagonal entry even if it is zero.
2307: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2308: submatrix (same value is used for all local rows).
2309: - o_nnz - array containing the number of nonzeros in the various rows of the
2310: OFF-DIAGONAL portion of the local submatrix (possibly different for
2311: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2312: structure. The size of this array is equal to the number
2313: of local rows, i.e. 'm'.
2315: Output Parameter:
2316: . A - the matrix
2318: Notes:
2319: m,n,M,N parameters specify the size of the matrix, and its partitioning across
2320: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2321: storage requirements for this matrix.
2323: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2324: processor then it must be used on all processors that share the object for
2325: that argument.
2327: The AIJ format (also called the Yale sparse matrix format or
2328: compressed row storage), is fully compatible with standard Fortran 77
2329: storage. That is, the stored row and column indices can begin at
2330: either one (as in Fortran) or zero. See the users manual for details.
2332: The user MUST specify either the local or global matrix dimensions
2333: (possibly both).
2335: The parallel matrix is partitioned such that the first m0 rows belong to
2336: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2337: to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2339: The DIAGONAL portion of the local submatrix of a processor can be defined
2340: as the submatrix which is obtained by extracting the part corresponding
2341: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2342: first row that belongs to the processor, and r2 is the last row belonging
2343: to this processor. This is a square m x m matrix. The remaining portion
2344: of the local submatrix (m x N) constitutes the OFF-DIAGONAL portion.
2346: If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2348: By default, this format uses inodes (identical nodes) when possible.
2349: We search for consecutive rows with the same nonzero structure, thereby
2350: reusing matrix information to achieve increased efficiency.
2352: Options Database Keys:
2353: + -mat_aij_no_inode - Do not use inodes
2354: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2355: - -mat_aij_oneindex - Internally use indexing starting at 1
2356: rather than 0. Note that when calling MatSetValues(),
2357: the user still MUST index entries starting at 0!
2360: Example usage:
2361:
2362: Consider the following 8x8 matrix with 34 non-zero values, that is
2363: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2364: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2365: as follows:
2367: .vb
2368: 1 2 0 | 0 3 0 | 0 4
2369: Proc0 0 5 6 | 7 0 0 | 8 0
2370: 9 0 10 | 11 0 0 | 12 0
2371: -------------------------------------
2372: 13 0 14 | 15 16 17 | 0 0
2373: Proc1 0 18 0 | 19 20 21 | 0 0
2374: 0 0 0 | 22 23 0 | 24 0
2375: -------------------------------------
2376: Proc2 25 26 27 | 0 0 28 | 29 0
2377: 30 0 0 | 31 32 33 | 0 34
2378: .ve
2380: This can be represented as a collection of submatrices as:
2382: .vb
2383: A B C
2384: D E F
2385: G H I
2386: .ve
2388: Where the submatrices A,B,C are owned by proc0, D,E,F are
2389: owned by proc1, G,H,I are owned by proc2.
2391: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2392: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2393: The 'M','N' parameters are 8,8, and have the same values on all procs.
2395: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2396: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2397: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2398: Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2399: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2400: matrix, and [DF] as another SeqAIJ matrix.
2402: When d_nz, o_nz parameters are specified, d_nz storage elements are
2403: allocated for every row of the local diagonal submatrix, and o_nz
2404: storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2405: One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2406: the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2407: In this case, the values of d_nz,o_nz are:
2408: .vb
2409: proc0 : d_nz = 2, o_nz = 2
2410: proc1 : d_nz = 3, o_nz = 2
2411: proc2 : d_nz = 1, o_nz = 4
2412: .ve
2413: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2414: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2415: for proc2, i.e. we are using 12+15+10=37 storage locations to store
2416: 34 values.
2418: When d_nnz, o_nnz parameters are specified, the storage is specified
2419: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2420: In the above case the values for d_nnz,o_nnz are:
2421: .vb
2422: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2423: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2424: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2425: .ve
2426: Here the space allocated is the sum of all the above values, i.e. 34, and
2427: hence pre-allocation is perfect.
2429: Level: intermediate
2431: .keywords: matrix, aij, compressed row, sparse, parallel
2433: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2434: @*/
2435: int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2436: {
2437: int ierr,size;
2440: MatCreate(comm,m,n,M,N,A);
2441: MPI_Comm_size(comm,&size);
2442: if (size > 1) {
2443: MatSetType(*A,MATMPIAIJ);
2444: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2445: } else {
2446: MatSetType(*A,MATSEQAIJ);
2447: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2448: }
2449: return(0);
2450: }
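/*
   A minimal usage sketch (not part of mpiaij.c) of the convenience creation
   routine for the same 8x8 example, this time giving only the scalar d_nz/o_nz
   estimates; 3 and 4 are conservative upper bounds over the per-process values
   listed in the manual page.  Assumes a 3-process run.
*/
   Mat A;
   ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,4,PETSC_NULL,&A);CHKERRQ(ierr);
   /* ... insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */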
2452: #undef __FUNCT__
2454: int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2455: {
2456: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2458: *Ad = a->A;
2459: *Ao = a->B;
2460: *colmap = a->garray;
2461: return(0);
2462: }
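/*
   A minimal usage sketch (not part of mpiaij.c): accessing the two sequential
   blocks stored by this process and the off-diagonal column map.  `A` is an
   assumed, already assembled MPIAIJ matrix.
*/
   Mat Ad,Ao;
   int *garray;
   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
   /* Ad is the DIAGONAL block, Ao the OFF-DIAGONAL block; garray[j] gives the
      global column number of local column j of Ao.  No copies are made, so
      nothing obtained here needs to be freed or destroyed. */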
2464: #undef __FUNCT__
2466: int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2467: {
2468: int ierr;
2469: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2472: if (coloring->ctype == IS_COLORING_LOCAL) {
2473: int *allcolors,*colors,i;
2474: ISColoring ocoloring;
2476: /* set coloring for diagonal portion */
2477: MatSetColoring_SeqAIJ(a->A,coloring);
2479: /* set coloring for off-diagonal portion */
2480: ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
2481: PetscMalloc((a->B->n+1)*sizeof(int),&colors);
2482: for (i=0; i<a->B->n; i++) {
2483: colors[i] = allcolors[a->garray[i]];
2484: }
2485: PetscFree(allcolors);
2486: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2487: MatSetColoring_SeqAIJ(a->B,ocoloring);
2488: ISColoringDestroy(ocoloring);
2489: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2490: int *colors,i,*larray;
2491: ISColoring ocoloring;
2493: /* set coloring for diagonal portion */
2494: PetscMalloc((a->A->n+1)*sizeof(int),&larray);
2495: for (i=0; i<a->A->n; i++) {
2496: larray[i] = i + a->cstart;
2497: }
2498: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);
2499: PetscMalloc((a->A->n+1)*sizeof(int),&colors);
2500: for (i=0; i<a->A->n; i++) {
2501: colors[i] = coloring->colors[larray[i]];
2502: }
2503: PetscFree(larray);
2504: ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);
2505: MatSetColoring_SeqAIJ(a->A,ocoloring);
2506: ISColoringDestroy(ocoloring);
2508: /* set coloring for off-diagonal portion */
2509: PetscMalloc((a->B->n+1)*sizeof(int),&larray);
2510: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);
2511: PetscMalloc((a->B->n+1)*sizeof(int),&colors);
2512: for (i=0; i<a->B->n; i++) {
2513: colors[i] = coloring->colors[larray[i]];
2514: }
2515: PetscFree(larray);
2516: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2517: MatSetColoring_SeqAIJ(a->B,ocoloring);
2518: ISColoringDestroy(ocoloring);
2519: } else {
2520: SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2521: }
2523: return(0);
2524: }
2526: #undef __FUNCT__
2528: int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2529: {
2530: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2531: int ierr;
2534: MatSetValuesAdic_SeqAIJ(a->A,advalues);
2535: MatSetValuesAdic_SeqAIJ(a->B,advalues);
2536: return(0);
2537: }
2539: #undef __FUNCT__
2541: int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2542: {
2543: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2544: int ierr;
2547: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
2548: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
2549: return(0);
2550: }
2552: #undef __FUNCT__
2554: /*@C
2555: MatMerge - Creates a single large PETSc matrix by concatenating sequential
2556: matrices from each processor
2558: Collective on MPI_Comm
2560: Input Parameters:
2561: + comm - the communicator the parallel matrix will live on
2562: - inmat - the input sequential matrices
2564: Output Parameter:
2565: . outmat - the parallel matrix generated
2567: Level: advanced
2569: Notes: The number of columns of the matrix in EACH of the separate files
2570: MUST be the same.
2572: @*/
2573: int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2574: {
2575: int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2576: PetscScalar *values;
2577: PetscMap columnmap,rowmap;
2580:
2581: MatGetSize(inmat,&m,&n);
2583: /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2584: PetscMapCreate(comm,&columnmap);
2585: PetscMapSetSize(columnmap,n);
2586: PetscMapSetType(columnmap,MAP_MPI);
2587: PetscMapGetLocalSize(columnmap,&n);
2588: PetscMapDestroy(columnmap);
2590: PetscMapCreate(comm,&rowmap);
2591: PetscMapSetLocalSize(rowmap,m);
2592: PetscMapSetType(rowmap,MAP_MPI);
2593: PetscMapGetLocalRange(rowmap,&rstart,0);
2594: PetscMapDestroy(rowmap);
2596: MatPreallocateInitialize(comm,m,n,dnz,onz);
2597: for (i=0;i<m;i++) {
2598: MatGetRow(inmat,i,&nnz,&indx,&values);
2599: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
2600: MatRestoreRow(inmat,i,&nnz,&indx,&values);
2601: }
2602: MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);
2603: MatPreallocateFinalize(dnz,onz);
2605: for (i=0;i<m;i++) {
2606: MatGetRow(inmat,i,&nnz,&indx,&values);
2607: I = i + rstart;
2608: MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);
2609: MatRestoreRow(inmat,i,&nnz,&indx,&values);
2610: }
2611: MatDestroy(inmat);
2612: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
2613: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
2615: return(0);
2616: }
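/*
   A minimal usage sketch (not part of mpiaij.c): merging one sequential matrix
   per process into a single parallel matrix.  The local sizes and the fill-in
   step are assumptions for illustration; note that MatMerge() destroys the
   input matrix it is given.
*/
   Mat local,global;
   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,8,4,PETSC_NULL,&local);CHKERRQ(ierr);
   /* ... fill `local` with MatSetValues() and assemble it ... */
   ierr = MatMerge(PETSC_COMM_WORLD,local,&global);CHKERRQ(ierr);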
2618: #undef __FUNCT__
2620: int MatFileSplit(Mat A,char *outfile)
2621: {
2622: int ierr,rank,len,m,N,i,rstart,*indx,nnz;
2623: PetscViewer out;
2624: char *name;
2625: Mat B;
2626: PetscScalar *values;
2629:
2630: MatGetLocalSize(A,&m,0);
2631: MatGetSize(A,0,&N);
2632: MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);
2633: MatGetOwnershipRange(A,&rstart,0);
2634: for (i=0;i<m;i++) {
2635: MatGetRow(A,i+rstart,&nnz,&indx,&values);
2636: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
2637: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
2638: }
2639: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2640: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2642: MPI_Comm_rank(A->comm,&rank);
2643: PetscStrlen(outfile,&len);
2644: PetscMalloc((len+5)*sizeof(char),&name);
2645: sprintf(name,"%s.%d",outfile,rank);
2646: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);
2647: PetscFree(name);
2648: MatView(B,out);
2649: PetscViewerDestroy(out);
2650: MatDestroy(B);
2651: return(0);
2652: }