Actual source code: baij.c

  1: /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/

  3: /*
  4:     Defines the basic matrix operations for the BAIJ (compressed row)
  5:   matrix storage format.
  6: */
 7:  #include src/mat/impls/baij/seq/baij.h
 8:  #include src/vec/vecimpl.h
 9:  #include src/inline/spops.h
 10:  #include petscsys.h

 12: /*
 13:     Special version for Fun3d sequential benchmark
 14: */
 15: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 16: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
 17: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 18: #define matsetvaluesblocked4_ matsetvaluesblocked4
 19: #endif

 21: EXTERN_C_BEGIN
 22: #undef __FUNCT__  
 24: void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
 25: {
 26:   Mat         A = *AA;
 27:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
 28:   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
 29:   int         *ai=a->i,*ailen=a->ilen;
 30:   int         *aj=a->j,stepval;
 31:   PetscScalar *value = v;
 32:   MatScalar   *ap,*aa = a->a,*bap;

 35:   stepval = (n-1)*4;
 36:   for (k=0; k<m; k++) { /* loop over added rows */
 37:     row  = im[k];
 38:     rp   = aj + ai[row];
 39:     ap   = aa + 16*ai[row];
 40:     nrow = ailen[row];
 41:     low  = 0;
 42:     for (l=0; l<n; l++) { /* loop over added columns */
 43:       col = in[l];
 44:       value = v + k*(stepval+4)*4 + l*4;
 45:       low = 0; high = nrow;
 46:       while (high-low > 7) {
 47:         t = (low+high)/2;
 48:         if (rp[t] > col) high = t;
 49:         else             low  = t;
 50:       }
 51:       for (i=low; i<high; i++) {
 52:         if (rp[i] > col) break;
 53:         if (rp[i] == col) {
 54:           bap  = ap +  16*i;
 55:           for (ii=0; ii<4; ii++,value+=stepval) {
 56:             for (jj=ii; jj<16; jj+=4) {
 57:               bap[jj] += *value++;
 58:             }
 59:           }
 60:           goto noinsert2;
 61:         }
 62:       }
 63:       N = nrow++ - 1;
 64:       /* shift up all the later entries in this row */
 65:       for (ii=N; ii>=i; ii--) {
 66:         rp[ii+1] = rp[ii];
 67:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
 68:       }
 69:       if (N >= i) {
 70:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
 71:       }
 72:       rp[i] = col;
 73:       bap   = ap +  16*i;
 74:       for (ii=0; ii<4; ii++,value+=stepval) {
 75:         for (jj=ii; jj<16; jj+=4) {
 76:           bap[jj] = *value++;
 77:         }
 78:       }
 79:       noinsert2:;
 80:       low = i;
 81:     }
 82:     ailen[row] = nrow;
 83:   }
 84: }
 85: EXTERN_C_END

 87: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 88: #define matsetvalues4_ MATSETVALUES4
 89: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 90: #define matsetvalues4_ matsetvalues4
 91: #endif

 93: EXTERN_C_BEGIN
 94: #undef __FUNCT__  
 96: void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
 97: {
 98:   Mat         A = *AA;
 99:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
100:   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
101:   int         *ai=a->i,*ailen=a->ilen;
102:   int         *aj=a->j,brow,bcol;
103:   int         ridx,cidx;
104:   MatScalar   *ap,value,*aa=a->a,*bap;
105: 
107:   for (k=0; k<m; k++) { /* loop over added rows */
108:     row  = im[k]; brow = row/4;
109:     rp   = aj + ai[brow];
110:     ap   = aa + 16*ai[brow];
111:     nrow = ailen[brow];
112:     low  = 0;
113:     for (l=0; l<n; l++) { /* loop over added columns */
114:       col = in[l]; bcol = col/4;
115:       ridx = row % 4; cidx = col % 4;
116:       value = v[l + k*n];
117:       low = 0; high = nrow;
118:       while (high-low > 7) {
119:         t = (low+high)/2;
120:         if (rp[t] > bcol) high = t;
121:         else              low  = t;
122:       }
123:       for (i=low; i<high; i++) {
124:         if (rp[i] > bcol) break;
125:         if (rp[i] == bcol) {
126:           bap  = ap +  16*i + 4*cidx + ridx;
127:           *bap += value;
128:           goto noinsert1;
129:         }
130:       }
131:       N = nrow++ - 1;
132:       /* shift up all the later entries in this row */
133:       for (ii=N; ii>=i; ii--) {
134:         rp[ii+1] = rp[ii];
135:         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
136:       }
137:       if (N>=i) {
138:         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
139:       }
140:       rp[i]                    = bcol;
141:       ap[16*i + 4*cidx + ridx] = value;
142:       noinsert1:;
143:       low = i;
144:     }
145:     ailen[brow] = nrow;
146:   }
147: }
148: EXTERN_C_END

150: /*  UGLY, ugly, ugly
151:    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 
152:    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 
153:    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
154:    converts the entries into single precision and then calls ..._MatScalar() to put them
155:    into the single precision data structures.
156: */
157: #if defined(PETSC_USE_MAT_SINGLE)
158: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
159: #else
160: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
161: #endif
162: #if defined(PETSC_HAVE_DSCPACK)
163: EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
164: #endif

166: #define CHUNKSIZE  10

168: /*
169:      Checks for missing diagonals
170: */
171: #undef __FUNCT__  
173: int MatMissingDiagonal_SeqBAIJ(Mat A)
174: {
175:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
176:   int         *diag,*jj = a->j,i,ierr;

179:   MatMarkDiagonal_SeqBAIJ(A);
180:   diag = a->diag;
181:   for (i=0; i<a->mbs; i++) {
182:     if (jj[diag[i]] != i) {
183:       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
184:     }
185:   }
186:   return(0);
187: }

189: #undef __FUNCT__  
191: int MatMarkDiagonal_SeqBAIJ(Mat A)
192: {
193:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
194:   int         i,j,*diag,m = a->mbs,ierr;

197:   if (a->diag) return(0);

199:   PetscMalloc((m+1)*sizeof(int),&diag);
200:   PetscLogObjectMemory(A,(m+1)*sizeof(int));
201:   for (i=0; i<m; i++) {
202:     diag[i] = a->i[i+1];
203:     for (j=a->i[i]; j<a->i[i+1]; j++) {
204:       if (a->j[j] == i) {
205:         diag[i] = j;
206:         break;
207:       }
208:     }
209:   }
210:   a->diag = diag;
211:   return(0);
212: }


215: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);

217: #undef __FUNCT__  
219: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
220: {
221:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
222:   int         ierr,n = a->mbs,i;

225:   *nn = n;
226:   if (!ia) return(0);
227:   if (symmetric) {
228:     MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
229:   } else if (oshift == 1) {
230:     /* temporarily add 1 to i and j indices */
231:     int nz = a->i[n];
232:     for (i=0; i<nz; i++) a->j[i]++;
233:     for (i=0; i<n+1; i++) a->i[i]++;
234:     *ia = a->i; *ja = a->j;
235:   } else {
236:     *ia = a->i; *ja = a->j;
237:   }

239:   return(0);
240: }

242: #undef __FUNCT__  
244: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
245: {
246:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
247:   int         i,n = a->mbs,ierr;

250:   if (!ia) return(0);
251:   if (symmetric) {
252:     PetscFree(*ia);
253:     PetscFree(*ja);
254:   } else if (oshift == 1) {
255:     int nz = a->i[n]-1;
256:     for (i=0; i<nz; i++) a->j[i]--;
257:     for (i=0; i<n+1; i++) a->i[i]--;
258:   }
259:   return(0);
260: }

262: #undef __FUNCT__  
264: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
265: {
266:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;

269:   *bs = baij->bs;
270:   return(0);
271: }


274: #undef __FUNCT__  
276: int MatDestroy_SeqBAIJ(Mat A)
277: {
278:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
279:   int         ierr;

282: #if defined(PETSC_USE_LOG)
283:   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
284: #endif
285:   PetscFree(a->a);
286:   if (!a->singlemalloc) {
287:     PetscFree(a->i);
288:     PetscFree(a->j);
289:   }
290:   if (a->row) {
291:     ISDestroy(a->row);
292:   }
293:   if (a->col) {
294:     ISDestroy(a->col);
295:   }
296:   if (a->diag) {PetscFree(a->diag);}
297:   if (a->ilen) {PetscFree(a->ilen);}
298:   if (a->imax) {PetscFree(a->imax);}
299:   if (a->solve_work) {PetscFree(a->solve_work);}
300:   if (a->mult_work) {PetscFree(a->mult_work);}
301:   if (a->icol) {ISDestroy(a->icol);}
302:   if (a->saved_values) {PetscFree(a->saved_values);}
303: #if defined(PETSC_USE_MAT_SINGLE)
304:   if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
305: #endif
306:   PetscFree(a);
307:   return(0);
308: }

310: #undef __FUNCT__  
312: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
313: {
314:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;

317:   switch (op) {
318:   case MAT_ROW_ORIENTED:
319:     a->roworiented    = PETSC_TRUE;
320:     break;
321:   case MAT_COLUMN_ORIENTED:
322:     a->roworiented    = PETSC_FALSE;
323:     break;
324:   case MAT_COLUMNS_SORTED:
325:     a->sorted         = PETSC_TRUE;
326:     break;
327:   case MAT_COLUMNS_UNSORTED:
328:     a->sorted         = PETSC_FALSE;
329:     break;
330:   case MAT_KEEP_ZEROED_ROWS:
331:     a->keepzeroedrows = PETSC_TRUE;
332:     break;
333:   case MAT_NO_NEW_NONZERO_LOCATIONS:
334:     a->nonew          = 1;
335:     break;
336:   case MAT_NEW_NONZERO_LOCATION_ERR:
337:     a->nonew          = -1;
338:     break;
339:   case MAT_NEW_NONZERO_ALLOCATION_ERR:
340:     a->nonew          = -2;
341:     break;
342:   case MAT_YES_NEW_NONZERO_LOCATIONS:
343:     a->nonew          = 0;
344:     break;
345:   case MAT_ROWS_SORTED:
346:   case MAT_ROWS_UNSORTED:
347:   case MAT_YES_NEW_DIAGONALS:
348:   case MAT_IGNORE_OFF_PROC_ENTRIES:
349:   case MAT_USE_HASH_TABLE:
350:     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignoredn");
351:     break;
352:   case MAT_NO_NEW_DIAGONALS:
353:     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
354:   default:
355:     SETERRQ(PETSC_ERR_SUP,"unknown option");
356:   }
357:   return(0);
358: }

360: #undef __FUNCT__  
362: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
363: {
364:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
365:   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
366:   MatScalar    *aa,*aa_i;
367:   PetscScalar  *v_i;

370:   bs  = a->bs;
371:   ai  = a->i;
372:   aj  = a->j;
373:   aa  = a->a;
374:   bs2 = a->bs2;
375: 
376:   if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
377: 
378:   bn  = row/bs;   /* Block number */
379:   bp  = row % bs; /* Block Position */
380:   M   = ai[bn+1] - ai[bn];
381:   *nz = bs*M;
382: 
383:   if (v) {
384:     *v = 0;
385:     if (*nz) {
386:       PetscMalloc((*nz)*sizeof(PetscScalar),v);
387:       for (i=0; i<M; i++) { /* for each block in the block row */
388:         v_i  = *v + i*bs;
389:         aa_i = aa + bs2*(ai[bn] + i);
390:         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
391:       }
392:     }
393:   }

395:   if (idx) {
396:     *idx = 0;
397:     if (*nz) {
398:       PetscMalloc((*nz)*sizeof(int),idx);
399:       for (i=0; i<M; i++) { /* for each block in the block row */
400:         idx_i = *idx + i*bs;
401:         itmp  = bs*aj[ai[bn] + i];
402:         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
403:       }
404:     }
405:   }
406:   return(0);
407: }

409: #undef __FUNCT__  
411: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
412: {

416:   if (idx) {if (*idx) {PetscFree(*idx);}}
417:   if (v)   {if (*v)   {PetscFree(*v);}}
418:   return(0);
419: }

421: #undef __FUNCT__  
423: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
424: {
425:   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
426:   Mat         C;
427:   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
428:   int         *rows,*cols,bs2=a->bs2;
429:   PetscScalar *array;

432:   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
433:   PetscMalloc((1+nbs)*sizeof(int),&col);
434:   PetscMemzero(col,(1+nbs)*sizeof(int));

436: #if defined(PETSC_USE_MAT_SINGLE)
437:   PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
438:   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
439: #else
440:   array = a->a;
441: #endif

443:   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
444:   MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);
445:   PetscFree(col);
446:   PetscMalloc(2*bs*sizeof(int),&rows);
447:   cols = rows + bs;
448:   for (i=0; i<mbs; i++) {
449:     cols[0] = i*bs;
450:     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
451:     len = ai[i+1] - ai[i];
452:     for (j=0; j<len; j++) {
453:       rows[0] = (*aj++)*bs;
454:       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
455:       MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
456:       array += bs2;
457:     }
458:   }
459:   PetscFree(rows);
460: #if defined(PETSC_USE_MAT_SINGLE)
461:   PetscFree(array);
462: #endif
463: 
464:   MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
465:   MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
466: 
467:   if (B) {
468:     *B = C;
469:   } else {
470:     MatHeaderCopy(A,C);
471:   }
472:   return(0);
473: }

475: #undef __FUNCT__  
477: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
478: {
479:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
480:   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
481:   PetscScalar *aa;
482:   FILE        *file;

485:   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);
486:   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);
487:   col_lens[0] = MAT_FILE_COOKIE;

489:   col_lens[1] = A->m;
490:   col_lens[2] = A->n;
491:   col_lens[3] = a->nz*bs2;

493:   /* store lengths of each row and write (including header) to file */
494:   count = 0;
495:   for (i=0; i<a->mbs; i++) {
496:     for (j=0; j<bs; j++) {
497:       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
498:     }
499:   }
500:   PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
501:   PetscFree(col_lens);

503:   /* store column indices (zero start index) */
504:   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
505:   count = 0;
506:   for (i=0; i<a->mbs; i++) {
507:     for (j=0; j<bs; j++) {
508:       for (k=a->i[i]; k<a->i[i+1]; k++) {
509:         for (l=0; l<bs; l++) {
510:           jj[count++] = bs*a->j[k] + l;
511:         }
512:       }
513:     }
514:   }
515:   PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
516:   PetscFree(jj);

518:   /* store nonzero values */
519:   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
520:   count = 0;
521:   for (i=0; i<a->mbs; i++) {
522:     for (j=0; j<bs; j++) {
523:       for (k=a->i[i]; k<a->i[i+1]; k++) {
524:         for (l=0; l<bs; l++) {
525:           aa[count++] = a->a[bs2*k + l*bs + j];
526:         }
527:       }
528:     }
529:   }
530:   PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
531:   PetscFree(aa);

533:   PetscViewerBinaryGetInfoPointer(viewer,&file);
534:   if (file) {
535:     fprintf(file,"-matload_block_size %dn",a->bs);
536:   }
537:   return(0);
538: }

540: extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);

542: #undef __FUNCT__  
544: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
545: {
546:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
547:   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
548:   PetscViewerFormat format;

551:   PetscViewerGetFormat(viewer,&format);
552:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
553:     PetscViewerASCIIPrintf(viewer,"  block size is %dn",bs);
554:   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
555:     Mat aij;
556:     MatConvert(A,MATSEQAIJ,&aij);
557:     MatView(aij,viewer);
558:     MatDestroy(aij);
559:   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
560: #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
561:      MatMPIBAIJFactorInfo_DSCPACK(A,viewer);
562: #endif
563:      return(0);
564:   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
565:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
566:     for (i=0; i<a->mbs; i++) {
567:       for (j=0; j<bs; j++) {
568:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
569:         for (k=a->i[i]; k<a->i[i+1]; k++) {
570:           for (l=0; l<bs; l++) {
571: #if defined(PETSC_USE_COMPLEX)
572:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
573:               PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
574:                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
575:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
576:               PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
577:                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
578:             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
579:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
580:             }
581: #else
582:             if (a->a[bs2*k + l*bs + j] != 0.0) {
583:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
584:             }
585: #endif
586:           }
587:         }
588:         PetscViewerASCIIPrintf(viewer,"n");
589:       }
590:     }
591:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
592:   } else {
593:     PetscViewerASCIIUseTabs(viewer,PETSC_NO);
594:     for (i=0; i<a->mbs; i++) {
595:       for (j=0; j<bs; j++) {
596:         PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
597:         for (k=a->i[i]; k<a->i[i+1]; k++) {
598:           for (l=0; l<bs; l++) {
599: #if defined(PETSC_USE_COMPLEX)
600:             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
601:               PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
602:                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
603:             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
604:               PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
605:                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
606:             } else {
607:               PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
608:             }
609: #else
610:             PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
611: #endif
612:           }
613:         }
614:         PetscViewerASCIIPrintf(viewer,"n");
615:       }
616:     }
617:     PetscViewerASCIIUseTabs(viewer,PETSC_YES);
618:   }
619:   PetscViewerFlush(viewer);
620:   return(0);
621: }

623: #undef __FUNCT__  
625: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
626: {
627:   Mat          A = (Mat) Aa;
628:   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
629:   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
630:   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
631:   MatScalar    *aa;
632:   PetscViewer  viewer;


636:   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
637:   PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);

639:   PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);

641:   /* loop over matrix elements drawing boxes */
642:   color = PETSC_DRAW_BLUE;
643:   for (i=0,row=0; i<mbs; i++,row+=bs) {
644:     for (j=a->i[i]; j<a->i[i+1]; j++) {
645:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
646:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
647:       aa = a->a + j*bs2;
648:       for (k=0; k<bs; k++) {
649:         for (l=0; l<bs; l++) {
650:           if (PetscRealPart(*aa++) >=  0.) continue;
651:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
652:         }
653:       }
654:     }
655:   }
656:   color = PETSC_DRAW_CYAN;
657:   for (i=0,row=0; i<mbs; i++,row+=bs) {
658:     for (j=a->i[i]; j<a->i[i+1]; j++) {
659:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
660:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
661:       aa = a->a + j*bs2;
662:       for (k=0; k<bs; k++) {
663:         for (l=0; l<bs; l++) {
664:           if (PetscRealPart(*aa++) != 0.) continue;
665:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
666:         }
667:       }
668:     }
669:   }

671:   color = PETSC_DRAW_RED;
672:   for (i=0,row=0; i<mbs; i++,row+=bs) {
673:     for (j=a->i[i]; j<a->i[i+1]; j++) {
674:       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
675:       x_l = a->j[j]*bs; x_r = x_l + 1.0;
676:       aa = a->a + j*bs2;
677:       for (k=0; k<bs; k++) {
678:         for (l=0; l<bs; l++) {
679:           if (PetscRealPart(*aa++) <= 0.) continue;
680:           PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
681:         }
682:       }
683:     }
684:   }
685:   return(0);
686: }

688: #undef __FUNCT__  
690: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
691: {
692:   int          ierr;
693:   PetscReal    xl,yl,xr,yr,w,h;
694:   PetscDraw    draw;
695:   PetscTruth   isnull;


699:   PetscViewerDrawGetDraw(viewer,0,&draw);
700:   PetscDrawIsNull(draw,&isnull); if (isnull) return(0);

702:   PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
703:   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
704:   xr += w;    yr += h;  xl = -w;     yl = -h;
705:   PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
706:   PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
707:   PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
708:   return(0);
709: }

711: #undef __FUNCT__  
713: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
714: {
715:   int        ierr;
716:   PetscTruth issocket,isascii,isbinary,isdraw;

719:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
720:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
721:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
722:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
723:   if (issocket) {
724:     SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
725:   } else if (isascii){
726:     MatView_SeqBAIJ_ASCII(A,viewer);
727:   } else if (isbinary) {
728:     MatView_SeqBAIJ_Binary(A,viewer);
729:   } else if (isdraw) {
730:     MatView_SeqBAIJ_Draw(A,viewer);
731:   } else {
732:     SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
733:   }
734:   return(0);
735: }


738: #undef __FUNCT__  
740: int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
741: {
742:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
743:   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
744:   int        *ai = a->i,*ailen = a->ilen;
745:   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
746:   MatScalar  *ap,*aa = a->a,zero = 0.0;

749:   for (k=0; k<m; k++) { /* loop over rows */
750:     row  = im[k]; brow = row/bs;
751:     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
752:     if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
753:     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
754:     nrow = ailen[brow];
755:     for (l=0; l<n; l++) { /* loop over columns */
756:       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
757:       if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
758:       col  = in[l] ;
759:       bcol = col/bs;
760:       cidx = col%bs;
761:       ridx = row%bs;
762:       high = nrow;
763:       low  = 0; /* assume unsorted */
764:       while (high-low > 5) {
765:         t = (low+high)/2;
766:         if (rp[t] > bcol) high = t;
767:         else             low  = t;
768:       }
769:       for (i=low; i<high; i++) {
770:         if (rp[i] > bcol) break;
771:         if (rp[i] == bcol) {
772:           *v++ = ap[bs2*i+bs*cidx+ridx];
773:           goto finished;
774:         }
775:       }
776:       *v++ = zero;
777:       finished:;
778:     }
779:   }
780:   return(0);
781: }

783: #if defined(PETSC_USE_MAT_SINGLE)
784: #undef __FUNCT__  
786: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
787: {
788:   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
789:   int         ierr,i,N = m*n*b->bs2;
790:   MatScalar   *vsingle;

793:   if (N > b->setvalueslen) {
794:     if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
795:     PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
796:     b->setvalueslen  = N;
797:   }
798:   vsingle = b->setvaluescopy;
799:   for (i=0; i<N; i++) {
800:     vsingle[i] = v[i];
801:   }
802:   MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
803:   return(0);
804: }
805: #endif


808: #undef __FUNCT__  
810: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
811: {
812:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
813:   int         *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
814:   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
815:   int         *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
816:   PetscTruth  roworiented=a->roworiented;
817:   MatScalar   *value = v,*ap,*aa = a->a,*bap;

820:   if (roworiented) {
821:     stepval = (n-1)*bs;
822:   } else {
823:     stepval = (m-1)*bs;
824:   }
825:   for (k=0; k<m; k++) { /* loop over added rows */
826:     row  = im[k];
827:     if (row < 0) continue;
828: #if defined(PETSC_USE_BOPT_g)  
829:     if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
830: #endif
831:     rp   = aj + ai[row];
832:     ap   = aa + bs2*ai[row];
833:     rmax = imax[row];
834:     nrow = ailen[row];
835:     low  = 0;
836:     for (l=0; l<n; l++) { /* loop over added columns */
837:       if (in[l] < 0) continue;
838: #if defined(PETSC_USE_BOPT_g)  
839:       if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
840: #endif
841:       col = in[l];
842:       if (roworiented) {
843:         value = v + k*(stepval+bs)*bs + l*bs;
844:       } else {
845:         value = v + l*(stepval+bs)*bs + k*bs;
846:       }
847:       if (!sorted) low = 0; high = nrow;
848:       while (high-low > 7) {
849:         t = (low+high)/2;
850:         if (rp[t] > col) high = t;
851:         else             low  = t;
852:       }
853:       for (i=low; i<high; i++) {
854:         if (rp[i] > col) break;
855:         if (rp[i] == col) {
856:           bap  = ap +  bs2*i;
857:           if (roworiented) {
858:             if (is == ADD_VALUES) {
859:               for (ii=0; ii<bs; ii++,value+=stepval) {
860:                 for (jj=ii; jj<bs2; jj+=bs) {
861:                   bap[jj] += *value++;
862:                 }
863:               }
864:             } else {
865:               for (ii=0; ii<bs; ii++,value+=stepval) {
866:                 for (jj=ii; jj<bs2; jj+=bs) {
867:                   bap[jj] = *value++;
868:                 }
869:               }
870:             }
871:           } else {
872:             if (is == ADD_VALUES) {
873:               for (ii=0; ii<bs; ii++,value+=stepval) {
874:                 for (jj=0; jj<bs; jj++) {
875:                   *bap++ += *value++;
876:                 }
877:               }
878:             } else {
879:               for (ii=0; ii<bs; ii++,value+=stepval) {
880:                 for (jj=0; jj<bs; jj++) {
881:                   *bap++  = *value++;
882:                 }
883:               }
884:             }
885:           }
886:           goto noinsert2;
887:         }
888:       }
889:       if (nonew == 1) goto noinsert2;
890:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
891:       if (nrow >= rmax) {
892:         /* there is no extra room in row, therefore enlarge */
893:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
894:         MatScalar *new_a;

896:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");

898:         /* malloc new storage space */
899:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
900:         ierr    = PetscMalloc(len,&new_a);
901:         new_j   = (int*)(new_a + bs2*new_nz);
902:         new_i   = new_j + new_nz;

904:         /* copy over old data into new slots */
905:         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
906:         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
907:         PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
908:         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
909:         PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
910:         PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
911:         PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
912:         PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
913:         /* free up old matrix storage */
914:         PetscFree(a->a);
915:         if (!a->singlemalloc) {
916:           PetscFree(a->i);
917:           PetscFree(a->j);
918:         }
919:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
920:         a->singlemalloc = PETSC_TRUE;

922:         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
923:         rmax = imax[row] = imax[row] + CHUNKSIZE;
924:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
925:         a->maxnz += bs2*CHUNKSIZE;
926:         a->reallocs++;
927:         a->nz++;
928:       }
929:       N = nrow++ - 1;
930:       /* shift up all the later entries in this row */
931:       for (ii=N; ii>=i; ii--) {
932:         rp[ii+1] = rp[ii];
933:         PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
934:       }
935:       if (N >= i) {
936:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
937:       }
938:       rp[i] = col;
939:       bap   = ap +  bs2*i;
940:       if (roworiented) {
941:         for (ii=0; ii<bs; ii++,value+=stepval) {
942:           for (jj=ii; jj<bs2; jj+=bs) {
943:             bap[jj] = *value++;
944:           }
945:         }
946:       } else {
947:         for (ii=0; ii<bs; ii++,value+=stepval) {
948:           for (jj=0; jj<bs; jj++) {
949:             *bap++  = *value++;
950:           }
951:         }
952:       }
953:       noinsert2:;
954:       low = i;
955:     }
956:     ailen[row] = nrow;
957:   }
958:   return(0);
959: }

961: #undef __FUNCT__  
963: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
964: {
965:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
966:   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
967:   int        m = A->m,*ip,N,*ailen = a->ilen;
968:   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
969:   MatScalar  *aa = a->a,*ap;
970: #if defined(PETSC_HAVE_DSCPACK)
971:   PetscTruth   flag;
972: #endif

975:   if (mode == MAT_FLUSH_ASSEMBLY) return(0);

977:   if (m) rmax = ailen[0];
978:   for (i=1; i<mbs; i++) {
979:     /* move each row back by the amount of empty slots (fshift) before it*/
980:     fshift += imax[i-1] - ailen[i-1];
981:     rmax   = PetscMax(rmax,ailen[i]);
982:     if (fshift) {
983:       ip = aj + ai[i]; ap = aa + bs2*ai[i];
984:       N = ailen[i];
985:       for (j=0; j<N; j++) {
986:         ip[j-fshift] = ip[j];
987:         PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
988:       }
989:     }
990:     ai[i] = ai[i-1] + ailen[i-1];
991:   }
992:   if (mbs) {
993:     fshift += imax[mbs-1] - ailen[mbs-1];
994:     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
995:   }
996:   /* reset ilen and imax for each row */
997:   for (i=0; i<mbs; i++) {
998:     ailen[i] = imax[i] = ai[i+1] - ai[i];
999:   }
1000:   a->nz = ai[mbs];

1002:   /* diagonals may have moved, so kill the diagonal pointers */
1003:   if (fshift && a->diag) {
1004:     PetscFree(a->diag);
1005:     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1006:     a->diag = 0;
1007:   }
1008:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d usedn",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1009:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %dn",a->reallocs);
1010:   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %dn",rmax);
1011:   a->reallocs          = 0;
1012:   A->info.nz_unneeded  = (PetscReal)fshift*bs2;

1014: #if defined(PETSC_HAVE_DSCPACK)
1015:   PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);
1016:   if (flag) { MatUseDSCPACK_MPIBAIJ(A); }
1017: #endif

1019:   return(0);
1020: }



1024: /* 
1025:    This function returns an array of flags which indicate the locations of contiguous
1026:    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1027:    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1028:    Assume: sizes should be long enough to hold all the values.
1029: */
1030: #undef __FUNCT__  
1032: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1033: {
1034:   int        i,j,k,row;
1035:   PetscTruth flg;

1038:   for (i=0,j=0; i<n; j++) {
1039:     row = idx[i];
1040:     if (row%bs!=0) { /* Not the begining of a block */
1041:       sizes[j] = 1;
1042:       i++;
1043:     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1044:       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1045:       i++;
1046:     } else { /* Begining of the block, so check if the complete block exists */
1047:       flg = PETSC_TRUE;
1048:       for (k=1; k<bs; k++) {
1049:         if (row+k != idx[i+k]) { /* break in the block */
1050:           flg = PETSC_FALSE;
1051:           break;
1052:         }
1053:       }
1054:       if (flg == PETSC_TRUE) { /* No break in the bs */
1055:         sizes[j] = bs;
1056:         i+= bs;
1057:       } else {
1058:         sizes[j] = 1;
1059:         i++;
1060:       }
1061:     }
1062:   }
1063:   *bs_max = j;
1064:   return(0);
1065: }
1066: 
1067: #undef __FUNCT__  
1069: int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
1070: {
1071:   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1072:   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1073:   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1074:   PetscScalar zero = 0.0;
1075:   MatScalar   *aa;

1078:   /* Make a copy of the IS and  sort it */
1079:   ISGetLocalSize(is,&is_n);
1080:   ISGetIndices(is,&is_idx);

1082:   /* allocate memory for rows,sizes */
1083:   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);
1084:   sizes = rows + is_n;

1086:   /* copy IS values to rows, and sort them */
1087:   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1088:   PetscSortInt(is_n,rows);
1089:   if (baij->keepzeroedrows) {
1090:     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1091:     bs_max = is_n;
1092:   } else {
1093:     MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1094:   }
1095:   ISRestoreIndices(is,&is_idx);

1097:   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1098:     row   = rows[j];
1099:     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1100:     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1101:     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1102:     if (sizes[i] == bs && !baij->keepzeroedrows) {
1103:       if (diag) {
1104:         if (baij->ilen[row/bs] > 0) {
1105:           baij->ilen[row/bs]       = 1;
1106:           baij->j[baij->i[row/bs]] = row/bs;
1107:           PetscMemzero(aa,count*bs*sizeof(MatScalar));
1108:         }
1109:         /* Now insert all the diagonal values for this bs */
1110:         for (k=0; k<bs; k++) {
1111:           (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
1112:         }
1113:       } else { /* (!diag) */
1114:         baij->ilen[row/bs] = 0;
1115:       } /* end (!diag) */
1116:     } else { /* (sizes[i] != bs) */
1117: #if defined (PETSC_USE_DEBUG)
1118:       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1119: #endif
1120:       for (k=0; k<count; k++) {
1121:         aa[0] =  zero;
1122:         aa    += bs;
1123:       }
1124:       if (diag) {
1125:         (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
1126:       }
1127:     }
1128:   }

1130:   PetscFree(rows);
1131:   MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1132:   return(0);
1133: }

1135: #undef __FUNCT__  
1137: int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1138: {
1139:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1140:   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1141:   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1142:   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1143:   int         ridx,cidx,bs2=a->bs2,ierr;
1144:   PetscTruth  roworiented=a->roworiented;
1145:   MatScalar   *ap,value,*aa=a->a,*bap;

1148:   for (k=0; k<m; k++) { /* loop over added rows */
1149:     row  = im[k]; brow = row/bs;
1150:     if (row < 0) continue;
1151: #if defined(PETSC_USE_BOPT_g)  
1152:     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1153: #endif
1154:     rp   = aj + ai[brow];
1155:     ap   = aa + bs2*ai[brow];
1156:     rmax = imax[brow];
1157:     nrow = ailen[brow];
1158:     low  = 0;
1159:     for (l=0; l<n; l++) { /* loop over added columns */
1160:       if (in[l] < 0) continue;
1161: #if defined(PETSC_USE_BOPT_g)  
1162:       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1163: #endif
1164:       col = in[l]; bcol = col/bs;
1165:       ridx = row % bs; cidx = col % bs;
1166:       if (roworiented) {
1167:         value = v[l + k*n];
1168:       } else {
1169:         value = v[k + l*m];
1170:       }
1171:       if (!sorted) low = 0; high = nrow;
1172:       while (high-low > 7) {
1173:         t = (low+high)/2;
1174:         if (rp[t] > bcol) high = t;
1175:         else              low  = t;
1176:       }
1177:       for (i=low; i<high; i++) {
1178:         if (rp[i] > bcol) break;
1179:         if (rp[i] == bcol) {
1180:           bap  = ap +  bs2*i + bs*cidx + ridx;
1181:           if (is == ADD_VALUES) *bap += value;
1182:           else                  *bap  = value;
1183:           goto noinsert1;
1184:         }
1185:       }
1186:       if (nonew == 1) goto noinsert1;
1187:       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1188:       if (nrow >= rmax) {
1189:         /* there is no extra room in row, therefore enlarge */
1190:         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1191:         MatScalar *new_a;

1193:         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");

1195:         /* Malloc new storage space */
1196:         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1197:         ierr    = PetscMalloc(len,&new_a);
1198:         new_j   = (int*)(new_a + bs2*new_nz);
1199:         new_i   = new_j + new_nz;

1201:         /* copy over old data into new slots */
1202:         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1203:         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1204:         PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1205:         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1206:         PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
1207:         PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1208:         PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1209:         PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1210:         /* free up old matrix storage */
1211:         PetscFree(a->a);
1212:         if (!a->singlemalloc) {
1213:           PetscFree(a->i);
1214:           PetscFree(a->j);
1215:         }
1216:         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1217:         a->singlemalloc = PETSC_TRUE;

1219:         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1220:         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1221:         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1222:         a->maxnz += bs2*CHUNKSIZE;
1223:         a->reallocs++;
1224:         a->nz++;
1225:       }
1226:       N = nrow++ - 1;
1227:       /* shift up all the later entries in this row */
1228:       for (ii=N; ii>=i; ii--) {
1229:         rp[ii+1] = rp[ii];
1230:         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1231:       }
1232:       if (N>=i) {
1233:         PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1234:       }
1235:       rp[i]                      = bcol;
1236:       ap[bs2*i + bs*cidx + ridx] = value;
1237:       noinsert1:;
1238:       low = i;
1239:     }
1240:     ailen[brow] = nrow;
1241:   }
1242:   return(0);
1243: }


1246: #undef __FUNCT__  
1248: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1249: {
1250:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1251:   Mat         outA;
1252:   int         ierr;
1253:   PetscTruth  row_identity,col_identity;

1256:   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1257:   ISIdentity(row,&row_identity);
1258:   ISIdentity(col,&col_identity);
1259:   if (!row_identity || !col_identity) {
1260:     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1261:   }

1263:   outA          = inA;
1264:   inA->factor   = FACTOR_LU;

1266:   if (!a->diag) {
1267:     MatMarkDiagonal_SeqBAIJ(inA);
1268:   }

1270:   a->row        = row;
1271:   a->col        = col;
1272:   ierr          = PetscObjectReference((PetscObject)row);
1273:   ierr          = PetscObjectReference((PetscObject)col);
1274: 
1275:   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1276:   ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1277:   PetscLogObjectParent(inA,a->icol);
1278: 
1279:   /*
1280:       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 
1281:       for ILU(0) factorization with natural ordering
1282:   */
1283:   if (a->bs < 8) {
1284:     MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1285:   } else {
1286:     if (!a->solve_work) {
1287:       PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1288:       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1289:     }
1290:   }

1292:   MatLUFactorNumeric(inA,&outA);

1294:   return(0);
1295: }
1296: #undef __FUNCT__  
1298: int MatPrintHelp_SeqBAIJ(Mat A)
1299: {
1300:   static PetscTruth called = PETSC_FALSE;
1301:   MPI_Comm          comm = A->comm;
1302:   int               ierr;

1305:   if (called) {return(0);} else called = PETSC_TRUE;
1306:   (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):n");
1307:   (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>n");
1308:   return(0);
1309: }

1311: EXTERN_C_BEGIN
1312: #undef __FUNCT__  
1314: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1315: {
1316:   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1317:   int         i,nz,nbs;

1320:   nz  = baij->maxnz/baij->bs2;
1321:   nbs = baij->nbs;
1322:   for (i=0; i<nz; i++) {
1323:     baij->j[i] = indices[i];
1324:   }
1325:   baij->nz = nz;
1326:   for (i=0; i<nbs; i++) {
1327:     baij->ilen[i] = baij->imax[i];
1328:   }

1330:   return(0);
1331: }
1332: EXTERN_C_END

1334: #undef __FUNCT__  
1336: /*@
1337:     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1338:        in the matrix.

1340:   Input Parameters:
1341: +  mat - the SeqBAIJ matrix
1342: -  indices - the column indices

1344:   Level: advanced

1346:   Notes:
1347:     This can be called if you have precomputed the nonzero structure of the 
1348:   matrix and want to provide it to the matrix object to improve the performance
1349:   of the MatSetValues() operation.

1351:     You MUST have set the correct numbers of nonzeros per row in the call to 
1352:   MatCreateSeqBAIJ().

1354:     MUST be called before any calls to MatSetValues();

1356: @*/
1357: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1358: {
1359:   int ierr,(*f)(Mat,int *);

1363:   PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1364:   if (f) {
1365:     (*f)(mat,indices);
1366:   } else {
1367:     SETERRQ(1,"Wrong type of matrix to set column indices");
1368:   }
1369:   return(0);
1370: }

1372: #undef __FUNCT__  
1374: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1375: {
1376:   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1377:   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1378:   PetscReal    atmp;
1379:   PetscScalar  *x,zero = 0.0;
1380:   MatScalar    *aa;
1381:   int          ncols,brow,krow,kcol;

1384:   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1385:   bs   = a->bs;
1386:   aa   = a->a;
1387:   ai   = a->i;
1388:   aj   = a->j;
1389:   mbs = a->mbs;

1391:   VecSet(&zero,v);
1392:   VecGetArray(v,&x);
1393:   VecGetLocalSize(v,&n);
1394:   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1395:   for (i=0; i<mbs; i++) {
1396:     ncols = ai[1] - ai[0]; ai++;
1397:     brow  = bs*i;
1398:     for (j=0; j<ncols; j++){
1399:       /* bcol = bs*(*aj); */
1400:       for (kcol=0; kcol<bs; kcol++){
1401:         for (krow=0; krow<bs; krow++){
1402:           atmp = PetscAbsScalar(*aa); aa++;
1403:           row = brow + krow;    /* row index */
1404:           /* printf("val[%d,%d]: %gn",row,bcol+kcol,atmp); */
1405:           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1406:         }
1407:       }
1408:       aj++;
1409:     }
1410:   }
1411:   VecRestoreArray(v,&x);
1412:   return(0);
1413: }

1415: #undef __FUNCT__  
1417: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1418: {
1419:   int        ierr;

1422:    MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1423:   return(0);
1424: }

1426: #undef __FUNCT__  
1428: int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1429: {
1430:   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1432:   *array = a->a;
1433:   return(0);
1434: }

1436: #undef __FUNCT__  
1438: int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1439: {
1441:   return(0);
1442: }

1444:  #include petscblaslapack.h
1445: #undef __FUNCT__  
1447: int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1448: {
1449:   int          ierr,one=1;
1450:   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;

1453:   if (str == SAME_NONZERO_PATTERN) {
1454:     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1455:   } else {
1456:     MatAXPY_Basic(a,X,Y,str);
1457:   }
1458:   return(0);
1459: }

1461: /* -------------------------------------------------------------------*/
1462: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1463:        MatGetRow_SeqBAIJ,
1464:        MatRestoreRow_SeqBAIJ,
1465:        MatMult_SeqBAIJ_N,
1466:        MatMultAdd_SeqBAIJ_N,
1467:        MatMultTranspose_SeqBAIJ,
1468:        MatMultTransposeAdd_SeqBAIJ,
1469:        MatSolve_SeqBAIJ_N,
1470:        0,
1471:        0,
1472:        0,
1473:        MatLUFactor_SeqBAIJ,
1474:        0,
1475:        0,
1476:        MatTranspose_SeqBAIJ,
1477:        MatGetInfo_SeqBAIJ,
1478:        MatEqual_SeqBAIJ,
1479:        MatGetDiagonal_SeqBAIJ,
1480:        MatDiagonalScale_SeqBAIJ,
1481:        MatNorm_SeqBAIJ,
1482:        0,
1483:        MatAssemblyEnd_SeqBAIJ,
1484:        0,
1485:        MatSetOption_SeqBAIJ,
1486:        MatZeroEntries_SeqBAIJ,
1487:        MatZeroRows_SeqBAIJ,
1488:        MatLUFactorSymbolic_SeqBAIJ,
1489:        MatLUFactorNumeric_SeqBAIJ_N,
1490:        0,
1491:        0,
1492:        MatSetUpPreallocation_SeqBAIJ,
1493:        MatILUFactorSymbolic_SeqBAIJ,
1494:        0,
1495:        MatGetArray_SeqBAIJ,
1496:        MatRestoreArray_SeqBAIJ,
1497:        MatDuplicate_SeqBAIJ,
1498:        0,
1499:        0,
1500:        MatILUFactor_SeqBAIJ,
1501:        0,
1502:        MatAXPY_SeqBAIJ,
1503:        MatGetSubMatrices_SeqBAIJ,
1504:        MatIncreaseOverlap_SeqBAIJ,
1505:        MatGetValues_SeqBAIJ,
1506:        0,
1507:        MatPrintHelp_SeqBAIJ,
1508:        MatScale_SeqBAIJ,
1509:        0,
1510:        0,
1511:        0,
1512:        MatGetBlockSize_SeqBAIJ,
1513:        MatGetRowIJ_SeqBAIJ,
1514:        MatRestoreRowIJ_SeqBAIJ,
1515:        0,
1516:        0,
1517:        0,
1518:        0,
1519:        0,
1520:        0,
1521:        MatSetValuesBlocked_SeqBAIJ,
1522:        MatGetSubMatrix_SeqBAIJ,
1523:        MatDestroy_SeqBAIJ,
1524:        MatView_SeqBAIJ,
1525:        MatGetPetscMaps_Petsc,
1526:        0,
1527:        0,
1528:        0,
1529:        0,
1530:        0,
1531:        0,
1532:        MatGetRowMax_SeqBAIJ,
1533:        MatConvert_Basic};

1535: EXTERN_C_BEGIN
1536: #undef __FUNCT__  
1538: int MatStoreValues_SeqBAIJ(Mat mat)
1539: {
1540:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1541:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
1542:   int         ierr;

1545:   if (aij->nonew != 1) {
1546:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1547:   }

1549:   /* allocate space for values if not already there */
1550:   if (!aij->saved_values) {
1551:     PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1552:   }

1554:   /* copy values over */
1555:   PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1556:   return(0);
1557: }
1558: EXTERN_C_END

1560: EXTERN_C_BEGIN
1561: #undef __FUNCT__  
1563: int MatRetrieveValues_SeqBAIJ(Mat mat)
1564: {
1565:   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1566:   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;

1569:   if (aij->nonew != 1) {
1570:     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1571:   }
1572:   if (!aij->saved_values) {
1573:     SETERRQ(1,"Must call MatStoreValues(A);first");
1574:   }

1576:   /* copy values over */
1577:   PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1578:   return(0);
1579: }
1580: EXTERN_C_END

1582: EXTERN_C_BEGIN
1583: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1584: EXTERN_C_END

1586: EXTERN_C_BEGIN
1587: #undef __FUNCT__  
1589: int MatCreate_SeqBAIJ(Mat B)
1590: {
1591:   int         ierr,size;
1592:   Mat_SeqBAIJ *b;

1595:   MPI_Comm_size(B->comm,&size);
1596:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");

1598:   B->m = B->M = PetscMax(B->m,B->M);
1599:   B->n = B->N = PetscMax(B->n,B->N);
1600:   ierr    = PetscNew(Mat_SeqBAIJ,&b);
1601:   B->data = (void*)b;
1602:   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1603:   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1604:   B->factor           = 0;
1605:   B->lupivotthreshold = 1.0;
1606:   B->mapping          = 0;
1607:   b->row              = 0;
1608:   b->col              = 0;
1609:   b->icol             = 0;
1610:   b->reallocs         = 0;
1611:   b->saved_values     = 0;
1612: #if defined(PETSC_USE_MAT_SINGLE)
1613:   b->setvalueslen     = 0;
1614:   b->setvaluescopy    = PETSC_NULL;
1615: #endif

1617:   PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
1618:   PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);

1620:   b->sorted           = PETSC_FALSE;
1621:   b->roworiented      = PETSC_TRUE;
1622:   b->nonew            = 0;
1623:   b->diag             = 0;
1624:   b->solve_work       = 0;
1625:   b->mult_work        = 0;
1626:   B->spptr            = 0;
1627:   B->info.nz_unneeded = (PetscReal)b->maxnz;
1628:   b->keepzeroedrows   = PETSC_FALSE;

1630:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1631:                                      "MatStoreValues_SeqBAIJ",
1632:                                       MatStoreValues_SeqBAIJ);
1633:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1634:                                      "MatRetrieveValues_SeqBAIJ",
1635:                                       MatRetrieveValues_SeqBAIJ);
1636:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1637:                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1638:                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);
1639:   PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
1640:                                      "MatConvert_SeqBAIJ_SeqAIJ",
1641:                                       MatConvert_SeqBAIJ_SeqAIJ);
1642:   return(0);
1643: }
1644: EXTERN_C_END

1646: #undef __FUNCT__  
1648: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1649: {
1650:   Mat         C;
1651:   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1652:   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;

1655:   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");

1657:   *B = 0;
1658:   MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1659:   MatSetType(C,MATSEQBAIJ);
1660:   c    = (Mat_SeqBAIJ*)C->data;

1662:   c->bs         = a->bs;
1663:   c->bs2        = a->bs2;
1664:   c->mbs        = a->mbs;
1665:   c->nbs        = a->nbs;
1666:   PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));

1668:   PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1669:   PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1670:   for (i=0; i<mbs; i++) {
1671:     c->imax[i] = a->imax[i];
1672:     c->ilen[i] = a->ilen[i];
1673:   }

1675:   /* allocate the matrix space */
1676:   c->singlemalloc = PETSC_TRUE;
1677:   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1678:   PetscMalloc(len,&c->a);
1679:   c->j = (int*)(c->a + nz*bs2);
1680:   c->i = c->j + nz;
1681:   PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1682:   if (mbs > 0) {
1683:     PetscMemcpy(c->j,a->j,nz*sizeof(int));
1684:     if (cpvalues == MAT_COPY_VALUES) {
1685:       PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1686:     } else {
1687:       PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1688:     }
1689:   }

1691:   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1692:   c->sorted      = a->sorted;
1693:   c->roworiented = a->roworiented;
1694:   c->nonew       = a->nonew;

1696:   if (a->diag) {
1697:     PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1698:     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1699:     for (i=0; i<mbs; i++) {
1700:       c->diag[i] = a->diag[i];
1701:     }
1702:   } else c->diag        = 0;
1703:   c->nz                 = a->nz;
1704:   c->maxnz              = a->maxnz;
1705:   c->solve_work         = 0;
1706:   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
1707:   c->mult_work          = 0;
1708:   C->preallocated       = PETSC_TRUE;
1709:   C->assembled          = PETSC_TRUE;
1710:   *B = C;
1711:   PetscFListDuplicate(A->qlist,&C->qlist);
1712:   return(0);
1713: }

1715: EXTERN_C_BEGIN
1716: #undef __FUNCT__  
1718: int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1719: {
1720:   Mat_SeqBAIJ  *a;
1721:   Mat          B;
1722:   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1723:   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1724:   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
1725:   int          *masked,nmask,tmp,bs2,ishift;
1726:   PetscScalar  *aa;
1727:   MPI_Comm     comm = ((PetscObject)viewer)->comm;

1730:   PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1731:   bs2  = bs*bs;

1733:   MPI_Comm_size(comm,&size);
1734:   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1735:   PetscViewerBinaryGetDescriptor(viewer,&fd);
1736:   PetscBinaryRead(fd,header,4,PETSC_INT);
1737:   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1738:   M = header[1]; N = header[2]; nz = header[3];

1740:   if (header[3] < 0) {
1741:     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1742:   }

1744:   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

1746:   /* 
1747:      This code adds extra rows to make sure the number of rows is 
1748:     divisible by the blocksize
1749:   */
1750:   mbs        = M/bs;
1751:   extra_rows = bs - M + bs*(mbs);
1752:   if (extra_rows == bs) extra_rows = 0;
1753:   else                  mbs++;
1754:   if (extra_rows) {
1755:     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksizen");
1756:   }

1758:   /* read in row lengths */
1759:   PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1760:   PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1761:   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;

1763:   /* read in column indices */
1764:   PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1765:   PetscBinaryRead(fd,jj,nz,PETSC_INT);
1766:   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;

1768:   /* loop over row lengths determining block row lengths */
1769:   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);
1770:   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));
1771:   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);
1772:   ierr     = PetscMemzero(mask,mbs*sizeof(int));
1773:   masked   = mask + mbs;
1774:   rowcount = 0; nzcount = 0;
1775:   for (i=0; i<mbs; i++) {
1776:     nmask = 0;
1777:     for (j=0; j<bs; j++) {
1778:       kmax = rowlengths[rowcount];
1779:       for (k=0; k<kmax; k++) {
1780:         tmp = jj[nzcount++]/bs;
1781:         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1782:       }
1783:       rowcount++;
1784:     }
1785:     browlengths[i] += nmask;
1786:     /* zero out the mask elements we set */
1787:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1788:   }

1790:   /* create our matrix */
1791:   MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1792:   B = *A;
1793:   a = (Mat_SeqBAIJ*)B->data;

1795:   /* set matrix "i" values */
1796:   a->i[0] = 0;
1797:   for (i=1; i<= mbs; i++) {
1798:     a->i[i]      = a->i[i-1] + browlengths[i-1];
1799:     a->ilen[i-1] = browlengths[i-1];
1800:   }
1801:   a->nz         = 0;
1802:   for (i=0; i<mbs; i++) a->nz += browlengths[i];

1804:   /* read in nonzero values */
1805:   PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
1806:   PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1807:   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;

1809:   /* set "a" and "j" values into matrix */
1810:   nzcount = 0; jcount = 0;
1811:   for (i=0; i<mbs; i++) {
1812:     nzcountb = nzcount;
1813:     nmask    = 0;
1814:     for (j=0; j<bs; j++) {
1815:       kmax = rowlengths[i*bs+j];
1816:       for (k=0; k<kmax; k++) {
1817:         tmp = jj[nzcount++]/bs;
1818:         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1819:       }
1820:     }
1821:     /* sort the masked values */
1822:     PetscSortInt(nmask,masked);

1824:     /* set "j" values into matrix */
1825:     maskcount = 1;
1826:     for (j=0; j<nmask; j++) {
1827:       a->j[jcount++]  = masked[j];
1828:       mask[masked[j]] = maskcount++;
1829:     }
1830:     /* set "a" values into matrix */
1831:     ishift = bs2*a->i[i];
1832:     for (j=0; j<bs; j++) {
1833:       kmax = rowlengths[i*bs+j];
1834:       for (k=0; k<kmax; k++) {
1835:         tmp       = jj[nzcountb]/bs ;
1836:         block     = mask[tmp] - 1;
1837:         point     = jj[nzcountb] - bs*tmp;
1838:         idx       = ishift + bs2*block + j + bs*point;
1839:         a->a[idx] = (MatScalar)aa[nzcountb++];
1840:       }
1841:     }
1842:     /* zero out the mask elements we set */
1843:     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1844:   }
1845:   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");

1847:   PetscFree(rowlengths);
1848:   PetscFree(browlengths);
1849:   PetscFree(aa);
1850:   PetscFree(jj);
1851:   PetscFree(mask);

1853:   B->assembled = PETSC_TRUE;
1854: 
1855:   MatView_Private(B);
1856:   return(0);
1857: }
1858: EXTERN_C_END

1860: #undef __FUNCT__  
1862: /*@C
1863:    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1864:    compressed row) format.  For good matrix assembly performance the
1865:    user should preallocate the matrix storage by setting the parameter nz
1866:    (or the array nnz).  By setting these parameters accurately, performance
1867:    during matrix assembly can be increased by more than a factor of 50.

1869:    Collective on MPI_Comm

1871:    Input Parameters:
1872: +  comm - MPI communicator, set to PETSC_COMM_SELF
1873: .  bs - size of block
1874: .  m - number of rows
1875: .  n - number of columns
1876: .  nz - number of nonzero blocks  per block row (same for all rows)
1877: -  nnz - array containing the number of nonzero blocks in the various block rows 
1878:          (possibly different for each block row) or PETSC_NULL

1880:    Output Parameter:
1881: .  A - the matrix 

1883:    Options Database Keys:
1884: .   -mat_no_unroll - uses code that does not unroll the loops in the 
1885:                      block calculations (much slower)
1886: .    -mat_block_size - size of the blocks to use

1888:    Level: intermediate

1890:    Notes:
1891:    A nonzero block is any block that as 1 or more nonzeros in it

1893:    The block AIJ format is fully compatible with standard Fortran 77
1894:    storage.  That is, the stored row and column indices can begin at
1895:    either one (as in Fortran) or zero.  See the users' manual for details.

1897:    Specify the preallocated storage with either nz or nnz (not both).
1898:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
1899:    allocation.  For additional details, see the users manual chapter on
1900:    matrices.

1902: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1903: @*/
1904: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1905: {
1907: 
1909:   MatCreate(comm,m,n,m,n,A);
1910:   MatSetType(*A,MATSEQBAIJ);
1911:   MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
1912:   return(0);
1913: }

1915: #undef __FUNCT__  
1917: /*@C
1918:    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1919:    per row in the matrix. For good matrix assembly performance the
1920:    user should preallocate the matrix storage by setting the parameter nz
1921:    (or the array nnz).  By setting these parameters accurately, performance
1922:    during matrix assembly can be increased by more than a factor of 50.

1924:    Collective on MPI_Comm

1926:    Input Parameters:
1927: +  A - the matrix
1928: .  bs - size of block
1929: .  nz - number of block nonzeros per block row (same for all rows)
1930: -  nnz - array containing the number of block nonzeros in the various block rows 
1931:          (possibly different for each block row) or PETSC_NULL

1933:    Options Database Keys:
1934: .   -mat_no_unroll - uses code that does not unroll the loops in the 
1935:                      block calculations (much slower)
1936: .    -mat_block_size - size of the blocks to use

1938:    Level: intermediate

1940:    Notes:
1941:    The block AIJ format is fully compatible with standard Fortran 77
1942:    storage.  That is, the stored row and column indices can begin at
1943:    either one (as in Fortran) or zero.  See the users' manual for details.

1945:    Specify the preallocated storage with either nz or nnz (not both).
1946:    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 
1947:    allocation.  For additional details, see the users manual chapter on
1948:    matrices.

1950: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1951: @*/
1952: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1953: {
1954:   Mat_SeqBAIJ *b;
1955:   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
1956:   PetscTruth  flg;

1959:   PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);
1960:   if (!flg) return(0);

1962:   B->preallocated = PETSC_TRUE;
1963:   PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
1964:   if (nnz && newbs != bs) {
1965:     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1966:   }
1967:   bs   = newbs;

1969:   mbs  = B->m/bs;
1970:   nbs  = B->n/bs;
1971:   bs2  = bs*bs;

1973:   if (mbs*bs!=B->m || nbs*bs!=B->n) {
1974:     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1975:   }

1977:   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1978:   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1979:   if (nnz) {
1980:     for (i=0; i<mbs; i++) {
1981:       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1982:       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1983:     }
1984:   }

1986:   b       = (Mat_SeqBAIJ*)B->data;
1987:   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1988:   B->ops->solve               = MatSolve_SeqBAIJ_Update;
1989:   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
1990:   if (!flg) {
1991:     switch (bs) {
1992:     case 1:
1993:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1994:       B->ops->mult            = MatMult_SeqBAIJ_1;
1995:       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
1996:       break;
1997:     case 2:
1998:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1999:       B->ops->mult            = MatMult_SeqBAIJ_2;
2000:       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2001:       break;
2002:     case 3:
2003:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2004:       B->ops->mult            = MatMult_SeqBAIJ_3;
2005:       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2006:       break;
2007:     case 4:
2008:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2009:       B->ops->mult            = MatMult_SeqBAIJ_4;
2010:       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2011:       break;
2012:     case 5:
2013:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2014:       B->ops->mult            = MatMult_SeqBAIJ_5;
2015:       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2016:       break;
2017:     case 6:
2018:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2019:       B->ops->mult            = MatMult_SeqBAIJ_6;
2020:       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2021:       break;
2022:     case 7:
2023:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2024:       B->ops->mult            = MatMult_SeqBAIJ_7;
2025:       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2026:       break;
2027:     default:
2028:       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2029:       B->ops->mult            = MatMult_SeqBAIJ_N;
2030:       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2031:       break;
2032:     }
2033:   }
2034:   b->bs      = bs;
2035:   b->mbs     = mbs;
2036:   b->nbs     = nbs;
2037:   PetscMalloc((mbs+1)*sizeof(int),&b->imax);
2038:   if (!nnz) {
2039:     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2040:     else if (nz <= 0)        nz = 1;
2041:     for (i=0; i<mbs; i++) b->imax[i] = nz;
2042:     nz = nz*mbs;
2043:   } else {
2044:     nz = 0;
2045:     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2046:   }

2048:   /* allocate the matrix space */
2049:   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2050:   ierr  = PetscMalloc(len,&b->a);
2051:   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2052:   b->j  = (int*)(b->a + nz*bs2);
2053:   PetscMemzero(b->j,nz*sizeof(int));
2054:   b->i  = b->j + nz;
2055:   b->singlemalloc = PETSC_TRUE;

2057:   b->i[0] = 0;
2058:   for (i=1; i<mbs+1; i++) {
2059:     b->i[i] = b->i[i-1] + b->imax[i-1];
2060:   }

2062:   /* b->ilen will count nonzeros in each block row so far. */
2063:   PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
2064:   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2065:   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}

2067:   b->bs               = bs;
2068:   b->bs2              = bs2;
2069:   b->mbs              = mbs;
2070:   b->nz               = 0;
2071:   b->maxnz            = nz*bs2;
2072:   B->info.nz_unneeded = (PetscReal)b->maxnz;
2073:   return(0);
2074: }