Actual source code: baij.c
1: /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
3: /*
4: Defines the basic matrix operations for the BAIJ (compressed row)
5: matrix storage format.
6: */
7: #include src/mat/impls/baij/seq/baij.h
8: #include src/vec/vecimpl.h
9: #include src/inline/spops.h
10: #include petscsys.h
12: /*
13: Special version for Fun3d sequential benchmark
14: */
15: #if defined(PETSC_HAVE_FORTRAN_CAPS)
16: #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
17: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
18: #define matsetvaluesblocked4_ matsetvaluesblocked4
19: #endif
21: EXTERN_C_BEGIN
22: #undef __FUNCT__
24: void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
25: {
26: Mat A = *AA;
27: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
28: int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
29: int *ai=a->i,*ailen=a->ilen;
30: int *aj=a->j,stepval;
31: PetscScalar *value = v;
32: MatScalar *ap,*aa = a->a,*bap;
35: stepval = (n-1)*4;
36: for (k=0; k<m; k++) { /* loop over added rows */
37: row = im[k];
38: rp = aj + ai[row];
39: ap = aa + 16*ai[row];
40: nrow = ailen[row];
41: low = 0;
42: for (l=0; l<n; l++) { /* loop over added columns */
43: col = in[l];
44: value = v + k*(stepval+4)*4 + l*4;
45: low = 0; high = nrow;
46: while (high-low > 7) {
47: t = (low+high)/2;
48: if (rp[t] > col) high = t;
49: else low = t;
50: }
51: for (i=low; i<high; i++) {
52: if (rp[i] > col) break;
53: if (rp[i] == col) {
54: bap = ap + 16*i;
55: for (ii=0; ii<4; ii++,value+=stepval) {
56: for (jj=ii; jj<16; jj+=4) {
57: bap[jj] += *value++;
58: }
59: }
60: goto noinsert2;
61: }
62: }
63: N = nrow++ - 1;
64: /* shift up all the later entries in this row */
65: for (ii=N; ii>=i; ii--) {
66: rp[ii+1] = rp[ii];
67: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
68: }
69: if (N >= i) {
70: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
71: }
72: rp[i] = col;
73: bap = ap + 16*i;
74: for (ii=0; ii<4; ii++,value+=stepval) {
75: for (jj=ii; jj<16; jj+=4) {
76: bap[jj] = *value++;
77: }
78: }
79: noinsert2:;
80: low = i;
81: }
82: ailen[row] = nrow;
83: }
84: }
85: EXTERN_C_END
87: #if defined(PETSC_HAVE_FORTRAN_CAPS)
88: #define matsetvalues4_ MATSETVALUES4
89: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
90: #define matsetvalues4_ matsetvalues4
91: #endif
93: EXTERN_C_BEGIN
94: #undef __FUNCT__
96: void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
97: {
98: Mat A = *AA;
99: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
100: int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
101: int *ai=a->i,*ailen=a->ilen;
102: int *aj=a->j,brow,bcol;
103: int ridx,cidx;
104: MatScalar *ap,value,*aa=a->a,*bap;
105:
107: for (k=0; k<m; k++) { /* loop over added rows */
108: row = im[k]; brow = row/4;
109: rp = aj + ai[brow];
110: ap = aa + 16*ai[brow];
111: nrow = ailen[brow];
112: low = 0;
113: for (l=0; l<n; l++) { /* loop over added columns */
114: col = in[l]; bcol = col/4;
115: ridx = row % 4; cidx = col % 4;
116: value = v[l + k*n];
117: low = 0; high = nrow;
118: while (high-low > 7) {
119: t = (low+high)/2;
120: if (rp[t] > bcol) high = t;
121: else low = t;
122: }
123: for (i=low; i<high; i++) {
124: if (rp[i] > bcol) break;
125: if (rp[i] == bcol) {
126: bap = ap + 16*i + 4*cidx + ridx;
127: *bap += value;
128: goto noinsert1;
129: }
130: }
131: N = nrow++ - 1;
132: /* shift up all the later entries in this row */
133: for (ii=N; ii>=i; ii--) {
134: rp[ii+1] = rp[ii];
135: PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
136: }
137: if (N>=i) {
138: PetscMemzero(ap+16*i,16*sizeof(MatScalar));
139: }
140: rp[i] = bcol;
141: ap[16*i + 4*cidx + ridx] = value;
142: noinsert1:;
143: low = i;
144: }
145: ailen[brow] = nrow;
146: }
147: }
148: EXTERN_C_END
150: /* UGLY, ugly, ugly
151: When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
152: not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
153: inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
154: converts the entries into single precision and then calls ..._MatScalar() to put them
155: into the single precision data structures.
156: */
157: #if defined(PETSC_USE_MAT_SINGLE)
158: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
159: #else
160: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
161: #endif
162: #if defined(PETSC_HAVE_DSCPACK)
163: EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
164: #endif
166: #define CHUNKSIZE 10
168: /*
169: Checks for missing diagonals
170: */
171: #undef __FUNCT__
173: int MatMissingDiagonal_SeqBAIJ(Mat A)
174: {
175: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
176: int *diag,*jj = a->j,i,ierr;
179: MatMarkDiagonal_SeqBAIJ(A);
180: diag = a->diag;
181: for (i=0; i<a->mbs; i++) {
182: if (jj[diag[i]] != i) {
183: SETERRQ1(1,"Matrix is missing diagonal number %d",i);
184: }
185: }
186: return(0);
187: }
189: #undef __FUNCT__
191: int MatMarkDiagonal_SeqBAIJ(Mat A)
192: {
193: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
194: int i,j,*diag,m = a->mbs,ierr;
197: if (a->diag) return(0);
199: PetscMalloc((m+1)*sizeof(int),&diag);
200: PetscLogObjectMemory(A,(m+1)*sizeof(int));
201: for (i=0; i<m; i++) {
202: diag[i] = a->i[i+1];
203: for (j=a->i[i]; j<a->i[i+1]; j++) {
204: if (a->j[j] == i) {
205: diag[i] = j;
206: break;
207: }
208: }
209: }
210: a->diag = diag;
211: return(0);
212: }
215: EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
217: #undef __FUNCT__
219: static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
220: {
221: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
222: int ierr,n = a->mbs,i;
225: *nn = n;
226: if (!ia) return(0);
227: if (symmetric) {
228: MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);
229: } else if (oshift == 1) {
230: /* temporarily add 1 to i and j indices */
231: int nz = a->i[n];
232: for (i=0; i<nz; i++) a->j[i]++;
233: for (i=0; i<n+1; i++) a->i[i]++;
234: *ia = a->i; *ja = a->j;
235: } else {
236: *ia = a->i; *ja = a->j;
237: }
239: return(0);
240: }
242: #undef __FUNCT__
244: static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done)
245: {
246: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
247: int i,n = a->mbs,ierr;
250: if (!ia) return(0);
251: if (symmetric) {
252: PetscFree(*ia);
253: PetscFree(*ja);
254: } else if (oshift == 1) {
255: int nz = a->i[n]-1;
256: for (i=0; i<nz; i++) a->j[i]--;
257: for (i=0; i<n+1; i++) a->i[i]--;
258: }
259: return(0);
260: }
262: #undef __FUNCT__
264: int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
265: {
266: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
269: *bs = baij->bs;
270: return(0);
271: }
274: #undef __FUNCT__
276: int MatDestroy_SeqBAIJ(Mat A)
277: {
278: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
279: int ierr;
282: #if defined(PETSC_USE_LOG)
283: PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
284: #endif
285: PetscFree(a->a);
286: if (!a->singlemalloc) {
287: PetscFree(a->i);
288: PetscFree(a->j);
289: }
290: if (a->row) {
291: ISDestroy(a->row);
292: }
293: if (a->col) {
294: ISDestroy(a->col);
295: }
296: if (a->diag) {PetscFree(a->diag);}
297: if (a->ilen) {PetscFree(a->ilen);}
298: if (a->imax) {PetscFree(a->imax);}
299: if (a->solve_work) {PetscFree(a->solve_work);}
300: if (a->mult_work) {PetscFree(a->mult_work);}
301: if (a->icol) {ISDestroy(a->icol);}
302: if (a->saved_values) {PetscFree(a->saved_values);}
303: #if defined(PETSC_USE_MAT_SINGLE)
304: if (a->setvaluescopy) {PetscFree(a->setvaluescopy);}
305: #endif
306: PetscFree(a);
307: return(0);
308: }
310: #undef __FUNCT__
312: int MatSetOption_SeqBAIJ(Mat A,MatOption op)
313: {
314: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
317: switch (op) {
318: case MAT_ROW_ORIENTED:
319: a->roworiented = PETSC_TRUE;
320: break;
321: case MAT_COLUMN_ORIENTED:
322: a->roworiented = PETSC_FALSE;
323: break;
324: case MAT_COLUMNS_SORTED:
325: a->sorted = PETSC_TRUE;
326: break;
327: case MAT_COLUMNS_UNSORTED:
328: a->sorted = PETSC_FALSE;
329: break;
330: case MAT_KEEP_ZEROED_ROWS:
331: a->keepzeroedrows = PETSC_TRUE;
332: break;
333: case MAT_NO_NEW_NONZERO_LOCATIONS:
334: a->nonew = 1;
335: break;
336: case MAT_NEW_NONZERO_LOCATION_ERR:
337: a->nonew = -1;
338: break;
339: case MAT_NEW_NONZERO_ALLOCATION_ERR:
340: a->nonew = -2;
341: break;
342: case MAT_YES_NEW_NONZERO_LOCATIONS:
343: a->nonew = 0;
344: break;
345: case MAT_ROWS_SORTED:
346: case MAT_ROWS_UNSORTED:
347: case MAT_YES_NEW_DIAGONALS:
348: case MAT_IGNORE_OFF_PROC_ENTRIES:
349: case MAT_USE_HASH_TABLE:
350: PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignoredn");
351: break;
352: case MAT_NO_NEW_DIAGONALS:
353: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
354: default:
355: SETERRQ(PETSC_ERR_SUP,"unknown option");
356: }
357: return(0);
358: }
360: #undef __FUNCT__
362: int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
363: {
364: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
365: int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
366: MatScalar *aa,*aa_i;
367: PetscScalar *v_i;
370: bs = a->bs;
371: ai = a->i;
372: aj = a->j;
373: aa = a->a;
374: bs2 = a->bs2;
375:
376: if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range");
377:
378: bn = row/bs; /* Block number */
379: bp = row % bs; /* Block Position */
380: M = ai[bn+1] - ai[bn];
381: *nz = bs*M;
382:
383: if (v) {
384: *v = 0;
385: if (*nz) {
386: PetscMalloc((*nz)*sizeof(PetscScalar),v);
387: for (i=0; i<M; i++) { /* for each block in the block row */
388: v_i = *v + i*bs;
389: aa_i = aa + bs2*(ai[bn] + i);
390: for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
391: }
392: }
393: }
395: if (idx) {
396: *idx = 0;
397: if (*nz) {
398: PetscMalloc((*nz)*sizeof(int),idx);
399: for (i=0; i<M; i++) { /* for each block in the block row */
400: idx_i = *idx + i*bs;
401: itmp = bs*aj[ai[bn] + i];
402: for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
403: }
404: }
405: }
406: return(0);
407: }
409: #undef __FUNCT__
411: int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
412: {
416: if (idx) {if (*idx) {PetscFree(*idx);}}
417: if (v) {if (*v) {PetscFree(*v);}}
418: return(0);
419: }
421: #undef __FUNCT__
423: int MatTranspose_SeqBAIJ(Mat A,Mat *B)
424: {
425: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
426: Mat C;
427: int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
428: int *rows,*cols,bs2=a->bs2;
429: PetscScalar *array;
432: if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
433: PetscMalloc((1+nbs)*sizeof(int),&col);
434: PetscMemzero(col,(1+nbs)*sizeof(int));
436: #if defined(PETSC_USE_MAT_SINGLE)
437: PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);
438: for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
439: #else
440: array = a->a;
441: #endif
443: for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
444: MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);
445: PetscFree(col);
446: PetscMalloc(2*bs*sizeof(int),&rows);
447: cols = rows + bs;
448: for (i=0; i<mbs; i++) {
449: cols[0] = i*bs;
450: for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
451: len = ai[i+1] - ai[i];
452: for (j=0; j<len; j++) {
453: rows[0] = (*aj++)*bs;
454: for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
455: MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);
456: array += bs2;
457: }
458: }
459: PetscFree(rows);
460: #if defined(PETSC_USE_MAT_SINGLE)
461: PetscFree(array);
462: #endif
463:
464: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
465: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
466:
467: if (B) {
468: *B = C;
469: } else {
470: MatHeaderCopy(A,C);
471: }
472: return(0);
473: }
475: #undef __FUNCT__
477: static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
478: {
479: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
480: int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
481: PetscScalar *aa;
482: FILE *file;
485: ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);
486: ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);
487: col_lens[0] = MAT_FILE_COOKIE;
489: col_lens[1] = A->m;
490: col_lens[2] = A->n;
491: col_lens[3] = a->nz*bs2;
493: /* store lengths of each row and write (including header) to file */
494: count = 0;
495: for (i=0; i<a->mbs; i++) {
496: for (j=0; j<bs; j++) {
497: col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
498: }
499: }
500: PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);
501: PetscFree(col_lens);
503: /* store column indices (zero start index) */
504: ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);
505: count = 0;
506: for (i=0; i<a->mbs; i++) {
507: for (j=0; j<bs; j++) {
508: for (k=a->i[i]; k<a->i[i+1]; k++) {
509: for (l=0; l<bs; l++) {
510: jj[count++] = bs*a->j[k] + l;
511: }
512: }
513: }
514: }
515: PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);
516: PetscFree(jj);
518: /* store nonzero values */
519: ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);
520: count = 0;
521: for (i=0; i<a->mbs; i++) {
522: for (j=0; j<bs; j++) {
523: for (k=a->i[i]; k<a->i[i+1]; k++) {
524: for (l=0; l<bs; l++) {
525: aa[count++] = a->a[bs2*k + l*bs + j];
526: }
527: }
528: }
529: }
530: PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);
531: PetscFree(aa);
533: PetscViewerBinaryGetInfoPointer(viewer,&file);
534: if (file) {
535: fprintf(file,"-matload_block_size %dn",a->bs);
536: }
537: return(0);
538: }
540: extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
542: #undef __FUNCT__
544: static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
545: {
546: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
547: int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
548: PetscViewerFormat format;
551: PetscViewerGetFormat(viewer,&format);
552: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
553: PetscViewerASCIIPrintf(viewer," block size is %dn",bs);
554: } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
555: Mat aij;
556: MatConvert(A,MATSEQAIJ,&aij);
557: MatView(aij,viewer);
558: MatDestroy(aij);
559: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
560: #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
561: MatMPIBAIJFactorInfo_DSCPACK(A,viewer);
562: #endif
563: return(0);
564: } else if (format == PETSC_VIEWER_ASCII_COMMON) {
565: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
566: for (i=0; i<a->mbs; i++) {
567: for (j=0; j<bs; j++) {
568: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
569: for (k=a->i[i]; k<a->i[i+1]; k++) {
570: for (l=0; l<bs; l++) {
571: #if defined(PETSC_USE_COMPLEX)
572: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
573: PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
574: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
575: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
576: PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
577: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
578: } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
579: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
580: }
581: #else
582: if (a->a[bs2*k + l*bs + j] != 0.0) {
583: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
584: }
585: #endif
586: }
587: }
588: PetscViewerASCIIPrintf(viewer,"n");
589: }
590: }
591: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
592: } else {
593: PetscViewerASCIIUseTabs(viewer,PETSC_NO);
594: for (i=0; i<a->mbs; i++) {
595: for (j=0; j<bs; j++) {
596: PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);
597: for (k=a->i[i]; k<a->i[i+1]; k++) {
598: for (l=0; l<bs; l++) {
599: #if defined(PETSC_USE_COMPLEX)
600: if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
601: PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
602: PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
603: } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
604: PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
605: PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));
606: } else {
607: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));
608: }
609: #else
610: PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);
611: #endif
612: }
613: }
614: PetscViewerASCIIPrintf(viewer,"n");
615: }
616: }
617: PetscViewerASCIIUseTabs(viewer,PETSC_YES);
618: }
619: PetscViewerFlush(viewer);
620: return(0);
621: }
623: #undef __FUNCT__
625: static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
626: {
627: Mat A = (Mat) Aa;
628: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data;
629: int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
630: PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r;
631: MatScalar *aa;
632: PetscViewer viewer;
636: /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
637: PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);
639: PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);
641: /* loop over matrix elements drawing boxes */
642: color = PETSC_DRAW_BLUE;
643: for (i=0,row=0; i<mbs; i++,row+=bs) {
644: for (j=a->i[i]; j<a->i[i+1]; j++) {
645: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
646: x_l = a->j[j]*bs; x_r = x_l + 1.0;
647: aa = a->a + j*bs2;
648: for (k=0; k<bs; k++) {
649: for (l=0; l<bs; l++) {
650: if (PetscRealPart(*aa++) >= 0.) continue;
651: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
652: }
653: }
654: }
655: }
656: color = PETSC_DRAW_CYAN;
657: for (i=0,row=0; i<mbs; i++,row+=bs) {
658: for (j=a->i[i]; j<a->i[i+1]; j++) {
659: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
660: x_l = a->j[j]*bs; x_r = x_l + 1.0;
661: aa = a->a + j*bs2;
662: for (k=0; k<bs; k++) {
663: for (l=0; l<bs; l++) {
664: if (PetscRealPart(*aa++) != 0.) continue;
665: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
666: }
667: }
668: }
669: }
671: color = PETSC_DRAW_RED;
672: for (i=0,row=0; i<mbs; i++,row+=bs) {
673: for (j=a->i[i]; j<a->i[i+1]; j++) {
674: y_l = A->m - row - 1.0; y_r = y_l + 1.0;
675: x_l = a->j[j]*bs; x_r = x_l + 1.0;
676: aa = a->a + j*bs2;
677: for (k=0; k<bs; k++) {
678: for (l=0; l<bs; l++) {
679: if (PetscRealPart(*aa++) <= 0.) continue;
680: PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);
681: }
682: }
683: }
684: }
685: return(0);
686: }
688: #undef __FUNCT__
690: static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
691: {
692: int ierr;
693: PetscReal xl,yl,xr,yr,w,h;
694: PetscDraw draw;
695: PetscTruth isnull;
699: PetscViewerDrawGetDraw(viewer,0,&draw);
700: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
702: PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);
703: xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
704: xr += w; yr += h; xl = -w; yl = -h;
705: PetscDrawSetCoordinates(draw,xl,yl,xr,yr);
706: PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);
707: PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);
708: return(0);
709: }
711: #undef __FUNCT__
713: int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
714: {
715: int ierr;
716: PetscTruth issocket,isascii,isbinary,isdraw;
719: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
720: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
721: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
722: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
723: if (issocket) {
724: SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported");
725: } else if (isascii){
726: MatView_SeqBAIJ_ASCII(A,viewer);
727: } else if (isbinary) {
728: MatView_SeqBAIJ_Binary(A,viewer);
729: } else if (isdraw) {
730: MatView_SeqBAIJ_Draw(A,viewer);
731: } else {
732: SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name);
733: }
734: return(0);
735: }
738: #undef __FUNCT__
740: int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v)
741: {
742: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
743: int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
744: int *ai = a->i,*ailen = a->ilen;
745: int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
746: MatScalar *ap,*aa = a->a,zero = 0.0;
749: for (k=0; k<m; k++) { /* loop over rows */
750: row = im[k]; brow = row/bs;
751: if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
752: if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
753: rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
754: nrow = ailen[brow];
755: for (l=0; l<n; l++) { /* loop over columns */
756: if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
757: if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
758: col = in[l] ;
759: bcol = col/bs;
760: cidx = col%bs;
761: ridx = row%bs;
762: high = nrow;
763: low = 0; /* assume unsorted */
764: while (high-low > 5) {
765: t = (low+high)/2;
766: if (rp[t] > bcol) high = t;
767: else low = t;
768: }
769: for (i=low; i<high; i++) {
770: if (rp[i] > bcol) break;
771: if (rp[i] == bcol) {
772: *v++ = ap[bs2*i+bs*cidx+ridx];
773: goto finished;
774: }
775: }
776: *v++ = zero;
777: finished:;
778: }
779: }
780: return(0);
781: }
783: #if defined(PETSC_USE_MAT_SINGLE)
784: #undef __FUNCT__
786: int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
787: {
788: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
789: int ierr,i,N = m*n*b->bs2;
790: MatScalar *vsingle;
793: if (N > b->setvalueslen) {
794: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
795: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
796: b->setvalueslen = N;
797: }
798: vsingle = b->setvaluescopy;
799: for (i=0; i<N; i++) {
800: vsingle[i] = v[i];
801: }
802: MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
803: return(0);
804: }
805: #endif
808: #undef __FUNCT__
810: int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is)
811: {
812: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
813: int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
814: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
815: int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
816: PetscTruth roworiented=a->roworiented;
817: MatScalar *value = v,*ap,*aa = a->a,*bap;
820: if (roworiented) {
821: stepval = (n-1)*bs;
822: } else {
823: stepval = (m-1)*bs;
824: }
825: for (k=0; k<m; k++) { /* loop over added rows */
826: row = im[k];
827: if (row < 0) continue;
828: #if defined(PETSC_USE_BOPT_g)
829: if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
830: #endif
831: rp = aj + ai[row];
832: ap = aa + bs2*ai[row];
833: rmax = imax[row];
834: nrow = ailen[row];
835: low = 0;
836: for (l=0; l<n; l++) { /* loop over added columns */
837: if (in[l] < 0) continue;
838: #if defined(PETSC_USE_BOPT_g)
839: if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
840: #endif
841: col = in[l];
842: if (roworiented) {
843: value = v + k*(stepval+bs)*bs + l*bs;
844: } else {
845: value = v + l*(stepval+bs)*bs + k*bs;
846: }
847: if (!sorted) low = 0; high = nrow;
848: while (high-low > 7) {
849: t = (low+high)/2;
850: if (rp[t] > col) high = t;
851: else low = t;
852: }
853: for (i=low; i<high; i++) {
854: if (rp[i] > col) break;
855: if (rp[i] == col) {
856: bap = ap + bs2*i;
857: if (roworiented) {
858: if (is == ADD_VALUES) {
859: for (ii=0; ii<bs; ii++,value+=stepval) {
860: for (jj=ii; jj<bs2; jj+=bs) {
861: bap[jj] += *value++;
862: }
863: }
864: } else {
865: for (ii=0; ii<bs; ii++,value+=stepval) {
866: for (jj=ii; jj<bs2; jj+=bs) {
867: bap[jj] = *value++;
868: }
869: }
870: }
871: } else {
872: if (is == ADD_VALUES) {
873: for (ii=0; ii<bs; ii++,value+=stepval) {
874: for (jj=0; jj<bs; jj++) {
875: *bap++ += *value++;
876: }
877: }
878: } else {
879: for (ii=0; ii<bs; ii++,value+=stepval) {
880: for (jj=0; jj<bs; jj++) {
881: *bap++ = *value++;
882: }
883: }
884: }
885: }
886: goto noinsert2;
887: }
888: }
889: if (nonew == 1) goto noinsert2;
890: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
891: if (nrow >= rmax) {
892: /* there is no extra room in row, therefore enlarge */
893: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
894: MatScalar *new_a;
896: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
898: /* malloc new storage space */
899: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
900: ierr = PetscMalloc(len,&new_a);
901: new_j = (int*)(new_a + bs2*new_nz);
902: new_i = new_j + new_nz;
904: /* copy over old data into new slots */
905: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
906: for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
907: PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));
908: len = (new_nz - CHUNKSIZE - ai[row] - nrow);
909: PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));
910: PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));
911: PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
912: PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));
913: /* free up old matrix storage */
914: PetscFree(a->a);
915: if (!a->singlemalloc) {
916: PetscFree(a->i);
917: PetscFree(a->j);
918: }
919: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
920: a->singlemalloc = PETSC_TRUE;
922: rp = aj + ai[row]; ap = aa + bs2*ai[row];
923: rmax = imax[row] = imax[row] + CHUNKSIZE;
924: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
925: a->maxnz += bs2*CHUNKSIZE;
926: a->reallocs++;
927: a->nz++;
928: }
929: N = nrow++ - 1;
930: /* shift up all the later entries in this row */
931: for (ii=N; ii>=i; ii--) {
932: rp[ii+1] = rp[ii];
933: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
934: }
935: if (N >= i) {
936: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
937: }
938: rp[i] = col;
939: bap = ap + bs2*i;
940: if (roworiented) {
941: for (ii=0; ii<bs; ii++,value+=stepval) {
942: for (jj=ii; jj<bs2; jj+=bs) {
943: bap[jj] = *value++;
944: }
945: }
946: } else {
947: for (ii=0; ii<bs; ii++,value+=stepval) {
948: for (jj=0; jj<bs; jj++) {
949: *bap++ = *value++;
950: }
951: }
952: }
953: noinsert2:;
954: low = i;
955: }
956: ailen[row] = nrow;
957: }
958: return(0);
959: }
961: #undef __FUNCT__
963: int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
964: {
965: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
966: int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
967: int m = A->m,*ip,N,*ailen = a->ilen;
968: int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
969: MatScalar *aa = a->a,*ap;
970: #if defined(PETSC_HAVE_DSCPACK)
971: PetscTruth flag;
972: #endif
975: if (mode == MAT_FLUSH_ASSEMBLY) return(0);
977: if (m) rmax = ailen[0];
978: for (i=1; i<mbs; i++) {
979: /* move each row back by the amount of empty slots (fshift) before it*/
980: fshift += imax[i-1] - ailen[i-1];
981: rmax = PetscMax(rmax,ailen[i]);
982: if (fshift) {
983: ip = aj + ai[i]; ap = aa + bs2*ai[i];
984: N = ailen[i];
985: for (j=0; j<N; j++) {
986: ip[j-fshift] = ip[j];
987: PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));
988: }
989: }
990: ai[i] = ai[i-1] + ailen[i-1];
991: }
992: if (mbs) {
993: fshift += imax[mbs-1] - ailen[mbs-1];
994: ai[mbs] = ai[mbs-1] + ailen[mbs-1];
995: }
996: /* reset ilen and imax for each row */
997: for (i=0; i<mbs; i++) {
998: ailen[i] = imax[i] = ai[i+1] - ai[i];
999: }
1000: a->nz = ai[mbs];
1002: /* diagonals may have moved, so kill the diagonal pointers */
1003: if (fshift && a->diag) {
1004: PetscFree(a->diag);
1005: PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1006: a->diag = 0;
1007: }
1008: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d usedn",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1009: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %dn",a->reallocs);
1010: PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %dn",rmax);
1011: a->reallocs = 0;
1012: A->info.nz_unneeded = (PetscReal)fshift*bs2;
1014: #if defined(PETSC_HAVE_DSCPACK)
1015: PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);
1016: if (flag) { MatUseDSCPACK_MPIBAIJ(A); }
1017: #endif
1019: return(0);
1020: }
1024: /*
1025: This function returns an array of flags which indicate the locations of contiguous
1026: blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9]
1027: then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1028: Assume: sizes should be long enough to hold all the values.
1029: */
1030: #undef __FUNCT__
1032: static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1033: {
1034: int i,j,k,row;
1035: PetscTruth flg;
1038: for (i=0,j=0; i<n; j++) {
1039: row = idx[i];
1040: if (row%bs!=0) { /* Not the begining of a block */
1041: sizes[j] = 1;
1042: i++;
1043: } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1044: sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */
1045: i++;
1046: } else { /* Begining of the block, so check if the complete block exists */
1047: flg = PETSC_TRUE;
1048: for (k=1; k<bs; k++) {
1049: if (row+k != idx[i+k]) { /* break in the block */
1050: flg = PETSC_FALSE;
1051: break;
1052: }
1053: }
1054: if (flg == PETSC_TRUE) { /* No break in the bs */
1055: sizes[j] = bs;
1056: i+= bs;
1057: } else {
1058: sizes[j] = 1;
1059: i++;
1060: }
1061: }
1062: }
1063: *bs_max = j;
1064: return(0);
1065: }
1066:
1067: #undef __FUNCT__
1069: int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag)
1070: {
1071: Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1072: int ierr,i,j,k,count,is_n,*is_idx,*rows;
1073: int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1074: PetscScalar zero = 0.0;
1075: MatScalar *aa;
1078: /* Make a copy of the IS and sort it */
1079: ISGetLocalSize(is,&is_n);
1080: ISGetIndices(is,&is_idx);
1082: /* allocate memory for rows,sizes */
1083: ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);
1084: sizes = rows + is_n;
1086: /* copy IS values to rows, and sort them */
1087: for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1088: PetscSortInt(is_n,rows);
1089: if (baij->keepzeroedrows) {
1090: for (i=0; i<is_n; i++) { sizes[i] = 1; }
1091: bs_max = is_n;
1092: } else {
1093: MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);
1094: }
1095: ISRestoreIndices(is,&is_idx);
1097: for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1098: row = rows[j];
1099: if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1100: count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1101: aa = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1102: if (sizes[i] == bs && !baij->keepzeroedrows) {
1103: if (diag) {
1104: if (baij->ilen[row/bs] > 0) {
1105: baij->ilen[row/bs] = 1;
1106: baij->j[baij->i[row/bs]] = row/bs;
1107: PetscMemzero(aa,count*bs*sizeof(MatScalar));
1108: }
1109: /* Now insert all the diagonal values for this bs */
1110: for (k=0; k<bs; k++) {
1111: (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);
1112: }
1113: } else { /* (!diag) */
1114: baij->ilen[row/bs] = 0;
1115: } /* end (!diag) */
1116: } else { /* (sizes[i] != bs) */
1117: #if defined (PETSC_USE_DEBUG)
1118: if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1119: #endif
1120: for (k=0; k<count; k++) {
1121: aa[0] = zero;
1122: aa += bs;
1123: }
1124: if (diag) {
1125: (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);
1126: }
1127: }
1128: }
1130: PetscFree(rows);
1131: MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);
1132: return(0);
1133: }
1135: #undef __FUNCT__
1137: int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is)
1138: {
1139: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1140: int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1141: int *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1142: int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1143: int ridx,cidx,bs2=a->bs2,ierr;
1144: PetscTruth roworiented=a->roworiented;
1145: MatScalar *ap,value,*aa=a->a,*bap;
1148: for (k=0; k<m; k++) { /* loop over added rows */
1149: row = im[k]; brow = row/bs;
1150: if (row < 0) continue;
1151: #if defined(PETSC_USE_BOPT_g)
1152: if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m);
1153: #endif
1154: rp = aj + ai[brow];
1155: ap = aa + bs2*ai[brow];
1156: rmax = imax[brow];
1157: nrow = ailen[brow];
1158: low = 0;
1159: for (l=0; l<n; l++) { /* loop over added columns */
1160: if (in[l] < 0) continue;
1161: #if defined(PETSC_USE_BOPT_g)
1162: if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n);
1163: #endif
1164: col = in[l]; bcol = col/bs;
1165: ridx = row % bs; cidx = col % bs;
1166: if (roworiented) {
1167: value = v[l + k*n];
1168: } else {
1169: value = v[k + l*m];
1170: }
1171: if (!sorted) low = 0; high = nrow;
1172: while (high-low > 7) {
1173: t = (low+high)/2;
1174: if (rp[t] > bcol) high = t;
1175: else low = t;
1176: }
1177: for (i=low; i<high; i++) {
1178: if (rp[i] > bcol) break;
1179: if (rp[i] == bcol) {
1180: bap = ap + bs2*i + bs*cidx + ridx;
1181: if (is == ADD_VALUES) *bap += value;
1182: else *bap = value;
1183: goto noinsert1;
1184: }
1185: }
1186: if (nonew == 1) goto noinsert1;
1187: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1188: if (nrow >= rmax) {
1189: /* there is no extra room in row, therefore enlarge */
1190: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1191: MatScalar *new_a;
1193: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
1195: /* Malloc new storage space */
1196: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1197: ierr = PetscMalloc(len,&new_a);
1198: new_j = (int*)(new_a + bs2*new_nz);
1199: new_i = new_j + new_nz;
1201: /* copy over old data into new slots */
1202: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1203: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1204: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
1205: len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1206: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
1207: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
1208: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
1209: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
1210: /* free up old matrix storage */
1211: PetscFree(a->a);
1212: if (!a->singlemalloc) {
1213: PetscFree(a->i);
1214: PetscFree(a->j);
1215: }
1216: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1217: a->singlemalloc = PETSC_TRUE;
1219: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
1220: rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1221: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1222: a->maxnz += bs2*CHUNKSIZE;
1223: a->reallocs++;
1224: a->nz++;
1225: }
1226: N = nrow++ - 1;
1227: /* shift up all the later entries in this row */
1228: for (ii=N; ii>=i; ii--) {
1229: rp[ii+1] = rp[ii];
1230: ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
1231: }
1232: if (N>=i) {
1233: PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));
1234: }
1235: rp[i] = bcol;
1236: ap[bs2*i + bs*cidx + ridx] = value;
1237: noinsert1:;
1238: low = i;
1239: }
1240: ailen[brow] = nrow;
1241: }
1242: return(0);
1243: }
1246: #undef __FUNCT__
1248: int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info)
1249: {
1250: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1251: Mat outA;
1252: int ierr;
1253: PetscTruth row_identity,col_identity;
1256: if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1257: ISIdentity(row,&row_identity);
1258: ISIdentity(col,&col_identity);
1259: if (!row_identity || !col_identity) {
1260: SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1261: }
1263: outA = inA;
1264: inA->factor = FACTOR_LU;
1266: if (!a->diag) {
1267: MatMarkDiagonal_SeqBAIJ(inA);
1268: }
1270: a->row = row;
1271: a->col = col;
1272: ierr = PetscObjectReference((PetscObject)row);
1273: ierr = PetscObjectReference((PetscObject)col);
1274:
1275: /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1276: ISInvertPermutation(col,PETSC_DECIDE,&a->icol);
1277: PetscLogObjectParent(inA,a->icol);
1278:
1279: /*
1280: Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1281: for ILU(0) factorization with natural ordering
1282: */
1283: if (a->bs < 8) {
1284: MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);
1285: } else {
1286: if (!a->solve_work) {
1287: PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);
1288: PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1289: }
1290: }
1292: MatLUFactorNumeric(inA,&outA);
1294: return(0);
1295: }
1296: #undef __FUNCT__
1298: int MatPrintHelp_SeqBAIJ(Mat A)
1299: {
1300: static PetscTruth called = PETSC_FALSE;
1301: MPI_Comm comm = A->comm;
1302: int ierr;
1305: if (called) {return(0);} else called = PETSC_TRUE;
1306: (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):n");
1307: (*PetscHelpPrintf)(comm," -mat_block_size <block_size>n");
1308: return(0);
1309: }
1311: EXTERN_C_BEGIN
1312: #undef __FUNCT__
1314: int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1315: {
1316: Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1317: int i,nz,nbs;
1320: nz = baij->maxnz/baij->bs2;
1321: nbs = baij->nbs;
1322: for (i=0; i<nz; i++) {
1323: baij->j[i] = indices[i];
1324: }
1325: baij->nz = nz;
1326: for (i=0; i<nbs; i++) {
1327: baij->ilen[i] = baij->imax[i];
1328: }
1330: return(0);
1331: }
1332: EXTERN_C_END
1334: #undef __FUNCT__
1336: /*@
1337: MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1338: in the matrix.
1340: Input Parameters:
1341: + mat - the SeqBAIJ matrix
1342: - indices - the column indices
1344: Level: advanced
1346: Notes:
1347: This can be called if you have precomputed the nonzero structure of the
1348: matrix and want to provide it to the matrix object to improve the performance
1349: of the MatSetValues() operation.
1351: You MUST have set the correct numbers of nonzeros per row in the call to
1352: MatCreateSeqBAIJ().
1354: MUST be called before any calls to MatSetValues();
1356: @*/
1357: int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1358: {
1359: int ierr,(*f)(Mat,int *);
1363: PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);
1364: if (f) {
1365: (*f)(mat,indices);
1366: } else {
1367: SETERRQ(1,"Wrong type of matrix to set column indices");
1368: }
1369: return(0);
1370: }
1372: #undef __FUNCT__
1374: int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1375: {
1376: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1377: int ierr,i,j,n,row,bs,*ai,*aj,mbs;
1378: PetscReal atmp;
1379: PetscScalar *x,zero = 0.0;
1380: MatScalar *aa;
1381: int ncols,brow,krow,kcol;
1384: if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1385: bs = a->bs;
1386: aa = a->a;
1387: ai = a->i;
1388: aj = a->j;
1389: mbs = a->mbs;
1391: VecSet(&zero,v);
1392: VecGetArray(v,&x);
1393: VecGetLocalSize(v,&n);
1394: if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1395: for (i=0; i<mbs; i++) {
1396: ncols = ai[1] - ai[0]; ai++;
1397: brow = bs*i;
1398: for (j=0; j<ncols; j++){
1399: /* bcol = bs*(*aj); */
1400: for (kcol=0; kcol<bs; kcol++){
1401: for (krow=0; krow<bs; krow++){
1402: atmp = PetscAbsScalar(*aa); aa++;
1403: row = brow + krow; /* row index */
1404: /* printf("val[%d,%d]: %gn",row,bcol+kcol,atmp); */
1405: if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1406: }
1407: }
1408: aj++;
1409: }
1410: }
1411: VecRestoreArray(v,&x);
1412: return(0);
1413: }
1415: #undef __FUNCT__
1417: int MatSetUpPreallocation_SeqBAIJ(Mat A)
1418: {
1419: int ierr;
1422: MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);
1423: return(0);
1424: }
1426: #undef __FUNCT__
1428: int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array)
1429: {
1430: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1432: *array = a->a;
1433: return(0);
1434: }
1436: #undef __FUNCT__
1438: int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array)
1439: {
1441: return(0);
1442: }
1444: #include petscblaslapack.h
1445: #undef __FUNCT__
1447: int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1448: {
1449: int ierr,one=1;
1450: Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1453: if (str == SAME_NONZERO_PATTERN) {
1454: BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1455: } else {
1456: MatAXPY_Basic(a,X,Y,str);
1457: }
1458: return(0);
1459: }
1461: /* -------------------------------------------------------------------*/
1462: static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1463: MatGetRow_SeqBAIJ,
1464: MatRestoreRow_SeqBAIJ,
1465: MatMult_SeqBAIJ_N,
1466: MatMultAdd_SeqBAIJ_N,
1467: MatMultTranspose_SeqBAIJ,
1468: MatMultTransposeAdd_SeqBAIJ,
1469: MatSolve_SeqBAIJ_N,
1470: 0,
1471: 0,
1472: 0,
1473: MatLUFactor_SeqBAIJ,
1474: 0,
1475: 0,
1476: MatTranspose_SeqBAIJ,
1477: MatGetInfo_SeqBAIJ,
1478: MatEqual_SeqBAIJ,
1479: MatGetDiagonal_SeqBAIJ,
1480: MatDiagonalScale_SeqBAIJ,
1481: MatNorm_SeqBAIJ,
1482: 0,
1483: MatAssemblyEnd_SeqBAIJ,
1484: 0,
1485: MatSetOption_SeqBAIJ,
1486: MatZeroEntries_SeqBAIJ,
1487: MatZeroRows_SeqBAIJ,
1488: MatLUFactorSymbolic_SeqBAIJ,
1489: MatLUFactorNumeric_SeqBAIJ_N,
1490: 0,
1491: 0,
1492: MatSetUpPreallocation_SeqBAIJ,
1493: MatILUFactorSymbolic_SeqBAIJ,
1494: 0,
1495: MatGetArray_SeqBAIJ,
1496: MatRestoreArray_SeqBAIJ,
1497: MatDuplicate_SeqBAIJ,
1498: 0,
1499: 0,
1500: MatILUFactor_SeqBAIJ,
1501: 0,
1502: MatAXPY_SeqBAIJ,
1503: MatGetSubMatrices_SeqBAIJ,
1504: MatIncreaseOverlap_SeqBAIJ,
1505: MatGetValues_SeqBAIJ,
1506: 0,
1507: MatPrintHelp_SeqBAIJ,
1508: MatScale_SeqBAIJ,
1509: 0,
1510: 0,
1511: 0,
1512: MatGetBlockSize_SeqBAIJ,
1513: MatGetRowIJ_SeqBAIJ,
1514: MatRestoreRowIJ_SeqBAIJ,
1515: 0,
1516: 0,
1517: 0,
1518: 0,
1519: 0,
1520: 0,
1521: MatSetValuesBlocked_SeqBAIJ,
1522: MatGetSubMatrix_SeqBAIJ,
1523: MatDestroy_SeqBAIJ,
1524: MatView_SeqBAIJ,
1525: MatGetPetscMaps_Petsc,
1526: 0,
1527: 0,
1528: 0,
1529: 0,
1530: 0,
1531: 0,
1532: MatGetRowMax_SeqBAIJ,
1533: MatConvert_Basic};
1535: EXTERN_C_BEGIN
1536: #undef __FUNCT__
1538: int MatStoreValues_SeqBAIJ(Mat mat)
1539: {
1540: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1541: int nz = aij->i[mat->m]*aij->bs*aij->bs2;
1542: int ierr;
1545: if (aij->nonew != 1) {
1546: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1547: }
1549: /* allocate space for values if not already there */
1550: if (!aij->saved_values) {
1551: PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);
1552: }
1554: /* copy values over */
1555: PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));
1556: return(0);
1557: }
1558: EXTERN_C_END
1560: EXTERN_C_BEGIN
1561: #undef __FUNCT__
1563: int MatRetrieveValues_SeqBAIJ(Mat mat)
1564: {
1565: Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
1566: int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
1569: if (aij->nonew != 1) {
1570: SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
1571: }
1572: if (!aij->saved_values) {
1573: SETERRQ(1,"Must call MatStoreValues(A);first");
1574: }
1576: /* copy values over */
1577: PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));
1578: return(0);
1579: }
1580: EXTERN_C_END
1582: EXTERN_C_BEGIN
1583: extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*);
1584: EXTERN_C_END
1586: EXTERN_C_BEGIN
1587: #undef __FUNCT__
1589: int MatCreate_SeqBAIJ(Mat B)
1590: {
1591: int ierr,size;
1592: Mat_SeqBAIJ *b;
1595: MPI_Comm_size(B->comm,&size);
1596: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
1598: B->m = B->M = PetscMax(B->m,B->M);
1599: B->n = B->N = PetscMax(B->n,B->N);
1600: ierr = PetscNew(Mat_SeqBAIJ,&b);
1601: B->data = (void*)b;
1602: ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));
1603: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1604: B->factor = 0;
1605: B->lupivotthreshold = 1.0;
1606: B->mapping = 0;
1607: b->row = 0;
1608: b->col = 0;
1609: b->icol = 0;
1610: b->reallocs = 0;
1611: b->saved_values = 0;
1612: #if defined(PETSC_USE_MAT_SINGLE)
1613: b->setvalueslen = 0;
1614: b->setvaluescopy = PETSC_NULL;
1615: #endif
1617: PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);
1618: PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);
1620: b->sorted = PETSC_FALSE;
1621: b->roworiented = PETSC_TRUE;
1622: b->nonew = 0;
1623: b->diag = 0;
1624: b->solve_work = 0;
1625: b->mult_work = 0;
1626: B->spptr = 0;
1627: B->info.nz_unneeded = (PetscReal)b->maxnz;
1628: b->keepzeroedrows = PETSC_FALSE;
1630: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1631: "MatStoreValues_SeqBAIJ",
1632: MatStoreValues_SeqBAIJ);
1633: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1634: "MatRetrieveValues_SeqBAIJ",
1635: MatRetrieveValues_SeqBAIJ);
1636: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
1637: "MatSeqBAIJSetColumnIndices_SeqBAIJ",
1638: MatSeqBAIJSetColumnIndices_SeqBAIJ);
1639: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
1640: "MatConvert_SeqBAIJ_SeqAIJ",
1641: MatConvert_SeqBAIJ_SeqAIJ);
1642: return(0);
1643: }
1644: EXTERN_C_END
1646: #undef __FUNCT__
1648: int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
1649: {
1650: Mat C;
1651: Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
1652: int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
1655: if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
1657: *B = 0;
1658: MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);
1659: MatSetType(C,MATSEQBAIJ);
1660: c = (Mat_SeqBAIJ*)C->data;
1662: c->bs = a->bs;
1663: c->bs2 = a->bs2;
1664: c->mbs = a->mbs;
1665: c->nbs = a->nbs;
1666: PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));
1668: PetscMalloc((mbs+1)*sizeof(int),&c->imax);
1669: PetscMalloc((mbs+1)*sizeof(int),&c->ilen);
1670: for (i=0; i<mbs; i++) {
1671: c->imax[i] = a->imax[i];
1672: c->ilen[i] = a->ilen[i];
1673: }
1675: /* allocate the matrix space */
1676: c->singlemalloc = PETSC_TRUE;
1677: len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
1678: PetscMalloc(len,&c->a);
1679: c->j = (int*)(c->a + nz*bs2);
1680: c->i = c->j + nz;
1681: PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));
1682: if (mbs > 0) {
1683: PetscMemcpy(c->j,a->j,nz*sizeof(int));
1684: if (cpvalues == MAT_COPY_VALUES) {
1685: PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));
1686: } else {
1687: PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));
1688: }
1689: }
1691: PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
1692: c->sorted = a->sorted;
1693: c->roworiented = a->roworiented;
1694: c->nonew = a->nonew;
1696: if (a->diag) {
1697: PetscMalloc((mbs+1)*sizeof(int),&c->diag);
1698: PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
1699: for (i=0; i<mbs; i++) {
1700: c->diag[i] = a->diag[i];
1701: }
1702: } else c->diag = 0;
1703: c->nz = a->nz;
1704: c->maxnz = a->maxnz;
1705: c->solve_work = 0;
1706: C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */
1707: c->mult_work = 0;
1708: C->preallocated = PETSC_TRUE;
1709: C->assembled = PETSC_TRUE;
1710: *B = C;
1711: PetscFListDuplicate(A->qlist,&C->qlist);
1712: return(0);
1713: }
1715: EXTERN_C_BEGIN
1716: #undef __FUNCT__
1718: int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A)
1719: {
1720: Mat_SeqBAIJ *a;
1721: Mat B;
1722: int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
1723: int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
1724: int kmax,jcount,block,idx,point,nzcountb,extra_rows;
1725: int *masked,nmask,tmp,bs2,ishift;
1726: PetscScalar *aa;
1727: MPI_Comm comm = ((PetscObject)viewer)->comm;
1730: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
1731: bs2 = bs*bs;
1733: MPI_Comm_size(comm,&size);
1734: if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
1735: PetscViewerBinaryGetDescriptor(viewer,&fd);
1736: PetscBinaryRead(fd,header,4,PETSC_INT);
1737: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
1738: M = header[1]; N = header[2]; nz = header[3];
1740: if (header[3] < 0) {
1741: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
1742: }
1744: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
1746: /*
1747: This code adds extra rows to make sure the number of rows is
1748: divisible by the blocksize
1749: */
1750: mbs = M/bs;
1751: extra_rows = bs - M + bs*(mbs);
1752: if (extra_rows == bs) extra_rows = 0;
1753: else mbs++;
1754: if (extra_rows) {
1755: PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksizen");
1756: }
1758: /* read in row lengths */
1759: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
1760: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1761: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
1763: /* read in column indices */
1764: PetscMalloc((nz+extra_rows)*sizeof(int),&jj);
1765: PetscBinaryRead(fd,jj,nz,PETSC_INT);
1766: for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
1768: /* loop over row lengths determining block row lengths */
1769: ierr = PetscMalloc(mbs*sizeof(int),&browlengths);
1770: ierr = PetscMemzero(browlengths,mbs*sizeof(int));
1771: ierr = PetscMalloc(2*mbs*sizeof(int),&mask);
1772: ierr = PetscMemzero(mask,mbs*sizeof(int));
1773: masked = mask + mbs;
1774: rowcount = 0; nzcount = 0;
1775: for (i=0; i<mbs; i++) {
1776: nmask = 0;
1777: for (j=0; j<bs; j++) {
1778: kmax = rowlengths[rowcount];
1779: for (k=0; k<kmax; k++) {
1780: tmp = jj[nzcount++]/bs;
1781: if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
1782: }
1783: rowcount++;
1784: }
1785: browlengths[i] += nmask;
1786: /* zero out the mask elements we set */
1787: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1788: }
1790: /* create our matrix */
1791: MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);
1792: B = *A;
1793: a = (Mat_SeqBAIJ*)B->data;
1795: /* set matrix "i" values */
1796: a->i[0] = 0;
1797: for (i=1; i<= mbs; i++) {
1798: a->i[i] = a->i[i-1] + browlengths[i-1];
1799: a->ilen[i-1] = browlengths[i-1];
1800: }
1801: a->nz = 0;
1802: for (i=0; i<mbs; i++) a->nz += browlengths[i];
1804: /* read in nonzero values */
1805: PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);
1806: PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);
1807: for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
1809: /* set "a" and "j" values into matrix */
1810: nzcount = 0; jcount = 0;
1811: for (i=0; i<mbs; i++) {
1812: nzcountb = nzcount;
1813: nmask = 0;
1814: for (j=0; j<bs; j++) {
1815: kmax = rowlengths[i*bs+j];
1816: for (k=0; k<kmax; k++) {
1817: tmp = jj[nzcount++]/bs;
1818: if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
1819: }
1820: }
1821: /* sort the masked values */
1822: PetscSortInt(nmask,masked);
1824: /* set "j" values into matrix */
1825: maskcount = 1;
1826: for (j=0; j<nmask; j++) {
1827: a->j[jcount++] = masked[j];
1828: mask[masked[j]] = maskcount++;
1829: }
1830: /* set "a" values into matrix */
1831: ishift = bs2*a->i[i];
1832: for (j=0; j<bs; j++) {
1833: kmax = rowlengths[i*bs+j];
1834: for (k=0; k<kmax; k++) {
1835: tmp = jj[nzcountb]/bs ;
1836: block = mask[tmp] - 1;
1837: point = jj[nzcountb] - bs*tmp;
1838: idx = ishift + bs2*block + j + bs*point;
1839: a->a[idx] = (MatScalar)aa[nzcountb++];
1840: }
1841: }
1842: /* zero out the mask elements we set */
1843: for (j=0; j<nmask; j++) mask[masked[j]] = 0;
1844: }
1845: if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
1847: PetscFree(rowlengths);
1848: PetscFree(browlengths);
1849: PetscFree(aa);
1850: PetscFree(jj);
1851: PetscFree(mask);
1853: B->assembled = PETSC_TRUE;
1854:
1855: MatView_Private(B);
1856: return(0);
1857: }
1858: EXTERN_C_END
1860: #undef __FUNCT__
1862: /*@C
1863: MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
1864: compressed row) format. For good matrix assembly performance the
1865: user should preallocate the matrix storage by setting the parameter nz
1866: (or the array nnz). By setting these parameters accurately, performance
1867: during matrix assembly can be increased by more than a factor of 50.
1869: Collective on MPI_Comm
1871: Input Parameters:
1872: + comm - MPI communicator, set to PETSC_COMM_SELF
1873: . bs - size of block
1874: . m - number of rows
1875: . n - number of columns
1876: . nz - number of nonzero blocks per block row (same for all rows)
1877: - nnz - array containing the number of nonzero blocks in the various block rows
1878: (possibly different for each block row) or PETSC_NULL
1880: Output Parameter:
1881: . A - the matrix
1883: Options Database Keys:
1884: . -mat_no_unroll - uses code that does not unroll the loops in the
1885: block calculations (much slower)
1886: . -mat_block_size - size of the blocks to use
1888: Level: intermediate
1890: Notes:
1891: A nonzero block is any block that as 1 or more nonzeros in it
1893: The block AIJ format is fully compatible with standard Fortran 77
1894: storage. That is, the stored row and column indices can begin at
1895: either one (as in Fortran) or zero. See the users' manual for details.
1897: Specify the preallocated storage with either nz or nnz (not both).
1898: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1899: allocation. For additional details, see the users manual chapter on
1900: matrices.
1902: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1903: @*/
1904: int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A)
1905: {
1907:
1909: MatCreate(comm,m,n,m,n,A);
1910: MatSetType(*A,MATSEQBAIJ);
1911: MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);
1912: return(0);
1913: }
1915: #undef __FUNCT__
1917: /*@C
1918: MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
1919: per row in the matrix. For good matrix assembly performance the
1920: user should preallocate the matrix storage by setting the parameter nz
1921: (or the array nnz). By setting these parameters accurately, performance
1922: during matrix assembly can be increased by more than a factor of 50.
1924: Collective on MPI_Comm
1926: Input Parameters:
1927: + A - the matrix
1928: . bs - size of block
1929: . nz - number of block nonzeros per block row (same for all rows)
1930: - nnz - array containing the number of block nonzeros in the various block rows
1931: (possibly different for each block row) or PETSC_NULL
1933: Options Database Keys:
1934: . -mat_no_unroll - uses code that does not unroll the loops in the
1935: block calculations (much slower)
1936: . -mat_block_size - size of the blocks to use
1938: Level: intermediate
1940: Notes:
1941: The block AIJ format is fully compatible with standard Fortran 77
1942: storage. That is, the stored row and column indices can begin at
1943: either one (as in Fortran) or zero. See the users' manual for details.
1945: Specify the preallocated storage with either nz or nnz (not both).
1946: Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
1947: allocation. For additional details, see the users manual chapter on
1948: matrices.
1950: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1951: @*/
1952: int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz)
1953: {
1954: Mat_SeqBAIJ *b;
1955: int i,len,ierr,mbs,nbs,bs2,newbs = bs;
1956: PetscTruth flg;
1959: PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);
1960: if (!flg) return(0);
1962: B->preallocated = PETSC_TRUE;
1963: PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);
1964: if (nnz && newbs != bs) {
1965: SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
1966: }
1967: bs = newbs;
1969: mbs = B->m/bs;
1970: nbs = B->n/bs;
1971: bs2 = bs*bs;
1973: if (mbs*bs!=B->m || nbs*bs!=B->n) {
1974: SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
1975: }
1977: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
1978: if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
1979: if (nnz) {
1980: for (i=0; i<mbs; i++) {
1981: if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
1982: if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
1983: }
1984: }
1986: b = (Mat_SeqBAIJ*)B->data;
1987: ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);
1988: B->ops->solve = MatSolve_SeqBAIJ_Update;
1989: B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update;
1990: if (!flg) {
1991: switch (bs) {
1992: case 1:
1993: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
1994: B->ops->mult = MatMult_SeqBAIJ_1;
1995: B->ops->multadd = MatMultAdd_SeqBAIJ_1;
1996: break;
1997: case 2:
1998: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
1999: B->ops->mult = MatMult_SeqBAIJ_2;
2000: B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2001: break;
2002: case 3:
2003: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2004: B->ops->mult = MatMult_SeqBAIJ_3;
2005: B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2006: break;
2007: case 4:
2008: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2009: B->ops->mult = MatMult_SeqBAIJ_4;
2010: B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2011: break;
2012: case 5:
2013: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2014: B->ops->mult = MatMult_SeqBAIJ_5;
2015: B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2016: break;
2017: case 6:
2018: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2019: B->ops->mult = MatMult_SeqBAIJ_6;
2020: B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2021: break;
2022: case 7:
2023: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2024: B->ops->mult = MatMult_SeqBAIJ_7;
2025: B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2026: break;
2027: default:
2028: B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2029: B->ops->mult = MatMult_SeqBAIJ_N;
2030: B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2031: break;
2032: }
2033: }
2034: b->bs = bs;
2035: b->mbs = mbs;
2036: b->nbs = nbs;
2037: PetscMalloc((mbs+1)*sizeof(int),&b->imax);
2038: if (!nnz) {
2039: if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2040: else if (nz <= 0) nz = 1;
2041: for (i=0; i<mbs; i++) b->imax[i] = nz;
2042: nz = nz*mbs;
2043: } else {
2044: nz = 0;
2045: for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2046: }
2048: /* allocate the matrix space */
2049: len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2050: ierr = PetscMalloc(len,&b->a);
2051: ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));
2052: b->j = (int*)(b->a + nz*bs2);
2053: PetscMemzero(b->j,nz*sizeof(int));
2054: b->i = b->j + nz;
2055: b->singlemalloc = PETSC_TRUE;
2057: b->i[0] = 0;
2058: for (i=1; i<mbs+1; i++) {
2059: b->i[i] = b->i[i-1] + b->imax[i-1];
2060: }
2062: /* b->ilen will count nonzeros in each block row so far. */
2063: PetscMalloc((mbs+1)*sizeof(int),&b->ilen);
2064: PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2065: for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2067: b->bs = bs;
2068: b->bs2 = bs2;
2069: b->mbs = mbs;
2070: b->nz = 0;
2071: b->maxnz = nz*bs2;
2072: B->info.nz_unneeded = (PetscReal)b->maxnz;
2073: return(0);
2074: }