Actual source code: aijnode.c
1: /*$Id: aijnode.c,v 1.128 2001/08/07 03:02:47 balay Exp $*/
2: /*
3: This file provides high performance routines for the AIJ (compressed row)
4: format by taking advantage of rows with identical nonzero structure (I-nodes).
5: */
6: #include src/mat/impls/aij/seq/aij.h
7: #include src/vec/vecimpl.h
9: EXTERN int Mat_AIJ_CheckInode(Mat,PetscTruth);
10: EXTERN int MatSolve_SeqAIJ_Inode(Mat,Vec,Vec);
11: EXTERN int MatLUFactorNumeric_SeqAIJ_Inode(Mat,Mat *);
13: EXTERN int MatMult_SeqAIJ(Mat,Vec,Vec);
14: EXTERN int MatMultAdd_SeqAIJ(Mat,Vec,Vec,Vec);
15: EXTERN int MatSolve_SeqAIJ(Mat,Vec,Vec);
16: EXTERN int MatLUFactorNumeric_SeqAIJ(Mat,Mat*);
17: EXTERN int MatGetRowIJ_SeqAIJ(Mat,int,PetscTruth,int*,int**,int**,PetscTruth*);
18: EXTERN int MatRestoreRowIJ_SeqAIJ(Mat,int,PetscTruth,int*,int**,int**,PetscTruth*);
19: EXTERN int MatGetColumnIJ_SeqAIJ(Mat,int,PetscTruth,int*,int**,int**,PetscTruth*);
20: EXTERN int MatRestoreColumnIJ_SeqAIJ(Mat,int,PetscTruth,int*,int**,int**,PetscTruth*);
22: #undef __FUNCT__
24: static int Mat_AIJ_CreateColInode(Mat A,int* size,int ** ns)
25: {
26: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
27: int ierr,i,count,m,n,min_mn,*ns_row,*ns_col;
30: n = A->n;
31: m = A->m;
32: ns_row = a->inode.size;
33:
34: min_mn = (m < n) ? m : n;
35: if (!ns) {
36: for (count=0,i=0; count<min_mn; count+=ns_row[i],i++);
37: for(; count+1 < n; count++,i++);
38: if (count < n) {
39: i++;
40: }
41: *size = i;
42: return(0);
43: }
44: PetscMalloc((n+1)*sizeof(int),&ns_col);
45:
46: /* Use the same row structure wherever feasible. */
47: for (count=0,i=0; count<min_mn; count+=ns_row[i],i++) {
48: ns_col[i] = ns_row[i];
49: }
51: /* if m < n; pad up the remainder with inode_limit */
52: for(; count+1 < n; count++,i++) {
53: ns_col[i] = 1;
54: }
55: /* The last node is the odd ball. padd it up with the remaining rows; */
56: if (count < n) {
57: ns_col[i] = n - count;
58: i++;
59: } else if (count > n) {
60: /* Adjust for the over estimation */
61: ns_col[i-1] += n - count;
62: }
63: *size = i;
64: *ns = ns_col;
65: return(0);
66: }
69: /*
70: This builds symmetric version of nonzero structure,
71: */
72: #undef __FUNCT__
74: static int MatGetRowIJ_SeqAIJ_Inode_Symmetric(Mat A,int **iia,int **jja,int ishift,int oshift)
75: {
76: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
77: int *work,*ia,*ja,*j,nz,nslim_row,nslim_col,m,row,col,*jmax,n,ierr;
78: int *tns,*tvc,*ns_row = a->inode.size,*ns_col,nsz,i1,i2,*ai= a->i,*aj = a->j;
81: nslim_row = a->inode.node_count;
82: m = A->m;
83: n = A->n;
84: if (m != n) SETERRQ(1,"MatGetRowIJ_SeqAIJ_Inode_Symmetric: Matrix shoul be square");
85:
86: /* Use the row_inode as column_inode */
87: nslim_col = nslim_row;
88: ns_col = ns_row;
90: /* allocate space for reformated inode structure */
91: PetscMalloc((nslim_col+1)*sizeof(int),&tns);
92: PetscMalloc((n+1)*sizeof(int),&tvc);
93: for (i1=0,tns[0]=0; i1<nslim_col; ++i1) tns[i1+1] = tns[i1]+ ns_row[i1];
95: for (i1=0,col=0; i1<nslim_col; ++i1){
96: nsz = ns_col[i1];
97: for (i2=0; i2<nsz; ++i2,++col)
98: tvc[col] = i1;
99: }
100: /* allocate space for row pointers */
101: PetscMalloc((nslim_row+1)*sizeof(int),&ia);
102: *iia = ia;
103: PetscMemzero(ia,(nslim_row+1)*sizeof(int));
104: PetscMalloc((nslim_row+1)*sizeof(int),&work);
106: /* determine the number of columns in each row */
107: ia[0] = oshift;
108: for (i1=0,row=0 ; i1<nslim_row; row+=ns_row[i1],i1++) {
110: j = aj + ai[row] + ishift;
111: jmax = aj + ai[row+1] + ishift;
112: i2 = 0;
113: col = *j++ + ishift;
114: i2 = tvc[col];
115: while (i2<i1 && j<jmax) { /* 1.[-xx-d-xx--] 2.[-xx-------],off-diagonal elemets */
116: ia[i1+1]++;
117: ia[i2+1]++;
118: i2++; /* Start col of next node */
119: while(((col=*j+ishift)<tns[i2]) && (j<jmax)) ++j;
120: i2 = tvc[col];
121: }
122: if(i2 == i1) ia[i2+1]++; /* now the diagonal element */
123: }
125: /* shift ia[i] to point to next row */
126: for (i1=1; i1<nslim_row+1; i1++) {
127: row = ia[i1-1];
128: ia[i1] += row;
129: work[i1-1] = row - oshift;
130: }
132: /* allocate space for column pointers */
133: nz = ia[nslim_row] + (!ishift);
134: PetscMalloc(nz*sizeof(int),&ja);
135: *jja = ja;
137: /* loop over lower triangular part putting into ja */
138: for (i1=0,row=0; i1<nslim_row; row += ns_row[i1],i1++) {
139: j = aj + ai[row] + ishift;
140: jmax = aj + ai[row+1] + ishift;
141: i2 = 0; /* Col inode index */
142: col = *j++ + ishift;
143: i2 = tvc[col];
144: while (i2<i1 && j<jmax) {
145: ja[work[i2]++] = i1 + oshift;
146: ja[work[i1]++] = i2 + oshift;
147: ++i2;
148: while(((col=*j+ishift)< tns[i2])&&(j<jmax)) ++j; /* Skip rest col indices in this node */
149: i2 = tvc[col];
150: }
151: if (i2 == i1) ja[work[i1]++] = i2 + oshift;
153: }
154: PetscFree(work);
155: PetscFree(tns);
156: PetscFree(tvc);
157: return(0);
158: }
160: /*
161: This builds nonsymmetric version of nonzero structure,
162: */
163: #undef __FUNCT__
165: static int MatGetRowIJ_SeqAIJ_Inode_Nonsymmetric(Mat A,int **iia,int **jja,int ishift,int oshift)
166: {
167: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
168: int *work,*ia,*ja,*j,nz,nslim_row,n,row,col,ierr,*ns_col,nslim_col;
169: int *tns,*tvc,*ns_row = a->inode.size,nsz,i1,i2,*ai= a->i,*aj = a->j;
172: nslim_row = a->inode.node_count;
173: n = A->n;
175: /* Create The column_inode for this matrix */
176: Mat_AIJ_CreateColInode(A,&nslim_col,&ns_col);
177:
178: /* allocate space for reformated column_inode structure */
179: PetscMalloc((nslim_col +1)*sizeof(int),&tns);
180: PetscMalloc((n +1)*sizeof(int),&tvc);
181: for (i1=0,tns[0]=0; i1<nslim_col; ++i1) tns[i1+1] = tns[i1] + ns_col[i1];
183: for (i1=0,col=0; i1<nslim_col; ++i1){
184: nsz = ns_col[i1];
185: for (i2=0; i2<nsz; ++i2,++col)
186: tvc[col] = i1;
187: }
188: /* allocate space for row pointers */
189: PetscMalloc((nslim_row+1)*sizeof(int),&ia);
190: *iia = ia;
191: PetscMemzero(ia,(nslim_row+1)*sizeof(int));
192: PetscMalloc((nslim_row+1)*sizeof(int),&work);
194: /* determine the number of columns in each row */
195: ia[0] = oshift;
196: for (i1=0,row=0; i1<nslim_row; row+=ns_row[i1],i1++) {
197: j = aj + ai[row] + ishift;
198: col = *j++ + ishift;
199: i2 = tvc[col];
200: nz = ai[row+1] - ai[row];
201: while (nz-- > 0) { /* off-diagonal elemets */
202: ia[i1+1]++;
203: i2++; /* Start col of next node */
204: while (((col = *j++ + ishift) < tns[i2]) && nz > 0) {nz--;}
205: i2 = tvc[col];
206: }
207: }
209: /* shift ia[i] to point to next row */
210: for (i1=1; i1<nslim_row+1; i1++) {
211: row = ia[i1-1];
212: ia[i1] += row;
213: work[i1-1] = row - oshift;
214: }
216: /* allocate space for column pointers */
217: nz = ia[nslim_row] + (!ishift);
218: PetscMalloc(nz*sizeof(int),&ja);
219: *jja = ja;
221: /* loop over matrix putting into ja */
222: for (i1=0,row=0; i1<nslim_row; row+=ns_row[i1],i1++) {
223: j = aj + ai[row] + ishift;
224: i2 = 0; /* Col inode index */
225: col = *j++ + ishift;
226: i2 = tvc[col];
227: nz = ai[row+1] - ai[row];
228: while (nz-- > 0) {
229: ja[work[i1]++] = i2 + oshift;
230: ++i2;
231: while(((col = *j++ + ishift) < tns[i2]) && nz > 0) {nz--;}
232: i2 = tvc[col];
233: }
234: }
235: PetscFree(ns_col);
236: PetscFree(work);
237: PetscFree(tns);
238: PetscFree(tvc);
239: return(0);
240: }
242: #undef __FUNCT__
244: static int MatGetRowIJ_SeqAIJ_Inode(Mat A,int oshift,PetscTruth symmetric,int *n,int **ia,int **ja,PetscTruth *done)
245: {
246: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
247: int ierr,ishift;
250: *n = a->inode.node_count;
251: if (!ia) return(0);
253: ishift = a->indexshift;
254: if (symmetric) {
255: MatGetRowIJ_SeqAIJ_Inode_Symmetric(A,ia,ja,ishift,oshift);
256: } else {
257: MatGetRowIJ_SeqAIJ_Inode_Nonsymmetric(A,ia,ja,ishift,oshift);
258: }
259: return(0);
260: }
262: #undef __FUNCT__
264: static int MatRestoreRowIJ_SeqAIJ_Inode(Mat A,int oshift,PetscTruth symmetric,int *n,int **ia,int **ja,PetscTruth *done)
265: {
269: if (!ia) return(0);
270: PetscFree(*ia);
271: PetscFree(*ja);
272: return(0);
273: }
275: /* ----------------------------------------------------------- */
277: #undef __FUNCT__
279: static int MatGetColumnIJ_SeqAIJ_Inode_Nonsymmetric(Mat A,int **iia,int **jja,int ishift,int oshift)
280: {
281: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
282: int *work,*ia,*ja,*j,nz,nslim_row, n,row,col,ierr,*ns_col,nslim_col;
283: int *tns,*tvc,*ns_row = a->inode.size,nsz,i1,i2,*ai= a->i,*aj = a->j;
286: nslim_row = a->inode.node_count;
287: n = A->n;
289: /* Create The column_inode for this matrix */
290: Mat_AIJ_CreateColInode(A,&nslim_col,&ns_col);
291:
292: /* allocate space for reformated column_inode structure */
293: PetscMalloc((nslim_col + 1)*sizeof(int),&tns);
294: PetscMalloc((n + 1)*sizeof(int),&tvc);
295: for (i1=0,tns[0]=0; i1<nslim_col; ++i1) tns[i1+1] = tns[i1] + ns_col[i1];
297: for (i1=0,col=0; i1<nslim_col; ++i1){
298: nsz = ns_col[i1];
299: for (i2=0; i2<nsz; ++i2,++col)
300: tvc[col] = i1;
301: }
302: /* allocate space for column pointers */
303: PetscMalloc((nslim_col+1)*sizeof(int),&ia);
304: *iia = ia;
305: PetscMemzero(ia,(nslim_col+1)*sizeof(int));
306: PetscMalloc((nslim_col+1)*sizeof(int),&work);
308: /* determine the number of columns in each row */
309: ia[0] = oshift;
310: for (i1=0,row=0; i1<nslim_row; row+=ns_row[i1],i1++) {
311: j = aj + ai[row] + ishift;
312: col = *j++ + ishift;
313: i2 = tvc[col];
314: nz = ai[row+1] - ai[row];
315: while (nz-- > 0) { /* off-diagonal elemets */
316: /* ia[i1+1]++; */
317: ia[i2+1]++;
318: i2++;
319: while (((col = *j++ + ishift) < tns[i2]) && nz > 0) {nz--;}
320: i2 = tvc[col];
321: }
322: }
324: /* shift ia[i] to point to next col */
325: for (i1=1; i1<nslim_col+1; i1++) {
326: col = ia[i1-1];
327: ia[i1] += col;
328: work[i1-1] = col - oshift;
329: }
331: /* allocate space for column pointers */
332: nz = ia[nslim_col] + (!ishift);
333: PetscMalloc(nz*sizeof(int),&ja);
334: *jja = ja;
336: /* loop over matrix putting into ja */
337: for (i1=0,row=0; i1<nslim_row; row+=ns_row[i1],i1++) {
338: j = aj + ai[row] + ishift;
339: i2 = 0; /* Col inode index */
340: col = *j++ + ishift;
341: i2 = tvc[col];
342: nz = ai[row+1] - ai[row];
343: while (nz-- > 0) {
344: /* ja[work[i1]++] = i2 + oshift; */
345: ja[work[i2]++] = i1 + oshift;
346: i2++;
347: while(((col = *j++ + ishift) < tns[i2]) && nz > 0) {nz--;}
348: i2 = tvc[col];
349: }
350: }
351: PetscFree(ns_col);
352: PetscFree(work);
353: PetscFree(tns);
354: PetscFree(tvc);
355: return(0);
356: }
358: #undef __FUNCT__
360: static int MatGetColumnIJ_SeqAIJ_Inode(Mat A,int oshift,PetscTruth symmetric,int *n,int **ia,int **ja,PetscTruth *done)
361: {
362: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
363: int ierr,ishift;
366: Mat_AIJ_CreateColInode(A,n,PETSC_NULL);
367: if (!ia) return(0);
369: ishift = a->indexshift;
370: if (symmetric) {
371: /* Since the indices are symmetric it does'nt matter */
372: MatGetRowIJ_SeqAIJ_Inode_Symmetric(A,ia,ja,ishift,oshift);
373: } else {
374: MatGetColumnIJ_SeqAIJ_Inode_Nonsymmetric(A,ia,ja,ishift,oshift);
375: }
376: return(0);
377: }
379: #undef __FUNCT__
381: static int MatRestoreColumnIJ_SeqAIJ_Inode(Mat A,int oshift,PetscTruth symmetric,int *n,int **ia,int **ja,PetscTruth *done)
382: {
386: if (!ia) return(0);
387: PetscFree(*ia);
388: PetscFree(*ja);
389: return(0);
390: }
392: /* ----------------------------------------------------------- */
394: #undef __FUNCT__
396: static int MatMult_SeqAIJ_Inode(Mat A,Vec xx,Vec yy)
397: {
398: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
399: PetscScalar sum1,sum2,sum3,sum4,sum5,tmp0,tmp1;
400: PetscScalar *v1,*v2,*v3,*v4,*v5,*x,*y;
401: int ierr,*idx,i1,i2,n,i,row,node_max,*ns,*ii,nsz,sz;
402: int shift = a->indexshift;
403:
404: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
405: #pragma disjoint(*x,*y,*v1,*v2,*v3,*v4,*v5)
406: #endif
409: if (!a->inode.size) SETERRQ(PETSC_ERR_COR,"Missing Inode Structure");
410: node_max = a->inode.node_count;
411: ns = a->inode.size; /* Node Size array */
412: VecGetArray(xx,&x);
413: VecGetArray(yy,&y);
414: x = x + shift; /* shift for Fortran start by 1 indexing */
415: idx = a->j;
416: v1 = a->a;
417: ii = a->i;
419: for (i = 0,row = 0; i< node_max; ++i){
420: nsz = ns[i];
421: n = ii[1] - ii[0];
422: ii += nsz;
423: sz = n; /*No of non zeros in this row */
424: /* Switch on the size of Node */
425: switch (nsz){ /* Each loop in 'case' is unrolled */
426: case 1 :
427: sum1 = 0;
428:
429: for(n = 0; n< sz-1; n+=2) {
430: i1 = idx[0]; /* The instructions are ordered to */
431: i2 = idx[1]; /* make the compiler's job easy */
432: idx += 2;
433: tmp0 = x[i1];
434: tmp1 = x[i2];
435: sum1 += v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
436: }
437:
438: if (n == sz-1){ /* Take care of the last nonzero */
439: tmp0 = x[*idx++];
440: sum1 += *v1++ * tmp0;
441: }
442: y[row++]=sum1;
443: break;
444: case 2:
445: sum1 = 0;
446: sum2 = 0;
447: v2 = v1 + n;
448:
449: for (n = 0; n< sz-1; n+=2) {
450: i1 = idx[0];
451: i2 = idx[1];
452: idx += 2;
453: tmp0 = x[i1];
454: tmp1 = x[i2];
455: sum1 += v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
456: sum2 += v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
457: }
458: if (n == sz-1){
459: tmp0 = x[*idx++];
460: sum1 += *v1++ * tmp0;
461: sum2 += *v2++ * tmp0;
462: }
463: y[row++]=sum1;
464: y[row++]=sum2;
465: v1 =v2; /* Since the next block to be processed starts there*/
466: idx +=sz;
467: break;
468: case 3:
469: sum1 = 0;
470: sum2 = 0;
471: sum3 = 0;
472: v2 = v1 + n;
473: v3 = v2 + n;
474:
475: for (n = 0; n< sz-1; n+=2) {
476: i1 = idx[0];
477: i2 = idx[1];
478: idx += 2;
479: tmp0 = x[i1];
480: tmp1 = x[i2];
481: sum1 += v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
482: sum2 += v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
483: sum3 += v3[0] * tmp0 + v3[1] * tmp1; v3 += 2;
484: }
485: if (n == sz-1){
486: tmp0 = x[*idx++];
487: sum1 += *v1++ * tmp0;
488: sum2 += *v2++ * tmp0;
489: sum3 += *v3++ * tmp0;
490: }
491: y[row++]=sum1;
492: y[row++]=sum2;
493: y[row++]=sum3;
494: v1 =v3; /* Since the next block to be processed starts there*/
495: idx +=2*sz;
496: break;
497: case 4:
498: sum1 = 0;
499: sum2 = 0;
500: sum3 = 0;
501: sum4 = 0;
502: v2 = v1 + n;
503: v3 = v2 + n;
504: v4 = v3 + n;
505:
506: for (n = 0; n< sz-1; n+=2) {
507: i1 = idx[0];
508: i2 = idx[1];
509: idx += 2;
510: tmp0 = x[i1];
511: tmp1 = x[i2];
512: sum1 += v1[0] * tmp0 + v1[1] *tmp1; v1 += 2;
513: sum2 += v2[0] * tmp0 + v2[1] *tmp1; v2 += 2;
514: sum3 += v3[0] * tmp0 + v3[1] *tmp1; v3 += 2;
515: sum4 += v4[0] * tmp0 + v4[1] *tmp1; v4 += 2;
516: }
517: if (n == sz-1){
518: tmp0 = x[*idx++];
519: sum1 += *v1++ * tmp0;
520: sum2 += *v2++ * tmp0;
521: sum3 += *v3++ * tmp0;
522: sum4 += *v4++ * tmp0;
523: }
524: y[row++]=sum1;
525: y[row++]=sum2;
526: y[row++]=sum3;
527: y[row++]=sum4;
528: v1 =v4; /* Since the next block to be processed starts there*/
529: idx +=3*sz;
530: break;
531: case 5:
532: sum1 = 0;
533: sum2 = 0;
534: sum3 = 0;
535: sum4 = 0;
536: sum5 = 0;
537: v2 = v1 + n;
538: v3 = v2 + n;
539: v4 = v3 + n;
540: v5 = v4 + n;
541:
542: for (n = 0; n<sz-1; n+=2) {
543: i1 = idx[0];
544: i2 = idx[1];
545: idx += 2;
546: tmp0 = x[i1];
547: tmp1 = x[i2];
548: sum1 += v1[0] * tmp0 + v1[1] *tmp1; v1 += 2;
549: sum2 += v2[0] * tmp0 + v2[1] *tmp1; v2 += 2;
550: sum3 += v3[0] * tmp0 + v3[1] *tmp1; v3 += 2;
551: sum4 += v4[0] * tmp0 + v4[1] *tmp1; v4 += 2;
552: sum5 += v5[0] * tmp0 + v5[1] *tmp1; v5 += 2;
553: }
554: if (n == sz-1){
555: tmp0 = x[*idx++];
556: sum1 += *v1++ * tmp0;
557: sum2 += *v2++ * tmp0;
558: sum3 += *v3++ * tmp0;
559: sum4 += *v4++ * tmp0;
560: sum5 += *v5++ * tmp0;
561: }
562: y[row++]=sum1;
563: y[row++]=sum2;
564: y[row++]=sum3;
565: y[row++]=sum4;
566: y[row++]=sum5;
567: v1 =v5; /* Since the next block to be processed starts there */
568: idx +=4*sz;
569: break;
570: default :
571: SETERRQ(PETSC_ERR_COR,"Node size not yet supported");
572: }
573: }
574: VecRestoreArray(xx,&x);
575: VecRestoreArray(yy,&y);
576: PetscLogFlops(2*a->nz - A->m);
577: return(0);
578: }
579: /* ----------------------------------------------------------- */
580: /* Almost same code as the MatMult_SeqAij_Inode() */
581: #undef __FUNCT__
583: static int MatMultAdd_SeqAIJ_Inode(Mat A,Vec xx,Vec zz,Vec yy)
584: {
585: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
586: PetscScalar sum1,sum2,sum3,sum4,sum5,tmp0,tmp1;
587: PetscScalar *v1,*v2,*v3,*v4,*v5,*x,*y,*z,*zt;
588: int ierr,*idx,i1,i2,n,i,row,node_max,*ns,*ii,nsz,sz;
589: int shift = a->indexshift;
590:
592: if (!a->inode.size) SETERRQ(PETSC_ERR_COR,"Missing Inode Structure");
593: node_max = a->inode.node_count;
594: ns = a->inode.size; /* Node Size array */
595: VecGetArray(xx,&x);
596: VecGetArray(yy,&y);
597: if (zz != yy) {
598: VecGetArray(zz,&z);
599: } else {
600: z = y;
601: }
602: zt = z;
604: x = x + shift; /* shift for Fortran start by 1 indexing */
605: idx = a->j;
606: v1 = a->a;
607: ii = a->i;
609: for (i = 0,row = 0; i< node_max; ++i){
610: nsz = ns[i];
611: n = ii[1] - ii[0];
612: ii += nsz;
613: sz = n; /*No of non zeros in this row */
614: /* Switch on the size of Node */
615: switch (nsz){ /* Each loop in 'case' is unrolled */
616: case 1 :
617: sum1 = *zt++;
618:
619: for(n = 0; n< sz-1; n+=2) {
620: i1 = idx[0]; /* The instructions are ordered to */
621: i2 = idx[1]; /* make the compiler's job easy */
622: idx += 2;
623: tmp0 = x[i1];
624: tmp1 = x[i2];
625: sum1 += v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
626: }
627:
628: if(n == sz-1){ /* Take care of the last nonzero */
629: tmp0 = x[*idx++];
630: sum1 += *v1++ * tmp0;
631: }
632: y[row++]=sum1;
633: break;
634: case 2:
635: sum1 = *zt++;
636: sum2 = *zt++;
637: v2 = v1 + n;
638:
639: for(n = 0; n< sz-1; n+=2) {
640: i1 = idx[0];
641: i2 = idx[1];
642: idx += 2;
643: tmp0 = x[i1];
644: tmp1 = x[i2];
645: sum1 += v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
646: sum2 += v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
647: }
648: if(n == sz-1){
649: tmp0 = x[*idx++];
650: sum1 += *v1++ * tmp0;
651: sum2 += *v2++ * tmp0;
652: }
653: y[row++]=sum1;
654: y[row++]=sum2;
655: v1 =v2; /* Since the next block to be processed starts there*/
656: idx +=sz;
657: break;
658: case 3:
659: sum1 = *zt++;
660: sum2 = *zt++;
661: sum3 = *zt++;
662: v2 = v1 + n;
663: v3 = v2 + n;
664:
665: for (n = 0; n< sz-1; n+=2) {
666: i1 = idx[0];
667: i2 = idx[1];
668: idx += 2;
669: tmp0 = x[i1];
670: tmp1 = x[i2];
671: sum1 += v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
672: sum2 += v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
673: sum3 += v3[0] * tmp0 + v3[1] * tmp1; v3 += 2;
674: }
675: if (n == sz-1){
676: tmp0 = x[*idx++];
677: sum1 += *v1++ * tmp0;
678: sum2 += *v2++ * tmp0;
679: sum3 += *v3++ * tmp0;
680: }
681: y[row++]=sum1;
682: y[row++]=sum2;
683: y[row++]=sum3;
684: v1 =v3; /* Since the next block to be processed starts there*/
685: idx +=2*sz;
686: break;
687: case 4:
688: sum1 = *zt++;
689: sum2 = *zt++;
690: sum3 = *zt++;
691: sum4 = *zt++;
692: v2 = v1 + n;
693: v3 = v2 + n;
694: v4 = v3 + n;
695:
696: for (n = 0; n< sz-1; n+=2) {
697: i1 = idx[0];
698: i2 = idx[1];
699: idx += 2;
700: tmp0 = x[i1];
701: tmp1 = x[i2];
702: sum1 += v1[0] * tmp0 + v1[1] *tmp1; v1 += 2;
703: sum2 += v2[0] * tmp0 + v2[1] *tmp1; v2 += 2;
704: sum3 += v3[0] * tmp0 + v3[1] *tmp1; v3 += 2;
705: sum4 += v4[0] * tmp0 + v4[1] *tmp1; v4 += 2;
706: }
707: if (n == sz-1){
708: tmp0 = x[*idx++];
709: sum1 += *v1++ * tmp0;
710: sum2 += *v2++ * tmp0;
711: sum3 += *v3++ * tmp0;
712: sum4 += *v4++ * tmp0;
713: }
714: y[row++]=sum1;
715: y[row++]=sum2;
716: y[row++]=sum3;
717: y[row++]=sum4;
718: v1 =v4; /* Since the next block to be processed starts there*/
719: idx +=3*sz;
720: break;
721: case 5:
722: sum1 = *zt++;
723: sum2 = *zt++;
724: sum3 = *zt++;
725: sum4 = *zt++;
726: sum5 = *zt++;
727: v2 = v1 + n;
728: v3 = v2 + n;
729: v4 = v3 + n;
730: v5 = v4 + n;
731:
732: for (n = 0; n<sz-1; n+=2) {
733: i1 = idx[0];
734: i2 = idx[1];
735: idx += 2;
736: tmp0 = x[i1];
737: tmp1 = x[i2];
738: sum1 += v1[0] * tmp0 + v1[1] *tmp1; v1 += 2;
739: sum2 += v2[0] * tmp0 + v2[1] *tmp1; v2 += 2;
740: sum3 += v3[0] * tmp0 + v3[1] *tmp1; v3 += 2;
741: sum4 += v4[0] * tmp0 + v4[1] *tmp1; v4 += 2;
742: sum5 += v5[0] * tmp0 + v5[1] *tmp1; v5 += 2;
743: }
744: if(n == sz-1){
745: tmp0 = x[*idx++];
746: sum1 += *v1++ * tmp0;
747: sum2 += *v2++ * tmp0;
748: sum3 += *v3++ * tmp0;
749: sum4 += *v4++ * tmp0;
750: sum5 += *v5++ * tmp0;
751: }
752: y[row++]=sum1;
753: y[row++]=sum2;
754: y[row++]=sum3;
755: y[row++]=sum4;
756: y[row++]=sum5;
757: v1 =v5; /* Since the next block to be processed starts there */
758: idx +=4*sz;
759: break;
760: default :
761: SETERRQ(PETSC_ERR_COR,"Node size not yet supported");
762: }
763: }
764: VecRestoreArray(xx,&x);
765: VecRestoreArray(yy,&y);
766: if (zz != yy) {
767: VecRestoreArray(zz,&z);
768: }
769: PetscLogFlops(2*a->nz);
770: return(0);
771: }
772: /* ----------------------------------------------------------- */
773: EXTERN int MatColoringPatch_SeqAIJ_Inode(Mat,int,int,int *,ISColoring *);
775: /*
776: samestructure indicates that the matrix has not changed its nonzero structure so we
777: do not need to recompute the inodes
778: */
779: #undef __FUNCT__
781: int Mat_AIJ_CheckInode(Mat A,PetscTruth samestructure)
782: {
783: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
784: int ierr,i,j,m,nzx,nzy,*idx,*idy,*ns,*ii,node_count,blk_size;
785: PetscTruth flag,flg;
788: if (samestructure && a->inode.checked) return(0);
790: a->inode.checked = PETSC_TRUE;
792: /* Notes: We set a->inode.limit=5 in MatCreateSeqAIJ(). */
793: if (!a->inode.use) {PetscLogInfo(A,"Mat_AIJ_CheckInode: Not using Inode routines due to MatSetOption(MAT_DO_NOT_USE_INODESn"); return(0);}
794: PetscOptionsHasName(A->prefix,"-mat_aij_no_inode",&flg);
795: if (flg) {PetscLogInfo(A,"Mat_AIJ_CheckInode: Not using Inode routines due to -mat_aij_no_inoden");return(0);}
796: PetscOptionsHasName(A->prefix,"-mat_no_unroll",&flg);
797: if (flg) {PetscLogInfo(A,"Mat_AIJ_CheckInode: Not using Inode routines due to -mat_no_unrolln");return(0);}
798: PetscOptionsGetInt(A->prefix,"-mat_aij_inode_limit",&a->inode.limit,PETSC_NULL);
799: if (a->inode.limit > a->inode.max_limit) a->inode.limit = a->inode.max_limit;
800: m = A->m;
801: if (a->inode.size) {ns = a->inode.size;}
802: else {PetscMalloc((m+1)*sizeof(int),&ns);}
804: i = 0;
805: node_count = 0;
806: idx = a->j;
807: ii = a->i;
808: while (i < m){ /* For each row */
809: nzx = ii[i+1] - ii[i]; /* Number of nonzeros */
810: /* Limits the number of elements in a node to 'a->inode.limit' */
811: for (j=i+1,idy=idx,blk_size=1; j<m && blk_size <a->inode.limit; ++j,++blk_size) {
812: nzy = ii[j+1] - ii[j]; /* Same number of nonzeros */
813: if (nzy != nzx) break;
814: idy += nzx; /* Same nonzero pattern */
815: PetscMemcmp(idx,idy,nzx*sizeof(int),&flag);
816: if (flag == PETSC_FALSE) break;
817: }
818: ns[node_count++] = blk_size;
819: idx += blk_size*nzx;
820: i = j;
821: }
822: /* If not enough inodes found,, do not use inode version of the routines */
823: if (!a->inode.size && m && node_count > 0.9*m) {
824: PetscFree(ns);
825: A->ops->mult = MatMult_SeqAIJ;
826: A->ops->multadd = MatMultAdd_SeqAIJ;
827: A->ops->solve = MatSolve_SeqAIJ;
828: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
829: A->ops->getrowij = MatGetRowIJ_SeqAIJ;
830: A->ops->restorerowij = MatRestoreRowIJ_SeqAIJ;
831: A->ops->getcolumnij = MatGetColumnIJ_SeqAIJ;
832: A->ops->restorecolumnij = MatRestoreColumnIJ_SeqAIJ;
833: A->ops->coloringpatch = 0;
834: a->inode.node_count = 0;
835: a->inode.size = 0;
836: PetscLogInfo(A,"Mat_AIJ_CheckInode: Found %d nodes out of %d rows. Not using Inode routinesn",node_count,m);
837: } else {
838: A->ops->mult = MatMult_SeqAIJ_Inode;
839: A->ops->multadd = MatMultAdd_SeqAIJ_Inode;
840: A->ops->solve = MatSolve_SeqAIJ_Inode;
841: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode;
842: A->ops->getrowij = MatGetRowIJ_SeqAIJ_Inode;
843: A->ops->restorerowij = MatRestoreRowIJ_SeqAIJ_Inode;
844: A->ops->getcolumnij = MatGetColumnIJ_SeqAIJ_Inode;
845: A->ops->restorecolumnij = MatRestoreColumnIJ_SeqAIJ_Inode;
846: A->ops->coloringpatch = MatColoringPatch_SeqAIJ_Inode;
847: a->inode.node_count = node_count;
848: a->inode.size = ns;
849: PetscLogInfo(A,"Mat_AIJ_CheckInode: Found %d nodes of %d. Limit used: %d. Using Inode routinesn",node_count,m,a->inode.limit);
850: }
851: return(0);
852: }
854: /* ----------------------------------------------------------- */
855: #undef __FUNCT__
857: int MatSolve_SeqAIJ_Inode(Mat A,Vec bb,Vec xx)
858: {
859: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
860: IS iscol = a->col,isrow = a->row;
861: int *r,*c,ierr,i,j,n = A->m,*ai = a->i,nz,shift = a->indexshift,*a_j = a->j;
862: int node_max,*ns,row,nsz,aii,*vi,*ad,*aj,i0,i1,*rout,*cout;
863: PetscScalar *x,*b,*a_a = a->a,*tmp,*tmps,*aa,tmp0,tmp1;
864: PetscScalar sum1,sum2,sum3,sum4,sum5,*v1,*v2,*v3,*v4,*v5;
867: if (A->factor!=FACTOR_LU) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unfactored matrix");
868: if (!a->inode.size) SETERRQ(PETSC_ERR_COR,"Missing Inode Structure");
869: node_max = a->inode.node_count;
870: ns = a->inode.size; /* Node Size array */
872: VecGetArray(bb,&b);
873: VecGetArray(xx,&x);
874: tmp = a->solve_work;
875:
876: ISGetIndices(isrow,&rout); r = rout;
877: ISGetIndices(iscol,&cout); c = cout + (n-1);
878:
879: /* forward solve the lower triangular */
880: tmps = tmp + shift;
881: aa = a_a +shift;
882: aj = a_j + shift;
883: ad = a->diag;
885: for (i = 0,row = 0; i< node_max; ++i){
886: nsz = ns[i];
887: aii = ai[row];
888: v1 = aa + aii;
889: vi = aj + aii;
890: nz = ad[row]- aii;
891:
892: switch (nsz){ /* Each loop in 'case' is unrolled */
893: case 1 :
894: sum1 = b[*r++];
895: /* while (nz--) sum1 -= *v1++ *tmps[*vi++];*/
896: for(j=0; j<nz-1; j+=2){
897: i0 = vi[0];
898: i1 = vi[1];
899: vi +=2;
900: tmp0 = tmps[i0];
901: tmp1 = tmps[i1];
902: sum1 -= v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
903: }
904: if(j == nz-1){
905: tmp0 = tmps[*vi++];
906: sum1 -= *v1++ *tmp0;
907: }
908: tmp[row ++]=sum1;
909: break;
910: case 2:
911: sum1 = b[*r++];
912: sum2 = b[*r++];
913: v2 = aa + ai[row+1];
915: for(j=0; j<nz-1; j+=2){
916: i0 = vi[0];
917: i1 = vi[1];
918: vi +=2;
919: tmp0 = tmps[i0];
920: tmp1 = tmps[i1];
921: sum1 -= v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
922: sum2 -= v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
923: }
924: if(j == nz-1){
925: tmp0 = tmps[*vi++];
926: sum1 -= *v1++ *tmp0;
927: sum2 -= *v2++ *tmp0;
928: }
929: sum2 -= *v2++ * sum1;
930: tmp[row ++]=sum1;
931: tmp[row ++]=sum2;
932: break;
933: case 3:
934: sum1 = b[*r++];
935: sum2 = b[*r++];
936: sum3 = b[*r++];
937: v2 = aa + ai[row+1];
938: v3 = aa + ai[row+2];
939:
940: for (j=0; j<nz-1; j+=2){
941: i0 = vi[0];
942: i1 = vi[1];
943: vi +=2;
944: tmp0 = tmps[i0];
945: tmp1 = tmps[i1];
946: sum1 -= v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
947: sum2 -= v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
948: sum3 -= v3[0] * tmp0 + v3[1] * tmp1; v3 += 2;
949: }
950: if (j == nz-1){
951: tmp0 = tmps[*vi++];
952: sum1 -= *v1++ *tmp0;
953: sum2 -= *v2++ *tmp0;
954: sum3 -= *v3++ *tmp0;
955: }
956: sum2 -= *v2++ * sum1;
957: sum3 -= *v3++ * sum1;
958: sum3 -= *v3++ * sum2;
959: tmp[row ++]=sum1;
960: tmp[row ++]=sum2;
961: tmp[row ++]=sum3;
962: break;
963:
964: case 4:
965: sum1 = b[*r++];
966: sum2 = b[*r++];
967: sum3 = b[*r++];
968: sum4 = b[*r++];
969: v2 = aa + ai[row+1];
970: v3 = aa + ai[row+2];
971: v4 = aa + ai[row+3];
972:
973: for (j=0; j<nz-1; j+=2){
974: i0 = vi[0];
975: i1 = vi[1];
976: vi +=2;
977: tmp0 = tmps[i0];
978: tmp1 = tmps[i1];
979: sum1 -= v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
980: sum2 -= v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
981: sum3 -= v3[0] * tmp0 + v3[1] * tmp1; v3 += 2;
982: sum4 -= v4[0] * tmp0 + v4[1] * tmp1; v4 += 2;
983: }
984: if (j == nz-1){
985: tmp0 = tmps[*vi++];
986: sum1 -= *v1++ *tmp0;
987: sum2 -= *v2++ *tmp0;
988: sum3 -= *v3++ *tmp0;
989: sum4 -= *v4++ *tmp0;
990: }
991: sum2 -= *v2++ * sum1;
992: sum3 -= *v3++ * sum1;
993: sum4 -= *v4++ * sum1;
994: sum3 -= *v3++ * sum2;
995: sum4 -= *v4++ * sum2;
996: sum4 -= *v4++ * sum3;
997:
998: tmp[row ++]=sum1;
999: tmp[row ++]=sum2;
1000: tmp[row ++]=sum3;
1001: tmp[row ++]=sum4;
1002: break;
1003: case 5:
1004: sum1 = b[*r++];
1005: sum2 = b[*r++];
1006: sum3 = b[*r++];
1007: sum4 = b[*r++];
1008: sum5 = b[*r++];
1009: v2 = aa + ai[row+1];
1010: v3 = aa + ai[row+2];
1011: v4 = aa + ai[row+3];
1012: v5 = aa + ai[row+4];
1013:
1014: for (j=0; j<nz-1; j+=2){
1015: i0 = vi[0];
1016: i1 = vi[1];
1017: vi +=2;
1018: tmp0 = tmps[i0];
1019: tmp1 = tmps[i1];
1020: sum1 -= v1[0] * tmp0 + v1[1] * tmp1; v1 += 2;
1021: sum2 -= v2[0] * tmp0 + v2[1] * tmp1; v2 += 2;
1022: sum3 -= v3[0] * tmp0 + v3[1] * tmp1; v3 += 2;
1023: sum4 -= v4[0] * tmp0 + v4[1] * tmp1; v4 += 2;
1024: sum5 -= v5[0] * tmp0 + v5[1] * tmp1; v5 += 2;
1025: }
1026: if (j == nz-1){
1027: tmp0 = tmps[*vi++];
1028: sum1 -= *v1++ *tmp0;
1029: sum2 -= *v2++ *tmp0;
1030: sum3 -= *v3++ *tmp0;
1031: sum4 -= *v4++ *tmp0;
1032: sum5 -= *v5++ *tmp0;
1033: }
1035: sum2 -= *v2++ * sum1;
1036: sum3 -= *v3++ * sum1;
1037: sum4 -= *v4++ * sum1;
1038: sum5 -= *v5++ * sum1;
1039: sum3 -= *v3++ * sum2;
1040: sum4 -= *v4++ * sum2;
1041: sum5 -= *v5++ * sum2;
1042: sum4 -= *v4++ * sum3;
1043: sum5 -= *v5++ * sum3;
1044: sum5 -= *v5++ * sum4;
1045:
1046: tmp[row ++]=sum1;
1047: tmp[row ++]=sum2;
1048: tmp[row ++]=sum3;
1049: tmp[row ++]=sum4;
1050: tmp[row ++]=sum5;
1051: break;
1052: default:
1053: SETERRQ(PETSC_ERR_COR,"Node size not yet supported n");
1054: }
1055: }
1056: /* backward solve the upper triangular */
1057: for (i=node_max -1 ,row = n-1 ; i>=0; i--){
1058: nsz = ns[i];
1059: aii = ai[row+1] -1;
1060: v1 = aa + aii;
1061: vi = aj + aii;
1062: nz = aii- ad[row];
1063: switch (nsz){ /* Each loop in 'case' is unrolled */
1064: case 1 :
1065: sum1 = tmp[row];
1067: for(j=nz ; j>1; j-=2){
1068: i0 = vi[0];
1069: i1 = vi[-1];
1070: vi -=2;
1071: tmp0 = tmps[i0];
1072: tmp1 = tmps[i1];
1073: sum1 -= v1[0] * tmp0 + v1[-1] * tmp1; v1 -= 2;
1074: }
1075: if (j==1){
1076: tmp0 = tmps[*vi--];
1077: sum1 -= *v1-- * tmp0;
1078: }
1079: x[*c--] = tmp[row] = sum1*a_a[ad[row]+shift]; row--;
1080: break;
1081: case 2 :
1082: sum1 = tmp[row];
1083: sum2 = tmp[row -1];
1084: v2 = aa + ai[row]-1;
1085: for (j=nz ; j>1; j-=2){
1086: i0 = vi[0];
1087: i1 = vi[-1];
1088: vi -=2;
1089: tmp0 = tmps[i0];
1090: tmp1 = tmps[i1];
1091: sum1 -= v1[0] * tmp0 + v1[-1] * tmp1; v1 -= 2;
1092: sum2 -= v2[0] * tmp0 + v2[-1] * tmp1; v2 -= 2;
1093: }
1094: if (j==1){
1095: tmp0 = tmps[*vi--];
1096: sum1 -= *v1-- * tmp0;
1097: sum2 -= *v2-- * tmp0;
1098: }
1099:
1100: tmp0 = x[*c--] = tmp[row] = sum1*a_a[ad[row]+shift]; row--;
1101: sum2 -= *v2-- * tmp0;
1102: x[*c--] = tmp[row] = sum2*a_a[ad[row]+shift]; row--;
1103: break;
1104: case 3 :
1105: sum1 = tmp[row];
1106: sum2 = tmp[row -1];
1107: sum3 = tmp[row -2];
1108: v2 = aa + ai[row]-1;
1109: v3 = aa + ai[row -1]-1;
1110: for (j=nz ; j>1; j-=2){
1111: i0 = vi[0];
1112: i1 = vi[-1];
1113: vi -=2;
1114: tmp0 = tmps[i0];
1115: tmp1 = tmps[i1];
1116: sum1 -= v1[0] * tmp0 + v1[-1] * tmp1; v1 -= 2;
1117: sum2 -= v2[0] * tmp0 + v2[-1] * tmp1; v2 -= 2;
1118: sum3 -= v3[0] * tmp0 + v3[-1] * tmp1; v3 -= 2;
1119: }
1120: if (j==1){
1121: tmp0 = tmps[*vi--];
1122: sum1 -= *v1-- * tmp0;
1123: sum2 -= *v2-- * tmp0;
1124: sum3 -= *v3-- * tmp0;
1125: }
1126: tmp0 = x[*c--] = tmp[row] = sum1*a_a[ad[row]+shift]; row--;
1127: sum2 -= *v2-- * tmp0;
1128: sum3 -= *v3-- * tmp0;
1129: tmp0 = x[*c--] = tmp[row] = sum2*a_a[ad[row]+shift]; row--;
1130: sum3 -= *v3-- * tmp0;
1131: x[*c--] = tmp[row] = sum3*a_a[ad[row]+shift]; row--;
1132:
1133: break;
1134: case 4 :
1135: sum1 = tmp[row];
1136: sum2 = tmp[row -1];
1137: sum3 = tmp[row -2];
1138: sum4 = tmp[row -3];
1139: v2 = aa + ai[row]-1;
1140: v3 = aa + ai[row -1]-1;
1141: v4 = aa + ai[row -2]-1;
1143: for (j=nz ; j>1; j-=2){
1144: i0 = vi[0];
1145: i1 = vi[-1];
1146: vi -=2;
1147: tmp0 = tmps[i0];
1148: tmp1 = tmps[i1];
1149: sum1 -= v1[0] * tmp0 + v1[-1] * tmp1; v1 -= 2;
1150: sum2 -= v2[0] * tmp0 + v2[-1] * tmp1; v2 -= 2;
1151: sum3 -= v3[0] * tmp0 + v3[-1] * tmp1; v3 -= 2;
1152: sum4 -= v4[0] * tmp0 + v4[-1] * tmp1; v4 -= 2;
1153: }
1154: if (j==1){
1155: tmp0 = tmps[*vi--];
1156: sum1 -= *v1-- * tmp0;
1157: sum2 -= *v2-- * tmp0;
1158: sum3 -= *v3-- * tmp0;
1159: sum4 -= *v4-- * tmp0;
1160: }
1162: tmp0 = x[*c--] = tmp[row] = sum1*a_a[ad[row]+shift]; row--;
1163: sum2 -= *v2-- * tmp0;
1164: sum3 -= *v3-- * tmp0;
1165: sum4 -= *v4-- * tmp0;
1166: tmp0 = x[*c--] = tmp[row] = sum2*a_a[ad[row]+shift]; row--;
1167: sum3 -= *v3-- * tmp0;
1168: sum4 -= *v4-- * tmp0;
1169: tmp0 = x[*c--] = tmp[row] = sum3*a_a[ad[row]+shift]; row--;
1170: sum4 -= *v4-- * tmp0;
1171: x[*c--] = tmp[row] = sum4*a_a[ad[row]+shift]; row--;
1172: break;
1173: case 5 :
1174: sum1 = tmp[row];
1175: sum2 = tmp[row -1];
1176: sum3 = tmp[row -2];
1177: sum4 = tmp[row -3];
1178: sum5 = tmp[row -4];
1179: v2 = aa + ai[row]-1;
1180: v3 = aa + ai[row -1]-1;
1181: v4 = aa + ai[row -2]-1;
1182: v5 = aa + ai[row -3]-1;
1183: for (j=nz ; j>1; j-=2){
1184: i0 = vi[0];
1185: i1 = vi[-1];
1186: vi -=2;
1187: tmp0 = tmps[i0];
1188: tmp1 = tmps[i1];
1189: sum1 -= v1[0] * tmp0 + v1[-1] * tmp1; v1 -= 2;
1190: sum2 -= v2[0] * tmp0 + v2[-1] * tmp1; v2 -= 2;
1191: sum3 -= v3[0] * tmp0 + v3[-1] * tmp1; v3 -= 2;
1192: sum4 -= v4[0] * tmp0 + v4[-1] * tmp1; v4 -= 2;
1193: sum5 -= v5[0] * tmp0 + v5[-1] * tmp1; v5 -= 2;
1194: }
1195: if (j==1){
1196: tmp0 = tmps[*vi--];
1197: sum1 -= *v1-- * tmp0;
1198: sum2 -= *v2-- * tmp0;
1199: sum3 -= *v3-- * tmp0;
1200: sum4 -= *v4-- * tmp0;
1201: sum5 -= *v5-- * tmp0;
1202: }
1204: tmp0 = x[*c--] = tmp[row] = sum1*a_a[ad[row]+shift]; row--;
1205: sum2 -= *v2-- * tmp0;
1206: sum3 -= *v3-- * tmp0;
1207: sum4 -= *v4-- * tmp0;
1208: sum5 -= *v5-- * tmp0;
1209: tmp0 = x[*c--] = tmp[row] = sum2*a_a[ad[row]+shift]; row--;
1210: sum3 -= *v3-- * tmp0;
1211: sum4 -= *v4-- * tmp0;
1212: sum5 -= *v5-- * tmp0;
1213: tmp0 = x[*c--] = tmp[row] = sum3*a_a[ad[row]+shift]; row--;
1214: sum4 -= *v4-- * tmp0;
1215: sum5 -= *v5-- * tmp0;
1216: tmp0 = x[*c--] = tmp[row] = sum4*a_a[ad[row]+shift]; row--;
1217: sum5 -= *v5-- * tmp0;
1218: x[*c--] = tmp[row] = sum5*a_a[ad[row]+shift]; row--;
1219: break;
1220: default:
1221: SETERRQ(PETSC_ERR_COR,"Node size not yet supported n");
1222: }
1223: }
1224: ISRestoreIndices(isrow,&rout);
1225: ISRestoreIndices(iscol,&cout);
1226: VecRestoreArray(bb,&b);
1227: VecRestoreArray(xx,&x);
1228: PetscLogFlops(2*a->nz - A->n);
1229: return(0);
1230: }
1233: #undef __FUNCT__
1235: int MatLUFactorNumeric_SeqAIJ_Inode(Mat A,Mat *B)
1236: {
1237: Mat C = *B;
1238: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b = (Mat_SeqAIJ *)C->data;
1239: IS iscol = b->col,isrow = b->row,isicol = b->icol;
1240: int shift = a->indexshift,*r,*ic,*c,ierr,n = A->m,*bi = b->i;
1241: int *bj = b->j+shift,*nbj=b->j +(!shift),*ajtmp,*bjtmp,nz,row,prow;
1242: int *ics,i,j,idx,*ai = a->i,*aj = a->j+shift,*bd = b->diag,node_max,nsz;
1243: int *ns,*tmp_vec1,*tmp_vec2,*nsmap,*pj,ndamp = 0;
1244: PetscScalar *rtmp1,*rtmp2,*rtmp3,*v1,*v2,*v3,*pc1,*pc2,*pc3,mul1,mul2,mul3;
1245: PetscScalar tmp,*ba = b->a+shift,*aa = a->a+shift,*pv,*rtmps1,*rtmps2,*rtmps3;
1246: PetscReal damping = b->lu_damping,zeropivot = b->lu_zeropivot;
1247: PetscTruth damp = PETSC_FALSE;
1250: ierr = ISGetIndices(isrow,&r);
1251: ierr = ISGetIndices(iscol,&c);
1252: ierr = ISGetIndices(isicol,&ic);
1253: PetscMalloc((3*n+1)*sizeof(PetscScalar),&rtmp1);
1254: ierr = PetscMemzero(rtmp1,(3*n+1)*sizeof(PetscScalar));
1255: ics = ic + shift; rtmps1 = rtmp1 + shift;
1256: rtmp2 = rtmp1 + n; rtmps2 = rtmp2 + shift;
1257: rtmp3 = rtmp2 + n; rtmps3 = rtmp3 + shift;
1258:
1259: node_max = a->inode.node_count;
1260: ns = a->inode.size ;
1261: if (!ns){
1262: SETERRQ(1,"Matrix without inode information");
1263: }
1265: /* If max inode size > 3, split it into two inodes.*/
1266: /* also map the inode sizes according to the ordering */
1267: PetscMalloc((n+1)* sizeof(int),&tmp_vec1);
1268: for (i=0,j=0; i<node_max; ++i,++j){
1269: if (ns[i]>3) {
1270: tmp_vec1[j] = ns[i]/2; /* Assuming ns[i] < =5 */
1271: ++j;
1272: tmp_vec1[j] = ns[i] - tmp_vec1[j-1];
1273: } else {
1274: tmp_vec1[j] = ns[i];
1275: }
1276: }
1277: /* Use the correct node_max */
1278: node_max = j;
1280: /* Now reorder the inode info based on mat re-ordering info */
1281: /* First create a row -> inode_size_array_index map */
1282: PetscMalloc(n*sizeof(int)+1,&nsmap);
1283: PetscMalloc(node_max*sizeof(int)+1,&tmp_vec2);
1284: for (i=0,row=0; i<node_max; i++) {
1285: nsz = tmp_vec1[i];
1286: for (j=0; j<nsz; j++,row++) {
1287: nsmap[row] = i;
1288: }
1289: }
1290: /* Using nsmap, create a reordered ns structure */
1291: for (i=0,j=0; i< node_max; i++) {
1292: nsz = tmp_vec1[nsmap[r[j]]]; /* here the reordered row_no is in r[] */
1293: tmp_vec2[i] = nsz;
1294: j += nsz;
1295: }
1296: PetscFree(nsmap);
1297: PetscFree(tmp_vec1);
1298: /* Now use the correct ns */
1299: ns = tmp_vec2;
1302: do {
1303: /* Now loop over each block-row, and do the factorization */
1304: for (i=0,row=0; i<node_max; i++) {
1305: nsz = ns[i];
1306: nz = bi[row+1] - bi[row];
1307: bjtmp = bj + bi[row];
1308:
1309: switch (nsz){
1310: case 1:
1311: for (j=0; j<nz; j++){
1312: idx = bjtmp[j];
1313: rtmps1[idx] = 0.0;
1314: }
1315:
1316: /* load in initial (unfactored row) */
1317: idx = r[row];
1318: nz = ai[idx+1] - ai[idx];
1319: ajtmp = aj + ai[idx];
1320: v1 = aa + ai[idx];
1322: for (j=0; j<nz; j++) {
1323: idx = ics[ajtmp[j]];
1324: rtmp1[idx] = v1[j];
1325: if (damp && ajtmp[j] == r[row]) {
1326: rtmp1[idx] += damping;
1327: }
1328: }
1329: prow = *bjtmp++ + shift;
1330: while (prow < row) {
1331: pc1 = rtmp1 + prow;
1332: if (*pc1 != 0.0){
1333: pv = ba + bd[prow];
1334: pj = nbj + bd[prow];
1335: mul1 = *pc1 * *pv++;
1336: *pc1 = mul1;
1337: nz = bi[prow+1] - bd[prow] - 1;
1338: PetscLogFlops(2*nz);
1339: for (j=0; j<nz; j++) {
1340: tmp = pv[j];
1341: idx = pj[j];
1342: rtmps1[idx] -= mul1 * tmp;
1343: }
1344: }
1345: prow = *bjtmp++ + shift;
1346: }
1347: nz = bi[row+1] - bi[row];
1348: pj = bj + bi[row];
1349: pc1 = ba + bi[row];
1350: if (PetscAbsScalar(rtmp1[row]) < zeropivot) {
1351: if (damping) {
1352: if (damp) damping *= 2.0;
1353: damp = PETSC_TRUE;
1354: ndamp++;
1355: goto endofwhile;
1356: } else {
1357: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",row,PetscAbsScalar(rtmp1[row]),zeropivot);
1358: }
1359: }
1360: rtmp1[row] = 1.0/rtmp1[row];
1361: for (j=0; j<nz; j++) {
1362: idx = pj[j];
1363: pc1[j] = rtmps1[idx];
1364: }
1365: break;
1366:
1367: case 2:
1368: for (j=0; j<nz; j++) {
1369: idx = bjtmp[j];
1370: rtmps1[idx] = 0.0;
1371: rtmps2[idx] = 0.0;
1372: }
1373:
1374: /* load in initial (unfactored row) */
1375: idx = r[row];
1376: nz = ai[idx+1] - ai[idx];
1377: ajtmp = aj + ai[idx];
1378: v1 = aa + ai[idx];
1379: v2 = aa + ai[idx+1];
1380:
1381: for (j=0; j<nz; j++) {
1382: idx = ics[ajtmp[j]];
1383: rtmp1[idx] = v1[j];
1384: rtmp2[idx] = v2[j];
1385: if (damp && ajtmp[j] == r[row]) {
1386: rtmp1[idx] += damping;
1387: }
1388: if (damp && ajtmp[j] == r[row+1]) {
1389: rtmp2[idx] += damping;
1390: }
1391: }
1392: prow = *bjtmp++ + shift;
1393: while (prow < row) {
1394: pc1 = rtmp1 + prow;
1395: pc2 = rtmp2 + prow;
1396: if (*pc1 != 0.0 || *pc2 != 0.0){
1397: pv = ba + bd[prow];
1398: pj = nbj + bd[prow];
1399: mul1 = *pc1 * *pv;
1400: mul2 = *pc2 * *pv;
1401: ++pv;
1402: *pc1 = mul1;
1403: *pc2 = mul2;
1404:
1405: nz = bi[prow+1] - bd[prow] - 1;
1406: PetscLogFlops(2*2*nz);
1407: for (j=0; j<nz; j++) {
1408: tmp = pv[j];
1409: idx = pj[j];
1410: rtmps1[idx] -= mul1 * tmp;
1411: rtmps2[idx] -= mul2 * tmp;
1412: }
1413: }
1414: prow = *bjtmp++ + shift;
1415: }
1416: /* Now take care of the odd element*/
1417: pc1 = rtmp1 + prow;
1418: pc2 = rtmp2 + prow;
1419: if (*pc2 != 0.0){
1420: pj = nbj + bd[prow];
1421: if (PetscAbsScalar(*pc1) < zeropivot) {
1422: if (damping) {
1423: if (damp) damping *= 2.0;
1424: damp = PETSC_TRUE;
1425: ndamp++;
1426: goto endofwhile;
1427: } else {
1428: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",prow,PetscAbsScalar(*pc1),zeropivot);
1429: }
1430: }
1431: mul2 = (*pc2)/(*pc1); /* since diag is not yet inverted.*/
1432: *pc2 = mul2;
1433: nz = bi[prow+1] - bd[prow] - 1;
1434: PetscLogFlops(2*nz);
1435: for (j=0; j<nz; j++) {
1436: idx = pj[j] + shift;
1437: tmp = rtmp1[idx];
1438: rtmp2[idx] -= mul2 * tmp;
1439: }
1440: }
1441:
1442: nz = bi[row+1] - bi[row];
1443: pj = bj + bi[row];
1444: pc1 = ba + bi[row];
1445: pc2 = ba + bi[row+1];
1446: if (PetscAbsScalar(rtmp1[row]) < zeropivot || PetscAbsScalar(rtmp2[row+1]) < zeropivot) {
1447: if (damping) {
1448: if (damp) damping *= 2.0;
1449: damp = PETSC_TRUE;
1450: ndamp++;
1451: goto endofwhile;
1452: } else if (PetscAbsScalar(rtmp1[row]) < zeropivot) {
1453: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",row,PetscAbsScalar(rtmp1[row]),zeropivot);
1454: } else {
1455: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",row+1,PetscAbsScalar(rtmp2[row+1]),zeropivot);
1456: }
1457: }
1458: rtmp1[row] = 1.0/rtmp1[row];
1459: rtmp2[row+1] = 1.0/rtmp2[row+1];
1460: for (j=0; j<nz; j++) {
1461: idx = pj[j];
1462: pc1[j] = rtmps1[idx];
1463: pc2[j] = rtmps2[idx];
1464: }
1465: break;
1467: case 3:
1468: for (j=0; j<nz; j++) {
1469: idx = bjtmp[j];
1470: rtmps1[idx] = 0.0;
1471: rtmps2[idx] = 0.0;
1472: rtmps3[idx] = 0.0;
1473: }
1474: /* copy the nonzeros for the 3 rows from sparse representation to dense in rtmp*[] */
1475: idx = r[row];
1476: nz = ai[idx+1] - ai[idx];
1477: ajtmp = aj + ai[idx];
1478: v1 = aa + ai[idx];
1479: v2 = aa + ai[idx+1];
1480: v3 = aa + ai[idx+2];
1481: for (j=0; j<nz; j++) {
1482: idx = ics[ajtmp[j]];
1483: rtmp1[idx] = v1[j];
1484: rtmp2[idx] = v2[j];
1485: rtmp3[idx] = v3[j];
1486: if (damp && ajtmp[j] == r[row]) {
1487: rtmp1[idx] += damping;
1488: }
1489: if (damp && ajtmp[j] == r[row+1]) {
1490: rtmp2[idx] += damping;
1491: }
1492: if (damp && ajtmp[j] == r[row+2]) {
1493: rtmp3[idx] += damping;
1494: }
1495: }
1496: /* loop over all pivot row blocks above this row block */
1497: prow = *bjtmp++ + shift;
1498: while (prow < row) {
1499: pc1 = rtmp1 + prow;
1500: pc2 = rtmp2 + prow;
1501: pc3 = rtmp3 + prow;
1502: if (*pc1 != 0.0 || *pc2 != 0.0 || *pc3 !=0.0){
1503: pv = ba + bd[prow];
1504: pj = nbj + bd[prow];
1505: mul1 = *pc1 * *pv;
1506: mul2 = *pc2 * *pv;
1507: mul3 = *pc3 * *pv;
1508: ++pv;
1509: *pc1 = mul1;
1510: *pc2 = mul2;
1511: *pc3 = mul3;
1512:
1513: nz = bi[prow+1] - bd[prow] - 1;
1514: PetscLogFlops(3*2*nz);
1515: /* update this row based on pivot row */
1516: for (j=0; j<nz; j++) {
1517: tmp = pv[j];
1518: idx = pj[j];
1519: rtmps1[idx] -= mul1 * tmp;
1520: rtmps2[idx] -= mul2 * tmp;
1521: rtmps3[idx] -= mul3 * tmp;
1522: }
1523: }
1524: prow = *bjtmp++ + shift;
1525: }
1526: /* Now take care of diagonal block in this set of rows */
1527: pc1 = rtmp1 + prow;
1528: pc2 = rtmp2 + prow;
1529: pc3 = rtmp3 + prow;
1530: if (*pc2 != 0.0 || *pc3 != 0.0){
1531: pj = nbj + bd[prow];
1532: if (PetscAbsScalar(*pc1) < zeropivot) {
1533: if (damping) {
1534: if (damp) damping *= 2.0;
1535: damp = PETSC_TRUE;
1536: ndamp++;
1537: goto endofwhile;
1538: } else {
1539: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",prow,PetscAbsScalar(*pc1),zeropivot);
1540: }
1541: }
1542: mul2 = (*pc2)/(*pc1);
1543: mul3 = (*pc3)/(*pc1);
1544: *pc2 = mul2;
1545: *pc3 = mul3;
1546: nz = bi[prow+1] - bd[prow] - 1;
1547: PetscLogFlops(2*2*nz);
1548: for (j=0; j<nz; j++) {
1549: idx = pj[j] + shift;
1550: tmp = rtmp1[idx];
1551: rtmp2[idx] -= mul2 * tmp;
1552: rtmp3[idx] -= mul3 * tmp;
1553: }
1554: }
1555: ++prow;
1556: pc2 = rtmp2 + prow;
1557: pc3 = rtmp3 + prow;
1558: if (*pc3 != 0.0){
1559: pj = nbj + bd[prow];
1560: pj = nbj + bd[prow];
1561: if (PetscAbsScalar(*pc2) < zeropivot) {
1562: if (damping) {
1563: if (damp) damping *= 2.0;
1564: damp = PETSC_TRUE;
1565: ndamp++;
1566: goto endofwhile;
1567: } else {
1568: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",prow,PetscAbsScalar(*pc2),zeropivot);
1569: }
1570: }
1571: mul3 = (*pc3)/(*pc2);
1572: *pc3 = mul3;
1573: nz = bi[prow+1] - bd[prow] - 1;
1574: PetscLogFlops(2*2*nz);
1575: for (j=0; j<nz; j++) {
1576: idx = pj[j] + shift;
1577: tmp = rtmp2[idx];
1578: rtmp3[idx] -= mul3 * tmp;
1579: }
1580: }
1581: nz = bi[row+1] - bi[row];
1582: pj = bj + bi[row];
1583: pc1 = ba + bi[row];
1584: pc2 = ba + bi[row+1];
1585: pc3 = ba + bi[row+2];
1586: if (PetscAbsScalar(rtmp1[row]) < zeropivot || PetscAbsScalar(rtmp2[row+1]) < zeropivot || PetscAbsScalar(rtmp3[row+2]) < zeropivot) {
1587: if (damping) {
1588: if (damp) damping *= 2.0;
1589: damp = PETSC_TRUE;
1590: ndamp++;
1591: goto endofwhile;
1592: } else if (PetscAbsScalar(rtmp1[row]) < zeropivot) {
1593: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",row,PetscAbsScalar(rtmp1[row]),zeropivot);
1594: } else if (PetscAbsScalar(rtmp2[row+1]) < zeropivot) {
1595: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",row+1,PetscAbsScalar(rtmp2[row+1]),zeropivot);
1596: } else {
1597: SETERRQ3(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %d value %g tolerance %g",row+2,PetscAbsScalar(rtmp3[row+2]),zeropivot);
1598: }
1599: }
1600: rtmp1[row] = 1.0/rtmp1[row];
1601: rtmp2[row+1] = 1.0/rtmp2[row+1];
1602: rtmp3[row+2] = 1.0/rtmp3[row+2];
1603: /* copy row entries from dense representation to sparse */
1604: for (j=0; j<nz; j++) {
1605: idx = pj[j];
1606: pc1[j] = rtmps1[idx];
1607: pc2[j] = rtmps2[idx];
1608: pc3[j] = rtmps3[idx];
1609: }
1610: break;
1612: default:
1613: SETERRQ(PETSC_ERR_COR,"Node size not yet supported n");
1614: }
1615: row += nsz; /* Update the row */
1616: }
1617: endofwhile:;
1618: } while (damp);
1619: PetscFree(rtmp1);
1620: PetscFree(tmp_vec2);
1621: ISRestoreIndices(isicol,&ic);
1622: ISRestoreIndices(isrow,&r);
1623: ISRestoreIndices(iscol,&c);
1624: C->factor = FACTOR_LU;
1625: C->assembled = PETSC_TRUE;
1626: if (ndamp || b->lu_damping) {
1627: PetscLogInfo(0,"MatLUFactorNumerical_SeqAIJ_Inode: number of damping tries %d damping value %gn",ndamp,damping);
1628: }
1629: PetscLogFlops(C->n);
1630: return(0);
1631: }
1633: /*
1634: This is really ugly. if inodes are used this replaces the
1635: permutations with ones that correspond to rows/cols of the matrix
1636: rather then inode blocks
1637: */
1638: #undef __FUNCT__
1640: int MatAdjustForInodes(Mat A,IS *rperm,IS *cperm)
1641: {
1642: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1643: int ierr,m = A->m,n = A->n,i,j,*ridx,*cidx,nslim_row = a->inode.node_count;
1644: int row,col,*permr,*permc,*ns_row = a->inode.size,*tns,start_val,end_val,indx;
1645: int nslim_col,*ns_col;
1646: IS ris = *rperm,cis = *cperm;
1647: PetscTruth flg;
1650: PetscTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
1651: if (!flg) return(0);
1653: if (!a->inode.size) return(0); /* no inodes so return */
1654: if (a->inode.node_count == m) return(0); /* all inodes are of size 1 */
1656: ierr = Mat_AIJ_CreateColInode(A,&nslim_col,&ns_col);
1657: ierr = PetscMalloc((((nslim_row>nslim_col)?nslim_row:nslim_col)+1)*sizeof(int),&tns);
1658: ierr = PetscMalloc((m+n+1)*sizeof(int),&permr);
1659: permc = permr + m;
1661: ierr = ISGetIndices(ris,&ridx);
1662: ierr = ISGetIndices(cis,&cidx);
1664: /* Form the inode structure for the rows of permuted matric using inv perm*/
1665: for (i=0,tns[0]=0; i<nslim_row; ++i) tns[i+1] = tns[i] + ns_row[i];
1667: /* Construct the permutations for rows*/
1668: for (i=0,row = 0; i<nslim_row; ++i){
1669: indx = ridx[i];
1670: start_val = tns[indx];
1671: end_val = tns[indx + 1];
1672: for (j=start_val; j<end_val; ++j,++row) permr[row]= j;
1673: }
1675: /* Form the inode structure for the columns of permuted matrix using inv perm*/
1676: for (i=0,tns[0]=0; i<nslim_col; ++i) tns[i+1] = tns[i] + ns_col[i];
1678: /*Construct permutations for columns*/
1679: for (i=0,col=0; i<nslim_col; ++i){
1680: indx = cidx[i];
1681: start_val = tns[indx];
1682: end_val = tns[indx + 1];
1683: for (j = start_val; j<end_val; ++j,++col) permc[col]= j;
1684: }
1686: ISCreateGeneral(PETSC_COMM_SELF,n,permr,rperm);
1687: ISSetPermutation(*rperm);
1688: ISCreateGeneral(PETSC_COMM_SELF,n,permc,cperm);
1689: ISSetPermutation(*cperm);
1690:
1691: ierr = ISRestoreIndices(ris,&ridx);
1692: ierr = ISRestoreIndices(cis,&cidx);
1694: PetscFree(ns_col);
1695: PetscFree(permr);
1696: ISDestroy(cis);
1697: ISDestroy(ris);
1698: PetscFree(tns);
1699: return(0);
1700: }
1703: #undef __FUNCT__
1706: /*@C
1707: MatSeqAIJGetInodeSizes - Returns the inode information of the SeqAIJ matrix.
1709: Collective on Mat
1711: Input Parameter:
1712: . A - the SeqAIJ matrix.
1714: Output Parameter:
1715: + node_count - no of inodes present in the matrix.
1716: . sizes - an array of size node_count,with sizes of each inode.
1717: - limit - the max size used to generate the inodes.
1719: Level: advanced
1721: Notes: This routine returns some internal storage information
1722: of the matrix, it is intended to be used by advanced users.
1723: It should be called after the matrix is assembled.
1724: The contents of the sizes[] array should not be changed.
1726: .keywords: matrix, seqaij, get, inode
1728: .seealso: MatGetInfo()
1729: @*/
1730: int MatSeqAIJGetInodeSizes(Mat A,int *node_count,int *sizes[],int *limit)
1731: {
1732: Mat_SeqAIJ *a;
1733: PetscTruth flg;
1734: int ierr;
1738: PetscTypeCompare((PetscObject)A,MATSEQAIJ,&flg);
1739: if (!flg) SETERRQ(PETSC_ERR_ARG_WRONG,"MatSeqAIJ only");
1740: a = (Mat_SeqAIJ*)A->data;
1741: *node_count = a->inode.node_count;
1742: *sizes = a->inode.size;
1743: *limit = a->inode.limit;
1744: return(0);
1745: }