Actual source code: mpibaij.c
1: /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/
3: #include "src/mat/impls/baij/mpi/mpibaij.h"
4: #include "src/vec/vecimpl.h"
6: EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
7: EXTERN int DisAssemble_MPIBAIJ(Mat);
8: EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9: EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10: EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *);
11: EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
12: EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
13: EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
14: EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
15: EXTERN int MatPrintHelp_SeqBAIJ(Mat);
16: EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
18: /* UGLY, ugly, ugly
19: When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
20: not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
21: inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
22: converts the entries into single precision and then calls ..._MatScalar() to put them
23: into the single precision data structures.
24: */
25: #if defined(PETSC_USE_MAT_SINGLE)
26: EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27: EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28: EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29: EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30: EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31: #else
32: #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
33: #define MatSetValues_MPIBAIJ_MatScalar MatSetValues_MPIBAIJ
34: #define MatSetValuesBlocked_MPIBAIJ_MatScalar MatSetValuesBlocked_MPIBAIJ
35: #define MatSetValues_MPIBAIJ_HT_MatScalar MatSetValues_MPIBAIJ_HT
36: #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar MatSetValuesBlocked_MPIBAIJ_HT
37: #endif
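/*
   Editorial sketch, not part of mpibaij.c: the wrapper pattern described in the
   comment above. When PETSC_USE_MAT_SINGLE is defined, the public entry point
   receives PetscScalar (double precision) values, narrows them into a MatScalar
   (single precision) buffer, and delegates to the ..._MatScalar() routine,
   exactly as MatSetValues_MPIBAIJ() does further below. ConvertAndSetValues()
   is a hypothetical name used only for this illustration.
*/
static int ConvertAndSetValues(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  MatScalar *vsingle;
  int       ierr,i,N = m*n;

  ierr = PetscMalloc(N*sizeof(MatScalar),&vsingle);CHKERRQ(ierr);
  for (i=0; i<N; i++) vsingle[i] = v[i];                  /* narrow each entry to MatScalar */
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  ierr = PetscFree(vsingle);CHKERRQ(ierr);
  return 0;
}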
39: #undef __FUNCT__
41: int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
42: {
43: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
44: int ierr,i;
45: PetscScalar *va,*vb;
46: Vec vtmp;
49:
50: MatGetRowMax(a->A,v);
51: VecGetArray(v,&va);
53: VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);
54: MatGetRowMax(a->B,vtmp);
55: VecGetArray(vtmp,&vb);
57: for (i=0; i<A->m; i++){
58: if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
59: }
61: VecRestoreArray(v,&va);
62: VecRestoreArray(vtmp,&vb);
63: VecDestroy(vtmp);
64:
65: return(0);
66: }
68: EXTERN_C_BEGIN
69: #undef __FUNCT__
71: int MatStoreValues_MPIBAIJ(Mat mat)
72: {
73: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
74: int ierr;
77: MatStoreValues(aij->A);
78: MatStoreValues(aij->B);
79: return(0);
80: }
81: EXTERN_C_END
83: EXTERN_C_BEGIN
84: #undef __FUNCT__
86: int MatRetrieveValues_MPIBAIJ(Mat mat)
87: {
88: Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
89: int ierr;
92: MatRetrieveValues(aij->A);
93: MatRetrieveValues(aij->B);
94: return(0);
95: }
96: EXTERN_C_END
98: /*
99: Local utility routine that creates a mapping from the global column
100: number to the local number in the off-diagonal part of the local
101: storage of the matrix. This is done in a non-scalable way since the
102: length of colmap equals the global matrix length.
103: */
104: #undef __FUNCT__
106: static int CreateColmap_MPIBAIJ_Private(Mat mat)
107: {
108: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
109: Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
110: int nbs = B->nbs,i,bs=B->bs,ierr;
113: #if defined (PETSC_USE_CTABLE)
114: PetscTableCreate(baij->nbs,&baij->colmap);
115: for (i=0; i<nbs; i++){
116: PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);
117: }
118: #else
119: PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);
120: PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
121: PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));
122: for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
123: #endif
124: return(0);
125: }
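/*
   Editorial sketch, not part of mpibaij.c: how the colmap built above is used.
   In the non-CTABLE build, colmap[g] holds 1 + bs*(local block index) for every
   global block column g present in the off-diagonal part, and 0 otherwise.
   For example, with bs = 2 and garray[] = {3,7,12}, colmap[7] = 1*2+1 = 3, so
   global point column 15 (block 7, offset 1) maps to local column 2+1 = 3.
   GlobalToLocalColumn() is a hypothetical helper written only for this example.
*/
static int GlobalToLocalColumn(Mat_MPIBAIJ *baij,int gcol,int bs)
{
  int lstart = baij->colmap[gcol/bs];   /* 1-based start of the local block, 0 if absent */
  if (!lstart) return -1;               /* block column not present in the off-diagonal part */
  return (lstart - 1) + gcol % bs;      /* point column within baij->B */
}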
127: #define CHUNKSIZE 10
129: #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv)
130: {
131:
132: brow = row/bs;
133: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
134: rmax = aimax[brow]; nrow = ailen[brow];
135: bcol = col/bs;
136: ridx = row % bs; cidx = col % bs;
137: low = 0; high = nrow;
138: while (high-low > 3) {
139: t = (low+high)/2;
140: if (rp[t] > bcol) high = t;
141: else low = t;
142: }
143: for (_i=low; _i<high; _i++) {
144: if (rp[_i] > bcol) break;
145: if (rp[_i] == bcol) {
146: bap = ap + bs2*_i + bs*cidx + ridx;
147: if (addv == ADD_VALUES) *bap += value;
148: else *bap = value;
149: goto a_noinsert;
150: }
151: }
152: if (a->nonew == 1) goto a_noinsert;
153: else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix");
154: if (nrow >= rmax) {
155: /* there is no extra room in row, therefore enlarge */
156: int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
157: MatScalar *new_a;
158:
159: if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
160:
161: /* malloc new storage space */
162: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
163: PetscMalloc(len,&new_a);
164: new_j = (int*)(new_a + bs2*new_nz);
165: new_i = new_j + new_nz;
166:
167: /* copy over old data into new slots */
168: for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
169: for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
170: PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));
171: len = (new_nz - CHUNKSIZE - ai[brow] - nrow);
172: PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));
173: PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));
174: PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
175: PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),
176: aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));
177: /* free up old matrix storage */
178: PetscFree(a->a);
179: if (!a->singlemalloc) {
180: PetscFree(a->i);
181: PetscFree(a->j);
182: }
183: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
184: a->singlemalloc = PETSC_TRUE;
185:
186: rp = aj + ai[brow]; ap = aa + bs2*ai[brow];
187: rmax = aimax[brow] = aimax[brow] + CHUNKSIZE;
188: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
189: a->maxnz += bs2*CHUNKSIZE;
190: a->reallocs++;
191: a->nz++;
192: }
193: N = nrow++ - 1;
194: /* shift up all the later entries in this row */
195: for (ii=N; ii>=_i; ii--) {
196: rp[ii+1] = rp[ii];
197: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
198: }
199: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar)); }
200: rp[_i] = bcol;
201: ap[bs2*_i + bs*cidx + ridx] = value;
202: a_noinsert:;
203: ailen[brow] = nrow;
204: }
206: #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv)
207: {
208: brow = row/bs;
209: rp = bj + bi[brow]; ap = ba + bs2*bi[brow];
210: rmax = bimax[brow]; nrow = bilen[brow];
211: bcol = col/bs;
212: ridx = row % bs; cidx = col % bs;
213: low = 0; high = nrow;
214: while (high-low > 3) {
215: t = (low+high)/2;
216: if (rp[t] > bcol) high = t;
217: else low = t;
218: }
219: for (_i=low; _i<high; _i++) {
220: if (rp[_i] > bcol) break;
221: if (rp[_i] == bcol) {
222: bap = ap + bs2*_i + bs*cidx + ridx;
223: if (addv == ADD_VALUES) *bap += value;
224: else *bap = value;
225: goto b_noinsert;
226: }
227: }
228: if (b->nonew == 1) goto b_noinsert;
229: else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix");
230: if (nrow >= rmax) {
231: /* there is no extra room in row, therefore enlarge */
232: int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j;
233: MatScalar *new_a;
234:
235: if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
236:
237: /* malloc new storage space */
238: len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int);
239: ierr = PetscMalloc(len,&new_a);
240: new_j = (int*)(new_a + bs2*new_nz);
241: new_i = new_j + new_nz;
242:
243: /* copy over old data into new slots */
244: for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];}
245: for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;}
246: PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));
247: len = (new_nz - CHUNKSIZE - bi[brow] - nrow);
248: PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));
249: PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));
250: PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));
251: PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE),
252: ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));
253: /* free up old matrix storage */
254: PetscFree(b->a);
255: if (!b->singlemalloc) {
256: PetscFree(b->i);
257: PetscFree(b->j);
258: }
259: ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;
260: b->singlemalloc = PETSC_TRUE;
261:
262: rp = bj + bi[brow]; ap = ba + bs2*bi[brow];
263: rmax = bimax[brow] = bimax[brow] + CHUNKSIZE;
264: PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
265: b->maxnz += bs2*CHUNKSIZE;
266: b->reallocs++;
267: b->nz++;
268: }
269: N = nrow++ - 1;
270: /* shift up all the later entries in this row */
271: for (ii=N; ii>=_i; ii--) {
272: rp[ii+1] = rp[ii];
273: PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));
274: }
275: if (N>=_i) { PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));}
276: rp[_i] = bcol;
277: ap[bs2*_i + bs*cidx + ridx] = value;
278: b_noinsert:;
279: bilen[brow] = nrow;
280: }
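/*
   Editorial sketch, not part of mpibaij.c: the search step shared by the two
   insertion macros above. Bisection shrinks the candidate window of block
   columns to at most three entries, then a short linear scan either finds the
   existing block or identifies the insertion point (after which the macros
   shift later entries up and, if the row is full, reallocate by CHUNKSIZE).
   FindBlockColumn() is a hypothetical helper written only for this example.
*/
static int FindBlockColumn(const int *rp,int nrow,int bcol)
{
  int low = 0,high = nrow,t,i;

  while (high-low > 3) {
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else low = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] == bcol) return i;   /* block column already allocated in this row */
    if (rp[i] > bcol) break;       /* first larger column: a new block belongs here */
  }
  return -(i+1);                   /* encode the insertion slot as a negative value */
}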
282: #if defined(PETSC_USE_MAT_SINGLE)
283: #undef __FUNCT__
285: int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
286: {
287: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
288: int ierr,i,N = m*n;
289: MatScalar *vsingle;
292: if (N > b->setvalueslen) {
293: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
294: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
295: b->setvalueslen = N;
296: }
297: vsingle = b->setvaluescopy;
299: for (i=0; i<N; i++) {
300: vsingle[i] = v[i];
301: }
302: MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
303: return(0);
304: }
306: #undef __FUNCT__
308: int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
309: {
310: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
311: int ierr,i,N = m*n*b->bs2;
312: MatScalar *vsingle;
315: if (N > b->setvalueslen) {
316: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
317: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
318: b->setvalueslen = N;
319: }
320: vsingle = b->setvaluescopy;
321: for (i=0; i<N; i++) {
322: vsingle[i] = v[i];
323: }
324: MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);
325: return(0);
326: }
328: #undef __FUNCT__
330: int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
331: {
332: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
333: int ierr,i,N = m*n;
334: MatScalar *vsingle;
337: if (N > b->setvalueslen) {
338: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
339: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
340: b->setvalueslen = N;
341: }
342: vsingle = b->setvaluescopy;
343: for (i=0; i<N; i++) {
344: vsingle[i] = v[i];
345: }
346: MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);
347: return(0);
348: }
350: #undef __FUNCT__
352: int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
353: {
354: Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
355: int ierr,i,N = m*n*b->bs2;
356: MatScalar *vsingle;
359: if (N > b->setvalueslen) {
360: if (b->setvaluescopy) {PetscFree(b->setvaluescopy);}
361: PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);
362: b->setvalueslen = N;
363: }
364: vsingle = b->setvaluescopy;
365: for (i=0; i<N; i++) {
366: vsingle[i] = v[i];
367: }
368: MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);
369: return(0);
370: }
371: #endif
373: #undef __FUNCT__
375: int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
376: {
377: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
378: MatScalar value;
379: PetscTruth roworiented = baij->roworiented;
380: int ierr,i,j,row,col;
381: int rstart_orig=baij->rstart_bs;
382: int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
383: int cend_orig=baij->cend_bs,bs=baij->bs;
385: /* Some Variables required in the macro */
386: Mat A = baij->A;
387: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
388: int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
389: MatScalar *aa=a->a;
391: Mat B = baij->B;
392: Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
393: int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
394: MatScalar *ba=b->a;
396: int *rp,ii,nrow,_i,rmax,N,brow,bcol;
397: int low,high,t,ridx,cidx,bs2=a->bs2;
398: MatScalar *ap,*bap;
401: for (i=0; i<m; i++) {
402: if (im[i] < 0) continue;
403: #if defined(PETSC_USE_BOPT_g)
404: if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
405: #endif
406: if (im[i] >= rstart_orig && im[i] < rend_orig) {
407: row = im[i] - rstart_orig;
408: for (j=0; j<n; j++) {
409: if (in[j] >= cstart_orig && in[j] < cend_orig){
410: col = in[j] - cstart_orig;
411: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
412: MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
413: /* MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv); */
414: } else if (in[j] < 0) continue;
415: #if defined(PETSC_USE_BOPT_g)
416: else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
417: #endif
418: else {
419: if (mat->was_assembled) {
420: if (!baij->colmap) {
421: CreateColmap_MPIBAIJ_Private(mat);
422: }
423: #if defined (PETSC_USE_CTABLE)
424: PetscTableFind(baij->colmap,in[j]/bs + 1,&col);
425: col = col - 1;
426: #else
427: col = baij->colmap[in[j]/bs] - 1;
428: #endif
429: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
430: DisAssemble_MPIBAIJ(mat);
431: col = in[j];
432: /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
433: B = baij->B;
434: b = (Mat_SeqBAIJ*)(B)->data;
435: bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
436: ba=b->a;
437: } else col += in[j]%bs;
438: } else col = in[j];
439: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
440: MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
441: /* MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv); */
442: }
443: }
444: } else {
445: if (!baij->donotstash) {
446: if (roworiented) {
447: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
448: } else {
449: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
450: }
451: }
452: }
453: }
454: return(0);
455: }
457: #undef __FUNCT__
459: int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
460: {
461: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
462: MatScalar *value,*barray=baij->barray;
463: PetscTruth roworiented = baij->roworiented;
464: int ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
465: int rend=baij->rend,cstart=baij->cstart,stepval;
466: int cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
467:
469: if(!barray) {
470: ierr = PetscMalloc(bs2*sizeof(MatScalar),&barray);
471: baij->barray = barray;
472: }
474: if (roworiented) {
475: stepval = (n-1)*bs;
476: } else {
477: stepval = (m-1)*bs;
478: }
479: for (i=0; i<m; i++) {
480: if (im[i] < 0) continue;
481: #if defined(PETSC_USE_BOPT_g)
482: if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
483: #endif
484: if (im[i] >= rstart && im[i] < rend) {
485: row = im[i] - rstart;
486: for (j=0; j<n; j++) {
487: /* If NumCol = 1 then a copy is not required */
488: if ((roworiented) && (n == 1)) {
489: barray = v + i*bs2;
490: } else if((!roworiented) && (m == 1)) {
491: barray = v + j*bs2;
492: } else { /* Here a copy is required */
493: if (roworiented) {
494: value = v + i*(stepval+bs)*bs + j*bs;
495: } else {
496: value = v + j*(stepval+bs)*bs + i*bs;
497: }
498: for (ii=0; ii<bs; ii++,value+=stepval) {
499: for (jj=0; jj<bs; jj++) {
500: *barray++ = *value++;
501: }
502: }
503: barray -=bs2;
504: }
505:
506: if (in[j] >= cstart && in[j] < cend){
507: col = in[j] - cstart;
508: MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);
509: }
510: else if (in[j] < 0) continue;
511: #if defined(PETSC_USE_BOPT_g)
512: else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
513: #endif
514: else {
515: if (mat->was_assembled) {
516: if (!baij->colmap) {
517: CreateColmap_MPIBAIJ_Private(mat);
518: }
520: #if defined(PETSC_USE_BOPT_g)
521: #if defined (PETSC_USE_CTABLE)
522: { int data;
523: PetscTableFind(baij->colmap,in[j]+1,&data);
524: if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
525: }
526: #else
527: if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
528: #endif
529: #endif
530: #if defined (PETSC_USE_CTABLE)
531: PetscTableFind(baij->colmap,in[j]+1,&col);
532: col = (col - 1)/bs;
533: #else
534: col = (baij->colmap[in[j]] - 1)/bs;
535: #endif
536: if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
537: DisAssemble_MPIBAIJ(mat);
538: col = in[j];
539: }
540: }
541: else col = in[j];
542: MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);
543: }
544: }
545: } else {
546: if (!baij->donotstash) {
547: if (roworiented) {
548: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
549: } else {
550: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
551: }
552: }
553: }
554: }
555: return(0);
556: }
558: #define HASH_KEY 0.6180339887
559: #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
560: /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
561: /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
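/*
   Editorial sketch, not part of mpibaij.c: a worked example of the golden-ratio
   hash above. The macro scales the key by HASH_KEY, keeps the fractional part,
   and spreads it over [0,size). With size = 97 and key = 1234:
   1234*0.6180339887 = 762.6539..., fractional part 0.6539..., so the probe
   starts at bucket (int)(97*0.6539...) = 63; collisions are then resolved by
   the linear scans in the ..._HT routines below. HashBucketExample() is a
   hypothetical helper written only for this illustration.
*/
static int HashBucketExample(void)
{
  double tmp;
  return HASH(97,1234,tmp);   /* returns 63 */
}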
562: #undef __FUNCT__
564: int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
565: {
566: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
567: PetscTruth roworiented = baij->roworiented;
568: int ierr,i,j,row,col;
569: int rstart_orig=baij->rstart_bs;
570: int rend_orig=baij->rend_bs,Nbs=baij->Nbs;
571: int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
572: PetscReal tmp;
573: MatScalar **HD = baij->hd,value;
574: #if defined(PETSC_USE_BOPT_g)
575: int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
576: #endif
580: for (i=0; i<m; i++) {
581: #if defined(PETSC_USE_BOPT_g)
582: if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
583: if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
584: #endif
585: row = im[i];
586: if (row >= rstart_orig && row < rend_orig) {
587: for (j=0; j<n; j++) {
588: col = in[j];
589: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
590: /* Look up into the Hash Table */
591: key = (row/bs)*Nbs+(col/bs)+1;
592: h1 = HASH(size,key,tmp);
594:
595: idx = h1;
596: #if defined(PETSC_USE_BOPT_g)
597: insert_ct++;
598: total_ct++;
599: if (HT[idx] != key) {
600: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
601: if (idx == size) {
602: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
603: if (idx == h1) {
604: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
605: }
606: }
607: }
608: #else
609: if (HT[idx] != key) {
610: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
611: if (idx == size) {
612: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
613: if (idx == h1) {
614: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
615: }
616: }
617: }
618: #endif
619: /* A HASH table entry is found, so insert the values at the correct address */
620: if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
621: else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value;
622: }
623: } else {
624: if (!baij->donotstash) {
625: if (roworiented) {
626: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
627: } else {
628: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
629: }
630: }
631: }
632: }
633: #if defined(PETSC_USE_BOPT_g)
634: baij->ht_total_ct = total_ct;
635: baij->ht_insert_ct = insert_ct;
636: #endif
637: return(0);
638: }
640: #undef __FUNCT__
642: int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
643: {
644: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
645: PetscTruth roworiented = baij->roworiented;
646: int ierr,i,j,ii,jj,row,col;
647: int rstart=baij->rstart ;
648: int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
649: int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
650: PetscReal tmp;
651: MatScalar **HD = baij->hd,*baij_a;
652: MatScalar *v_t,*value;
653: #if defined(PETSC_USE_BOPT_g)
654: int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
655: #endif
656:
659: if (roworiented) {
660: stepval = (n-1)*bs;
661: } else {
662: stepval = (m-1)*bs;
663: }
664: for (i=0; i<m; i++) {
665: #if defined(PETSC_USE_BOPT_g)
666: if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
667: if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
668: #endif
669: row = im[i];
670: v_t = v + i*bs2;
671: if (row >= rstart && row < rend) {
672: for (j=0; j<n; j++) {
673: col = in[j];
675: /* Look up into the Hash Table */
676: key = row*Nbs+col+1;
677: h1 = HASH(size,key,tmp);
678:
679: idx = h1;
680: #if defined(PETSC_USE_BOPT_g)
681: total_ct++;
682: insert_ct++;
683: if (HT[idx] != key) {
684: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
685: if (idx == size) {
686: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
687: if (idx == h1) {
688: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
689: }
690: }
691: }
692: #else
693: if (HT[idx] != key) {
694: for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
695: if (idx == size) {
696: for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
697: if (idx == h1) {
698: SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
699: }
700: }
701: }
702: #endif
703: baij_a = HD[idx];
704: if (roworiented) {
705: /*value = v + i*(stepval+bs)*bs + j*bs;*/
706: /* value = v + (i*(stepval+bs)+j)*bs; */
707: value = v_t;
708: v_t += bs;
709: if (addv == ADD_VALUES) {
710: for (ii=0; ii<bs; ii++,value+=stepval) {
711: for (jj=ii; jj<bs2; jj+=bs) {
712: baij_a[jj] += *value++;
713: }
714: }
715: } else {
716: for (ii=0; ii<bs; ii++,value+=stepval) {
717: for (jj=ii; jj<bs2; jj+=bs) {
718: baij_a[jj] = *value++;
719: }
720: }
721: }
722: } else {
723: value = v + j*(stepval+bs)*bs + i*bs;
724: if (addv == ADD_VALUES) {
725: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
726: for (jj=0; jj<bs; jj++) {
727: baij_a[jj] += *value++;
728: }
729: }
730: } else {
731: for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
732: for (jj=0; jj<bs; jj++) {
733: baij_a[jj] = *value++;
734: }
735: }
736: }
737: }
738: }
739: } else {
740: if (!baij->donotstash) {
741: if (roworiented) {
742: MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
743: } else {
744: MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);
745: }
746: }
747: }
748: }
749: #if defined(PETSC_USE_BOPT_g)
750: baij->ht_total_ct = total_ct;
751: baij->ht_insert_ct = insert_ct;
752: #endif
753: return(0);
754: }
756: #undef __FUNCT__
758: int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
759: {
760: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
761: int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
762: int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
765: for (i=0; i<m; i++) {
766: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
767: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
768: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
769: row = idxm[i] - bsrstart;
770: for (j=0; j<n; j++) {
771: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
772: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
773: if (idxn[j] >= bscstart && idxn[j] < bscend){
774: col = idxn[j] - bscstart;
775: MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);
776: } else {
777: if (!baij->colmap) {
778: CreateColmap_MPIBAIJ_Private(mat);
779: }
780: #if defined (PETSC_USE_CTABLE)
781: PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);
782: data --;
783: #else
784: data = baij->colmap[idxn[j]/bs]-1;
785: #endif
786: if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
787: else {
788: col = data + idxn[j]%bs;
789: MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);
790: }
791: }
792: }
793: } else {
794: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
795: }
796: }
797: return(0);
798: }
800: #undef __FUNCT__
802: int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
803: {
804: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
805: Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
806: int ierr,i,bs2=baij->bs2;
807: PetscReal sum = 0.0;
808: MatScalar *v;
811: if (baij->size == 1) {
812: MatNorm(baij->A,type,nrm);
813: } else {
814: if (type == NORM_FROBENIUS) {
815: v = amat->a;
816: for (i=0; i<amat->nz*bs2; i++) {
817: #if defined(PETSC_USE_COMPLEX)
818: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
819: #else
820: sum += (*v)*(*v); v++;
821: #endif
822: }
823: v = bmat->a;
824: for (i=0; i<bmat->nz*bs2; i++) {
825: #if defined(PETSC_USE_COMPLEX)
826: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
827: #else
828: sum += (*v)*(*v); v++;
829: #endif
830: }
831: MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);
832: *nrm = sqrt(*nrm);
833: } else {
834: SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
835: }
836: }
837: return(0);
838: }
841: /*
842: Creates the hash table, and sets the table
843: This table is created only once.
844: If new entries need to be added to the matrix
845: then the hash table has to be destroyed and
846: recreated.
847: */
848: #undef __FUNCT__
850: int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
851: {
852: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
853: Mat A = baij->A,B=baij->B;
854: Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
855: int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
856: int size,bs2=baij->bs2,rstart=baij->rstart,ierr;
857: int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
858: int *HT,key;
859: MatScalar **HD;
860: PetscReal tmp;
861: #if defined(PETSC_USE_BOPT_g)
862: int ct=0,max=0;
863: #endif
866: baij->ht_size=(int)(factor*nz);
867: size = baij->ht_size;
869: if (baij->ht) {
870: return(0);
871: }
872:
873: /* Allocate Memory for Hash Table */
874: ierr = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);
875: baij->ht = (int*)(baij->hd + size);
876: HD = baij->hd;
877: HT = baij->ht;
880: PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));
881:
883: /* Loop Over A */
884: for (i=0; i<a->mbs; i++) {
885: for (j=ai[i]; j<ai[i+1]; j++) {
886: row = i+rstart;
887: col = aj[j]+cstart;
888:
889: key = row*Nbs + col + 1;
890: h1 = HASH(size,key,tmp);
891: for (k=0; k<size; k++){
892: if (HT[(h1+k)%size] == 0.0) {
893: HT[(h1+k)%size] = key;
894: HD[(h1+k)%size] = a->a + j*bs2;
895: break;
896: #if defined(PETSC_USE_BOPT_g)
897: } else {
898: ct++;
899: #endif
900: }
901: }
902: #if defined(PETSC_USE_BOPT_g)
903: if (k> max) max = k;
904: #endif
905: }
906: }
907: /* Loop Over B */
908: for (i=0; i<b->mbs; i++) {
909: for (j=bi[i]; j<bi[i+1]; j++) {
910: row = i+rstart;
911: col = garray[bj[j]];
912: key = row*Nbs + col + 1;
913: h1 = HASH(size,key,tmp);
914: for (k=0; k<size; k++){
915: if (HT[(h1+k)%size] == 0.0) {
916: HT[(h1+k)%size] = key;
917: HD[(h1+k)%size] = b->a + j*bs2;
918: break;
919: #if defined(PETSC_USE_BOPT_g)
920: } else {
921: ct++;
922: #endif
923: }
924: }
925: #if defined(PETSC_USE_BOPT_g)
926: if (k> max) max = k;
927: #endif
928: }
929: }
930:
931: /* Print Summary */
932: #if defined(PETSC_USE_BOPT_g)
933: for (i=0,j=0; i<size; i++) {
934: if (HT[i]) {j++;}
935: }
936: PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
937: #endif
938: return(0);
939: }
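/*
   Editorial sketch, not part of mpibaij.c: enabling the hash-table assembly
   path from application code. Setting MAT_USE_HASH_TABLE makes
   MatAssemblyEnd_MPIBAIJ() (below) build the table created by the routine above
   after the first final assembly and switch the setvalues function pointers to
   the ..._HT variants, so later insertions must hit existing nonzero locations.
   The runtime option -mat_use_hash_table <factor> has the same effect.
   AssembleWithHashTable() is a hypothetical helper; the preallocation numbers
   and sizes are placeholders.
*/
static int AssembleWithHashTable(MPI_Comm comm,int bs,int m,int M,Mat *mat)
{
  int ierr;

  ierr = MatCreateMPIBAIJ(comm,bs,m,m,M,M,5,PETSC_NULL,2,PETSC_NULL,mat);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
  /* ... MatSetValues()/MatSetValuesBlocked() calls ... */
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);   /* builds the hash table */
  return 0;
}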
941: #undef __FUNCT__
943: int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
944: {
945: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
946: int ierr,nstash,reallocs;
947: InsertMode addv;
950: if (baij->donotstash) {
951: return(0);
952: }
954: /* make sure all processors are either in INSERTMODE or ADDMODE */
955: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
956: if (addv == (ADD_VALUES|INSERT_VALUES)) {
957: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
958: }
959: mat->insertmode = addv; /* in case this processor had no cache */
961: MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);
962: MatStashScatterBegin_Private(&mat->bstash,baij->rowners);
963: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
964: PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
965: MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);
966: PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
967: return(0);
968: }
970: EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
971: #undef __FUNCT__
973: int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
974: {
975: Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
976: Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
977: int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
978: int *row,*col,other_disassembled;
979: PetscTruth r1,r2,r3;
980: MatScalar *val;
981: InsertMode addv = mat->insertmode;
982: #if defined(PETSC_HAVE_DSCPACK)
983: PetscTruth flag;
984: #endif
987: if (!baij->donotstash) {
988: while (1) {
989: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
990: if (!flg) break;
992: for (i=0; i<n;) {
993: /* Now identify the consecutive vals belonging to the same row */
994: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
995: if (j < n) ncols = j-i;
996: else ncols = n-i;
997: /* Now assemble all these values with a single function call */
998: MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);
999: i = j;
1000: }
1001: }
1002: MatStashScatterEnd_Private(&mat->stash);
1003: /* Now process the block-stash. Since the values are stashed column-oriented,
1004: set the roworiented flag to column oriented, and after MatSetValues()
1005: restore the original flags */
1006: r1 = baij->roworiented;
1007: r2 = a->roworiented;
1008: r3 = b->roworiented;
1009: baij->roworiented = PETSC_FALSE;
1010: a->roworiented = PETSC_FALSE;
1011: b->roworiented = PETSC_FALSE;
1012: while (1) {
1013: MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);
1014: if (!flg) break;
1015:
1016: for (i=0; i<n;) {
1017: /* Now identify the consecutive vals belonging to the same row */
1018: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1019: if (j < n) ncols = j-i;
1020: else ncols = n-i;
1021: MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);
1022: i = j;
1023: }
1024: }
1025: MatStashScatterEnd_Private(&mat->bstash);
1026: baij->roworiented = r1;
1027: a->roworiented = r2;
1028: b->roworiented = r3;
1029: }
1031: MatAssemblyBegin(baij->A,mode);
1032: MatAssemblyEnd(baij->A,mode);
1034: /* determine if any processor has disassembled, if so we must
1035: also disassemble ourselves, in order that we may reassemble. */
1036: /*
1037: if nonzero structure of submatrix B cannot change then we know that
1038: no processor disassembled thus we can skip this stuff
1039: */
1040: if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
1041: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
1042: if (mat->was_assembled && !other_disassembled) {
1043: DisAssemble_MPIBAIJ(mat);
1044: }
1045: }
1047: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1048: MatSetUpMultiply_MPIBAIJ(mat);
1049: }
1050: MatAssemblyBegin(baij->B,mode);
1051: MatAssemblyEnd(baij->B,mode);
1052:
1053: #if defined(PETSC_USE_BOPT_g)
1054: if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1055: PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1056: baij->ht_total_ct = 0;
1057: baij->ht_insert_ct = 0;
1058: }
1059: #endif
1060: if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1061: MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);
1062: mat->ops->setvalues = MatSetValues_MPIBAIJ_HT;
1063: mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1064: }
1066: if (baij->rowvalues) {
1067: PetscFree(baij->rowvalues);
1068: baij->rowvalues = 0;
1069: }
1070: #if defined(PETSC_HAVE_DSCPACK)
1071: PetscOptionsHasName(mat->prefix,"-mat_baij_dscpack",&flag);
1072: if (flag) { MatUseDSCPACK_MPIBAIJ(mat); }
1073: #endif
1074: return(0);
1075: }
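/*
   Editorial sketch, not part of mpibaij.c: the stash traffic handled by the
   two assembly routines above. Entries set for rows owned by another process
   are held in mat->stash (point values) or mat->bstash (blocked values),
   shipped to their owners in MatAssemblyBegin_MPIBAIJ(), and inserted in
   MatAssemblyEnd_MPIBAIJ(). AddOffProcessEntry() is a hypothetical helper
   written only for this example.
*/
static int AddOffProcessEntry(Mat mat,int grow,PetscScalar val)
{
  int ierr;

  ierr = MatSetValues(mat,1,&grow,1,&grow,&val,ADD_VALUES);CHKERRQ(ierr);  /* stashed if grow is off-process */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);           /* scatters stashed entries */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);             /* inserts them on the owning process */
  return 0;
}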
1077: extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
1079: #undef __FUNCT__
1081: static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1082: {
1083: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1084: int ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
1085: PetscTruth isascii,isdraw;
1086: PetscViewer sviewer;
1087: PetscViewerFormat format;
1090: /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1091: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
1092: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1093: if (isascii) {
1094: PetscViewerGetFormat(viewer,&format);
1095: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1096: MatInfo info;
1097: MPI_Comm_rank(mat->comm,&rank);
1098: MatGetInfo(mat,MAT_LOCAL,&info);
1099: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1100: rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1101: baij->bs,(int)info.memory);
1102: MatGetInfo(baij->A,MAT_LOCAL,&info);
1103: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
1104: MatGetInfo(baij->B,MAT_LOCAL,&info);
1105: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);
1106: PetscViewerFlush(viewer);
1107: VecScatterView(baij->Mvctx,viewer);
1108: return(0);
1109: } else if (format == PETSC_VIEWER_ASCII_INFO) {
1110: PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);
1111: return(0);
1112: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1113: #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
1114: MatMPIBAIJFactorInfo_DSCPACK(mat,viewer);
1115: #endif
1116: return(0);
1117: }
1118: }
1120: if (isdraw) {
1121: PetscDraw draw;
1122: PetscTruth isnull;
1123: PetscViewerDrawGetDraw(viewer,0,&draw);
1124: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
1125: }
1127: if (size == 1) {
1128: PetscObjectSetName((PetscObject)baij->A,mat->name);
1129: MatView(baij->A,viewer);
1130: } else {
1131: /* assemble the entire matrix onto first processor. */
1132: Mat A;
1133: Mat_SeqBAIJ *Aloc;
1134: int M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1135: MatScalar *a;
1137: if (!rank) {
1138: MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
1139: } else {
1140: MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
1141: }
1142: PetscLogObjectParent(mat,A);
1144: /* copy over the A part */
1145: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1146: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1147: PetscMalloc(bs*sizeof(int),&rvals);
1149: for (i=0; i<mbs; i++) {
1150: rvals[0] = bs*(baij->rstart + i);
1151: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1152: for (j=ai[i]; j<ai[i+1]; j++) {
1153: col = (baij->cstart+aj[j])*bs;
1154: for (k=0; k<bs; k++) {
1155: MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);
1156: col++; a += bs;
1157: }
1158: }
1159: }
1160: /* copy over the B part */
1161: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1162: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1163: for (i=0; i<mbs; i++) {
1164: rvals[0] = bs*(baij->rstart + i);
1165: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1166: for (j=ai[i]; j<ai[i+1]; j++) {
1167: col = baij->garray[aj[j]]*bs;
1168: for (k=0; k<bs; k++) {
1169: MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);
1170: col++; a += bs;
1171: }
1172: }
1173: }
1174: PetscFree(rvals);
1175: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1176: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1177: /*
1178: Everyone has to call to draw the matrix since the graphics waits are
1179: synchronized across all processors that share the PetscDraw object
1180: */
1181: PetscViewerGetSingleton(viewer,&sviewer);
1182: if (!rank) {
1183: PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);
1184: MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);
1185: }
1186: PetscViewerRestoreSingleton(viewer,&sviewer);
1187: MatDestroy(A);
1188: }
1189: return(0);
1190: }
1192: #undef __FUNCT__
1194: int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1195: {
1196: int ierr;
1197: PetscTruth isascii,isdraw,issocket,isbinary;
1200: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
1201: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
1202: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
1203: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
1204: if (isascii || isdraw || issocket || isbinary) {
1205: MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);
1206: } else {
1207: SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1208: }
1209: return(0);
1210: }
1212: #undef __FUNCT__
1214: int MatDestroy_MPIBAIJ(Mat mat)
1215: {
1216: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1217: int ierr;
1220: #if defined(PETSC_USE_LOG)
1221: PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
1222: #endif
1223: MatStashDestroy_Private(&mat->stash);
1224: MatStashDestroy_Private(&mat->bstash);
1225: PetscFree(baij->rowners);
1226: MatDestroy(baij->A);
1227: MatDestroy(baij->B);
1228: #if defined (PETSC_USE_CTABLE)
1229: if (baij->colmap) {PetscTableDelete(baij->colmap);}
1230: #else
1231: if (baij->colmap) {PetscFree(baij->colmap);}
1232: #endif
1233: if (baij->garray) {PetscFree(baij->garray);}
1234: if (baij->lvec) {VecDestroy(baij->lvec);}
1235: if (baij->Mvctx) {VecScatterDestroy(baij->Mvctx);}
1236: if (baij->rowvalues) {PetscFree(baij->rowvalues);}
1237: if (baij->barray) {PetscFree(baij->barray);}
1238: if (baij->hd) {PetscFree(baij->hd);}
1239: #if defined(PETSC_USE_MAT_SINGLE)
1240: if (baij->setvaluescopy) {PetscFree(baij->setvaluescopy);}
1241: #endif
1242: PetscFree(baij);
1243: return(0);
1244: }
1246: #undef __FUNCT__
1248: int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1249: {
1250: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1251: int ierr,nt;
1254: VecGetLocalSize(xx,&nt);
1255: if (nt != A->n) {
1256: SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1257: }
1258: VecGetLocalSize(yy,&nt);
1259: if (nt != A->m) {
1260: SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1261: }
1262: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1263: (*a->A->ops->mult)(a->A,xx,yy);
1264: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1265: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
1266: VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1267: return(0);
1268: }
1270: #undef __FUNCT__
1272: int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1273: {
1274: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1275: int ierr;
1278: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1279: (*a->A->ops->multadd)(a->A,xx,yy,zz);
1280: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
1281: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
1282: return(0);
1283: }
1285: #undef __FUNCT__
1287: int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1288: {
1289: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1290: int ierr;
1293: /* do nondiagonal part */
1294: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1295: /* send it on its way */
1296: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1297: /* do local part */
1298: (*a->A->ops->multtranspose)(a->A,xx,yy);
1299: /* receive remote parts: note this assumes the values are not actually */
1300: /* inserted in yy until the next line, which is true for my implementation*/
1301: /* but is not perhaps always true. */
1302: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1303: return(0);
1304: }
1306: #undef __FUNCT__
1308: int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1309: {
1310: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1311: int ierr;
1314: /* do nondiagonal part */
1315: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
1316: /* send it on its way */
1317: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1318: /* do local part */
1319: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
1320: /* receive remote parts: note this assumes the values are not actually */
1321: /* inserted in yy until the next line, which is true for my implementation*/
1322: /* but is not perhaps always true. */
1323: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
1324: return(0);
1325: }
1327: /*
1328: This only works correctly for square matrices where the subblock A->A is the
1329: diagonal block
1330: */
1331: #undef __FUNCT__
1333: int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1334: {
1335: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1336: int ierr;
1339: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1340: MatGetDiagonal(a->A,v);
1341: return(0);
1342: }
1344: #undef __FUNCT__
1346: int MatScale_MPIBAIJ(PetscScalar *aa,Mat A)
1347: {
1348: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1349: int ierr;
1352: MatScale(aa,a->A);
1353: MatScale(aa,a->B);
1354: return(0);
1355: }
1357: #undef __FUNCT__
1359: int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1360: {
1361: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1362: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1363: int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1364: int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1365: int *cmap,*idx_p,cstart = mat->cstart;
1368: if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1369: mat->getrowactive = PETSC_TRUE;
1371: if (!mat->rowvalues && (idx || v)) {
1372: /*
1373: allocate enough space to hold information from the longest row.
1374: */
1375: Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1376: int max = 1,mbs = mat->mbs,tmp;
1377: for (i=0; i<mbs; i++) {
1378: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1379: if (max < tmp) { max = tmp; }
1380: }
1381: PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);
1382: mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1383: }
1384:
1385: if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1386: lrow = row - brstart;
1388: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1389: if (!v) {pvA = 0; pvB = 0;}
1390: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1391: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1392: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1393: nztot = nzA + nzB;
1395: cmap = mat->garray;
1396: if (v || idx) {
1397: if (nztot) {
1398: /* Sort by increasing column numbers, assuming A and B already sorted */
1399: int imark = -1;
1400: if (v) {
1401: *v = v_p = mat->rowvalues;
1402: for (i=0; i<nzB; i++) {
1403: if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1404: else break;
1405: }
1406: imark = i;
1407: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1408: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1409: }
1410: if (idx) {
1411: *idx = idx_p = mat->rowindices;
1412: if (imark > -1) {
1413: for (i=0; i<imark; i++) {
1414: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1415: }
1416: } else {
1417: for (i=0; i<nzB; i++) {
1418: if (cmap[cworkB[i]/bs] < cstart)
1419: idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1420: else break;
1421: }
1422: imark = i;
1423: }
1424: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i];
1425: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1426: }
1427: } else {
1428: if (idx) *idx = 0;
1429: if (v) *v = 0;
1430: }
1431: }
1432: *nz = nztot;
1433: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1434: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1435: return(0);
1436: }
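/*
   Editorial sketch, not part of mpibaij.c: typical use of the routine above
   from application code. Only locally owned rows may be requested, and each
   MatGetRow() must be paired with MatRestoreRow() before asking for the next
   row. PrintLocalRowSizes() is a hypothetical helper for this example.
*/
static int PrintLocalRowSizes(Mat mat,int rstart,int rend)
{
  int         ierr,row,nz,*cols;
  PetscScalar *vals;

  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF,"row %d: %d nonzeros\n",row,nz);CHKERRQ(ierr);
    ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
  }
  return 0;
}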
1438: #undef __FUNCT__
1440: int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1441: {
1442: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1445: if (baij->getrowactive == PETSC_FALSE) {
1446: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1447: }
1448: baij->getrowactive = PETSC_FALSE;
1449: return(0);
1450: }
1452: #undef __FUNCT__
1454: int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1455: {
1456: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1459: *bs = baij->bs;
1460: return(0);
1461: }
1463: #undef __FUNCT__
1465: int MatZeroEntries_MPIBAIJ(Mat A)
1466: {
1467: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1468: int ierr;
1471: MatZeroEntries(l->A);
1472: MatZeroEntries(l->B);
1473: return(0);
1474: }
1476: #undef __FUNCT__
1478: int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1479: {
1480: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1481: Mat A = a->A,B = a->B;
1482: int ierr;
1483: PetscReal isend[5],irecv[5];
1486: info->block_size = (PetscReal)a->bs;
1487: MatGetInfo(A,MAT_LOCAL,info);
1488: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1489: isend[3] = info->memory; isend[4] = info->mallocs;
1490: MatGetInfo(B,MAT_LOCAL,info);
1491: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1492: isend[3] += info->memory; isend[4] += info->mallocs;
1493: if (flag == MAT_LOCAL) {
1494: info->nz_used = isend[0];
1495: info->nz_allocated = isend[1];
1496: info->nz_unneeded = isend[2];
1497: info->memory = isend[3];
1498: info->mallocs = isend[4];
1499: } else if (flag == MAT_GLOBAL_MAX) {
1500: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1501: info->nz_used = irecv[0];
1502: info->nz_allocated = irecv[1];
1503: info->nz_unneeded = irecv[2];
1504: info->memory = irecv[3];
1505: info->mallocs = irecv[4];
1506: } else if (flag == MAT_GLOBAL_SUM) {
1507: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1508: info->nz_used = irecv[0];
1509: info->nz_allocated = irecv[1];
1510: info->nz_unneeded = irecv[2];
1511: info->memory = irecv[3];
1512: info->mallocs = irecv[4];
1513: } else {
1514: SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1515: }
1516: info->rows_global = (PetscReal)A->M;
1517: info->columns_global = (PetscReal)A->N;
1518: info->rows_local = (PetscReal)A->m;
1519: info->columns_local = (PetscReal)A->N;
1520: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1521: info->fill_ratio_needed = 0;
1522: info->factor_mallocs = 0;
1523: return(0);
1524: }
1526: #undef __FUNCT__
1528: int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1529: {
1530: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1531: int ierr;
1534: switch (op) {
1535: case MAT_NO_NEW_NONZERO_LOCATIONS:
1536: case MAT_YES_NEW_NONZERO_LOCATIONS:
1537: case MAT_COLUMNS_UNSORTED:
1538: case MAT_COLUMNS_SORTED:
1539: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1540: case MAT_KEEP_ZEROED_ROWS:
1541: case MAT_NEW_NONZERO_LOCATION_ERR:
1542: MatSetOption(a->A,op);
1543: MatSetOption(a->B,op);
1544: break;
1545: case MAT_ROW_ORIENTED:
1546: a->roworiented = PETSC_TRUE;
1547: MatSetOption(a->A,op);
1548: MatSetOption(a->B,op);
1549: break;
1550: case MAT_ROWS_SORTED:
1551: case MAT_ROWS_UNSORTED:
1552: case MAT_YES_NEW_DIAGONALS:
1553: PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1554: break;
1555: case MAT_COLUMN_ORIENTED:
1556: a->roworiented = PETSC_FALSE;
1557: MatSetOption(a->A,op);
1558: MatSetOption(a->B,op);
1559: break;
1560: case MAT_IGNORE_OFF_PROC_ENTRIES:
1561: a->donotstash = PETSC_TRUE;
1562: break;
1563: case MAT_NO_NEW_DIAGONALS:
1564: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1565: case MAT_USE_HASH_TABLE:
1566: a->ht_flag = PETSC_TRUE;
1567: break;
1568: default:
1569: SETERRQ(PETSC_ERR_SUP,"unknown option");
1570: }
1571: return(0);
1572: }
1574: #undef __FUNCT__
1576: int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1577: {
1578: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1579: Mat_SeqBAIJ *Aloc;
1580: Mat B;
1581: int ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1582: int bs=baij->bs,mbs=baij->mbs;
1583: MatScalar *a;
1584:
1586: if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1587: MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1588:
1589: /* copy over the A part */
1590: Aloc = (Mat_SeqBAIJ*)baij->A->data;
1591: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1592: PetscMalloc(bs*sizeof(int),&rvals);
1593:
1594: for (i=0; i<mbs; i++) {
1595: rvals[0] = bs*(baij->rstart + i);
1596: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1597: for (j=ai[i]; j<ai[i+1]; j++) {
1598: col = (baij->cstart+aj[j])*bs;
1599: for (k=0; k<bs; k++) {
1600: MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);
1601: col++; a += bs;
1602: }
1603: }
1604: }
1605: /* copy over the B part */
1606: Aloc = (Mat_SeqBAIJ*)baij->B->data;
1607: ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1608: for (i=0; i<mbs; i++) {
1609: rvals[0] = bs*(baij->rstart + i);
1610: for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1611: for (j=ai[i]; j<ai[i+1]; j++) {
1612: col = baij->garray[aj[j]]*bs;
1613: for (k=0; k<bs; k++) {
1614: MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);
1615: col++; a += bs;
1616: }
1617: }
1618: }
1619: PetscFree(rvals);
1620: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1621: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1622:
1623: if (matout) {
1624: *matout = B;
1625: } else {
1626: MatHeaderCopy(A,B);
1627: }
1628: return(0);
1629: }
1631: #undef __FUNCT__
1633: int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1634: {
1635: Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1636: Mat a = baij->A,b = baij->B;
1637: int ierr,s1,s2,s3;
1640: MatGetLocalSize(mat,&s2,&s3);
1641: if (rr) {
1642: VecGetLocalSize(rr,&s1);
1643: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1644: /* Overlap communication with computation. */
1645: VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);
1646: }
1647: if (ll) {
1648: VecGetLocalSize(ll,&s1);
1649: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1650: (*b->ops->diagonalscale)(b,ll,PETSC_NULL);
1651: }
1652: /* scale the diagonal block */
1653: (*a->ops->diagonalscale)(a,ll,rr);
1655: if (rr) {
1656: /* Do a scatter end and then right scale the off-diagonal block */
1657: VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);
1658: (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);
1659: }
1660:
1661: return(0);
1662: }
1664: #undef __FUNCT__
1666: int MatZeroRows_MPIBAIJ(Mat A,IS is,PetscScalar *diag)
1667: {
1668: Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1669: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1670: int *nprocs,j,idx,nsends,row;
1671: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1672: int *rvalues,tag = A->tag,count,base,slen,n,*source;
1673: int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1674: MPI_Comm comm = A->comm;
1675: MPI_Request *send_waits,*recv_waits;
1676: MPI_Status recv_status,*send_status;
1677: IS istmp;
1678: PetscTruth found;
1679:
1681: ISGetLocalSize(is,&N);
1682: ISGetIndices(is,&rows);
1683:
1684: /* first count number of contributors to each processor */
1685: ierr = PetscMalloc(2*size*sizeof(int),&nprocs);
1686: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
1687: ierr = PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
1688: for (i=0; i<N; i++) {
1689: idx = rows[i];
1690: found = PETSC_FALSE;
1691: for (j=0; j<size; j++) {
1692: if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1693: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1694: }
1695: }
1696: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1697: }
1698: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1699:
1700: /* inform other processors of number of messages and max length*/
1701: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
1702:
1703: /* post receives: */
1704: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
1705: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
1706: for (i=0; i<nrecvs; i++) {
1707: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
1708: }
1709:
1710: /* do sends:
1711: 1) starts[i] gives the starting index in svalues for stuff going to
1712: the ith processor
1713: */
1714: PetscMalloc((N+1)*sizeof(int),&svalues);
1715: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
1716: PetscMalloc((size+1)*sizeof(int),&starts);
1717: starts[0] = 0;
1718: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1719: for (i=0; i<N; i++) {
1720: svalues[starts[owner[i]]++] = rows[i];
1721: }
1722: ISRestoreIndices(is,&rows);
1723:
1724: starts[0] = 0;
1725: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1726: count = 0;
1727: for (i=0; i<size; i++) {
1728: if (nprocs[2*i+1]) {
1729: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
1730: }
1731: }
1732: PetscFree(starts);
1734: base = owners[rank]*bs;
1735:
1736: /* wait on receives */
1737: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
1738: source = lens + nrecvs;
1739: count = nrecvs; slen = 0;
1740: while (count) {
1741: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1742: /* unpack receives into our local space */
1743: MPI_Get_count(&recv_status,MPI_INT,&n);
1744: source[imdex] = recv_status.MPI_SOURCE;
1745: lens[imdex] = n;
1746: slen += n;
1747: count--;
1748: }
1749: PetscFree(recv_waits);
1750:
1751: /* move the data into the send scatter */
1752: PetscMalloc((slen+1)*sizeof(int),&lrows);
1753: count = 0;
1754: for (i=0; i<nrecvs; i++) {
1755: values = rvalues + i*nmax;
1756: for (j=0; j<lens[i]; j++) {
1757: lrows[count++] = values[j] - base;
1758: }
1759: }
1760: PetscFree(rvalues);
1761: PetscFree(lens);
1762: PetscFree(owner);
1763: PetscFree(nprocs);
1764:
1765: /* actually zap the local rows */
1766: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
1767: PetscLogObjectParent(A,istmp);
1769: /*
1770: Zero the required rows. If the "diagonal block" of the matrix
1771: is square and the user wishes to set the diagonal we use separate
1772: code so that MatSetValues() is not called for each diagonal allocating
1773: new memory, thus calling lots of mallocs and slowing things down.
1775: Contributed by: Mathew Knepley
1776: */
1777: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1778: MatZeroRows_SeqBAIJ(l->B,istmp,0);
1779: if (diag && (l->A->M == l->A->N)) {
1780: MatZeroRows_SeqBAIJ(l->A,istmp,diag);
1781: } else if (diag) {
1782: MatZeroRows_SeqBAIJ(l->A,istmp,0);
1783: if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1784: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1785: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1786: }
1787: for (i=0; i<slen; i++) {
1788: row = lrows[i] + rstart_bs;
1789: MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
1790: }
1791: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1792: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1793: } else {
1794: MatZeroRows_SeqBAIJ(l->A,istmp,0);
1795: }
1797: ISDestroy(istmp);
1798: PetscFree(lrows);
1800: /* wait on sends */
1801: if (nsends) {
1802: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1803: MPI_Waitall(nsends,send_waits,send_status);
1804: PetscFree(send_status);
1805: }
1806: PetscFree(send_waits);
1807: PetscFree(svalues);
1809: return(0);
1810: }
1812: #undef __FUNCT__
1814: int MatPrintHelp_MPIBAIJ(Mat A)
1815: {
1816: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1817: MPI_Comm comm = A->comm;
1818: static int called = 0;
1819: int ierr;
1822: if (!a->rank) {
1823: MatPrintHelp_SeqBAIJ(a->A);
1824: }
1825: if (called) {return(0);} else called = 1;
1826:   (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");
1827:   (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");
1828: return(0);
1829: }
1831: #undef __FUNCT__
1833: int MatSetUnfactored_MPIBAIJ(Mat A)
1834: {
1835: Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1836: int ierr;
1839: MatSetUnfactored(a->A);
1840: return(0);
1841: }
1843: static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1845: #undef __FUNCT__
1847: int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1848: {
1849: Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1850: Mat a,b,c,d;
1851: PetscTruth flg;
1852: int ierr;
1855: PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);
1856: if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1857: a = matA->A; b = matA->B;
1858: c = matB->A; d = matB->B;
1860: MatEqual(a,c,&flg);
1861: if (flg == PETSC_TRUE) {
1862: MatEqual(b,d,&flg);
1863: }
1864: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1865: return(0);
1866: }
1869: #undef __FUNCT__
1871: int MatSetUpPreallocation_MPIBAIJ(Mat A)
1872: {
1873: int ierr;
1876: MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1877: return(0);
1878: }
1880: /* -------------------------------------------------------------------*/
1881: static struct _MatOps MatOps_Values = {
1882: MatSetValues_MPIBAIJ,
1883: MatGetRow_MPIBAIJ,
1884: MatRestoreRow_MPIBAIJ,
1885: MatMult_MPIBAIJ,
1886: MatMultAdd_MPIBAIJ,
1887: MatMultTranspose_MPIBAIJ,
1888: MatMultTransposeAdd_MPIBAIJ,
1889: 0,
1890: 0,
1891: 0,
1892: 0,
1893: 0,
1894: 0,
1895: 0,
1896: MatTranspose_MPIBAIJ,
1897: MatGetInfo_MPIBAIJ,
1898: MatEqual_MPIBAIJ,
1899: MatGetDiagonal_MPIBAIJ,
1900: MatDiagonalScale_MPIBAIJ,
1901: MatNorm_MPIBAIJ,
1902: MatAssemblyBegin_MPIBAIJ,
1903: MatAssemblyEnd_MPIBAIJ,
1904: 0,
1905: MatSetOption_MPIBAIJ,
1906: MatZeroEntries_MPIBAIJ,
1907: MatZeroRows_MPIBAIJ,
1908: 0,
1909: 0,
1910: 0,
1911: 0,
1912: MatSetUpPreallocation_MPIBAIJ,
1913: 0,
1914: 0,
1915: 0,
1916: 0,
1917: MatDuplicate_MPIBAIJ,
1918: 0,
1919: 0,
1920: 0,
1921: 0,
1922: 0,
1923: MatGetSubMatrices_MPIBAIJ,
1924: MatIncreaseOverlap_MPIBAIJ,
1925: MatGetValues_MPIBAIJ,
1926: 0,
1927: MatPrintHelp_MPIBAIJ,
1928: MatScale_MPIBAIJ,
1929: 0,
1930: 0,
1931: 0,
1932: MatGetBlockSize_MPIBAIJ,
1933: 0,
1934: 0,
1935: 0,
1936: 0,
1937: 0,
1938: 0,
1939: MatSetUnfactored_MPIBAIJ,
1940: 0,
1941: MatSetValuesBlocked_MPIBAIJ,
1942: 0,
1943: MatDestroy_MPIBAIJ,
1944: MatView_MPIBAIJ,
1945: MatGetPetscMaps_Petsc,
1946: 0,
1947: 0,
1948: 0,
1949: 0,
1950: 0,
1951: 0,
1952: MatGetRowMax_MPIBAIJ};
1955: EXTERN_C_BEGIN
1956: #undef __FUNCT__
1958: int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1959: {
1961: *a = ((Mat_MPIBAIJ *)A->data)->A;
1962: *iscopy = PETSC_FALSE;
1963: return(0);
1964: }
1965: EXTERN_C_END
1967: EXTERN_C_BEGIN
1968: #undef __FUNCT__
1970: int MatCreate_MPIBAIJ(Mat B)
1971: {
1972: Mat_MPIBAIJ *b;
1973: int ierr;
1974: PetscTruth flg;
1978: PetscNew(Mat_MPIBAIJ,&b);
1979: B->data = (void*)b;
1981: ierr = PetscMemzero(b,sizeof(Mat_MPIBAIJ));
1982: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1983: B->mapping = 0;
1984: B->factor = 0;
1985: B->assembled = PETSC_FALSE;
1987: B->insertmode = NOT_SET_VALUES;
1988: MPI_Comm_rank(B->comm,&b->rank);
1989: MPI_Comm_size(B->comm,&b->size);
1991: /* build local table of row and column ownerships */
1992: ierr = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);
1993: PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
1994: b->cowners = b->rowners + b->size + 2;
1995: b->rowners_bs = b->cowners + b->size + 2;
1997: /* build cache for off array entries formed */
1998: MatStashCreate_Private(B->comm,1,&B->stash);
1999: b->donotstash = PETSC_FALSE;
2000: b->colmap = PETSC_NULL;
2001: b->garray = PETSC_NULL;
2002: b->roworiented = PETSC_TRUE;
2004: #if defined(PETSC_USE_MAT_SINGLE)
2005: /* stuff for MatSetValues_XXX in single precision */
2006: b->setvalueslen = 0;
2007: b->setvaluescopy = PETSC_NULL;
2008: #endif
2010: /* stuff used in block assembly */
2011: b->barray = 0;
2013: /* stuff used for matrix vector multiply */
2014: b->lvec = 0;
2015: b->Mvctx = 0;
2017: /* stuff for MatGetRow() */
2018: b->rowindices = 0;
2019: b->rowvalues = 0;
2020: b->getrowactive = PETSC_FALSE;
2022: /* hash table stuff */
2023: b->ht = 0;
2024: b->hd = 0;
2025: b->ht_size = 0;
2026: b->ht_flag = PETSC_FALSE;
2027: b->ht_fact = 0;
2028: b->ht_total_ct = 0;
2029: b->ht_insert_ct = 0;
2031: PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);
2032: if (flg) {
2033: PetscReal fact = 1.39;
2034: MatSetOption(B,MAT_USE_HASH_TABLE);
2035: PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);
2036: if (fact <= 1.0) fact = 1.39;
2037: MatMPIBAIJSetHashTableFactor(B,fact);
2038:     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2039: }
2040: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2041: "MatStoreValues_MPIBAIJ",
2042: MatStoreValues_MPIBAIJ);
2043: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2044: "MatRetrieveValues_MPIBAIJ",
2045: MatRetrieveValues_MPIBAIJ);
2046: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2047: "MatGetDiagonalBlock_MPIBAIJ",
2048: MatGetDiagonalBlock_MPIBAIJ);
2049: return(0);
2050: }
2051: EXTERN_C_END
2053: #undef __FUNCT__
2055: /*@C
2056: MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2057: (block compressed row). For good matrix assembly performance
2058: the user should preallocate the matrix storage by setting the parameters
2059: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2060: performance can be increased by more than a factor of 50.
2062: Collective on Mat
2064: Input Parameters:
2065: + A - the matrix
2066: .  bs   - size of block
2067: . d_nz - number of block nonzeros per block row in diagonal portion of local
2068: submatrix (same for all local rows)
2069: . d_nnz - array containing the number of block nonzeros in the various block rows
2070:            in the diagonal portion of the local submatrix (possibly different for each block
2071: row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2072: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2073: submatrix (same for all local rows).
2074: -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2075: off-diagonal portion of the local submatrix (possibly different for
2076: each block row) or PETSC_NULL.
2078: Output Parameter:
2081: Options Database Keys:
2082: . -mat_no_unroll - uses code that does not unroll the loops in the
2083: block calculations (much slower)
2084: . -mat_block_size - size of the blocks to use
2086: Notes:
2087: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2088:    then it must be used on all processors that share the object for that argument.
2090: Storage Information:
2091: For a square global matrix we define each processor's diagonal portion
2092: to be its local rows and the corresponding columns (a square submatrix);
2093: each processor's off-diagonal portion encompasses the remainder of the
2094: local matrix (a rectangular submatrix).
2096: The user can specify preallocated storage for the diagonal part of
2097: the local submatrix with either d_nz or d_nnz (not both). Set
2098: d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2099: memory allocation. Likewise, specify preallocated storage for the
2100: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2102: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2103: the figure below we depict these three local rows and all columns (0-11).
2105: .vb
2106: 0 1 2 3 4 5 6 7 8 9 10 11
2107: -------------------
2108: row 3 | o o o d d d o o o o o o
2109: row 4 | o o o d d d o o o o o o
2110: row 5 | o o o d d d o o o o o o
2111: -------------------
2112: .ve
2113:
2114: Thus, any entries in the d locations are stored in the d (diagonal)
2115: submatrix, and any entries in the o locations are stored in the
2116: o (off-diagonal) submatrix. Note that the d and the o submatrices are
2117: stored simply in the MATSEQBAIJ format for compressed row storage.
2119: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2120: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2121: In general, for PDE problems in which most nonzeros are near the diagonal,
2122: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2123: or you will get TERRIBLE performance; see the users' manual chapter on
2124: matrices.
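
   Example:
   For illustration only, a minimal call sequence might look like the following
   sketch (comm, M and N are assumed to be defined elsewhere; the block size 2
   and the preallocation values 5 and 2 are arbitrary):
.vb
      Mat A;
      MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);
      MatSetType(A,MATMPIBAIJ);
      MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);
      /* ... MatSetValuesBlocked(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve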
2126: Level: intermediate
2128: .keywords: matrix, block, aij, compressed row, sparse, parallel
2130: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2131: @*/
2132: int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2133: {
2134: Mat_MPIBAIJ *b;
2135: int ierr,i;
2136: PetscTruth flg2;
2139: PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);
2140: if (!flg2) return(0);
2142: B->preallocated = PETSC_TRUE;
2143: PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);
2145: if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2146: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2147: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2148: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2149: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2150: if (d_nnz) {
2151: for (i=0; i<B->m/bs; i++) {
2152:       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2153: }
2154: }
2155: if (o_nnz) {
2156: for (i=0; i<B->m/bs; i++) {
2157:       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2158: }
2159: }
2160:
2161: PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);
2162: PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);
2163: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
2164: PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);
2166: b = (Mat_MPIBAIJ*)B->data;
2167: b->bs = bs;
2168: b->bs2 = bs*bs;
2169: b->mbs = B->m/bs;
2170: b->nbs = B->n/bs;
2171: b->Mbs = B->M/bs;
2172: b->Nbs = B->N/bs;
2174: MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
2175: b->rowners[0] = 0;
2176: for (i=2; i<=b->size; i++) {
2177: b->rowners[i] += b->rowners[i-1];
2178: }
2179: b->rstart = b->rowners[b->rank];
2180: b->rend = b->rowners[b->rank+1];
2182: MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
2183: b->cowners[0] = 0;
2184: for (i=2; i<=b->size; i++) {
2185: b->cowners[i] += b->cowners[i-1];
2186: }
2187: b->cstart = b->cowners[b->rank];
2188: b->cend = b->cowners[b->rank+1];
2190: for (i=0; i<=b->size; i++) {
2191: b->rowners_bs[i] = b->rowners[i]*bs;
2192: }
2193: b->rstart_bs = b->rstart*bs;
2194: b->rend_bs = b->rend*bs;
2195: b->cstart_bs = b->cstart*bs;
2196: b->cend_bs = b->cend*bs;
2198: MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);
2199: PetscLogObjectParent(B,b->A);
2200: MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);
2201: PetscLogObjectParent(B,b->B);
2202: MatStashCreate_Private(B->comm,bs,&B->bstash);
2204: return(0);
2205: }
2207: #undef __FUNCT__
2209: /*@C
2210: MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2211: (block compressed row). For good matrix assembly performance
2212: the user should preallocate the matrix storage by setting the parameters
2213: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2214: performance can be increased by more than a factor of 50.
2216: Collective on MPI_Comm
2218: Input Parameters:
2219: + comm - MPI communicator
2220: .  bs   - size of block
2221: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2222: This value should be the same as the local size used in creating the
2223: y vector for the matrix-vector product y = Ax.
2224: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2225: This value should be the same as the local size used in creating the
2226: x vector for the matrix-vector product y = Ax.
2227: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2228: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2229: . d_nz - number of nonzero blocks per block row in diagonal portion of local
2230: submatrix (same for all local rows)
2231: . d_nnz - array containing the number of nonzero blocks in the various block rows
2232:            in the diagonal portion of the local submatrix (possibly different for each block
2233: row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero.
2234: . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
2235: submatrix (same for all local rows).
2236: - o_nnz - array containing the number of nonzero blocks in the various block rows of the
2237: off-diagonal portion of the local submatrix (possibly different for
2238: each block row) or PETSC_NULL.
2240: Output Parameter:
2241: . A - the matrix
2243: Options Database Keys:
2244: . -mat_no_unroll - uses code that does not unroll the loops in the
2245: block calculations (much slower)
2246: . -mat_block_size - size of the blocks to use
2248: Notes:
2249:    A nonzero block is any block that has 1 or more nonzeros in it
2251: The user MUST specify either the local or global matrix dimensions
2252: (possibly both).
2254: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2255:    then it must be used on all processors that share the object for that argument.
2257: Storage Information:
2258: For a square global matrix we define each processor's diagonal portion
2259: to be its local rows and the corresponding columns (a square submatrix);
2260: each processor's off-diagonal portion encompasses the remainder of the
2261: local matrix (a rectangular submatrix).
2263: The user can specify preallocated storage for the diagonal part of
2264: the local submatrix with either d_nz or d_nnz (not both). Set
2265: d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2266: memory allocation. Likewise, specify preallocated storage for the
2267: off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2269: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2270: the figure below we depict these three local rows and all columns (0-11).
2272: .vb
2273: 0 1 2 3 4 5 6 7 8 9 10 11
2274: -------------------
2275: row 3 | o o o d d d o o o o o o
2276: row 4 | o o o d d d o o o o o o
2277: row 5 | o o o d d d o o o o o o
2278: -------------------
2279: .ve
2280:
2281: Thus, any entries in the d locations are stored in the d (diagonal)
2282: submatrix, and any entries in the o locations are stored in the
2283: o (off-diagonal) submatrix. Note that the d and the o submatrices are
2284: stored simply in the MATSEQBAIJ format for compressed row storage.
2286: Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2287: and o_nz should indicate the number of block nonzeros per row in the o matrix.
2288: In general, for PDE problems in which most nonzeros are near the diagonal,
2289: one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2290: or you will get TERRIBLE performance; see the users' manual chapter on
2291: matrices.
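
   Example:
   For illustration only, a typical call might look like the following sketch
   (comm, M and N are assumed to be defined elsewhere; the block size 2 and the
   preallocation values 5 and 2 are arbitrary):
.vb
      Mat A;
      MatCreateMPIBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve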
2293: Level: intermediate
2295: .keywords: matrix, block, aij, compressed row, sparse, parallel
2297: .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2298: @*/
2299: int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2300: {
2301: int ierr,size;
2304: MatCreate(comm,m,n,M,N,A);
2305: MPI_Comm_size(comm,&size);
2306: if (size > 1) {
2307: MatSetType(*A,MATMPIBAIJ);
2308: MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);
2309: } else {
2310: MatSetType(*A,MATSEQBAIJ);
2311: MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);
2312: }
2313: return(0);
2314: }
2316: #undef __FUNCT__
2318: static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2319: {
2320: Mat mat;
2321: Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2322: int ierr,len=0;
2325: *newmat = 0;
2326: MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
2327: MatSetType(mat,MATMPIBAIJ);
2328: mat->preallocated = PETSC_TRUE;
2329: mat->assembled = PETSC_TRUE;
2330: a = (Mat_MPIBAIJ*)mat->data;
2331: a->bs = oldmat->bs;
2332: a->bs2 = oldmat->bs2;
2333: a->mbs = oldmat->mbs;
2334: a->nbs = oldmat->nbs;
2335: a->Mbs = oldmat->Mbs;
2336: a->Nbs = oldmat->Nbs;
2337:
2338: a->rstart = oldmat->rstart;
2339: a->rend = oldmat->rend;
2340: a->cstart = oldmat->cstart;
2341: a->cend = oldmat->cend;
2342: a->size = oldmat->size;
2343: a->rank = oldmat->rank;
2344: a->donotstash = oldmat->donotstash;
2345: a->roworiented = oldmat->roworiented;
2346: a->rowindices = 0;
2347: a->rowvalues = 0;
2348: a->getrowactive = PETSC_FALSE;
2349: a->barray = 0;
2350: a->rstart_bs = oldmat->rstart_bs;
2351: a->rend_bs = oldmat->rend_bs;
2352: a->cstart_bs = oldmat->cstart_bs;
2353: a->cend_bs = oldmat->cend_bs;
2355: /* hash table stuff */
2356: a->ht = 0;
2357: a->hd = 0;
2358: a->ht_size = 0;
2359: a->ht_flag = oldmat->ht_flag;
2360: a->ht_fact = oldmat->ht_fact;
2361: a->ht_total_ct = 0;
2362: a->ht_insert_ct = 0;
2364: PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));
2365: MatStashCreate_Private(matin->comm,1,&mat->stash);
2366: MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);
2367: if (oldmat->colmap) {
2368: #if defined (PETSC_USE_CTABLE)
2369: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
2370: #else
2371: PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);
2372: PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2373: PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));
2374: #endif
2375: } else a->colmap = 0;
2376: if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2377: PetscMalloc(len*sizeof(int),&a->garray);
2378: PetscLogObjectMemory(mat,len*sizeof(int));
2379: PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));
2380: } else a->garray = 0;
2381:
2382: VecDuplicate(oldmat->lvec,&a->lvec);
2383: PetscLogObjectParent(mat,a->lvec);
2384: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
2386: PetscLogObjectParent(mat,a->Mvctx);
2387: MatDuplicate(oldmat->A,cpvalues,&a->A);
2388: PetscLogObjectParent(mat,a->A);
2389: MatDuplicate(oldmat->B,cpvalues,&a->B);
2390: PetscLogObjectParent(mat,a->B);
2391: PetscFListDuplicate(matin->qlist,&mat->qlist);
2392: *newmat = mat;
2393: return(0);
2394: }
2396: #include petscsys.h
2398: EXTERN_C_BEGIN
2399: #undef __FUNCT__
2401: int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2402: {
2403: Mat A;
2404: int i,nz,ierr,j,rstart,rend,fd;
2405: PetscScalar *vals,*buf;
2406: MPI_Comm comm = ((PetscObject)viewer)->comm;
2407: MPI_Status status;
2408: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2409: int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2410: int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2411: int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2412: int dcount,kmax,k,nzcount,tmp;
2413:
2415: PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);
2417: MPI_Comm_size(comm,&size);
2418: MPI_Comm_rank(comm,&rank);
2419: if (!rank) {
2420: PetscViewerBinaryGetDescriptor(viewer,&fd);
2421: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
2422:     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2423: if (header[3] < 0) {
2424: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2425: }
2426: }
2428: MPI_Bcast(header+1,3,MPI_INT,0,comm);
2429: M = header[1]; N = header[2];
2431: if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2433: /*
2434: This code adds extra rows to make sure the number of rows is
2435: divisible by the blocksize
2436: */
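  /* Worked illustration (M and bs here are hypothetical): for M=10 and bs=4,
     Mbs = 10/4 = 2 and extra_rows = 4 - 10 + 4*2 = 2; since extra_rows != bs,
     Mbs is bumped to 3 and the matrix is treated as 12x12, with the padded
     rows given a unit diagonal entry further below. */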
2437: Mbs = M/bs;
2438: extra_rows = bs - M + bs*(Mbs);
2439: if (extra_rows == bs) extra_rows = 0;
2440: else Mbs++;
2441:   if (extra_rows && !rank) {
2442:     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2443: }
2445: /* determine ownership of all rows */
2446: mbs = Mbs/size + ((Mbs % size) > rank);
2447: m = mbs*bs;
2448: ierr = PetscMalloc(2*(size+2)*sizeof(int),&rowners);
2449: browners = rowners + size + 1;
2450: ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);
2451: rowners[0] = 0;
2452: for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2453: for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2454: rstart = rowners[rank];
2455: rend = rowners[rank+1];
2457: /* distribute row lengths to all processors */
2458: PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);
2459: if (!rank) {
2460: PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);
2461: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
2462: for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2463: PetscMalloc(size*sizeof(int),&sndcounts);
2464: for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2465: MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
2466: PetscFree(sndcounts);
2467: } else {
2468: MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);
2469: }
2471: if (!rank) {
2472: /* calculate the number of nonzeros on each processor */
2473: PetscMalloc(size*sizeof(int),&procsnz);
2474: PetscMemzero(procsnz,size*sizeof(int));
2475: for (i=0; i<size; i++) {
2476: for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2477: procsnz[i] += rowlengths[j];
2478: }
2479: }
2480: PetscFree(rowlengths);
2481:
2482: /* determine max buffer needed and allocate it */
2483: maxnz = 0;
2484: for (i=0; i<size; i++) {
2485: maxnz = PetscMax(maxnz,procsnz[i]);
2486: }
2487: PetscMalloc(maxnz*sizeof(int),&cols);
2489: /* read in my part of the matrix column indices */
2490: nz = procsnz[0];
2491: ierr = PetscMalloc(nz*sizeof(int),&ibuf);
2492: mycols = ibuf;
2493: if (size == 1) nz -= extra_rows;
2494: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
2495: if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2497:     /* read in the other processors' parts (except the last) and ship them off */
2498: for (i=1; i<size-1; i++) {
2499: nz = procsnz[i];
2500: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2501: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
2502: }
2503: /* read in the stuff for the last proc */
2504: if (size != 1) {
2505: nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
2506: PetscBinaryRead(fd,cols,nz,PETSC_INT);
2507: for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2508: MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);
2509: }
2510: PetscFree(cols);
2511: } else {
2512: /* determine buffer space needed for message */
2513: nz = 0;
2514: for (i=0; i<m; i++) {
2515: nz += locrowlens[i];
2516: }
2517: ierr = PetscMalloc(nz*sizeof(int),&ibuf);
2518: mycols = ibuf;
2519: /* receive message of column indices*/
2520: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
2521: MPI_Get_count(&status,MPI_INT,&maxnz);
2522: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2523: }
2524:
2525: /* loop over local rows, determining number of off diagonal entries */
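  /* The mask arrays below record which block columns have already been seen in
     the current block row: mask[] flags visited block columns, masked1[] holds
     those falling in this processor's diagonal block range [rstart,rend),
     masked2[] holds the off-diagonal ones, so each block column is counted
     exactly once in dlens[]/odlens[]. */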
2526: ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);
2527: odlens = dlens + (rend-rstart);
2528: ierr = PetscMalloc(3*Mbs*sizeof(int),&mask);
2529: ierr = PetscMemzero(mask,3*Mbs*sizeof(int));
2530: masked1 = mask + Mbs;
2531: masked2 = masked1 + Mbs;
2532: rowcount = 0; nzcount = 0;
2533: for (i=0; i<mbs; i++) {
2534: dcount = 0;
2535: odcount = 0;
2536: for (j=0; j<bs; j++) {
2537: kmax = locrowlens[rowcount];
2538: for (k=0; k<kmax; k++) {
2539: tmp = mycols[nzcount++]/bs;
2540: if (!mask[tmp]) {
2541: mask[tmp] = 1;
2542: if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2543: else masked1[dcount++] = tmp;
2544: }
2545: }
2546: rowcount++;
2547: }
2548:
2549: dlens[i] = dcount;
2550: odlens[i] = odcount;
2552: /* zero out the mask elements we set */
2553: for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2554: for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2555: }
2557: /* create our matrix */
2558: MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);
2559: A = *newmat;
2560: MatSetOption(A,MAT_COLUMNS_SORTED);
2561:
2562: if (!rank) {
2563: PetscMalloc(maxnz*sizeof(PetscScalar),&buf);
2564: /* read in my part of the matrix numerical values */
2565: nz = procsnz[0];
2566: vals = buf;
2567: mycols = ibuf;
2568: if (size == 1) nz -= extra_rows;
2569: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2570: if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2572: /* insert into matrix */
2573: jj = rstart*bs;
2574: for (i=0; i<m; i++) {
2575: MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2576: mycols += locrowlens[i];
2577: vals += locrowlens[i];
2578: jj++;
2579: }
2580: /* read in other processors (except the last one) and ship out */
2581: for (i=1; i<size-1; i++) {
2582: nz = procsnz[i];
2583: vals = buf;
2584: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2585: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
2586: }
2587: /* the last proc */
2588: if (size != 1){
2589: nz = procsnz[i] - extra_rows;
2590: vals = buf;
2591: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
2592: for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2593: MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);
2594: }
2595: PetscFree(procsnz);
2596: } else {
2597: /* receive numeric values */
2598: PetscMalloc(nz*sizeof(PetscScalar),&buf);
2600: /* receive message of values*/
2601: vals = buf;
2602: mycols = ibuf;
2603: ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
2604: ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
2605: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2607: /* insert into matrix */
2608: jj = rstart*bs;
2609: for (i=0; i<m; i++) {
2610: ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);
2611: mycols += locrowlens[i];
2612: vals += locrowlens[i];
2613: jj++;
2614: }
2615: }
2616: PetscFree(locrowlens);
2617: PetscFree(buf);
2618: PetscFree(ibuf);
2619: PetscFree(rowners);
2620: PetscFree(dlens);
2621: PetscFree(mask);
2622: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
2623: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
2624: return(0);
2625: }
2626: EXTERN_C_END
2628: #undef __FUNCT__
2630: /*@
2631: MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2633: Input Parameters:
2634: . mat - the matrix
2635: . fact - factor
2637: Collective on Mat
2639: Level: advanced
2641: Notes:
2642:    This can also be set by the command line option: -mat_use_hash_table <fact>
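
   For illustration only (the factor 1.6 is arbitrary and A is assumed to be a
   MATMPIBAIJ matrix):
.vb
      MatSetOption(A,MAT_USE_HASH_TABLE);
      MatMPIBAIJSetHashTableFactor(A,1.6);
.ve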
2644: .keywords: matrix, hashtable, factor, HT
2646: .seealso: MatSetOption()
2647: @*/
2648: int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2649: {
2650: Mat_MPIBAIJ *baij;
2651: int ierr;
2652: PetscTruth flg;
2656: PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);
2657: if (!flg) {
2658: SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only.");
2659: }
2660: baij = (Mat_MPIBAIJ*)mat->data;
2661: baij->ht_fact = fact;
2662: return(0);
2663: }
2665: #undef __FUNCT__
2667: int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2668: {
2669: Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2671: *Ad = a->A;
2672: *Ao = a->B;
2673: *colmap = a->garray;
2674: return(0);
2675: }