Actual source code: mpiaij.c
1: /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
3: #include "src/mat/impls/aij/mpi/mpiaij.h"
4: #include "src/vec/vecimpl.h"
5: #include "src/inline/spops.h"
7: EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8: EXTERN int DisAssemble_MPIAIJ(Mat);
9: EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10: EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11: EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12: EXTERN int MatPrintHelp_SeqAIJ(Mat);
14: /*
15: Local utility routine that creates a mapping from the global column
16: number to the local number in the off-diagonal part of the local
17: storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
18: a slightly higher hash table cost; without it, it is not scalable (each processor
19: has an order N integer array) but is fast to access.
20: */
21: #undef __FUNCT__
23: int CreateColmap_MPIAIJ_Private(Mat mat)
24: {
25: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26: int n = aij->B->n,i,ierr;
29: #if defined (PETSC_USE_CTABLE)
30: PetscTableCreate(n,&aij->colmap);
31: for (i=0; i<n; i++){
32: PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);
33: }
34: #else
35: PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);
36: PetscLogObjectMemory(mat,mat->N*sizeof(int));
37: PetscMemzero(aij->colmap,mat->N*sizeof(int));
38: for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39: #endif
40: return(0);
41: }
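/*
   Illustrative sketch (not part of this file): how the colmap built above is used to
   turn a global column number gcol into a local index lcol in the off-diagonal block,
   mirroring the lookup in MatSetValues_MPIAIJ() below; gcol and lcol are assumed names.
   The map stores indices shifted by one so that a zero lookup result means "not present".

     int gcol,lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/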
43: #define CHUNKSIZE 15
44: #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv)
45: {
46:
47: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift;
48: rmax = aimax[row]; nrow = ailen[row];
49: col1 = col - shift;
50:
51: low = 0; high = nrow;
52: while (high-low > 5) {
53: t = (low+high)/2;
54: if (rp[t] > col) high = t;
55: else low = t;
56: }
57: for (_i=low; _i<high; _i++) {
58: if (rp[_i] > col1) break;
59: if (rp[_i] == col1) {
60: if (addv == ADD_VALUES) ap[_i] += value;
61: else ap[_i] = value;
62: goto a_noinsert;
63: }
64: }
65: if (nonew == 1) goto a_noinsert;
66: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix");
67: if (nrow >= rmax) {
68: /* there is no extra room in row, therefore enlarge */
69: int new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j;
70: PetscScalar *new_a;
71:
72: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
73:
74: /* malloc new storage space */
75: len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int);
76: ierr = PetscMalloc(len,&new_a);
77: new_j = (int*)(new_a + new_nz);
78: new_i = new_j + new_nz;
79:
80: /* copy over old data into new slots */
81: for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
82: for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
83: PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));
84: len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift);
85: PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow,
86: len*sizeof(int));
87: PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));
88: PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow,
89: len*sizeof(PetscScalar));
90: /* free up old matrix storage */
91:
92: PetscFree(a->a);
93: if (!a->singlemalloc) {
94: PetscFree(a->i);
95: PetscFree(a->j);
96: }
97: aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
98: a->singlemalloc = PETSC_TRUE;
99:
100: rp = aj + ai[row] + shift; ap = aa + ai[row] + shift;
101: rmax = aimax[row] = aimax[row] + CHUNKSIZE;
102: PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar)));
103: a->maxnz += CHUNKSIZE;
104: a->reallocs++;
105: }
106: N = nrow++ - 1; a->nz++;
107: /* shift up all the later entries in this row */
108: for (ii=N; ii>=_i; ii--) {
109: rp[ii+1] = rp[ii];
110: ap[ii+1] = ap[ii];
111: }
112: rp[_i] = col1;
113: ap[_i] = value;
114: a_noinsert: ;
115: ailen[row] = nrow;
116: }
118: #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv)
119: {
120:
121: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift;
122: rmax = bimax[row]; nrow = bilen[row];
123: col1 = col - shift;
124:
125: low = 0; high = nrow;
126: while (high-low > 5) {
127: t = (low+high)/2;
128: if (rp[t] > col) high = t;
129: else low = t;
130: }
131: for (_i=low; _i<high; _i++) {
132: if (rp[_i] > col1) break;
133: if (rp[_i] == col1) {
134: if (addv == ADD_VALUES) ap[_i] += value;
135: else ap[_i] = value;
136: goto b_noinsert;
137: }
138: }
139: if (nonew == 1) goto b_noinsert;
140: else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix");
141: if (nrow >= rmax) {
142: /* there is no extra room in row, therefore enlarge */
143: int new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j;
144: PetscScalar *new_a;
145:
146: if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix");
147:
148: /* malloc new storage space */
149: len = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int);
150: ierr = PetscMalloc(len,&new_a);
151: new_j = (int*)(new_a + new_nz);
152: new_i = new_j + new_nz;
153:
154: /* copy over old data into new slots */
155: for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];}
156: for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;}
157: PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));
158: len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift);
159: PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow,
160: len*sizeof(int));
161: PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));
162: PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow,
163: len*sizeof(PetscScalar));
164: /* free up old matrix storage */
165:
166: PetscFree(b->a);
167: if (!b->singlemalloc) {
168: PetscFree(b->i);
169: PetscFree(b->j);
170: }
171: ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;
172: b->singlemalloc = PETSC_TRUE;
173:
174: rp = bj + bi[row] + shift; ap = ba + bi[row] + shift;
175: rmax = bimax[row] = bimax[row] + CHUNKSIZE;
176: PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar)));
177: b->maxnz += CHUNKSIZE;
178: b->reallocs++;
179: }
180: N = nrow++ - 1; b->nz++;
181: /* shift up all the later entries in this row */
182: for (ii=N; ii>=_i; ii--) {
183: rp[ii+1] = rp[ii];
184: ap[ii+1] = ap[ii];
185: }
186: rp[_i] = col1;
187: ap[_i] = value;
188: b_noinsert: ;
189: bilen[row] = nrow;
190: }
192: #undef __FUNCT__
194: int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195: {
196: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
197: PetscScalar value;
198: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199: int cstart = aij->cstart,cend = aij->cend,row,col;
200: PetscTruth roworiented = aij->roworiented;
202: /* Some Variables required in the macro */
203: Mat A = aij->A;
204: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
205: int *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206: PetscScalar *aa = a->a;
207: PetscTruth ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208: Mat B = aij->B;
209: Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
210: int *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211: PetscScalar *ba = b->a;
213: int *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214: int nonew = a->nonew,shift = a->indexshift;
215: PetscScalar *ap;
218: for (i=0; i<m; i++) {
219: if (im[i] < 0) continue;
220: #if defined(PETSC_USE_BOPT_g)
221: if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222: #endif
223: if (im[i] >= rstart && im[i] < rend) {
224: row = im[i] - rstart;
225: for (j=0; j<n; j++) {
226: if (in[j] >= cstart && in[j] < cend){
227: col = in[j] - cstart;
228: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229: if (ignorezeroentries && value == 0.0) continue;
230: MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231: /* MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv); */
232: } else if (in[j] < 0) continue;
233: #if defined(PETSC_USE_BOPT_g)
234: else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235: #endif
236: else {
237: if (mat->was_assembled) {
238: if (!aij->colmap) {
239: CreateColmap_MPIAIJ_Private(mat);
240: }
241: #if defined (PETSC_USE_CTABLE)
242: PetscTableFind(aij->colmap,in[j]+1,&col);
243: col--;
244: #else
245: col = aij->colmap[in[j]] - 1;
246: #endif
247: if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248: DisAssemble_MPIAIJ(mat);
249: col = in[j];
250: /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251: B = aij->B;
252: b = (Mat_SeqAIJ*)B->data;
253: bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254: ba = b->a;
255: }
256: } else col = in[j];
257: if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258: if (ignorezeroentries && value == 0.0) continue;
259: MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260: /* MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv); */
261: }
262: }
263: } else {
264: if (!aij->donotstash) {
265: if (roworiented) {
266: if (ignorezeroentries && v[i*n] == 0.0) continue;
267: MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);
268: } else {
269: if (ignorezeroentries && v[i] == 0.0) continue;
270: MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);
271: }
272: }
273: }
274: }
275: return(0);
276: }
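/*
   Usage sketch (assumed caller code, not part of this file): entries whose row is owned
   locally go directly into aij->A or aij->B through the macros above; entries for rows
   owned by another processor are stashed here and shipped during assembly.

     int         row = 0,cols[2] = {0,5};
     PetscScalar vals[2] = {1.0,2.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/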
278: #undef __FUNCT__
280: int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281: {
282: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283: int ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284: int cstart = aij->cstart,cend = aij->cend,row,col;
287: for (i=0; i<m; i++) {
288: if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289: if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290: if (idxm[i] >= rstart && idxm[i] < rend) {
291: row = idxm[i] - rstart;
292: for (j=0; j<n; j++) {
293: if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294: if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295: if (idxn[j] >= cstart && idxn[j] < cend){
296: col = idxn[j] - cstart;
297: MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);
298: } else {
299: if (!aij->colmap) {
300: CreateColmap_MPIAIJ_Private(mat);
301: }
302: #if defined (PETSC_USE_CTABLE)
303: PetscTableFind(aij->colmap,idxn[j]+1,&col);
304: col --;
305: #else
306: col = aij->colmap[idxn[j]] - 1;
307: #endif
308: if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309: else {
310: MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);
311: }
312: }
313: }
314: } else {
315: SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316: }
317: }
318: return(0);
319: }
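/*
   Usage sketch (assumed caller code): MatGetValues() on an MPIAIJ matrix may only ask
   for rows owned by the calling processor, as enforced above; the ownership range is
   obtained with MatGetOwnershipRange().

     int         rstart,rend,row,col = 0;
     PetscScalar value;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart;
     ierr = MatGetValues(mat,1,&row,1,&col,&value);CHKERRQ(ierr);
*/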
321: #undef __FUNCT__
323: int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324: {
325: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
326: int ierr,nstash,reallocs;
327: InsertMode addv;
330: if (aij->donotstash) {
331: return(0);
332: }
334: /* make sure all processors are either in INSERTMODE or ADDMODE */
335: MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);
336: if (addv == (ADD_VALUES|INSERT_VALUES)) {
337: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338: }
339: mat->insertmode = addv; /* in case this processor had no cache */
341: MatStashScatterBegin_Private(&mat->stash,aij->rowners);
342: MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
343: PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344: return(0);
345: }
348: #undef __FUNCT__
350: int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351: {
352: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
353: int i,j,rstart,ncols,n,ierr,flg;
354: int *row,*col,other_disassembled;
355: PetscScalar *val;
356: InsertMode addv = mat->insertmode;
357: #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
358: PetscTruth flag;
359: #endif
362: if (!aij->donotstash) {
363: while (1) {
364: MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
365: if (!flg) break;
367: for (i=0; i<n;) {
368: /* Now identify the consecutive vals belonging to the same row */
369: for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
370: if (j < n) ncols = j-i;
371: else ncols = n-i;
372: /* Now assemble all these values with a single function call */
373: MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);
374: i = j;
375: }
376: }
377: MatStashScatterEnd_Private(&mat->stash);
378: }
379:
380: MatAssemblyBegin(aij->A,mode);
381: MatAssemblyEnd(aij->A,mode);
383: /* determine if any processor has disassembled, if so we must
384: also disassemble ourselves, in order that we may reassemble. */
385: /*
386: if nonzero structure of submatrix B cannot change then we know that
387: no processor disassembled thus we can skip this stuff
388: */
389: if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
390: MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);
391: if (mat->was_assembled && !other_disassembled) {
392: DisAssemble_MPIAIJ(mat);
393: }
394: }
396: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
397: MatSetUpMultiply_MPIAIJ(mat);
398: }
399: MatAssemblyBegin(aij->B,mode);
400: MatAssemblyEnd(aij->B,mode);
402: if (aij->rowvalues) {
403: PetscFree(aij->rowvalues);
404: aij->rowvalues = 0;
405: }
406: #if defined(PETSC_HAVE_SUPERLUDIST)
407: PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);
408: if (flag) { MatUseSuperLU_DIST_MPIAIJ(mat); }
409: #endif
411: #if defined(PETSC_HAVE_SPOOLES)
412: PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);
413: if (flag) { MatUseSpooles_MPIAIJ(mat); }
414: #endif
415: return(0);
416: }
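/*
   Usage sketch (assumed caller code): if a processor never generates entries for rows
   it does not own, the stash communication handled above can be skipped entirely.

     ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);
*/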
418: #undef __FUNCT__
420: int MatZeroEntries_MPIAIJ(Mat A)
421: {
422: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
423: int ierr;
426: MatZeroEntries(l->A);
427: MatZeroEntries(l->B);
428: return(0);
429: }
431: #undef __FUNCT__
433: int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
434: {
435: Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
436: int i,ierr,N,*rows,*owners = l->rowners,size = l->size;
437: int *nprocs,j,idx,nsends,row;
438: int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
439: int *rvalues,tag = A->tag,count,base,slen,n,*source;
440: int *lens,imdex,*lrows,*values,rstart=l->rstart;
441: MPI_Comm comm = A->comm;
442: MPI_Request *send_waits,*recv_waits;
443: MPI_Status recv_status,*send_status;
444: IS istmp;
445: PetscTruth found;
448: ISGetLocalSize(is,&N);
449: ISGetIndices(is,&rows);
451: /* first count number of contributors to each processor */
452: PetscMalloc(2*size*sizeof(int),&nprocs);
453: ierr = PetscMemzero(nprocs,2*size*sizeof(int));
454: PetscMalloc((N+1)*sizeof(int),&owner); /* see note*/
455: for (i=0; i<N; i++) {
456: idx = rows[i];
457: found = PETSC_FALSE;
458: for (j=0; j<size; j++) {
459: if (idx >= owners[j] && idx < owners[j+1]) {
460: nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
461: }
462: }
463: if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
464: }
465: nsends = 0; for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
467: /* inform other processors of number of messages and max length*/
468: PetscMaxSum(comm,nprocs,&nmax,&nrecvs);
470: /* post receives: */
471: PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);
472: PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);
473: for (i=0; i<nrecvs; i++) {
474: MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);
475: }
477: /* do sends:
478: 1) starts[i] gives the starting index in svalues for stuff going to
479: the ith processor
480: */
481: PetscMalloc((N+1)*sizeof(int),&svalues);
482: PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);
483: PetscMalloc((size+1)*sizeof(int),&starts);
484: starts[0] = 0;
485: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
486: for (i=0; i<N; i++) {
487: svalues[starts[owner[i]]++] = rows[i];
488: }
489: ISRestoreIndices(is,&rows);
491: starts[0] = 0;
492: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
493: count = 0;
494: for (i=0; i<size; i++) {
495: if (nprocs[2*i+1]) {
496: MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);
497: }
498: }
499: PetscFree(starts);
501: base = owners[rank];
503: /* wait on receives */
504: ierr = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);
505: source = lens + nrecvs;
506: count = nrecvs; slen = 0;
507: while (count) {
508: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
509: /* unpack receives into our local space */
510: MPI_Get_count(&recv_status,MPI_INT,&n);
511: source[imdex] = recv_status.MPI_SOURCE;
512: lens[imdex] = n;
513: slen += n;
514: count--;
515: }
516: PetscFree(recv_waits);
517:
518: /* move the data into the send scatter */
519: PetscMalloc((slen+1)*sizeof(int),&lrows);
520: count = 0;
521: for (i=0; i<nrecvs; i++) {
522: values = rvalues + i*nmax;
523: for (j=0; j<lens[i]; j++) {
524: lrows[count++] = values[j] - base;
525: }
526: }
527: PetscFree(rvalues);
528: PetscFree(lens);
529: PetscFree(owner);
530: PetscFree(nprocs);
531:
532: /* actually zap the local rows */
533: ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);
534: PetscLogObjectParent(A,istmp);
536: /*
537: Zero the required rows. If the "diagonal block" of the matrix
538: is square and the user wishes to set the diagonal we use separate
539: code so that MatSetValues() is not called for each diagonal entry, allocating
540: new memory, thus calling lots of mallocs and slowing things down.
542: Contributed by: Mathew Knepley
543: */
544: /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
545: MatZeroRows(l->B,istmp,0);
546: if (diag && (l->A->M == l->A->N)) {
547: ierr = MatZeroRows(l->A,istmp,diag);
548: } else if (diag) {
549: MatZeroRows(l->A,istmp,0);
550: if (((Mat_SeqAIJ*)l->A->data)->nonew) {
551: SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
552: MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
553: }
554: for (i = 0; i < slen; i++) {
555: row = lrows[i] + rstart;
556: MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);
557: }
558: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
559: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
560: } else {
561: MatZeroRows(l->A,istmp,0);
562: }
563: ISDestroy(istmp);
564: PetscFree(lrows);
566: /* wait on sends */
567: if (nsends) {
568: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
569: MPI_Waitall(nsends,send_waits,send_status);
570: PetscFree(send_status);
571: }
572: PetscFree(send_waits);
573: PetscFree(svalues);
575: return(0);
576: }
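/*
   Usage sketch (assumed caller code): each processor passes the global row numbers it
   wants zeroed (they need not be locally owned; they are routed above) and an optional
   value to place on the diagonal.

     IS          rows;
     int         myrows[1] = {0};
     PetscScalar diag = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,1,myrows,&rows);CHKERRQ(ierr);
     ierr = MatZeroRows(A,rows,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(rows);CHKERRQ(ierr);
*/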
578: #undef __FUNCT__
580: int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
581: {
582: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
583: int ierr,nt;
586: VecGetLocalSize(xx,&nt);
587: if (nt != A->n) {
588: SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
589: }
590: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
591: (*a->A->ops->mult)(a->A,xx,yy);
592: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
593: (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);
594: return(0);
595: }
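/*
   Usage sketch (assumed caller code): the input vector must match the matrix column
   layout (local size A->n) and the output vector the row layout (local size A->m),
   which is what the size check above enforces.

     Vec x,y;
     ierr = VecCreateMPI(A->comm,A->n,A->N,&x);CHKERRQ(ierr);
     ierr = VecCreateMPI(A->comm,A->m,A->M,&y);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/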
597: #undef __FUNCT__
599: int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600: {
601: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
602: int ierr;
605: VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
606: (*a->A->ops->multadd)(a->A,xx,yy,zz);
607: VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);
608: (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);
609: return(0);
610: }
612: #undef __FUNCT__
614: int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
615: {
616: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
617: int ierr;
620: /* do nondiagonal part */
621: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
622: /* send it on its way */
623: VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
624: /* do local part */
625: (*a->A->ops->multtranspose)(a->A,xx,yy);
626: /* receive remote parts: note this assumes the values are not actually */
627: /* inserted in yy until the next line, which is true for my implementation */
628: /* but may not always be true. */
629: VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
630: return(0);
631: }
633: #undef __FUNCT__
635: int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
636: {
637: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
638: int ierr;
641: /* do nondiagonal part */
642: (*a->B->ops->multtranspose)(a->B,xx,a->lvec);
643: /* send it on its way */
644: VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
645: /* do local part */
646: (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);
647: /* receive remote parts: note this assumes the values are not actually */
648: /* inserted in zz until the next line, which is true for my implementation */
649: /* but may not always be true. */
650: VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);
651: return(0);
652: }
654: /*
655: This only works correctly for square matrices where the subblock A->A is the
656: diagonal block
657: */
658: #undef __FUNCT__
660: int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
661: {
662: int ierr;
663: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
666: if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
667: if (a->rstart != a->cstart || a->rend != a->cend) {
668: SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
669: }
670: MatGetDiagonal(a->A,v);
671: return(0);
672: }
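/*
   Usage sketch (assumed caller code): the vector must share the matrix row layout,
   since only the square "diagonal block" aij->A is consulted above.

     Vec d;
     ierr = VecCreateMPI(A->comm,A->m,A->M,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
*/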
674: #undef __FUNCT__
676: int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
677: {
678: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
679: int ierr;
682: MatScale(aa,a->A);
683: MatScale(aa,a->B);
684: return(0);
685: }
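/*
   Usage sketch (assumed caller code, using the old-style MatScale() calling sequence
   of this release): scale every stored entry of the parallel matrix.

     PetscScalar alpha = 2.0;
     ierr = MatScale(&alpha,A);CHKERRQ(ierr);
*/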
687: #undef __FUNCT__
689: int MatDestroy_MPIAIJ(Mat mat)
690: {
691: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
692: int ierr;
695: #if defined(PETSC_USE_LOG)
696: PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
697: #endif
698: MatStashDestroy_Private(&mat->stash);
699: PetscFree(aij->rowners);
700: MatDestroy(aij->A);
701: MatDestroy(aij->B);
702: #if defined (PETSC_USE_CTABLE)
703: if (aij->colmap) {PetscTableDelete(aij->colmap);}
704: #else
705: if (aij->colmap) {PetscFree(aij->colmap);}
706: #endif
707: if (aij->garray) {PetscFree(aij->garray);}
708: if (aij->lvec) {VecDestroy(aij->lvec);}
709: if (aij->Mvctx) {VecScatterDestroy(aij->Mvctx);}
710: if (aij->rowvalues) {PetscFree(aij->rowvalues);}
711: PetscFree(aij);
712: return(0);
713: }
715: extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
716: extern int MatFactorInfo_Spooles(Mat,PetscViewer);
718: #undef __FUNCT__
720: int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
721: {
722: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
723: Mat_SeqAIJ* A = (Mat_SeqAIJ*)aij->A->data;
724: Mat_SeqAIJ* B = (Mat_SeqAIJ*)aij->B->data;
725: int nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
726: int nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
727: PetscScalar *column_values;
730: MPI_Comm_rank(mat->comm,&rank);
731: MPI_Comm_size(mat->comm,&size);
732: nz = A->nz + B->nz;
733: if (rank == 0) {
734: header[0] = MAT_FILE_COOKIE;
735: header[1] = mat->M;
736: header[2] = mat->N;
737: MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);
738: PetscViewerBinaryGetDescriptor(viewer,&fd);
739: PetscBinaryWrite(fd,header,4,PETSC_INT,1);
740: /* get largest number of rows any processor has */
741: rlen = mat->m;
742: PetscMapGetGlobalRange(mat->rmap,&range);
743: for (i=1; i<size; i++) {
744: rlen = PetscMax(rlen,range[i+1] - range[i]);
745: }
746: } else {
747: MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);
748: rlen = mat->m;
749: }
751: /* load up the local row counts */
752: PetscMalloc((rlen+1)*sizeof(int),&row_lengths);
753: for (i=0; i<mat->m; i++) {
754: row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
755: }
757: /* store the row lengths to the file */
758: if (rank == 0) {
759: MPI_Status status;
760: PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);
761: for (i=1; i<size; i++) {
762: rlen = range[i+1] - range[i];
763: MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);
764: PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);
765: }
766: } else {
767: MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);
768: }
769: PetscFree(row_lengths);
771: /* load up the local column indices */
772: nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
773: MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);
774: PetscMalloc((nzmax+1)*sizeof(int),&column_indices);
775: cnt = 0;
776: for (i=0; i<mat->m; i++) {
777: for (j=B->i[i]; j<B->i[i+1]; j++) {
778: if ( (col = garray[B->j[j]]) > cstart) break;
779: column_indices[cnt++] = col;
780: }
781: for (k=A->i[i]; k<A->i[i+1]; k++) {
782: column_indices[cnt++] = A->j[k] + cstart;
783: }
784: for (; j<B->i[i+1]; j++) {
785: column_indices[cnt++] = garray[B->j[j]];
786: }
787: }
788: if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
790: /* store the column indices to the file */
791: if (rank == 0) {
792: MPI_Status status;
793: PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);
794: for (i=1; i<size; i++) {
795: MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);
796: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
797: MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);
798: PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);
799: }
800: } else {
801: MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);
802: MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);
803: }
804: PetscFree(column_indices);
806: /* load up the local column values */
807: PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);
808: cnt = 0;
809: for (i=0; i<mat->m; i++) {
810: for (j=B->i[i]; j<B->i[i+1]; j++) {
811: if ( garray[B->j[j]] > cstart) break;
812: column_values[cnt++] = B->a[j];
813: }
814: for (k=A->i[i]; k<A->i[i+1]; k++) {
815: column_values[cnt++] = A->a[k];
816: }
817: for (; j<B->i[i+1]; j++) {
818: column_values[cnt++] = B->a[j];
819: }
820: }
821: if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
823: /* store the column values to the file */
824: if (rank == 0) {
825: MPI_Status status;
826: PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);
827: for (i=1; i<size; i++) {
828: MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);
829: if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
830: MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);
831: PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);
832: }
833: } else {
834: MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);
835: MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);
836: }
837: PetscFree(column_values);
838: return(0);
839: }
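/*
   Usage sketch (assumed caller code; the binary "create" mode name is an assumption
   for this release): writing the parallel matrix to a binary viewer dispatches to the
   routine above when more than one processor is used.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(mat->comm,"matrix.dat",PETSC_BINARY_CREATE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/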
841: #undef __FUNCT__
843: int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
844: {
845: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
846: Mat_SeqAIJ* C = (Mat_SeqAIJ*)aij->A->data;
847: int ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
848: PetscTruth isdraw,isascii,flg,isbinary;
849: PetscViewer sviewer;
850: PetscViewerFormat format;
853: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
854: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
855: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
856: if (isascii) {
857: PetscViewerGetFormat(viewer,&format);
858: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
859: MatInfo info;
860: MPI_Comm_rank(mat->comm,&rank);
861: MatGetInfo(mat,MAT_LOCAL,&info);
862: PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);
863: if (flg) {
864: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
865: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
866: } else {
867: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
868: rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);
869: }
870: MatGetInfo(aij->A,MAT_LOCAL,&info);
871: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);
872: MatGetInfo(aij->B,MAT_LOCAL,&info);
873: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);
874: PetscViewerFlush(viewer);
875: VecScatterView(aij->Mvctx,viewer);
876: return(0);
877: } else if (format == PETSC_VIEWER_ASCII_INFO) {
878: return(0);
879: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
880: #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
881: MatMPIAIJFactorInfo_SuperLu(mat,viewer);
882: #endif
883: #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
884: MatFactorInfo_Spooles(mat,viewer);
885: #endif
887: return(0);
888: }
889: } else if (isbinary) {
890: if (size == 1) {
891: PetscObjectSetName((PetscObject)aij->A,mat->name);
892: MatView(aij->A,viewer);
893: } else {
894: MatView_MPIAIJ_Binary(mat,viewer);
895: }
896: return(0);
897: } else if (isdraw) {
898: PetscDraw draw;
899: PetscTruth isnull;
900: PetscViewerDrawGetDraw(viewer,0,&draw);
901: PetscDrawIsNull(draw,&isnull); if (isnull) return(0);
902: }
904: if (size == 1) {
905: PetscObjectSetName((PetscObject)aij->A,mat->name);
906: MatView(aij->A,viewer);
907: } else {
908: /* assemble the entire matrix onto first processor. */
909: Mat A;
910: Mat_SeqAIJ *Aloc;
911: int M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
912: PetscScalar *a;
914: if (!rank) {
915: MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
916: } else {
917: MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);
918: }
919: PetscLogObjectParent(mat,A);
921: /* copy over the A part */
922: Aloc = (Mat_SeqAIJ*)aij->A->data;
923: m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
924: row = aij->rstart;
925: for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
926: for (i=0; i<m; i++) {
927: MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);
928: row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
929: }
930: aj = Aloc->j;
931: for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
933: /* copy over the B part */
934: Aloc = (Mat_SeqAIJ*)aij->B->data;
935: m = aij->B->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
936: row = aij->rstart;
937: PetscMalloc((ai[m]+1)*sizeof(int),&cols);
938: ct = cols;
939: for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
940: for (i=0; i<m; i++) {
941: MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);
942: row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
943: }
944: PetscFree(ct);
945: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
946: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
947: /*
948: Everyone has to call to draw the matrix since the graphics waits are
949: synchronized across all processors that share the PetscDraw object
950: */
951: PetscViewerGetSingleton(viewer,&sviewer);
952: if (!rank) {
953: PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);
954: MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);
955: }
956: PetscViewerRestoreSingleton(viewer,&sviewer);
957: MatDestroy(A);
958: }
959: return(0);
960: }
962: #undef __FUNCT__
964: int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
965: {
966: int ierr;
967: PetscTruth isascii,isdraw,issocket,isbinary;
968:
970: ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);
971: ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);
972: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);
973: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);
974: if (isascii || isdraw || isbinary || issocket) {
975: MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);
976: } else {
977: SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
978: }
979: return(0);
980: }
984: #undef __FUNCT__
986: int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
987: {
988: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
989: int ierr;
990: Vec bb1;
991: PetscScalar mone=-1.0;
994: if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
996: VecDuplicate(bb,&bb1);
998: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
999: if (flag & SOR_ZERO_INITIAL_GUESS) {
1000: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);
1001: its--;
1002: }
1003:
1004: while (its--) {
1005: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1006: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1008: /* update rhs: bb1 = bb - B*x */
1009: VecScale(&mone,mat->lvec);
1010: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1012: /* local sweep */
1013: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1014:
1015: }
1016: } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1017: if (flag & SOR_ZERO_INITIAL_GUESS) {
1018: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1019: its--;
1020: }
1021: while (its--) {
1022: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1023: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1025: /* update rhs: bb1 = bb - B*x */
1026: VecScale(&mone,mat->lvec);
1027: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1029: /* local sweep */
1030: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1031:
1032: }
1033: } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1034: if (flag & SOR_ZERO_INITIAL_GUESS) {
1035: (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);
1036: its--;
1037: }
1038: while (its--) {
1039: VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1040: VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);
1042: /* update rhs: bb1 = bb - B*x */
1043: VecScale(&mone,mat->lvec);
1044: (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);
1046: /* local sweep */
1047: (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1048:
1049: }
1050: } else {
1051: SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1052: }
1054: VecDestroy(bb1);
1055: return(0);
1056: }
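/*
   Usage sketch (assumed runtime options, not part of this file): the local sweeps
   implemented above are normally reached through the SOR preconditioner, e.g.

     -pc_type sor -pc_sor_local_symmetric
*/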
1058: #undef __FUNCT__
1060: int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1061: {
1062: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1063: Mat A = mat->A,B = mat->B;
1064: int ierr;
1065: PetscReal isend[5],irecv[5];
1068: info->block_size = 1.0;
1069: MatGetInfo(A,MAT_LOCAL,info);
1070: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1071: isend[3] = info->memory; isend[4] = info->mallocs;
1072: MatGetInfo(B,MAT_LOCAL,info);
1073: isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1074: isend[3] += info->memory; isend[4] += info->mallocs;
1075: if (flag == MAT_LOCAL) {
1076: info->nz_used = isend[0];
1077: info->nz_allocated = isend[1];
1078: info->nz_unneeded = isend[2];
1079: info->memory = isend[3];
1080: info->mallocs = isend[4];
1081: } else if (flag == MAT_GLOBAL_MAX) {
1082: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);
1083: info->nz_used = irecv[0];
1084: info->nz_allocated = irecv[1];
1085: info->nz_unneeded = irecv[2];
1086: info->memory = irecv[3];
1087: info->mallocs = irecv[4];
1088: } else if (flag == MAT_GLOBAL_SUM) {
1089: MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);
1090: info->nz_used = irecv[0];
1091: info->nz_allocated = irecv[1];
1092: info->nz_unneeded = irecv[2];
1093: info->memory = irecv[3];
1094: info->mallocs = irecv[4];
1095: }
1096: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1097: info->fill_ratio_needed = 0;
1098: info->factor_mallocs = 0;
1099: info->rows_global = (double)matin->M;
1100: info->columns_global = (double)matin->N;
1101: info->rows_local = (double)matin->m;
1102: info->columns_local = (double)matin->N;
1104: return(0);
1105: }
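/*
   Usage sketch (assumed caller code): collect storage statistics summed over all
   processors, as assembled by the reductions above.

     MatInfo info;
     ierr = MatGetInfo(matin,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(matin->comm,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/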
1107: #undef __FUNCT__
1109: int MatSetOption_MPIAIJ(Mat A,MatOption op)
1110: {
1111: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1112: int ierr;
1115: switch (op) {
1116: case MAT_NO_NEW_NONZERO_LOCATIONS:
1117: case MAT_YES_NEW_NONZERO_LOCATIONS:
1118: case MAT_COLUMNS_UNSORTED:
1119: case MAT_COLUMNS_SORTED:
1120: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1121: case MAT_KEEP_ZEROED_ROWS:
1122: case MAT_NEW_NONZERO_LOCATION_ERR:
1123: case MAT_USE_INODES:
1124: case MAT_DO_NOT_USE_INODES:
1125: case MAT_IGNORE_ZERO_ENTRIES:
1126: MatSetOption(a->A,op);
1127: MatSetOption(a->B,op);
1128: break;
1129: case MAT_ROW_ORIENTED:
1130: a->roworiented = PETSC_TRUE;
1131: MatSetOption(a->A,op);
1132: MatSetOption(a->B,op);
1133: break;
1134: case MAT_ROWS_SORTED:
1135: case MAT_ROWS_UNSORTED:
1136: case MAT_YES_NEW_DIAGONALS:
1137: PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1138: break;
1139: case MAT_COLUMN_ORIENTED:
1140: a->roworiented = PETSC_FALSE;
1141: MatSetOption(a->A,op);
1142: MatSetOption(a->B,op);
1143: break;
1144: case MAT_IGNORE_OFF_PROC_ENTRIES:
1145: a->donotstash = PETSC_TRUE;
1146: break;
1147: case MAT_NO_NEW_DIAGONALS:
1148: SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1149: default:
1150: SETERRQ(PETSC_ERR_SUP,"unknown option");
1151: }
1152: return(0);
1153: }
1155: #undef __FUNCT__
1157: int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1158: {
1159: Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1160: PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p;
1161: int i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1162: int nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1163: int *cmap,*idx_p;
1166: if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1167: mat->getrowactive = PETSC_TRUE;
1169: if (!mat->rowvalues && (idx || v)) {
1170: /*
1171: allocate enough space to hold information from the longest row.
1172: */
1173: Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1174: int max = 1,tmp;
1175: for (i=0; i<matin->m; i++) {
1176: tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1177: if (max < tmp) { max = tmp; }
1178: }
1179: PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);
1180: mat->rowindices = (int*)(mat->rowvalues + max);
1181: }
1183: if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1184: lrow = row - rstart;
1186: pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1187: if (!v) {pvA = 0; pvB = 0;}
1188: if (!idx) {pcA = 0; if (!v) pcB = 0;}
1189: (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);
1190: (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);
1191: nztot = nzA + nzB;
1193: cmap = mat->garray;
1194: if (v || idx) {
1195: if (nztot) {
1196: /* Sort by increasing column numbers, assuming A and B already sorted */
1197: int imark = -1;
1198: if (v) {
1199: *v = v_p = mat->rowvalues;
1200: for (i=0; i<nzB; i++) {
1201: if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1202: else break;
1203: }
1204: imark = i;
1205: for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i];
1206: for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i];
1207: }
1208: if (idx) {
1209: *idx = idx_p = mat->rowindices;
1210: if (imark > -1) {
1211: for (i=0; i<imark; i++) {
1212: idx_p[i] = cmap[cworkB[i]];
1213: }
1214: } else {
1215: for (i=0; i<nzB; i++) {
1216: if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1217: else break;
1218: }
1219: imark = i;
1220: }
1221: for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i];
1222: for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]];
1223: }
1224: } else {
1225: if (idx) *idx = 0;
1226: if (v) *v = 0;
1227: }
1228: }
1229: *nz = nztot;
1230: (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);
1231: (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);
1232: return(0);
1233: }
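/*
   Usage sketch (assumed caller code): rows may only be requested on the owning
   processor, and each MatGetRow() must be matched by a MatRestoreRow().

     int         row,rstart,rend,ncols,*cols;
     PetscScalar *vals;
     ierr = MatGetOwnershipRange(matin,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(matin,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(matin,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/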
1235: #undef __FUNCT__
1237: int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1238: {
1239: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1242: if (aij->getrowactive == PETSC_FALSE) {
1243: SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1244: }
1245: aij->getrowactive = PETSC_FALSE;
1246: return(0);
1247: }
1249: #undef __FUNCT__
1251: int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1252: {
1253: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1254: Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1255: int ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1256: PetscReal sum = 0.0;
1257: PetscScalar *v;
1260: if (aij->size == 1) {
1261: MatNorm(aij->A,type,norm);
1262: } else {
1263: if (type == NORM_FROBENIUS) {
1264: v = amat->a;
1265: for (i=0; i<amat->nz; i++) {
1266: #if defined(PETSC_USE_COMPLEX)
1267: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1268: #else
1269: sum += (*v)*(*v); v++;
1270: #endif
1271: }
1272: v = bmat->a;
1273: for (i=0; i<bmat->nz; i++) {
1274: #if defined(PETSC_USE_COMPLEX)
1275: sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1276: #else
1277: sum += (*v)*(*v); v++;
1278: #endif
1279: }
1280: MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);
1281: *norm = sqrt(*norm);
1282: } else if (type == NORM_1) { /* max column norm */
1283: PetscReal *tmp,*tmp2;
1284: int *jj,*garray = aij->garray;
1285: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);
1286: PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);
1287: PetscMemzero(tmp,mat->N*sizeof(PetscReal));
1288: *norm = 0.0;
1289: v = amat->a; jj = amat->j;
1290: for (j=0; j<amat->nz; j++) {
1291: tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v); v++;
1292: }
1293: v = bmat->a; jj = bmat->j;
1294: for (j=0; j<bmat->nz; j++) {
1295: tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1296: }
1297: MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);
1298: for (j=0; j<mat->N; j++) {
1299: if (tmp2[j] > *norm) *norm = tmp2[j];
1300: }
1301: PetscFree(tmp);
1302: PetscFree(tmp2);
1303: } else if (type == NORM_INFINITY) { /* max row norm */
1304: PetscReal ntemp = 0.0;
1305: for (j=0; j<aij->A->m; j++) {
1306: v = amat->a + amat->i[j] + shift;
1307: sum = 0.0;
1308: for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1309: sum += PetscAbsScalar(*v); v++;
1310: }
1311: v = bmat->a + bmat->i[j] + shift;
1312: for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1313: sum += PetscAbsScalar(*v); v++;
1314: }
1315: if (sum > ntemp) ntemp = sum;
1316: }
1317: MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);
1318: } else {
1319: SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1320: }
1321: }
1322: return(0);
1323: }
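/*
   Usage sketch (assumed caller code): the three norms supported above.

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_1,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/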
1325: #undef __FUNCT__
1327: int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1328: {
1329: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1330: Mat_SeqAIJ *Aloc = (Mat_SeqAIJ*)a->A->data;
1331: int ierr,shift = Aloc->indexshift;
1332: int M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1333: Mat B;
1334: PetscScalar *array;
1337: if (!matout && M != N) {
1338: SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1339: }
1341: MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);
1343: /* copy over the A part */
1344: Aloc = (Mat_SeqAIJ*)a->A->data;
1345: m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1346: row = a->rstart;
1347: for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1348: for (i=0; i<m; i++) {
1349: MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);
1350: row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1351: }
1352: aj = Aloc->j;
1353: for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1355: /* copy over the B part */
1356: Aloc = (Mat_SeqAIJ*)a->B->data;
1357: m = a->B->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1358: row = a->rstart;
1359: PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);
1360: ct = cols;
1361: for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1362: for (i=0; i<m; i++) {
1363: MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);
1364: row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1365: }
1366: PetscFree(ct);
1367: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1368: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1369: if (matout) {
1370: *matout = B;
1371: } else {
1372: MatHeaderCopy(A,B);
1373: }
1374: return(0);
1375: }
1377: #undef __FUNCT__
1379: int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1380: {
1381: Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1382: Mat a = aij->A,b = aij->B;
1383: int ierr,s1,s2,s3;
1386: MatGetLocalSize(mat,&s2,&s3);
1387: if (rr) {
1388: VecGetLocalSize(rr,&s1);
1389: if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1390: /* Overlap communication with computation. */
1391: VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1392: }
1393: if (ll) {
1394: VecGetLocalSize(ll,&s1);
1395: if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1396: (*b->ops->diagonalscale)(b,ll,0);
1397: }
1398: /* scale the diagonal block */
1399: (*a->ops->diagonalscale)(a,ll,rr);
1401: if (rr) {
1402: /* Do a scatter end and then right scale the off-diagonal block */
1403: VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);
1404: (*b->ops->diagonalscale)(b,0,aij->lvec);
1405: }
1406:
1407: return(0);
1408: }
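/*
   Usage sketch (assumed caller code): ll must match the row layout and rr the column
   layout, as checked above; either argument may be PETSC_NULL.

     Vec l,r;
     ierr = VecCreateMPI(mat->comm,mat->m,mat->M,&l);CHKERRQ(ierr);
     ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&r);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);
*/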
1411: #undef __FUNCT__
1413: int MatPrintHelp_MPIAIJ(Mat A)
1414: {
1415: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1416: int ierr;
1419: if (!a->rank) {
1420: MatPrintHelp_SeqAIJ(a->A);
1421: }
1422: return(0);
1423: }
1425: #undef __FUNCT__
1427: int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1428: {
1430: *bs = 1;
1431: return(0);
1432: }
1433: #undef __FUNCT__
1435: int MatSetUnfactored_MPIAIJ(Mat A)
1436: {
1437: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1438: int ierr;
1441: MatSetUnfactored(a->A);
1442: return(0);
1443: }
1445: #undef __FUNCT__
1447: int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1448: {
1449: Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1450: Mat a,b,c,d;
1451: PetscTruth flg;
1452: int ierr;
1455: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1456: if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1457: a = matA->A; b = matA->B;
1458: c = matB->A; d = matB->B;
1460: MatEqual(a,c,&flg);
1461: if (flg == PETSC_TRUE) {
1462: MatEqual(b,d,&flg);
1463: }
1464: MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);
1465: return(0);
1466: }
1468: #undef __FUNCT__
1470: int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1471: {
1472: int ierr;
1473: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1474: Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1475: PetscTruth flg;
1478: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);
1479: if (str != SAME_NONZERO_PATTERN || !flg) {
1480: /* because of the column compression in the off-processor part of the matrix a->B,
1481: the number of columns in a->B and b->B may be different, hence we cannot call
1482: the MatCopy() directly on the two parts. If need be, we can provide a more
1483: efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1484: then copying the submatrices */
1485: MatCopy_Basic(A,B,str);
1486: } else {
1487: MatCopy(a->A,b->A,str);
1488: MatCopy(a->B,b->B,str);
1489: }
1490: return(0);
1491: }
1493: #undef __FUNCT__
1495: int MatSetUpPreallocation_MPIAIJ(Mat A)
1496: {
1497: int ierr;
1500: MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);
1501: return(0);
1502: }
1504: EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1505: EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1506: EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1507: EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1508: EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1509: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1510: EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1511: #endif
1513: #include "petscblaslapack.h"
1515: #undef __FUNCT__
1517: int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1518: {
1519: int ierr,one = 1;
1520: Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1521: Mat_SeqAIJ *x,*y;
1524: if (str == SAME_NONZERO_PATTERN) {
1525: x = (Mat_SeqAIJ *)xx->A->data;
1526: y = (Mat_SeqAIJ *)yy->A->data;
1527: BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1528: x = (Mat_SeqAIJ *)xx->B->data;
1529: y = (Mat_SeqAIJ *)yy->B->data;
1530: BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1531: } else {
1532: MatAXPY_Basic(a,X,Y,str);
1533: }
1534: return(0);
1535: }
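/*
   Usage sketch (assumed caller code, using the old-style MatAXPY() calling sequence
   of this release): compute Y = Y + a*X, taking the fast path above when the two
   matrices share a nonzero pattern.

     PetscScalar a = 0.5;
     ierr = MatAXPY(&a,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/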
1537: /* -------------------------------------------------------------------*/
1538: static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1539: MatGetRow_MPIAIJ,
1540: MatRestoreRow_MPIAIJ,
1541: MatMult_MPIAIJ,
1542: MatMultAdd_MPIAIJ,
1543: MatMultTranspose_MPIAIJ,
1544: MatMultTransposeAdd_MPIAIJ,
1545: 0,
1546: 0,
1547: 0,
1548: 0,
1549: 0,
1550: 0,
1551: MatRelax_MPIAIJ,
1552: MatTranspose_MPIAIJ,
1553: MatGetInfo_MPIAIJ,
1554: MatEqual_MPIAIJ,
1555: MatGetDiagonal_MPIAIJ,
1556: MatDiagonalScale_MPIAIJ,
1557: MatNorm_MPIAIJ,
1558: MatAssemblyBegin_MPIAIJ,
1559: MatAssemblyEnd_MPIAIJ,
1560: 0,
1561: MatSetOption_MPIAIJ,
1562: MatZeroEntries_MPIAIJ,
1563: MatZeroRows_MPIAIJ,
1564: #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1565: MatLUFactorSymbolic_MPIAIJ_TFS,
1566: #else
1567: 0,
1568: #endif
1569: 0,
1570: 0,
1571: 0,
1572: MatSetUpPreallocation_MPIAIJ,
1573: 0,
1574: 0,
1575: 0,
1576: 0,
1577: MatDuplicate_MPIAIJ,
1578: 0,
1579: 0,
1580: 0,
1581: 0,
1582: MatAXPY_MPIAIJ,
1583: MatGetSubMatrices_MPIAIJ,
1584: MatIncreaseOverlap_MPIAIJ,
1585: MatGetValues_MPIAIJ,
1586: MatCopy_MPIAIJ,
1587: MatPrintHelp_MPIAIJ,
1588: MatScale_MPIAIJ,
1589: 0,
1590: 0,
1591: 0,
1592: MatGetBlockSize_MPIAIJ,
1593: 0,
1594: 0,
1595: 0,
1596: 0,
1597: MatFDColoringCreate_MPIAIJ,
1598: 0,
1599: MatSetUnfactored_MPIAIJ,
1600: 0,
1601: 0,
1602: MatGetSubMatrix_MPIAIJ,
1603: MatDestroy_MPIAIJ,
1604: MatView_MPIAIJ,
1605: MatGetPetscMaps_Petsc,
1606: 0,
1607: 0,
1608: 0,
1609: 0,
1610: 0,
1611: 0,
1612: 0,
1613: 0,
1614: MatSetColoring_MPIAIJ,
1615: MatSetValuesAdic_MPIAIJ,
1616: MatSetValuesAdifor_MPIAIJ
1617: };
1619: /* ----------------------------------------------------------------------------------------*/
1621: EXTERN_C_BEGIN
1622: #undef __FUNCT__
1624: int MatStoreValues_MPIAIJ(Mat mat)
1625: {
1626: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1627: int ierr;
1630: MatStoreValues(aij->A);
1631: MatStoreValues(aij->B);
1632: return(0);
1633: }
1634: EXTERN_C_END
1636: EXTERN_C_BEGIN
1637: #undef __FUNCT__
1639: int MatRetrieveValues_MPIAIJ(Mat mat)
1640: {
1641: Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1642: int ierr;
1645: MatRetrieveValues(aij->A);
1646: MatRetrieveValues(aij->B);
1647: return(0);
1648: }
1649: EXTERN_C_END
1651: #include "petscpc.h"
1652: EXTERN_C_BEGIN
1653: EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1654: EXTERN_C_END
1656: EXTERN_C_BEGIN
1657: #undef __FUNCT__
1659: int MatCreate_MPIAIJ(Mat B)
1660: {
1661: Mat_MPIAIJ *b;
1662: int ierr,i,size;
1665: MPI_Comm_size(B->comm,&size);
1667: ierr = PetscNew(Mat_MPIAIJ,&b);
1668: B->data = (void*)b;
1669: ierr = PetscMemzero(b,sizeof(Mat_MPIAIJ));
1670: ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));
1671: B->factor = 0;
1672: B->assembled = PETSC_FALSE;
1673: B->mapping = 0;
1675: B->insertmode = NOT_SET_VALUES;
1676: b->size = size;
1677: MPI_Comm_rank(B->comm,&b->rank);
1679: PetscSplitOwnership(B->comm,&B->m,&B->M);
1680: PetscSplitOwnership(B->comm,&B->n,&B->N);
1682: /* the information in the maps duplicates the information computed below, eventually
1683: we should remove the duplicate information that is not contained in the maps */
1684: PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);
1685: PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);
1687: /* build local table of row and column ownerships */
1688: PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);
1689: PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1690: b->cowners = b->rowners + b->size + 2;
1691: MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);
1692: b->rowners[0] = 0;
1693: for (i=2; i<=b->size; i++) {
1694: b->rowners[i] += b->rowners[i-1];
1695: }
1696: b->rstart = b->rowners[b->rank];
1697: b->rend = b->rowners[b->rank+1];
1698: MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);
1699: b->cowners[0] = 0;
1700: for (i=2; i<=b->size; i++) {
1701: b->cowners[i] += b->cowners[i-1];
1702: }
1703: b->cstart = b->cowners[b->rank];
1704: b->cend = b->cowners[b->rank+1];
1706: /* build cache for off array entries formed */
1707: MatStashCreate_Private(B->comm,1,&B->stash);
1708: b->donotstash = PETSC_FALSE;
1709: b->colmap = 0;
1710: b->garray = 0;
1711: b->roworiented = PETSC_TRUE;
1713: /* stuff used for matrix vector multiply */
1714: b->lvec = PETSC_NULL;
1715: b->Mvctx = PETSC_NULL;
1717: /* stuff for MatGetRow() */
1718: b->rowindices = 0;
1719: b->rowvalues = 0;
1720: b->getrowactive = PETSC_FALSE;
1722: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1723: "MatStoreValues_MPIAIJ",
1724: MatStoreValues_MPIAIJ);
1725: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1726: "MatRetrieveValues_MPIAIJ",
1727: MatRetrieveValues_MPIAIJ);
1728: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1729: "MatGetDiagonalBlock_MPIAIJ",
1730: MatGetDiagonalBlock_MPIAIJ);
1732: return(0);
1733: }
1734: EXTERN_C_END
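/*
   Usage sketch (assumed caller code; M and N are global sizes chosen by the caller):
   create a parallel AIJ matrix with MatCreate()/MatSetType(), which reaches the
   constructor above, then preallocate.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/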
1736: #undef __FUNCT__
1738: int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1739: {
1740: Mat mat;
1741: Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1742: int ierr;
1745: *newmat = 0;
1746: MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);
1747: MatSetType(mat,MATMPIAIJ);
1748: a = (Mat_MPIAIJ*)mat->data;
1749: ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1750: mat->factor = matin->factor;
1751: mat->assembled = PETSC_TRUE;
1752: mat->insertmode = NOT_SET_VALUES;
1753: mat->preallocated = PETSC_TRUE;
1755: a->rstart = oldmat->rstart;
1756: a->rend = oldmat->rend;
1757: a->cstart = oldmat->cstart;
1758: a->cend = oldmat->cend;
1759: a->size = oldmat->size;
1760: a->rank = oldmat->rank;
1761: a->donotstash = oldmat->donotstash;
1762: a->roworiented = oldmat->roworiented;
1763: a->rowindices = 0;
1764: a->rowvalues = 0;
1765: a->getrowactive = PETSC_FALSE;
1767: ierr = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));
1768: ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);
1769: if (oldmat->colmap) {
1770: #if defined (PETSC_USE_CTABLE)
1771: PetscTableCreateCopy(oldmat->colmap,&a->colmap);
1772: #else
1773: PetscMalloc((mat->N)*sizeof(int),&a->colmap);
1774: PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1775: ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));
1776: #endif
1777: } else a->colmap = 0;
1778: if (oldmat->garray) {
1779: int len;
1780: len = oldmat->B->n;
1781: PetscMalloc((len+1)*sizeof(int),&a->garray);
1782: PetscLogObjectMemory(mat,len*sizeof(int));
1783: if (len) { PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int)); }
1784: } else a->garray = 0;
1785:
1786: VecDuplicate(oldmat->lvec,&a->lvec);
1787: PetscLogObjectParent(mat,a->lvec);
1788: VecScatterCopy(oldmat->Mvctx,&a->Mvctx);
1789: PetscLogObjectParent(mat,a->Mvctx);
1790: MatDuplicate(oldmat->A,cpvalues,&a->A);
1791: PetscLogObjectParent(mat,a->A);
1792: MatDuplicate(oldmat->B,cpvalues,&a->B);
1793: PetscLogObjectParent(mat,a->B);
1794: PetscFListDuplicate(matin->qlist,&mat->qlist);
1795: *newmat = mat;
1796: return(0);
1797: }
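/*
   Usage sketch (assumed caller code): duplicate the parallel matrix, copying either
   the numerical values or only the nonzero structure.

     Mat C;
     ierr = MatDuplicate(matin,MAT_COPY_VALUES,&C);CHKERRQ(ierr);
*/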
1799: #include "petscsys.h"
1801: EXTERN_C_BEGIN
1802: #undef __FUNCT__
1804: int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1805: {
1806: Mat A;
1807: PetscScalar *vals,*svals;
1808: MPI_Comm comm = ((PetscObject)viewer)->comm;
1809: MPI_Status status;
1810: int i,nz,ierr,j,rstart,rend,fd;
1811: int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1812: int *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1813: int tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1816: MPI_Comm_size(comm,&size);
1817: MPI_Comm_rank(comm,&rank);
1818: if (!rank) {
1819: PetscViewerBinaryGetDescriptor(viewer,&fd);
1820: PetscBinaryRead(fd,(char *)header,4,PETSC_INT);
1821: if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1822: if (header[3] < 0) {
1823: SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1824: }
1825: }
1827: MPI_Bcast(header+1,3,MPI_INT,0,comm);
1828: M = header[1]; N = header[2];
1829: /* determine ownership of all rows */
1830: m = M/size + ((M % size) > rank);
1831: PetscMalloc((size+2)*sizeof(int),&rowners);
1832: MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);
1833: rowners[0] = 0;
1834: for (i=2; i<=size; i++) {
1835: rowners[i] += rowners[i-1];
1836: }
1837: rstart = rowners[rank];
1838: rend = rowners[rank+1];
1840: /* distribute row lengths to all processors */
1841: ierr = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);
1842: offlens = ourlens + (rend-rstart);
1843: if (!rank) {
1844: PetscMalloc(M*sizeof(int),&rowlengths);
1845: PetscBinaryRead(fd,rowlengths,M,PETSC_INT);
1846: PetscMalloc(size*sizeof(int),&sndcounts);
1847: for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1848: MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1849: PetscFree(sndcounts);
1850: } else {
1851: MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);
1852: }
1854: if (!rank) {
1855: /* calculate the number of nonzeros on each processor */
1856: PetscMalloc(size*sizeof(int),&procsnz);
1857: PetscMemzero(procsnz,size*sizeof(int));
1858: for (i=0; i<size; i++) {
1859: for (j=rowners[i]; j< rowners[i+1]; j++) {
1860: procsnz[i] += rowlengths[j];
1861: }
1862: }
1863: PetscFree(rowlengths);
1865: /* determine max buffer needed and allocate it */
1866: maxnz = 0;
1867: for (i=0; i<size; i++) {
1868: maxnz = PetscMax(maxnz,procsnz[i]);
1869: }
1870: PetscMalloc(maxnz*sizeof(int),&cols);
1872: /* read in my part of the matrix column indices */
1873: nz = procsnz[0];
1874: PetscMalloc(nz*sizeof(int),&mycols);
1875: PetscBinaryRead(fd,mycols,nz,PETSC_INT);
1877: /* read in everyone else's and ship it off */
1878: for (i=1; i<size; i++) {
1879: nz = procsnz[i];
1880: PetscBinaryRead(fd,cols,nz,PETSC_INT);
1881: MPI_Send(cols,nz,MPI_INT,i,tag,comm);
1882: }
1883: PetscFree(cols);
1884: } else {
1885: /* determine buffer space needed for message */
1886: nz = 0;
1887: for (i=0; i<m; i++) {
1888: nz += ourlens[i];
1889: }
1890: PetscMalloc((nz+1)*sizeof(int),&mycols);
1892: /* receive message of column indices */
1893: MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);
1894: MPI_Get_count(&status,MPI_INT,&maxnz);
1895: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1896: }
1898: /* determine column ownership if matrix is not square */
1899: if (N != M) {
1900: n = N/size + ((N % size) > rank);
1901: ierr = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);
1902: cstart = cend - n;
1903: } else {
1904: cstart = rstart;
1905: cend = rend;
1906: n = cend - cstart;
1907: }
1909: /* loop over local rows, determining number of off diagonal entries */
1910: PetscMemzero(offlens,m*sizeof(int));
1911: jj = 0;
1912: for (i=0; i<m; i++) {
1913: for (j=0; j<ourlens[i]; j++) {
1914: if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1915: jj++;
1916: }
1917: }
1919: /* create our matrix */
1920: for (i=0; i<m; i++) {
1921: ourlens[i] -= offlens[i];
1922: }
1923: MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);
1924: A = *newmat;
1925: MatSetOption(A,MAT_COLUMNS_SORTED);
1926: for (i=0; i<m; i++) {
1927: ourlens[i] += offlens[i];
1928: }
1930: if (!rank) {
1931: PetscMalloc(maxnz*sizeof(PetscScalar),&vals);
1933: /* read in my part of the matrix numerical values */
1934: nz = procsnz[0];
1935: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1936:
1937: /* insert into matrix */
1938: jj = rstart;
1939: smycols = mycols;
1940: svals = vals;
1941: for (i=0; i<m; i++) {
1942: MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1943: smycols += ourlens[i];
1944: svals += ourlens[i];
1945: jj++;
1946: }
1948: /* read in other processors and ship out */
1949: for (i=1; i<size; i++) {
1950: nz = procsnz[i];
1951: PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);
1952: MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);
1953: }
1954: PetscFree(procsnz);
1955: } else {
1956: /* receive numeric values */
1957: PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);
1959: /* receive message of values */
1960: MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);
1961: MPI_Get_count(&status,MPIU_SCALAR,&maxnz);
1962: if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1964: /* insert into matrix */
1965: jj = rstart;
1966: smycols = mycols;
1967: svals = vals;
1968: for (i=0; i<m; i++) {
1969: ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);
1970: smycols += ourlens[i];
1971: svals += ourlens[i];
1972: jj++;
1973: }
1974: }
1975: PetscFree(ourlens);
1976: PetscFree(vals);
1977: PetscFree(mycols);
1978: PetscFree(rowners);
1980: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
1981: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
1982: return(0);
1983: }
1984: EXTERN_C_END
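/*
   A minimal usage sketch (not part of mpiaij.c): loading a matrix stored in
   PETSc binary format as an MPIAIJ matrix through the generic MatLoad()
   interface, which dispatches to MatLoad_MPIAIJ() above.  The file name
   "matrix.dat" is an assumption for illustration.
*/
   PetscViewer viewer;
   Mat         A;
   ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
   ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
   ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);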
1986: #undef __FUNCT__
1988: /*
1989: Not great since it makes two copies of the submatrix: first a local SeqAIJ copy,
1990: and then the end result formed by concatenating the local matrices.
1991: Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1992: */
1993: int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1994: {
1995: int ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1996: int *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1997: Mat *local,M,Mreuse;
1998: PetscScalar *vwork,*aa;
1999: MPI_Comm comm = mat->comm;
2000: Mat_SeqAIJ *aij;
2004: MPI_Comm_rank(comm,&rank);
2005: MPI_Comm_size(comm,&size);
2007: if (call == MAT_REUSE_MATRIX) {
2008: PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);
2009: if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2010: local = &Mreuse;
2011: ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);
2012: } else {
2013: ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);
2014: Mreuse = *local;
2015: ierr = PetscFree(local);
2016: }
2018: /*
2019: m - number of local rows
2020: n - number of columns (same on all processors)
2021: rstart - first row in new global matrix generated
2022: */
2023: MatGetSize(Mreuse,&m,&n);
2024: if (call == MAT_INITIAL_MATRIX) {
2025: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2026: if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2027: ii = aij->i;
2028: jj = aij->j;
2030: /*
2031: Determine the number of non-zeros in the diagonal and off-diagonal
2032: portions of the matrix in order to do correct preallocation
2033: */
2035: /* first get start and end of "diagonal" columns */
2036: if (csize == PETSC_DECIDE) {
2037: ISGetSize(isrow,&mglobal);
2038: if (mglobal == n) { /* square matrix */
2039: nlocal = m;
2040: } else {
2041: nlocal = n/size + ((n % size) > rank);
2042: }
2043: } else {
2044: nlocal = csize;
2045: }
2046: ierr = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);
2047: rstart = rend - nlocal;
2048: if (rank == size - 1 && rend != n) {
2049: SETERRQ(1,"Local column sizes do not add up to total number of columns");
2050: }
2052: /* next, compute all the lengths */
2053: ierr = PetscMalloc((2*m+1)*sizeof(int),&dlens);
2054: olens = dlens + m;
2055: for (i=0; i<m; i++) {
2056: jend = ii[i+1] - ii[i];
2057: olen = 0;
2058: dlen = 0;
2059: for (j=0; j<jend; j++) {
2060: if (*jj < rstart || *jj >= rend) olen++;
2061: else dlen++;
2062: jj++;
2063: }
2064: olens[i] = olen;
2065: dlens[i] = dlen;
2066: }
2067: MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);
2068: PetscFree(dlens);
2069: } else {
2070: int ml,nl;
2072: M = *newmat;
2073: MatGetLocalSize(M,&ml,&nl);
2074: if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2075: MatZeroEntries(M);
2076: /*
2077: The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2078: rather than the slower MatSetValues().
2079: */
2080: M->was_assembled = PETSC_TRUE;
2081: M->assembled = PETSC_FALSE;
2082: }
2083: MatGetOwnershipRange(M,&rstart,&rend);
2084: aij = (Mat_SeqAIJ*)(Mreuse)->data;
2085: if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2086: ii = aij->i;
2087: jj = aij->j;
2088: aa = aij->a;
2089: for (i=0; i<m; i++) {
2090: row = rstart + i;
2091: nz = ii[i+1] - ii[i];
2092: cwork = jj; jj += nz;
2093: vwork = aa; aa += nz;
2094: MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);
2095: }
2097: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
2098: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
2099: *newmat = M;
2101: /* save submatrix used in processor for next request */
2102: if (call == MAT_INITIAL_MATRIX) {
2103: PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);
2104: PetscObjectDereference((PetscObject)Mreuse);
2105: }
2107: return(0);
2108: }
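/*
   A minimal usage sketch (not part of mpiaij.c): extracting a parallel
   submatrix through the generic MatGetSubMatrix() interface, which dispatches
   to MatGetSubMatrix_MPIAIJ() above.  Here each process requests its own rows
   rstart..rend-1 and the first ncols global columns; mat, rstart, rend, and
   ncols are assumptions for illustration.
*/
   IS  isrow,iscol;
   Mat sub;
   ierr = ISCreateStride(PETSC_COMM_SELF,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
   ierr = ISCreateStride(PETSC_COMM_SELF,ncols,0,1,&iscol);CHKERRQ(ierr);
   ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
   ierr = ISDestroy(isrow);CHKERRQ(ierr);
   ierr = ISDestroy(iscol);CHKERRQ(ierr);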
2110: #undef __FUNCT__
2112: /*@C
2113: MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2114: (the default parallel PETSc format). For good matrix assembly performance
2115: the user should preallocate the matrix storage by setting the parameters
2116: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2117: performance can be increased by more than a factor of 50.
2119: Collective on MPI_Comm
2121: Input Parameters:
2122: + A - the matrix
2123: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2124: (same value is used for all local rows)
2125: . d_nnz - array containing the number of nonzeros in the various rows of the
2126: DIAGONAL portion of the local submatrix (possibly different for each row)
2127: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2128: The size of this array is equal to the number of local rows, i.e. 'm'.
2129: You must leave room for the diagonal entry even if it is zero.
2130: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2131: submatrix (same value is used for all local rows).
2132: - o_nnz - array containing the number of nonzeros in the various rows of the
2133: OFF-DIAGONAL portion of the local submatrix (possibly different for
2134: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2135: structure. The size of this array is equal to the number
2136: of local rows, i.e. 'm'.
2138: The AIJ format (also called the Yale sparse matrix format or
2139: compressed row storage), is fully compatible with standard Fortran 77
2140: storage. That is, the stored row and column indices can begin at
2141: either one (as in Fortran) or zero. See the users manual for details.
2143: The user MUST specify either the local or global matrix dimensions
2144: (possibly both).
2146: The parallel matrix is partitioned such that the first m0 rows belong to
2147: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2148: to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2150: The DIAGONAL portion of the local submatrix of a processor can be defined
2151: as the submatrix which is obtained by extracting the part corresponding
2152: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2153: first row that belongs to the processor, and r2 is the last row belonging
2154: to this processor. This is a square m x m matrix. The remaining portion
2155: of the local submatrix (m x N) constitutes the OFF-DIAGONAL portion.
2157: If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2159: By default, this format uses inodes (identical nodes) when possible.
2160: We search for consecutive rows with the same nonzero structure, thereby
2161: reusing matrix information to achieve increased efficiency.
2163: Options Database Keys:
2164: + -mat_aij_no_inode - Do not use inodes
2165: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2166: - -mat_aij_oneindex - Internally use indexing starting at 1
2167: rather than 0. Note that when calling MatSetValues(),
2168: the user still MUST index entries starting at 0!
2170: Example usage:
2171:
2172: Consider the following 8x8 matrix with 34 non-zero values, that is
2173: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2174: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2175: as follows:
2177: .vb
2178: 1 2 0 | 0 3 0 | 0 4
2179: Proc0 0 5 6 | 7 0 0 | 8 0
2180: 9 0 10 | 11 0 0 | 12 0
2181: -------------------------------------
2182: 13 0 14 | 15 16 17 | 0 0
2183: Proc1 0 18 0 | 19 20 21 | 0 0
2184: 0 0 0 | 22 23 0 | 24 0
2185: -------------------------------------
2186: Proc2 25 26 27 | 0 0 28 | 29 0
2187: 30 0 0 | 31 32 33 | 0 34
2188: .ve
2190: This can be represented as a collection of submatrices as:
2192: .vb
2193: A B C
2194: D E F
2195: G H I
2196: .ve
2198: Where the submatrices A,B,C are owned by proc0, D,E,F are
2199: owned by proc1, G,H,I are owned by proc2.
2201: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2202: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2203: The 'M','N' parameters are 8,8, and have the same values on all procs.
2205: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2206: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2207: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2208: Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2209: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2210: matrix, and [DF] as another SeqAIJ matrix.
2212: When d_nz, o_nz parameters are specified, d_nz storage elements are
2213: allocated for every row of the local diagonal submatrix, and o_nz
2214: storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2215: One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2216: the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2217: In this case, the values of d_nz,o_nz are:
2218: .vb
2219: proc0 : d_nz = 2, o_nz = 2
2220: proc1 : d_nz = 3, o_nz = 2
2221: proc2 : d_nz = 1, o_nz = 4
2222: .ve
2223: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2224: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2225: for proc2, i.e. we are using 12+15+10=37 storage locations to store
2226: 34 values.
2228: When d_nnz, o_nnz parameters are specified, the storage is specified
2229: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2230: In the above case the values for d_nnz,o_nnz are:
2231: .vb
2232: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2233: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2234: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2235: .ve
2236: Here the space allocated is the sum of all the above values, i.e. 34, and
2237: hence pre-allocation is perfect.
2239: Level: intermediate
2241: .keywords: matrix, aij, compressed row, sparse, parallel
2243: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2244: @*/
2245: int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2246: {
2247: Mat_MPIAIJ *b;
2248: int ierr,i;
2249: PetscTruth flg2;
2252: PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);
2253: if (!flg2) return(0);
2254: B->preallocated = PETSC_TRUE;
2255: if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2256: if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2257: if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2258: if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2259: if (d_nnz) {
2260: for (i=0; i<B->m; i++) {
2261: if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2262: }
2263: }
2264: if (o_nnz) {
2265: for (i=0; i<B->m; i++) {
2266: if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2267: }
2268: }
2269: b = (Mat_MPIAIJ*)B->data;
2271: MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);
2272: PetscLogObjectParent(B,b->A);
2273: MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);
2274: PetscLogObjectParent(B,b->B);
2276: return(0);
2277: }
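/*
   A minimal usage sketch (not part of mpiaij.c) of the preallocation routine
   for the 8x8 example in the manual page above, as seen from proc1 on a
   3-process run (proc1 owns rows 3..5).  The d_nnz/o_nnz values come from the
   manual page; everything else is an assumption for illustration.
*/
   Mat B;
   int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
   ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&B);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); /* d_nz,o_nz are ignored when the arrays are given */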
2279: #undef __FUNCT__
2281: /*@C
2282: MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2283: (the default parallel PETSc format). For good matrix assembly performance
2284: the user should preallocate the matrix storage by setting the parameters
2285: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2286: performance can be increased by more than a factor of 50.
2288: Collective on MPI_Comm
2290: Input Parameters:
2291: + comm - MPI communicator
2292: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2293: This value should be the same as the local size used in creating the
2294: y vector for the matrix-vector product y = Ax.
2295: . n - This value should be the same as the local size used in creating the
2296: x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2297: calculated if N is given) For square matrices n is almost always m.
2298: . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2299: . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2300: . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
2301: (same value is used for all local rows)
2302: . d_nnz - array containing the number of nonzeros in the various rows of the
2303: DIAGONAL portion of the local submatrix (possibly different for each row)
2304: or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2305: The size of this array is equal to the number of local rows, i.e. 'm'.
2306: You must leave room for the diagonal entry even if it is zero.
2307: . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
2308: submatrix (same value is used for all local rows).
2309: - o_nnz - array containing the number of nonzeros in the various rows of the
2310: OFF-DIAGONAL portion of the local submatrix (possibly different for
2311: each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2312: structure. The size of this array is equal to the number
2313: of local rows, i.e. 'm'.
2315: Output Parameter:
2316: . A - the matrix
2318: Notes:
2319: m,n,M,N parameters specify the size of the matrix, and its partitioning across
2320: processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2321: storage requirements for this matrix.
2323: If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2324: processor then it must be used on all processors that share the object for
2325: that argument.
2327: The AIJ format (also called the Yale sparse matrix format or
2328: compressed row storage), is fully compatible with standard Fortran 77
2329: storage. That is, the stored row and column indices can begin at
2330: either one (as in Fortran) or zero. See the users manual for details.
2332: The user MUST specify either the local or global matrix dimensions
2333: (possibly both).
2335: The parallel matrix is partitioned such that the first m0 rows belong to
2336: process 0, the next m1 rows belong to process 1, the next m2 rows belong
2337: to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2339: The DIAGONAL portion of the local submatrix of a processor can be defined
2340: as the submatrix which is obtained by extracting the part corresponding
2341: to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2342: first row that belongs to the processor, and r2 is the last row belonging
2343: to this processor. This is a square m x m matrix. The remaining portion
2344: of the local submatrix (m x N) constitutes the OFF-DIAGONAL portion.
2346: If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2348: By default, this format uses inodes (identical nodes) when possible.
2349: We search for consecutive rows with the same nonzero structure, thereby
2350: reusing matrix information to achieve increased efficiency.
2352: Options Database Keys:
2353: + -mat_aij_no_inode - Do not use inodes
2354: . -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2355: - -mat_aij_oneindex - Internally use indexing starting at 1
2356: rather than 0. Note that when calling MatSetValues(),
2357: the user still MUST index entries starting at 0!
2360: Example usage:
2361:
2362: Consider the following 8x8 matrix with 34 non-zero values, that is
2363: assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2364: proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2365: as follows:
2367: .vb
2368: 1 2 0 | 0 3 0 | 0 4
2369: Proc0 0 5 6 | 7 0 0 | 8 0
2370: 9 0 10 | 11 0 0 | 12 0
2371: -------------------------------------
2372: 13 0 14 | 15 16 17 | 0 0
2373: Proc1 0 18 0 | 19 20 21 | 0 0
2374: 0 0 0 | 22 23 0 | 24 0
2375: -------------------------------------
2376: Proc2 25 26 27 | 0 0 28 | 29 0
2377: 30 0 0 | 31 32 33 | 0 34
2378: .ve
2380: This can be represented as a collection of submatrices as:
2382: .vb
2383: A B C
2384: D E F
2385: G H I
2386: .ve
2388: Where the submatrices A,B,C are owned by proc0, D,E,F are
2389: owned by proc1, G,H,I are owned by proc2.
2391: The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2392: The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2393: The 'M','N' parameters are 8,8, and have the same values on all procs.
2395: The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2396: submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2397: corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2398: Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2399: part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2400: matrix, and [DF] as another SeqAIJ matrix.
2402: When d_nz, o_nz parameters are specified, d_nz storage elements are
2403: allocated for every row of the local diagonal submatrix, and o_nz
2404: storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2405: One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2406: the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2407: In this case, the values of d_nz,o_nz are:
2408: .vb
2409: proc0 : d_nz = 2, o_nz = 2
2410: proc1 : d_nz = 3, o_nz = 2
2411: proc2 : d_nz = 1, o_nz = 4
2412: .ve
2413: We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2414: translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2415: for proc2, i.e. we are using 12+15+10=37 storage locations to store
2416: 34 values.
2418: When d_nnz, o_nnz parameters are specified, the storage is specified
2419: for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2420: In the above case the values for d_nnz,o_nnz are:
2421: .vb
2422: proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2423: proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2424: proc2: d_nnz = [1,1] and o_nnz = [4,4]
2425: .ve
2426: Here the space allocated is the sum of all the above values, i.e. 34, and
2427: hence pre-allocation is perfect.
2429: Level: intermediate
2431: .keywords: matrix, aij, compressed row, sparse, parallel
2433: .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2434: @*/
2435: int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2436: {
2437: int ierr,size;
2440: MatCreate(comm,m,n,M,N,A);
2441: MPI_Comm_size(comm,&size);
2442: if (size > 1) {
2443: MatSetType(*A,MATMPIAIJ);
2444: MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);
2445: } else {
2446: MatSetType(*A,MATSEQAIJ);
2447: MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);
2448: }
2449: return(0);
2450: }
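/*
   A minimal usage sketch (not part of mpiaij.c) of the convenience creation
   routine for the same 8x8 example, this time giving only the scalar d_nz/o_nz
   estimates; 3 and 4 are conservative upper bounds over the per-process values
   listed in the manual page.  Assumes a 3-process run.
*/
   Mat A;
   ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,4,PETSC_NULL,&A);CHKERRQ(ierr);
   /* ... insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */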
2452: #undef __FUNCT__
2454: int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2455: {
2456: Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2458: *Ad = a->A;
2459: *Ao = a->B;
2460: *colmap = a->garray;
2461: return(0);
2462: }
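/*
   A minimal usage sketch (not part of mpiaij.c): accessing the two sequential
   blocks stored by this process and the off-diagonal column map.  `A` is an
   assumed, already assembled MPIAIJ matrix.
*/
   Mat Ad,Ao;
   int *garray;
   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
   /* Ad is the DIAGONAL block, Ao the OFF-DIAGONAL block; garray[j] gives the
      global column number of local column j of Ao.  No copies are made, so
      nothing obtained here needs to be freed or destroyed. */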
2464: #undef __FUNCT__
2466: int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2467: {
2468: int ierr;
2469: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2472: if (coloring->ctype == IS_COLORING_LOCAL) {
2473: int *allcolors,*colors,i;
2474: ISColoring ocoloring;
2476: /* set coloring for diagonal portion */
2477: MatSetColoring_SeqAIJ(a->A,coloring);
2479: /* set coloring for off-diagonal portion */
2480: ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);
2481: PetscMalloc((a->B->n+1)*sizeof(int),&colors);
2482: for (i=0; i<a->B->n; i++) {
2483: colors[i] = allcolors[a->garray[i]];
2484: }
2485: PetscFree(allcolors);
2486: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2487: MatSetColoring_SeqAIJ(a->B,ocoloring);
2488: ISColoringDestroy(ocoloring);
2489: } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2490: int *colors,i,*larray;
2491: ISColoring ocoloring;
2493: /* set coloring for diagonal portion */
2494: PetscMalloc((a->A->n+1)*sizeof(int),&larray);
2495: for (i=0; i<a->A->n; i++) {
2496: larray[i] = i + a->cstart;
2497: }
2498: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);
2499: PetscMalloc((a->A->n+1)*sizeof(int),&colors);
2500: for (i=0; i<a->A->n; i++) {
2501: colors[i] = coloring->colors[larray[i]];
2502: }
2503: PetscFree(larray);
2504: ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);
2505: MatSetColoring_SeqAIJ(a->A,ocoloring);
2506: ISColoringDestroy(ocoloring);
2508: /* set coloring for off-diagonal portion */
2509: PetscMalloc((a->B->n+1)*sizeof(int),&larray);
2510: ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);
2511: PetscMalloc((a->B->n+1)*sizeof(int),&colors);
2512: for (i=0; i<a->B->n; i++) {
2513: colors[i] = coloring->colors[larray[i]];
2514: }
2515: PetscFree(larray);
2516: ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);
2517: MatSetColoring_SeqAIJ(a->B,ocoloring);
2518: ISColoringDestroy(ocoloring);
2519: } else {
2520: SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2521: }
2523: return(0);
2524: }
2526: #undef __FUNCT__
2528: int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2529: {
2530: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2531: int ierr;
2534: MatSetValuesAdic_SeqAIJ(a->A,advalues);
2535: MatSetValuesAdic_SeqAIJ(a->B,advalues);
2536: return(0);
2537: }
2539: #undef __FUNCT__
2541: int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2542: {
2543: Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2544: int ierr;
2547: MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);
2548: MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);
2549: return(0);
2550: }
2552: #undef __FUNCT__
2554: /*@C
2555: MatMerge - Creates a single large PETSc matrix by concatenating sequential
2556: matrices from each processor
2558: Collective on MPI_Comm
2560: Input Parameters:
2561: + comm - the communicator the parallel matrix will live on
2562: - inmat - the input sequential matrices
2564: Output Parameter:
2565: . outmat - the parallel matrix generated
2567: Level: advanced
2569: Notes: The number of columns of the matrix in EACH of the separate files
2570: MUST be the same.
2572: @*/
2573: int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2574: {
2575: int ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2576: PetscScalar *values;
2577: PetscMap columnmap,rowmap;
2580:
2581: MatGetSize(inmat,&m,&n);
2583: /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2584: PetscMapCreate(comm,&columnmap);
2585: PetscMapSetSize(columnmap,n);
2586: PetscMapSetType(columnmap,MAP_MPI);
2587: PetscMapGetLocalSize(columnmap,&n);
2588: PetscMapDestroy(columnmap);
2590: PetscMapCreate(comm,&rowmap);
2591: PetscMapSetLocalSize(rowmap,m);
2592: PetscMapSetType(rowmap,MAP_MPI);
2593: PetscMapGetLocalRange(rowmap,&rstart,0);
2594: PetscMapDestroy(rowmap);
2596: MatPreallocateInitialize(comm,m,n,dnz,onz);
2597: for (i=0;i<m;i++) {
2598: MatGetRow(inmat,i,&nnz,&indx,&values);
2599: MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);
2600: MatRestoreRow(inmat,i,&nnz,&indx,&values);
2601: }
2602: MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);
2603: MatPreallocateFinalize(dnz,onz);
2605: for (i=0;i<m;i++) {
2606: MatGetRow(inmat,i,&nnz,&indx,&values);
2607: I = i + rstart;
2608: MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);
2609: MatRestoreRow(inmat,i,&nnz,&indx,&values);
2610: }
2611: MatDestroy(inmat);
2612: MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
2613: MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
2615: return(0);
2616: }
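/*
   A minimal usage sketch (not part of mpiaij.c): merging one sequential matrix
   per process into a single parallel matrix.  The local sizes and the fill-in
   step are assumptions for illustration; note that MatMerge() destroys the
   input matrix it is given.
*/
   Mat local,global;
   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,8,4,PETSC_NULL,&local);CHKERRQ(ierr);
   /* ... fill `local` with MatSetValues() and assemble it ... */
   ierr = MatMerge(PETSC_COMM_WORLD,local,&global);CHKERRQ(ierr);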
2618: #undef __FUNCT__
2620: int MatFileSplit(Mat A,char *outfile)
2621: {
2622: int ierr,rank,len,m,N,i,rstart,*indx,nnz;
2623: PetscViewer out;
2624: char *name;
2625: Mat B;
2626: PetscScalar *values;
2629:
2630: MatGetLocalSize(A,&m,0);
2631: MatGetSize(A,0,&N);
2632: MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);
2633: MatGetOwnershipRange(A,&rstart,0);
2634: for (i=0;i<m;i++) {
2635: MatGetRow(A,i+rstart,&nnz,&indx,&values);
2636: MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);
2637: MatRestoreRow(A,i+rstart,&nnz,&indx,&values);
2638: }
2639: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2640: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2642: MPI_Comm_rank(A->comm,&rank);
2643: PetscStrlen(outfile,&len);
2644: PetscMalloc((len+5)*sizeof(char),&name);
2645: sprintf(name,"%s.%d",outfile,rank);
2646: PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);
2647: PetscFree(name);
2648: MatView(B,out);
2649: PetscViewerDestroy(out);
2650: MatDestroy(B);
2651: return(0);
2652: }