Actual source code: dgmres.c
petsc-3.7.5 2017-01-01
1: /*
2: This file implements the deflated GMRES.
4: */
6: #include <../src/ksp/ksp/impls/gmres/dgmres/dgmresimpl.h> /*I "petscksp.h" I*/
8: PetscLogEvent KSP_DGMRESComputeDeflationData, KSP_DGMRESApplyDeflation;
10: #define GMRES_DELTA_DIRECTIONS 10
11: #define GMRES_DEFAULT_MAXK 30
12: static PetscErrorCode KSPDGMRESGetNewVectors(KSP,PetscInt);
13: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP,PetscInt,PetscBool,PetscReal*);
14: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
18: PetscErrorCode KSPDGMRESSetEigen(KSP ksp,PetscInt nb_eig)
19: {
23: PetscTryMethod((ksp),"KSPDGMRESSetEigen_C",(KSP,PetscInt),(ksp,nb_eig));
24: return(0);
25: }
28: PetscErrorCode KSPDGMRESSetMaxEigen(KSP ksp,PetscInt max_neig)
29: {
33: PetscTryMethod((ksp),"KSPDGMRESSetMaxEigen_C",(KSP,PetscInt),(ksp,max_neig));
34: return(0);
35: }
38: PetscErrorCode KSPDGMRESForce(KSP ksp,PetscBool force)
39: {
43: PetscTryMethod((ksp),"KSPDGMRESForce_C",(KSP,PetscBool),(ksp,force));
44: return(0);
45: }
48: PetscErrorCode KSPDGMRESSetRatio(KSP ksp,PetscReal ratio)
49: {
53: PetscTryMethod((ksp),"KSPDGMRESSetRatio_C",(KSP,PetscReal),(ksp,ratio));
54: return(0);
55: }
58: PetscErrorCode KSPDGMRESComputeSchurForm(KSP ksp,PetscInt *neig)
59: {
63: PetscUseMethod((ksp),"KSPDGMRESComputeSchurForm_C",(KSP, PetscInt*),(ksp, neig));
64: return(0);
65: }
68: PetscErrorCode KSPDGMRESComputeDeflationData(KSP ksp)
69: {
73: PetscUseMethod((ksp),"KSPDGMRESComputeDeflationData_C",(KSP),(ksp));
74: return(0);
75: }
78: PetscErrorCode KSPDGMRESApplyDeflation(KSP ksp, Vec x, Vec y)
79: {
83: PetscUseMethod((ksp),"KSPDGMRESApplyDeflation_C",(KSP, Vec, Vec),(ksp, x, y));
84: return(0);
85: }
89: PetscErrorCode KSPDGMRESImproveEig(KSP ksp, PetscInt neig)
90: {
94: PetscUseMethod((ksp), "KSPDGMRESImproveEig_C",(KSP, PetscInt),(ksp, neig));
95: return(0);
96: }
100: PetscErrorCode KSPSetUp_DGMRES(KSP ksp)
101: {
103: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
104: PetscInt neig = dgmres->neig+EIG_OFFSET;
105: PetscInt max_k = dgmres->max_k+1;
108: KSPSetUp_GMRES(ksp);
109: if (!dgmres->neig) return(0);
111: /* Allocate workspace for the Schur vectors*/
112: PetscMalloc1(neig*max_k, &SR);
113: dgmres->wr = NULL;
114: dgmres->wi = NULL;
115: dgmres->perm = NULL;
116: dgmres->modul = NULL;
117: dgmres->Q = NULL;
118: dgmres->Z = NULL;
120: UU = NULL;
121: XX = NULL;
122: MX = NULL;
123: AUU = NULL;
124: XMX = NULL;
125: XMU = NULL;
126: UMX = NULL;
127: AUAU = NULL;
128: TT = NULL;
129: TTF = NULL;
130: INVP = NULL;
131: X1 = NULL;
132: X2 = NULL;
133: MU = NULL;
134: return(0);
135: }
137: /*
138: Run GMRES, possibly with restart. Return residual history if requested.
139: input parameters:
141: . gmres - structure containing parameters and work areas
143: output parameters:
144: . nres - residuals (from preconditioned system) at each step.
145: If restarting, consider passing nres+it. If null,
146: ignored
147: . itcount - number of iterations used. nres[0] to nres[itcount]
148: are defined. If null, ignored.
150: Notes:
151: On entry, the value in vector VEC_VV(0) should be the initial residual
152: (this allows shortcuts where the initial preconditioned residual is 0).
153: */
156: PetscErrorCode KSPDGMRESCycle(PetscInt *itcount,KSP ksp)
157: {
158: KSP_DGMRES *dgmres = (KSP_DGMRES*)(ksp->data);
159: PetscReal res_norm,res,hapbnd,tt;
161: PetscInt it = 0;
162: PetscInt max_k = dgmres->max_k;
163: PetscBool hapend = PETSC_FALSE;
164: PetscReal res_old;
165: PetscInt test = 0;
168: VecNormalize(VEC_VV(0),&res_norm);
169: KSPCheckNorm(ksp,res_norm);
170: res = res_norm;
171: *GRS(0) = res_norm;
173: /* check for the convergence */
174: PetscObjectSAWsTakeAccess((PetscObject)ksp);
175: ksp->rnorm = res;
176: PetscObjectSAWsGrantAccess((PetscObject)ksp);
177: dgmres->it = (it - 1);
178: KSPLogResidualHistory(ksp,res);
179: KSPMonitor(ksp,ksp->its,res);
180: if (!res) {
181: if (itcount) *itcount = 0;
182: ksp->reason = KSP_CONVERGED_ATOL;
183: PetscInfo(ksp,"Converged due to zero residual norm on entry\n");
184: return(0);
185: }
186: /* record the residual norm to test if deflation is needed */
187: res_old = res;
189: (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
190: while (!ksp->reason && it < max_k && ksp->its < ksp->max_it) {
191: if (it) {
192: KSPLogResidualHistory(ksp,res);
193: KSPMonitor(ksp,ksp->its,res);
194: }
195: dgmres->it = (it - 1);
196: if (dgmres->vv_allocated <= it + VEC_OFFSET + 1) {
197: KSPDGMRESGetNewVectors(ksp,it+1);
198: }
199: if (dgmres->r > 0) {
200: if (ksp->pc_side == PC_LEFT) {
201: /* Apply the first preconditioner */
202: KSP_PCApplyBAorAB(ksp,VEC_VV(it), VEC_TEMP,VEC_TEMP_MATOP);
203: /* Then apply Deflation as a preconditioner */
204: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_VV(1+it));
205: } else if (ksp->pc_side == PC_RIGHT) {
206: KSPDGMRESApplyDeflation(ksp, VEC_VV(it), VEC_TEMP);
207: KSP_PCApplyBAorAB(ksp, VEC_TEMP, VEC_VV(1+it), VEC_TEMP_MATOP);
208: }
209: } else {
210: KSP_PCApplyBAorAB(ksp,VEC_VV(it),VEC_VV(1+it),VEC_TEMP_MATOP);
211: }
212: dgmres->matvecs += 1;
213: /* update hessenberg matrix and do Gram-Schmidt */
214: (*dgmres->orthog)(ksp,it);
216: /* vv(i+1) . vv(i+1) */
217: VecNormalize(VEC_VV(it+1),&tt);
218: /* save the magnitude */
219: *HH(it+1,it) = tt;
220: *HES(it+1,it) = tt;
222: /* check for the happy breakdown */
223: hapbnd = PetscAbsScalar(tt / *GRS(it));
224: if (hapbnd > dgmres->haptol) hapbnd = dgmres->haptol;
225: if (tt < hapbnd) {
226: PetscInfo2(ksp,"Detected happy breakdown, current hapbnd = %g tt = %g\n",(double)hapbnd,(double)tt);
227: hapend = PETSC_TRUE;
228: }
229: KSPDGMRESUpdateHessenberg(ksp,it,hapend,&res);
231: it++;
232: dgmres->it = (it-1); /* For converged */
233: ksp->its++;
234: ksp->rnorm = res;
235: if (ksp->reason) break;
237: (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);
239: /* Catch error in happy breakdown and signal convergence and break from loop */
240: if (hapend) {
241: if (!ksp->reason) {
242: if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res);
243: else {
244: ksp->reason = KSP_DIVERGED_BREAKDOWN;
245: break;
246: }
247: }
248: }
249: }
251: /* Monitor if we know that we will not return for a restart */
252: if (it && (ksp->reason || ksp->its >= ksp->max_it)) {
253: KSPLogResidualHistory(ksp,res);
254: KSPMonitor(ksp,ksp->its,res);
255: }
256: if (itcount) *itcount = it;
258: /*
259: Down here we have to solve for the "best" coefficients of the Krylov
260: columns, add the solution values together, and possibly unwind the
261: preconditioning from the solution
262: */
263: /* Form the solution (or the solution so far) */
264: KSPDGMRESBuildSoln(GRS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-1);
266: /* Compute data for the deflation to be used during the next restart */
267: if (!ksp->reason && ksp->its < ksp->max_it) {
268: test = max_k *PetscLogReal(ksp->rtol/res) /PetscLogReal(res/res_old);
269: /* Compute data for the deflation if the residual rtol will not be reached in the remaining number of steps allowed */
270: if ((test > dgmres->smv*(ksp->max_it-ksp->its)) || dgmres->force) {
271: KSPDGMRESComputeDeflationData(ksp);
272: }
273: }
274: return(0);
275: }
279: PetscErrorCode KSPSolve_DGMRES(KSP ksp)
280: {
282: PetscInt i,its,itcount;
283: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
284: PetscBool guess_zero = ksp->guess_zero;
287: if (ksp->calc_sings && !dgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
289: PetscObjectSAWsTakeAccess((PetscObject)ksp);
290: ksp->its = 0;
291: dgmres->matvecs = 0;
292: PetscObjectSAWsGrantAccess((PetscObject)ksp);
294: itcount = 0;
295: ksp->reason = KSP_CONVERGED_ITERATING;
296: while (!ksp->reason) {
297: KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);
298: if (ksp->pc_side == PC_LEFT) {
299: dgmres->matvecs += 1;
300: if (dgmres->r > 0) {
301: KSPDGMRESApplyDeflation(ksp, VEC_VV(0), VEC_TEMP);
302: VecCopy(VEC_TEMP, VEC_VV(0));
303: }
304: }
306: KSPDGMRESCycle(&its,ksp);
307: itcount += its;
308: if (itcount >= ksp->max_it) {
309: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
310: break;
311: }
312: ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
313: }
314: ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
316: for (i = 0; i < dgmres->r; i++) {
317: VecViewFromOptions(UU[i],(PetscObject)ksp,"-ksp_dgmres_view_deflation_vecs");
318: }
319: return(0);
320: }
324: PetscErrorCode KSPDestroy_DGMRES(KSP ksp)
325: {
327: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
328: PetscInt neig1 = dgmres->neig+EIG_OFFSET;
329: PetscInt max_neig = dgmres->max_neig;
332: if (dgmres->r) {
333: VecDestroyVecs(max_neig, &UU);
334: VecDestroyVecs(max_neig, &MU);
335: if (XX) {
336: VecDestroyVecs(neig1, &XX);
337: VecDestroyVecs(neig1, &MX);
338: }
340: PetscFree(TT);
341: PetscFree(TTF);
342: PetscFree(INVP);
344: PetscFree(XMX);
345: PetscFree(UMX);
346: PetscFree(XMU);
347: PetscFree(X1);
348: PetscFree(X2);
349: PetscFree(dgmres->work);
350: PetscFree(dgmres->iwork);
351: PetscFree(dgmres->wr);
352: PetscFree(dgmres->wi);
353: PetscFree(dgmres->modul);
354: PetscFree(dgmres->Q);
355: PetscFree(ORTH);
356: PetscFree(AUAU);
357: PetscFree(AUU);
358: PetscFree(SR2);
359: }
360: PetscFree(SR);
361: KSPDestroy_GMRES(ksp);
362: return(0);
363: }
364: /*
365: KSPDGMRESBuildSoln - create the solution from the starting vector and the
366: current iterates.
368: Input parameters:
369: nrs - work area of size it + 1.
370: vs - index of initial guess
371: vdest - index of result. Note that vs may == vdest (replace
372: guess with the solution).
374: This is an internal routine that knows about the GMRES internals.
375: */
378: static PetscErrorCode KSPDGMRESBuildSoln(PetscScalar *nrs,Vec vs,Vec vdest,KSP ksp,PetscInt it)
379: {
380: PetscScalar tt;
382: PetscInt ii,k,j;
383: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
385: /* Solve for solution vector that minimizes the residual */
388: /* If it is < 0, no gmres steps have been performed */
389: if (it < 0) {
390: VecCopy(vs,vdest); /* VecCopy() is smart, exists immediately if vguess == vdest */
391: return(0);
392: }
393: if (*HH(it,it) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is the zero operator. HH(it,it) is identically zero; it = %D GRS(it) = %g",it,(double)PetscAbsScalar(*GRS(it)));
394: if (*HH(it,it) != 0.0) nrs[it] = *GRS(it) / *HH(it,it);
395: else nrs[it] = 0.0;
397: for (ii=1; ii<=it; ii++) {
398: k = it - ii;
399: tt = *GRS(k);
400: for (j=k+1; j<=it; j++) tt = tt - *HH(k,j) * nrs[j];
401: if (*HH(k,k) == 0.0) SETERRQ2(PetscObjectComm((PetscObject)ksp), PETSC_ERR_CONV_FAILED,"Likely your matrix is singular. HH(k,k) is identically zero; it = %D k = %D",it,k);
402: nrs[k] = tt / *HH(k,k);
403: }
405: /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
406: VecSet(VEC_TEMP,0.0);
407: VecMAXPY(VEC_TEMP,it+1,nrs,&VEC_VV(0));
409: /* Apply deflation */
410: if (ksp->pc_side==PC_RIGHT && dgmres->r > 0) {
411: KSPDGMRESApplyDeflation(ksp, VEC_TEMP, VEC_TEMP_MATOP);
412: VecCopy(VEC_TEMP_MATOP, VEC_TEMP);
413: }
414: KSPUnwindPreconditioner(ksp,VEC_TEMP,VEC_TEMP_MATOP);
416: /* add solution to previous solution */
417: if (vdest != vs) {
418: VecCopy(vs,vdest);
419: }
420: VecAXPY(vdest,1.0,VEC_TEMP);
421: return(0);
422: }
423: /*
424: Do the scalar work for the orthogonalization. Return new residual norm.
425: */
428: static PetscErrorCode KSPDGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
429: {
430: PetscScalar *hh,*cc,*ss,tt;
431: PetscInt j;
432: KSP_DGMRES *dgmres = (KSP_DGMRES*) (ksp->data);
435: hh = HH(0,it);
436: cc = CC(0);
437: ss = SS(0);
439: /* Apply all the previously computed plane rotations to the new column
440: of the Hessenberg matrix */
441: for (j=1; j<=it; j++) {
442: tt = *hh;
443: *hh = PetscConj(*cc) * tt + *ss * *(hh+1);
444: hh++;
445: *hh = *cc++ * *hh -(*ss++ * tt);
446: }
448: /*
449: compute the new plane rotation, and apply it to:
450: 1) the right-hand-side of the Hessenberg system
451: 2) the new column of the Hessenberg matrix
452: thus obtaining the updated value of the residual
453: */
454: if (!hapend) {
455: tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1));
456: if (tt == 0.0) {
457: ksp->reason = KSP_DIVERGED_NULL;
458: return(0);
459: }
460: *cc = *hh / tt;
461: *ss = *(hh+1) / tt;
462: *GRS(it+1) = -(*ss * *GRS(it));
463: *GRS(it) = PetscConj(*cc) * *GRS(it);
464: *hh = PetscConj(*cc) * *hh + *ss * *(hh+1);
465: *res = PetscAbsScalar(*GRS(it+1));
466: } else {
467: /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
468: another rotation matrix (so RH doesn't change). The new residual is
469: always the new sine term times the residual from last time (GRS(it)),
470: but now the new sine rotation would be zero...so the residual should
471: be zero...so we will multiply "zero" by the last residual. This might
472: not be exactly what we want to do here -could just return "zero". */
474: *res = 0.0;
475: }
476: return(0);
477: }
478: /*
479: This routine allocates more work vectors, starting from VEC_VV(it).
480: */
483: static PetscErrorCode KSPDGMRESGetNewVectors(KSP ksp,PetscInt it)
484: {
485: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
487: PetscInt nwork = dgmres->nwork_alloc,k,nalloc;
490: nalloc = PetscMin(ksp->max_it,dgmres->delta_allocate);
491: /* Adjust the number to allocate to make sure that we don't exceed the
492: number of available slots */
493: if (it + VEC_OFFSET + nalloc >= dgmres->vecs_allocated) {
494: nalloc = dgmres->vecs_allocated - it - VEC_OFFSET;
495: }
496: if (!nalloc) return(0);
498: dgmres->vv_allocated += nalloc;
500: KSPCreateVecs(ksp,nalloc,&dgmres->user_work[nwork],0,NULL);
501: PetscLogObjectParents(ksp,nalloc,dgmres->user_work[nwork]);
503: dgmres->mwork_alloc[nwork] = nalloc;
504: for (k=0; k<nalloc; k++) {
505: dgmres->vecs[it+VEC_OFFSET+k] = dgmres->user_work[nwork][k];
506: }
507: dgmres->nwork_alloc++;
508: return(0);
509: }
513: PetscErrorCode KSPBuildSolution_DGMRES(KSP ksp,Vec ptr,Vec *result)
514: {
515: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
519: if (!ptr) {
520: if (!dgmres->sol_temp) {
521: VecDuplicate(ksp->vec_sol,&dgmres->sol_temp);
522: PetscLogObjectParent((PetscObject)ksp,(PetscObject)dgmres->sol_temp);
523: }
524: ptr = dgmres->sol_temp;
525: }
526: if (!dgmres->nrs) {
527: /* allocate the work area */
528: PetscMalloc1(dgmres->max_k,&dgmres->nrs);
529: PetscLogObjectMemory((PetscObject)ksp,dgmres->max_k*sizeof(PetscScalar));
530: }
532: KSPDGMRESBuildSoln(dgmres->nrs,ksp->vec_sol,ptr,ksp,dgmres->it);
533: if (result) *result = ptr;
534: return(0);
535: }
539: PetscErrorCode KSPView_DGMRES(KSP ksp,PetscViewer viewer)
540: {
541: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
543: PetscBool iascii,isharmonic;
546: KSPView_GMRES(ksp,viewer);
547: PetscObjectTypeCompare((PetscObject) viewer,PETSCVIEWERASCII,&iascii);
548: if (iascii) {
549: if (dgmres->force) PetscViewerASCIIPrintf(viewer, " DGMRES: Adaptive strategy is used: FALSE\n");
550: else PetscViewerASCIIPrintf(viewer, " DGMRES: Adaptive strategy is used: TRUE\n");
551: PetscOptionsHasName(((PetscObject)ksp)->options,((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &isharmonic);
552: if (isharmonic) {
553: PetscViewerASCIIPrintf(viewer, " DGMRES: Frequency of extracted eigenvalues = %D using Harmonic Ritz values \n", dgmres->neig);
554: } else {
555: PetscViewerASCIIPrintf(viewer, " DGMRES: Frequency of extracted eigenvalues = %D using Ritz values \n", dgmres->neig);
556: }
557: PetscViewerASCIIPrintf(viewer, " DGMRES: Total number of extracted eigenvalues = %D\n", dgmres->r);
558: PetscViewerASCIIPrintf(viewer, " DGMRES: Maximum number of eigenvalues set to be extracted = %D\n", dgmres->max_neig);
559: PetscViewerASCIIPrintf(viewer, " DGMRES: relaxation parameter for the adaptive strategy(smv) = %g\n", dgmres->smv);
560: PetscViewerASCIIPrintf(viewer, " DGMRES: Number of matvecs : %D\n", dgmres->matvecs);
561: }
562: return(0);
563: }
565: /* New DGMRES functions */
569: static PetscErrorCode KSPDGMRESSetEigen_DGMRES(KSP ksp,PetscInt neig)
570: {
571: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
574: if (neig< 0 && neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of neig must be positive and less than the restart value ");
575: dgmres->neig=neig;
576: return(0);
577: }
581: static PetscErrorCode KSPDGMRESSetMaxEigen_DGMRES(KSP ksp,PetscInt max_neig)
582: {
583: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
586: if (max_neig < 0 && max_neig >dgmres->max_k) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The value of max_neig must be positive and less than the restart value ");
587: dgmres->max_neig=max_neig;
588: return(0);
589: }
593: static PetscErrorCode KSPDGMRESSetRatio_DGMRES(KSP ksp,PetscReal ratio)
594: {
595: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
598: if (ratio <= 0) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_ARG_OUTOFRANGE,"The relaxation parameter value must be positive");
599: dgmres->smv=ratio;
600: return(0);
601: }
605: static PetscErrorCode KSPDGMRESForce_DGMRES(KSP ksp,PetscBool force)
606: {
607: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
610: dgmres->force = force;
611: return(0);
612: }
614: extern PetscErrorCode KSPSetFromOptions_GMRES(PetscOptionItems *PetscOptionsObject,KSP);
618: PetscErrorCode KSPSetFromOptions_DGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
619: {
621: PetscInt neig;
622: PetscInt max_neig;
623: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
624: PetscBool flg;
627: KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);
628: PetscOptionsHead(PetscOptionsObject,"KSP DGMRES Options");
629: PetscOptionsInt("-ksp_dgmres_eigen","Number of smallest eigenvalues to extract at each restart","KSPDGMRESSetEigen",dgmres->neig, &neig, &flg);
630: if (flg) {
631: KSPDGMRESSetEigen(ksp, neig);
632: }
633: PetscOptionsInt("-ksp_dgmres_max_eigen","Maximum Number of smallest eigenvalues to extract ","KSPDGMRESSetMaxEigen",dgmres->max_neig, &max_neig, &flg);
634: if (flg) {
635: KSPDGMRESSetMaxEigen(ksp, max_neig);
636: }
637: PetscOptionsReal("-ksp_dgmres_ratio","Relaxation parameter for the smaller number of matrix-vectors product allowed","KSPDGMRESSetRatio",dgmres->smv,&dgmres->smv,NULL);
638: PetscOptionsBool("-ksp_dgmres_improve","Improve the computation of eigenvalues by solving a new generalized eigenvalue problem (experimental - not stable at this time)",NULL,dgmres->improve,&dgmres->improve,NULL);
639: PetscOptionsBool("-ksp_dgmres_force","Sets DGMRES always at restart active, i.e do not use the adaptive strategy","KSPDGMRESForce",dgmres->force,&dgmres->force,NULL);
640: PetscOptionsTail();
641: return(0);
642: }
646: static PetscErrorCode KSPDGMRESComputeDeflationData_DGMRES(KSP ksp, PetscInt *ExtrNeig)
647: {
648: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
650: PetscInt i,j, k;
651: PetscBLASInt nr, bmax;
652: PetscInt r = dgmres->r;
653: PetscInt neig; /* number of eigenvalues to extract at each restart */
654: PetscInt neig1 = dgmres->neig + EIG_OFFSET; /* max number of eig that can be extracted at each restart */
655: PetscInt max_neig = dgmres->max_neig; /* Max number of eigenvalues to extract during the iterative process */
656: PetscInt N = dgmres->max_k+1;
657: PetscInt n = dgmres->it+1;
658: PetscReal alpha;
661: PetscLogEventBegin(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
662: if (dgmres->neig == 0 || (max_neig < (r+neig1) && !dgmres->improve)) {
663: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
664: return(0);
665: }
667: KSPDGMRESComputeSchurForm(ksp, &neig);
668: /* Form the extended Schur vectors X=VV*Sr */
669: if (!XX) {
670: VecDuplicateVecs(VEC_VV(0), neig1, &XX);
671: }
672: for (j = 0; j<neig; j++) {
673: VecZeroEntries(XX[j]);
674: VecMAXPY(XX[j], n, &SR[j*N], &VEC_VV(0));
675: }
677: /* Orthogonalize X against U */
678: if (!ORTH) {
679: PetscMalloc1(max_neig, &ORTH);
680: }
681: if (r > 0) {
682: /* modified Gram-Schmidt */
683: for (j = 0; j<neig; j++) {
684: for (i=0; i<r; i++) {
685: /* First, compute U'*X[j] */
686: VecDot(XX[j], UU[i], &alpha);
687: /* Then, compute X(j)=X(j)-U*U'*X(j) */
688: VecAXPY(XX[j], -alpha, UU[i]);
689: }
690: }
691: }
692: /* Compute MX = M^{-1}*A*X */
693: if (!MX) {
694: VecDuplicateVecs(VEC_VV(0), neig1, &MX);
695: }
696: for (j = 0; j<neig; j++) {
697: KSP_PCApplyBAorAB(ksp, XX[j], MX[j], VEC_TEMP_MATOP);
698: }
699: dgmres->matvecs += neig;
701: if ((r+neig1) > max_neig && dgmres->improve) { /* Improve the approximate eigenvectors in X by solving a new generalized eigenvalue -- Quite expensive to do this actually */
702: KSPDGMRESImproveEig(ksp, neig);
703: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
704: return(0); /* We return here since data for M have been improved in KSPDGMRESImproveEig()*/
705: }
707: /* Compute XMX = X'*M^{-1}*A*X -- size (neig, neig) */
708: if (!XMX) {
709: PetscMalloc1(neig1*neig1, &XMX);
710: }
711: for (j = 0; j < neig; j++) {
712: VecMDot(MX[j], neig, XX, &(XMX[j*neig1]));
713: }
715: if (r > 0) {
716: /* Compute UMX = U'*M^{-1}*A*X -- size (r, neig) */
717: if (!UMX) {
718: PetscMalloc1(max_neig*neig1, &UMX);
719: }
720: for (j = 0; j < neig; j++) {
721: VecMDot(MX[j], r, UU, &(UMX[j*max_neig]));
722: }
723: /* Compute XMU = X'*M^{-1}*A*U -- size(neig, r) */
724: if (!XMU) {
725: PetscMalloc1(max_neig*neig1, &XMU);
726: }
727: for (j = 0; j<r; j++) {
728: VecMDot(MU[j], neig, XX, &(XMU[j*neig1]));
729: }
730: }
732: /* Form the new matrix T = [T UMX; XMU XMX]; */
733: if (!TT) {
734: PetscMalloc1(max_neig*max_neig, &TT);
735: }
736: if (r > 0) {
737: /* Add XMU to T */
738: for (j = 0; j < r; j++) {
739: PetscMemcpy(&(TT[max_neig*j+r]), &(XMU[neig1*j]), neig*sizeof(PetscReal));
740: }
741: /* Add [UMX; XMX] to T */
742: for (j = 0; j < neig; j++) {
743: k = r+j;
744: PetscMemcpy(&(TT[max_neig*k]), &(UMX[max_neig*j]), r*sizeof(PetscReal));
745: PetscMemcpy(&(TT[max_neig*k + r]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
746: }
747: } else { /* Add XMX to T */
748: for (j = 0; j < neig; j++) {
749: PetscMemcpy(&(TT[max_neig*j]), &(XMX[neig1*j]), neig*sizeof(PetscReal));
750: }
751: }
753: dgmres->r += neig;
754: r = dgmres->r;
755: PetscBLASIntCast(r,&nr);
756: /*LU Factorize T with Lapack xgetrf routine */
758: PetscBLASIntCast(max_neig,&bmax);
759: if (!TTF) {
760: PetscMalloc1(bmax*bmax, &TTF);
761: }
762: PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
763: if (!INVP) {
764: PetscMalloc1(bmax, &INVP);
765: }
766: #if defined(PETSC_MISSING_LAPACK_GETRF)
767: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
768: #else
769: {
770: PetscBLASInt info;
771: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bmax, INVP, &info));
772: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
773: }
774: #endif
776: /* Save X in U and MX in MU for the next cycles and increase the size of the invariant subspace */
777: if (!UU) {
778: VecDuplicateVecs(VEC_VV(0), max_neig, &UU);
779: VecDuplicateVecs(VEC_VV(0), max_neig, &MU);
780: }
781: for (j=0; j<neig; j++) {
782: VecCopy(XX[j], UU[r-neig+j]);
783: VecCopy(MX[j], MU[r-neig+j]);
784: }
785: PetscLogEventEnd(KSP_DGMRESComputeDeflationData, ksp, 0,0,0);
786: return(0);
787: }
791: static PetscErrorCode KSPDGMRESComputeSchurForm_DGMRES(KSP ksp, PetscInt *neig)
792: {
793: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
795: PetscInt N = dgmres->max_k + 1, n=dgmres->it+1;
796: PetscBLASInt bn, bN;
797: PetscReal *A;
798: PetscBLASInt ihi;
799: PetscBLASInt ldA; /* leading dimension of A */
800: PetscBLASInt ldQ; /* leading dimension of Q */
801: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
802: PetscReal *work; /* working vector */
803: PetscBLASInt lwork; /* size of the working vector */
804: PetscInt *perm; /* Permutation vector to sort eigenvalues */
805: PetscInt i, j;
806: PetscBLASInt NbrEig; /* Number of eigenvalues really extracted */
807: PetscReal *wr, *wi, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
808: PetscBLASInt *select;
809: PetscBLASInt *iwork;
810: PetscBLASInt liwork;
811: PetscScalar *Ht; /* Transpose of the Hessenberg matrix */
812: PetscScalar *t; /* Store the result of the solution of H^T*t=h_{m+1,m}e_m */
813: PetscBLASInt *ipiv; /* Permutation vector to be used in LAPACK */
814: PetscBool flag; /* determine whether to use Ritz vectors or harmonic Ritz vectors */
817: PetscBLASIntCast(n,&bn);
818: PetscBLASIntCast(N,&bN);
819: ihi = ldQ = bn;
820: ldA = bN;
821: PetscBLASIntCast(5*N,&lwork);
823: #if defined(PETSC_USE_COMPLEX)
824: SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "NO SUPPORT FOR COMPLEX VALUES AT THIS TIME");
825: #endif
827: PetscMalloc1(ldA*ldA, &A);
828: PetscMalloc1(ldQ*n, &Q);
829: PetscMalloc1(lwork, &work);
830: if (!dgmres->wr) {
831: PetscMalloc1(n, &dgmres->wr);
832: PetscMalloc1(n, &dgmres->wi);
833: }
834: wr = dgmres->wr;
835: wi = dgmres->wi;
836: PetscMalloc1(n,&modul);
837: PetscMalloc1(n,&perm);
838: /* copy the Hessenberg matrix to work space */
839: PetscMemcpy(A, dgmres->hes_origin, ldA*ldA*sizeof(PetscReal));
840: PetscOptionsHasName(((PetscObject)ksp)->options,((PetscObject)ksp)->prefix, "-ksp_dgmres_harmonic_ritz", &flag);
841: if (flag) {
842: /* Compute the matrix H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
843: /* Transpose the Hessenberg matrix */
844: PetscMalloc1(bn*bn, &Ht);
845: for (i = 0; i < bn; i++) {
846: for (j = 0; j < bn; j++) {
847: Ht[i * bn + j] = dgmres->hes_origin[j * ldA + i];
848: }
849: }
851: /* Solve the system H^T*t = h_{m+1,m}e_m */
852: PetscCalloc1(bn, &t);
853: t[bn-1] = dgmres->hes_origin[(bn -1) * ldA + bn]; /* Pick the last element H(m+1,m) */
854: PetscMalloc1(bn, &ipiv);
855: /* Call the LAPACK routine dgesv to solve the system Ht^-1 * t */
856: #if defined(PETSC_MISSING_LAPACK_GESV)
857: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GESV - Lapack routine is unavailable.");
858: #else
859: {
860: PetscBLASInt info;
861: PetscBLASInt nrhs = 1;
862: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&bn, &nrhs, Ht, &bn, ipiv, t, &bn, &info));
863: if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling the Lapack routine DGESV");
864: }
865: #endif
866: /* Now form H + H^{-T}*h^2_{m+1,m}e_m*e_m^T */
867: for (i = 0; i < bn; i++) A[(bn-1)*bn+i] += t[i];
868: PetscFree(t);
869: PetscFree(Ht);
870: }
871: /* Compute eigenvalues with the Schur form */
872: #if defined(PETSC_MISSING_LAPACK_HSEQR)
873: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable.");
874: #else
875: {
876: PetscBLASInt info;
877: PetscBLASInt ilo = 1;
878: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S", "I", &bn, &ilo, &ihi, A, &ldA, wr, wi, Q, &ldQ, work, &lwork, &info));
879: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XHSEQR %d",(int) info);
880: }
881: #endif
882: PetscFree(work);
884: /* sort the eigenvalues */
885: for (i=0; i<n; i++) modul[i] = PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
886: for (i=0; i<n; i++) perm[i] = i;
888: PetscSortRealWithPermutation(n, modul, perm);
889: /* save the complex modulus of the largest eigenvalue in magnitude */
890: if (dgmres->lambdaN < modul[perm[n-1]]) dgmres->lambdaN=modul[perm[n-1]];
891: /* count the number of extracted eigenvalues (with complex conjugates) */
892: NbrEig = 0;
893: while (NbrEig < dgmres->neig) {
894: if (wi[perm[NbrEig]] != 0) NbrEig += 2;
895: else NbrEig += 1;
896: }
897: /* Reorder the Schur decomposition so that the cluster of smallest eigenvalues appears in the leading diagonal blocks of A */
899: PetscCalloc1(n, &select);
901: if (!dgmres->GreatestEig) {
902: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
903: } else {
904: for (j = 0; j < NbrEig; j++) select[perm[n-j-1]] = 1;
905: }
906: /* call Lapack dtrsen */
907: lwork = PetscMax(1, 4 * NbrEig *(bn-NbrEig));
908: liwork = PetscMax(1, 2 * NbrEig *(bn-NbrEig));
909: PetscMalloc1(lwork, &work);
910: PetscMalloc1(liwork, &iwork);
911: #if defined(PETSC_MISSING_LAPACK_TRSEN)
912: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable.");
913: #else
914: {
915: PetscBLASInt info;
916: PetscReal CondEig; /* lower bound on the reciprocal condition number for the selected cluster of eigenvalues */
917: PetscReal CondSub; /* estimated reciprocal condition number of the specified invariant subspace. */
918: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("B", "V", select, &bn, A, &ldA, Q, &ldQ, wr, wi, &NbrEig, &CondEig, &CondSub, work, &lwork, iwork, &liwork, &info));
919: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
920: }
921: #endif
922: PetscFree(select);
924: /* Extract the Schur vectors */
925: for (j = 0; j < NbrEig; j++) {
926: PetscMemcpy(&SR[j*N], &(Q[j*ldQ]), n*sizeof(PetscReal));
927: }
928: *neig = NbrEig;
929: PetscFree(A);
930: PetscFree(work);
931: PetscFree(perm);
932: PetscFree(work);
933: PetscFree(iwork);
934: PetscFree(modul);
935: PetscFree(Q);
936: return(0);
937: }
941: static PetscErrorCode KSPDGMRESApplyDeflation_DGMRES(KSP ksp, Vec x, Vec y)
942: {
943: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
944: PetscInt i, r = dgmres->r;
946: PetscReal alpha = 1.0;
947: PetscInt max_neig = dgmres->max_neig;
948: PetscBLASInt br,bmax;
949: PetscReal lambda = dgmres->lambdaN;
952: PetscBLASIntCast(r,&br);
953: PetscBLASIntCast(max_neig,&bmax);
954: PetscLogEventBegin(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
955: if (!r) {
956: VecCopy(x,y);
957: return(0);
958: }
959: /* Compute U'*x */
960: if (!X1) {
961: PetscMalloc1(bmax, &X1);
962: PetscMalloc1(bmax, &X2);
963: }
964: VecMDot(x, r, UU, X1);
966: /* Solve T*X1=X2 for X1*/
967: PetscMemcpy(X2, X1, br*sizeof(PetscReal));
968: #if defined(PETSC_MISSING_LAPACK_GETRS)
969: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
970: #else
971: {
972: PetscBLASInt info;
973: PetscBLASInt nrhs = 1;
974: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N", &br, &nrhs, TTF, &bmax, INVP, X1, &bmax, &info));
975: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRS %d", (int) info);
976: }
977: #endif
978: /* Iterative refinement -- is it really necessary ?? */
979: if (!WORK) {
980: PetscMalloc1(3*bmax, &WORK);
981: PetscMalloc1(bmax, &IWORK);
982: }
983: #if defined(PETSC_MISSING_LAPACK_GERFS)
984: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GERFS - Lapack routine is unavailable.");
985: #else
986: {
987: PetscBLASInt info;
988: PetscReal berr, ferr;
989: PetscBLASInt nrhs = 1;
990: PetscStackCallBLAS("LAPACKgerfs",LAPACKgerfs_("N", &br, &nrhs, TT, &bmax, TTF, &bmax, INVP, X2, &bmax,X1, &bmax, &ferr, &berr, WORK, IWORK, &info));
991: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGERFS %d", (int) info);
992: }
993: #endif
995: for (i = 0; i < r; i++) X2[i] = X1[i]/lambda - X2[i];
997: /* Compute X2=U*X2 */
998: VecZeroEntries(y);
999: VecMAXPY(y, r, X2, UU);
1000: VecAXPY(y, alpha, x);
1002: PetscLogEventEnd(KSP_DGMRESApplyDeflation, ksp, 0, 0, 0);
1003: return(0);
1004: }
1008: static PetscErrorCode KSPDGMRESImproveEig_DGMRES(KSP ksp, PetscInt neig)
1009: {
1010: KSP_DGMRES *dgmres = (KSP_DGMRES*) ksp->data;
1011: PetscInt j,r_old, r = dgmres->r;
1012: PetscBLASInt i = 0;
1013: PetscInt neig1 = dgmres->neig + EIG_OFFSET;
1014: PetscInt bmax = dgmres->max_neig;
1015: PetscInt aug = r + neig; /* actual size of the augmented invariant basis */
1016: PetscInt aug1 = bmax+neig1; /* maximum size of the augmented invariant basis */
1017: PetscBLASInt ldA; /* leading dimension of AUAU and AUU*/
1018: PetscBLASInt N; /* size of AUAU */
1019: PetscReal *Q; /* orthogonal matrix of (left) schur vectors */
1020: PetscReal *Z; /* orthogonal matrix of (right) schur vectors */
1021: PetscReal *work; /* working vector */
1022: PetscBLASInt lwork; /* size of the working vector */
1023: PetscInt *perm; /* Permutation vector to sort eigenvalues */
1024: PetscReal *wr, *wi, *beta, *modul; /* Real and imaginary part and modul of the eigenvalues of A*/
1025: PetscInt ierr;
1026: PetscBLASInt NbrEig = 0,nr,bm;
1027: PetscBLASInt *select;
1028: PetscBLASInt liwork, *iwork;
1031: /* Block construction of the matrices AUU=(AU)'*U and (AU)'*AU*/
1032: if (!AUU) {
1033: PetscMalloc1(aug1*aug1, &AUU);
1034: PetscMalloc1(aug1*aug1, &AUAU);
1035: }
1036: /* AUU = (AU)'*U = [(MU)'*U (MU)'*X; (MX)'*U (MX)'*X]
1037: * Note that MU and MX have been computed previously either in ComputeDataDeflation() or down here in a previous call to this function */
1038: /* (MU)'*U size (r x r) -- store in the <r> first columns of AUU*/
1039: for (j=0; j < r; j++) {
1040: VecMDot(UU[j], r, MU, &AUU[j*aug1]);
1041: }
1042: /* (MU)'*X size (r x neig) -- store in AUU from the column <r>*/
1043: for (j = 0; j < neig; j++) {
1044: VecMDot(XX[j], r, MU, &AUU[(r+j) *aug1]);
1045: }
1046: /* (MX)'*U size (neig x r) -- store in the <r> first columns of AUU from the row <r>*/
1047: for (j = 0; j < r; j++) {
1048: VecMDot(UU[j], neig, MX, &AUU[j*aug1+r]);
1049: }
1050: /* (MX)'*X size (neig neig) -- store in AUU from the column <r> and the row <r>*/
1051: for (j = 0; j < neig; j++) {
1052: VecMDot(XX[j], neig, MX, &AUU[(r+j) *aug1 + r]);
1053: }
1055: /* AUAU = (AU)'*AU = [(MU)'*MU (MU)'*MX; (MX)'*MU (MX)'*MX] */
1056: /* (MU)'*MU size (r x r) -- store in the <r> first columns of AUAU*/
1057: for (j=0; j < r; j++) {
1058: VecMDot(MU[j], r, MU, &AUAU[j*aug1]);
1059: }
1060: /* (MU)'*MX size (r x neig) -- store in AUAU from the column <r>*/
1061: for (j = 0; j < neig; j++) {
1062: VecMDot(MX[j], r, MU, &AUAU[(r+j) *aug1]);
1063: }
1064: /* (MX)'*MU size (neig x r) -- store in the <r> first columns of AUAU from the row <r>*/
1065: for (j = 0; j < r; j++) {
1066: VecMDot(MU[j], neig, MX, &AUAU[j*aug1+r]);
1067: }
1068: /* (MX)'*MX size (neig neig) -- store in AUAU from the column <r> and the row <r>*/
1069: for (j = 0; j < neig; j++) {
1070: VecMDot(MX[j], neig, MX, &AUAU[(r+j) *aug1 + r]);
1071: }
1073: /* Computation of the eigenvectors */
1074: PetscBLASIntCast(aug1,&ldA);
1075: PetscBLASIntCast(aug,&N);
1076: lwork = 8 * N + 20; /* sizeof the working space */
1077: PetscMalloc1(N, &wr);
1078: PetscMalloc1(N, &wi);
1079: PetscMalloc1(N, &beta);
1080: PetscMalloc1(N, &modul);
1081: PetscMalloc1(N, &perm);
1082: PetscMalloc1(N*N, &Q);
1083: PetscMalloc1(N*N, &Z);
1084: PetscMalloc1(lwork, &work);
1085: #if defined(PETSC_MISSING_LAPACK_GGES)
1086: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
1087: #else
1088: {
1089: PetscBLASInt info;
1090: PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &N, AUAU, &ldA, AUU, &ldA, &i, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
1091: if (info) SETERRQ1 (PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGGES %d", (int) info);
1092: }
1093: #endif
1094: for (i=0; i<N; i++) {
1095: if (beta[i] !=0.0) {
1096: wr[i] /=beta[i];
1097: wi[i] /=beta[i];
1098: }
1099: }
1100: /* sort the eigenvalues */
1101: for (i=0; i<N; i++) modul[i]=PetscSqrtReal(wr[i]*wr[i]+wi[i]*wi[i]);
1102: for (i=0; i<N; i++) perm[i] = i;
1103: PetscSortRealWithPermutation(N, modul, perm);
1104: /* Save the norm of the largest eigenvalue */
1105: if (dgmres->lambdaN < modul[perm[N-1]]) dgmres->lambdaN = modul[perm[N-1]];
1106: /* Allocate space to extract the first r schur vectors */
1107: if (!SR2) {
1108: PetscMalloc1(aug1*bmax, &SR2);
1109: }
1110: /* count the number of extracted eigenvalues (complex conjugates count as 2) */
1111: while (NbrEig < bmax) {
1112: if (wi[perm[NbrEig]] == 0) NbrEig += 1;
1113: else NbrEig += 2;
1114: }
1115: if (NbrEig > bmax) NbrEig = bmax - 1;
1116: r_old = r; /* previous size of r */
1117: dgmres->r = r = NbrEig;
1119: /* Select the eigenvalues to reorder */
1120: PetscCalloc1(N, &select);
1121: if (!dgmres->GreatestEig) {
1122: for (j = 0; j < NbrEig; j++) select[perm[j]] = 1;
1123: } else {
1124: for (j = 0; j < NbrEig; j++) select[perm[N-j-1]] = 1;
1125: }
1126: /* Reorder and extract the new <r> schur vectors */
1127: lwork = PetscMax(4 * N + 16, 2 * NbrEig *(N - NbrEig));
1128: liwork = PetscMax(N + 6, 2 * NbrEig *(N - NbrEig));
1129: PetscFree(work);
1130: PetscMalloc1(lwork, &work);
1131: PetscMalloc1(liwork, &iwork);
1132: #if defined(PETSC_MISSING_LAPACK_TGSEN)
1133: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TGSEN - Lapack routine is unavailable.");
1134: #else
1135: {
1136: PetscBLASInt info;
1137: PetscReal Dif[2];
1138: PetscBLASInt ijob = 2;
1139: PetscBLASInt wantQ = 1, wantZ = 1;
1140: PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &N, AUAU, &ldA, AUU, &ldA, wr, wi, beta, Q, &N, Z, &N, &NbrEig, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
1141: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp), -1, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
1142: }
1143: #endif
1144: PetscFree(select);
1146: for (j=0; j<r; j++) {
1147: PetscMemcpy(&SR2[j*aug1], &(Z[j*N]), N*sizeof(PetscReal));
1148: }
1150: /* Multiply the Schur vectors SR2 by U (and X) to get a new U
1151: -- save it temporarily in MU */
1152: for (j = 0; j < r; j++) {
1153: VecZeroEntries(MU[j]);
1154: VecMAXPY(MU[j], r_old, &SR2[j*aug1], UU);
1155: VecMAXPY(MU[j], neig, &SR2[j*aug1+r_old], XX);
1156: }
1157: /* Form T = U'*MU*U */
1158: for (j = 0; j < r; j++) {
1159: VecCopy(MU[j], UU[j]);
1160: KSP_PCApplyBAorAB(ksp, UU[j], MU[j], VEC_TEMP_MATOP);
1161: }
1162: dgmres->matvecs += r;
1163: for (j = 0; j < r; j++) {
1164: VecMDot(MU[j], r, UU, &TT[j*bmax]);
1165: }
1166: /* Factorize T */
1167: PetscMemcpy(TTF, TT, bmax*r*sizeof(PetscReal));
1168: PetscBLASIntCast(r,&nr);
1169: PetscBLASIntCast(bmax,&bm);
1170: #if defined(PETSC_MISSING_LAPACK_GETRF)
1171: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GETRF - Lapack routine is unavailable.");
1172: #else
1173: {
1174: PetscBLASInt info;
1175: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&nr, &nr, TTF, &bm, INVP, &info));
1176: if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGETRF INFO=%d",(int) info);
1177: }
1178: #endif
1179: /* Free Memory */
1180: PetscFree(wr);
1181: PetscFree(wi);
1182: PetscFree(beta);
1183: PetscFree(modul);
1184: PetscFree(perm);
1185: PetscFree(Q);
1186: PetscFree(Z);
1187: PetscFree(work);
1188: PetscFree(iwork);
1189: return(0);
1190: }
1192: /* end new DGMRES functions */
1194: /*MC
1195: KSPDGMRES - Implements the deflated GMRES as defined in [1,2].
1196: In this implementation, the adaptive strategy allows to switch to the deflated GMRES when the
1197: stagnation occurs.
1199: Options Database Keys:
1200: GMRES Options (inherited):
1201: + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
1202: . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
1203: . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
1204: vectors are allocated as needed)
1205: . -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
1206: . -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
1207: . -ksp_gmres_cgs_refinement_type <never,ifneeded,always> - determine if iterative refinement is used to increase the
1208: stability of the classical Gram-Schmidt orthogonalization.
1209: - -ksp_gmres_krylov_monitor - plot the Krylov space generated
1211: DGMRES Options Database Keys:
1212: + -ksp_dgmres_eigen <neig> - number of smallest eigenvalues to extract at each restart
1213: . -ksp_dgmres_max_eigen <max_neig> - maximum number of eigenvalues that can be extracted during the iterative
1214: process
1215: . -ksp_dgmres_force - use the deflation at each restart; switch off the adaptive strategy.
1216: - -ksp_dgmres_view_deflation_vecs <viewerspec> - View the deflation vectors, where viewerspec is a key that can be
1217: parsed by PetscOptionsGetViewer(). If neig > 1, viewerspec should
1218: end with ":append". No vectors will be viewed if the adaptive
1219: strategy chooses not to deflate, so -ksp_dgmres_force should also
1220: be given.
1221: The deflation vectors span a subspace that may be a good
1222: approximation of the subspace of smallest eigenvectors of the
1223: preconditioned operator, so this option can aid in understanding
1224: the performance of a preconditioner.
1226: Level: beginner
1228: Notes: Left and right preconditioning are supported, but not symmetric preconditioning. Complex arithmetic is not yet supported
1230: References:
1231: + 1. - J. Erhel, K. Burrage and B. Pohl, Restarted GMRES preconditioned by deflation,J. Computational and Applied Mathematics, 69(1996).
1232: - 2. - D. NUENTSA WAKAM and F. PACULL, Memory Efficient Hybrid Algebraic Solvers for Linear Systems Arising from Compressible Flows, Computers and Fluids,
1233: In Press, http://dx.doi.org/10.1016/j.compfluid.2012.03.023
1235: Contributed by: Desire NUENTSA WAKAM,INRIA
1237: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPFGMRES, KSPLGMRES,
1238: KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESSetOrthogonalization(), KSPGMRESGetOrthogonalization(),
1239: KSPGMRESClassicalGramSchmidtOrthogonalization(), KSPGMRESModifiedGramSchmidtOrthogonalization(),
1240: KSPGMRESCGSRefinementType, KSPGMRESSetCGSRefinementType(), KSPGMRESGetCGSRefinementType(), KSPGMRESMonitorKrylov(), KSPSetPCSide()
1242: M*/
1246: PETSC_EXTERN PetscErrorCode KSPCreate_DGMRES(KSP ksp)
1247: {
1248: KSP_DGMRES *dgmres;
1252: PetscNewLog(ksp,&dgmres);
1253: ksp->data = (void*) dgmres;
1255: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3);
1256: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);
1258: ksp->ops->buildsolution = KSPBuildSolution_DGMRES;
1259: ksp->ops->setup = KSPSetUp_DGMRES;
1260: ksp->ops->solve = KSPSolve_DGMRES;
1261: ksp->ops->destroy = KSPDestroy_DGMRES;
1262: ksp->ops->view = KSPView_DGMRES;
1263: ksp->ops->setfromoptions = KSPSetFromOptions_DGMRES;
1264: ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
1265: ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES;
1267: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);
1268: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetOrthogonalization_C",KSPGMRESSetOrthogonalization_GMRES);
1269: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);
1270: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetHapTol_C",KSPGMRESSetHapTol_GMRES);
1271: PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetCGSRefinementType_C",KSPGMRESSetCGSRefinementType_GMRES);
1272: /* -- New functions defined in DGMRES -- */
1273: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetEigen_C",KSPDGMRESSetEigen_DGMRES);
1274: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetMaxEigen_C",KSPDGMRESSetMaxEigen_DGMRES);
1275: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESSetRatio_C",KSPDGMRESSetRatio_DGMRES);
1276: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESForce_C",KSPDGMRESForce_DGMRES);
1277: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeSchurForm_C",KSPDGMRESComputeSchurForm_DGMRES);
1278: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESComputeDeflationData_C",KSPDGMRESComputeDeflationData_DGMRES);
1279: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESApplyDeflation_C",KSPDGMRESApplyDeflation_DGMRES);
1280: PetscObjectComposeFunction((PetscObject)ksp, "KSPDGMRESImproveEig_C", KSPDGMRESImproveEig_DGMRES);
1282: PetscLogEventRegister("DGMRESComputeDefl", KSP_CLASSID, &KSP_DGMRESComputeDeflationData);
1283: PetscLogEventRegister("DGMRESApplyDefl", KSP_CLASSID, &KSP_DGMRESApplyDeflation);
1285: dgmres->haptol = 1.0e-30;
1286: dgmres->q_preallocate = 0;
1287: dgmres->delta_allocate = GMRES_DELTA_DIRECTIONS;
1288: dgmres->orthog = KSPGMRESClassicalGramSchmidtOrthogonalization;
1289: dgmres->nrs = 0;
1290: dgmres->sol_temp = 0;
1291: dgmres->max_k = GMRES_DEFAULT_MAXK;
1292: dgmres->Rsvd = 0;
1293: dgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER;
1294: dgmres->orthogwork = 0;
1296: /* Default values for the deflation */
1297: dgmres->r = 0;
1298: dgmres->neig = DGMRES_DEFAULT_EIG;
1299: dgmres->max_neig = DGMRES_DEFAULT_MAXEIG-1;
1300: dgmres->lambdaN = 0.0;
1301: dgmres->smv = SMV;
1302: dgmres->matvecs = 0;
1303: dgmres->GreatestEig = PETSC_FALSE; /* experimental */
1304: dgmres->HasSchur = PETSC_FALSE;
1305: return(0);
1306: }