CHROMA
syssolver_linop_fgmres_dr.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Solve a M*psi=chi linear system by MR
3  */
4 #include <algorithm>
5 #include <functional>
6 #include <vector>
7 #include "chromabase.h"
8 #include "qdp-lapack.h"
11 
13 
14 namespace Chroma
15 {
16 
17  //! FGMRESDR system solver namespace
18  namespace LinOpSysSolverFGMRESDREnv
19  {
20  //! Callback function
22  const std::string& path,
23  Handle< FermState< LatticeFermion, multi1d<LatticeColorMatrix>, multi1d<LatticeColorMatrix> > > state,
25  {
26  return new LinOpSysSolverFGMRESDR(A,state, SysSolverFGMRESDRParams(xml_in, path));
27  }
28 
29 
30  //! Name to be used
31  const std::string name("FGMRESDR_INVERTER");
32 
33  //! Local registration flag
34  static bool registered = false;
35 
36  //! Register all the factories
37  bool registerAll()
38  {
39  bool success = true;
40  if (! registered)
41  {
43  registered = true;
44  }
45  return success;
46  }
47  }
48 
49 
50  /*! Flexible Arnodli Iteration.
51  *
52  * Works on the system A (dx) = r
53  *
54  * resulting from recasting A ( x_0 + dx ) = b
55  *
56  * This routine, produces the Krylov Subspace V
57  * and the Flexible subspace Z
58  *
59  * It also constructs the relevant H matrix
60  * and currently diagonalizes it with Givens rotations to yield R
61  *
62  * as it is proceeding it builds up the vector g, and tracks the accumulated
63  * residuum.
64  *
65  * The workspaces: V, Z, H, R, givens_rots and 'g' must be initialized
66  * outside this routine. This routine does no size checking at the moment.
67  *
68  *
69  *
70  * \param n_krylov is the length of the 'cycle'
71  * \param n_deflate is the size of the augmented subspace ( not yet used )
72  * \param rsd_target is the desired target relative residuum
73  * \param A is the linear operator for the solve
74  * \param M is the preconditioner (a SystemSolver)
75 
76  */
77  template< typename T >
78  void FlexibleArnoldiT(int n_krylov,
79  int n_deflate, // Size of augmented space. Not yet used.
80  const Real& rsd_target,
81  const LinearOperator<T>& A, // Operator
82  const LinOpSystemSolver<T>& M, // Preconditioner
83  multi1d<T>& V,
84  multi1d<T>& Z,
85  multi2d<DComplex>& H,
86  multi2d<DComplex>& R,
87  multi1d< Handle<Givens> >& givens_rots,
88  multi1d<DComplex>& g,
89  multi2d<DComplex>& Qk,
90  multi1d<DComplex>& Qk_tau,
91  int& ndim_cycle)
92 
93  {
94  const Subset& s = M.subset(); // Linear Operator Subset
95  ndim_cycle = 0;
96 
97  const int total_dim=n_krylov+n_deflate;
98 
99 
100  // Work by columns:
101  for(int j=n_deflate; j < total_dim; ++j) {
102 
103  M( Z[j], V[j] ); // z_j = M v_j
104  T w;
105  A( w, Z[j], PLUS); // w = A z_
106 
107  // Fill out column j
108  for(int i=0; i <= j ; ++i ) {
109  H(j,i) = innerProduct(V[i], w, s);
110  w[s] -= H(j,i)* V[i];
111  }
112 
113  Double wnorm=sqrt(norm2(w,s));
114  H(j,j+1) = DComplex(wnorm);
115 
116  // In principle I should check w_norm to be 0, and if it is I should
117  // terminate: Code Smell -- get rid of 1.0e-14.
118  if ( toBool( fabs( wnorm ) < Double(1.0e-14) ) ) {
119 
120  // If wnorm = 0 exactly, then we have converged exactly
121  // Replay Givens rots here, how to test?
122 
123  QDPIO::cout << "Converged at iter = " << j-n_deflate+1 << std::endl;
124  ndim_cycle = j;
125  return;
126  }
127 
128  Double invwnorm = Double(1)/wnorm;
129  V[j+1] = invwnorm*w;
130 
131  // Done construcing H.
132  // Now to incrementally update the R matrix
133  // and compute the residuum
134  if( n_deflate > 0 ) {
135 
136  // If we have deflation, Qk and Qk_tau
137  // should hold the QR decomposition of the
138  // Deflated part of H, called H_k that is constructed
139  // outside the Arnoldi.
140  // To incrementally update the 'R' matrix we need to
141  // apply the Q out of this, to the current column, before
142  // adding the Givens rotations
143 
144  // Copy column of H into R_col
145  multi1d<DComplex> R_col(j+2);
146  for(int i=0; i <= j+1; i++) {
147  R_col[i] = H(j,i);
148  }
149 
150  // R_col <- Qk^H R_col
151  char side = 'L';
152  char trans = 'C';
153  QDPLapack::zunmqrv(side,trans, n_deflate+1, n_deflate+1, Qk, Qk_tau, R_col );
154  // Copy into R
155  for(int i=0; i <= j+1; i++) {
156  R(j,i)=R_col[i];
157  }
158  }
159  else {
160  // If no deflation, just copy the column into R
161  for(int i=0; i <= j+1; ++i) {
162  R(j,i) = H(j,i);
163  }
164  }
165 
166  // Apply Existing Givens Rotations to this column of R
167  for(int i=0;i < j; ++i) {
168  (*givens_rots[i])(j,R);
169  }
170 
171  // Compute next Givens Rot for this column
172  givens_rots[j] = new Givens(j,R);
173 
174  (*givens_rots[j])(j,R); // Apply it to R
175  (*givens_rots[j])(g); // Apply it to the g vector
176 
177  Double accum_resid = fabs(real(g[j+1]));
178 
179 #if 0
180  // Debug
181  for(int rows=0; rows <= j+1; ++rows) {
182  QDPIO::cout << " g["<<rows<<"]="<<g[rows]<< std::endl;
183  }
184 #endif
185  // j-ndeflate is the 0 based iteration count
186  // j-ndeflate+1 is the 1 based human readable iteration count
187  QDPIO::cout << "Iter " << j-n_deflate+1 << " || r || = " << accum_resid << " Target=" <<rsd_target << std::endl;
188 
189  ndim_cycle = j+1;
190  if ( toBool( accum_resid <= rsd_target ) ) {
191  QDPIO::cout << "Flexible Arnoldi Cycle Converged at iter = " << j-n_deflate << std::endl;
192  return;
193  }
194  }
195  }
196 
197  /*! Initialize the key matrices (resize and zero)
198  */
200  {
201  int total_dim = invParam_.NKrylov + invParam_.NDefl;
202  H_.resize(total_dim, total_dim+1); // This is odd. Shouldn't it be
203  R_.resize(total_dim, total_dim+1); // resize (n_cols, n_rows)?
204  // Doing that give weird double free
205  //errors
206 
207  V_.resize(total_dim+1);
208  Z_.resize(total_dim+1);
209  givens_rots_.resize(total_dim+1);
210  g_.resize(total_dim+1);
211  c_.resize(total_dim+1);
212  eta_.resize(total_dim);
213 
214 
215  for(int col =0; col < total_dim; col++) {
216  for(int row = 0; row < total_dim+1; row++) {
217  H_(col,row) = zero;
218  R_(col,row) = zero;
219  }
220  }
221 
222  for(int row = 0; row < total_dim+1; row++) {
223  V_[row] = zero;
224  Z_[row] = zero;
225  g_[row] = zero;
226  c_[row] = zero;
227  }
228 
229  for(int row = 0; row < total_dim; row++) {
230  eta_[row] = zero;
231  }
232 
233 
234  }
235 
236 
237  /*! Solve least squares system to get the coefficients for
238  * Updating the solution at the end of an GGMRES-DR Cycle.
239  * In this instantiation the system solved is
240  *
241  * R \eta = rhs
242  *
243  * where R is the matrix H from GMRES, reduced to upper triangular
244  * form by Givens rotations, and rhs is the result of the Givens rotations
245  * applied to the orignal g = [ beta, 0, 0 ... ]^T vector.
246  *
247  * Hence this operation is a straightforward back substitution
248  * NB: the matrix R has dim+1 rows and dim columns, but the dim+1-th row has
249  * All zero entries and should be deleted. d
250  *
251  * dim is passed in instead of using matrix dimensions in case
252  * the cycle terminated early.
253  *
254  */
255 
256  void
257  LinOpSysSolverFGMRESDR::LeastSquaresSolve(const multi2d<DComplex>& R,
258  const multi1d<DComplex>& Q_rhs,
259  multi1d<DComplex>& eta,
260  int n_cols) const
261  {
262  /* Assume here we have a square matrix with an extra row.
263  Hence the loop counters are the columns not the rows.
264  NB: For an augmented system this will change */
265  eta[n_cols-1] = Q_rhs[n_cols-1]/R(n_cols-1,n_cols-1);
266  for(int row = n_cols-2; row >= 0; --row) {
267  eta[row] = Q_rhs[row];
268  for(int col=row+1; col < n_cols; ++col) {
269  eta[row] -= R(col,row)*eta[col];
270  }
271  eta[row] /= R(row,row);
272  }
273  }
274 
275 #if 0
276  // This version for no incremental update of R. It takes H and c
277  // and does an explicit QR on H. May be more stable?
278  void
279  LinOpSysSolverFGMRESDR::LeastSquaresSolve(const multi2d<DComplex>& H,
280  const multi1d<DComplex>& rhs,
281  multi1d<DComplex>& eta,
282  int n_cols) const
283  {
284  // QR decompose H
285  multi2d<DComplex> R(n_cols,n_cols+1);
286  for(int col=0; col < n_cols; ++col) {
287  for(int row=0; row < n_cols+1; ++row) {
288  R(col,row) = H(col,row);
289  }
290  }
291 
292  multi1d<DComplex> Q_rhs(n_cols+1);
293  for(int row=0; row < n_cols+1; ++row) {
294  Q_rhs[row] = rhs[row];
295  }
296 
297  multi1d<DComplex> R_taus;
298  QDPLapack::zgeqrf(n_cols+1, n_cols, R, R_taus);
299  char side='L';
300  char trans='C';
301  QDPLapack::zunmqrv(side,
302  trans,
303  n_cols+1,
304  n_cols+1,
305  R,
306  R_taus,
307  Q_rhs
308  );
309 
310  eta[n_cols-1] = Q_rhs[n_cols-1]/R(n_cols-1,n_cols-1);
311  for(int row = n_cols-2; row >= 0; --row) {
312  eta[row] = Q_rhs[row];
313  for(int col=row+1; col < n_cols; ++col) {
314  eta[row] -= R(col,row)*eta[col];
315  }
316  eta[row] /= R(row,row);
317  }
318  }
319 #endif
320 
321  /*! Get the eigenvectors of the matrix
322  * H_tilde = H_m + f_m h^{H}_m
323  *
324  * of which we will keep the first NDefl corresponding
325  * to the NDefl eigenvalues of smallest modulus as our
326  * matrix G. In turn the Harmonic Ritz vectors are
327  * V_m G_m.
328  *
329  * We need a two stage process here.
330  * One: first we need to solve H_m^H f_m = h^H_m
331  * where h_m is the last row of the \hat{H}_m
332  *
333  * second, once in posession of f_m we need to constuct
334  * H_tilde and get its eigenvalues and eigenvectors.
335  *
336  * NB: We use lapack here, specifically the routines:
337  * zgetrf/zgetrs to solve for f_m
338  *
339  * zgeev to get the eigenvalues/eigenvectors
340  *
341  */
342  void
344  const multi2d<DComplex>& H,
345  multi1d<DComplex>& f_m,
346  multi2d<DComplex>& evecs,
347  multi1d<DComplex>& evals,
348  multi1d<int>& order_array) const
349  {
350  // Lapack routines overwrite the matrix
351  // with factorizations so I want a copy here.
352  evals.resize(total_dim);
353  evecs.resize(total_dim,total_dim);
354  order_array.resize(total_dim);
355  f_m.resize(total_dim);
356 
357  multi2d<DComplex> H_tilde(total_dim,total_dim);
358  multi1d<DComplex> h_m(total_dim);
359  for(int col=0; col < total_dim; ++col) {
360  for(int row=0; row < total_dim; ++row) {
361  H_tilde(col,row)=H(col,row);
362  }
363  }
364  for(int col=0; col < total_dim; ++col) {
365  // h_m is a column vector and is the hermitian conjugate
366  // of the last row of H (ie H_{row=total_dim}, col=:})
367  // Hence I conjugate here.
368  h_m[col] = conj(H(col,total_dim));
369  f_m[col] = h_m[col]; // f_m will be overwritten
370  }
371 
372  multi1d<int> ipiv(total_dim);
373  int info;
374  QDPLapack::zgetrf(total_dim,total_dim,H_tilde,total_dim,ipiv,info);
375  if (info != 0) {
376  QDPIO::cout << "ZGETRF reported failure: info=" << info << std::endl;
377  QDP_abort(1);
378  }
379  char trans='C'; // We will solve with H^H. C=solve with Herm. Conj.
380  QDPLapack::zgetrs(trans,total_dim,1,H_tilde,total_dim,ipiv,f_m,total_dim,info);
381  if (info != 0) {
382  QDPIO::cout << "ZGETRS reported failure: info=" << info << std::endl;
383  QDP_abort(1);
384  }
385 
386  // OK now we can construct the matrix whose eigenvalues we seek
387  // We can reuse H_tilde for this
388  for(int col=0; col < total_dim; ++col) {
389  for(int row=0; row < total_dim; ++row) {
390  H_tilde(col,row) = H(col,row) + f_m[row]*conj(h_m[col]);
391  }
392  }
393 
394  QDPLapack::zgeev(total_dim, H_tilde, evals, evecs);
395 
396 
397  // Now I need to sort the eigenvalues in terms of smallest modulus.
398  // I will use the C++ std library here.
399  // It needs iterators tho.
400  std::vector<int> std_order_array(total_dim);
401  for(int i=0; i < total_dim; ++i) {
402  std_order_array[i]=i;
403  }
404 
405  // This calls the standard library sort function
406  // with a C++ lambda for the comparison function.
407  //
408  sort(std_order_array.begin(), std_order_array.end(),
409  // This is the lambda below: [&evals] brings evals into the closure
410  // (so called capture region
411  [&evals](int i, int j)->bool {
412  return toBool( norm2(evals[i]) < norm2(evals[j]));
413  });
414 
415  for(int i=0; i < total_dim; ++i) {
416  order_array[i]=std_order_array[i];
417  }
418 
419  }
420 
421 
422 
423  /*! Flexible Arnolid Process.
424  * Currently not using augmentation (n_deflate ignored)
425  *
426  * NB: This currentl just forwards to a templated free function
427  */
428  void
430  int n_deflate,
431  const Real& rsd_target,
432  multi1d<T>& V,
433  multi1d<T>& Z,
434  multi2d<DComplex>&H,
435  multi2d<DComplex>&R,
436  multi1d<Handle<Givens> >& givens_rots,
437  multi1d<DComplex> &g,
438  multi2d<DComplex> &Qk,
439  multi1d<DComplex> &Qk_tau,
440  int& ndim_cycle) const
441  {
442  FlexibleArnoldiT<>(n_krylov,
443  n_deflate,
444  rsd_target,
445  (*A_),
446  (*preconditioner_),
447  V,Z,H,R,givens_rots,g, Qk, Qk_tau, ndim_cycle);
448  }
449 
450 
451  /*! Solve the linear system A psi = chi via FGMRES-DR
452  * Right now the DR part is not implemented.
453  *
454  *
455  *
456  */
457 
460  {
461  START_CODE();
462  SystemSolverResults_t res; // Value to return
463 
464  const Subset& s = A_->subset();
465  Double norm_rhs = sqrt(norm2(chi,s)); // || b ||
466  Double target = norm_rhs * invParam_.RsdTarget; // Target || r || < || b || RsdTarget
467 
468 
469  // Compute ||r||
470  T r = zero; T tmp = zero;
471  r[s] = chi;
472  (*A_)(tmp, psi, PLUS);
473  r[s] -=tmp;
474 
475  // The current residuum
476  Double r_norm = sqrt(norm2(r,s));
477 
478  // Initialize iterations
479  int iters_total = 0;
480  int n_cycles = 0;
481  int prev_dim = invParam_.NKrylov; // This is the default previous dimension
482 
483  // We are done if norm is sufficiently accurate,
484  bool finished = toBool( r_norm <= target ) ;
485 
486  // We keep executing cycles until we are finished
487  while( !finished ) {
488 
489  // If not finished, we should do another cycle with RHS='r' to find dx to add to psi.
490  ++n_cycles;
491 
492 
493  int dim; // dim at the end of cycle (in case we terminate in-cycle
494  int n_deflate = invParam_.NDefl;
495  int n_krylov = invParam_.NKrylov;
496 
497  // Set up the input g vector (c-H eta?)
498  //
499  if (n_cycles == 1 || n_deflate == 0 ) {
500  // We are either first cycle, or
501  // We have no deflation subspace ie we are regular FGMRES
502  // and we are just restarting
503  //
504  // Set up initial vector c = [ beta, 0 ... 0 ]^T
505  // In this case beta should be the r_norm, ie || r || (=|| b || for the first cycle)
506  //
507  // NB: We will have a copy of this called 'g' onto which we will
508  // apply Givens rotations to get an inline estimate of the residuum
509  for(int j=0; j < g_.size(); ++j) {
510  c_[j] = DComplex(0);
511  g_[j] = DComplex(0);
512  }
513  c_[0] = r_norm;
514  g_[0] = r_norm;
515 
516  // Set up initial V[0] = rhs / || r^2 ||
517  // and since we are solving for A delta x = r
518  // the rhs is 'r'
519  //
520  Double beta_inv = Double(1)/r_norm;
521  V_[0][s] = beta_inv * r;
522 
523  n_deflate = 0; // Override n_deflate for first cycle
524  // So Arnoldi process starts from right place
525  }
526  else {
527  QDPIO::cout << "AUGMENTING SUBSPACE: n_deflate=" << n_deflate << " prev_dim=" << prev_dim << std::endl;
528 
529  // This is a cycle where we need to augment the space
530  multi1d<DComplex> f_m(prev_dim);
531  multi2d<DComplex> evecs(prev_dim,prev_dim);
532  multi1d<DComplex> evals(prev_dim);
533  multi1d<int> order_array(prev_dim);
534 
535  (*this).GetEigenvectors(prev_dim,
536  H_,
537  f_m,
538  evecs,
539  evals,
540  order_array);
541 
542 
543  // This is where we will store G_{k = [ g_1 | .. | g_k ]
544  //
545  // G_{k+1} = [ G_k c - H \eta ]
546  // [ 0 ]
547  //
548  // Then we will perform the QR decomposition of Gkplus1
549  // to get Qplus1.
550  //
551  // NB: The LAPACK QR decomposition will overwrite
552  // the original G_{k+1} matrix so I will call it
553  // Qkplus1 (for Q_{k+1}) right away.
554  multi2d<DComplex> Qkplus1(n_deflate+1,prev_dim+1);
555 
556  // First copy in the eigenvectors
557  for(int col=0; col < n_deflate; ++col) {
558  for(int row=0; row < prev_dim; ++row) {
559  Qkplus1(col,row) = evecs( order_array[col], row);
560  }
561  Qkplus1(col,prev_dim) = zero;
562  }
563 
564  // Q_{k+1} + G_{k+1} =[ G_k | c - H \eta ]
565  // [ 0 | ]
566 
567  for(int row = 0; row < prev_dim+1; ++row) {
568  Qkplus1(n_deflate,row) = c_[row];
569  for(int col=0; col < prev_dim; ++col) {
570  Qkplus1(n_deflate,row) -= H_(col,row)*eta_(col);
571  }
572  }
573 
574  // QR Decompose Qkplus1
575  multi1d<DComplex> tau_kplus1; // QR Decomposition Factors
576  QDPIO::cout << "QR Decomposing Gk+1" << std::endl;
577  QDPLapack::zgeqrf(prev_dim+1, n_deflate+1, Qkplus1, tau_kplus1);
578 
579  /// Copy H into H_copy
580  multi2d<DComplex> H_copy(prev_dim, prev_dim+1);
581  for(int col=0; col < prev_dim; ++col) {
582  for(int row=0; row < prev_dim + 1; ++row) {
583  H_copy(col,row) = H_(col,row);
584  }
585  }
586 
587  char side='R';
588  char trans='N';
589  QDPIO::cout << "Post multiplying with Q_k using ZUNMQR 1" << std::endl;
590 
591  // !!!! NB: The dimensions here are still the original dimensions of H_copy,
592  // !!!! even tho at the end of this result only the (rows=prev_dim+1)x(cols=n_deflate) portion is
593  // !!!! valid
594  QDPLapack::zunmqr2(side,trans, prev_dim+1, prev_dim, n_deflate, Qkplus1, tau_kplus1, H_copy);
595 
596  QDPIO::cout << "Pre multiply with Q^{H}_{k+1} usign ZUNMQR2" << std::endl;
597  trans='C'; // Multiply with Herm Conjugate
598  side='L'; // Multiply from Left
599  // !!!! NB: The dimensions here are still the original dimensions of H_copy2
600  // !!!! Even tho when we are done here, really only the (rows=k+1)x(cols=k) portion of it
601  // !!!! is what is relevant
602  QDPLapack::zunmqr2(side,trans, prev_dim+1, prev_dim, n_deflate+1, Qkplus1, tau_kplus1, H_copy);
603 
604  // H_copy is the (rows=n_deflate+1, cols=n_deflate) part of 'H' with which we will start the cycle.
605 
606  // Now I need to form Q_{k+1} explicitly to form the new bases V_{k+1} and Z_{k}
607  QDPLapack::zungqr(prev_dim+1,n_deflate+1,n_deflate+1, Qkplus1, tau_kplus1);
608 
609 
610  multi1d<LatticeFermion> new_V(n_deflate+1);
611  for(int i=0; i < n_deflate+1; ++i) {
612  new_V[i][s] = zero;
613  for(int j=0; j < prev_dim+1; ++j) {
614  new_V[i][s] += V_[j]*Qkplus1(i,j);
615  }
616  }
617 
618  multi1d<LatticeFermion> new_Z(n_deflate);
619  for(int i=0; i < n_deflate; ++i) {
620  new_Z[i][s] = zero;
621  for(int j=0; j < prev_dim; ++j) {
622  new_Z[i][s] += Z_[j]*Qkplus1(i,j);
623  }
624  }
625 
626  // Reinit things:
627  int total_dim = n_krylov+n_deflate;
628 
629  for(int col=0; col < total_dim; ++col) {
630  for(int row=0; row < total_dim; ++row) {
631  // Nuke H
632  H_(col,row) = zero;
633  R_(col,row) = zero;
634  }
635  }
636 
637  for(int row=0; row < total_dim; ++row) {
638  c_[row] = zero;
639  g_[row] = zero;
640  }
641 
642  // Reinit space -- copy in the first new Z's and zero the rest
643  for(int cols=0; cols < n_deflate; ++cols) {
644  Z_[cols][s] = new_Z[cols];
645  }
646  for(int cols=n_deflate; cols < total_dim; ++cols) {
647  Z_[cols][s] = zero;
648  }
649 
650  // Reinit space -- copy in the new V's and zero the rest
651  for(int cols=0; cols < n_deflate+1; ++cols) {
652  V_[cols][s] = new_V[cols];
653  }
654 
655  for(int cols=n_deflate+1; cols < total_dim+1; ++cols) {
656  V_[cols][s] = zero;
657  }
658 
659  // c = [ V^H_{k+1} r ]
660  // [ 0 ]
661  for(int row=0; row < n_deflate+1; ++row) {
662  c_[row] = innerProduct( V_[row],r,s );
663  g_[row] = c_[row];
664  }
665  for(int row=n_deflate+1; row < total_dim+1; ++row) {
666  c_[row] = zero;
667  g_[row] = zero;
668  }
669 
670  Hk_QR_.resize(n_deflate,n_deflate+1);
671  Hk_QR_taus_.resize(n_deflate+1);
672 
673  for(int col=0; col < n_deflate; ++col) {
674  for(int row=0; row < n_deflate+1; ++row) {
675  H_(col,row) = H_copy(col,row);
676  Hk_QR_(col,row) = H_(col,row);
677  }
678  }
679 
680  // Now I want to do a QR decomposition of Hk_QR_
681  QDPLapack::zgeqrf(n_deflate+1, n_deflate, Hk_QR_, Hk_QR_taus_);
682  for(int col=0; col < n_deflate; col++) {
683  for(int row=col; row < n_deflate; row++) {
684  R_(col,row) = Hk_QR_(col,row);
685  }
686  }
687 
688  // g = Q_H c for incremental residuum
689  side = 'L';
690  trans = 'C';
691  QDPLapack::zunmqrv(side,trans, n_deflate+1, n_deflate+1, Hk_QR_, Hk_QR_taus_, g_);
692 
693 #if 0
694  for(int row=0; row < total_dim+1; ++row) {
695  QDPIO::cout << " g[" << row << "]=" << g_[row] << std::endl;
696  }
697 #endif
698  }
699 
700  // Carry out Flexible Arnoldi process for the cycle
701 
702  // We are solving for the defect: A dx = r
703  // so the RHS in the Arnoldi process is 'r'
704  // NB: We recompute a true 'r' after every cycle
705  // So in the cycle we could in principle
706  // use reduced precision... TBInvestigated.
707  FlexibleArnoldi(n_krylov,
708  n_deflate,
709  target,
710  V_,
711  Z_,
712  H_,
713  R_,
714  givens_rots_,
715  g_,
716  Hk_QR_,
717  Hk_QR_taus_,
718  dim);
719 
720  int iters_this_cycle = dim - n_deflate;
721 
722  QDPIO::cout << "Arnoldi cycle finished: dim=" << dim << std::endl;
723  // Solve the least squares system for the lsq_coeffs coefficients
724 #if 0
725  LeastSquaresSolve(H_,c_,eta_, dim); // Solve Least Squares System
726 #endif
727  LeastSquaresSolve(R_,g_,eta_, dim); // Solve Least Squares System
728  // Compute the correction dx = sum_j eta_j Z_j
729  LatticeFermion dx = zero;
730  for(int j=0; j < dim; ++j) {
731  dx[s] += eta_[j]*Z_[j];
732  }
733 
734  // Update psi
735  psi[s] += dx;
736 
737 
738  QDPIO::cout << "FGMRESDR: Cycle finished with " << iters_this_cycle << " iterations" << std::endl;
739 
740  // Recompute r
741  r[s] = chi;
742  (*A_)(tmp, psi, PLUS);
743  r[s] -= tmp; // This 'r' will be used in next cycle as the || rhs ||
744 
745  // Recompute true norm
746  r_norm = sqrt(norm2(r,s));
747  QDPIO::cout << "FGMRESDR: || r || = " << r_norm << " target = " << target << std::endl;
748 
749  // Update total iters
750  iters_total += iters_this_cycle;
751 
752  // Check if we are done either via convergence, or runnign out of iterations
753  finished = toBool( r_norm <= target ) || (iters_total >= invParam_.MaxIter);
754  prev_dim = dim;
755 
756  }
757 
758  // Either we've exceeded max iters, or we have converged in either case set res:
759  res.n_count = iters_total;
760  res.resid = r_norm;
761  QDPIO::cout << "FGMRESDR: Done. Cycles=" << n_cycles << ", Iters=" << iters_total << " || r ||/|| b ||=" << r_norm / norm_rhs << " Target=" << invParam_.RsdTarget << std::endl;
762  END_CODE();
763  return res;
764 
765  }
766 
767 
768 
769 
770 
771 
772 }
773 
774 
Primary include file for CHROMA library code.
Support class for fermion actions and linear operators.
Definition: state.h:94
Class for counted reference semantics.
Definition: handle.h:33
Solve a M*psi=chi linear system by FGMRESDR.
void LeastSquaresSolve(const multi2d< DComplex > &R, const multi1d< DComplex > &rhs, multi1d< DComplex > &eta, int dim) const
multi1d< Handle< Givens > > givens_rots_
void FlexibleArnoldi(int n_krylov, int n_deflate, const Real &rsd_target, multi1d< T > &V, multi1d< T > &Z, multi2d< DComplex > &H, multi2d< DComplex > &R, multi1d< Handle< Givens > > &givens_rots, multi1d< DComplex > &g, multi2d< DComplex > &Qk, multi1d< DComplex > &Qk_tau, int &ndim_cycle) const
Handle< LinOpSystemSolver< T > > preconditioner_
void GetEigenvectors(int total_dim, const multi2d< DComplex > &H, multi1d< DComplex > &f_m, multi2d< DComplex > &evecs, multi1d< DComplex > &evals, multi1d< int > &order_array) const
SystemSolverResults_t operator()(T &psi, const T &chi) const
Solver the linear system.
Handle< LinearOperator< T > > A_
void InitMatrices()
Initialize the internal matrices.
static T & Instance()
Definition: singleton.h:432
virtual const Subset & subset() const =0
Return the subset on which the operator acts.
unsigned j
Definition: ldumul_w.cc:35
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
LinOpSystemSolver< LatticeFermion > * createFerm(XMLReader &xml_in, const std::string &path, Handle< FermState< LatticeFermion, multi1d< LatticeColorMatrix >, multi1d< LatticeColorMatrix > > > state, Handle< LinearOperator< LatticeFermion > > A)
Callback function.
static bool registered
Local registration flag.
bool registerAll()
Register all the factories.
const std::string name("FGMRESDR_INVERTER")
Name to be used.
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
LatticeFermion tmp
Definition: mespbg5p_w.cc:36
LinOpSysSolverMGProtoClover::T T
int i
Definition: pbg5p_w.cc:55
@ PLUS
Definition: chromabase.h:45
multi1d< LatticeFermion > chi(Ncb)
LatticeFermion psi
Definition: mespbg5p_w.cc:35
LatticeFermion eta
Definition: mespbg5p_w.cc:37
START_CODE()
A(A, psi, r, Ncb, PLUS)
void FlexibleArnoldiT(int n_krylov, int n_deflate, const Real &rsd_target, const LinearOperator< T > &A, const LinOpSystemSolver< T > &M, multi1d< T > &V, multi1d< T > &Z, multi2d< DComplex > &H, multi2d< DComplex > &R, multi1d< Handle< Givens > > &givens_rots, multi1d< DComplex > &g, multi2d< DComplex > &Qk, multi1d< DComplex > &Qk_tau, int &ndim_cycle)
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > state
Definition: pbg5p_w.cc:28
static QOP_info_t info
Double zero
Definition: invbicg.cc:106
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Definition: gtest.h:7351
::std::string string
Definition: gtest.h:1979
Double r_norm
Definition: pade_trln_w.cc:86
Holds return info from SystemSolver call.
Definition: syssolver.h:17
Register linop system solvers that solve M*psi=chi.
Factory for solving M*psi=chi where M is not hermitian or pos. def.
Solve a M*psi=chi linear system by FGMRES_DR.