CHROMA
ritz.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Ritz code for eigenvalues
3  */
4 
5 #include "chromabase.h"
6 #include "meas/eig/ritz.h"
7 #include "meas/eig/gramschm.h"
8 
9 namespace Chroma {
10 
11 //! Minimizes the Ritz functional with a CG based algorithm
12 /*!
13  * \ingroup eig
14  *
15  * This subroutine minimizes the Ritz functional with a CG based
16  * algorithm to find the n-th lowest eigenvalue of a hermitian A
17 
18  * lambda_n = min_z <z|Az>/<z|z>
19 
20  * In the subspace orthogonal to the (n-1) lower eigenvectors of A
21 
22  * This routine can handle the original version or be a part of a
23  * Kalkreuter Simma algorithm.
24  *
25  * The algorithm has been modified that there is now only one target
26  * residue -- the relative one.
27  *
28  * so the original stopping criterion is now:
29  *
30  * || g || < Rsd_r * lambda
31  *
32  * The two key features for Kalkreuter - Simma over the normal procedure
33  * are:
34  * i) Adaptive termination of the procedure (controlled from caller)
35  *
36  * The iteration will stop if the gradient has decreased by at least
37  * a given factor of gamma^{-1} which according to the paper is O(10)
38  *
39  * ii) The initial error estimate is now the "delta_cycle" criterion.
40  * basically this scales the norm of || g^2 ||. So the iteration will
41  * also stop if delta_cycle || gamma^2 || < Rsd_r mu.
42  *
43  * To run in non K-S mode, set gamma = 1, delta_cycle = 1.
44  *
45  * For details of the Kalkreuter Simma algorithm see
46  * hep-lat/9507023
47  *
48  * Algorithm:
49 
50  * Apsi[0] := A . Psi[0] ;
51  * mu[0] := < Psi[0] | A . Psi[0] > ; Initial Ritz value
52  * Note: we assume < Psi[0] |Psi[0] > = 1
53  * p[1] = g[0] := ( A - mu[0] ) Psi[0] ; Initial direction
54  * if converged then return
55  * FOR k FROM 1 TO MaxCG DO CG iterations
56  * s1 = (mu[k-1] + < p[k] | A p[k] >/p[k]|^2) / 2;
57  * s2 = (mu[k-1] - < p[k] | A p[k] >/p[k]|^2) / 2;
58  * s3 = |g[k-1]|^2 / |p[k]|;
59  * Compute a and cos(theta), sin(theta)
60  * (see DESY internal report, September 1994, by Bunk, Jansen,
61  * Luescher and Simma)
62  * lambda = mu[k] = mu[k-1] - 2 a sin^2(theta);
63  * Psi[k] = cos(theta) Psi[k-1] + sin(theta) p[k]/|p[k]|;
64  * Apsi[k] = cos(theta) Apsi[k-1] + sin(theta) A p[k]/|p[k]|;
65  * g[k] = Apsi[k] - mu[k] Psi[k]
66  *
67  * if now converged, then exit
68  *
69  * b[k+1] := cos(theta) |g[k]|^2 / |g[k-1]|^2;
70  * p[k+1] := g[k] + b[k+1] (p[k] - Psi[k] < Psi[k] | p[k] >); New direction
71 
72  * Arguments:
73 
74  * A The hermitian Matrix as a lin op (Read)
75  * lambda Eigenvalue to find (Write)
76  * Psi_all Eigenvectors (Modify)
77  * N_eig Current Eigenvalue number (Read)
78  * RsdR_r (relative) residue (Read)
79  * n_renorm Renormalize every n_renorm iter.(Read)
80  * n_min Minimum no of CG iters to do (Read)
81  * n_max Maximum no of CG iters to do (Read)
82  * n_count Number of CG iteration (done) (Write)
83  * Kalk_Sim Use Kalkreuter-Simma criterion (Read)
84  * delta_cycle The initial error estimate (Read)
85  * gamma "Convergence factor" gamma required (Read)
86  * ProjApsiP flag for projecting A.psi (Read)
87 
88  * Local Variables:
89 
90  * psi New eigenstd::vector
91  * p Direction std::vector
92  * Apsi Temporary for A.psi
93  * Ap Temporary for A.p, and other
94  * mu Ritz functional value
95  * g2 | g[k] |**2
96  * p2 | p[k] |**2
97  * k CG iteration counter
98  * b beta[k+1]
99  * and some others...
100  */
101 
102 template < typename T >
103 void Ritz_t(const LinearOperator<T>& A, // Herm Pos Def
104  Real& lambda, // Current E-value
105  multi1d<T>& psi_all, // E-std::vector array
106  int N_eig, // Current evec index
107  const Real& Rsd_r, // Target relative residue
108  const Real& Rsd_a, // Target absolute residue
109  const Real& zero_cutoff, // if ev-slips below this
110  // we consider it to be a zero
111  int n_renorm, // Renormalise frequency
112  int n_min, int n_max, // minimum / maximum no of iters to do
113  int MaxCG, // Max iters after which we bomb
114  bool ProjApsiP, // Project A (?) -- user option
115  int& n_count, // No of iters actually taken
116  Real& final_grad, // || g || at the end
117  bool Kalk_Sim, // Are we in Kalk Simma mode?
118  const Real& delta_cycle, // Initial error estimate (KS mode)
119  const Real& gamma_factor) // Convergence factor Gamma
120 {
121  START_CODE();
122 
123  T psi;
124  T p;
125  T Apsi;
126  T Ap;
127 
128  const Subset& s = A.subset(); // Subset over which A acts
129 
130  // Make Psi_all(N_eig-1) orthogonal to the previous eigenvectors */
131  int N_eig_index = N_eig - 1;
132  int N_eig_minus_one = N_eig_index;
133 
134  // For now equate worry about concrete subsetting later
135  psi[s] = psi_all[N_eig_index];
136 
137  // Project out subspace of previous
138  if( N_eig_minus_one > 0 )
139  GramSchm(psi, psi_all, N_eig_minus_one, s);
140 
141  // Normalize
142  Double d = sqrt(norm2(psi,s));
143  psi[s] /= d;
144 
145  // Now we can start
146  // Apsi[0] := A . Psi[0]
147  A(Apsi, psi, PLUS);
148 
149  // Project to orthogonal subspace, if wanted
150  // Should not be necessary, following Kalkreuter-Simma **/
151  if( N_eig_minus_one > 0 ) {
152  if (ProjApsiP) {
153  GramSchm(Apsi, psi_all, N_eig_index, s);
154  }
155  }
156 
157  // mu := < Psi[0] | A Psi[0] >
158  Double mu = innerProductReal(psi, Apsi, s);
159 
160  // p[0] = g[0] := ( A - mu[0] ) Psi[0] = Apsi - mu psi */
161  lambda = Real(mu);
162 
163 
164  // Psi and Apsi are both projected so p should also be (?)
165  p[s] = Apsi - lambda * psi;
166 
167  // g2 = p2 = |g[0]|^2 = |p[0]|^2
168  Double g2;
169  Double p2;
170  Double g2_0;
171  Double g2_p;
172 
173  g2 = norm2(p,s);
174 
175  // Keep hold of initial g2
176  g2_0 = g2;
177  p2 = g2;
178 
179 #if 0
180  // Debugging
181  QDPIO::cout << "Starting Ritz: N_eig=" << N_eig << ", mu = " << mu
182  << ", g2_0 = " << g2_0 << std::endl;
183 #endif
184 
185  // Check whether we have converged
186  bool convP;
187  bool minItersDoneP = n_min <= 0;
188  bool maxItersDoneP = n_max <= 0;
189  bool zeroFoundP;
190  bool CGConvP;
191  bool KSConvP;
192  bool deltaCycleConvP;
193 
194  Double rsd_r_sq;
195  Double rsd_a_sq;
196 
197  // Make up the stopping criteria
198  rsd_a_sq = Rsd_a * Rsd_a; // Absolute
199  rsd_r_sq = Rsd_r * Rsd_r * mu * mu; // Relative
200 
201  // Converged if g2 < Max( absolute_target, relative-target)
202  // This is necessary for low lying non-zero modes, where
203  // the relative error can reach below machine precision
204  // (ie g2 never gets smaller than it )
205  CGConvP = toBool( g2 < rsd_r_sq ) || toBool(g2 < rsd_a_sq );
206 
207  // Zero Ev-s can reach the cutoff value quickly, also
208  // the absolute error doesn't apply to them.
209  // I could try and use the absolute error as a zero cutoff...
210  // But lets allow this flexibility
211  zeroFoundP = toBool( fabs(mu) < zero_cutoff );
212 
213  // error based on delta_cycle
214  // NOT USED AT THIS TIME BUT POSSIBLY IN THE FUTURE
215  Double delta_cycle_err = delta_cycle;
216 
217  if( Kalk_Sim == false ) {
218 
219  // Normal CG stopping criterion
220  // The stopping criterion is satisfied AND
221  // we have performed the minimum number of iterations
222  convP = minItersDoneP && ( CGConvP || zeroFoundP );
223  }
224  else {
225  // Kalk-Sim stopping criterion
226  // The iteration terminates if
227  //
228  // i) The minimum no of iters is done and max iters is also done
229  // (this is iteration 0 so I ignore max iters at this pint
230  //
231  // AND either the following is true
232  //
233  // i) The delta cycle bound is reached (*)
234  // ii) If the normal CG criterion is satisfied
235  // iii) If mu has reached the zero cutoff
236  //
237  // (*) KS delta cycle bount is not in use at this time.
238  //KSConvP = toBool( delta_cycle_err < rsd );
239  KSConvP = false; // Not in use
240 
241  convP = minItersDoneP && ( KSConvP || CGConvP || zeroFoundP );
242  }
243 
244  if ( convP ) {
245  n_count = 0;
246  END_CODE();
247  return;
248  }
249 
250 
251  Double s1, s2, s3, a, b ;
252  const Double one = Double(1);
253  const Double two = Double(2);
254  const Double half = Double(0.5);
255  Complex xp;
256 
257  Double st, ct; // Sin theta, cos theta
258 
259  // FOR k FROM 1 TO MaxCG do
260  // MaxCG is an absolute max after which we bomb
261  for(int k = 1; k <= MaxCG; ++k)
262  {
263  // Ap = A * p */
264  A(Ap, p, PLUS);
265 
266  // Project to orthogonal subspace, if wanted
267  if( N_eig_minus_one > 0 ) {
268  if (ProjApsiP) {
269  GramSchm(Ap, psi_all, N_eig_minus_one, s);
270  }
271  }
272 
273  // d = < p | A p >
274  d = innerProductReal(p, Ap, s);
275 
276  // Minimise Ritz functional along circle.
277  // Work out cos theta and sin theta
278  // See internal report cited above for details
279  d = d / p2;
280  s1 = half * (mu+d);
281  s2 = half * (mu-d);
282  p2 = sqrt(p2);
283  p2 = one / p2;
284  s3 = g2 * p2;
285  a = fabs(s2);
286 
287  if( toBool (a >= s3) )
288  {
289  d = s3 / s2;
290  d = one + d*d;
291  d = sqrt(d);
292  a *= d;
293  }
294  else
295  {
296  d = s2 / s3;
297  d = one + d*d;
298  d = sqrt(d);
299  a = s3 * d;
300  }
301 
302  s2 /= a; // Now s2 is cos(delta)
303  s3 /= a; // Now s3 is sin(delta)
304 
305  if( toBool( s2 > 0) )
306  {
307  s2 = half * (one+s2);
308  d = -sqrt(s2); // Now d is sin(theta)
309  s2 = -half * s3 / d; // Now s2 is cos(theta)
310  }
311  else
312  {
313  s2 = half * (one-s2);
314  s2 = sqrt(s2); // Now s2 is cos(theta)
315  d = -half * s3 / s2; // Now d is sin(theta)
316  }
317 
318  // mu[k] = mu[k-1] - 2 a d^2
319  mu -= (two * a * d * d);
320  lambda = Real(mu);
321 
322  // del_lamb is the change in lambda
323  // used in an old stopping criterion, which
324  // we may have to revisit
325  // del_lam = Real(s1);
326 
327  st = d*p2; // Now st is sin(theta)/|p|
328  ct = s2; // Now ct is cos(theta)
329 
330  // Psi[k] = ct Psi[k-1] + st p[k-1]
331  // Apsi[k] = ct Apsi[k-1] + st Ap
332  psi[s] *= Real(ct);
333  psi[s] += p * Real(st);
334  Apsi[s] *= Real(ct);
335  Apsi[s] += Ap * Real(st);
336 
337 
338  // Ap = g[k] = Apsi[k] - mu[k] Psi[k]
339  Ap[s] = Apsi - psi*lambda;
340 
341 
342  // g2 = |g[k]|**2 = |Ap|**2
343  s1 = g2; // Now s1 is |g[k-1]|^2
344  g2_p = g2; // g2_p is g2 "previous"
345  g2 = norm2(Ap,s);
346 
347  // Convergence test
348  // Check for min_KS / max_KS iters done
349  minItersDoneP = ( k >= n_min );
350  maxItersDoneP = ( k >= n_max );
351 
352  // Conservative CG test
353  rsd_a_sq = Rsd_a*Rsd_a;
354  rsd_r_sq = Rsd_r*Rsd_r*mu*mu;
355 
356  // Converged if g2 < Max( absolute_target, relative-target)
357  // This is necessary for low lying non-zero modes, where
358  // the relative error can reach below machine precision
359  // (ie g2 never gets smaller than it )
360  CGConvP = toBool( g2 < rsd_a_sq ) || toBool( g2 < rsd_r_sq);
361 
362  // Zero Ev-s can reach the cutoff value quickly, also
363  // the absolute error doesn't apply to them.
364  // I could try and use the absolute error as a zero cutoff...
365  // But lets allow this flexibility
366  zeroFoundP = toBool( fabs(mu) < zero_cutoff );
367 
368  if( Kalk_Sim == false ) {
369 
370  // Non Kalk Sim criteria. We have done the minimum
371  // Number of iterations, and we have either done the maximum
372  // number too or we have converged
373  convP = minItersDoneP && ( maxItersDoneP || CGConvP || zeroFoundP );
374  }
375  else {
376  // Kalk-Sim stopping criterion
377  // The iteration terminates if
378  //
379  // i) The minimum no of iters is done
380  // AND either the following is true
381  // i) KS_max iters is also done
382  // ii) The delta cycle bound is reached (*)
383  // iii) If the normal CG criterion is satisfied
384  // vi) If mu has reached the zero cutoff
385  //
386  // (*) KS delta cycle bount is not in use at this time.
387 
388  /* The following commented lines were a stab at the delta_cycle bound
389 
390  // work out decrease in || g^2 ||.
391  Double g_decr_factor = g2 / g2_p;
392 
393  // "Extrapolate the delta_cycle error with the decrease in g"
394  delta_cycle_err *= fabs(g_decr_factor);
395 
396  deltaCycleConvP = toBool( delta_cycle_err < rsd );
397  */
398 
399  deltaCycleConvP = false;
400  Double g2_g0_ratio = g2 / g2_0;
401  KSConvP = deltaCycleConvP || toBool( g2_g0_ratio <= Double(gamma_factor)) ;
402  convP = minItersDoneP && ( maxItersDoneP || CGConvP || KSConvP || zeroFoundP );
403 
404  }
405 
406 
407  if( convP ) {
408  n_count = k;
409 
410  // Project onto orthog subspace
411  if( N_eig_minus_one > 0 ) {
412  GramSchm(psi, psi_all, N_eig_minus_one, s);
413  }
414 
415  // Renormalise
416  d = sqrt(norm2(psi,s));
417  psi[s] /= d;
418  d -= one;
419 
420 
421  // Print out info about convergence
422 #if 0
423  QDPIO::cout << "Converged at iter=" << k << ", lambda = " << lambda
424  << ", rsd | mu | = " << rsd << ", || g || = "
425  << sqrt(g2) << " || x || - 1 = " << d << std::endl;
426 
427  if(Kalk_Sim)
428  {
429  // Extra info for KalkSimma Mode
430  QDPIO::cout << "KS: gamma = "<< gamma_factor << ", || g ||^2/|| g_0 ||^2="
431  << g2/g2_0
432  << ", delta_cycle_err=" << delta_cycle_err << std::endl;
433  QDPIO::cout << "KS: CGConvP = " << CGConvP << ", KSConvP = " << KSConvP << " deltaCycleConvP = " << deltaCycleConvP << std::endl;
434  }
435 #endif
436  // Recompute lambda
437  // Apsi = A psi
438  A(Apsi, psi, PLUS);
439  s1 = mu;
440  mu = innerProductReal(psi, Apsi, s);
441  lambda = Real(mu);
442 #if 0
443  QDPIO::cout << "Mu-s at convergence: old " << s1 << " vs " << mu << std::endl;
444 
445 #endif
446  // Copy std::vector back into psi array.
447  psi_all[N_eig_index][s] = psi;
448 
449  // Work out final gradient
450  Ap[s] = Apsi - psi*lambda;
451  final_grad = sqrt(norm2(Ap,s));
452  END_CODE();
453  return;
454  }
455 
456 
457  // Work out new beta
458  /* b = beta[k] = cos(theta) |g[k]|^2 / |g[k-1]|^2 */
459  b = ct * g2/g2_p;
460 
461  ct *= 0.05 * b;
462  d = sqrt(g2);
463  ct /= p2*d;
464 
465  // If beta gets very big,
466  if( toBool( ct > Double(1)) )
467  {
468  /* Restart: p[k] = g[k] = Ap */
469  QDPIO::cout << "Restart at iter " << k << " since beta = " << b << std::endl;
470  p[s] = Ap;
471  }
472  else {
473  /* xp = < Psi[k] | p[k-1] > */
474  xp = innerProduct(psi, p, s);
475 
476  /* p[k] = g[k] + b (p[k-1] - xp psi[k]) */
477  p[s] -= psi * xp;
478  p[s] *= b;
479  p[s] += Ap;
480  }
481 
482  if( k % n_renorm == 0 )
483  {
484  /* Renormalize, and re-orthogonalize */
485  /* Project to orthogonal subspace */
486  if( N_eig_minus_one > 0 ) {
487  GramSchm (psi, psi_all, N_eig_minus_one, s);
488  }
489  /* Normalize */
490  d = sqrt(norm2(psi,s));
491 
492  ct = Double(1)/d;
493  psi *= ct;
494  d -= one;
495 
496  // Apsi := A . Psi
497  A(Apsi, psi, PLUS);
498 
499  // Project to orthogonal subspace, if wanted
500  // Should not be necessary, following Kalkreuter-Simma
501  if (ProjApsiP) {
502  GramSchm (Apsi, psi_all, N_eig_index, s);
503  }
504 
505  // mu := < Psi | A Psi >
506  mu = innerProductReal(psi, Apsi, s);
507 
508  /* g[k] = Ap = ( A - mu ) Psi */
509  lambda = Real(mu);
510  Ap[s] = Apsi - psi * lambda;
511 
512  /* g2 = |g[k]|**2 = |Ap|**2 */
513  g2 = norm2(Ap,s);
514 
515  /* Project p[k] to orthogonal subspace */
516  if( N_eig_minus_one > 0 ) {
517  GramSchm(p, psi_all, N_eig_minus_one, s);
518  }
519 
520  /* Make p[k] orthogonal to Psi[k] */
521  GramSchm(p, psi, s);
522 
523  /* Make < g[k] | p[k] > = |g[k]|**2: p[k] = p_old[k] + xp g[k],
524  xp = (g2 - < g | p_old >) / g2; g[k] = Ap */
525  p2 = 0;
526  ct = Double(1) / g2;
527  xp = Real(ct)*(cmplx(Real(g2),Real(0)) - innerProduct(Ap, p, s));
528 
529  p[s] += Ap * xp;
530  }
531  else if (ProjApsiP && N_eig_minus_one > 0)
532  {
533  /* Project psi and p to orthogonal subspace */
534  if( N_eig_minus_one > 0 ) {
535  GramSchm (psi, psi_all, N_eig_minus_one, s);
536  GramSchm (p, psi_all, N_eig_minus_one, s);
537  }
538  }
539 
540  /* p2 = |p[k]|**2 */
541  p2 = norm2(p, s);
542  }
543 
544  /* Copy psi back into psi_all(N_eig-1) */
545 
546  psi_all[N_eig_index][s] = psi;
547  n_count = MaxCG;
548  final_grad = sqrt(g2);
549  QDPIO::cerr << "too many CG/Ritz iterations: n_count=" << n_count
550  << ", rsd_r =" << sqrt(rsd_r_sq) <<" rsd_a=" << Rsd_a << ", ||g||=" << sqrt(g2) << ", p2=" << p2
551  << ", lambda" << lambda << std::endl;
552  QDP_abort(1);
553  END_CODE();
554 }
555 
556 
557 
558 
559 void Ritz(const LinearOperator<LatticeFermion>& A, // Herm Pos Def
560  Real& lambda, // Current E-value
561  multi1d<LatticeFermion>& psi_all, // E-std::vector array
562  int N_eig, // Current evec index
563  const Real& Rsd_r, // Target relative residue
564  const Real& Rsd_a, // Absolute target residue
565  const Real& zero_cutoff, // If ev-slips below this we consider it
566  // to be zero
567  int n_renorm, // Renormalise frequency
568  int n_min, int n_max, // minimum / maximum no of iters to do
569  int MaxCG, // Max no of iters after which we bomb
570  bool ProjApsiP, // Project A (?) -- user option
571  int& n_count, // No of iters actually taken
572  Real& final_grad, // Final gradient at the end.
573  bool Kalk_Sim, // Are we in Kalk Simma mode?
574  const Real& delta_cycle, // Initial error estimate (KS mode)
575  const Real& gamma_factor) // Convergence factor Gamma
576 {
577  Ritz_t(A, lambda, psi_all, N_eig, Rsd_r, Rsd_a, zero_cutoff,
578  n_renorm, n_min, n_max, MaxCG,
579  ProjApsiP, n_count, final_grad,
580  Kalk_Sim, delta_cycle, gamma_factor);
581 }
582 
583 } // end namespace Chroma
Primary include file for CHROMA library code.
Linear Operator.
Definition: linearop.h:27
int mu
Definition: cool.cc:24
Gramm-Schmidt orthogonolization.
void GramSchm(multi1d< LatticeFermion > &psi, const int Npsi, const multi1d< LatticeFermion > &vec, const int Nvec, const Subset &sub)
Gramm-Schmidt orthogonolization.
Definition: gramschm.cc:127
void Ritz_t(const LinearOperator< T > &A, Real &lambda, multi1d< T > &psi_all, int N_eig, const Real &Rsd_r, const Real &Rsd_a, const Real &zero_cutoff, int n_renorm, int n_min, int n_max, int MaxCG, bool ProjApsiP, int &n_count, Real &final_grad, bool Kalk_Sim, const Real &delta_cycle, const Real &gamma_factor)
Minimizes the Ritz functional with a CG based algorithm.
Definition: ritz.cc:103
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
Double two
Definition: pbg5p_w.cc:53
int n_count
Definition: invbicg.cc:78
Double one
Definition: invbicg.cc:105
LinOpSysSolverMGProtoClover::T T
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
Definition: pbg5p_w.cc:32
@ PLUS
Definition: chromabase.h:45
void Ritz(const LinearOperator< LatticeFermion > &A, Real &lambda, multi1d< LatticeFermion > &psi_all, int N_eig, const Real &Rsd_r, const Real &Rsd_a, const Real &zero_cutoff, int n_renorm, int n_min, int n_max, int MaxCG, bool ProjApsiP, int &n_count, Real &final_grad, bool Kalk_Sim, const Real &delta_cycle, const Real &gamma_factor)
Definition: ritz.cc:559
Complex a
Definition: invbicg.cc:95
LatticeFermion psi
Definition: mespbg5p_w.cc:35
DComplex d
Definition: invbicg.cc:99
START_CODE()
A(A, psi, r, Ncb, PLUS)
Complex b
Definition: invbicg.cc:96
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Definition: gtest.h:7351