CHROMA
lovddag_double_pass_w.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Overlap-pole operator
3  */
4 #include "chromabase.h"
6 #include "meas/eig/gramschm.h"
7 
8 
9 namespace Chroma
10 {
11 //! Apply the GW operator onto a source std::vector
12 /*! \ingroup linop
13  *
14  */
15 
16 // We are evaluating
17 //
18 // init_fac*psi + (gamma_5 +/- 1) eps(H) psi_{+/-}
19 //
20 // Split psi as P_evec psi + (1 - P_evec) psi
21 //
22 // and
23 //
24 // apply eps(H, P_evec psi) + eps(H, (1-P_evec)psi)
25 //
26 // P_evec psi = Sum_i lambda_i | e_i >< e_i | psi>
27 // so eps(H, P_evec psi ) = Sum_i eps(lambda_i) | e_i >< e_i | psi >
28 //
29 // and ( 1 - P_evec ) psi = psi - P_evec_psi
30 // = psi - Sum_i lambda_i | e_i >< e_i | psi >
31 //
32 // so eps(H, (1-P_evec) psi )) = eps(H, Y)
33 // = [ p_0 + sum_r p_r * (H^2 + q)^{-1} ] Y
34 
35 void lovddag_double_pass::operator() (LatticeFermion& chi,
36  const LatticeFermion& psi,
37  enum PlusMinus isign) const
38 {
40 }
41 
42 
43 //! Apply the GW operator onto a source std::vector
44 /*! \ingroup linop
45  *
46  */
47 
48 // We are evaluating
49 //
50 // init_fac*psi + (gamma_5 +/- 1) eps(H) psi_{+/-}
51 //
52 // Split psi as P_evec psi + (1 - P_evec) psi
53 //
54 // and
55 //
56 // apply eps(H, P_evec psi) + eps(H, (1-P_evec)psi)
57 //
58 // P_evec psi = Sum_i lambda_i | e_i >< e_i | psi>
59 // so eps(H, P_evec psi ) = Sum_i eps(lambda_i) | e_i >< e_i | psi >
60 //
61 // and ( 1 - P_evec ) psi = psi - P_evec_psi
62 // = psi - Sum_i lambda_i | e_i >< e_i | psi >
63 //
64 // so eps(H, (1-P_evec) psi )) = eps(H, Y)
65 // = [ p_0 + sum_r p_r * (H^2 + q)^{-1} ] Y
66 
67 void lovddag_double_pass::operator() (LatticeFermion& chi,
68  const LatticeFermion& psi,
69  enum PlusMinus isign,
70  Real epsilon) const
71 {
72  START_CODE();
73 
74  LatticeFermion ltmp;
75  LatticeFermion tmp1;
76  Real ftmp;
77 
78  const int G5 = Ns * Ns - 1;
79  int n;
80 
81  Real mass = ( Real(1) + m_q ) / ( Real(1) - m_q );
82  Real init_fac = (Real(1)/mass) + mass;
83 
84  LatticeFermion psi_proj = psi;
85 
86  // Accumulate the initial factor * psi this needs no projection
87  // or any of the (gamma_5 + 1 ) tricks.
88  chi = init_fac * psi;
89 
90 
91  // Now we need to work out ( gamma_5 +/- 1 ) sgn(H) psi_{+/-}
92  // Split psi into psi_proj and evecs
93  //
94  // evecs = sum lambda_i | e_i ><e_i | psi>
95 
96  // psi_proj = psi - sum lambda_i | e_i> <e_i | psi>
97  //
98  // Applying the ( gamma_5 +/-1 ) sgn(H) evecs
99  // = (gamma_5 +/-1) sum sgn(lambda_i) |e_i><e_i | psi>
100  // = (gamma_5 +/-1) EigValFunc[i] | e_i> <e_i|psi>
101  //
102  // We accumulate this directly onto chi, and compute psi_proj
103  // in the process
104 
105  if (NEig > 0)
106  {
107  Complex cconsts;
108 
109  for(int i = 0; i < NEig; ++i)
110  {
111  cconsts = innerProduct(EigVec[i], psi_proj);
112 
113  psi_proj -= cconsts * EigVec[i];
114 
115 
116  // Construct tmp1 = (gamma_5 + ichiral) * EigVec
117  tmp1 = Gamma(G5)*EigVec[i];
118 
119  switch (ichiral) {
120  case CH_PLUS:
121  tmp1 += EigVec[i];
122  break;
123  case CH_MINUS:
124  tmp1 -= EigVec[i];
125  break;
126  case CH_NONE:
127  {
128  std::ostringstream error_message;
129  error_message << "Should not use lovddag_double_pass unless the chirality is either CH_PLUS or CH_MINUS" << std::endl;
130  throw error_message.str();
131  }
132  break;
133  default:
134  QDP_error_exit("ichiral is outside allowed range: %d\n", ichiral);
135  break;
136  }
137 
138  /* chi += "eps(lambda)" * (gamma_5 + ichiral) * EigVec */
139  cconsts *= EigValFunc[i];
140  chi += cconsts * tmp1;
141 
142  }
143 
144  }
145 
146  // Chi Now holds the projected part + init_fac part
147  // ie:
148  // chi = init_fac*psi + (gamma_5 + ichiral) sum sgn(lambda) <e_i|psi> |e_i>
149 
150  // Now we have to apply (gamma_5 + 1 ) eps psi_proj
151  //
152  // We do this by doing the double pass algorithm
153  // In this algorithm we do a multi_shift solve on
154  // (H^2 + Q_i) x_i = H^psi_proj
155  //
156  // for all i, and then evaluate (gamma_5 + 1) ( constP H^psi_proj +
157  // sum resP_i x_i
158  //
159  // The sum is carried out by the double pass
160  //
161 
162  // tmp_1 will hold the source for the inversion
163  // ie tmp_1 = H psi_proj
164  // = gamma_5 D_w psi_proj
165  (*M)( ltmp, psi_proj, PLUS);
166  tmp1 = Gamma(G5)*ltmp;
167  Double c = norm2(tmp1);
168 
169  // If exactly 0 norm, then solution must be 0 (for pos. def. operator)
170  // In which case we stop right now...
171  if ( toBool(c == Real(0)) ) {
172  chi = zero;
173  END_CODE();
174  return;
175  }
176 
177 
178 
179  // Now do the multi shiftery using the double pass
180  // *******************************************************************
181  // Solve (MdagM + rootQ_n) chi_n = tmp1
182  //
183  // We put this part of the solution into ltmp (which we don't need
184  // elsewhere
185 
186  Double rsd_sq = norm2(psi) * epsilon*epsilon; // || psi ||^2 * epsilon^2
187 
188  // Actually we are solving 4/( 1 - m_q^2 ) so need to rescale
189  // target residue by (1-m_q^2)/4
190  // (squared of course for rsd_sq)
191  rsd_sq *= ( 1 - m_q*m_q)*(1 - m_q*m_q)/Real(16);
192 
193  Double cp;
194  Double d; // InnerProduct
195 
196  LatticeFermion Ap;
197  LatticeFermion r;
198  LatticeFermion p;
199 
200  multi1d<Double> a(MaxCG+1); // Alpha for unshifted (isz) system
201  multi1d<Double> b(MaxCG+1); // Beta for unshifted system
202 
203  bool convP;
204  int iters_taken;
205 
206  // By default, rootQ(isz) is considered the smallest shift
207  int isz = numroot-1; // isz identifies system with smalles shift
208 
209 
210  // Multiply in P(0) -- this may well be 0 for type 0 rational approximations
211  ltmp = tmp1 * constP;
212 
213 
214  // r[0] := p[0] := tmp1
215  r = tmp1;
216  p = tmp1;
217 
218 
219  cp = 0;
220  convP = false;
221 
222  b[0] = 0;
223  int k;
224 
225  // Record || r_i ||^2 for each iteration
226  multi1d<Double> c_iter(MaxCG+1);
227 
228  // We already know it for iter 0
229  c_iter[0] = c;
230 
231  // Do the iterations proper
232  // first pass
233  for(k = 0; k < MaxCG && ! convP ; ++k) {
234 
235  // Keep hold of the residuum
236 
237  (*MdagM)(Ap, p, PLUS);
238  Ap += p * rootQ[isz];
239 
240  // Project out eigenvectors
241  if (k % ReorthFreq == 0) {
242  GramSchm(Ap, EigVec, NEig, all);
243  }
244 
245  // d = < p, A.p >
246  d = innerProductReal(p, Ap); // 2 Nc Ns flops
247 
248 
249  // k+1 because a[0] corresponds to a_{-1}
250  a[k] = c/d;
251  r -= Real(a[k])*Ap;
252 
253  // Project out eigenvectors
254  if (k % ReorthFreq == 0) {
255  GramSchm (r, EigVec, NEig, all);
256  }
257 
258  cp = c;
259  c = norm2(r);
260 
261  c_iter[k+1] = c;
262  b[k+1] = c/cp;
263 
264  p = r + Real(b[k+1])*p;
265 
266  convP = toBool( c < rsd_sq );
267  }
268 
269  int niters = k;
270 
271  // OK First pass done. I now need to compute gamma_j and c_j
272  multi2d<Double> gamma(niters+1, numroot);
273  for(int s = 0; s < numroot; s++) {
274  gamma[0][s] = 1; // Really gamma_0
275  }
276 
277 
278  multi1d<bool> convPs(numroot);
279  multi1d<int> convIter(numroot);
280  convPs = false;
281 
282  for(int j=0; j < niters; j++) {
283  for(int s =0; s < numroot; s++) {
284 
285 
286  if( s != isz && ! convPs[s] ) {
287  Double a_minus;
288 
289  if( j == 0 ) {
290  // alpha_minus_one -- no need to store. Only need alpha_0,...
291  // in second pass
292  a_minus = Double(1);
293  }
294  else {
295  a_minus = a[j-1];
296  }
297 
298  Double ga = gamma[j][s];
299  Double ga_minus;
300  if( j == 0 ) {
301  // gamma[-1][s] -- no need to store. Only need gamma[0][s]...
302  // in second pass
303  ga_minus =Double(1);
304  }
305  else{
306  ga_minus = gamma[j-1][s];
307  }
308 
309  Double tmp_num = ga*ga_minus*a_minus;
310  Double tmp_den = ga_minus*a_minus*(Double(1) + a[j]*(rootQ[s] - rootQ[isz]));
311  tmp_den += a[j]*b[j]*(ga_minus - ga);
312 
313  gamma[j+1][s] = tmp_num/tmp_den;
314 
315  // If this system has converged at iter j+1, then dont update
316  // gamma-s anymore and note the convergence point. Updating
317  // ad infinitum causes underflow.
318  if( toBool( gamma[j+1][s]*gamma[j+1][s]*c_iter[j+1] < rsd_sq ) ) {
319  convPs[s] = true;
320  convIter[s] = j+1;
321  }
322 
323  }
324 
325 
326  }
327 
328 
329  }
330 
331  // Now work out the c_j constants for the second pass
332  multi1d<Double> sumC(niters+1);
333 
334  for(int j=0; j<=niters; j++) {
335 
336  sumC[j] = 0;
337 
338  for(int m=0; m < niters-j; m++) {
339 
340  Double qsum=resP[isz];
341 
342  for(int s =0; s < numroot; s++) {
343 
344  if( s != isz ) {
345 
346  // Only add gammas which are unconverged.
347  // Converged gamma-s are not updated beyond convergence
348  // which would cause problems (with underflows)
349  // gamma[convIters[s]] is last valid gamma
350  if( toBool( (j+m+1) <= convIter[s] ) ) {
351  qsum += resP[s]*gamma[m+j+1][s]*gamma[m+j][s]/gamma[j][s];
352  }
353 
354  }
355  }
356 
357 
358  Double delta_m = Double(1);
359 
360  if( m > 0 ) {
361  for(int k=1; k <= m; k++) {
362  delta_m *= b[j+k];
363  }
364  }
365 
366 
367  sumC[j] += a[j+m]*delta_m*qsum;
368  }
369  }
370 
371 
372  // Second pass Lanczos
373 
374  // Initialise r_0, p_0
375  r=tmp1;
376  p=tmp1;
377 
378  convP = false;
379 
380  for(k=0; k < niters && !convP ; k++) {
381  (*MdagM)(Ap, p, PLUS);
382  Ap += p * rootQ[isz];
383 
384  // Project out eigenvectors
385  if (k % ReorthFreq == 0) {
386  GramSchm(Ap, EigVec, NEig, all);
387  }
388 
389 
390  // Update chi
391  ltmp += Real(sumC[k])*r;
392 
393  // Update r
394  r -= Real(a[k])*Ap;
395 
396  // Project out eigenvectors
397  if (k % ReorthFreq == 0) {
398  GramSchm (r, EigVec, NEig, all);
399  }
400 
401 #if 0
402  // I have to abandon this stopping criterion
403  // when using relaxed solver
404  Double ltmp_norm_new = epsilon*epsilon*norm2(ltmp);
405 
406  // Convergence criterion for total signum. Might be good enough
407  // without running to full niters
408  convP = toBool( c_iter[k+1]*sumC[k+1]*sumC[k+1] < ltmp_norm_new );
409 #endif
410 
411  p = r + Real(b[k+1])*p;
412  }
413 
414  // Now we have
415  // ltmp = eps(H) psi_proj
416 
417  // we have to add (gamma_5 + ichiral) eps(H) psi_proj
418  // onto chi
419  //
420  tmp1 = Gamma(G5)*ltmp;
421  switch (ichiral) {
422  case CH_PLUS:
423  tmp1 += ltmp;
424  break;
425 
426  case CH_MINUS:
427  tmp1 -= ltmp;
428  break;
429 
430  default:
431  QDP_error_exit("unsupported chirality", ichiral);
432  }
433  chi += tmp1;
434 
435  /* Rescale to the correct normalization */
436  /* chi <- (1/4)*(1-m_q^2) * chi */
437  ftmp = Real(0.25) * (Real(1) - m_q*m_q);
438  chi *= ftmp; /* 2 Nc Ns flops */
439  QDPIO::cout << "Overlap Inner Solve (lovddag_double_pass(" << ichiral << ")) = " << k << " iterations" << std::endl;
440 
441  END_CODE();
442 }
443 
444 } // End Namespace Chroma
445 
Primary include file for CHROMA library code.
const multi1d< LatticeFermion > EigVec
const multi1d< Real > EigValFunc
Gramm-Schmidt orthogonolization.
void GramSchm(multi1d< LatticeFermion > &psi, const int Npsi, const multi1d< LatticeFermion > &vec, const int Nvec, const Subset &sub)
Gramm-Schmidt orthogonolization.
Definition: gramschm.cc:127
void operator()(LatticeFermion &chi, const LatticeFermion &psi, enum PlusMinus isign) const
Apply the operator onto a source std::vector.
unsigned j
Definition: ldumul_w.cc:35
unsigned n
Definition: ldumul_w.cc:36
Internal Overlap-pole operator.
static int m[4]
Definition: make_seeds.cc:16
int epsilon(int i, int j, int k)
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
int G5
Definition: pbg5p_w.cc:57
@ CH_NONE
Definition: ischiral_w.h:8
@ CH_PLUS
Definition: ischiral_w.h:8
@ CH_MINUS
Definition: ischiral_w.h:8
Double c
Definition: invbicg.cc:108
Real rsd_sq
Definition: invbicg.cc:121
int i
Definition: pbg5p_w.cc:55
Double mass
Definition: pbg5p_w.cc:54
@ PLUS
Definition: chromabase.h:45
Double cp
Definition: invbicg.cc:107
multi1d< LatticeFermion > chi(Ncb)
Complex a
Definition: invbicg.cc:95
LatticeFermion psi
Definition: mespbg5p_w.cc:35
DComplex d
Definition: invbicg.cc:99
START_CODE()
Complex b
Definition: invbicg.cc:96
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Definition: gtest.h:7351
int isz
Definition: pade_trln_w.cc:151