CHROMA
lg5eps_w.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Overlap-pole operator
3  */
4 #include "chromabase.h"
6 #include "meas/eig/gramschm.h"
7 
8 
9 namespace Chroma
10 {
11 //! Internal Overlap-pole operator
12 /*!
13  * \ingroup linop
14  *
15  * This routine is specific to Wilson fermions!
16  *
17  * Chi = gamma_5 * B . Psi
18  * where B is the pole approx. to eps(H(m))
19  *
20  * We note that gamma_5*B is unitary.
21  */
22 void lg5eps::operator() (LatticeFermion& chi, const LatticeFermion& psi,
23  enum PlusMinus isign) const
24 {
26 }
27 
28 //! Internal Overlap-pole operator
29 /*!
30  * \ingroup linop
31  *
32  * This routine is specific to Wilson fermions!
33  *
34  * Chi = gamma_5 * B . Psi
35  * where B is the pole approx. to eps(H(m))
36  *
37  * We note that gamma_5*B is unitary.
38  */
39 
40 void lg5eps::operator() (LatticeFermion& chi, const LatticeFermion& psi,
41  enum PlusMinus isign, Real epsilon) const
42 {
43  START_CODE();
44 
45  LatticeFermion tmp1, tmp2;
46 
47  // Gamma_5
48  int G5 = Ns*Ns - 1;
49 
50  chi = zero;
51 
52  switch (isign)
53  {
54  case PLUS:
55  // Non-Dagger: psi is source and tmp1
56  // chi := gamma_5 * (gamma_5 * mass + sgn(H)) * Psi
57  tmp1 = psi;
58  break;
59 
60  case MINUS:
61  // Dagger: apply gamma_5 to source psi to make tmp1
62  // chi := (mass + sgn(H) * gamma_5) * Psi
63  tmp1 = Gamma(G5) * psi;
64  break;
65 
66  default:
67  QDP_error_exit("unknown isign value", isign);
68  }
69 
70 
71 
72  // Project out eigenvectors of source if desired
73  //
74  // psi_proj = tmp1 = (1-P) psi
75  //
76  // with P = sum_i e_i > < e_i | psi >
77  //
78  // so that psi_proj =
79  // chi += func(lambda) * EigVec * <EigVec, psi>
80  // Usually "func(.)" is sgn(.); it is precomputed in EigValFunc.
81  // for all the eigenvalues
82  if (NEig > 0)
83  {
84  Complex cconsts;
85  LatticeFermion tmp3 = tmp1;
86  for(int i = 0; i < NEig; ++i)
87  {
88  cconsts = innerProduct(EigVec[i], tmp3);
89  tmp1 -= EigVec[i] * cconsts;
90 
91  cconsts *= EigValFunc[i];
92  chi += EigVec[i] * cconsts;
93  }
94  }
95 
96  // tmp1 <- H * Projected psi
97  // <- gamma_5 * M * psi
98  (*M)(tmp2, tmp1, PLUS);
99  tmp1 = Gamma(G5) * tmp2;
100 
101  Double c = norm2(tmp1);
102 
103  /* If exactly 0 norm, then solution must be 0 (for pos. def. operator) */
104  if (toBool(c == 0))
105  {
106  chi = zero;
107  END_CODE();
108  return;
109  }
110 
111  // Multiply in P(0) -- this may well be 0 for type 0 rational approximations
112  chi += constP*tmp1;
113 
114  // *******************************************************************
115  // Solve (MdagM + rootQ_n) chi_n = tmp1
116  LatticeFermion Ap;
117  LatticeFermion r;
118  multi1d<LatticeFermion> p(numroot);
119 
120  Real a; // Alpha for unshifted (isz) system
121  Real as; // alpha for current shifted system
122  Real b; // Beta for unshifted system
123  Real bp; // Beta previous for unshifted system
124 
125  Double ztmp; // Assorted reals (shifted residues)
126  Double cp;
127  Double d;
128  Real z0; // temporary value for zeta previous
129  Real z1; // temporary value for zeta current
130 
131  multi1d<Real> bs(numroot); // beta for shifted system
132  multi2d<Real> z(2,numroot); // zeta for shifted system
133 
134  Double chi_sq_new; // sgn() convergence criterion
135  Double chi_sq_diff;
136  multi1d<bool> convsP(numroot); // convergence mask for shifted system
137  bool convP; // overall convergence mask
138 
139  int iz; // Temporary index for getting at
140  // zeta values in z array
141 
142  int s; // Counter for loops over shifts
143 
144 
145  Real rsdcg_sq = epsilon * epsilon; // Target residuum squared
146  Real rsd_sq = norm2(psi) * rsdcg_sq; // Used for relative residue comparisons
147  // r_t^2 * || r ||^2
148 
149 
150  // By default, rootQ(isz) is considered the smallest shift
151  int isz = numroot-1; // isz identifies system with smalles shift
152 
153 
154 
155 
156  // r[0] := p[0] := tmp1
157  r = tmp1;
158 
159  // Initialise search vectors for shifted systems
160  for(s = 0; s < numroot; ++s){
161  p[s] = tmp1;
162  }
163 
164  // Set convergence masks to false
165  convsP = false;
166  convP = false;
167 
168 
169  iz = 1; // z_index z[ iz , s ] holds zeta(s)
170  // z[ 1-iz, s ] holds zeta_minus(s)
171 
172  z = 1; // This fills both zeta and zeta_minus
173 
174  a = 0; // Alpha for unshifted
175  b = 1; // beta for unshifted
176 
177  int k;
178  // Do the iterations proper
179  for(k = 0; k <= MaxCG && ! convP ; ++k) {
180 
181  // Unshifted beta value: b[k] -- k is iteration index
182  // b[k] := | r[k] |**2 / < p[k], Ap[k] > ;
183  // First compute d = < p, A.p >
184  // Ap = A . p
185 
186  // This bit computes
187  // Ap = [ M^dag M + rootQ(isz) ] p_isz
188 
189 
190  (*MdagM)(Ap, p[isz], PLUS);
191  Ap += p[isz] * rootQ[isz];
192 
193  // Project out eigenvectors
194  if (k % ReorthFreq == 0) {
195  GramSchm(Ap, EigVec, NEig, all);
196  }
197 
198  // d = < p, A.p >
199  d = innerProductReal(p[isz], Ap); // 2 Nc Ns flops
200 
201  bp = b; // Store previous unshifted beta
202  b = -Real(c/d); // New unshifted beta
203 
204  // Compute the shifted bs and z
205  bs[isz] = b;
206 
207 
208  // iz now points to previous z's
209  iz = 1 - iz;
210 
211  // Compute new shifted beta and zeta values
212  // as per Beat Jegerlehner's paper: hep-lat/9612014
213  // eqns 2.42, 2.44 on page 7
214  for(s = 0; s < numroot; s++)
215  {
216 
217  // Do this to avoid mitsmp compiler bug !!
218  z0 = z[1-iz][s]; // The current z for the system under considerstion
219  z1 = z[iz][s] ; // The previous z for the system under consideration
220 
221  // We only compute beta and z factors if
222  // i) The system is not yet converged
223  // ii) The system is shifted
224  if (s != isz && !convsP[s]) {
225 
226  // We write the new z-s in the place of the previous ones.
227  // Our ones will become previous next time around
228  z[iz][s] = z0*z1*bp ;
229  z[iz][s] /= b*a*(z1 - z0) + z1*bp*(1 - (rootQ[s] - rootQ[isz])*b);
230  bs[s] = b * z[iz][s] / z0;
231 
232  }
233  }
234 
235 
236  // New residual of system with smallest shift
237  // r[k+1] += b[k] A . p[k] ;
238  r += b * Ap; // 2 Nc Ns flops
239 
240 
241  // Project out eigenvectors
242  if (k % ReorthFreq == 0) {
243  GramSchm (r, EigVec, NEig, all);
244  }
245 
246  // Work out new iterate for sgn(H).
247  //
248  // This in effect updates all x vectors and performs
249  // immediately the linear sum.
250  // However, the x's are never explicitly computed. Rather
251  // the changes they would get get rolled onto the chi immediately
252  //
253  // chi[k+1] -= sum_{shifts} resP_{shift} b_{shift}[k] p_{shift}[k] ;
254  //
255  // since we are doing the linear combinations we multiply in the
256  // constant in the numerator too.
257 
258  // smallest shift first
259  Real(rtmp);
260  rtmp = resP[isz] * b;
261  tmp1 = p[isz] * rtmp; // 2 Nc Ns flops
262 
263  // Now the other shifts.
264  // Converged systems haven' changed, so we only add results
265  // from the unconverged systems
266  for(s = 0; s < numroot; ++s) {
267  if(s != isz && !convsP[s]) {
268 
269  rtmp = bs[s] * resP[s];
270  tmp1 += p[s] * rtmp; // 2 Nc Ns flops
271  }
272  }
273 
274  // Now update the sgn(H) with the above accumulated linear sum
275  chi -= tmp1; // 2 Nc Ns flops
276 
277  // Store in cp the previous value of c
278  // cp = | r[k] |**2
279  cp = c;
280 
281  // And compute the current norm of r into the (now saved) c
282  // c = | r[k] |**2
283  c = norm2(r); // 2 Nc Ns flops
284 
285  // Work out the alpha factor for the system with smallest shift.
286  // a[k+1] := |r[k]|**2 / |r[k-1]|**2 ;
287  a = Real(c/cp);
288 
289  // Now update search vectors p_{shift}[k]
290  // the system with smallest shift gets updated as
291  //
292  // p[k+1] := r[k+1] + a[k+1] p[k];
293 
294  // The updated systems get updated as
295  //
296  // ps[k+1] := zs[k+1] r[k+1] + as[k+1] ps[k];
297  //
298  // where the as[]-s are the shifted versions of alpha.
299  // we must first computed these as per Beat's paper hep-lat/9612014
300  // eq 2.43 on page 7.
301  //
302  // As usual we only update the unconverged systems
303  for(s = 0; s < numroot; ++s)
304  {
305 
306  if (s == isz) {
307  // Smallest shift
308  // p[k+1] = r[k+1] + a[k+1] p[k]
309  //
310  // k is iteration index
311  p[s] *= a; // Nc Ns flops
312  p[s] += r; // Nc Ns flops
313  }
314  else {
315  if (! convsP[s]) {
316  // Unshifted systems
317  // First compute shifted alpha
318  as = a * z[iz][s]*bs[s] / (z[1-iz][s]*b);
319 
320  // Then update
321  // ps[k+1] := zs[k+1] r[k+1] + as[k+1] ps[k];
322  //
323  // k is iteration index
324  p[s] *= as; // Nc Ns flops
325  p[s] += r * z[iz][s]; // Nc Ns flops
326 
327  }
328  }
329  }
330 
331 #if 0
332  // Project out eigenvectors
333  if (k % ReorthFreq == 0)
335 #endif
336 
337  // Convergence tests start here.
338  //
339  // These are two steps:
340  //
341  // i) Check that the shifted vectors have converged
342  // by checking their accumulated residues
343  //
344  // ii) Check that the sign function itself has converged
345  //
346  //
347  // We set the global convergence flag to true, and then if
348  // any vectors are unconverged this will flip the global flag back
349  // to false
350  convP = true; // Assume convergence and prove
351  // otherwise
352 
353  for(s = 0; s < numroot; ++s) {
354 
355  // Only deal with unconverged systems
356  if (! convsP[s]) {
357 
358  // Compute the shifted residuum squared
359  //
360  // r_{shift} = || r || * zeta(shift)
361  //
362  // hence || r_shift ||^2 = || r ||^2 * zeta_shift^2
363  //
364  // || r^2 || is already computed in c
365  //
366  // Store || r_shift ||^2 in ztmp
367  ztmp = Real(c) * z[iz][s]*z[iz][s];
368 
369  // Check ztmp is smaller than the target residuum
370  bool btmp = toBool(ztmp < rsd_sq);
371  convP = convP & btmp;
372  convsP[s] = btmp;
373  }
374  }
375 
376 
377  // Now check convergence of the sgn() itself.
378  // It was updated with
379  // sum resP(shift) beta(shift) p_shift
380  // and this quantity is still in tmp1.
381  //
382  // So norm tmp1 is like an abolute error in sgn() aka: Delta sgn()
383  //
384  // Here we ensure Delta sgn() < target r^2 || sgn H[k-1] ||
385  //
386  // ie that the relative error in sgn() is smaller than the target.
387  // sgn H is kept in chi
388  //
389  // Only converge if chi is converged. If vectors converge first, then error
390 
391 #if 0
392  if (k > 0 && !convP) {
393 
394  // Get target r^2 * || sgn (H) ||^2
395  chi_sq_new = rsdcg_sq * norm2(chi);
396 
397  // Get || Delta sgn()
398  chi_sq_diff = norm2(tmp1); // the diff of old and new soln
399 
400 #if 0
401  QDPIO::cout << "Iter " << k << " || delta Sgn() || " << sqrt(chi_sq_diff) << std::endl;
402 #endif
403  // Check convergence
404  bool btmp = toBool(chi_sq_diff < chi_sq_new);
405 
406  // If we havent converged globally but the vectors have then error
407  if (! btmp && convP) {
408  QDP_error_exit("vectors converged but not final chi");
409  }
410 
411  // cnvP = convP & btmp;
412  convP = btmp;
413  }
414 #endif
415  }
416 
417 
418  if(isign == PLUS) {
419  tmp1 = chi;
420  chi = Gamma(G5)*tmp1;
421  }
422 
423 
424  QDPIO::cout << "Overlap Inner Solve (lg5eps): " << k << " iterations " << std::endl;
425  // End of MULTI SHIFTERY
426 
427  END_CODE();
428 }
429 
430 
431 } // End Namespace Chroma
432 
Primary include file for CHROMA library code.
const multi1d< Real > rootQ
Definition: lg5eps_w.h:96
const Real RsdCG
Definition: lg5eps_w.h:101
const multi1d< LatticeFermion > EigVec
Definition: lg5eps_w.h:97
const multi1d< Real > resP
Definition: lg5eps_w.h:95
const multi1d< Real > EigValFunc
Definition: lg5eps_w.h:98
const int ReorthFreq
Definition: lg5eps_w.h:102
const Real constP
Definition: lg5eps_w.h:94
Gramm-Schmidt orthogonolization.
void GramSchm(multi1d< LatticeFermion > &psi, const int Npsi, const multi1d< LatticeFermion > &vec, const int Nvec, const Subset &sub)
Gramm-Schmidt orthogonolization.
Definition: gramschm.cc:127
void operator()(LatticeFermion &chi, const LatticeFermion &psi, enum PlusMinus isign) const
Apply the operator onto a source std::vector.
Definition: lg5eps_w.cc:22
Internal pole epsilon operator. Just the unitary part.
int z
Definition: meslate.cc:36
int iz
Definition: meslate.cc:32
Real rtmp
Definition: meslate.cc:59
Double tmp2
Definition: mesq.cc:30
Double tmp3
Definition: mesq.cc:31
int epsilon(int i, int j, int k)
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
int G5
Definition: pbg5p_w.cc:57
Double c
Definition: invbicg.cc:108
Real rsd_sq
Definition: invbicg.cc:121
int i
Definition: pbg5p_w.cc:55
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
Double cp
Definition: invbicg.cc:107
multi1d< LatticeFermion > chi(Ncb)
Complex a
Definition: invbicg.cc:95
LatticeFermion psi
Definition: mespbg5p_w.cc:35
DComplex d
Definition: invbicg.cc:99
START_CODE()
Complex b
Definition: invbicg.cc:96
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Definition: gtest.h:7351
int isz
Definition: pade_trln_w.cc:151