CHROMA
minvcg.cc
Go to the documentation of this file.
1 
2 /*! \file
3  * \brief Multishift Conjugate-Gradient algorithm for a Linear Operator
4  */
5 
6 #include "linearop.h"
8 
9 namespace Chroma
10 {
11 
12 
13  //! Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator
14  /*! \ingroup invert
15  *
16  * This subroutine uses the Conjugate Gradient (CG) algorithm to find
17  * the solution of the set of linear equations
18  * Method used is described in Jegerlehner, hep-lat/9708029
19 
20  * We are searching in a subspace orthogonal to the eigenvectors EigVec
21  * of A. The source chi is assumed to already be orthogonal!
22 
23  * Chi = A . Psi
24 
25  * Algorithm:
26 
27  * Psi[0] := 0; Zeroed
28  * r[0] := Chi; Initial residual
29  * p[1] := Chi ; Initial direction
30  * b[0] := |r[0]|**2 / <p[0],Ap[0]> ;
31  * z[0] := 1 / (1 - (shift - shift(0))*b)
32  * bs[0] := b[0] * z[0]
33  * r[1] += b[k] A . p[0] ; New residual
34  * Psi[1] = - b[k] p[k] ; Starting solution std::vector
35  * IF |r[0]| <= RsdCG |Chi| THEN RETURN; Converged?
36  * FOR k FROM 1 TO MaxCG DO CG iterations
37  * a[k] := |r[k]|**2 / |r[k-1]|**2 ;
38  * p[k] := r[k] + a[k] p[k-1]; New direction
39  * b[k+1] := |r[k]|**2 / <p[k],Ap[k]> ;
40  * r[k+1] += b[k+1] A . p[k] ; New residual
41  * Psi[k+1] -= b[k+1] p[k] ; New solution std::vector
42  * IF |[k+1]| <= RsdCG |Chi| THEN RETURN; Converged?
43 
44  * Arguments:
45 
46  * A Hermitian linear operator (Read)
47  * Chi Source (Read)
48  * Psi array of solutions (Write)
49  * shifts shifts of form A + mass (Read)
50  * RsdCG residual accuracy (Read/Write)
51  * n_count Number of CG iteration (Write)
52 
53  * Local Variables:
54 
55  * p Direction std::vector
56  * r Residual std::vector
57  * cp | r[k] |**2
58  * c | r[k-1] |**2
59  * k CG iteration counter
60  * a a[k]
61  * b b[k+1]
62  * d < p[k], A.p[k] >
63  * Ap Temporary for M.p
64 
65  * MaxCG Maximum number of CG iterations allowed
66 
67  * Subroutines:
68  * A Apply matrix hermitian A to std::vector
69  */
70 
71  template<typename T>
73  const T& chi,
74  multi1d<T>& psi,
75  const multi1d<Real>& shifts,
76  const multi1d<Real>& RsdCG,
77  int MaxCG,
78  int& n_count)
79  {
80  START_CODE();
81 
82  const Subset& sub = A.subset();
83 
84  if (shifts.size() != RsdCG.size())
85  {
86  QDPIO::cerr << "MInvCG: number of shifts and residuals must match" << std::endl;
87  QDP_abort(1);
88  }
89 
90  int n_shift = shifts.size();
91 
92  if (n_shift == 0)
93  {
94  QDPIO::cerr << "MInvCG: You must supply at least 1 mass: mass.size() = "
95  << n_shift << std::endl;
96  QDP_abort(1);
97  }
98 
99  /* Now find the smallest mass */
100  int isz = 0;
101  for(int findit=1; findit < n_shift; ++findit) {
102  if ( toBool( shifts[findit] < shifts[isz]) ) {
103  isz = findit;
104  }
105  }
106 
107 #if 0
108  QDPIO::cout << "n_shift = " << n_shift << " isz = " << isz << " shift = " << shifts[0] << std::endl;
109 #endif
110 
111  // We need to make sure, that psi is at least as big as the number
112  // of shifts. We resize it if it is not big enough.
113  // However, it is allowed to be bigger.
114  if( psi.size() < n_shift ) {
115  psi.resize(n_shift);
116  }
117 
118  // For this algorithm, all the psi have to be 0 to start
119  // Only is that way the initial residuum r = chi
120  for(int i= 0; i < n_shift; ++i) {
121  psi[i][sub] = zero;
122  }
123 
124  T chi_internal; moveToFastMemoryHint(chi_internal);
125  chi_internal[sub] = chi;
126  moveToFastMemoryHint(psi,true);
127 
128  FlopCounter flopcount;
129  flopcount.reset();
130  StopWatch swatch;
131  swatch.reset();
132  swatch.start();
133 
134  // If chi has zero norm then the result is zero
135  Double chi_norm_sq = norm2(chi_internal,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
136  Double chi_norm = sqrt(chi_norm_sq);
137 
138  if( toBool( chi_norm < fuzz ))
139  {
140  swatch.stop();
141 
142  n_count = 0;
143 
144  QDPIO::cout << "MInvCG: " << n_count << " iterations" << std::endl;
145  flopcount.report("minvcg", swatch.getTimeInSeconds());
146  revertFromFastMemoryHint(psi,true);
147 
148  // The psi are all zero anyway at this point
149  // for(int i=0; i < n_shift; i++) { psi[i] = zero; }
150  END_CODE();
151  return;
152  }
153 
154  multi1d<Double> rsd_sq(n_shift);
155  multi1d<Double> rsdcg_sq(n_shift);
156 
157  Double cp = chi_norm_sq;
158  int s;
159  for(s = 0; s < n_shift; ++s) {
160  rsdcg_sq[s] = RsdCG[s] * RsdCG[s]; // RsdCG^2
161  rsd_sq[s] = Real(cp) * rsdcg_sq[s]; // || chi ||^2 RsdCG^2
162  }
163 
164 
165  // r[0] := p[0] := Chi
166  T r; moveToFastMemoryHint(r);
167  r[sub] = chi_internal; // no flops
168 
169  // Psi[0] := 0;
170  multi1d<T> p(n_shift); moveToFastMemoryHint(p);
171  for(s = 0; s < n_shift; ++s) {
172  p[s][sub] = chi_internal; // no flops
173  }
174 
175 
176  // b[0] := - | r[0] |**2 / < p[0], Ap[0] > ;/
177  // First compute d = < p, A.p >
178  // Ap = A . p */
179  T Ap; moveToFastMemoryHint(Ap);
180  A(Ap, p[isz], PLUS); flopcount.addFlops(A.nFlops());
181  Ap[sub] += shifts[isz]*p[isz]; flopcount.addSiteFlops(4*Nc*Ns,sub);
182 
183  /* d = < p, A.p > */
184  Double d = innerProductReal(p[isz], Ap, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
185 
186 
187  Double b = -cp/d;
188 
189  /* Compute the shifted bs and z */
190  multi1d<Double> bs(n_shift);
191  multi2d<Double> z(2, n_shift);
192  int iz;
193 
194  z[0][isz] = Double(1);
195  z[1][isz] = Double(1);
196  bs[isz] = b;
197  iz = 1;
198 
199  for(s = 0; s < n_shift; ++s)
200  {
201  if( s != isz ) {
202  z[1-iz][s] = Double(1);
203  z[iz][s] = Double(1) / (Double(1) - (Double(shifts[s])-Double(shifts[isz]))*b);
204  bs[s] = b * z[iz][s];
205  }
206  }
207 
208  // r[1] += b[0] A . p[0];
209  r[sub] += Real(b)*Ap; flopcount.addSiteFlops(4*Nc*Ns,sub);
210 
211  // Psi[1] -= b[0] p[0] = - b[0] chi;
212  for(s = 0; s < n_shift; ++s) {
213  psi[s][sub] = - Real(bs[s])*chi_internal; flopcount.addSiteFlops(2*Nc*Ns,sub);
214  }
215 
216  // c = |r[1]|^2
217  Double c = norm2(r,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
218 
219  // Check convergence of first solution
220  multi1d<bool> convsP(n_shift);
221  for(s = 0; s < n_shift; ++s) {
222  convsP[s] = false;
223  }
224 
225  bool convP = toBool( c < rsd_sq[isz] );
226 
227 #if 0
228  QDPIO::cout << "MInvCG: k = 0 r = " << sqrt(c) << std::endl;
229 #endif
230 
231  // FOR k FROM 1 TO MaxCG DO
232  // IF |psi[k+1] - psi[k]| <= RsdCG |psi[k+1]| THEN RETURN;
233  Double z0, z1;
234  Double ztmp;
235  Double cs;
236  Double a;
237  Double as;
238  Double bp;
239  int k;
240 
241  for(k = 1; k <= MaxCG && !convP ; ++k)
242  {
243  // a[k+1] := |r[k]|**2 / |r[k-1]|**2 ;
244  a = c/cp;
245 
246  // p[k+1] := r[k+1] + a[k+1] p[k];
247  // Compute the shifted as */
248  // ps[k+1] := zs[k+1] r[k+1] + a[k+1] ps[k];
249  for(s = 0; s < n_shift; ++s)
250  {
251  // Always update p[isz] even if isz is converged
252  // since the other p-s depend on it.
253  if (s == isz)
254  {
255  /* p[s][sub] *= Real(a);
256  p[s][sub] += r;
257  */
258  p[s][sub] = r + Real(a)*p[s]; flopcount.addSiteFlops(4*Nc*Ns,sub);
259  }
260  else
261  {
262  // Don't update other p-s if converged.
263  if( ! convsP[s] )
264  {
265  as = a * z[iz][s]*bs[s] / (z[1-iz][s]*b);
266  /*
267  p[s][sub] *= Real(as);
268  p[s][sub] += Real(z[iz][s])*r;
269  */
270  p[s][sub] = Real(z[iz][s])*r + Real(as)*p[s]; flopcount.addSiteFlops(6*Nc*Ns,sub);
271  }
272  }
273  }
274 
275  // cp = | r[k] |**2
276  cp = c;
277 
278  // b[k] := | r[k] |**2 / < p[k], Ap[k] > ;
279  // First compute d = < p, A.p >
280  // Ap = A . p
281  A(Ap, p[isz], PLUS); flopcount.addFlops(A.nFlops());
282  Ap[sub] += shifts[isz]*p[isz]; flopcount.addSiteFlops(4*Nc*Ns,sub);
283 
284  /* d = < p, A.p > */
285  d = innerProductReal(p[isz], Ap, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
286 
287  bp = b;
288  b = -cp/d;
289 
290  // Compute the shifted bs and z
291  bs[isz] = b;
292  iz = 1 - iz;
293  for(s = 0; s < n_shift; s++)
294  {
295  if (s != isz && !convsP[s] )
296  {
297  z0 = z[1-iz][s];
298  z1 = z[iz][s];
299  z[iz][s] = z0*z1*bp;
300  z[iz][s] /= b*a*(z1-z0) + z1*bp*(Double(1) - (shifts[s] - shifts[isz])*b);
301  bs[s] = b*z[iz][s]/z0;
302  }
303  }
304 
305  // r[k+1] += b[k] A . p[k] ;
306  r[sub] += Real(b)*Ap; flopcount.addSiteFlops(4*Nc*Ns,sub);
307 
308 
309  // Psi[k+1] -= b[k] p[k] ;
310  for(s = 0; s < n_shift; ++s)
311  {
312  if (! convsP[s] )
313  {
314  psi[s][sub] -= Real(bs[s])*p[s]; flopcount.addSiteFlops(2*Nc*Ns,sub);
315  }
316  }
317 
318  // c = | r[k] |**2
319  c = norm2(r,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
320 
321  // IF |psi[k+1] - psi[k]| <= RsdCG |psi[k+1]| THEN RETURN;
322  // or IF |r[k+1]| <= RsdCG |chi| THEN RETURN;
323  convP = true;
324  for(s = 0; s < n_shift; s++)
325  {
326  if (! convsP[s] )
327  {
328  // Convergence methods
329  // Check norm of shifted residuals
330  Double css = c * z[iz][s]* z[iz][s];
331 
332 #if 0
333  QDPIO::cout << "MInvCG (shift=" << s << ") k = " << k <<" r = "
334  << css << " rsd_sq["<<s<<"] = " << rsd_sq[s] << std::endl;
335 #endif
336 
337  convsP[s] = toBool( css < rsd_sq[s] );
338 
339 #if 0
340 
341  //
342  // Check relative error of solution
343 
344  // cs holds | beta_s p_s |^2 = | psi_next |^2
345  cs = norm2(p[s],sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
346  cs *= bs[s]*bs[s];
347 
348  // d holds | psi |^2 * epsilon^2
349  d = norm2(psi[s],sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
350  d *= rsdcg_sq[s];
351 
352 
353  // Terminate if | psi |^2/|psi_next|^2 < epsilon^2
354  convsP[s] = toBool( cs < d );
355 
356 #if 0
357  QDPIO::cout << "MInvCG (shift=" << s << ") k = " << k << " cs = "
358  << cs << " d = " << d << std::endl;
359 #endif
360 #endif
361 
362  }
363  convP &= convsP[s];
364  }
365 
366  n_count = k;
367  }
368 
369  swatch.stop();
370 
371 #if 0
372  // This is paranoia - do not need
373 
374  // Expicitly check the ALL solutions
375  for(s = 0; s < n_shift; ++s)
376  {
377  A(Ap, psi[s], PLUS);
378  Ap[sub] += shifts[s]*psi[s];
379 
380  Ap[sub] -= chi_internal;
381 
382  c = norm2(Ap,sub); /* 2 Nc Ns flops */
383 
384  QDPIO::cout << "MInvCG (conv): s = " << s
385  << " shift = " << shifts[s]
386  << " r = " << Real(sqrt(c)/chi_norm) << std::endl;
387 
388  }
389  /* end */
390 #endif
391 
392  QDPIO::cout << "MInvCG: " << n_count << " iterations" << std::endl;
393  flopcount.report("minvcg", swatch.getTimeInSeconds());
394  revertFromFastMemoryHint(psi,true);
395 
396  if (n_count == MaxCG) {
397  QDP_error_exit("too many CG iterationns: %d\n", n_count);
398  }
399 
400  END_CODE();
401  return;
402  }
403 
404 
405  /*! \ingroup invert */
406  template<>
408  const LatticeFermion& chi,
409  multi1d<LatticeFermion>& psi,
410  const multi1d<Real>& shifts,
411  const multi1d<Real>& RsdCG,
412  int MaxCG,
413  int &n_count)
414  {
415  MInvCG_a(M, chi, psi, shifts, RsdCG, MaxCG, n_count);
416  }
417 
418 
419  /*! \ingroup invert */
420  template<>
421  void MInvCG(const DiffLinearOperator<LatticeFermion,
422  multi1d<LatticeColorMatrix>,
423  multi1d<LatticeColorMatrix> >& M,
424  const LatticeFermion& chi,
425  multi1d<LatticeFermion>& psi,
426  const multi1d<Real>& shifts,
427  const multi1d<Real>& RsdCG,
428  int MaxCG,
429  int &n_count)
430  {
431  MInvCG_a(M, chi, psi, shifts, RsdCG, MaxCG, n_count);
432  }
433 
434 } // end namespace Chroma
Differentiable Linear Operator.
Definition: linearop.h:98
Linear Operator.
Definition: linearop.h:27
void MInvCG(const LinearOperator< LatticeFermion > &M, const LatticeFermion &chi, multi1d< LatticeFermion > &psi, const multi1d< Real > &shifts, const multi1d< Real > &RsdCG, int MaxCG, int &n_count)
Definition: minvcg.cc:407
void MInvCG_a(const LinearOperator< T > &A, const T &chi, multi1d< T > &psi, const multi1d< Real > &shifts, const multi1d< Real > &RsdCG, int MaxCG, int &n_count)
Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator.
Definition: minvcg.cc:72
Linear Operators.
int z
Definition: meslate.cc:36
int iz
Definition: meslate.cc:32
Multishift Conjugate-Gradient algorithm for a Linear Operator.
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
Definition: pbg5p_w.cc:30
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
int n_count
Definition: invbicg.cc:78
Double c
Definition: invbicg.cc:108
LinOpSysSolverMGProtoClover::T T
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
Definition: pbg5p_w.cc:32
Real rsd_sq
Definition: invbicg.cc:121
int i
Definition: pbg5p_w.cc:55
@ PLUS
Definition: chromabase.h:45
Double cp
Definition: invbicg.cc:107
multi1d< LatticeFermion > chi(Ncb)
Complex a
Definition: invbicg.cc:95
LatticeFermion psi
Definition: mespbg5p_w.cc:35
DComplex d
Definition: invbicg.cc:99
START_CODE()
A(A, psi, r, Ncb, PLUS)
Complex b
Definition: invbicg.cc:96
Double chi_norm
Definition: invbicg.cc:79
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Definition: gtest.h:7351
int isz
Definition: pade_trln_w.cc:151