CHROMA
invcg2.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Conjugate-Gradient algorithm for a generic Linear Operator
3  */
4 
5 #include "chromabase.h"
7 
8 using namespace QDP::Hints;
9 #undef PAT
10 #ifdef PAT
11 #include <pat_api.h>
12 #endif
13 
14 namespace Chroma
15 {
16 
17  //! Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator
18  /*! \ingroup invert
19  * This subroutine uses the Conjugate Gradient (CG) algorithm to find
20  * the solution of the set of linear equations
21  *
22  * Chi = A . Psi
23  *
24  * where A = M^dag . M
25  *
26  * Algorithm:
27 
28  * Psi[0] := initial guess; Linear interpolation (argument)
29  * r[0] := Chi - M^dag . M . Psi[0] ; Initial residual
30  * p[1] := r[0] ; Initial direction
31  * IF |r[0]| <= RsdCG |Chi| THEN RETURN; Converged?
32  * FOR k FROM 1 TO MaxCG DO CG iterations
33  * a[k] := |r[k-1]|**2 / <Mp[k],Mp[k]> ;
34  * Psi[k] += a[k] p[k] ; New solution std::vector
35  * r[k] -= a[k] M^dag . M . p[k] ; New residual
36  * IF |r[k]| <= RsdCG |Chi| THEN RETURN; Converged?
37  * b[k+1] := |r[k]|**2 / |r[k-1]|**2 ;
38  * p[k+1] := r[k] + b[k+1] p[k]; New direction
39  *
40  * Arguments:
41  *
42  * \param M Linear Operator (Read)
43  * \param chi Source (Read)
44  * \param psi Solution (Modify)
45  * \param RsdCG CG residual accuracy (Read)
46  * \param MaxCG Maximum CG iterations (Read)
47  * \return res System solver results
48  *
49  * Local Variables:
50  *
51  * p Direction std::vector
52  * r Residual std::vector
53  * cp | r[k] |**2
54  * c | r[k-1] |**2
55  * k CG iteration counter
56  * a a[k]
57  * b b[k+1]
58  * d < p[k], A.p[k] >
59  * Mp Temporary for M.p
60  *
61  * Subroutines:
62  * +
63  * A Apply matrix M or M to std::vector
64  *
65  * Operations:
66  *
67  * 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns )
68  */
69 
70  template<typename T, typename RT>
71  SystemSolverResults_t
73  const T& chi,
74  T& psi,
75  const RT& RsdCG,
76  int MaxCG)
77  {
78  START_CODE();
79 
80  const Subset& s = M.subset();
81 
83  T mp; moveToFastMemoryHint(mp);
84  T mmp; moveToFastMemoryHint(mmp);
85  T p; moveToFastMemoryHint(p);
86  moveToFastMemoryHint(psi,true);
87  T r; moveToFastMemoryHint(r);
88  T chi_internal; moveToFastMemoryHint(chi_internal);
89 
90  chi_internal[s] = chi;
91 
92  QDPIO::cout << "InvCG2: starting" << std::endl;
93  FlopCounter flopcount;
94  flopcount.reset();
95  StopWatch swatch;
96  swatch.reset();
97  swatch.start();
98 
99 // Real rsd_sq = (RsdCG * RsdCG) * Real(norm2(chi,s));
100  Double chi_sq = norm2(chi_internal,s);
101  flopcount.addSiteFlops(4*Nc*Ns,s);
102 
103 #if 0
104  QDPIO::cout << "chi_norm = " << sqrt(chi_sq) << std::endl;
105 #endif
106 
107  Double rsd_sq = (RsdCG * RsdCG) * chi_sq;
108 
109  // +
110  // r[0] := Chi - A . Psi[0] where A = M . M
111 
112  // +
113  // r := [ Chi - M(u) . M(u) . psi ]
114  M(mp, psi, PLUS);
115  M(mmp, mp, MINUS);
116  flopcount.addFlops(2*M.nFlops());
117 
118  r[s] = chi_internal - mmp;
119  flopcount.addSiteFlops(2*Nc*Ns,s);
120  // Cp = |r[0]|^2
121  Double cp = norm2(r, s); /* 2 Nc Ns flops */
122  flopcount.addSiteFlops(4*Nc*Ns, s);
123 
124 
125 #if 0
126  QDPIO::cout << "InvCG: k = 0 || r ||= " <<sqrt(cp) << std::endl;
127 #endif
128 
129  // p[1] := r[0]
130  p[s] = r;
131 
132 
133 
134 
135  // IF |r[0]| <= RsdCG |Chi| THEN RETURN;
136  if ( toBool(cp <= rsd_sq) )
137  {
138  res.n_count = 0;
139  res.resid = sqrt(cp);
140  swatch.stop();
141  flopcount.report("invcg2", swatch.getTimeInSeconds());
142  revertFromFastMemoryHint(psi,true);
143  END_CODE();
144  return res;
145  }
146 
147  //
148  // FOR k FROM 1 TO MaxCG DO
149  //
150 
151  Double a, b, c, d;
152 
153  for(int k = 1; k <= MaxCG; ++k)
154  {
155  // c = | r[k-1] |**2
156  c = cp;
157 
158  // a[k] := | r[k-1] |**2 / < p[k], Ap[k] > ;
159  // +
160  // First compute d = < p, A.p > = < p, M . M . p > = < M.p, M.p >
161  // Mp = M(u) * p
162  M(mp, p, PLUS); flopcount.addFlops(M.nFlops());
163 
164  // d = | mp | ** 2
165  d = norm2(mp, s); flopcount.addSiteFlops(4*Nc*Ns,s);
166 
167  // r[k] -= a[k] A . p[k] ;
168  // + +
169  // r = r - M(u) . Mp = M . M . p = A . p
170  M(mmp, mp, MINUS);
171  flopcount.addFlops(M.nFlops());
172 
173 
174  a = c/d;
175 
176  RT ar = a;
177 
178  r[s] -= ar * mmp;
179  flopcount.addSiteFlops(4*Nc*Ns, s);
180 
181  // cp = | r[k] |**2
182  cp = norm2(r, s); flopcount.addSiteFlops(4*Nc*Ns,s);
183 
184  // Psi[k] += a[k] p[k]
185  psi[s] += ar * p; flopcount.addSiteFlops(4*Nc*Ns,s);
186 
187 
188 
189  // IF |r[k]| <= RsdCG |Chi| THEN RETURN;
190 
191 
192 // QDPIO::cout << "InvCG: k = " << k << " cp = " << cp << std::endl;
193 
194  if ( toBool(cp <= rsd_sq) )
195  {
196  res.n_count = k;
197  res.resid = sqrt(cp);
198  swatch.stop();
199  // QDPIO::cout << "InvCG: k = " << k << " cp = " << cp << std::endl;
200  flopcount.report("invcg2", swatch.getTimeInSeconds());
201  revertFromFastMemoryHint(psi,true);
202 
203  // Compute the actual residual
204  {
205  M(mp, psi, PLUS);
206  M(mmp, mp, MINUS);
207  Double actual_res = norm2(chi - mmp,s);
208  res.resid = sqrt(actual_res);
209  }
210 
211  END_CODE();
212  return res;
213  }
214 
215  // b[k+1] := |r[k]|**2 / |r[k-1]|**2
216  b = cp / c;
217  RT br = b;
218 
219  // p[k+1] := r[k] + b[k+1] p[k]
220  p[s] = r + br*p; flopcount.addSiteFlops(4*Nc*Ns,s);
221  }
222  res.n_count = MaxCG;
223  res.resid = sqrt(cp);
224  swatch.stop();
225  QDPIO::cerr << "Nonconvergence Warning" << std::endl;
226  flopcount.report("invcg2", swatch.getTimeInSeconds());
227  revertFromFastMemoryHint(psi,true);
228  QDPIO::cerr << "too many CG iterations: count =" << res.n_count <<" rsd^2= " << cp << std::endl <<std::flush;
229 
230  END_CODE();
231  return res;
232  }
233 
234 
235  //
236  // Explicit versions
237  //
238  // Single precision
239  SystemSolverResults_t
241  const LatticeFermionF& chi,
242  LatticeFermionF& psi,
243  const Real& RsdCG,
244  int MaxCG)
245  {
246 #ifdef PAT
247  int ierr = PAT_region_begin(20, "InvCG2Single");
248 #endif
249  return InvCG2_a<LatticeFermionF,RealF>(M, chi, psi, RsdCG, MaxCG);
250 #ifdef PAT
251  ierr = PAT_region_end(20);
252 #endif
253 
254  }
255 
256  // Double precision
257  SystemSolverResults_t
259  const LatticeFermionD& chi,
260  LatticeFermionD& psi,
261  const Real& RsdCG,
262  int MaxCG)
263  {
264 #ifdef PAT
265  int ierr=PAT_region_begin(21, "InvCG2Double");
266 #endif
267  return InvCG2_a<LatticeFermionD, RealD>(M, chi, psi, RsdCG, MaxCG);
268 #ifdef PAT
269  ierr= PAT_region_end(21);
270 #endif
271  }
272 
273  // Single precision
274  SystemSolverResults_t
276  const LatticeStaggeredFermionF& chi,
277  LatticeStaggeredFermionF& psi,
278  const Real& RsdCG,
279  int MaxCG)
280  {
281  return InvCG2_a<LatticeStaggeredFermionF,RealF>(M, chi, psi, RsdCG, MaxCG);
282  }
283 
284  // Double precision
285  SystemSolverResults_t
287  const LatticeStaggeredFermionD& chi,
288  LatticeStaggeredFermionD& psi,
289  const Real& RsdCG,
290  int MaxCG)
291  {
292  return InvCG2_a<LatticeStaggeredFermionD,RealD>(M, chi, psi, RsdCG, MaxCG);
293  }
294 
295 } // end namespace Chroma
Primary include file for CHROMA library code.
#define END_CODE()
Definition: chromabase.h:65
#define START_CODE()
Definition: chromabase.h:64
Linear Operator.
Definition: linearop.h:27
virtual const Subset & subset() const =0
Return the subset on which the operator acts.
virtual unsigned long nFlops() const
Definition: linearop.h:48
EXTERN int MaxCG
SystemSolverResults_t InvCG2_a(const LinearOperator< T > &M, const T &chi, T &psi, const RT &RsdCG, int MaxCG)
Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator.
Definition: invcg2.cc:72
SystemSolverResults_t InvCG2(const LinearOperator< LatticeStaggeredFermionD > &M, const LatticeStaggeredFermionD &chi, LatticeStaggeredFermionD &psi, const Real &RsdCG, int MaxCG)
Definition: invcg2.cc:286
Conjugate-Gradient algorithm for a generic Linear Operator.
unsigned s
Definition: ldumul_w.cc:37
int c
Definition: meslate.cc:61
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
Definition: pbg5p_w.cc:30
LinOpSysSolverMGProtoClover::T T
Real rsd_sq
Definition: invbicg.cc:121
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
Double cp
Definition: invbicg.cc:107
Complex a
Definition: invbicg.cc:95
DComplex d
Definition: invbicg.cc:99
multi1d< LatticeFermion > mp(Ncb)
Complex b
Definition: invbicg.cc:96
int k
Definition: invbicg.cc:119
FloatingPoint< double > Double
Definition: gtest.h:7351
multi1d< LatticeFermion > r(Ncb)
chi
Definition: pade_trln_w.cc:24
psi
Definition: pade_trln_w.cc:191
Holds return info from SystemSolver call.
Definition: syssolver.h:17