CHROMA
invcg2_array.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Conjugate-Gradient algorithm for a generic Linear Operator
3  */
4 
5 #include "chromabase.h"
7 
8 namespace Chroma
9 {
10 
11 
12  //! Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator
13  /*! \ingroup invert
14  * This subroutine uses the Conjugate Gradient (CG) algorithm to find
15  * the solution of the set of linear equations
16  *
17  * Chi = A . Psi
18  *
19  * where A = M^dag . M
20  *
21  * Algorithm:
22 
23  * Psi[0] := initial guess; Linear interpolation (argument)
24  * r[0] := Chi - M^dag . M . Psi[0] ; Initial residual
25  * p[1] := r[0] ; Initial direction
26  * IF |r[0]| <= RsdCG |Chi| THEN RETURN; Converged?
27  * FOR k FROM 1 TO MaxCG DO CG iterations
28  * a[k] := |r[k-1]|**2 / <Mp[k],Mp[k]> ;
29  * Psi[k] += a[k] p[k] ; New solution std::vector
30  * r[k] -= a[k] M^dag . M . p[k] ; New residual
31  * IF |r[k]| <= RsdCG |Chi| THEN RETURN; Converged?
32  * b[k+1] := |r[k]|**2 / |r[k-1]|**2 ;
33  * p[k+1] := r[k] + b[k+1] p[k]; New direction
34  *
35  * Arguments:
36  *
37  * \param M Linear Operator (Read)
38  * \param chi Source (Read)
39  * \param psi Solution (Modify)
40  * \param RsdCG CG residual accuracy (Read)
41  * \param MaxCG Maximum CG iterations (Read)
42  * \return res System solver results
43  *
44  * Local Variables:
45  *
46  * p Direction std::vector
47  * r Residual std::vector
48  * cp | r[k] |**2
49  * c | r[k-1] |**2
50  * k CG iteration counter
51  * a a[k]
52  * b b[k+1]
53  * d < p[k], A.p[k] >
54  * Mp Temporary for M.p
55  *
56  * Subroutines:
57  * +
58  * A Apply matrix M or M to std::vector
59  *
60  * Operations:
61  *
62  * 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns )
63  */
64 
65 #undef PRINT_5D_RESID
66  template<typename T, typename C>
67  SystemSolverResults_t
68  InvCG2_a(const C& M,
69  const multi1d<T> & chi,
70  multi1d<T>& psi,
71  const Real& RsdCG,
72  int MaxCG)
73  {
74  START_CODE();
75 
76  int N = M.size();
77  // Subset subs = M.subset();
78 
80 
81  // Move what we can to fast memory
82  multi1d<T> mp(N); // moveToFastMemoryHint(mp);
83  multi1d<T> mmp(N); // moveToFastMemoryHint(mmp);
84  multi1d<T> p(N); // moveToFastMemoryHint(p);
85 
86  // moveToFastMemoryHint(psi,true);
87 
88  multi1d<T> r(N); // moveToFastMemoryHint(r);
89  multi1d<T> chi_internal(N); // moveToFastMemoryHint(chi_internal);
90 
91  if (M.size() != chi.size())
92  {
93  QDPIO::cerr << __func__ << ": linop has size=" << M.size()
94  << " but chi has size=" << chi.size() << std::endl;
95  QDP_abort(1);
96  }
97 
98  for(int i=0; i < N; i++) {
99  chi_internal[i][ M.subset() ] = chi[i];
100  }
101 
102  QDPIO::cout << "InvCG2: starting" << std::endl;
103  FlopCounter flopcount;
104  flopcount.reset();
105  StopWatch swatch;
106  swatch.reset();
107  swatch.start();
108 
109  Real chi_sq = Real(norm2(chi_internal,M.subset())); // 4*Nc*Ns flops per site
110  flopcount.addSiteFlops(4*Nc*Ns*N,M.subset());
111 
112 
113  // QDPIO::cout << "chi_norm = " << sqrt(chi_sq) << std::endl;
114  Real rsd_sq = (RsdCG * RsdCG) * chi_sq;
115 
116  // +
117  // r[0] := Chi - A . Psi[0] where A = M . M
118 
119  // +
120  // r := [ Chi - M(u) . M(u) . psi ]
121 
122  M(mp, psi, PLUS);
123  M(mmp, mp, MINUS);
124  flopcount.addFlops(2*M.nFlops());
125 
126  for(int n=0; n < N; ++n){
127  r[n][ M.subset() ] = chi_internal[n] - mmp[n];
128  }
129 
130  flopcount.addSiteFlops(2*Nc*Ns*N,M.subset());
131 
132 #ifdef PRINT_5D_RESID
133  for(int n=0; n < N; n++) {
134  Double norm_r = norm2(r[n],M.subset());
135  if( toBool( norm_r > Double(1.0e-20)) ) {
136  QDPIO::cout << "Iteration 0 r[" << n << "] = " << norm_r << std::endl;
137  }
138  }
139 #endif
140 
141 
142  // p[1] := r[0]
143 
144  for(int n=0; n < N; ++n)
145  p[n][ M.subset() ] = r[n];
146 
147  // Cp = |r[0]|^2
148  Double cp = norm2(r, M.subset()); /* 4 Nc Ns flops/cbsite */
149  flopcount.addSiteFlops(4*Nc*Ns*N, M.subset());
150 
151  //QDPIO::cout << "InvCG: k = 0 cp = " << cp << " rsd_sq = " << rsd_sq << std::endl;
152 
153  // IF |r[0]| <= RsdCG |Chi| THEN RETURN;
154  if ( toBool(cp <= rsd_sq) )
155  {
156  res.n_count = 0;
157  res.resid = sqrt(cp);
158  swatch.stop();
159  QDPIO::cout << "InvCG: k = 0 cp = " << cp << " rsd_sq = " << rsd_sq << std::endl;
160  // Try it all at the end.
161  flopcount.report("invcg2_array", swatch.getTimeInSeconds());
162  revertFromFastMemoryHint(psi,true);
163  END_CODE();
164  return res;
165  }
166 
167  //
168  // FOR k FROM 1 TO MaxCG DO
169  //
170  Real a, b;
171  Double c, d;
172 
173  for(int k = 1; k <= MaxCG; ++k)
174  {
175  // c = | r[k-1] |**2
176  c = cp;
177 
178  // a[k] := | r[k-1] |**2 / < p[k], Ap[k] > ;
179  // +
180  // First compute d = < p, A.p > = < p, M . M . p > = < M.p, M.p >
181  // Mp = M(u) * p
182 
183  M(mp, p, PLUS); flopcount.addFlops(M.nFlops());
184 
185  // d = | mp | ** 2
186  d = norm2(mp, M.subset()); flopcount.addSiteFlops(4*Nc*Ns*N,M.subset());
187 
188  a = Real(c)/Real(d);
189 
190  // Psi[k] += a[k] p[k]
191  for(int n=0; n < N; ++n) {
192  psi[n][ M.subset() ] += a * p[n]; /* 4 Nc Ns cbsite flops */
193  }
194  flopcount.addSiteFlops(4*Nc*Ns*N,M.subset());
195 
196  // r[k] -= a[k] A . p[k] ;
197  // + +
198  // r = r - M(u) . Mp = M . M . p = A . p
199 
200  M(mmp, mp, MINUS);
201  flopcount.addFlops(M.nFlops());
202 
203  for(int n=0; n < N; ++n) {
204  r[n][ M.subset() ] -= a * mmp[n];
205  }
206  flopcount.addSiteFlops(4*Nc*Ns*N, M.subset());
207 
208 #ifdef PRINT_5D_RESID
209  for(int n=0; n < N; n++) {
210  Double norm_r = norm2(r[n],M.subset());
211  if( toBool( norm_r > Double(1.0e-20)) )
212  QDPIO::cout << "Iteration " << k << " r[" << n << "] = " << norm_r << std::endl;
213  }
214 #endif
215 
216  // IF |r[k]| <= RsdCG |Chi| THEN RETURN;
217 
218  // cp = | r[k] |**2
219  cp = norm2(r, M.subset()); /* 2 Nc Ns flops */
220  flopcount.addSiteFlops(4*Nc*Ns*N,M.subset());
221 
222  // QDPIO::cout << "InvCG: k = " << k << " cp = " << cp << std::endl;
223 
224  if ( toBool(cp <= rsd_sq) )
225  {
226  res.n_count = k;
227  swatch.stop();
228  QDPIO::cout << "InvCG: k = " << k << " cp = " << cp << std::endl;
229  flopcount.report("invcg2_array", swatch.getTimeInSeconds());
230  revertFromFastMemoryHint(psi,true);
231 
232  // Compute the actual residual
233  {
234  M(mp, psi, PLUS);
235  M(mmp, mp, MINUS);
236  Double actual_res(zero);
237  for(int n=0; n < N; ++n)
238  {
239  Double norm_r = norm2(chi[n] - mmp[n],M.subset());
240 // QDPIO::cout<<"True residual "<<" r[" << n << "] = "<< sqrt(norm_r/chi_sq)<<std::endl;
241  actual_res += norm_r ;
242  }
243 
244  res.resid = sqrt(actual_res);
245  QDPIO::cout << "Actual residual r = " << sqrt(actual_res) << " Actual relative residual="<< sqrt(actual_res/chi_sq) << std::endl;
246  }
247 
248  END_CODE();
249  return res;
250  }
251 
252  // b[k+1] := |r[k]|**2 / |r[k-1]|**2
253  b = Real(cp) / Real(c);
254 #if 1
255  // QDPIO::cout << "InvCGev: k = " << k << " alpha = " << a << " beta = " << b << std::endl;
256 #endif
257  // p[k+1] := r[k] + b[k+1] p[k]
258  for(int n=0; n < N; ++n) {
259  p[n][ M.subset() ] = r[n] + b*p[n]; /* Nc Ns flops */
260  }
261  flopcount.addSiteFlops(4*Nc*Ns*N,M.subset());
262  }
263 
264  res.n_count = MaxCG;
265  res.resid = sqrt(cp);
266  swatch.stop();
267  QDPIO::cerr << "Nonconvergence Warning" << std::endl;
268  flopcount.report("invcg2_array", swatch.getTimeInSeconds());
269  revertFromFastMemoryHint(psi,true);
270 
271  END_CODE();
272  return res;
273  }
274 
275 
276  //
277  // Explicit versions
278  //
279  // Single precision
280  SystemSolverResults_t
282  const multi1d<LatticeFermionF>& chi,
283  multi1d<LatticeFermionF>& psi,
284  const Real& RsdCG,
285  int MaxCG)
286  {
287  return InvCG2_a(M, chi, psi, RsdCG, MaxCG);
288  }
289 
290  // Double precision
291  SystemSolverResults_t
293  const multi1d<LatticeFermionD>& chi,
294  multi1d<LatticeFermionD>& psi,
295  const Real& RsdCG,
296  int MaxCG)
297  {
298  return InvCG2_a(M, chi, psi, RsdCG, MaxCG);
299  }
300 
301 } // end namespace Chroma
Primary include file for CHROMA library code.
Linear Operator to arrays.
Definition: linearop.h:61
SystemSolverResults_t InvCG2_a(const LinearOperator< T > &M, const T &chi, T &psi, const RT &RsdCG, int MaxCG)
Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator.
Definition: invcg2.cc:72
SystemSolverResults_t InvCG2(const LinearOperator< LatticeFermionF > &M, const LatticeFermionF &chi, LatticeFermionF &psi, const Real &RsdCG, int MaxCG)
Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator.
Definition: invcg2.cc:240
Conjugate-Gradient algorithm for a generic Linear Operator.
unsigned n
Definition: ldumul_w.cc:36
SpinMatrix C()
C = Gamma(10)
Definition: barspinmat_w.cc:29
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
Definition: pbg5p_w.cc:30
Double c
Definition: invbicg.cc:108
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
Definition: pbg5p_w.cc:32
Real rsd_sq
Definition: invbicg.cc:121
int i
Definition: pbg5p_w.cc:55
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
Double cp
Definition: invbicg.cc:107
multi1d< LatticeFermion > chi(Ncb)
Complex a
Definition: invbicg.cc:95
LatticeFermion psi
Definition: mespbg5p_w.cc:35
DComplex d
Definition: invbicg.cc:99
START_CODE()
multi1d< LatticeFermion > mp(Ncb)
Complex b
Definition: invbicg.cc:96
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
FloatingPoint< double > Double
Definition: gtest.h:7351
Holds return info from SystemSolver call.
Definition: syssolver.h:17