CHROMA
minvcg2_accum.cc
Go to the documentation of this file.
1 
2 /*! \file
3  * \brief Multishift Conjugate-Gradient algorithm for a Linear Operator
4  */
5 
6 #include "linearop.h"
8 
9 namespace Chroma
10 {
11 
12 
13  //! Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator
14  /*! \ingroup invert
15  *
16  * This subroutine uses the Conjugate Gradient (CG) algorithm to find
17  * the solution of the set of linear equations
18  * Method used is described in Jegerlehner, hep-lat/9708029
19 
20  * We are searching in a subspace orthogonal to the eigenvectors EigVec
21  * of A. The source chi is assumed to already be orthogonal!
22 
23  * Chi = M^\dag M . Psi
24 
25  * Algorithm:
26 
27  * Psi[0] := 0; Zeroed
28  * r[0] := Chi; Initial residual
29  * p[1] := Chi ; Initial direction
30  * b[0] := |r[0]|**2 / <M p[0], M p[0]> ;
31  * z[0] := 1 / (1 - (shift - shift(0))*b)
32  * bs[0] := b[0] * z[0]
33  * r[1] += b[k] M^\dag M . p[0] ; New residual
34  * Psi[1] = - b[k] p[k] ; Starting solution std::vector
35  * IF |r[0]| <= RsdCG |Chi| THEN RETURN; Converged?
36  * FOR k FROM 1 TO MaxCG DO CG iterations
37  * a[k] := |r[k]|**2 / |r[k-1]|**2 ;
38  * p[k] := r[k] + a[k] p[k-1]; New direction
39  * b[k+1] := |r[k]|**2 / <M p[k], Mp[k]> ;
40  * r[k+1] += b[k+1] M^\dag M . p[k] ; New residual
41  * Psi[k+1] -= b[k+1] p[k] ; New solution std::vector
42  * IF |[k+1]| <= RsdCG |Chi| THEN RETURN; Converged?
43 
44  * Arguments:
45 
46  * A Hermitian linear operator (Read)
47  * Chi Source (Read)
48  * Psi array of solutions (Write)
49  * shifts shifts of form A + mass (Read)
50  * RsdCG residual accuracy (Read/Write)
51  * n_count Number of CG iteration (Write)
52 
53  * Local Variables:
54 
55  * p Direction std::vector
56  * r Residual std::vector
57  * cp | r[k] |**2
58  * c | r[k-1] |**2
59  * k CG iteration counter
60  * a a[k]
61  * b b[k+1]
62  * d < p[k], A.p[k] >
63  * Ap Temporary for M.p
64 
65  * MaxCG Maximum number of CG iterations allowed
66 
67  * Subroutines:
68  * A Apply matrix hermitian A to std::vector
69  */
70 
71  template<typename T>
73  const T& chi,
74  T& psi,
75  const Real& norm,
76  const multi1d<Real>& residues,
77  const multi1d<Real>& poles,
78  const Real& RsdCG,
79  int MaxCG,
80  int& n_count)
81  {
82 
83  START_CODE();
84 
85  const Subset& sub = M.subset();
86  moveToFastMemoryHint(psi,true);
87 
88  int n_shift = poles.size();
89 
90  if (n_shift == 0)
91  {
92  QDPIO::cerr << "MInvCG: You must supply at least 1 mass: mass.size() = "
93  << n_shift << std::endl;
94  QDP_abort(1);
95  }
96 
97  /* Now find the smallest mass */
98  int isz = 0;
99  for(int findit=1; findit < n_shift; ++findit) {
100  if ( toBool( poles[findit] < poles[isz]) ) {
101  isz = findit;
102  }
103  }
104 
105  // Temporary to work with.
106  multi1d<T> X(n_shift);
107 
108  // We need to make sure, that X is at least as big as the number
109  // of poles. We resize it if it is not big enough.
110  // However, it is allowed to be bigger.
111  if( X.size() < n_shift ) {
112  X.resize(n_shift);
113  }
114 
115  // For this algorithm, all the X have to be 0 to start
116  // Only is that way the initial residuum r = chi
117  for(int i= 0; i < n_shift; ++i) {
118  X[i][sub] = zero;
119  }
120 
121  T chi_internal; moveToFastMemoryHint(chi_internal);
122  chi_internal[sub] = chi;
123  moveToFastMemoryHint(X,true);
124 
125  FlopCounter flopcount;
126  flopcount.reset();
127  StopWatch swatch;
128  swatch.reset();
129  swatch.start();
130 
131  // If chi has zero norm then the result is zero
132  Double chi_norm_sq = norm2(chi_internal,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
133  Double chi_norm = sqrt(chi_norm_sq);
134 
135  if( toBool( chi_norm < fuzz ))
136  {
137  swatch.stop();
138 
139  n_count = 0;
140 
141  QDPIO::cout << "MInvCG2: " << n_count << " iterations" << std::endl;
142  flopcount.report("minvcg2", swatch.getTimeInSeconds());
143 
144  psi[sub] = norm*chi_internal;
145  for(int s=0; s < n_shift; s++) {
146  psi[sub] += residues[s]*X[s];
147  }
148  revertFromFastMemoryHint(X,false);
149  revertFromFastMemoryHint(psi,true);
150 
151  // The X are all zero anyway at this point
152  // for(int i=0; i < n_shift; i++) { X[i] = zero; }
153  END_CODE();
154  return;
155  }
156 
157  multi1d<Double> rsd_sq(n_shift);
158  multi1d<Double> rsdcg_sq(n_shift);
159 
160  Double cp = chi_norm_sq;
161  int s;
162  for(s = 0; s < n_shift; ++s) {
163  rsdcg_sq[s] = RsdCG * RsdCG; // RsdCG^2
164  rsd_sq[s] = Real(cp) * rsdcg_sq[s]; // || chi ||^2 RsdCG^2
165  }
166 
167 
168  // r[0] := p[0] := Chi
169  T r; moveToFastMemoryHint(r);
170  r[sub] = chi_internal; // no flops
171 
172 
173  T p_0; moveToFastMemoryHint(p_0);
174  p_0[sub] = chi_internal;
175 
176 
177  // X[0] := 0;
178  multi1d<T> p(n_shift); moveToFastMemoryHint(p);
179  for(s = 0; s < n_shift; ++s) {
180  p[s][sub] = chi_internal; // no flops
181  }
182 
183 
184  // b[0] := - | r[0] |**2 / < p[0], Ap[0] > ;/
185  // First compute d = < p, A.p >
186  // Ap = A . p */
187  T Mp, MMp; moveToFastMemoryHint(Mp); moveToFastMemoryHint(MMp);
188  M(Mp, p_0, PLUS); flopcount.addFlops(M.nFlops());
189 
190  /* d = < M p, M.p > */
191  Double d = norm2(Mp, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
192 
193  M(MMp, Mp, MINUS); flopcount.addFlops(M.nFlops());
194 
195  Double b = -cp/d;
196 
197  // r[1] += b[0] A . p[0];
198  r[sub] += Real(b)*MMp; flopcount.addSiteFlops(4*Nc*Ns,sub);
199 
200  /* Compute the shifted bs and z */
201  multi1d<Double> bs(n_shift);
202  multi2d<Double> z(2, n_shift);
203  int iz;
204 
205  /* -- These are no longer special... */
206  /*
207  z[0][isz] = Double(1);
208  z[1][isz] = Double(1);
209  bs[isz] = b;
210  */
211 
212  iz = 1;
213 
214  for(s = 0; s < n_shift; ++s) {
215  z[1-iz][s] = Double(1);
216  // z[iz][s] = Double(1) / (Double(1) - (Double(poles[s])-Double(poles[isz]))*b);
217  z[iz][s] = Double(1) / (Double(1) - Double(poles[s])*b);
218  bs[s] = b * z[iz][s];
219  }
220 
221 
222  // X[1] -= b[0] p[0] = - b[0] chi;
223  for(s = 0; s < n_shift; ++s) {
224  X[s][sub] = -Real(bs[s])*chi_internal; flopcount.addSiteFlops(2*Nc*Ns,sub);
225  }
226 
227  // c = |r[1]|^2
228  Double c = norm2(r,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
229 
230  // Check convergence of first solution
231  multi1d<bool> convsP(n_shift);
232  for(s = 0; s < n_shift; ++s) {
233  convsP[s] = false;
234  }
235 
236  bool convP = toBool( c < rsd_sq[isz] );
237 
238 #if 0
239  QDPIO::cout << "MInvCG: k = 0 r = " << sqrt(c) << std::endl;
240 #endif
241 
242  // FOR k FROM 1 TO MaxCG DO
243  // IF |X[k+1] - X[k]| <= RsdCG |X[k+1]| THEN RETURN;
244  Double z0, z1;
245  Double ztmp;
246  Double cs;
247  Double a;
248  Double as;
249  Double bp;
250  int k;
251 
252 
253  for(k = 1; k <= MaxCG && !convP ; ++k)
254  {
255  // a[k+1] := |r[k]|**2 / |r[k-1]|**2 ;
256  a = c/cp;
257 
258  // Update p
259  p_0[sub] = r + Real(a)*p_0; flopcount.addSiteFlops(4*Nc*Ns,sub);
260  // p[k+1] := r[k+1] + a[k+1] p[k];
261  // Compute the shifted as */
262  // ps[k+1] := zs[k+1] r[k+1] + a[k+1] ps[k];
263  for(s = 0; s < n_shift; ++s) {
264 
265  // Don't update other p-s if converged.
266  if( ! convsP[s] ) {
267 
268  as = a * z[iz][s]*bs[s] / (z[1-iz][s]*b);
269  p[s][sub] = Real(z[iz][s])*r + Real(as)*p[s]; flopcount.addSiteFlops(6*Nc*Ns,sub);
270  }
271  }
272 
273  // cp = | r[k] |**2
274  cp = c;
275 
276  // b[k] := | r[k] |**2 / < p[k], Ap[k] > ;
277  // First compute d = < p, A.p >
278  // Ap = A . p
279  M(Mp, p_0, PLUS); flopcount.addFlops(M.nFlops());
280 
281  /* d = < p, A.p > */
282  d = norm2(Mp, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
283 
284  M(MMp, Mp, MINUS); flopcount.addFlops(M.nFlops());
285 
286  bp = b;
287  b = -cp/d;
288  // r[k+1] += b[k] A . p[k] ;
289  r[sub] += Real(b)*MMp; flopcount.addSiteFlops(4*Nc*Ns,sub);
290  // c = | r[k] |**2
291  c = norm2(r,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
292 
293  // Compute the shifted bs and z
294  iz = 1 - iz;
295  for(s = 0; s < n_shift; s++) {
296  if ( !convsP[s] ) {
297  z0 = z[1-iz][s];
298  z1 = z[iz][s];
299  z[iz][s] = z0*z1*bp;
300  z[iz][s] /= b*a*(z1-z0) + z1*bp*(Double(1) - poles[s]*b);
301  bs[s] = b*z[iz][s]/z0;
302  }
303  }
304 
305 
306  // IF |X[k+1] - X[k]| <= RsdCG |X[k+1]| THEN RETURN;
307  // or IF |r[k+1]| <= RsdCG |chi| THEN RETURN;
308  convP = true;
309  for(s = 0; s < n_shift; s++) {
310  if (! convsP[s] ) {
311  // Convergence methods
312  // Check norm of shifted residuals
313  Double css = c * z[iz][s]* z[iz][s];
314  convsP[s] = toBool( css < rsd_sq[s] );
315  }
316  convP &= convsP[s];
317  }
318 
319 
320  // X[k+1] -= b[k] p[k] ;
321  T Delta = zero; // The amount of change
322  psi[sub] = norm*chi_internal; // Rebuild psi
323  flopcount.addSiteFlops(2*Nc*Ns,sub);
324 
325  for(s = 0; s < n_shift; ++s) {
326  if (! convsP[s] ) {
327  T tmp;
328  tmp[sub] = Real(bs[s])*p[s];
329  flopcount.addSiteFlops(2*Nc*Ns,sub); // This pole hasn't converged
330  // yet so update solution
331  X[s][sub] -= tmp;
332  flopcount.addSiteFlops(2*Nc*Ns,sub);
333 
334  Delta[sub] += residues[s]*tmp; // Accumulate "change std::vector" in the Xs
335  flopcount.addSiteFlops(4*Nc*Ns,sub);
336 
337  }
338  psi[sub] += residues[s]*X[s]; // Accumualte psi (from X's)
339  flopcount.addSiteFlops(4*Nc*Ns,sub);
340  }
341 
342  Double delta_norm = norm2(Delta,sub);
343  flopcount.addSiteFlops(4*Nc*Ns,sub);
344 
345  Double psi_norm = norm2(psi,sub);
346  flopcount.addSiteFlops(4*Nc*Ns,sub);
347 
348  convP |= toBool( delta_norm < rsdcg_sq[0]*psi_norm);
349  n_count = k;
350  }
351 
352  swatch.stop();
353 
354 
355  QDPIO::cout << "MInvCG2Accum: " << n_count << " iterations" << std::endl;
356  flopcount.report("minvcg", swatch.getTimeInSeconds());
357  revertFromFastMemoryHint(X,false);
358  revertFromFastMemoryHint(psi,true);
359 
360  if (n_count == MaxCG) {
361  QDP_error_exit("too many CG iterationns: %d\n", n_count);
362  }
363 
364  END_CODE();
365  return;
366  }
367 
368 
369 
370  /*! \ingroup invert */
371  template<>
373  const LatticeFermion& chi,
374  LatticeFermion& psi,
375  const Real& norm,
376  const multi1d<Real>& residues,
377  const multi1d<Real>& poles,
378  const Real& RsdCG,
379  int MaxCG,
380  int& n_count)
381  {
382  MInvCG2Accum_a(M, chi, psi, norm, residues, poles, RsdCG, MaxCG, n_count);
383  }
384 
385 
386  /*! \ingroup invert */
387  template<>
388  void MInvCG2Accum(const DiffLinearOperator<LatticeFermion,
389  multi1d<LatticeColorMatrix>,
390  multi1d<LatticeColorMatrix> >& M,
391  const LatticeFermion& chi,
392  LatticeFermion& psi,
393  const Real& norm,
394  const multi1d<Real>& residues,
395  const multi1d<Real>& poles,
396  const Real& RsdCG,
397  int MaxCG,
398  int& n_count)
399  {
400  MInvCG2Accum_a(M, chi, psi, norm, residues, poles, RsdCG, MaxCG, n_count);
401  }
402 
403 } // end namespace Chroma
Differentiable Linear Operator.
Definition: linearop.h:98
Linear Operator.
Definition: linearop.h:27
virtual const Subset & subset() const =0
Return the subset on which the operator acts.
virtual unsigned long nFlops() const
Definition: linearop.h:48
void MInvCG2Accum_a(const LinearOperator< T > &M, const T &chi, T &psi, const Real &norm, const multi1d< Real > &residues, const multi1d< Real > &poles, const Real &RsdCG, int MaxCG, int &n_count)
Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator.
void MInvCG2Accum(const LinearOperator< LatticeFermion > &M, const LatticeFermion &chi, LatticeFermion &psi, const Real &norm, const multi1d< Real > &residues, const multi1d< Real > &poles, const Real &RsdCG, int MaxCG, int &n_count)
Linear Operators.
int z
Definition: meslate.cc:36
int iz
Definition: meslate.cc:32
Multishift Conjugate-Gradient algorithm for a Linear Operator.
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
Definition: pbg5p_w.cc:30
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
int n_count
Definition: invbicg.cc:78
LatticeFermion tmp
Definition: mespbg5p_w.cc:36
Double c
Definition: invbicg.cc:108
LinOpSysSolverMGProtoClover::T T
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
Definition: pbg5p_w.cc:32
Real rsd_sq
Definition: invbicg.cc:121
int i
Definition: pbg5p_w.cc:55
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
Double cp
Definition: invbicg.cc:107
multi1d< LatticeFermion > chi(Ncb)
Complex a
Definition: invbicg.cc:95
LatticeFermion psi
Definition: mespbg5p_w.cc:35
DComplex d
Definition: invbicg.cc:99
START_CODE()
Complex b
Definition: invbicg.cc:96
Double chi_norm
Definition: invbicg.cc:79
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Definition: gtest.h:7351
int isz
Definition: pade_trln_w.cc:151
int norm
Definition: qtopcor.cc:35