CHROMA
minvcg2.cc
Go to the documentation of this file.
1 
2 /*! \file
3  * \brief Multishift Conjugate-Gradient algorithm for a Linear Operator
4  */
5 
6 #include "linearop.h"
8 #undef PAT
9 #ifdef PAT
10 #include <pat_api.h>
11 #endif
12 namespace Chroma
13 {
14 
15 
16  //! Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator
17  /*! \ingroup invert
18  *
19  * This subroutine uses the Conjugate Gradient (CG) algorithm to find
20  * the solution of the set of linear equations
21  * Method used is described in Jegerlehner, hep-lat/9708029
22 
23  * We are searching in a subspace orthogonal to the eigenvectors EigVec
24  * of A. The source chi is assumed to already be orthogonal!
25 
26  * Chi = M^\dag M . Psi
27 
28  * Algorithm:
29 
30  * Psi[0] := 0; Zeroed
31  * r[0] := Chi; Initial residual
32  * p[1] := Chi ; Initial direction
33  * b[0] := |r[0]|**2 / <M p[0], M p[0]> ;
34  * z[0] := 1 / (1 - (shift - shift(0))*b)
35  * bs[0] := b[0] * z[0]
36  * r[1] += b[k] M^\dag M . p[0] ; New residual
37  * Psi[1] = - b[k] p[k] ; Starting solution std::vector
38  * IF |r[0]| <= RsdCG |Chi| THEN RETURN; Converged?
39  * FOR k FROM 1 TO MaxCG DO CG iterations
40  * a[k] := |r[k]|**2 / |r[k-1]|**2 ;
41  * p[k] := r[k] + a[k] p[k-1]; New direction
42  * b[k+1] := |r[k]|**2 / <M p[k], Mp[k]> ;
43  * r[k+1] += b[k+1] M^\dag M . p[k] ; New residual
44  * Psi[k+1] -= b[k+1] p[k] ; New solution std::vector
45  * IF |[k+1]| <= RsdCG |Chi| THEN RETURN; Converged?
46 
47  * Arguments:
48 
49  * A Hermitian linear operator (Read)
50  * Chi Source (Read)
51  * Psi array of solutions (Write)
52  * shifts shifts of form A + mass (Read)
53  * RsdCG residual accuracy (Read/Write)
54  * n_count Number of CG iteration (Write)
55 
56  * Local Variables:
57 
58  * p Direction std::vector
59  * r Residual std::vector
60  * cp | r[k] |**2
61  * c | r[k-1] |**2
62  * k CG iteration counter
63  * a a[k]
64  * b b[k+1]
65  * d < p[k], A.p[k] >
66  * Ap Temporary for M.p
67 
68  * MaxCG Maximum number of CG iterations allowed
69 
70  * Subroutines:
71  * A Apply matrix hermitian A to std::vector
72  */
73 
74  template<typename T, typename R>
75  void MInvCG2_a(const LinearOperator<T>& M,
76  const T& chi,
77  multi1d<T>& psi,
78  const multi1d<R>& shifts,
79  const multi1d<R>& RsdCG,
80  int MaxCG,
81  int& n_count)
82  {
83  START_CODE();
84 
85  const Subset& sub = M.subset();
86 
87  if (shifts.size() != RsdCG.size())
88  {
89  QDPIO::cerr << "MInvCG: number of shifts and residuals must match" << std::endl;
90  QDP_abort(1);
91  }
92 
93  int n_shift = shifts.size();
94 
95  if (n_shift == 0)
96  {
97  QDPIO::cerr << "MInvCG: You must supply at least 1 mass: mass.size() = "
98  << n_shift << std::endl;
99  QDP_abort(1);
100  }
101 
102  /* Now find the smallest mass */
103  int isz = 0;
104  for(int findit=1; findit < n_shift; ++findit) {
105  if ( toBool( shifts[findit] < shifts[isz]) ) {
106  isz = findit;
107  }
108  }
109 
110  // We need to make sure, that psi is at least as big as the number
111  // of shifts. We resize it if it is not big enough.
112  // However, it is allowed to be bigger.
113  if( psi.size() < n_shift ) {
114  psi.resize(n_shift);
115  }
116 
117  // For this algorithm, all the psi have to be 0 to start
118  // Only is that way the initial residuum r = chi
119  for(int i= 0; i < n_shift; ++i) {
120  psi[i][sub] = zero;
121  }
122 
123  T chi_internal; moveToFastMemoryHint(chi_internal);
124  chi_internal[sub] = chi;
125  moveToFastMemoryHint(psi,true);
126 
127  FlopCounter flopcount;
128  flopcount.reset();
129  StopWatch swatch;
130  swatch.reset();
131  swatch.start();
132 
133  // If chi has zero norm then the result is zero
134  Double chi_norm_sq = norm2(chi_internal,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
135  Double chi_norm = sqrt(chi_norm_sq);
136 
137  if( toBool( chi_norm < fuzz ))
138  {
139  swatch.stop();
140 
141  n_count = 0;
142 
143  QDPIO::cout << "MInvCG2: " << n_count << " iterations" << std::endl;
144  flopcount.report("minvcg2", swatch.getTimeInSeconds());
145  revertFromFastMemoryHint(psi,true);
146 
147  // The psi are all zero anyway at this point
148  // for(int i=0; i < n_shift; i++) { psi[i] = zero; }
149  END_CODE();
150  return;
151  }
152 
153  multi1d<Double> rsd_sq(n_shift);
154  multi1d<Double> rsdcg_sq(n_shift);
155 
156  Double cp = chi_norm_sq;
157  int s;
158  for(s = 0; s < n_shift; ++s) {
159  rsdcg_sq[s] = RsdCG[s] * RsdCG[s]; // RsdCG^2
160  rsd_sq[s] = Real(cp) * rsdcg_sq[s]; // || chi ||^2 RsdCG^2
161  }
162 
163 
164  // r[0] := p[0] := Chi
165  T r; moveToFastMemoryHint(r);
166  r[sub] = chi_internal; // no flops
167 
168 
169  T p_0; moveToFastMemoryHint(p_0);
170  p_0[sub] = chi_internal;
171 
172 
173  // Psi[0] := 0;
174  multi1d<T> p(n_shift); moveToFastMemoryHint(p);
175  for(s = 0; s < n_shift; ++s) {
176  p[s][sub] = chi_internal; // no flops
177  }
178 
179 
180  // b[0] := - | r[0] |**2 / < p[0], Ap[0] > ;/
181  // First compute d = < p, A.p >
182  // Ap = A . p */
183  T Mp, MMp; moveToFastMemoryHint(Mp); moveToFastMemoryHint(MMp);
184  M(Mp, p_0, PLUS); flopcount.addFlops(M.nFlops());
185 
186  /* d = < M p, M.p > */
187  Double d = norm2(Mp, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
188 
189  M(MMp, Mp, MINUS); flopcount.addFlops(M.nFlops());
190 
191  Double b = -cp/d;
192 
193  // r[1] += b[0] A . p[0];
194  R b_r = b;
195  r[sub] += b_r*MMp; flopcount.addSiteFlops(4*Nc*Ns,sub);
196 
197  /* Compute the shifted bs and z */
198  multi1d<Double> bs(n_shift);
199  multi2d<Double> z(2, n_shift);
200  int iz;
201 
202  /* -- These are no longer special... */
203  /*
204  z[0][isz] = Double(1);
205  z[1][isz] = Double(1);
206  bs[isz] = b;
207  */
208 
209  iz = 1;
210 
211  for(s = 0; s < n_shift; ++s)
212  {
213  z[1-iz][s] = Double(1);
214  // z[iz][s] = Double(1) / (Double(1) - (Double(shifts[s])-Double(shifts[isz]))*b);
215  z[iz][s] = Double(1) / (Double(1) - Double(shifts[s])*b);
216  bs[s] = b * z[iz][s];
217  }
218 
219 
220  // Psi[1] -= b[0] p[0] = - b[0] chi;
221  for(s = 0; s < n_shift; ++s) {
222  R bs_r = bs[s];
223  psi[s][sub] = - bs_r*chi_internal; flopcount.addSiteFlops(2*Nc*Ns,sub);
224  }
225 
226  // c = |r[1]|^2
227  Double c = norm2(r,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
228 
229  // Check convergence of first solution
230  multi1d<bool> convsP(n_shift);
231  for(s = 0; s < n_shift; ++s) {
232  convsP[s] = false;
233  }
234 
235  bool convP = toBool( c < rsd_sq[isz] );
236 
237 #if 0
238  QDPIO::cout << "MInvCG: k = 0 r = " << sqrt(c) << std::endl;
239 #endif
240 
241  // FOR k FROM 1 TO MaxCG DO
242  // IF |psi[k+1] - psi[k]| <= RsdCG |psi[k+1]| THEN RETURN;
243  Double z0, z1;
244  Double ztmp;
245  Double cs;
246  Double a;
247  Double as;
248  Double bp;
249  int k;
250 
251  for(k = 1; k <= MaxCG && !convP ; ++k)
252  {
253  // a[k+1] := |r[k]|**2 / |r[k-1]|**2 ;
254  a = c/cp;
255 
256  // Update p
257  R a_r = a;
258  p_0[sub] = r + a_r*p_0; flopcount.addSiteFlops(4*Nc*Ns,sub);
259  // p[k+1] := r[k+1] + a[k+1] p[k];
260  // Compute the shifted as */
261  // ps[k+1] := zs[k+1] r[k+1] + a[k+1] ps[k];
262  for(s = 0; s < n_shift; ++s) {
263 
264  // Don't update other p-s if converged.
265  if( ! convsP[s] ) {
266 
267  as = a * z[iz][s]*bs[s] / (z[1-iz][s]*b);
268  R zizs= z[iz][s];
269  R as_r = as;
270  p[s][sub] = zizs*r + as_r*p[s]; flopcount.addSiteFlops(6*Nc*Ns,sub);
271  }
272  }
273 
274  // cp = | r[k] |**2
275  cp = c;
276 
277  // b[k] := | r[k] |**2 / < p[k], Ap[k] > ;
278  // First compute d = < p, A.p >
279  // Ap = A . p
280  M(Mp, p_0, PLUS); flopcount.addFlops(M.nFlops());
281 
282  /* d = < p, A.p > */
283  d = norm2(Mp, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
284 
285  M(MMp, Mp, MINUS); flopcount.addFlops(M.nFlops());
286 
287  bp = b;
288  b = -cp/d;
289  // r[k+1] += b[k] A . p[k] ;
290  b_r = b;
291  r[sub] += b_r*MMp; flopcount.addSiteFlops(4*Nc*Ns,sub);
292  // c = | r[k] |**2
293  c = norm2(r,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
294 
295  // Compute the shifted bs and z
296  iz = 1 - iz;
297  for(s = 0; s < n_shift; s++) {
298  if ( !convsP[s] ) {
299  z0 = z[1-iz][s];
300  z1 = z[iz][s];
301  z[iz][s] = z0*z1*bp;
302  z[iz][s] /= b*a*(z1-z0) + z1*bp*(Double(1) - shifts[s]*b);
303  bs[s] = b*z[iz][s]/z0;
304  }
305  }
306 
307 
308 
309  // Psi[k+1] -= b[k] p[k] ;
310  for(s = 0; s < n_shift; ++s)
311  {
312  if (! convsP[s] )
313  {
314  R bs_r = bs[s];
315 
316  psi[s][sub] -= bs_r*p[s]; flopcount.addSiteFlops(2*Nc*Ns,sub);
317  }
318  }
319 
320 
321  // IF |psi[k+1] - psi[k]| <= RsdCG |psi[k+1]| THEN RETURN;
322  // or IF |r[k+1]| <= RsdCG |chi| THEN RETURN;
323  convP = true;
324  for(s = 0; s < n_shift; s++)
325  {
326  if (! convsP[s] )
327  {
328  // Convergence methods
329  // Check norm of shifted residuals
330  Double css = c * z[iz][s]* z[iz][s];
331 
332 
333 
334  convsP[s] = toBool( css < rsd_sq[s] );
335 
336 
337 
338  }
339  convP &= convsP[s];
340  }
341 
342  n_count = k;
343  }
344 
345  swatch.stop();
346 
347 #if 0
348  // Check answers
349  for(int s=0; s < n_shift; s++) {
350  r[sub] = chi;
351  M(Mp, psi[s], PLUS);
352  M(MMp, Mp, MINUS);
353  MMp[sub] += shifts[s]*psi[s];
354  r[sub] -= MMp;
355  Double rnorm2 = norm2(r, sub);
356  Double chinorm2= norm2(chi,sub);
357  QDPIO::cout << "shift("<<s<<"): || r || / || chi || = "<<sqrt(rnorm2/chinorm2)<< std::endl;
358  }
359 #endif
360  QDPIO::cout << "MInvCG2: " << n_count << " iterations" << std::endl;
361  flopcount.report("minvcg", swatch.getTimeInSeconds());
362  revertFromFastMemoryHint(psi,true);
363 
364  if (n_count == MaxCG) {
365  QDP_error_exit("too many CG iterationns: %d\n", n_count);
366  }
367 
368  END_CODE();
369  return;
370  }
371 
372 
373 
374  /*! \ingroup invert */
375 
377  const LatticeFermionF& chi,
378  multi1d<LatticeFermionF>& psi,
379  const multi1d<RealF>& shifts,
380  const multi1d<RealF>& RsdCG,
381  int MaxCG,
382  int &n_count)
383  {
384 #ifdef PAT
385  int ierr=PAT_region_begin(22, "MInvCG2LinOp");
386 #endif
387  MInvCG2_a(M, chi, psi, shifts, RsdCG, MaxCG, n_count);
388 #ifdef PAT
389  ierr=PAT_region_end(22);
390 #endif
391  }
392 
393  /*! \ingroup invert */
395  const LatticeFermionD& chi,
396  multi1d<LatticeFermionD>& psi,
397  const multi1d<RealD>& shifts,
398  const multi1d<RealD>& RsdCG,
399  int MaxCG,
400  int &n_count)
401  {
402 #ifdef PAT
403  int ierr=PAT_region_begin(22, "MInvCG2LinOp");
404 #endif
405  MInvCG2_a(M, chi, psi, shifts, RsdCG, MaxCG, n_count);
406 #ifdef PAT
407  ierr=PAT_region_end(22);
408 #endif
409  }
410 
411 
412  /*! \ingroup invert */
413 
414  void MInvCG2(const DiffLinearOperator<LatticeFermionF,
415  multi1d<LatticeColorMatrixF>,
416  multi1d<LatticeColorMatrixF> >& M,
417  const LatticeFermionF& chi,
418  multi1d<LatticeFermionF>& psi,
419  const multi1d<RealF>& shifts,
420  const multi1d<RealF>& RsdCG,
421  int MaxCG,
422  int &n_count)
423  {
424 #ifdef PAT
425  int ierr=PAT_region_begin(23,"MInvCG2DiffLinOp");
426 #endif
427  MInvCG2_a(M, chi, psi, shifts, RsdCG, MaxCG, n_count);
428 #ifdef PAT
429  ierr=PAT_region_end(23);
430 #endif
431  }
432 
433 
434  void MInvCG2(const DiffLinearOperator<LatticeFermionD,
435  multi1d<LatticeColorMatrixD>,
436  multi1d<LatticeColorMatrixD> >& M,
437  const LatticeFermionD& chi,
438  multi1d<LatticeFermionD>& psi,
439  const multi1d<RealD>& shifts,
440  const multi1d<RealD>& RsdCG,
441  int MaxCG,
442  int &n_count)
443  {
444 #ifdef PAT
445  int ierr=PAT_region_begin(23,"MInvCG2DiffLinOp");
446 #endif
447  MInvCG2_a(M, chi, psi, shifts, RsdCG, MaxCG, n_count);
448 #ifdef PAT
449  ierr=PAT_region_end(23);
450 #endif
451  }
452 
453 } // end namespace Chroma
Differentiable Linear Operator.
Definition: linearop.h:98
Linear Operator.
Definition: linearop.h:27
virtual const Subset & subset() const =0
Return the subset on which the operator acts.
virtual unsigned long nFlops() const
Definition: linearop.h:48
void MInvCG2_a(const LinearOperator< T > &M, const T &chi, multi1d< T > &psi, const multi1d< R > &shifts, const multi1d< R > &RsdCG, int MaxCG, int &n_count)
Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator.
Definition: minvcg2.cc:75
void MInvCG2(const LinearOperator< LatticeFermionF > &M, const LatticeFermionF &chi, multi1d< LatticeFermionF > &psi, const multi1d< RealF > &shifts, const multi1d< RealF > &RsdCG, int MaxCG, int &n_count)
Definition: minvcg2.cc:376
Linear Operators.
int z
Definition: meslate.cc:36
int iz
Definition: meslate.cc:32
Multishift Conjugate-Gradient algorithm for a Linear Operator.
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
Definition: pbg5p_w.cc:30
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
int n_count
Definition: invbicg.cc:78
Double c
Definition: invbicg.cc:108
LinOpSysSolverMGProtoClover::T T
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
Definition: pbg5p_w.cc:32
Real rsd_sq
Definition: invbicg.cc:121
int i
Definition: pbg5p_w.cc:55
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
Double cp
Definition: invbicg.cc:107
multi1d< LatticeFermion > chi(Ncb)
Complex a
Definition: invbicg.cc:95
LatticeFermion psi
Definition: mespbg5p_w.cc:35
DComplex d
Definition: invbicg.cc:99
START_CODE()
Complex b
Definition: invbicg.cc:96
Double chi_norm
Definition: invbicg.cc:79
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Definition: gtest.h:7351
int isz
Definition: pade_trln_w.cc:151