CHROMA
minvcg_array.cc
Go to the documentation of this file.
1 
2 /*! \file
3  * \brief Multishift Conjugate-Gradient algorithm for a Linear Operator
4  */
5 
6 #include "linearop.h"
8 
9 using namespace QDP::Hints;
10 
11 namespace Chroma
12 {
13 
14 
15  //! Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator
16  /*! \ingroup invert
17  *
18  * This subroutine uses the Conjugate Gradient (CG) algorithm to find
19  * the solution of the set of linear equations
20  * Method used is described in Jegerlehner, hep-lat/9708029
21 
22  * We are searching in a subspace orthogonal to the eigenvectors EigVec
23  * of A. The source chi is assumed to already be orthogonal!
24 
25  * Chi = A . Psi
26 
27  * Algorithm:
28 
29  * Psi[0] := 0; Zeroed
30  * r[0] := Chi; Initial residual
31  * p[1] := Chi ; Initial direction
32  * b[0] := |r[0]|**2 / <p[0],Ap[0]> ;
33  * z[0] := 1 / (1 - (shift - shift(0))*b)
34  * bs[0] := b[0] * z[0]
35  * r[1] += b[k] A . p[0] ; New residual
36  * Psi[1] = - b[k] p[k] ; Starting solution std::vector
37  * IF |r[0]| <= RsdCG |Chi| THEN RETURN; Converged?
38  * FOR k FROM 1 TO MaxCG DO CG iterations
39  * a[k] := |r[k]|**2 / |r[k-1]|**2 ;
40  * p[k] := r[k] + a[k] p[k-1]; New direction
41  * b[k+1] := |r[k]|**2 / <p[k],Ap[k]> ;
42  * r[k+1] += b[k+1] A . p[k] ; New residual
43  * Psi[k+1] -= b[k+1] p[k] ; New solution std::vector
44  * IF |[k+1]| <= RsdCG |Chi| THEN RETURN; Converged?
45 
46  * Arguments:
47 
48  * A Hermitian linear operator (Read)
49  * Chi Source (Read)
50  * Psi array of solutions (Write)
51  * shifts shifts of form A + mass (Read)
52  * RsdCG residual accuracy (Read/Write)
53  * n_count Number of CG iteration (Write)
54 
55  * Local Variables:
56 
57  * p Direction std::vector
58  * r Residual std::vector
59  * cp | r[k] |**2
60  * c | r[k-1] |**2
61  * k CG iteration counter
62  * a a[k]
63  * b b[k+1]
64  * d < p[k], A.p[k] >
65  * Ap Temporary for M.p
66 
67  * MaxCG Maximum number of CG iterations allowed
68 
69  * Subroutines:
70  * A Apply matrix hermitian A to std::vector
71  */
72 
73  template<typename T, typename C>
74  void MInvCG_a(const C& A,
75  const multi1d<T>& chi,
76  multi1d< multi1d<T> >& psi,
77  const multi1d<Real>& shifts,
78  const multi1d<Real>& RsdCG,
79  int MaxCG,
80  int& n_count)
81  {
82  START_CODE();
83 
84  // Setup
85  const Subset& sub = A.subset();
86  int n_shift = shifts.size();
87  int N = A.size();
88  FlopCounter flopcount;
89  StopWatch swatch;
90 
91  if (shifts.size() != RsdCG.size())
92  {
93  QDPIO::cerr << "MinvCG: number of shifts and residuals must match" << std::endl;
94  QDP_abort(1);
95  }
96 
97  if (n_shift == 0) {
98  QDPIO::cerr << "MinvCG: You must supply at least 1 shift. shift.size() = " << n_shift << std::endl;
99  QDP_abort(1);
100  }
101 
102  /* Now find the smallest mass */
103  int isz = 0;
104  for(int findit=1; findit < n_shift; ++findit) {
105  if ( toBool( shifts[findit] < shifts[isz]) ) {
106  isz = findit;
107  }
108  }
109 
110  // Now arrange the memory
111  // The outer size of the multi1d is n_shift
112  psi.resize(n_shift);
113 
114  // For this algorithm, all the psi have to be 0 to start
115  // Only is that way the initial residuum r = chi
116  for(int i= 0; i < n_shift; ++i) {
117  psi[i].resize(N);
118  }
119 
120  multi1d<T> r(N);
121  multi1d< multi1d<T> > p(n_shift);
122 
123  for(int s = 0; s < n_shift; ++s) {
124  p[s].resize(N);
125  }
126 
127  multi1d<T> Ap(N);
128  multi1d<T> chi_internal(N);
129 
130  // Now arrange the memory:
131  // These guys get most hits so locate them first
132  moveToFastMemoryHint(Ap);
133  moveToFastMemoryHint(p[isz]);
134  moveToFastMemoryHint(r);
135  moveToFastMemoryHint(psi[isz]);
136  moveToFastMemoryHint(chi_internal);
137 
138  {
139  multi1d<T> blocker1(N); moveToFastMemoryHint(blocker1);
140  multi1d<T> blocker2(N); moveToFastMemoryHint(blocker2);
141 
142  // Now the rest of the p-s
143  for(int i=0; i < n_shift; i++) {
144  if( i!=isz ) {
145  moveToFastMemoryHint(p[i]);
146  moveToFastMemoryHint(psi[i]);
147  }
148  }
149 
150  // Blockers get freed here
151  }
152 
153  // initialise psi-s
154  for(int i=0; i < n_shift; i++) {
155  for(int n=0; n < N; ++n) {
156  psi[i][n][sub] = zero;
157  }
158  }
159 
160  // initialise chi internal
161  for(int i=0; i < N; i++) {
162  chi_internal[i][sub] = chi[i];
163  }
164 
165  // -------- All memory setup and copies done. Timer starts here
166  QDPIO::cout << "MinvCG starting" << std::endl;
167  flopcount.reset();
168  swatch.reset();
169  swatch.start();
170 
171  // If chi has zero norm then the result is zero
172  Double chi_norm_sq = norm2(chi_internal,sub);
173  flopcount.addSiteFlops(4*Nc*Ns*N,sub);
174 
175  Double chi_norm = sqrt(chi_norm_sq);
176 
177  if( toBool( chi_norm < fuzz )) {
178  n_count = 0;
179  swatch.stop();
180  QDPIO::cout << "MinvCG: Finished. Iters taken = " << n_count << std::endl;
181  flopcount.report("MinvCGArray", swatch.getTimeInSeconds());
182 
183  // Revert psi-s
184  for(int i=0; i < n_shift; i++) {
185  revertFromFastMemoryHint(psi[i], true);
186  }
187 
188  END_CODE();
189  return;
190  }
191 
192  multi1d<Double> rsd_sq(n_shift);
193  multi1d<Double> rsdcg_sq(n_shift);
194 
195  Double cp = chi_norm_sq;
196  for(int s = 0; s < n_shift; ++s) {
197  rsdcg_sq[s] = RsdCG[s] * RsdCG[s]; // RsdCG^2
198  rsd_sq[s] = Real(cp) * rsdcg_sq[s]; // || chi ||^2 RsdCG^2
199  }
200 
201 
202  // r[0] := p[0] := Chi
203  for(int n=0; n < N; ++n) {
204  r[n][sub] = chi_internal[n];
205 
206  for(int s=0; s < n_shift; s++) {
207  p[s][n][sub] = chi_internal[n];
208  }
209  }
210 
211 
212 
213  // b[0] := - | r[0] |**2 / < p[0], Ap[0] > ;/
214  // First compute d = < p, A.p >
215  // Ap = A . p */
216  A(Ap, p[isz], PLUS);
217  flopcount.addFlops(A.nFlops());
218 
219  for(int n=0; n < N; ++n) {
220  Ap[n][sub] += shifts[isz] * p[isz][n];
221  }
222  flopcount.addSiteFlops(4*Nc*Ns*N, sub);
223 
224  /* d = < p, A.p > */
225  Double d = innerProductReal(p[isz], Ap, sub);
226  flopcount.addSiteFlops(4*Nc*Ns*N ,sub);
227 
228  Double b = -cp/d;
229 
230  /* Compute the shifted bs and z */
231  multi1d<Double> bs(n_shift);
232  multi2d<Double> z(2, n_shift);
233  int iz;
234 
235  z[0][isz] = Double(1);
236  z[1][isz] = Double(1);
237  bs[isz] = b;
238  iz = 1;
239 
240  for(int s = 0; s < n_shift; ++s)
241  {
242  if( s != isz ) {
243  z[1-iz][s] = Double(1);
244  z[iz][s] = Double(1) / (Double(1) - (Double(shifts[s])-Double(shifts[isz]))*b);
245  bs[s] = b * z[iz][s];
246  }
247  }
248 
249  // r[1] += b[0] A . p[0];
250  for(int n=0; n < N; ++n) {
251  r[n][sub] += Real(b)* Ap[n];
252  }
253  flopcount.addSiteFlops(4*Nc*Ns*N, sub);
254 
255  // Psi[1] -= b[0] p[0] = - b[0] chi;
256  // Psi[0] are all 0 so I can write this as a -= bs*chi
257  for(int s = 0; s < n_shift; ++s) {
258  for(int n=0; n < N; ++n) {
259  psi[s][n][sub] -= Real(bs[s])*chi_internal[n];
260  }
261  }
262  flopcount.addSiteFlops(4*Nc*Ns*N*n_shift, sub);
263 
264  // c = |r[1]|^2
265  Double c = norm2(r,sub);
266  flopcount.addSiteFlops(4*Nc*Ns*N,sub);
267 
268 
269  // Check convergence of first solution
270  multi1d<bool> convsP(n_shift);
271  for(int s = 0; s < n_shift; ++s) {
272  convsP[s] = false;
273  }
274 
275  bool convP = toBool( c < rsd_sq[isz] );
276 
277 
278  // FOR k FROM 1 TO MaxCG DO
279  // IF |psi[k+1] - psi[k]| <= RsdCG |psi[k+1]| THEN RETURN;
280  Double z0, z1;
281  Double ztmp;
282  Double cs;
283  Double a;
284  Double as;
285  Double bp;
286  int k;
287 
288  for(k = 1; k <= MaxCG && !convP ; ++k)
289  {
290  // a[k+1] := |r[k]|**2 / |r[k-1]|**2 ;
291  a = c/cp;
292 
293  // p[k+1] := r[k+1] + a[k+1] p[k];
294  // Compute the shifted as */
295  // ps[k+1] := zs[k+1] r[k+1] + a[k+1] ps[k];
296  for(int s = 0; s < n_shift; ++s) {
297 
298  // Always update p[isz] even if isz is converged
299  // since the other p-s depend on it.
300  if (s == isz) {
301 
302  for(int n=0; n < N; ++n) {
303  p[s][n][sub] = r[n] + Real(a)*p[s][n];
304  // p[s][n][sub] *= Real(a);
305  // p[s][n][sub] += r[n];
306  }
307  flopcount.addSiteFlops(4*Nc*Ns*N,sub);
308 
309  }
310  else {
311  // Don't update other p-s if converged.
312  if( ! convsP[s] ) {
313  as = a * z[iz][s]*bs[s] / (z[1-iz][s]*b);
314 
315  for(int n=0; n < N; ++n) {
316  p[s][n][sub] = Real(as)*p[s][n] + Real(z[iz][s])*r[n];
317  //p[s][n][sub] *= Real(as);
318  //p[s][n][sub] += Real(z[iz][s])*r[n];
319  }
320  flopcount.addSiteFlops(6*Nc*Ns*N, sub);
321  }
322  }
323 
324  }
325 
326  // cp = | r[k] |**2
327  cp = c;
328 
329  // b[k] := | r[k] |**2 / < p[k], Ap[k] > ;
330  // First compute d = < p, A.p >
331  // Ap = A . p
332  A(Ap, p[isz], PLUS);
333  flopcount.addFlops(A.nFlops());
334 
335  for(int n=0; n < N; ++n) {
336  Ap[n][sub] += shifts[isz] *p[isz][n];
337  }
338  flopcount.addSiteFlops(4*Nc*Ns*N, sub);
339 
340  /* d = < p, A.p > */
341  d = innerProductReal(p[isz], Ap, sub);
342  flopcount.addSiteFlops(4*Nc*Ns*N, sub);
343 
344  bp = b;
345  b = -cp/d;
346 
347  // Compute the shifted bs and z
348  bs[isz] = b;
349  iz = 1 - iz;
350  for(int s = 0; s < n_shift; s++) {
351 
352  if (s != isz && !convsP[s] ) {
353  z0 = z[1-iz][s];
354  z1 = z[iz][s];
355  z[iz][s] = z0*z1*bp;
356  z[iz][s] /= b*a*(z1-z0) + z1*bp*(Double(1) - (shifts[s] - shifts[isz])*b);
357  bs[s] = b*z[iz][s]/z0;
358  }
359  }
360 
361  // r[k+1] += b[k] A . p[k] ;
362  for(int n=0; n < N; ++n) {
363  r[n][sub] += Real(b)*Ap[n];
364  }
365  flopcount.addSiteFlops(4*Nc*Ns*N, sub);
366 
367  // Psi[k+1] -= b[k] p[k] ;
368  for(int s = 0; s < n_shift; ++s) {
369  if (! convsP[s] ) {
370  for(int n=0; n < N; ++n) {
371  psi[s][n][sub] -= Real(bs[s])*p[s][n];
372  }
373  flopcount.addSiteFlops(4*Nc*Ns*N, sub);
374  }
375  }
376 
377  // c = | r[k] |**2
378  c = norm2(r,sub);
379  flopcount.addSiteFlops(4*Nc*Ns*N, sub);
380 
381  // IF |psi[k+1] - psi[k]| <= RsdCG |psi[k+1]| THEN RETURN;
382  // or IF |r[k+1]| <= RsdCG |chi| THEN RETURN;
383  convP = true;
384  for(int s = 0; s < n_shift; s++) {
385  if (! convsP[s] ) {
386 
387 
388  // Convergence methods
389  // Check norm of shifted residuals
390  Double css = c * z[iz][s]* z[iz][s];
391  convsP[s] = toBool( css < rsd_sq[s] );
392 
393  }
394  convP &= convsP[s];
395  }
396 
397  n_count = k;
398  }
399 
400  swatch.stop();
401  QDPIO::cout << "MinvCGArray finished: " << n_count << " iterations " << std::endl;
402  flopcount.report("MinvCGArray", swatch.getTimeInSeconds());
403  QDPIO::cout << "MinvCG Explicit solution check: " << std::endl;
404 
405  // Expicitly check the ALL solutions
406  for(int s = 0; s < n_shift; ++s)
407  {
408  A(Ap, psi[s], PLUS);
409  for(int n=0; n < N; ++n) {
410  Ap[n][sub] += shifts[s] * psi[s][n];
411 
412  Ap[n][sub] -= chi_internal[n];
413  }
414 
415  c = norm2(Ap,sub);
416 
417  QDPIO::cout << "MInvCG (conv): s = " << s
418  << " shift = " << shifts[s]
419  << " r = " << Real(sqrt(c)/chi_norm) << std::endl;
420 
421  }
422 
423  for(int i=0; i < n_shift; i++) {
424  revertFromFastMemoryHint(psi[i],true);
425  }
426 
427  if (n_count == MaxCG) {
428  QDPIO::cout << "too many CG iterationns: " << n_count << std::endl;
429  QDP_abort(1);
430 
431  }
432  END_CODE();
433  return;
434  }
435 
436 
437  /*! \ingroup invert */
438  template<>
440  const multi1d<LatticeFermion>& chi,
441  multi1d< multi1d<LatticeFermion> >& psi,
442  const multi1d<Real>& shifts,
443  const multi1d<Real>& RsdCG,
444  int MaxCG,
445  int &n_count)
446  {
447  MInvCG_a(M, chi, psi, shifts, RsdCG, MaxCG, n_count);
448  }
449 
450 
451  /*! \ingroup invert */
452  template<>
453  void MInvCG(const DiffLinearOperatorArray<LatticeFermion,
454  multi1d<LatticeColorMatrix>,
455  multi1d<LatticeColorMatrix> >& M,
456  const multi1d<LatticeFermion>& chi,
457  multi1d< multi1d<LatticeFermion> >& psi,
458  const multi1d<Real>& shifts,
459  const multi1d<Real>& RsdCG,
460  int MaxCG,
461  int &n_count)
462  {
463  MInvCG_a(M, chi, psi, shifts, RsdCG, MaxCG, n_count);
464  }
465 
466 } // end namespace Chroma
#define END_CODE()
Definition: chromabase.h:65
#define START_CODE()
Definition: chromabase.h:64
Differentiable Linear Operator.
Definition: linearop.h:150
Linear Operator to arrays.
Definition: linearop.h:61
EXTERN int MaxCG
void MInvCG(const DiffLinearOperatorArray< LatticeFermion, multi1d< LatticeColorMatrix >, multi1d< LatticeColorMatrix > > &M, const multi1d< LatticeFermion > &chi, multi1d< multi1d< LatticeFermion > > &psi, const multi1d< Real > &shifts, const multi1d< Real > &RsdCG, int MaxCG, int &n_count)
void MInvCG_a(const C &A, const multi1d< T > &chi, multi1d< multi1d< T > > &psi, const multi1d< Real > &shifts, const multi1d< Real > &RsdCG, int MaxCG, int &n_count)
Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator.
Definition: minvcg_array.cc:74
unsigned s
Definition: ldumul_w.cc:37
unsigned i
Definition: ldumul_w.cc:34
unsigned n
Definition: ldumul_w.cc:36
Linear Operators.
int z
Definition: meslate.cc:36
int c
Definition: meslate.cc:61
int iz
Definition: meslate.cc:32
Multishift Conjugate-Gradient algorithm for a Linear Operator.
SpinMatrix C()
C = Gamma(10)
Definition: barspinmat_w.cc:29
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
Definition: pbg5p_w.cc:30
Real rsd_sq
Definition: invbicg.cc:121
@ PLUS
Definition: chromabase.h:45
Double cp
Definition: invbicg.cc:107
Complex a
Definition: invbicg.cc:95
DComplex d
Definition: invbicg.cc:99
A(A, psi, r, Ncb, PLUS)
Complex b
Definition: invbicg.cc:96
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
FloatingPoint< double > Double
Definition: gtest.h:7351
Double chi_norm
Definition: pade_trln_w.cc:85
int isz
Definition: pade_trln_w.cc:151
multi1d< LatticeFermion > r(Ncb)
chi
Definition: pade_trln_w.cc:24
psi
Definition: pade_trln_w.cc:191
int n_count
Definition: pade_trln_w.cc:69