CHROMA
invcg2_timing_hacks.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Conjugate-Gradient algorithm for a generic Linear Operator
3  */
4 
5 #include "chromabase.h"
7 
8 namespace Chroma {
9 
10 // This is a hack version of invcg2 designed to allow timing.
11 // In particular stuff that would make the CG converge has been
12 // DISABLED... The length of the CG is completely controlled by
13 // the INPUT n_count
14 
15 //! Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator
16 /*! \ingroup invert
17  * This subroutine uses the Conjugate Gradient (CG) algorithm to find
18  * the solution of the set of linear equations
19  *
20  * Chi = A . Psi
21  *
22  * where A = M^dag . M
23  *
24  * Algorithm:
25 
26  * Psi[0] := initial guess; Linear interpolation (argument)
27  * r[0] := Chi - M^dag . M . Psi[0] ; Initial residual
28  * p[1] := r[0] ; Initial direction
29  * IF |r[0]| <= RsdCG |Chi| THEN RETURN; Converged?
30  * FOR k FROM 1 TO MaxCG DO CG iterations
31  * a[k] := |r[k-1]|**2 / <Mp[k],Mp[k]> ;
32  * Psi[k] += a[k] p[k] ; New solution std::vector
33  * r[k] -= a[k] M^dag . M . p[k] ; New residual
34  * IF |r[k]| <= RsdCG |Chi| THEN RETURN; Converged?
35  * b[k+1] := |r[k]|**2 / |r[k-1]|**2 ;
36  * p[k+1] := r[k] + b[k+1] p[k]; New direction
37  *
38  * Arguments:
39  *
40  * \param M Linear Operator (Read)
41  * \param chi Source (Read)
42  * \param psi Solution (Modify)
43  * \param RsdCG CG residual accuracy (Read)
44  * \param MaxCG Maximum CG iterations (Read)
45  * \param n_count Number of CG iteration (Write)
46  *
47  * Local Variables:
48  *
49  * p Direction std::vector
50  * r Residual std::vector
51  * cp | r[k] |**2
52  * c | r[k-1] |**2
53  * k CG iteration counter
54  * a a[k]
55  * b b[k+1]
56  * d < p[k], A.p[k] >
57  * Mp Temporary for M.p
58  *
59  * Subroutines:
60  * +
61  * A Apply matrix M or M to std::vector
62  *
63  * Operations:
64  *
65  * 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns )
66  */
67 
68 template<typename T>
69 SystemSolverResults_t
71  const T& chi,
72  T& psi,
73  int MaxCG)
74 
75 {
76  START_CODE();
77 
78  const Subset& s = M.subset();
79 
81  T mp; moveToFastMemoryHint(mp);
82  T mmp; moveToFastMemoryHint(mmp);
83  T p; moveToFastMemoryHint(p);
84  moveToFastMemoryHint(psi,true);
85  T r; moveToFastMemoryHint(r);
86  T chi_internal; moveToFastMemoryHint(chi_internal);
87 
88  chi_internal[s] = chi;
89 
90  QDPIO::cout << "InvCG2: starting" << std::endl;
91  FlopCounter flopcount;
92  flopcount.reset();
93  StopWatch swatch;
94  swatch.reset();
95  swatch.start();
96 
97 
98  Real chi_sq = Real(norm2(chi_internal,s));
99  flopcount.addSiteFlops(4*Nc*Ns,s);
100 
101 
102  // +
103  // r[0] := Chi - A . Psi[0] where A = M . M
104 
105  // +
106  // r := [ Chi - M(u) . M(u) . psi ]
107  M(mp, psi, PLUS);
108  M(mmp, mp, MINUS);
109  flopcount.addFlops(2*M.nFlops());
110 
111  r[s] = chi_internal - mmp;
112  flopcount.addSiteFlops(2*Nc*Ns,s);
113 
114  // p[1] := r[0]
115  p[s] = r;
116 
117  // Cp = |r[0]|^2
118  Double cp = norm2(r, s); /* 2 Nc Ns flops */
119  flopcount.addSiteFlops(4*Nc*Ns, s);
120 
121  //
122  // FOR k FROM 1 TO MaxCG DO
123  //
124  Real a, b;
125  Double c, d;
126  int k;
127  for(k = 1; k <= MaxCG; ++k)
128  {
129  // c = | r[k-1] |**2
130  c = cp;
131 
132  // a[k] := | r[k-1] |**2 / < p[k], Ap[k] > ;
133  // +
134  // First compute d = < p, A.p > = < p, M . M . p > = < M.p, M.p >
135  // Mp = M(u) * p
136  M(mp, p, PLUS); flopcount.addFlops(M.nFlops());
137 
138  // d = | mp | ** 2
139  d = norm2(mp, s); flopcount.addSiteFlops(4*Nc*Ns,s);
140 
141  a = Real(1)/Real(1);
142 
143  // Psi[k] += a[k] p[k]
144  psi[s] += a * p; flopcount.addSiteFlops(4*Nc*Ns,s);
145 
146  // r[k] -= a[k] A . p[k] ;
147  // + +
148  // r = r - M(u) . Mp = M . M . p = A . p
149  M(mmp, mp, MINUS);
150  flopcount.addFlops(M.nFlops());
151 
152  r[s] -= a * mmp;
153  flopcount.addSiteFlops(4*Nc*Ns, s);
154 
155  // IF |r[k]| <= RsdCG |Chi| THEN RETURN;
156 
157  // cp = | r[k] |**2
158  cp = norm2(r, s); flopcount.addSiteFlops(4*Nc*Ns,s);
159 
160  // Disable convergence
161  b = Real(1)/Real(1);
162 
163  // p[k+1] := r[k] + b[k+1] p[k]
164  p[s] = r + b*p; flopcount.addSiteFlops(4*Nc*Ns,s);
165  }
166 
167 
168  swatch.stop();
169  flopcount.report("invcg2_timings", swatch.getTimeInSeconds());
170  revertFromFastMemoryHint(psi,true);
171  res.n_count = MaxCG;
172  res.resid = 1.0e-30;
173  QDPIO::cout << "InvCG2 Timings: Iteration Count = " << k << std::endl;
174  END_CODE();
175  return res;
176 
177 
178 }
179 
180 
181 // Fix here for now
182 template<>
183 SystemSolverResults_t
185  const LatticeFermion& chi,
186  LatticeFermion& psi,
187  int MaxCG)
188 {
189  return InvCG2_timings_a(M, chi, psi, MaxCG);
190 }
191 
192 } // end namespace Chroma
Primary include file for CHROMA library code.
Linear Operator.
Definition: linearop.h:27
virtual const Subset & subset() const =0
Return the subset on which the operator acts.
virtual unsigned long nFlops() const
Definition: linearop.h:48
SystemSolverResults_t InvCG2_timings(const LinearOperator< LatticeFermion > &M, const LatticeFermion &chi, LatticeFermion &psi, int MaxCG)
Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator.
SystemSolverResults_t InvCG2_timings_a(const LinearOperator< T > &M, const T &chi, T &psi, int MaxCG)
Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator.
Conjugate-Gradient algorithm for a generic Linear Operator.
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
Double c
Definition: invbicg.cc:108
LinOpSysSolverMGProtoClover::T T
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
Definition: pbg5p_w.cc:32
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
Double cp
Definition: invbicg.cc:107
multi1d< LatticeFermion > chi(Ncb)
Complex a
Definition: invbicg.cc:95
LatticeFermion psi
Definition: mespbg5p_w.cc:35
DComplex d
Definition: invbicg.cc:99
START_CODE()
multi1d< LatticeFermion > mp(Ncb)
Complex b
Definition: invbicg.cc:96
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Definition: gtest.h:7351
Holds return info from SystemSolver call.
Definition: syssolver.h:17