CHROMA
invcg2_timing_hacks_2.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Conjugate-Gradient algorithm for a generic Linear Operator
3  */
4 
5 #include "chromabase.h"
7 
8 // This is a hack version of invcg2 designed to allow timing.
9 // In particular stuff that would make the CG converge has been
10 // DISABLED... The length of the CG is completely controlled by
11 // the INPUT n_count
12 
13 //! Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator
14 /*! \ingroup invert
15  * This subroutine uses the Conjugate Gradient (CG) algorithm to find
16  * the solution of the set of linear equations
17  *
18  * Chi = A . Psi
19  *
20  * where A = M^dag . M
21  *
22  * Algorithm:
23 
24  * Psi[0] := initial guess; Linear interpolation (argument)
25  * r[0] := Chi - M^dag . M . Psi[0] ; Initial residual
26  * p[1] := r[0] ; Initial direction
27  * IF |r[0]| <= RsdCG |Chi| THEN RETURN; Converged?
28  * FOR k FROM 1 TO MaxCG DO CG iterations
29  * a[k] := |r[k-1]|**2 / <Mp[k],Mp[k]> ;
30  * Psi[k] += a[k] p[k] ; New solution std::vector
31  * r[k] -= a[k] M^dag . M . p[k] ; New residual
32  * IF |r[k]| <= RsdCG |Chi| THEN RETURN; Converged?
33  * b[k+1] := |r[k]|**2 / |r[k-1]|**2 ;
34  * p[k+1] := r[k] + b[k+1] p[k]; New direction
35  *
36  * Arguments:
37  *
38  * \param M Linear Operator (Read)
39  * \param chi Source (Read)
40  * \param psi Solution (Modify)
41  * \param RsdCG CG residual accuracy (Read)
42  * \param MaxCG Maximum CG iterations (Read)
43  * \param n_count Number of CG iteration (Write)
44  *
45  * Local Variables:
46  *
47  * p Direction std::vector
48  * r Residual std::vector
49  * cp | r[k] |**2
50  * c | r[k-1] |**2
51  * k CG iteration counter
52  * a a[k]
53  * b b[k+1]
54  * d < p[k], A.p[k] >
55  * Mp Temporary for M.p
56  *
57  * Subroutines:
58  * +
59  * A Apply matrix M or M to std::vector
60  *
61  * Operations:
62  *
63  * 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns )
64  */
65 
67  const LFerm& chi,
68  LFerm& psi,
69  const LScal& mass,
70  const LScal& RsdCG,
71  int MaxCG,
72  int& n_count)
73 {
74  // Always work on checkerboard 1. -- Chroma convention for even odd prec solver
75  const Subset& s = rb[1];
76 
77  // Length of vectors; -- subset length times Ns (Nc*Complex taken care of below)
78  int n_3vec = (s.end() - s.start() + 1)*Ns;
79 
80  // Useful Coefficient
81  REAL mone = (REAL)(-1);
82 
83  // Stuff that usually gets packaged.
84  // Use LScal-s for scalars to be able to use a*x + y notation
85  LScal fact;
86  AT_REAL(fact) = (REAL)(Nd + AT_REAL(mass));
87 
88  LScal invfact;
89  AT_REAL(invfact)=(REAL)1/AT_REAL(fact);
90 
91  LScal mquarterinvfact;
92  AT_REAL(mquarterinvfact) = -0.25*AT_REAL(invfact);
93 
94  // Real rsd_sq = (RsdCG * RsdCG) * Real(norm2(chi,s));
95 
96  // Initial norm -- could be passed in I suppose
97  REAL chi_sq =(REAL)AT_REAL(norm2(chi,s));
98  //QDPIO::cout << "chi_norm = " << sqrt(chi_sq) << std::endl;
99 
100  // ( Target residuum * || chi || )^2. Ignored in this test
101  REAL rsd_sq = (AT_REAL(RsdCG) * AT_REAL(RsdCG)) * chi_sq;
102 
103  // +
104  // r[0] := Chi - A . Psi[0] where A = M . M
105 
106  // +
107  // r := [ Chi - M(u) . M(u) . psi ]
108  LFerm r, mp, mmp;
109 
110  LFerm tmp1, tmp2, tmp3;
111 
112  // Apply M^{dag}M to psi
113 
114  // M -- cb issues taken care of by D.apply
115  // could save on function call overhead here by calling
116  // Pete's routines directly
117 
118  D.apply(tmp1, psi, PLUS, 0);
119  D.apply(tmp2, tmp1,PLUS , 1);
120 
121  // One day there will be a single routine for these
122  tmp3[rb[1]] = mquarterinvfact*tmp2;
123  mp[rb[1]] = fact*psi + tmp3;
124 
125  // Mdag
126  D.apply(tmp1, mp, MINUS, 0);
127  D.apply(tmp2, tmp1, MINUS, 1);
128  tmp3[rb[1]] = mquarterinvfact*tmp2;
129  mmp[rb[1]] = fact*psi + tmp3;
130 
131  // Do a vaxpy3 norm here to get r and LOCAL || r ||^2 together
132  REAL cp;
133  LDble sum;
134  //r[s] = chi - mmp; and || r[s] ||^2
135  vaxpy3_norm( FIRST_ELEM(r,s), &mone, FIRST_ELEM(mmp,s), (REAL *)FIRST_ELEM(chi,s),
136  n_3vec, &cp);
137 
138  // Do DP sum. Could save function call overhead here
139  AT_REAL(sum) = (DOUBLE)cp;
140  QDPInternal::globalSum(sum);
141  cp = (REAL)AT_REAL(sum);
142 
143  // p[1] := r[0]
144  LFerm p;
145  p[s] = r;
146 
147 
148  //QDPIO::cout << "InvCG: k = 0 cp = " << cp << " rsd_sq = " << rsd_sq << std::endl;
149 
150  // Disable early termination
151 #if 0
152  // IF |r[0]| <= RsdCG |Chi| THEN RETURN;
153  // cp is now a REAL so no need for the Chroma toBool() ism
154  if ( cp <= rsd_sq )
155  {
156  n_count = 0;
157  return;
158  }
159 #endif
160 
161  //
162  // FOR k FROM 1 TO n_count DO
163  //
164  REAL a, b, c, d;
165  REAL *aptr;
166  LScal as,bs; // For expressions
167  REAL ma;
168 
169  // Go to n_count iters
170  for(int k = 1; k <= n_count; ++k)
171  {
172  // c = | r[k-1] |**2
173  c = cp;
174 
175  // a[k] := | r[k-1] |**2 / < p[k], Ap[k] > ;
176  // +
177  // First compute d = < p, A.p > = < p, M . M . p > = < M.p, M.p >
178  // Mp = M(u) * p
179  D.apply(tmp1, p, PLUS, 0);
180  D.apply(tmp2, tmp1,PLUS , 1);
181  tmp3[rb[1]] = mquarterinvfact*tmp2;
182 
183  // replace this with a vaxpy norm
184  // in the future will need a vaxpby_norm
185  // mp[rb[1]] = fact*psi + tmp3;
186  // d = | mp | ** 2
188  &AT_REAL(fact),
189  FIRST_ELEM(p,s),
190  FIRST_ELEM(tmp3,s),
191  n_3vec,
192  &d);
193 
194  // Global sum the local norm
195  AT_REAL(sum) = (DOUBLE)d;
196  QDPInternal::globalSum(sum);
197  d = (REAL)AT_REAL(sum);
198 
199  // Disable this. Set a = 1 to stop convergence
200 #if 0
201  a = c/d;
202 #else
203  a = 1/1;
204 #endif
205 
206  // Psi[k] += a[k] p[k]
207  AT_REAL(as) = a; // For expressions below
208  psi[s] += as * p; /* 2 Nc Ns flops */
209 
210  // r[k] -= a[k] A . p[k] ;
211  // + +
212  // r = r - M(u) . Mp = M . M . p = A . p
213  D.apply(tmp1, mp, MINUS, 0);
214  D.apply(tmp2, tmp1, MINUS, 1);
215  tmp3[rb[1]] = mquarterinvfact*tmp2;
216  mmp[rb[1]] = fact*mp + tmp3;
217 
218  // replace these with a vaxpy3 norm.
219  //r[s] -= a * mmp;
220  // cp = | r[k] |**2
221  ma = -a;
223  &ma,
224  FIRST_ELEM(mmp,s),
225  FIRST_ELEM(r,s), n_3vec, &cp);
226 
227  // Global sum the local norm
228  AT_REAL(sum)=(DOUBLE)cp;
229  QDPInternal::globalSum(sum);
230  cp = (REAL)AT_REAL(sum);
231 
232 
233  //QDPIO::cout << "InvCG: k = " << k << " cp = " << cp << std::endl;
234 
235  // Disable termination
236 #if 0
237  // cp and rsd_sq are now REAL so no need for Chroma toBool() ism
238  if ( cp <= rsd_sq )
239  {
240  n_count = k;
241  return;
242  }
243 #endif
244 
245  // Disable convergence
246 #if 0
247  // b[k+1] := |r[k]|**2 / |r[k-1]|**2
248  b = cp / c;
249 #else
250  b = 1/1;
251 #endif
252 
253  // For expressions
254  AT_REAL(bs) = b;
255  // p[k+1] := r[k] + b[k+1] p[k]
256  p[s] = r + bs*p; /* Nc Ns flops */
257  }
258  // n_count = MaxCG;
259  // QDP_error_exit("too many CG iterations: count = %d", n_count);
260 }
261 
Primary include file for CHROMA library code.
EXTERN int MaxCG
void InvCG2EvenOddPrecWilsLinOpTHack(const WilsonDslash &D, const LFerm &chi, LFerm &psi, const LScal &mass, const LScal &RsdCG, int MaxCG, int &n_count)
Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator.
OLattice< PSpinVector< PColorVector< RComplex< PScalar< REAL > >, Nc >, Ns > > LFerm
Highly optimised Conjugate-Gradient (CGNE) algorithm for a Even Odd Preconditioned.
#define AT_REAL(a)
OScalar< PScalar< PScalar< RScalar< PScalar< DOUBLE > > > > > LDble
#define FIRST_ELEM(a, s)
OScalar< PScalar< PScalar< RScalar< PScalar< REAL > > > > > LScal
Conjugate-Gradient algorithm for a generic Linear Operator.
void vaxpy3_norm(REAL *Out, REAL *scalep, REAL *InScale, REAL *Add, int n_3vec, REAL *dsum)
unsigned s
Definition: ldumul_w.cc:37
int c
Definition: meslate.cc:61
Nd
Definition: meslate.cc:74
Double tmp2
Definition: mesq.cc:30
Double tmp3
Definition: mesq.cc:31
QDPWilsonDslash WilsonDslash
Definition: dslash_w.h:132
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
Definition: pbg5p_w.cc:30
Real rsd_sq
Definition: invbicg.cc:121
Double mass
Definition: pbg5p_w.cc:54
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
Double cp
Definition: invbicg.cc:107
Complex a
Definition: invbicg.cc:95
DComplex d
Definition: invbicg.cc:99
multi1d< LatticeFermion > mp(Ncb)
Complex b
Definition: invbicg.cc:96
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > r(Ncb)
chi
Definition: pade_trln_w.cc:24
psi
Definition: pade_trln_w.cc:191
int n_count
Definition: pade_trln_w.cc:69
Double sum
Definition: qtopcor.cc:37