CHROMA
invcg2_timing_hacks_3.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Conjugate-Gradient algorithm for a generic Linear Operator
3  */
4 
5 #include "chromabase.h"
7 
8 
9 // This is a hack version of invcg2 designed to allow timing.
10 // In particular stuff that would make the CG converge has been
11 // DISABLED... The length of the CG is completely controlled by
12 // the INPUT n_count
13 
14 //! Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator
15 /*! \ingroup invert
16  * This subroutine uses the Conjugate Gradient (CG) algorithm to find
17  * the solution of the set of linear equations
18  *
19  * Chi = A . Psi
20  *
21  * where A = M^dag . M
22  *
23  * Algorithm:
24 
25  * Psi[0] := initial guess; Linear interpolation (argument)
26  * r[0] := Chi - M^dag . M . Psi[0] ; Initial residual
27  * p[1] := r[0] ; Initial direction
28  * IF |r[0]| <= RsdCG |Chi| THEN RETURN; Converged?
29  * FOR k FROM 1 TO MaxCG DO CG iterations
30  * a[k] := |r[k-1]|**2 / <Mp[k],Mp[k]> ;
31  * Psi[k] += a[k] p[k] ; New solution std::vector
32  * r[k] -= a[k] M^dag . M . p[k] ; New residual
33  * IF |r[k]| <= RsdCG |Chi| THEN RETURN; Converged?
34  * b[k+1] := |r[k]|**2 / |r[k-1]|**2 ;
35  * p[k+1] := r[k] + b[k+1] p[k]; New direction
36  *
37  * Arguments:
38  *
39  * \param M Linear Operator (Read)
40  * \param chi Source (Read)
41  * \param psi Solution (Modify)
42  * \param RsdCG CG residual accuracy (Read)
43  * \param MaxCG Maximum CG iterations (Read)
44  * \param n_count Number of CG iteration (Write)
45  *
46  * Local Variables:
47  *
48  * p Direction std::vector
49  * r Residual std::vector
50  * cp | r[k] |**2
51  * c | r[k-1] |**2
52  * k CG iteration counter
53  * a a[k]
54  * b b[k+1]
55  * d < p[k], A.p[k] >
56  * Mp Temporary for M.p
57  *
58  * Subroutines:
59  * +
60  * A Apply matrix M or M to std::vector
61  *
62  * Operations:
63  *
64  * 2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns )
65  */
66 
68  const LFerm& chi,
69  LFerm& psi,
70  const LScal& mass,
71  const LScal& RsdCG,
72  int MaxCG,
73  int& n_count)
74 {
75  // Always work on checkerboard 1. -- Chroma convention for even odd prec solver
76  const Subset& s = rb[1];
77 
78  // Length of vectors; -- subset length times Ns (Nc*Complex taken care of below)
79  int n_3vec = (s.end() - s.start() + 1);
80 
81  // Useful Coefficient
82  REAL mone = (REAL)(-1);
83 
84  // Stuff that usually gets packaged.
85  // Use LScal-s for scalars to be able to use a*x + y notation
86  LScal fact;
87  AT_REAL(fact) = (REAL)(Nd + AT_REAL(mass));
88 
89  LScal invfact;
90  AT_REAL(invfact)=(REAL)1/AT_REAL(fact);
91 
92  LScal mquarterinvfact;
93  AT_REAL(mquarterinvfact) = -0.25*AT_REAL(invfact);
94 
95  // Real rsd_sq = (RsdCG * RsdCG) * Real(norm2(chi,s));
96 
97  // Initial norm -- could be passed in I suppose
98  REAL chi_sq =(REAL)AT_REAL(norm2(chi,s));
99  //QDPIO::cout << "chi_norm = " << sqrt(chi_sq) << std::endl;
100 
101  // ( Target residuum * || chi || )^2. Ignored in this test
102  REAL rsd_sq = (AT_REAL(RsdCG) * AT_REAL(RsdCG)) * chi_sq;
103 
104  // +
105  // r[0] := Chi - A . Psi[0] where A = M . M
106 
107  // +
108  // r := [ Chi - M(u) . M(u) . psi ]
109  LFerm r, mp, mmp;
110 
111  LFerm tmp1, tmp2, tmp3;
112 
113  // Apply M^{dag}M to psi
114 
115  // M -- cb issues taken care of by D.apply
116  // could save on function call overhead here by calling
117  // Pete's routines directly
118 
119  D.apply(tmp1, psi, PLUS, 0);
120  D.apply(tmp2, tmp1,PLUS , 1);
121 
122  // One day there will be a single routine for these
123  tmp3[rb[1]] = mquarterinvfact*tmp2;
124  mp[rb[1]] = fact*psi + tmp3;
125 
126  // Mdag
127  D.apply(tmp1, mp, MINUS, 0);
128  D.apply(tmp2, tmp1, MINUS, 1);
129  tmp3[rb[1]] = mquarterinvfact*tmp2;
130  mmp[rb[1]] = fact*psi + tmp3;
131 
132  // Do a vaxpy3 norm here to get r and LOCAL || r ||^2 together
133  REAL cp;
134  LDble sum;
135  //r[s] = chi - mmp; and || r[s] ||^2
136  vaxpy3_norm( FIRST_ELEM(r,s), &mone, FIRST_ELEM(mmp,s), (REAL *)FIRST_ELEM(chi,s),
137  n_3vec, &cp);
138 
139  // Do DP sum. Could save function call overhead here
140  AT_REAL(sum) = (DOUBLE)cp;
141  QDPInternal::globalSum(sum);
142  cp = (REAL)AT_REAL(sum);
143 
144  // p[1] := r[0]
145  LFerm p;
146  p[s] = r;
147 
148 
149  //QDPIO::cout << "InvCG: k = 0 cp = " << cp << " rsd_sq = " << rsd_sq << std::endl;
150 
151  // Disable early termination
152 #if 0
153  // IF |r[0]| <= RsdCG |Chi| THEN RETURN;
154  // cp is now a REAL so no need for the Chroma toBool() ism
155  if ( cp <= rsd_sq )
156  {
157  n_count = 0;
158  return;
159  }
160 #endif
161 
162  //
163  // FOR k FROM 1 TO n_count DO
164  //
165  REAL a, b, c, d;
166  REAL *aptr;
167  LScal as,bs; // For expressions
168  REAL ma;
169 
170  // Go to n_count iters
171  for(int k = 1; k <= n_count; ++k)
172  {
173  // c = | r[k-1] |**2
174  c = cp;
175 
176  // a[k] := | r[k-1] |**2 / < p[k], Ap[k] > ;
177  // +
178  // First compute d = < p, A.p > = < p, M . M . p > = < M.p, M.p >
179  // Mp = M(u) * p
180  D.apply(tmp1, p, PLUS, 0);
181  D.apply(tmp2, tmp1,PLUS , 1);
182  tmp3[rb[1]] = mquarterinvfact*tmp2;
183 
184  // replace this with a vaxpy norm
185  // in the future will need a vaxpby_norm
186  // mp[rb[1]] = fact*psi + tmp3;
187  // d = | mp | ** 2
189  &AT_REAL(fact),
190  FIRST_ELEM(p,s),
191  FIRST_ELEM(tmp3,s),
192  n_3vec,
193  &d);
194 
195  // Global sum the local norm
196  AT_REAL(sum) = (DOUBLE)d;
197  QDPInternal::globalSum(sum);
198  d = (REAL)AT_REAL(sum);
199 
200  // Disable this. Set a = 1 to stop convergence
201 #if 0
202  a = c/d;
203 #else
204  a = 1/1;
205 #endif
206 
207  // Psi[k] += a[k] p[k]
208  AT_REAL(as) = a; // For expressions below
209  psi[s] += as * p; /* 2 Nc Ns flops */
210 
211  // r[k] -= a[k] A . p[k] ;
212  // + +
213  // r = r - M(u) . Mp = M . M . p = A . p
214  D.apply(tmp1, mp, MINUS, 0);
215  D.apply(tmp2, tmp1, MINUS, 1);
216  tmp3[rb[1]] = mquarterinvfact*tmp2;
217  mmp[rb[1]] = fact*mp + tmp3;
218 
219  // replace these with a vaxpy3 norm.
220  //r[s] -= a * mmp;
221  // cp = | r[k] |**2
222  ma = -a;
224  &ma,
225  FIRST_ELEM(mmp,s),
226  FIRST_ELEM(r,s), n_3vec, &cp);
227 
228  // Global sum the local norm
229  AT_REAL(sum)=(DOUBLE)cp;
230  QDPInternal::globalSum(sum);
231  cp = (REAL)AT_REAL(sum);
232 
233 
234  //QDPIO::cout << "InvCG: k = " << k << " cp = " << cp << std::endl;
235 
236  // Disable termination
237 #if 0
238  // cp and rsd_sq are now REAL so no need for Chroma toBool() ism
239  if ( cp <= rsd_sq )
240  {
241  n_count = k;
242  return;
243  }
244 #endif
245 
246  // Disable convergence
247 #if 0
248  // b[k+1] := |r[k]|**2 / |r[k-1]|**2
249  b = cp / c;
250 #else
251  b = 1/1;
252 #endif
253 
254  // For expressions
255  AT_REAL(bs) = b;
256  // p[k+1] := r[k] + b[k+1] p[k]
257  p[s] = r + bs*p; /* Nc Ns flops */
258  }
259  // n_count = MaxCG;
260  // QDP_error_exit("too many CG iterations: count = %d", n_count);
261 }
262 
Primary include file for CHROMA library code.
EXTERN int MaxCG
void InvCG2EvenOddPrecWilsLinOpTHack(const WilsonDslash &D, const LFerm &chi, LFerm &psi, const LScal &mass, const LScal &RsdCG, int MaxCG, int &n_count)
Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator.
OLattice< PSpinVector< PColorVector< RComplex< PScalar< REAL > >, Nc >, Ns > > LFerm
Highly optimised Conjugate-Gradient (CGNE) algorithm for a Even Odd Preconditioned.
#define AT_REAL(a)
OScalar< PScalar< PScalar< RScalar< PScalar< DOUBLE > > > > > LDble
#define FIRST_ELEM(a, s)
OScalar< PScalar< PScalar< RScalar< PScalar< REAL > > > > > LScal
Conjugate-Gradient algorithm for a generic Linear Operator.
void vaxpy3_norm(REAL *Out, REAL *scalep, REAL *InScale, REAL *Add, int n_3vec, REAL *dsum)
unsigned s
Definition: ldumul_w.cc:37
int c
Definition: meslate.cc:61
Nd
Definition: meslate.cc:74
Double tmp2
Definition: mesq.cc:30
Double tmp3
Definition: mesq.cc:31
QDPWilsonDslash WilsonDslash
Definition: dslash_w.h:132
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
Definition: pbg5p_w.cc:30
Real rsd_sq
Definition: invbicg.cc:121
Double mass
Definition: pbg5p_w.cc:54
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
Double cp
Definition: invbicg.cc:107
Complex a
Definition: invbicg.cc:95
DComplex d
Definition: invbicg.cc:99
multi1d< LatticeFermion > mp(Ncb)
Complex b
Definition: invbicg.cc:96
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > r(Ncb)
chi
Definition: pade_trln_w.cc:24
psi
Definition: pade_trln_w.cc:191
int n_count
Definition: pade_trln_w.cc:69
Double sum
Definition: qtopcor.cc:37