CHROMA
invbicg.cc
Go to the documentation of this file.
1 
2 // THIS ROUTINE IS NOT FUNCTIONAL YET - SIMPLE CLEANUP EDITING IS REQUIRED
3 
4 namespace Chroma {
5 
6 /* INVBICG */
7 
8 /* This subroutine uses the stabilized BiConjugate Gradient algorithm to */
9 /* determine the solution of the set of linear equations */
10 
11 /* Chi = A . Psi */
12 
13 /* Algorithm: */
14 
15 /* Psi[0] Argument */
16 /* r[0] := Chi - A . Psi[0] ; Initial residual */
17 /* rv := r[0] ; for second Krylov space */
18 /* p[1] := r[0] ; initial direction */
19 /* IF |r[0]| <= RsdBiCG |Chi| THEN RETURN; Converged? */
20 /* FOR k FROM 1 TO MaxCG DO BiCG iterations */
21 /* mp[k] := A.p[k] ; */
22 /* a[k] := <rv,r[k-1]> / <rv,mp[k]> ; */
23 /* s[k] := r[k-1] - a[k] mp[k] ; */
24 /* ms[k] := A.s[k] ; */
25 /* omega[k]:= <ms[k],s[k]> / <ms[k],ms[k]> ; */
26 /* Psi[k] += omega[k] s[k] + a[k] p[k] ; New solution std::vector */
27 /* r[k] := s[k] - omega[k] ms[k] ; New residual */
28 /* IF |r[k]| <= RsdBiCG |Chi| THEN RETURN; Converged? */
29 /* b[k+1] := (<rv,r[k]> / <rv,r[k-1]>) * (a[k]/omega[k]) ; */
30 /* p[k+1] := r[k] + b[k+1] * (p[k] - omega[k] mp[k]) */
31 
32 /* Arguments: */
33 
34 /* A Linear Operator (Read) */
35 /* Chi Pseudofermion Source (Read) */
36 /* Psi Solution (Modify) */
37 /* Chi_Norm |Chi| (Read) */
38 /* RsdBiCG BiCG residue (Read) */
39 /* Ncb Number of checkerboards (Write) */
40 /* N_Count Number of BiCG iteration (Write) */
41 
42 /* Local Variables: */
43 
44 /* r Residual std::vector */
45 /* rv For second Krylov space */
46 /* p Direction std::vector */
47 /* cp | r[k] |**2 */
48 /* c | r[k-1] |**2 */
49 /* rvr < rv, r[k-1] > */
50 /* k BiCG iteration counter */
51 /* a a[k] */
52 /* omega omega[k] */
53 /* ms2 | Ms[k] |**2 */
54 /* d Temporary for < rv, A.p[k] >, < A.s[k], s[k] > and rvr_old */
55 /* Mp Temporary for M.p */
56 /* s Temporary */
57 /* Ms Temporary for M.s */
58 
59 /* Global Variables: */
60 
61 /* MaxCG Maximum number of BiCG iterations allowed */
62 
63 /* Subroutines: */
64 
65 /* A Apply matrix linear operator to std::vector */
66 
67 /* Operations: */
68 
69 /* ???? */
70 
71 include(types.mh)
72 
73 SUBROUTINE(InvBiCGm, A,chi,psi,chi_norm,RsdBiCG,Ncb,n_count)
74 
76 multi1d<LatticeFermion> psi(Ncb);
77 multi1d<LatticeFermion> chi(Ncb);
78 int n_count;
80 int Ncb;
81 Real RsdBiCG;
82 
83 { /* Local variables */
84  include(COMMON_DECLARATIONS)
85 
86  PROTOTYPE(A, DESC, DESC, DESC, VAL, VAL)
87 
88  multi1d<LatticeFermion> r(Ncb);
89  multi1d<LatticeFermion> rv(Ncb);
90  multi1d<LatticeFermion> p(Ncb);
91  multi1d<LatticeFermion> mp(Ncb);
92  multi1d<LatticeFermion> s(Ncb);
93  multi1d<LatticeFermion> ms(Ncb);
94  multi1d<LatticeComplex> lc(Ncb);
95  Complex a;
96  Complex b;
97  Complex omega;
98  DComplex rvr;
99  DComplex d;
100  DComplex ct1;
101  DComplex ct2;
102  Complex ct3;
112  Real rt3;
113  Real rt4;
114  Real one_s;
115  Real re_a;
116  Real im_a;
117  Real re_b;
118  Real im_b;
119  int k;
120  int cb;
121  Real rsd_sq;
122 
124 
126 
127  one = 1;
128  one_s = 1;
129  zero = 0;
130 
131 
132  /* r[0] := Chi - A . Psi[0] */
133  /* r := A . Psi */
134  A (A, psi, r, Ncb, PLUS);
135 
136  /* r := ( Chi - R ) = [ Chi - M(u) . psi ] */
137  r = -r; /* Nc Ns flops */
138  r += chi; /* Nc Ns flops */
139 
140  /* Cp = |r[0]|^2 */
141  cp = 0;
142  for(cb = 0; cb < Ncb; ++cb)
143  cp += norm2(r[cb]); /* 2 Nc Ns flops */
144 
145  /* IF |r[0]| <= RsdBiCG |Chi| THEN RETURN; */
146 /* printf("ClovBiCG: k = 0 cp = %g\n",cp); */
147 
148  if ( TO_REAL(cp) <= rsd_sq )
149  {
150  n_count = 0;
151  END_CODE();
152  return;
153  }
154 
155 
156  /* p[1] := r[0] */
157  p = r;
158  /* rv := r[0] */
159  rv = r;
160 
161  /* rvr = < rv, r[0] > = |r[0]|^2 = cp */
162  rvr = cmplx(cp,zero);
163 
164  /* FOR k FROM 1 TO MaxCG DO */
165 /* printf("MaxCG = %d\n",MaxCG); */
166 /* printf("(RsdBiCG * chi_norm)**2 = %g\n", rsd_sq; */
167  for(k = 1; k <= MaxCG; ++k)
168  {
169  /* c = | r[k-1] |**2 */
170  c = cp;
171 
172  /* mp = M(u) * p */
173  A (A, p, mp, Ncb, PLUS);
174 
175  /* a[k] := < rv, r[k-1] > / < rv, mp > ; */
176  /* d = < rv, mp > */
177  lc = trace(adj[rv] * mp);
178  d = 0;
179  for(cb = 0; cb < Ncb; ++cb)
180  d += sum(lc[cb]); /* ?? flops */
181 
182  /* a = rvr / d */
183  /*#ct1 = rvr / d; */
184  /* Hack instead: */
185  rt1 = localNorm2(d);
186  ct2 = adj(d) * rvr;
187  rt2 = one / rt1;
188  ct1 = ct2 * rt2;
189  a = FLOAT(ct1);
190 
191  /* s = r - a mp */
192  s = r;
193  for(cb = 0; cb < Ncb; ++cb)
194  s[cb] -= mp[cb] * a; /* 2 Nc Ns flops */
195 
196  /* ms = A . s */
197  A (A, s, ms, Ncb, PLUS);
198 
199  /* omega[k] := < ms[k], s[k] > / < ms[k], ms[k] > ; */
200  /* d = < ms, s > */
201  lc = trace(adj[ms] * s);
202  d = 0;
203  for(cb = 0; cb < Ncb; ++cb)
204  d += sum(lc[cb]); /* ?? flops */
205 
206  /* ms2 = | Ms | ** 2 */
207  ms2 = 0;
208  for(cb = 0; cb < Ncb; ++cb)
209  ms2 += norm2(ms[cb]); /* 2 Nc Ns flops */
210 
211  /* omega = d / ms2 */
212  /*#ct1 = d / ms2; */
213  /* Hack instead: */
214  rt1 = one / ms2;
215  ct1 = d * rt1;
216  omega = FLOAT(ct1);
217 
218  /* Psi[k] += omega[k] s[k] + a[k] p[k] ; */
219  for(cb = 0; cb < Ncb; ++cb)
220  {
221  psi[cb] += s[cb] * omega; /* 2 Nc Ns flops */
222  psi[cb] += p[cb] * a; /* 2 Nc Ns flops */
223  }
224 
225  /* r[k] := s[k] - omega[k] ms[k] ; */
226  r = s;
227  for(cb = 0; cb < Ncb; ++cb)
228  r[cb] -= ms[cb] * omega; /* 2 Nc Ns flops */
229 
230  /* cp = | r[k] |**2 */
231  cp = 0;
232  for(cb = 0; cb < Ncb; ++cb)
233  cp += norm2(r[cb]); /* 2 Nc Ns flops */
234 
235 /* printf("ClovBiCG: k = %d cp = %g\n",k, cp); */
236 
237  if ( TO_REAL(cp) <= rsd_sq )
238  {
239  n_count = k;
240  END_CODE();
241  return;
242  }
243 
244  /* b[k+1] := (<rv,r[k]> / <rv,r[k-1]>) * (a[k]/omega[k]) ; */
245  /* d = < rv, r[k-1] > */
246  d = rvr;
247 
248  /* rvr = < rv, r[k] > */
249  lc = trace(adj[rv] * r);
250  rvr = 0;
251  for(cb = 0; cb < Ncb; ++cb)
252  rvr += sum(lc[cb]); /* ?? flops */
253 
254  /* b = rvr / d */
255  /*#ct1 = rvr / d; */
256  /* Hack instead: */
257  rt1 = localNorm2(d);
258  ct2 = adj(d) * rvr;
259  rt2 = one / rt1;
260  ct1 = ct2 * rt2;
261  b = FLOAT(ct1);
262 
263  /* b = b * a / omega */
264  ct3 = b * a;
265  /*#b = ct3 / omega; */
266  /* Hack instead: */
267  rt3 = localNorm2(omega);
268  a = adj(omega) * ct3;
269  rt4 = one_s / rt3;
270  b = ct3 * rt4;
271 
272  /* p[k+1] := r[k] + b[k+1] * (p[k] - omega[k] mp[k]) */
273  for(cb = 0; cb < Ncb; ++cb)
274  {
275  p[cb] -= mp[cb] * omega; /* 2 Nc Ns flops */
276  mp[cb] = p[cb] * b; /* 2 Nc Ns flops */
277  p[cb] = mp[cb];
278  p[cb] += r[cb]; /* Nc Ns flops */
279  }
280  }
281  n_count = MaxCG;
282  re_rvr = real(rvr);
283  im_rvr = imag(rvr);
284  re_a = real(a);
285  im_a = imag(a);
286  re_b = real(b);
287  im_b = imag(b);
288 
289 
290  QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b);
292 }
293 
294 } // end namespace Chroma
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
Real one_s
Definition: invbicg.cc:114
Real re_b
Definition: invbicg.cc:117
Real im_a
Definition: invbicg.cc:116
Real RsdBiCG
Definition: invbicg.cc:81
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
Real re_a
Definition: invbicg.cc:115
int n_count
Definition: invbicg.cc:78
Double ms2
Definition: invbicg.cc:109
Double im_rvr
Definition: invbicg.cc:104
Double one
Definition: invbicg.cc:105
Real rt3
Definition: invbicg.cc:112
Double c
Definition: invbicg.cc:108
DComplex rvr
Definition: invbicg.cc:98
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
Definition: pbg5p_w.cc:32
Real rsd_sq
Definition: invbicg.cc:121
multi1d< LatticeFermion > ms(Ncb)
@ PLUS
Definition: chromabase.h:45
Double re_rvr
Definition: invbicg.cc:103
Double cp
Definition: invbicg.cc:107
Real im_b
Definition: invbicg.cc:118
Double rt2
Definition: invbicg.cc:111
multi1d< LatticeFermion > chi(Ncb)
Double rt1
Definition: invbicg.cc:110
Complex a
Definition: invbicg.cc:95
Complex omega
Definition: invbicg.cc:97
LatticeFermion psi
Definition: mespbg5p_w.cc:35
DComplex ct1
Definition: invbicg.cc:100
DComplex d
Definition: invbicg.cc:99
multi1d< LatticeComplex > lc(Ncb)
START_CODE()
A(A, psi, r, Ncb, PLUS)
multi1d< LatticeFermion > mp(Ncb)
Complex b
Definition: invbicg.cc:96
int cb
Definition: invbicg.cc:120
include(types.mh) LINEAR_OPERATOR(A)
int Ncb
Definition: invbicg.cc:80
Double chi_norm
Definition: invbicg.cc:79
DComplex ct2
Definition: invbicg.cc:101
Double zero
Definition: invbicg.cc:106
Complex ct3
Definition: invbicg.cc:102
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > s(Ncb)
Real rt4
Definition: invbicg.cc:113
FloatingPoint< double > Double
Definition: gtest.h:7351
LINEAR_OPERATOR(D)
Double sum
Definition: qtopcor.cc:37