CHROMA
minv_rel_cg.cc
Go to the documentation of this file.
1 
2 /*! \file
3  * \brief Multishift Conjugate-Gradient algorithm for a Linear Operator
4  */
5 
6 #include "linearop.h"
8 
9 namespace Chroma {
10 
11 //! Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator
12 /*! \ingroup invert
13  *
14  * This subroutine uses the Conjugate Gradient (CG) algorithm to find
15  * the solution of the set of linear equations
16  * Method used is described in Jegerlehner, hep-lat/9708029
17 
18  * We are searching in a subspace orthogonal to the eigenvectors EigVec
19  * of A. The source chi is assumed to already be orthogonal!
20 
21  * Chi = A . Psi
22 
23  * Algorithm:
24 
25  * Psi[0] := 0; Zeroed
26  * r[0] := Chi; Initial residual
27  * p[1] := Chi ; Initial direction
28  * b[0] := |r[0]|**2 / <p[0],Ap[0]> ;
29  * z[0] := 1 / (1 - (shift - shift(0))*b)
30  * bs[0] := b[0] * z[0]
31  * r[1] += b[k] A . p[0] ; New residual
32  * Psi[1] = - b[k] p[k] ; Starting solution std::vector
33  * IF |r[0]| <= RsdCG |Chi| THEN RETURN; Converged?
34  * FOR k FROM 1 TO MaxCG DO CG iterations
35  * a[k] := |r[k]|**2 / |r[k-1]|**2 ;
36  * p[k] := r[k] + a[k] p[k-1]; New direction
37  * b[k+1] := |r[k]|**2 / <p[k],Ap[k]> ;
38  * r[k+1] += b[k+1] A . p[k] ; New residual
39  * Psi[k+1] -= b[k+1] p[k] ; New solution std::vector
40  * IF |[k+1]| <= RsdCG |Chi| THEN RETURN; Converged?
41 
42  * Arguments:
43 
44  * A Hermitian linear operator (Read)
45  * Chi Source (Read)
46  * Psi array of solutions (Write)
47  * shifts shifts of form A + mass (Read)
48  * RsdCG residual accuracy (Read/Write)
49  * n_count Number of CG iteration (Write)
50 
51  * Local Variables:
52 
53  * p Direction std::vector
54  * r Residual std::vector
55  * cp | r[k] |**2
56  * c | r[k-1] |**2
57  * k CG iteration counter
58  * a a[k]
59  * b b[k+1]
60  * d < p[k], A.p[k] >
61  * Ap Temporary for M.p
62 
63  * MaxCG Maximum number of CG iterations allowed
64 
65  * Subroutines:
66  * A Apply matrix hermitian A to std::vector
67  */
68 
69 template<typename T>
71  const T& chi,
72  multi1d<T>& psi,
73  const multi1d<Real>& shifts,
74  const multi1d<Real>& RsdCG,
75  int MaxCG,
76  int& n_count)
77 {
78  START_CODE();
79 
80  const Subset& sub = A.subset();
81 
82  int n_shift = shifts.size();
83 
84  if (n_shift == 0) {
85  QDP_error_exit("MinvCG: You must supply at least 1 mass: mass.size() = %d",
86  n_shift);
87  }
88 
89  /* Now find the smallest mass */
90  int isz = 0;
91  for(int findit=1; findit < n_shift; ++findit) {
92  if ( toBool( shifts[findit] < shifts[isz]) ) {
93  isz = findit;
94  }
95  }
96 
97 #if 0
98  QDPIO::cout << "n_shift = " << n_shift << " isz = " << isz << " shift = " << shifts[0] << std::endl;
99 #endif
100 
101  // We need to make sure, that psi is at least as big as the number
102  // of shifts. We resize it if it is not big enough.
103  // However, it is allowed to be bigger.
104  if( psi.size() < n_shift ) {
105  psi.resize(n_shift);
106  }
107 
108  // For this algorithm, all the psi have to be 0 to start
109  // Only is that way the initial residuum r = chi
110  for(int i= 0; i < n_shift; ++i) {
111  psi[i][sub] = zero;
112  }
113 
114  // If chi has zero norm then the result is zero
115  Double chi_norm_sq = norm2(chi,sub);
116  Double chi_norm = sqrt(chi_norm_sq);
117 
118 
119  if( toBool( chi_norm < fuzz )) {
120  n_count = 0;
121 
122  // The psi are all zero anyway at this point
123  // for(int i=0; i < n_shift; i++) { psi[i] = zero; }
124  END_CODE();
125  return;
126  }
127 
128  multi1d<Double> rsd_sq(n_shift);
129  multi1d<Double> rsdcg_sq(n_shift);
130 
131  Double cp = chi_norm_sq;
132  int s;
133  for(s = 0; s < n_shift; ++s) {
134  rsdcg_sq[s] = RsdCG[s] * RsdCG[s]; // RsdCG^2
135  rsd_sq[s] = Real(cp) * rsdcg_sq[s]; // || chi ||^2 RsdCG^2
136  }
137 
138 
139  // r[0] := p[0] := Chi
140  T r;
141  r[sub] = chi;
142 
143  // Zeta relaxation factor
144  Double zeta = 1 / norm2(r,sub);
145 
146  // Psi[0] := 0;
147  multi1d<T> p(n_shift);
148  for(s = 0; s < n_shift; ++s) {
149  p[s][sub] = chi;
150  }
151 
152 
153  // b[0] := - | r[0] |**2 / < p[0], Ap[0] > ;/
154  // First compute d = < p, A.p >
155  // Ap = A . p */
156  LatticeFermion Ap;
157 
158 
159  // Relaxation -- apply A with espilon || chi ||^2 * sqrt(zeta)
160  // use the smallest active epsilon_i || chi ||^2
161 
162  // Find smallest active rsd_sq = epsilon_i
163  Real rsd_sq_min = rsd_sq[0];
164  for(s = 1; s < n_shift; ++s) {
165  if( toBool( rsd_sq[s] < rsd_sq_min ) ) {
166  rsd_sq_min = rsd_sq[s];
167  }
168  }
169 
170  Real inner_tol = sqrt(rsd_sq_min)*sqrt(zeta);
171  A(Ap, p[isz], PLUS, inner_tol);
172  Ap[sub] += p[isz] * shifts[isz];
173 
174  /* d = < p, A.p > */
175  Double d = real(innerProduct(p[isz], Ap, sub)); // 2Nc Ns flops
176 
177 
178 
179  Double b = -cp/d;
180 
181  /* Compute the shifted bs and z */
182  multi1d<Double> bs(n_shift);
183  multi2d<Double> z(2, n_shift);
184  int iz;
185 
186  z[0][isz] = Double(1);
187  z[1][isz] = Double(1);
188  bs[isz] = b;
189  iz = 1;
190 
191  for(s = 0; s < n_shift; ++s)
192  {
193  if( s != isz ) {
194  z[1-iz][s] = Double(1);
195  z[iz][s] = Double(1) / (Double(1) - (Double(shifts[s])-Double(shifts[isz]))*b);
196  bs[s] = b * z[iz][s];
197  }
198  }
199 
200  // r[1] += b[0] A . p[0];
201  r[sub] += Ap * Real(b); // 2 Nc Ns flops
202 
203  // Psi[1] -= b[0] p[0] = - b[0] chi;
204  for(s = 0; s < n_shift; ++s) {
205  psi[s][sub] = - Real(bs[s])*chi; // 2 Nc Ns flops
206  }
207 
208  // c = |r[1]|^2
209  Double c = norm2(r,sub); // 2 Nc Ns flops
210  zeta += Real(1)/c;
211 
212  // Check convergence of first solution
213  multi1d<bool> convsP(n_shift);
214  for(s = 0; s < n_shift; ++s) {
215  convsP[s] = false;
216  }
217 
218  bool convP = toBool( c < rsd_sq[isz] );
219 
220 #if 0
221  QDPIO::cout << "MInvCG: k = 0 r = " << sqrt(c) << std::endl;
222 #endif
223 
224  // FOR k FROM 1 TO MaxCG DO
225  // IF |psi[k+1] - psi[k]| <= RsdCG |psi[k+1]| THEN RETURN;
226  Double z0, z1;
227  Double ztmp;
228  Double cs;
229  Double a;
230  Double as;
231  Double bp;
232  int k;
233 
234  for(k = 1; k <= MaxCG && !convP ; ++k)
235  {
236  // a[k+1] := |r[k]|**2 / |r[k-1]|**2 ;
237  a = c/cp;
238 
239  // p[k+1] := r[k+1] + a[k+1] p[k];
240  // Compute the shifted as */
241  // ps[k+1] := zs[k+1] r[k+1] + a[k+1] ps[k];
242  for(s = 0; s < n_shift; ++s) {
243 
244  // Always update p[isz] even if isz is converged
245  // since the other p-s depend on it.
246  if (s == isz) {
247  p[s][sub] *= Real(a); // Nc Ns flops
248  p[s][sub] += r; // Nc Ns flops
249  }
250  else {
251  // Don't update other p-s if converged.
252  if( ! convsP[s] ) {
253  as = a * z[iz][s]*bs[s] / (z[1-iz][s]*b);
254 
255  p[s][sub] *= Real(as); // Nc Ns flops
256  p[s][sub] += r * Real(z[iz][s]); // Nc Ns flops
257  }
258  }
259 
260  }
261 
262  // cp = | r[k] |**2
263  cp = c;
264 
265  // b[k] := | r[k] |**2 / < p[k], Ap[k] > ;
266  // First compute d = < p, A.p >
267  // Ap = A . p
268 
269  // Find smallest active rsd_sq = epsilon_i
270 
271  // First find the smallest unconverged rsd_sq:
272  // find first unconverged system
273  int unc=0;
274  while ( convsP[unc] == true ) {
275  unc++;
276  }
277 
278  // compare its rsd_sq with other unconverged systems
279  rsd_sq_min = rsd_sq[unc];
280  for(s = unc+1; s < n_shift; ++s) {
281  if( !convsP[s] ) {
282  if( toBool( rsd_sq[s] < rsd_sq_min ) ) {
283  rsd_sq_min = rsd_sq[s];
284  }
285  }
286  }
287 
288  inner_tol = sqrt(rsd_sq_min)*sqrt(zeta);
289  A(Ap, p[isz], PLUS, inner_tol);
290  Ap[sub] += p[isz] * shifts[isz];
291 
292  /* d = < p, A.p > */
293  d = real(innerProduct(p[isz], Ap, sub)); // 2 Nc Ns flops
294 
295  bp = b;
296  b = -cp/d;
297 
298  // Compute the shifted bs and z
299  bs[isz] = b;
300  iz = 1 - iz;
301  for(s = 0; s < n_shift; s++) {
302 
303 
304  if (s != isz && !convsP[s] ) {
305  z0 = z[1-iz][s];
306  z1 = z[iz][s];
307  z[iz][s] = z0*z1*bp;
308  z[iz][s] /= b*a*(z1-z0) + z1*bp*(Double(1) - (shifts[s] - shifts[isz])*b);
309  bs[s] = b*z[iz][s]/z0;
310  }
311  }
312 
313  // r[k+1] += b[k] A . p[k] ;
314  r[sub] += Ap * Real(b); // 2 Nc Ns flops
315 
316 
317  // Psi[k+1] -= b[k] p[k] ;
318  for(s = 0; s < n_shift; ++s) {
319  if (! convsP[s] ) {
320  psi[s][sub] -= p[s] * Real(bs[s]); // 2 Nc Ns flops
321  }
322  }
323 
324  // c = | r[k] |**2
325  c = norm2(r,sub); // 2 Nc Ns flops
326  zeta += Real(1)/c;
327 
328  // IF |psi[k+1] - psi[k]| <= RsdCG |psi[k+1]| THEN RETURN;
329  // or IF |r[k+1]| <= RsdCG |chi| THEN RETURN;
330  convP = true;
331  for(s = 0; s < n_shift; s++) {
332  if (! convsP[s] ) {
333 
334 
335  // Convergence methods
336  // Check norm of shifted residuals
337  Double css = c * z[iz][s]* z[iz][s];
338 
339 #if 0
340  QDPIO::cout << "MInvCG (shift=" << s << ") k = " << k <<" r = "
341  << css << " rsd_sq["<<s<<"] = " << rsd_sq[s] << std::endl;
342 #endif
343 
344  convsP[s] = toBool( css < rsd_sq[s] );
345 
346 
347 
348 #if 0
349 
350  //
351  // Check relative error of solution
352 
353  // cs holds | beta_s p_s |^2 = | psi_next |^2
354  cs = norm2(p[s],sub); // 2 Nc Ns flops
355  cs *= bs[s]*bs[s];
356 
357  // d holds | psi |^2 * epsilon^2
358  d = norm2(psi[s],sub); // 2 Nc Ns flops
359  d *= rsdcg_sq[s];
360 
361 
362  // Terminate if | psi |^2/|psi_next|^2 < epsilon^2
363  convsP[s] = toBool( cs < d );
364 
365 #if 0
366  QDPIO::cout << "MInvCG (shift=" << s << ") k = " << k << " cs = "
367  << cs << " d = " << d << std::endl;
368 #endif
369 #endif
370 
371  }
372  convP &= convsP[s];
373  }
374 
375  n_count = k;
376  }
377 
378 #if 1
379  // Expicitly check the ALL solutions
380  for(s = 0; s < n_shift; ++s)
381  {
382  A(Ap, psi[s], PLUS);
383  Ap[sub] += psi[s] * shifts[s];
384 
385  Ap[sub] -= chi;
386 
387  c = norm2(Ap,sub); /* 2 Nc Ns flops */
388 
389  QDPIO::cout << "MInvCG (conv): s = " << s
390  << " shift = " << shifts[s]
391  << " r = " << Real(sqrt(c)/chi_norm) << std::endl;
392 
393  }
394  /* end */
395 #endif
396 
397  if (n_count == MaxCG) {
398  QDP_error_exit("too many CG iterationns: %d\n", n_count);
399  }
400  else {
401  QDPIO::cout << "MinvCG: " << n_count << " iterations" << std::endl;
402  }
403 
404  END_CODE();
405  return;
406 }
407 
408 
409 template<>
411  const LatticeFermion& chi,
412  multi1d<LatticeFermion>& psi,
413  const multi1d<Real>& shifts,
414  const multi1d<Real>& RsdCG,
415  int MaxCG,
416  int &n_count)
417 {
418  MInvRelCG_a(M, chi, psi, shifts, RsdCG, MaxCG, n_count);
419 }
420 
421 } // end namespace Chroma
Linear Operator.
Definition: linearop.h:27
void MInvRelCG_a(const LinearOperator< T > &A, const T &chi, multi1d< T > &psi, const multi1d< Real > &shifts, const multi1d< Real > &RsdCG, int MaxCG, int &n_count)
Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator.
Definition: minv_rel_cg.cc:70
Linear Operators.
int z
Definition: meslate.cc:36
int iz
Definition: meslate.cc:32
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
Definition: pbg5p_w.cc:30
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
int n_count
Definition: invbicg.cc:78
Double c
Definition: invbicg.cc:108
LinOpSysSolverMGProtoClover::T T
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
Definition: pbg5p_w.cc:32
Real rsd_sq
Definition: invbicg.cc:121
int i
Definition: pbg5p_w.cc:55
@ PLUS
Definition: chromabase.h:45
Double cp
Definition: invbicg.cc:107
multi1d< LatticeFermion > chi(Ncb)
Complex a
Definition: invbicg.cc:95
LatticeFermion psi
Definition: mespbg5p_w.cc:35
DComplex d
Definition: invbicg.cc:99
void MInvRelCG(const LinearOperator< LatticeFermion > &M, const LatticeFermion &chi, multi1d< LatticeFermion > &psi, const multi1d< Real > &shifts, const multi1d< Real > &RsdCG, int MaxCG, int &n_count)
Definition: minv_rel_cg.cc:410
START_CODE()
A(A, psi, r, Ncb, PLUS)
Complex b
Definition: invbicg.cc:96
Double chi_norm
Definition: invbicg.cc:79
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Definition: gtest.h:7351
int isz
Definition: pade_trln_w.cc:151