CHROMA
invbicgstab.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Conjugate-Gradient algorithm for a generic Linear Operator
3  */
4 
5 #include "chromabase.h"
7 
8 namespace Chroma {
9 
10  template<typename T, typename CR>
11 SystemSolverResults_t
13  const T& chi,
14  T& psi,
15  const Real& RsdBiCGStab,
16  int MaxBiCGStab,
17  enum PlusMinus isign)
18 
19 {
21  StopWatch swatch;
22  FlopCounter flopcount;
23  flopcount.reset();
24  const Subset& s = A.subset();
25  bool convP = false;
26  ret.n_count = MaxBiCGStab;
27 
28  swatch.reset();
29  swatch.start();
30 
31  Double chi_sq = norm2(chi,s);
32  flopcount.addSiteFlops(4*Nc*Ns,s);
33 
34 
35  Double rsd_sq = RsdBiCGStab*RsdBiCGStab*chi_sq;
36 
37  // First get r = r0 = chi - A psi
38  T r;
39  T r0;
40 
41  // Get A psi, use r0 as a temporary
42  A(r0, psi, isign);
43  flopcount.addFlops(A.nFlops());
44 
45  // now work out r= chi - Apsi = chi - r0
46  r[s] = chi - r0;
47  flopcount.addSiteFlops(2*Nc*Ns,s);
48 
49  // Also copy back to r0. We are no longer in need of the
50  // nth component
51  r0[s] = r;
52 
53  // Now we have r = r0 = chi - Mpsi
54 
55  // Now initialise v = p = 0
56  T p;
57  T v;
58 
59  p[s] = zero;
60  v[s] = zero;
61 
62  T tmp;
63  T t;
64 
65  ComplexD rho, rho_prev, alpha, omega;
66 
67  // rho_0 := alpha := omega = 1
68  // Iterations start at k=1, so rho_0 is in rho_prev
69  rho_prev = Double(1);
70  alpha = Double(1);
71  omega = Double(1);
72 
73  // The iterations
74  for(int k = 1; k <= MaxBiCGStab && !convP ; k++) {
75 
76  // rho_{k+1} = < r_0 | r >
77  rho = innerProduct(r0,r,s);
78 
79 
80  if( toBool( real(rho) == 0 ) && toBool( imag(rho) == 0 ) ) {
81  QDPIO::cout << "BiCGStab breakdown: rho = 0" << std::endl;
82  QDP_abort(1);
83  }
84 
85  // beta = ( rho_{k+1}/rho_{k})(alpha/omega)
86  ComplexD beta;
87  beta = ( rho / rho_prev ) * (alpha/omega);
88 
89  // p = r + beta(p - omega v)
90 
91  // first work out p - omega v
92  // into tmp
93  // then do p = r + beta tmp
94  CR omega_r = omega;
95  CR beta_r = beta;
96  tmp[s] = p - omega_r*v;
97  p[s] = r + beta_r*tmp;
98 
99 
100  // v = Ap
101  A(v,p,isign);
102 
103 
104  // alpha = rho_{k+1} / < r_0 | v >
105  // put <r_0 | v > into tmp
106  DComplex ctmp = innerProduct(r0,v,s);
107 
108 
109  if( toBool( real(ctmp) == 0 ) && toBool( imag(ctmp) == 0 ) ) {
110  QDPIO::cout << "BiCGStab breakdown: <r_0|v> = 0" << std::endl;
111  QDP_abort(1);
112  }
113 
114  alpha = rho / ctmp;
115 
116  // Done with rho now, so save it into rho_prev
117  rho_prev = rho;
118 
119  // s = r - alpha v
120  // I can overlap s with r, because I recompute it at the end.
121  CR alpha_r = alpha;
122  r[s] -= alpha_r*v;
123 
124 
125  // t = As = Ar
126  A(t,r,isign);
127  // omega = < t | s > / < t | t > = < t | r > / norm2(t);
128 
129  // This does the full 5D norm
130  Double t_norm = norm2(t,s);
131 
132 
133  if( toBool(t_norm == 0) ) {
134  QDPIO::cerr << "Breakdown || Ms || = || t || = 0 " << std::endl;
135  QDP_abort(1);
136  }
137 
138  // accumulate <t | s > = <t | r> into omega
139  omega = innerProduct(t,r,s);
140  omega /= t_norm;
141 
142  // psi = psi + omega s + alpha p
143  // = psi + omega r + alpha p
144  //
145  // use tmp to compute psi + omega r
146  // then add in the alpha p
147  omega_r = omega;
148  alpha_r = alpha;
149  tmp[s] = psi + omega_r*r;
150  psi[s] = tmp + alpha_r*p;
151 
152 
153 
154  // r = s - omega t = r - omega t1G
155 
156 
157  r[s] -= omega_r*t;
158 
159 
160  Double r_norm = norm2(r,s);
161 
162 
163  // QDPIO::cout << "Iteration " << k << " : r = " << r_norm << std::endl;
164  if( toBool(r_norm < rsd_sq ) ) {
165  convP = true;
166  ret.resid = sqrt(r_norm);
167  ret.n_count = k;
168 
169  }
170  else {
171  convP = false;
172  }
173 
174  //-------BiCGStab Flopcounting --------------------------------------
175  // flopcount.addSiteFlops(8*Nc*Ns,s); // <r0|r>
176  // flopcount.addSiteFlops(16*Nc*Ns,s); // p = r + beta p - beta_omega v
177  // flopcount.addSiteFlops(8*Nc*Ns,s); // <r0 | v>
178  // flopcount.addSiteFlops(8*Nc*Ns,s); // r -= alpha v
179  // flopcount.addSiteFlops(8*Nc*Ns, s); // < t, r>
180  // flopcount.addSiteFlops(4*Nc*Ns, s); // < t, t>
181  // flopcount.addSiteFlops(16*Nc*Ns,s); // psi += omega r + alpha_p
182  // flopcount.addSiteFlops(8*Nc*Ns,s); // r -=omega t
183  // flopcount.addSiteFlops(4*Nc*Ns,s); // norm2(r)
184  // flopcount.addFlops(2*A.nFlops()); // = 80*Nc*Ns cbsite flops + 2*A
185  //----------------------------------------------------------------------
186  flopcount.addSiteFlops(80*Nc*Ns,s);
187  flopcount.addFlops(2*A.nFlops());
188 
189 
190  }
191 
192  swatch.stop();
193 
194  QDPIO::cout << "InvBiCGStab: k = " << ret.n_count << " resid = " << ret.resid << std::endl;
195  flopcount.report("invbicgstab", swatch.getTimeInSeconds());
196 
197  if ( ret.n_count == MaxBiCGStab ) {
198  QDPIO::cerr << "Nonconvergence of BiCGStab. MaxIters reached " << std::endl;
199  }
200 
201  return ret;
202 }
203 
204 #if 0
205 // Fix here for now
206 template<>
207 SystemSolverResults_t
208 InvBiCGStab(const LinearOperator<LatticeFermion>& A,
209  const LatticeFermion& chi,
210  LatticeFermion& psi,
211  const Real& RsdBiCGStab,
212  int MaxBiCGStab,
213  enum PlusMinus isign)
214 
215 {
216  return InvBiCGStab_a<LatticeFermion, Complex>(A, chi, psi, RsdBiCGStab, MaxBiCGStab, isign);
217 }
218 #endif
219 
220 template<>
221 SystemSolverResults_t
223  const LatticeFermionF& chi,
224  LatticeFermionF& psi,
225  const Real& RsdBiCGStab,
226  int MaxBiCGStab,
227  enum PlusMinus isign)
228 
229 {
230  return InvBiCGStab_a<LatticeFermionF, ComplexF>(A, chi, psi, RsdBiCGStab, MaxBiCGStab, isign);
231 }
232 
233 template<>
234 SystemSolverResults_t
236  const LatticeFermionD& chi,
237  LatticeFermionD& psi,
238  const Real& RsdBiCGStab,
239  int MaxBiCGStab,
240  enum PlusMinus isign)
241 
242 {
243  return InvBiCGStab_a<LatticeFermionD, ComplexD>(A, chi, psi, RsdBiCGStab, MaxBiCGStab, isign);
244 }
245 
246 // Staggered
247 template<>
248 SystemSolverResults_t
250  const LatticeStaggeredFermion& chi,
251  LatticeStaggeredFermion& psi,
252  const Real& RsdBiCGStab,
253  int MaxBiCGStab,
254  enum PlusMinus isign)
255 
256 {
257  return InvBiCGStab_a<LatticeStaggeredFermion, Complex>(A, chi, psi, RsdBiCGStab, MaxBiCGStab, isign);
258 }
259 
260 } // end namespace Chroma
Primary include file for CHROMA library code.
Linear Operator.
Definition: linearop.h:27
Conjugate-Gradient algorithm for a generic Linear Operator.
int t
Definition: meslate.cc:37
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
static const LatticeInteger & beta(const int dim)
Definition: stag_phases_s.h:47
static const LatticeInteger & alpha(const int dim)
Definition: stag_phases_s.h:43
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
LatticeFermion tmp
Definition: mespbg5p_w.cc:36
LinOpSysSolverMGProtoClover::T T
SystemSolverResults_t InvBiCGStab(const LinearOperator< LatticeFermionF > &A, const LatticeFermionF &chi, LatticeFermionF &psi, const Real &RsdBiCGStab, int MaxBiCGStab, enum PlusMinus isign)
Definition: invbicgstab.cc:222
Real rsd_sq
Definition: invbicg.cc:121
multi1d< LatticeFermion > chi(Ncb)
Complex omega
Definition: invbicg.cc:97
LatticeFermion psi
Definition: mespbg5p_w.cc:35
SystemSolverResults_t InvBiCGStab_a(const LinearOperator< T > &A, const T &chi, T &psi, const Real &RsdBiCGStab, int MaxBiCGStab, enum PlusMinus isign)
Definition: invbicgstab.cc:12
A(A, psi, r, Ncb, PLUS)
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Definition: gtest.h:7351
Double r_norm
Definition: pade_trln_w.cc:86
int r0
Definition: qtopcor.cc:41
Holds return info from SystemSolver call.
Definition: syssolver.h:17