CHROMA
minvmr.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Multishift Minimal-residual algorithm for a Linear Operator
3  */
4 
5 #error "THIS ROUTINE IS NOT FULLY CONVERTED"
6 
7 #include "linearop.h"
9 
10 namespace Chroma
11 {
12 
13  //! Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator
14  /*! \ingroup invert
15  *
16  * This subroutine uses the Conjugate Gradient (CG) algorithm to find
17  * the solution of the set of linear equations
18  * Method used is described in Jegerlehner, hep-lat/9708029
19 
20  * We are searching in a subspace orthogonal to the eigenvectors EigVec
21  * of A. The source chi is assumed to already be orthogonal!
22 
23  * Chi = A . Psi
24 
25  * Algorithm:
26 
27  * Psi[0] := 0; Zeroed
28  * r[0] := Chi; Initial residual
29  * p[1] := Chi ; Initial direction
30  * b[0] := |r[0]|**2 / <p[0],Ap[0]> ;
31  * z[0] := 1 / (1 - (shift - shift(0))*b)
32  * bs[0] := b[0] * z[0]
33  * r[1] += b[k] A . p[0] ; New residual
34  * Psi[1] = - b[k] p[k] ; Starting solution std::vector
35  * IF |r[0]| <= RsdCG |Chi| THEN RETURN; Converged?
36  * FOR k FROM 1 TO MaxCG DO CG iterations
37  * a[k] := |r[k]|**2 / |r[k-1]|**2 ;
38  * p[k] := r[k] + a[k] p[k-1]; New direction
39  * b[k+1] := |r[k]|**2 / <p[k],Ap[k]> ;
40  * r[k+1] += b[k+1] A . p[k] ; New residual
41  * Psi[k+1] -= b[k+1] p[k] ; New solution std::vector
42  * IF |[k+1]| <= RsdCG |Chi| THEN RETURN; Converged?
43 
44  * Arguments:
45 
46  * A Hermitian linear operator (Read)
47  * Chi Source (Read)
48  * Psi array of solutions (Write)
49  * shifts shifts of form A + mass (Read)
50  * RsdCG residual accuracy (Read/Write)
51  * n_count Number of CG iteration (Write)
52 
53  * Local Variables:
54 
55  * p Direction std::vector
56  * r Residual std::vector
57  * cp | r[k] |**2
58  * c | r[k-1] |**2
59  * k CG iteration counter
60  * a a[k]
61  * b b[k+1]
62  * d < p[k], A.p[k] >
63  * Ap Temporary for M.p
64 
65  * MaxCG Maximum number of CG iterations allowed
66 
67  * Subroutines:
68  * A Apply matrix hermitian A to std::vector
69  *
70  * @{
71  */
72 
73  template<typename T>
75  const T& chi,
76  multi1d<T>& psi,
77  const multi1d<Real>& shifts,
78  const multi1d<Real>& RsdMR,
79  int MaxMR,
80  int& n_count)
81  {
82  START_CODE();
83 
84  const Subset& sub = A.subset();
85 
86  if (shifts.size() != RsdMR.size())
87  {
88  QDPIO::cerr << "MInvMR: number of shifts and residuals must match" << std::endl;
89  QDP_abort(1);
90  }
91 
92  int n_shift = shifts.size();
93 
94  if (n_shift == 0)
95  {
96  QDPIO::cerr << "MInvMR: You must supply at least 1 mass: mass.size() = "
97  << n_shift << std::endl;
98  QDP_abort(1);
99  }
100 
101  Complex a;
102  Complex as;
103  Complex atmp;
104  Complex atmp2;
105  Double d;
106  Real asq;
107  multi1d<Real> ass(Nmass);
108  multi1d<Complex> rhos(Nmass);
109  Boolean btmp;
110  Boolean convP;
111  multi1d<Boolean> convsP(Nmass);
112  multi1d<Real> rsdmr_sq(Nmass);
113 /* Real rsd_sq; */
114 
115  /* If exactly 0 norm, then solution must be 0 (for pos. def. operator) */
116  if (chi_norm == 0.0)
117  {
118  n_count = 0;
119  psi = zero;
120  END_CODE();
121  return;
122  }
123 
124  T Ar; moveToFastMemoryHint(Ar);
125  T chi_internal; moveToFastMemoryHint(chi_internal);
126  chi_internal[sub] = chi;
127  moveToFastMemoryHint(psi,true);
128 
129  FlopCounter flopcount;
130  flopcount.reset();
131  StopWatch swatch;
132  swatch.reset();
133  swatch.start();
134 
136  rsdmr_sq = localNorm2(RsdMR);
137 /* rsd_sq = cp * rsdmr_sq; */
138 
139  // For this algorithm, all the psi have to be 0 to start
140  // Only is that way the initial residuum r = chi
141  for(int i= 0; i < n_shift; ++i) {
142  psi[i][sub] = zero;
143  }
144 
145  /* Psi[0] := 0; */
146  /* r[0] := Chi */
147  T r = chi;
148  r[sub] = chi_internal;
149  FILL(convP,FALSE);
150  FILL(convsP,FALSE);
151  rhos = 1;
152 
153  /* FOR k FROM 1 TO MaxMR DO */
154  /* IF |psi[k+1] - psi[k]| <= RsdMR |psi[k+1]| THEN RETURN; */
155  for(k = 1; k <= MaxMR && ! convP ; ++k)
156  {
157  /* cp = |r[k-1]|^2 */
158  cp = norm2(r, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
159 
160  /* a[k-1] := < A.r[k-1], r[k-1] >/ < A.r[k-1], A.r[k-1] > ; */
161  /* Ar = A * r */
162  A(Ar, r, PLUS); flopcount.addFlops(A.nFlops());
163  Ar[sub] += r * mass[isz]; flopcount.addSiteFlops(4*Nc*Ns,sub);
164 
165  /* c = < A.r, r > */
166  DComplex c = innerProduct(Ar, r, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
167 
168  /* d = | A.r | ** 2 */
169  d = norm2(Ar, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
170 
171  /* a = c / d */
172  a = c / d;
173 
174  /* Compute the shifted as and rhos */
175  /* Psi[k+1] += a[k] r[k] ; */
176  for(s = 0; s < Nmass; ++s)
177  {
178  if (convsP[s]) continue;
179 
180  atmp = 1;
181  asq = mass[s] - mass[isz];
182  atmp += adj(a) * asq;
183  asq = localNorm2(atmp);
184  asq = ones / asq;
185  atmp = atmp * asq;
186 
187  atmp2 = a * atmp;
188  as = rhos[s] * atmp2;
189  atmp2 = rhos[s] * atmp;
190  rhos[s] = atmp2;
191 
192  ass[s] = localNorm2(as);
193 
194  for(cb = 0; cb < Ncb; ++cb)
195  psi[s][cb] += r[cb] * as; /* 2 Nc Ns flops */
196  }
197 
198  /* r[k] -= a[k-1] M . r[k-1] ; */
199  for(cb = 0; cb < Ncb; ++cb)
200  r[cb] -= Ar[cb] * a; /* 2 Nc Ns flops */
201 
202 #if 0
203  /* Project out eigenvectors */
204  GramSchm (r, 1, OperEigVec, NOperEig, Ncb);
205  GramSchm (psi, Nmass, OperEigVec, NOperEig, Ncb);
206 #endif
207 
208  /* IF |psi[k+1] - psi[k]| <= RsdMR |psi[k+1]| THEN RETURN; */
209  for(s = 0; s < Nmass; ++s)
210  {
211  if (convsP[s]) continue;
212 
213  d = zero;
214  for(cb = 0; cb < Ncb; ++cb)
215  {
216  d += norm2(psi[s][cb]); /* 2 Nc Ns flops */
217  }
218 
219  d *= rsdmr_sq[s];
220  Real cs = Real(ass[s]) * cp;
221  convsP[s] = cs < d;
222 
223 #if 0
224  PRINTF("MInvMR: k = %d s = %d r = %g d = %g\n",k,s,sqrt(cs),sqrt(d));
225  FLUSH_WRITE_NAMELIST(stdout);
226 #endif
227  }
228  /* Final convergence is determined by smallest shift */
229  convP = convsP[isz];
230 
231  n_count = k;
232  }
233 
234 #if 0
235  /* HACK **/
236  for(int s = 0; s < Nmass; ++s)
237  {
238  A(Ar, psi[s], PLUS);
239  Ar[sub] += psi[s] * mass[s];
240  Ar[sub] -= chi;
241 
242  cp = norm2(Ar, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
243  QDPIO::cout << "MInvMR (conv): s = " << s << " r = " << sqrt(cp) << " m = " << mass[s] << std::endl;
244  }
245  /* end */
246 #endif
247 
248  if (n_count == MaxMR)
249  QDP_error_exit("too many MR iterations", n_count, rsdmr_sq[isz]);
250  END_CODE();
251  return;
252  }
253 
254 
255 
256  /*! \ingroup invert */
257  template<>
259  const LatticeFermion& chi,
260  multi1d<LatticeFermion>& psi,
261  const multi1d<Real>& shifts,
262  const multi1d<Real>& RsdMR,
263  int MaxMR,
264  int &n_count)
265  {
266  MInvMR_a(M, chi, psi, shifts, RsdMR, MaxMR, n_count);
267  }
268 
269 
270  /*! \ingroup invert */
271  template<>
272  void MInvMR(const DiffLinearOperator<LatticeFermion,
273  multi1d<LatticeColorMatrix>,
274  multi1d<LatticeColorMatrix> >& M,
275  const LatticeFermion& chi,
276  multi1d<LatticeFermion>& psi,
277  const multi1d<Real>& shifts,
278  const multi1d<Real>& RsdMR,
279  int MaxMR,
280  int &n_count)
281  {
282  MInvMR_a(M, chi, psi, shifts, RsdMR, MaxMR, n_count);
283  }
284 
285  /*! @} */ // end of group invert
286 
287 } // end namespace Chroma
Differentiable Linear Operator.
Definition: linearop.h:98
Linear Operator.
Definition: linearop.h:27
int NOperEig
void GramSchm(multi1d< LatticeFermion > &psi, const int Npsi, const multi1d< LatticeFermion > &vec, const int Nvec, const Subset &sub)
Gramm-Schmidt orthogonolization.
Definition: gramschm.cc:127
void MInvMR_a(const LinearOperator< T > &A, const T &chi, multi1d< T > &psi, const multi1d< Real > &shifts, const multi1d< Real > &RsdMR, int MaxMR, int &n_count)
Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator.
Definition: minvmr.cc:74
void MInvMR(const LinearOperator< LatticeFermion > &M, const LatticeFermion &chi, multi1d< LatticeFermion > &psi, const multi1d< Real > &shifts, const multi1d< Real > &RsdMR, int MaxMR, int &n_count)
Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator.
Definition: minvmr.cc:258
Linear Operators.
Multishift Conjugate-Gradient algorithm for a Linear Operator.
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
int n_count
Definition: invbicg.cc:78
Double c
Definition: invbicg.cc:108
LinOpSysSolverMGProtoClover::T T
int i
Definition: pbg5p_w.cc:55
Double mass
Definition: pbg5p_w.cc:54
@ PLUS
Definition: chromabase.h:45
Double cp
Definition: invbicg.cc:107
multi1d< LatticeFermion > chi(Ncb)
Complex a
Definition: invbicg.cc:95
LatticeFermion psi
Definition: mespbg5p_w.cc:35
DComplex d
Definition: invbicg.cc:99
START_CODE()
A(A, psi, r, Ncb, PLUS)
int cb
Definition: invbicg.cc:120
int Ncb
Definition: invbicg.cc:80
Double chi_norm
Definition: invbicg.cc:79
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Definition: gtest.h:7351
int isz
Definition: pade_trln_w.cc:151
RsdMR
Definition: pade_trln_w.cc:26
PRINTF("TrLnSolve: n_count = %d, RsdMR = %16.8e, MaxRsd = %16.8e\n", n_count, RsdMR, MaxRsd)
FILL(ltmp_1, itmp)