76 const multi1d<Real>& residues,
77 const multi1d<Real>& poles,
85 const Subset& sub = M.
subset();
86 moveToFastMemoryHint(
psi,
true);
88 int n_shift = poles.size();
92 QDPIO::cerr <<
"MInvCG: You must supply at least 1 mass: mass.size() = "
93 << n_shift << std::endl;
99 for(
int findit=1; findit < n_shift; ++findit) {
100 if ( toBool( poles[findit] < poles[
isz]) ) {
106 multi1d<T> X(n_shift);
111 if( X.size() < n_shift ) {
117 for(
int i= 0;
i < n_shift; ++
i) {
121 T chi_internal; moveToFastMemoryHint(chi_internal);
122 chi_internal[sub] =
chi;
123 moveToFastMemoryHint(X,
true);
125 FlopCounter flopcount;
132 Double chi_norm_sq = norm2(chi_internal,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
141 QDPIO::cout <<
"MInvCG2: " <<
n_count <<
" iterations" << std::endl;
142 flopcount.report(
"minvcg2", swatch.getTimeInSeconds());
145 for(
int s=0;
s < n_shift;
s++) {
146 psi[sub] += residues[
s]*X[
s];
148 revertFromFastMemoryHint(X,
false);
149 revertFromFastMemoryHint(
psi,
true);
157 multi1d<Double>
rsd_sq(n_shift);
158 multi1d<Double> rsdcg_sq(n_shift);
162 for(
s = 0;
s < n_shift; ++
s) {
169 T r; moveToFastMemoryHint(
r);
170 r[sub] = chi_internal;
173 T p_0; moveToFastMemoryHint(p_0);
174 p_0[sub] = chi_internal;
178 multi1d<T>
p(n_shift); moveToFastMemoryHint(
p);
179 for(
s = 0;
s < n_shift; ++
s) {
180 p[
s][sub] = chi_internal;
187 T Mp, MMp; moveToFastMemoryHint(Mp); moveToFastMemoryHint(MMp);
188 M(Mp, p_0,
PLUS); flopcount.addFlops(M.
nFlops());
191 Double d = norm2(Mp, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
198 r[sub] += Real(
b)*MMp; flopcount.addSiteFlops(4*Nc*Ns,sub);
201 multi1d<Double> bs(n_shift);
202 multi2d<Double>
z(2, n_shift);
214 for(
s = 0;
s < n_shift; ++
s) {
223 for(
s = 0;
s < n_shift; ++
s) {
224 X[
s][sub] = -Real(bs[
s])*chi_internal; flopcount.addSiteFlops(2*Nc*Ns,sub);
228 Double c = norm2(
r,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
231 multi1d<bool> convsP(n_shift);
232 for(
s = 0;
s < n_shift; ++
s) {
239 QDPIO::cout <<
"MInvCG: k = 0 r = " << sqrt(
c) << std::endl;
253 for(
k = 1;
k <=
MaxCG && !convP ; ++
k)
259 p_0[sub] =
r + Real(
a)*p_0; flopcount.addSiteFlops(4*Nc*Ns,sub);
263 for(
s = 0;
s < n_shift; ++
s) {
269 p[
s][sub] = Real(
z[
iz][
s])*
r + Real(as)*
p[
s]; flopcount.addSiteFlops(6*Nc*Ns,sub);
279 M(Mp, p_0,
PLUS); flopcount.addFlops(M.
nFlops());
282 d = norm2(Mp, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
289 r[sub] += Real(
b)*MMp; flopcount.addSiteFlops(4*Nc*Ns,sub);
291 c = norm2(
r,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
295 for(
s = 0;
s < n_shift;
s++) {
309 for(
s = 0;
s < n_shift;
s++) {
314 convsP[
s] = toBool( css <
rsd_sq[
s] );
323 flopcount.addSiteFlops(2*Nc*Ns,sub);
325 for(
s = 0;
s < n_shift; ++
s) {
328 tmp[sub] = Real(bs[
s])*
p[
s];
329 flopcount.addSiteFlops(2*Nc*Ns,sub);
332 flopcount.addSiteFlops(2*Nc*Ns,sub);
334 Delta[sub] += residues[
s]*
tmp;
335 flopcount.addSiteFlops(4*Nc*Ns,sub);
338 psi[sub] += residues[
s]*X[
s];
339 flopcount.addSiteFlops(4*Nc*Ns,sub);
342 Double delta_norm = norm2(Delta,sub);
343 flopcount.addSiteFlops(4*Nc*Ns,sub);
346 flopcount.addSiteFlops(4*Nc*Ns,sub);
348 convP |= toBool( delta_norm < rsdcg_sq[0]*psi_norm);
355 QDPIO::cout <<
"MInvCG2Accum: " <<
n_count <<
" iterations" << std::endl;
356 flopcount.report(
"minvcg", swatch.getTimeInSeconds());
357 revertFromFastMemoryHint(X,
false);
358 revertFromFastMemoryHint(
psi,
true);
373 const LatticeFermion&
chi,
376 const multi1d<Real>& residues,
377 const multi1d<Real>& poles,
389 multi1d<LatticeColorMatrix>,
390 multi1d<LatticeColorMatrix> >& M,
391 const LatticeFermion&
chi,
394 const multi1d<Real>& residues,
395 const multi1d<Real>& poles,
Differentiable Linear Operator.
virtual const Subset & subset() const =0
Return the subset on which the operator acts.
virtual unsigned long nFlops() const
void MInvCG2Accum_a(const LinearOperator< T > &M, const T &chi, T &psi, const Real &norm, const multi1d< Real > &residues, const multi1d< Real > &poles, const Real &RsdCG, int MaxCG, int &n_count)
Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator.
void MInvCG2Accum(const LinearOperator< LatticeFermion > &M, const LatticeFermion &chi, LatticeFermion &psi, const Real &norm, const multi1d< Real > &residues, const multi1d< Real > &poles, const Real &RsdCG, int MaxCG, int &n_count)
Multishift Conjugate-Gradient algorithm for a Linear Operator.
Asqtad Staggered-Dirac operator.
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
LinOpSysSolverMGProtoClover::T T
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
multi1d< LatticeFermion > chi(Ncb)
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double