75 const multi1d<Real>& shifts,
76 const multi1d<Real>&
RsdCG,
82 const Subset& sub =
A.subset();
84 if (shifts.size() !=
RsdCG.size())
86 QDPIO::cerr <<
"MInvCG: number of shifts and residuals must match" << std::endl;
90 int n_shift = shifts.size();
94 QDPIO::cerr <<
"MInvCG: You must supply at least 1 mass: mass.size() = "
95 << n_shift << std::endl;
101 for(
int findit=1; findit < n_shift; ++findit) {
102 if ( toBool( shifts[findit] < shifts[
isz]) ) {
108 QDPIO::cout <<
"n_shift = " << n_shift <<
" isz = " <<
isz <<
" shift = " << shifts[0] << std::endl;
114 if(
psi.size() < n_shift ) {
120 for(
int i= 0;
i < n_shift; ++
i) {
124 T chi_internal; moveToFastMemoryHint(chi_internal);
125 chi_internal[sub] =
chi;
126 moveToFastMemoryHint(
psi,
true);
128 FlopCounter flopcount;
135 Double chi_norm_sq = norm2(chi_internal,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
144 QDPIO::cout <<
"MInvCG: " <<
n_count <<
" iterations" << std::endl;
145 flopcount.report(
"minvcg", swatch.getTimeInSeconds());
146 revertFromFastMemoryHint(
psi,
true);
154 multi1d<Double>
rsd_sq(n_shift);
155 multi1d<Double> rsdcg_sq(n_shift);
159 for(
s = 0;
s < n_shift; ++
s) {
166 T r; moveToFastMemoryHint(
r);
167 r[sub] = chi_internal;
170 multi1d<T>
p(n_shift); moveToFastMemoryHint(
p);
171 for(
s = 0;
s < n_shift; ++
s) {
172 p[
s][sub] = chi_internal;
179 T Ap; moveToFastMemoryHint(Ap);
180 A(Ap,
p[
isz],
PLUS); flopcount.addFlops(
A.nFlops());
181 Ap[sub] += shifts[
isz]*
p[
isz]; flopcount.addSiteFlops(4*Nc*Ns,sub);
184 Double d = innerProductReal(
p[
isz], Ap, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
190 multi1d<Double> bs(n_shift);
191 multi2d<Double>
z(2, n_shift);
199 for(
s = 0;
s < n_shift; ++
s)
209 r[sub] += Real(
b)*Ap; flopcount.addSiteFlops(4*Nc*Ns,sub);
212 for(
s = 0;
s < n_shift; ++
s) {
213 psi[
s][sub] = - Real(bs[
s])*chi_internal; flopcount.addSiteFlops(2*Nc*Ns,sub);
217 Double c = norm2(
r,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
220 multi1d<bool> convsP(n_shift);
221 for(
s = 0;
s < n_shift; ++
s) {
228 QDPIO::cout <<
"MInvCG: k = 0 r = " << sqrt(
c) << std::endl;
241 for(
k = 1;
k <=
MaxCG && !convP ; ++
k)
249 for(
s = 0;
s < n_shift; ++
s)
258 p[
s][sub] =
r + Real(
a)*
p[
s]; flopcount.addSiteFlops(4*Nc*Ns,sub);
270 p[
s][sub] = Real(
z[
iz][
s])*
r + Real(as)*
p[
s]; flopcount.addSiteFlops(6*Nc*Ns,sub);
281 A(Ap,
p[
isz],
PLUS); flopcount.addFlops(
A.nFlops());
282 Ap[sub] += shifts[
isz]*
p[
isz]; flopcount.addSiteFlops(4*Nc*Ns,sub);
285 d = innerProductReal(
p[
isz], Ap, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
293 for(
s = 0;
s < n_shift;
s++)
295 if (
s !=
isz && !convsP[
s] )
306 r[sub] += Real(
b)*Ap; flopcount.addSiteFlops(4*Nc*Ns,sub);
310 for(
s = 0;
s < n_shift; ++
s)
314 psi[
s][sub] -= Real(bs[
s])*
p[
s]; flopcount.addSiteFlops(2*Nc*Ns,sub);
319 c = norm2(
r,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
324 for(
s = 0;
s < n_shift;
s++)
333 QDPIO::cout <<
"MInvCG (shift=" <<
s <<
") k = " <<
k <<
" r = "
334 << css <<
" rsd_sq["<<
s<<
"] = " <<
rsd_sq[
s] << std::endl;
337 convsP[
s] = toBool( css <
rsd_sq[
s] );
345 cs = norm2(
p[
s],sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
349 d = norm2(
psi[
s],sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
354 convsP[
s] = toBool( cs <
d );
357 QDPIO::cout <<
"MInvCG (shift=" <<
s <<
") k = " <<
k <<
" cs = "
358 << cs <<
" d = " <<
d << std::endl;
375 for(
s = 0;
s < n_shift; ++
s)
378 Ap[sub] += shifts[
s]*
psi[
s];
380 Ap[sub] -= chi_internal;
384 QDPIO::cout <<
"MInvCG (conv): s = " <<
s
385 <<
" shift = " << shifts[
s]
386 <<
" r = " << Real(sqrt(
c)/
chi_norm) << std::endl;
392 QDPIO::cout <<
"MInvCG: " <<
n_count <<
" iterations" << std::endl;
393 flopcount.report(
"minvcg", swatch.getTimeInSeconds());
394 revertFromFastMemoryHint(
psi,
true);
408 const LatticeFermion&
chi,
409 multi1d<LatticeFermion>&
psi,
410 const multi1d<Real>& shifts,
411 const multi1d<Real>&
RsdCG,
422 multi1d<LatticeColorMatrix>,
423 multi1d<LatticeColorMatrix> >& M,
424 const LatticeFermion&
chi,
425 multi1d<LatticeFermion>&
psi,
426 const multi1d<Real>& shifts,
427 const multi1d<Real>&
RsdCG,
Differentiable Linear Operator.
void MInvCG(const LinearOperator< LatticeFermion > &M, const LatticeFermion &chi, multi1d< LatticeFermion > &psi, const multi1d< Real > &shifts, const multi1d< Real > &RsdCG, int MaxCG, int &n_count)
void MInvCG_a(const LinearOperator< T > &A, const T &chi, multi1d< T > &psi, const multi1d< Real > &shifts, const multi1d< Real > &RsdCG, int MaxCG, int &n_count)
Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator.
Multishift Conjugate-Gradient algorithm for a Linear Operator.
Asqtad Staggered-Dirac operator.
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
LinOpSysSolverMGProtoClover::T T
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
multi1d< LatticeFermion > chi(Ncb)
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double