74 template<
typename T,
typename R>
78 const multi1d<R>& shifts,
79 const multi1d<R>&
RsdCG,
85 const Subset& sub = M.
subset();
87 if (shifts.size() !=
RsdCG.size())
89 QDPIO::cerr <<
"MInvCG: number of shifts and residuals must match" << std::endl;
93 int n_shift = shifts.size();
97 QDPIO::cerr <<
"MInvCG: You must supply at least 1 mass: mass.size() = "
98 << n_shift << std::endl;
104 for(
int findit=1; findit < n_shift; ++findit) {
105 if ( toBool( shifts[findit] < shifts[
isz]) ) {
113 if(
psi.size() < n_shift ) {
119 for(
int i= 0;
i < n_shift; ++
i) {
123 T chi_internal; moveToFastMemoryHint(chi_internal);
124 chi_internal[sub] =
chi;
125 moveToFastMemoryHint(
psi,
true);
127 FlopCounter flopcount;
134 Double chi_norm_sq = norm2(chi_internal,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
143 QDPIO::cout <<
"MInvCG2: " <<
n_count <<
" iterations" << std::endl;
144 flopcount.report(
"minvcg2", swatch.getTimeInSeconds());
145 revertFromFastMemoryHint(
psi,
true);
153 multi1d<Double>
rsd_sq(n_shift);
154 multi1d<Double> rsdcg_sq(n_shift);
158 for(
s = 0;
s < n_shift; ++
s) {
165 T r; moveToFastMemoryHint(
r);
166 r[sub] = chi_internal;
169 T p_0; moveToFastMemoryHint(p_0);
170 p_0[sub] = chi_internal;
174 multi1d<T>
p(n_shift); moveToFastMemoryHint(
p);
175 for(
s = 0;
s < n_shift; ++
s) {
176 p[
s][sub] = chi_internal;
183 T Mp, MMp; moveToFastMemoryHint(Mp); moveToFastMemoryHint(MMp);
184 M(Mp, p_0,
PLUS); flopcount.addFlops(M.
nFlops());
187 Double d = norm2(Mp, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
195 r[sub] += b_r*MMp; flopcount.addSiteFlops(4*Nc*Ns,sub);
198 multi1d<Double> bs(n_shift);
199 multi2d<Double>
z(2, n_shift);
211 for(
s = 0;
s < n_shift; ++
s)
221 for(
s = 0;
s < n_shift; ++
s) {
223 psi[
s][sub] = - bs_r*chi_internal; flopcount.addSiteFlops(2*Nc*Ns,sub);
227 Double c = norm2(
r,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
230 multi1d<bool> convsP(n_shift);
231 for(
s = 0;
s < n_shift; ++
s) {
238 QDPIO::cout <<
"MInvCG: k = 0 r = " << sqrt(
c) << std::endl;
251 for(
k = 1;
k <=
MaxCG && !convP ; ++
k)
258 p_0[sub] =
r + a_r*p_0; flopcount.addSiteFlops(4*Nc*Ns,sub);
262 for(
s = 0;
s < n_shift; ++
s) {
270 p[
s][sub] = zizs*
r + as_r*
p[
s]; flopcount.addSiteFlops(6*Nc*Ns,sub);
280 M(Mp, p_0,
PLUS); flopcount.addFlops(M.
nFlops());
283 d = norm2(Mp, sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
291 r[sub] += b_r*MMp; flopcount.addSiteFlops(4*Nc*Ns,sub);
293 c = norm2(
r,sub); flopcount.addSiteFlops(4*Nc*Ns,sub);
297 for(
s = 0;
s < n_shift;
s++) {
310 for(
s = 0;
s < n_shift; ++
s)
316 psi[
s][sub] -= bs_r*
p[
s]; flopcount.addSiteFlops(2*Nc*Ns,sub);
324 for(
s = 0;
s < n_shift;
s++)
334 convsP[
s] = toBool( css <
rsd_sq[
s] );
349 for(
int s=0;
s < n_shift;
s++) {
353 MMp[sub] += shifts[
s]*
psi[
s];
357 QDPIO::cout <<
"shift("<<
s<<
"): || r || / || chi || = "<<sqrt(rnorm2/chinorm2)<< std::endl;
360 QDPIO::cout <<
"MInvCG2: " <<
n_count <<
" iterations" << std::endl;
361 flopcount.report(
"minvcg", swatch.getTimeInSeconds());
362 revertFromFastMemoryHint(
psi,
true);
377 const LatticeFermionF&
chi,
378 multi1d<LatticeFermionF>&
psi,
379 const multi1d<RealF>& shifts,
380 const multi1d<RealF>&
RsdCG,
385 int ierr=PAT_region_begin(22,
"MInvCG2LinOp");
389 ierr=PAT_region_end(22);
395 const LatticeFermionD&
chi,
396 multi1d<LatticeFermionD>&
psi,
397 const multi1d<RealD>& shifts,
398 const multi1d<RealD>&
RsdCG,
403 int ierr=PAT_region_begin(22,
"MInvCG2LinOp");
407 ierr=PAT_region_end(22);
415 multi1d<LatticeColorMatrixF>,
416 multi1d<LatticeColorMatrixF> >& M,
417 const LatticeFermionF&
chi,
418 multi1d<LatticeFermionF>&
psi,
419 const multi1d<RealF>& shifts,
420 const multi1d<RealF>&
RsdCG,
425 int ierr=PAT_region_begin(23,
"MInvCG2DiffLinOp");
429 ierr=PAT_region_end(23);
435 multi1d<LatticeColorMatrixD>,
436 multi1d<LatticeColorMatrixD> >& M,
437 const LatticeFermionD&
chi,
438 multi1d<LatticeFermionD>&
psi,
439 const multi1d<RealD>& shifts,
440 const multi1d<RealD>&
RsdCG,
445 int ierr=PAT_region_begin(23,
"MInvCG2DiffLinOp");
449 ierr=PAT_region_end(23);
Differentiable Linear Operator.
virtual const Subset & subset() const =0
Return the subset on which the operator acts.
virtual unsigned long nFlops() const
void MInvCG2_a(const LinearOperator< T > &M, const T &chi, multi1d< T > &psi, const multi1d< R > &shifts, const multi1d< R > &RsdCG, int MaxCG, int &n_count)
Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator.
void MInvCG2(const LinearOperator< LatticeFermionF > &M, const LatticeFermionF &chi, multi1d< LatticeFermionF > &psi, const multi1d< RealF > &shifts, const multi1d< RealF > &RsdCG, int MaxCG, int &n_count)
Multishift Conjugate-Gradient algorithm for a Linear Operator.
Asqtad Staggered-Dirac operator.
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
LinOpSysSolverMGProtoClover::T T
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
multi1d< LatticeFermion > chi(Ncb)
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double