9 using namespace QDP::Hints;
73 template<
typename T,
typename C>
75 const multi1d<T>&
chi,
78 const multi1d<Real>& residues,
79 const multi1d<Real>& poles,
87 const Subset& sub =
A.subset();
88 int n_shift = poles.size();
91 FlopCounter flopcount;
96 QDPIO::cerr <<
"MinvCG: You must supply at least 1 shift. shift.size() = " << n_shift << std::endl;
102 for(
int findit=1; findit < n_shift; ++findit) {
103 if ( toBool( poles[findit] < poles[
isz]) ) {
109 multi1d< multi1d<T> > X(n_shift);
117 for(
int i= 0;
i < n_shift; ++
i) {
122 for(
int i=0;
i < N;
i++) {
128 multi1d< multi1d<T> >
p(n_shift);
130 for(
int s = 0;
s < n_shift; ++
s) {
135 multi1d<T> chi_internal(N);
139 moveToFastMemoryHint(Ap);
140 moveToFastMemoryHint(
p[
isz]);
141 moveToFastMemoryHint(
r);
142 moveToFastMemoryHint(X[
isz]);
143 moveToFastMemoryHint(chi_internal);
146 multi1d<T> blocker1(N); moveToFastMemoryHint(blocker1);
147 multi1d<T> blocker2(N); moveToFastMemoryHint(blocker2);
150 for(
int i=0;
i < n_shift;
i++) {
152 moveToFastMemoryHint(
p[
i]);
153 moveToFastMemoryHint(X[
i]);
160 moveToFastMemoryHint(
psi);
163 for(
int i=0;
i < n_shift;
i++) {
164 for(
int n=0;
n < N; ++
n) {
170 for(
int i=0;
i < N;
i++) {
171 chi_internal[
i][sub] =
chi[
i];
175 QDPIO::cout <<
"MinvCG starting" << std::endl;
181 Double chi_norm_sq = norm2(chi_internal,sub);
182 flopcount.addSiteFlops(4*Nc*Ns*N,sub);
189 QDPIO::cout <<
"MinvCG: Finished. Iters taken = " <<
n_count << std::endl;
190 flopcount.report(
"MinvCGArray", swatch.getTimeInSeconds());
194 for(
int n=0;
n < N;
n++) {
197 for(
int s=0;
s < n_shift;
s++) {
198 for(
int n=0;
n < N;
n++) {
199 psi[
n][sub] += residues[
s]*X[
s][
n];
204 for(
int i=0;
i < n_shift;
i++) {
205 revertFromFastMemoryHint(X[
i],
false);
208 revertFromFastMemoryHint(
psi,
true);
215 multi1d<Double>
rsd_sq(n_shift);
216 multi1d<Double> rsdcg_sq(n_shift);
219 for(
int s = 0;
s < n_shift; ++
s) {
226 for(
int n=0;
n < N; ++
n) {
227 r[
n][sub] = chi_internal[
n];
229 for(
int s=0;
s < n_shift;
s++) {
230 p[
s][
n][sub] = chi_internal[
n];
240 flopcount.addFlops(
A.nFlops());
242 for(
int n=0;
n < N; ++
n) {
245 flopcount.addSiteFlops(4*Nc*Ns*N, sub);
249 flopcount.addSiteFlops(4*Nc*Ns*N ,sub);
254 multi1d<Double> bs(n_shift);
255 multi2d<Double>
z(2, n_shift);
263 for(
int s = 0;
s < n_shift; ++
s)
273 for(
int n=0;
n < N; ++
n) {
274 r[
n][sub] += Real(
b)* Ap[
n];
276 flopcount.addSiteFlops(4*Nc*Ns*N, sub);
280 for(
int s = 0;
s < n_shift; ++
s) {
281 for(
int n=0;
n < N; ++
n) {
282 X[
s][
n][sub] -= Real(bs[
s])*chi_internal[
n];
285 flopcount.addSiteFlops(4*Nc*Ns*N*n_shift, sub);
290 flopcount.addSiteFlops(4*Nc*Ns*N,sub);
294 multi1d<bool> convsP(n_shift);
295 for(
int s = 0;
s < n_shift; ++
s) {
313 for(
k = 1;
k <=
MaxCG && !convP ; ++
k)
321 for(
int s = 0;
s < n_shift; ++
s) {
327 for(
int n=0;
n < N; ++
n) {
332 flopcount.addSiteFlops(4*Nc*Ns*N,sub);
340 for(
int n=0;
n < N; ++
n) {
345 flopcount.addSiteFlops(6*Nc*Ns*N, sub);
358 flopcount.addFlops(
A.nFlops());
360 for(
int n=0;
n < N; ++
n) {
363 flopcount.addSiteFlops(4*Nc*Ns*N, sub);
366 d = innerProductReal(
p[
isz], Ap, sub);
367 flopcount.addSiteFlops(4*Nc*Ns*N, sub);
375 for(
int s = 0;
s < n_shift;
s++) {
377 if (
s !=
isz && !convsP[
s] ) {
387 for(
int n=0;
n < N; ++
n) {
388 r[
n][sub] += Real(
b)*Ap[
n];
390 flopcount.addSiteFlops(4*Nc*Ns*N, sub);
396 flopcount.addSiteFlops(4*Nc*Ns*N, sub);
401 for(
int s = 0;
s < n_shift;
s++) {
408 convsP[
s] = toBool( css <
rsd_sq[
s] );
416 for(
int n=0;
n < N;
n++) {
417 Delta[
n][sub] =
zero;
420 for(
int s=0 ;
s < n_shift;
s++) {
423 for(
int n=0;
n < N;
n++) {
426 Delta[
n][sub] += residues[
s]*
tmp[
n];
430 for(
int n=0;
n < N;
n++) {
431 psi[
n][sub] += residues[
s]*X[
s][
n];
435 Double delta_norm = norm2(Delta, sub);
437 convP |= toBool( delta_norm < rsdcg_sq[0]*psi_norm);
442 QDPIO::cout <<
"MinvCGAccumArray finished: " <<
n_count <<
" iterations " << std::endl;
443 flopcount.report(
"MinvCGAccumArray", swatch.getTimeInSeconds());
445 for(
int i=0;
i < n_shift;
i++) {
446 revertFromFastMemoryHint(
psi[
i],
true);
450 QDPIO::cout <<
"too many CG iterationns: " <<
n_count << std::endl;
462 const multi1d<LatticeFermion>&
chi,
463 multi1d<LatticeFermion>&
psi,
465 const multi1d<Real>& residues,
466 const multi1d<Real>& poles,
478 multi1d<LatticeColorMatrix>,
479 multi1d<LatticeColorMatrix> >& M,
480 const multi1d<LatticeFermion>&
chi,
481 multi1d<LatticeFermion>&
psi,
483 const multi1d<Real>& residues,
484 const multi1d<Real>& poles,
Differentiable Linear Operator.
Linear Operator to arrays.
void MInvCGAccum(const DiffLinearOperatorArray< LatticeFermion, multi1d< LatticeColorMatrix >, multi1d< LatticeColorMatrix > > &M, const multi1d< LatticeFermion > &chi, multi1d< LatticeFermion > &psi, const Real &norm, const multi1d< Real > &residues, const multi1d< Real > &poles, const Real &RsdCG, const int MaxCG, int &n_count)
void MInvCGAccum_a(const C &A, const multi1d< T > &chi, multi1d< T > &psi, const Real &norm, const multi1d< Real > &residues, const multi1d< Real > &poles, const Real &RsdCG, const int MaxCG, int &n_count)
Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator.
Multishift Conjugate-Gradient algorithm for a Linear Operator.
SpinMatrix C()
C = Gamma(10)
Asqtad Staggered-Dirac operator.
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
FloatingPoint< double > Double
multi1d< LatticeFermion > r(Ncb)