9 using namespace QDP::Hints;
73 template<
typename T,
typename C>
75 const multi1d<T>&
chi,
76 multi1d< multi1d<T> >&
psi,
77 const multi1d<Real>& shifts,
78 const multi1d<Real>&
RsdCG,
85 const Subset& sub =
A.subset();
86 int n_shift = shifts.size();
88 FlopCounter flopcount;
91 if (shifts.size() !=
RsdCG.size())
93 QDPIO::cerr <<
"MinvCG: number of shifts and residuals must match" << std::endl;
98 QDPIO::cerr <<
"MinvCG: You must supply at least 1 shift. shift.size() = " << n_shift << std::endl;
104 for(
int findit=1; findit < n_shift; ++findit) {
105 if ( toBool( shifts[findit] < shifts[
isz]) ) {
116 for(
int i= 0;
i < n_shift; ++
i) {
121 multi1d< multi1d<T> >
p(n_shift);
123 for(
int s = 0;
s < n_shift; ++
s) {
128 multi1d<T> chi_internal(N);
132 moveToFastMemoryHint(Ap);
133 moveToFastMemoryHint(
p[
isz]);
134 moveToFastMemoryHint(
r);
135 moveToFastMemoryHint(
psi[
isz]);
136 moveToFastMemoryHint(chi_internal);
139 multi1d<T> blocker1(N); moveToFastMemoryHint(blocker1);
140 multi1d<T> blocker2(N); moveToFastMemoryHint(blocker2);
143 for(
int i=0;
i < n_shift;
i++) {
145 moveToFastMemoryHint(
p[
i]);
146 moveToFastMemoryHint(
psi[
i]);
154 for(
int i=0;
i < n_shift;
i++) {
155 for(
int n=0;
n < N; ++
n) {
161 for(
int i=0;
i < N;
i++) {
162 chi_internal[
i][sub] =
chi[
i];
166 QDPIO::cout <<
"MinvCG starting" << std::endl;
172 Double chi_norm_sq = norm2(chi_internal,sub);
173 flopcount.addSiteFlops(4*Nc*Ns*N,sub);
180 QDPIO::cout <<
"MinvCG: Finished. Iters taken = " <<
n_count << std::endl;
181 flopcount.report(
"MinvCGArray", swatch.getTimeInSeconds());
184 for(
int i=0;
i < n_shift;
i++) {
185 revertFromFastMemoryHint(
psi[
i],
true);
192 multi1d<Double>
rsd_sq(n_shift);
193 multi1d<Double> rsdcg_sq(n_shift);
196 for(
int s = 0;
s < n_shift; ++
s) {
203 for(
int n=0;
n < N; ++
n) {
204 r[
n][sub] = chi_internal[
n];
206 for(
int s=0;
s < n_shift;
s++) {
207 p[
s][
n][sub] = chi_internal[
n];
217 flopcount.addFlops(
A.nFlops());
219 for(
int n=0;
n < N; ++
n) {
222 flopcount.addSiteFlops(4*Nc*Ns*N, sub);
226 flopcount.addSiteFlops(4*Nc*Ns*N ,sub);
231 multi1d<Double> bs(n_shift);
232 multi2d<Double>
z(2, n_shift);
240 for(
int s = 0;
s < n_shift; ++
s)
250 for(
int n=0;
n < N; ++
n) {
251 r[
n][sub] += Real(
b)* Ap[
n];
253 flopcount.addSiteFlops(4*Nc*Ns*N, sub);
257 for(
int s = 0;
s < n_shift; ++
s) {
258 for(
int n=0;
n < N; ++
n) {
259 psi[
s][
n][sub] -= Real(bs[
s])*chi_internal[
n];
262 flopcount.addSiteFlops(4*Nc*Ns*N*n_shift, sub);
266 flopcount.addSiteFlops(4*Nc*Ns*N,sub);
270 multi1d<bool> convsP(n_shift);
271 for(
int s = 0;
s < n_shift; ++
s) {
288 for(
k = 1;
k <=
MaxCG && !convP ; ++
k)
296 for(
int s = 0;
s < n_shift; ++
s) {
302 for(
int n=0;
n < N; ++
n) {
307 flopcount.addSiteFlops(4*Nc*Ns*N,sub);
315 for(
int n=0;
n < N; ++
n) {
320 flopcount.addSiteFlops(6*Nc*Ns*N, sub);
333 flopcount.addFlops(
A.nFlops());
335 for(
int n=0;
n < N; ++
n) {
338 flopcount.addSiteFlops(4*Nc*Ns*N, sub);
341 d = innerProductReal(
p[
isz], Ap, sub);
342 flopcount.addSiteFlops(4*Nc*Ns*N, sub);
350 for(
int s = 0;
s < n_shift;
s++) {
352 if (
s !=
isz && !convsP[
s] ) {
362 for(
int n=0;
n < N; ++
n) {
363 r[
n][sub] += Real(
b)*Ap[
n];
365 flopcount.addSiteFlops(4*Nc*Ns*N, sub);
368 for(
int s = 0;
s < n_shift; ++
s) {
370 for(
int n=0;
n < N; ++
n) {
373 flopcount.addSiteFlops(4*Nc*Ns*N, sub);
379 flopcount.addSiteFlops(4*Nc*Ns*N, sub);
384 for(
int s = 0;
s < n_shift;
s++) {
391 convsP[
s] = toBool( css <
rsd_sq[
s] );
401 QDPIO::cout <<
"MinvCGArray finished: " <<
n_count <<
" iterations " << std::endl;
402 flopcount.report(
"MinvCGArray", swatch.getTimeInSeconds());
403 QDPIO::cout <<
"MinvCG Explicit solution check: " << std::endl;
406 for(
int s = 0;
s < n_shift; ++
s)
409 for(
int n=0;
n < N; ++
n) {
410 Ap[
n][sub] += shifts[
s] *
psi[
s][
n];
412 Ap[
n][sub] -= chi_internal[
n];
417 QDPIO::cout <<
"MInvCG (conv): s = " <<
s
418 <<
" shift = " << shifts[
s]
419 <<
" r = " << Real(sqrt(
c)/
chi_norm) << std::endl;
423 for(
int i=0;
i < n_shift;
i++) {
424 revertFromFastMemoryHint(
psi[
i],
true);
428 QDPIO::cout <<
"too many CG iterationns: " <<
n_count << std::endl;
440 const multi1d<LatticeFermion>&
chi,
441 multi1d< multi1d<LatticeFermion> >&
psi,
442 const multi1d<Real>& shifts,
443 const multi1d<Real>&
RsdCG,
454 multi1d<LatticeColorMatrix>,
455 multi1d<LatticeColorMatrix> >& M,
456 const multi1d<LatticeFermion>&
chi,
457 multi1d< multi1d<LatticeFermion> >&
psi,
458 const multi1d<Real>& shifts,
459 const multi1d<Real>&
RsdCG,
Differentiable Linear Operator.
Linear Operator to arrays.
void MInvCG(const DiffLinearOperatorArray< LatticeFermion, multi1d< LatticeColorMatrix >, multi1d< LatticeColorMatrix > > &M, const multi1d< LatticeFermion > &chi, multi1d< multi1d< LatticeFermion > > &psi, const multi1d< Real > &shifts, const multi1d< Real > &RsdCG, int MaxCG, int &n_count)
void MInvCG_a(const C &A, const multi1d< T > &chi, multi1d< multi1d< T > > &psi, const multi1d< Real > &shifts, const multi1d< Real > &RsdCG, int MaxCG, int &n_count)
Multishift Conjugate-Gradient (CG1) algorithm for a Linear Operator.
Multishift Conjugate-Gradient algorithm for a Linear Operator.
SpinMatrix C()
C = Gamma(10)
Asqtad Staggered-Dirac operator.
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
FloatingPoint< double > Double
multi1d< LatticeFermion > r(Ncb)