66 template<
typename T,
typename C>
69 const multi1d<T> &
chi,
89 multi1d<T> chi_internal(N);
91 if (M.size() !=
chi.size())
93 QDPIO::cerr << __func__ <<
": linop has size=" << M.size()
94 <<
" but chi has size=" <<
chi.size() << std::endl;
98 for(
int i=0;
i < N;
i++) {
99 chi_internal[
i][ M.subset() ] =
chi[
i];
102 QDPIO::cout <<
"InvCG2: starting" << std::endl;
103 FlopCounter flopcount;
109 Real chi_sq = Real(norm2(chi_internal,M.subset()));
110 flopcount.addSiteFlops(4*Nc*Ns*N,M.subset());
124 flopcount.addFlops(2*M.nFlops());
126 for(
int n=0;
n < N; ++
n){
127 r[
n][ M.subset() ] = chi_internal[
n] - mmp[
n];
130 flopcount.addSiteFlops(2*Nc*Ns*N,M.subset());
132 #ifdef PRINT_5D_RESID
133 for(
int n=0;
n < N;
n++) {
134 Double norm_r = norm2(
r[
n],M.subset());
135 if( toBool( norm_r >
Double(1.0e-20)) ) {
136 QDPIO::cout <<
"Iteration 0 r[" <<
n <<
"] = " << norm_r << std::endl;
144 for(
int n=0;
n < N; ++
n)
145 p[
n][ M.subset() ] =
r[
n];
149 flopcount.addSiteFlops(4*Nc*Ns*N, M.subset());
159 QDPIO::cout <<
"InvCG: k = 0 cp = " <<
cp <<
" rsd_sq = " <<
rsd_sq << std::endl;
161 flopcount.report(
"invcg2_array", swatch.getTimeInSeconds());
162 revertFromFastMemoryHint(
psi,
true);
183 M(
mp,
p,
PLUS); flopcount.addFlops(M.nFlops());
186 d = norm2(
mp, M.subset()); flopcount.addSiteFlops(4*Nc*Ns*N,M.subset());
191 for(
int n=0;
n < N; ++
n) {
192 psi[
n][ M.subset() ] +=
a *
p[
n];
194 flopcount.addSiteFlops(4*Nc*Ns*N,M.subset());
201 flopcount.addFlops(M.nFlops());
203 for(
int n=0;
n < N; ++
n) {
204 r[
n][ M.subset() ] -=
a * mmp[
n];
206 flopcount.addSiteFlops(4*Nc*Ns*N, M.subset());
208 #ifdef PRINT_5D_RESID
209 for(
int n=0;
n < N;
n++) {
210 Double norm_r = norm2(
r[
n],M.subset());
211 if( toBool( norm_r >
Double(1.0e-20)) )
212 QDPIO::cout <<
"Iteration " <<
k <<
" r[" <<
n <<
"] = " << norm_r << std::endl;
219 cp = norm2(
r, M.subset());
220 flopcount.addSiteFlops(4*Nc*Ns*N,M.subset());
228 QDPIO::cout <<
"InvCG: k = " <<
k <<
" cp = " <<
cp << std::endl;
229 flopcount.report(
"invcg2_array", swatch.getTimeInSeconds());
230 revertFromFastMemoryHint(
psi,
true);
237 for(
int n=0;
n < N; ++
n)
241 actual_res += norm_r ;
244 res.
resid = sqrt(actual_res);
245 QDPIO::cout <<
"Actual residual r = " << sqrt(actual_res) <<
" Actual relative residual="<< sqrt(actual_res/chi_sq) << std::endl;
253 b = Real(
cp) / Real(
c);
258 for(
int n=0;
n < N; ++
n) {
259 p[
n][ M.subset() ] =
r[
n] +
b*
p[
n];
261 flopcount.addSiteFlops(4*Nc*Ns*N,M.subset());
267 QDPIO::cerr <<
"Nonconvergence Warning" << std::endl;
268 flopcount.report(
"invcg2_array", swatch.getTimeInSeconds());
269 revertFromFastMemoryHint(
psi,
true);
280 SystemSolverResults_t
282 const multi1d<LatticeFermionF>&
chi,
283 multi1d<LatticeFermionF>&
psi,
291 SystemSolverResults_t
293 const multi1d<LatticeFermionD>&
chi,
294 multi1d<LatticeFermionD>&
psi,
Primary include file for CHROMA library code.
Linear Operator to arrays.
SystemSolverResults_t InvCG2_a(const LinearOperator< T > &M, const T &chi, T &psi, const RT &RsdCG, int MaxCG)
Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator.
SystemSolverResults_t InvCG2(const LinearOperator< LatticeFermionF > &M, const LatticeFermionF &chi, LatticeFermionF &psi, const Real &RsdCG, int MaxCG)
Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator.
Conjugate-Gradient algorithm for a generic Linear Operator.
SpinMatrix C()
C = Gamma(10)
Asqtad Staggered-Dirac operator.
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
multi1d< LatticeFermion > chi(Ncb)
multi1d< LatticeFermion > mp(Ncb)
FloatingPoint< double > Double
Holds return info from SystemSolver call.