www/dox/reliable__cg_8cc_source.html

 /*! \file

  *  \brief Conjugate-Gradient algorithm for a generic Linear Operator

  */


 #include "chromabase.h"

 #include "actions/ferm/invert/reliable_cg.h"


 namespace Chroma {


   //! Conjugate-Gradient with reliable updates (mixed outer/inner types)
   /*!
    * Starting from the guess \c psi, iterates on the system obtained by applying
    * the operator with PLUS and then with MINUS (presumably M^dag M -- confirm
    * against the LinearOperator convention), with right-hand side \c chi.
    *
    * The CG recurrences (x, r, p) are carried in the inner type \c TF using
    * \c AF.  When the iterated residual norm has dropped by the factor \c Delta
    * relative to its running maximum, the residual is recomputed with the
    * outer operator \c A in type \c T ("R update"); when it has dropped by
    * \c Delta relative to the residual at the last solution update, the
    * accumulated \c x is additionally folded into \c psi and the system is
    * restarted for the remaining correction ("X update").
    *
    * \param A      operator acting on the outer type T (also supplies the subset)
    * \param AF     operator acting on the inner type TF
    * \param chi    right-hand side
    * \param psi    initial guess on entry, solution on exit
    * \param RsdCG  target residual relative to || chi ||
    * \param Delta  reliable-update threshold
    * \param MaxCG  maximum number of iterations
    *
    * \return resid is the absolute residual norm at convergence; n_count is the
    *         zero-based index of the converging iteration (0 as well when the
    *         initial guess already satisfies the tolerance).
    *
    * Calls QDP_abort(1) if the tolerance is not reached within MaxCG iterations.
    */
   template<typename T, typename TF, typename RF>
 SystemSolverResults_t
 RelInvCG_a(const LinearOperator<T>& A,
            const LinearOperator<TF>& AF,
            const T& chi,
            T& psi,
            const Real& RsdCG,
            const Real& Delta,
            int MaxCG)
   {
     START_CODE();

     SystemSolverResults_t ret;

     const Subset& s = A.subset();

     bool convP = false;

     TF r;        // iterated residual, inner type
     T b;         // right-hand side of the current correction system
     T r_dble;    // residual recomputed with the outer operator
     T x_dble;    // x converted to the outer type
     int k;

     StopWatch swatch;
     FlopCounter flopcount;
     flopcount.reset();
     swatch.reset();
     swatch.start();

     b[s] = chi;
     Double chi_norm = norm2(chi,s);
     Double rsd_sq=RsdCG*RsdCG*chi_norm;

     // b = chi - A(MINUS) A(PLUS) psi : residual of the initial guess
     {
       T tmp1, tmp2;
       A(tmp1, psi, PLUS);
       A(tmp2, tmp1, MINUS);
       b[s] -= tmp2;
       flopcount.addFlops(2*A.nFlops());
       flopcount.addSiteFlops(2*Nc*Ns,s);
     }

     // x accumulates the correction to psi since the last X update
     TF x; x[s]=zero;

     // The inner iteration starts from the residual of the initial guess
     r[s] = b;

     Double r_sq = norm2(r,s);
     flopcount.addSiteFlops(4*Nc*Ns,s);

     QDPIO::cout << "Reliable CG: || r0 ||/|| b ||=" << sqrt(r_sq/chi_norm) << std::endl;

     Double rNorm = sqrt(r_sq);
     Double r0Norm = rNorm;   // residual norm at the last X update
     Double maxrx = rNorm;    // max residual norm since the last X update
     Double maxrr = rNorm;    // max residual norm since the last R update

     // If the initial guess already meets the tolerance there is nothing to do.
     // Skipping the loop also avoids a = c/d = 0/0 when the residual is exactly zero.
     if( toBool(r_sq <= rsd_sq) ) {
       ret.resid = rNorm;
       ret.n_count = 0;
       convP = true;
     }

     // Search direction and CG scalars
     TF p;
     Double a, c, d;

     // The iterations
     for(k = 0; k < MaxCG && !convP; k++) {

       if( k == 0 ) {
         p[s] = r;
       }
       else {
         // c still holds || r ||^2 from the previous iteration
         Double beta = r_sq / c;
         RF br = beta;
         p[s] = r + br*p;  flopcount.addSiteFlops(4*Nc*Ns,s);
       }

       c = r_sq;

       TF mmp,mp;
       AF(mp, p, PLUS);
       d = norm2(mp,s);
       AF(mmp,mp,MINUS);

       a = c/d;
       RF ar = a;
       x[s] += ar*p;
       r[s] -= ar*mmp;

       r_sq = norm2(r,s);

       // 4*Nc*Ns each for <mp,mp>, x += a*p, r -= a*mmp and norm2(r)
       flopcount.addSiteFlops(16*Nc*Ns,s);
       // The two operator applications above used AF, so charge AF's cost
       flopcount.addFlops(2*AF.nFlops());

       // Reliable update part...
       rNorm = sqrt(r_sq);
       if( toBool( rNorm > maxrx) ) maxrx = rNorm;
       if( toBool( rNorm > maxrr) ) maxrr = rNorm;

       const bool updateX = toBool ( rNorm < Delta*r0Norm && r0Norm <= maxrx );
       const bool updateR = toBool ( rNorm < Delta*maxrr && r0Norm <= maxrr ) || updateX;

       // Replace the iterated residual by one recomputed with the outer operator
       if( updateR ) {

         {
           T tmp1,tmp2;
           x_dble[s] = x;

           A(tmp1, x_dble, PLUS);
           A(tmp2, tmp1, MINUS);

           r_dble[s] = b - tmp2;
         }

         r[s] = r_dble;     // new r = b - A(MINUS) A(PLUS) x
         r_sq = norm2(r_dble,s);

         flopcount.addSiteFlops(6*Nc*Ns,s); // 4 from norm2, 2 from r=b-tmp2
         flopcount.addFlops(2*A.nFlops());

         rNorm = sqrt(r_sq);
         maxrr = rNorm;

         // Group-wise solution update.  updateX implies updateR, so x_dble
         // already holds x here.
         if( updateX ) {
           psi[s] += x_dble;
           flopcount.addSiteFlops(2*Nc*Ns,s);

           // Restart: solve for the remaining correction with rhs = current residual
           x[s] = zero;
           b[s] = r_dble;
           r0Norm = rNorm;
           maxrx = rNorm;
         }

       }

       // Convergence check
       if( toBool(r_sq < rsd_sq ) ) {
         // Fold whatever is left in x into psi.  Right after an X update x is
         // zero, so the addition is then harmless.
         x_dble[s] = x;
         psi[s]+=x_dble;
         flopcount.addSiteFlops(2*Nc*Ns,s);
         ret.resid = rNorm;
         ret.n_count = k;
         convP = true;
       }

     }

     // Loop is finished. Report FLOP Count...
     swatch.stop();
     flopcount.report("reliable_invcg2", swatch.getTimeInSeconds());

     // Check for nonconvergence.  Test the flag rather than k: the loop
     // increment still runs after convergence, so converging on the last
     // permitted iteration leaves k == MaxCG.
     if( !convP ) {
       QDPIO::cout << "Nonconvergence: Reliable CG Failed to converge in " << MaxCG << " iterations " << std::endl;
       QDP_abort(1);
     }

     // Done
     END_CODE();
     return ret;
   }


 //! Reliable-update CG where outer and inner type are both LatticeFermionF
 /*!
  * Forwards to RelInvCG_a, passing \c A as both the outer and the inner
  * operator and using RealF for the scalar coefficients.
  */
 SystemSolverResults_t
 InvCGReliable(const LinearOperator<LatticeFermionF>& A,
               const LatticeFermionF& chi,
               LatticeFermionF& psi,
               const Real& RsdCG,
               const Real& Delta,
               int MaxCG)
 {
   // Only one operator is supplied, so it plays both roles
   const LinearOperator<LatticeFermionF>& inner_op = A;

   SystemSolverResults_t res =
     RelInvCG_a<LatticeFermionF, LatticeFermionF, RealF>(A, inner_op, chi, psi,
                                                         RsdCG, Delta, MaxCG);
   return res;
 }


   //! Reliable-update CG where outer and inner type are both LatticeFermionD
   /*!
    * "Pure double" variant: forwards to RelInvCG_a, passing \c A as both the
    * outer and the inner operator and using RealD for the scalar coefficients.
    */
 SystemSolverResults_t
 InvCGReliable(const LinearOperator<LatticeFermionD>& A,
               const LatticeFermionD& chi,
               LatticeFermionD& psi,
               const Real& RsdCG,
               const Real& Delta,
               int MaxCG)
 {
   // Only one operator is supplied, so it plays both roles
   const LinearOperator<LatticeFermionD>& inner_op = A;

   SystemSolverResults_t res =
     RelInvCG_a<LatticeFermionD, LatticeFermionD, RealD>(A, inner_op, chi, psi,
                                                         RsdCG, Delta, MaxCG);
   return res;
 }


   //! Reliable-update CG with LatticeFermionD outer type and LatticeFermionF inner type
   /*!
    * "Single/double" variant: \c A acts on LatticeFermionD and is used for the
    * recomputed residuals, \c AF acts on LatticeFermionF and is used for the
    * iterations; scalar coefficients are RealF.  Forwards to RelInvCG_a.
    */
 SystemSolverResults_t
 InvCGReliable(const LinearOperator<LatticeFermionD>& A,
               const LinearOperator<LatticeFermionF>& AF,
               const LatticeFermionD& chi,
               LatticeFermionD& psi,
               const Real& RsdCG,
               const Real& Delta,
               int MaxCG)
 {
   SystemSolverResults_t res =
     RelInvCG_a<LatticeFermionD, LatticeFermionF, RealF>(A, AF, chi, psi,
                                                         RsdCG, Delta, MaxCG);
   return res;
 }


 }  // end namespace Chroma

chromabase.h
Primary include file for CHROMA library code.

Chroma::LinearOperator
Linear Operator.
Definition: linearop.h:27

Chroma::InvCGReliable
SystemSolverResults_t InvCGReliable(const LinearOperator< LatticeFermionF > &A, const LatticeFermionF &chi, LatticeFermionF &psi, const Real &RsdCG, const Real &Delta, int MaxCG)
Conjugate-Gradient solver with reliable updates.
Definition: reliable_cg.cc:193

x
int x
Definition: meslate.cc:34

tmp2
Double tmp2
Definition: mesq.cc:30

Chroma::StagPhases::beta
static const LatticeInteger & beta(const int dim)
Definition: stag_phases_s.h:47

Chroma
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10

Chroma::RsdCG
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
Definition: pbg5p_w.cc:30

Chroma::p
p
Definition: invbicg.cc:157

Chroma::c
Double c
Definition: invbicg.cc:108

Chroma::T
LinOpSysSolverMGProtoClover::T T
Definition: syssolver_linop_clover_mg_proto.cc:63

Chroma::MaxCG
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > int MaxCG
Definition: pbg5p_w.cc:32

Chroma::rsd_sq
Real rsd_sq
Definition: invbicg.cc:121

Chroma::MINUS
@ MINUS
Definition: chromabase.h:45

Chroma::PLUS
@ PLUS
Definition: chromabase.h:45

Chroma::chi
multi1d< LatticeFermion > chi(Ncb)

Chroma::r
r
Definition: invbicg.cc:137

Chroma::a
Complex a
Definition: invbicg.cc:95

Chroma::psi
LatticeFermion psi
Definition: mespbg5p_w.cc:35

Chroma::RelInvCG_a
SystemSolverResults_t RelInvCG_a(const LinearOperator< T > &A, const LinearOperator< TF > &AF, const T &chi, T &psi, const Real &RsdCG, const Real &Delta, int MaxCG)
Definition: reliable_cg.cc:12

Chroma::d
DComplex d
Definition: invbicg.cc:99

Chroma::START_CODE
START_CODE()

Chroma::A
A(A, psi, r, Ncb, PLUS)

Chroma::END_CODE
END_CODE()

Chroma::mp
multi1d< LatticeFermion > mp(Ncb)

Chroma::b
Complex b
Definition: invbicg.cc:96

Chroma::chi_norm
Double chi_norm
Definition: invbicg.cc:79

Chroma::zero
Double zero
Definition: invbicg.cc:106

Chroma::k
int k
Definition: invbicg.cc:119

Chroma::s
multi1d< LatticeFermion > s(Ncb)

testing::internal::Double
FloatingPoint< double > Double
Definition: gtest.h:7351

reliable_cg.h
Conjugate-Gradient solver with reliable updates.

Chroma::SystemSolverResults_t
Holds return info from SystemSolver call.
Definition: syssolver.h:17

Chroma::SystemSolverResults_t::n_count
int n_count
Definition: syssolver.h:20

Chroma::SystemSolverResults_t::resid
Real resid
Definition: syssolver.h:21

TF
LatticeFermionF TF
Definition: t_quda_tprec.cc:17