www/dox/invcg2_8cc_source.html

 /*! \file

  *  \brief Conjugate-Gradient algorithm for a generic Linear Operator

  */


 #include "chromabase.h"

 #include "actions/ferm/invert/invcg2.h"


 using namespace QDP::Hints;

 #undef PAT

 #ifdef PAT

 #include <pat_api.h>

 #endif


 namespace Chroma

 {


   //! Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator

   /*! \ingroup invert

    * This subroutine uses the Conjugate Gradient (CG) algorithm to find

    * the solution of the set of linear equations

    *

    *        Chi  =  A . Psi

    *

    * where       A = M^dag . M

    *

    * Algorithm:


    *  Psi[0]  :=  initial guess;               Linear interpolation (argument)

    *  r[0]    :=  Chi - M^dag . M . Psi[0] ;     Initial residual

    *  p[1]    :=  r[0] ;                               Initial direction

    *  IF |r[0]| <= RsdCG |Chi| THEN RETURN;      Converged?

    *  FOR k FROM 1 TO MaxCG DO                 CG iterations

    *      a[k] := |r[k-1]|**2 / <Mp[k],Mp[k]> ;

    *      Psi[k] += a[k] p[k] ;                New solution vector

    *      r[k] -= a[k] M^dag . M . p[k] ;        New residual

    *      IF |r[k]| <= RsdCG |Chi| THEN RETURN;  Converged?

    *      b[k+1] := |r[k]|**2 / |r[k-1]|**2 ;

    *      p[k+1] := r[k] + b[k+1] p[k];          New direction

    *

    * Arguments:

    *

    *  \param M       Linear Operator                   (Read)

    *  \param chi     Source                    (Read)

    *  \param psi     Solution                          (Modify)

    *  \param RsdCG   CG residual accuracy        (Read)

    *  \param MaxCG   Maximum CG iterations       (Read)

    *  \return res    System solver results

    *

    * Local Variables:

    *

    *  p                Direction vector

    *  r                Residual vector

    *  cp               | r[k] |**2

    *  c                | r[k-1] |**2

    *  k                CG iteration counter

    *  a                a[k]

    *  b                b[k+1]

    *  d                < p[k], A.p[k] >

    *  Mp               Temporary for  M.p

    *

    * Subroutines:

    *  A       Apply matrix M or M^dag to vector

    *

    * Operations:

    *

    *  2 A + 2 Nc Ns + N_Count ( 2 A + 10 Nc Ns )

    */


   template<typename T, typename RT>
   SystemSolverResults_t
   InvCG2_a(const LinearOperator<T>& M,
            const T& chi,
            T& psi,
            const RT& RsdCG,
            int MaxCG)
   {
     START_CODE();

     // Every lattice-wide operation below is restricted to the operator's subset.
     const Subset& sub = M.subset();

     SystemSolverResults_t  result;

     // Work fields.  Each one is passed to moveToFastMemoryHint right after it
     // is declared; psi (the caller's field) is hinted too and reverted again
     // before every return.
     T Mdir;              moveToFastMemoryHint(Mdir);    // M . dir (also scratch for M . psi)
     T Adir;              moveToFastMemoryHint(Adir);    // M^dag . M . dir
     T dir;               moveToFastMemoryHint(dir);     // search direction p[k]
     moveToFastMemoryHint(psi,true);
     T resid;             moveToFastMemoryHint(resid);   // residual r[k]
     T src;               moveToFastMemoryHint(src);     // private copy of chi

     src[sub] = chi;

     QDPIO::cout << "InvCG2: starting" << std::endl;

     // Flop and wall-clock accounting for the whole solve.
     FlopCounter flops;
     flops.reset();
     StopWatch timer;
     timer.reset();
     timer.start();

     // |chi|^2 on the subset
     Double src_norm2 = norm2(src, sub);
     flops.addSiteFlops(4*Nc*Ns, sub);

 #if 0
     QDPIO::cout << "chi_norm = " << sqrt(src_norm2) << std::endl;
 #endif

     // Stopping threshold on the squared residual: (RsdCG |chi|)^2
     Double target = (RsdCG * RsdCG) * src_norm2;

     // r[0] := chi - M^dag . M . psi[0]
     M(Mdir, psi, PLUS);
     M(Adir, Mdir, MINUS);
     flops.addFlops(2*M.nFlops());

     resid[sub] = src - Adir;
     flops.addSiteFlops(2*Nc*Ns, sub);

     // |r[0]|^2
     Double rnorm2 = norm2(resid, sub);
     flops.addSiteFlops(4*Nc*Ns, sub);

 #if 0
     QDPIO::cout << "InvCG: k = 0  || r ||= " <<sqrt(rnorm2) << std::endl;
 #endif

     // p[1] := r[0]
     dir[sub] = resid;

     // The initial guess may already satisfy the stopping criterion.
     if ( toBool(rnorm2 <= target) )
     {
       result.n_count = 0;
       result.resid   = sqrt(rnorm2);
       timer.stop();
       flops.report("invcg2", timer.getTimeInSeconds());
       revertFromFastMemoryHint(psi,true);
       END_CODE();
       return result;
     }

     // Main CG loop: at most MaxCG iterations.
     for(int iter = 1; iter <= MaxCG; ++iter)
     {
       // |r[k-1]|^2, needed for both a[k] and b[k+1]
       Double rnorm2_prev = rnorm2;

       // Mdir = M . p[k]
       M(Mdir, dir, PLUS);
       flops.addFlops(M.nFlops());

       // < p, M^dag M p > evaluated as | M p |^2
       Double curvature = norm2(Mdir, sub);
       flops.addSiteFlops(4*Nc*Ns, sub);

       // Adir = M^dag . M . p[k]
       M(Adir, Mdir, MINUS);
       flops.addFlops(M.nFlops());

       // a[k] := |r[k-1]|^2 / < p[k], A p[k] >, then converted to the field's
       // scalar type RT before it multiplies lattice fields.
       Double alpha = rnorm2_prev / curvature;
       RT alpha_rt = alpha;

       // r[k] := r[k-1] - a[k] A p[k]
       resid[sub] -= alpha_rt * Adir;
       flops.addSiteFlops(4*Nc*Ns, sub);

       // |r[k]|^2
       rnorm2 = norm2(resid, sub);
       flops.addSiteFlops(4*Nc*Ns, sub);

       // psi[k] := psi[k-1] + a[k] p[k]
       psi[sub] += alpha_rt * dir;
       flops.addSiteFlops(4*Nc*Ns, sub);

 //    QDPIO::cout << "InvCG: k = " << iter << "  cp = " << rnorm2 << std::endl;

       // Converged?  |r[k]|^2 <= (RsdCG |chi|)^2
       if ( toBool(rnorm2 <= target) )
       {
         result.n_count = iter;
         result.resid   = sqrt(rnorm2);
         timer.stop();
         flops.report("invcg2", timer.getTimeInSeconds());
         revertFromFastMemoryHint(psi,true);

         // Replace the recursively updated residual norm by the one recomputed
         // from the final psi.  These two operator applications happen after
         // the timer was stopped and are not added to the flop count.
         {
           M(Mdir, psi, PLUS);
           M(Adir, Mdir, MINUS);
           Double true_norm2 = norm2(chi - Adir, sub);
           result.resid = sqrt(true_norm2);
         }

         END_CODE();
         return result;
       }

       // b[k+1] := |r[k]|^2 / |r[k-1]|^2
       Double beta = rnorm2 / rnorm2_prev;
       RT beta_rt = beta;

       // p[k+1] := r[k] + b[k+1] p[k]
       dir[sub] = resid + beta_rt*dir;
       flops.addSiteFlops(4*Nc*Ns, sub);
     }

     // The loop ran out without meeting the criterion: report it and return
     // the last recursively updated residual norm.
     result.n_count = MaxCG;
     result.resid   = sqrt(rnorm2);
     timer.stop();
     QDPIO::cerr << "Nonconvergence Warning" << std::endl;
     flops.report("invcg2", timer.getTimeInSeconds());
     revertFromFastMemoryHint(psi,true);
     QDPIO::cerr << "too many CG iterations: count =" << result.n_count <<" rsd^2= " << rnorm2 << std::endl <<std::flush;

     END_CODE();
     return result;
   }


   //

   // Explicit versions

   //

   // Single precision

   SystemSolverResults_t
   InvCG2(const LinearOperator<LatticeFermionF>& M,
          const LatticeFermionF& chi,
          LatticeFermionF& psi,
          const Real& RsdCG,
          int MaxCG)
   {
     // Single-precision entry point: forwards to the templated solver with
     // T = LatticeFermionF and RT = RealF.  When PAT is defined the solve is
     // bracketed by profiling region 20; the status returned in ierr is not
     // inspected.
 #ifdef PAT
     int ierr = PAT_region_begin(20, "InvCG2Single");
 #endif

     // Keep the result in a local so that the region can be closed before
     // returning; with the return placed first, PAT_region_end was unreachable.
     SystemSolverResults_t res = InvCG2_a<LatticeFermionF,RealF>(M, chi, psi, RsdCG, MaxCG);

 #ifdef PAT
     ierr = PAT_region_end(20);
 #endif

     return res;
   }


   // Double precision

   SystemSolverResults_t
   InvCG2(const LinearOperator<LatticeFermionD>& M,
          const LatticeFermionD& chi,
          LatticeFermionD& psi,
          const Real& RsdCG,
          int MaxCG)
   {
     // Double-precision entry point: forwards to the templated solver with
     // T = LatticeFermionD and RT = RealD.  When PAT is defined the solve is
     // bracketed by profiling region 21; the status returned in ierr is not
     // inspected.
 #ifdef PAT
     int ierr=PAT_region_begin(21, "InvCG2Double");
 #endif

     // Keep the result in a local so that the region can be closed before
     // returning; with the return placed first, PAT_region_end was unreachable.
     SystemSolverResults_t res = InvCG2_a<LatticeFermionD, RealD>(M, chi, psi, RsdCG, MaxCG);

 #ifdef PAT
     ierr= PAT_region_end(21);
 #endif

     return res;
   }


   // Single precision

   SystemSolverResults_t
   InvCG2(const LinearOperator<LatticeStaggeredFermionF>& M,
          const LatticeStaggeredFermionF& chi,
          LatticeStaggeredFermionF& psi,
          const Real& RsdCG,
          int MaxCG)
   {
     // Single-precision staggered entry point: forwards unchanged to the
     // templated solver with T = LatticeStaggeredFermionF and RT = RealF.
     SystemSolverResults_t res = InvCG2_a<LatticeStaggeredFermionF,RealF>(M, chi, psi, RsdCG, MaxCG);
     return res;
   }


   // Double precision

   SystemSolverResults_t
   InvCG2(const LinearOperator<LatticeStaggeredFermionD>& M,
          const LatticeStaggeredFermionD& chi,
          LatticeStaggeredFermionD& psi,
          const Real& RsdCG,
          int MaxCG)
   {
     // Double-precision staggered entry point: forwards unchanged to the
     // templated solver with T = LatticeStaggeredFermionD and RT = RealD.
     SystemSolverResults_t res = InvCG2_a<LatticeStaggeredFermionD,RealD>(M, chi, psi, RsdCG, MaxCG);
     return res;
   }


 }  // end namespace Chroma

chromabase.h
Primary include file for CHROMA library code.

END_CODE
#define END_CODE()
Definition: chromabase.h:65

START_CODE
#define START_CODE()
Definition: chromabase.h:64

Chroma::LinearOperator
Linear Operator.
Definition: linearop.h:27

Chroma::LinearOperator::subset
virtual const Subset & subset() const =0
Return the subset on which the operator acts.

Chroma::LinearOperator::nFlops
virtual unsigned long nFlops() const
Definition: linearop.h:48

MaxCG
EXTERN int MaxCG
Definition: common_declarations.h:15

Chroma::InvCG2_a
SystemSolverResults_t InvCG2_a(const LinearOperator< T > &M, const T &chi, T &psi, const RT &RsdCG, int MaxCG)
Conjugate-Gradient (CGNE) algorithm for a generic Linear Operator.
Definition: invcg2.cc:72

Chroma::InvCG2
SystemSolverResults_t InvCG2(const LinearOperator< LatticeStaggeredFermionD > &M, const LatticeStaggeredFermionD &chi, LatticeStaggeredFermionD &psi, const Real &RsdCG, int MaxCG)
Definition: invcg2.cc:286

invcg2.h
Conjugate-Gradient algorithm for a generic Linear Operator.

s
unsigned s
Definition: ldumul_w.cc:37

c
int c
Definition: meslate.cc:61

Chroma
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10

Chroma::RsdCG
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > const multi1d< Real > enum InvType invType const multi1d< Real > & RsdCG
Definition: pbg5p_w.cc:30

Chroma::p
p
Definition: invbicg.cc:157

Chroma::T
LinOpSysSolverMGProtoClover::T T
Definition: syssolver_linop_clover_mg_proto.cc:63

Chroma::rsd_sq
Real rsd_sq
Definition: invbicg.cc:121

Chroma::MINUS
@ MINUS
Definition: chromabase.h:45

Chroma::PLUS
@ PLUS
Definition: chromabase.h:45

Chroma::cp
Double cp
Definition: invbicg.cc:107

Chroma::a
Complex a
Definition: invbicg.cc:95

Chroma::d
DComplex d
Definition: invbicg.cc:99

Chroma::mp
multi1d< LatticeFermion > mp(Ncb)

Chroma::b
Complex b
Definition: invbicg.cc:96

Chroma::k
int k
Definition: invbicg.cc:119

testing::internal::Double
FloatingPoint< double > Double
Definition: gtest.h:7351

r
multi1d< LatticeFermion > r(Ncb)

chi
chi
Definition: pade_trln_w.cc:24

psi
psi
Definition: pade_trln_w.cc:191

Chroma::SystemSolverResults_t
Holds return info from SystemSolver call.
Definition: syssolver.h:17

Chroma::SystemSolverResults_t::n_count
int n_count
Definition: syssolver.h:20

Chroma::SystemSolverResults_t::resid
Real resid
Definition: syssolver.h:21