www/dox/reliable__bicgstab_8cc_source.html

 /*! \file

  *  \brief BiCGStab algorithm with reliable updates for a generic Linear Operator

  */


 #include "chromabase.h"

 #include "actions/ferm/invert/reliable_bicgstab.h"


 #include "actions/ferm/invert/bicgstab_kernels.h"


 namespace Chroma {


   using namespace BiCGStabKernels;


   template<typename T, typename TF, typename CF>

 SystemSolverResults_t

 RelInvBiCGStab_a(const LinearOperator<T>& A,

               const LinearOperator<TF>& AF,

               const T& chi,

               T& psi,

               const Real& RsdBiCGStab,

               const Real& Delta,

               int MaxBiCGStab,

               enum PlusMinus isign)

   {

   SystemSolverResults_t ret;


   BiCGStabKernels::initKernels();


   const Subset& s = A.subset();


   bool convP = false;


   // These are all the vectors. There should be

   // None declared later on. These declarations do 'mallocs'

   // under the hood. Want those out of the main loop.

   T b;

   T tmp;

   T r_dble;

   T x_dble;


   TF r;

   TF r0;

   TF x;

   TF p;

   TF v;

   TF t;


   int k;


   StopWatch swatch;

   FlopCounter flopcount;

   flopcount.reset();

   swatch.reset();

   swatch.start();


   x[s]=zero;

   p[s] = zero;

   v[s] = zero;


   Double rsd_sq =  Double(RsdBiCGStab)*Double(RsdBiCGStab)*norm2(chi,s);

   Double b_sq;


   A(tmp, psi, isign);


   // We could do all this in a onner

   // b_sq = minusTmpB(tmp, b, r, r0,s)

   //

   //b[s] = chi-tmp;

   //b_sq = norm2(b,s);

   // r[s] = b;


   xymz_normx(b,chi,tmp,b_sq,s);

   r[s] = b;

   r0[s] = b;

   Double r_sq = b_sq;

   QDPIO::cout << "r0 = " << b_sq << std::endl;;


   flopcount.addFlops(A.nFlops());

   flopcount.addSiteFlops(2*Nc*Ns,s);

   flopcount.addSiteFlops(4*Nc*Ns,s);


   Double rNorm = sqrt(r_sq);

   Double r0Norm = rNorm;

   Double maxrx = rNorm;

   Double maxrr = rNorm;

   bool updateR = false;

   bool updateX = false;

   int rupdates = 0;

   int xupdates = 0;


   DComplex rho, rho_prev, alpha, omega;


   DComplex ctmp;

   Double t_norm;


   CF rho_r, alpha_r, omega_r;

   // rho_0 := alpha := omega = 1

   // Iterations start at k=1, so rho_0 is in rho_prev

   rho = Double(1);

   rho_prev = Double(1);

   alpha = Double(1);

   omega = Double(1);


   // The iterations

   for(k = 0; k < MaxBiCGStab && !convP ; k++) {


     if( k == 0 ) {

       // I know that r_0 = r so <r_0|r>=norm2(r) = r_sq

       // rho = innerProduct(r0,r,s);

       rho = r_sq;

       p[s] = r;

     }

     else {

       DComplex beta =(rho / rho_prev) * (alpha/omega);

       CF beta_r = beta;

       omega_r = omega;


       // NB: This could be done in a onner

       // rPlusBetaPMinusBetaOmegav(p, r, v, beta, omega, s)


       // p = r + beta(p - omega v)

       // first work out p - omega v

       // into tmp

       // then do p = r + beta tmp


       // tmp[s] = p - omega_r*v;

       // p[s] = r + beta_r*tmp;

       yxpaymabz(r, p, v, beta_r, omega_r, s);


     }


     // v = Ap

     AF(v,p,isign);


     // alpha = rho_{k+1} / < r_0 | v >

     // put <r_0 | v > into tmp

     ctmp = innerProduct(r0,v,s);


     if( toBool( real(ctmp) == 0 ) && toBool( imag(ctmp) == 0 ) ) {

       QDPIO::cout << "BiCGStab breakdown: <r_0|v> = 0" << std::endl;

       QDP_abort(1);

     }


     alpha = rho / ctmp;


     // Done with rho now, so save it into rho_prev

     rho_prev = rho;


     // s = r - alpha v

     // I can overlap s with r, because I recompute it at the end.

     alpha_r = alpha;

     // r[s]  -=  alpha_r*v;

     cxmay(r,v,alpha_r,s);


     // t = As  = Ar

     AF(t,r,isign);


     // omega = < t | s > / < t | t > = < t | r > / norm2(t);

     // accumulate <t | s > = <t | r> into omega


     // As Mike tells me, I could do these together.

     // I can probably reduce these to a single ALLREDUCE/QMP_sum_double_array()

     //

     // some routine like:  t_norm = normXCdotXY(t,r,s, iprod_r, iprod_i)

     // Double t_norm = norm2(t,s);

     // omega = innerProduct(t,r,s);


     norm2x_cdotxy(t,r, t_norm, omega, s);


     omega /= t_norm;


     // again

     // This is a simple xPlusAYPlusBz(x,r,p,omega,alpha)

     // psi = psi + omega s + alpha p

     //     = psi + omega r + alpha p

     //

     // use tmp to compute psi + omega r

     // then add in the alpha p

     omega_r = omega;

     // tmp[s] = x + omega_r*r;

     // x[s] = tmp + alpha_r*p;


     xpaypbz(x,r,p,omega_r, alpha_r,s);


     // r = s - omega t = r - omega t1G


     // I can roll this all together

     // r_sq = XMinusAYNormXCDotZX(r,t,r0,omega_r, omega_i, rho_r, rho_i, s),

     // r[s] -= omega_r*t;

     // r_sq = norm2(r,s);

     // rho = innerProduct(r0,r,s);


     xmay_normx_cdotzx(r, t, r0, omega_r, r_sq, rho,s);


     // Flops so far: Standard BiCGStab Flops

     // -----------------------------------------

     flopcount.addSiteFlops(80*Nc*Ns,s);

     flopcount.addFlops(2*A.nFlops());

     // ------------------------------------------


     rNorm = sqrt(r_sq);


     if( toBool( rNorm > maxrx) ) maxrx = rNorm;

     if( toBool( rNorm > maxrr) ) maxrr = rNorm;


     updateX = toBool ( rNorm < Delta*r0Norm && r0Norm <= maxrx );

     updateR = toBool ( rNorm < Delta*maxrr && r0Norm <= maxrr ) || updateX;


     if( updateR ) {

       // QDPIO::cout << "Iter " << k << ": updating r " << std::endl;

       rupdates++;


       x_dble[s] = x;


       A(tmp, x_dble, isign); // Use full solution so far


       // Roll this together - can eliminate r_dble which is an intermediary


       // r_dble[s] = b - tmp2;

       // r_sq = norm2(r_dble,s);

       // r[s] = r_dble;

       xymz_normx(r_dble, b,tmp, r_sq,s);

       r[s]=r_dble;


       flopcount.addSiteFlops(6*Nc*Ns,s);

       flopcount.addFlops(A.nFlops());


       rNorm = sqrt(r_sq);

       maxrr = rNorm;


       if( updateX ) {

         xupdates++;

         //QDPIO::cout << "Iter " << k << ": updating x " << std::endl;

         if( ! updateR ) { x_dble[s]=x; } // if updateR then this is done already

         psi[s] += x_dble; // Add on group accumulated solution in y

         flopcount.addSiteFlops(2*Nc*Ns,s);


         x[s] = zero; // zero y

         b[s] = r_dble;

         r0Norm = rNorm;

         maxrx = rNorm;

       }


     }


     if( toBool(r_sq < rsd_sq ) ) {


       convP = true;


       // if updateX true, then we have just updated psi

       // strictly x[s] should be zero, so it should be OK to add it

       // but why do the work if you don't need to

       x_dble[s] = x;

       psi[s]+=x_dble;

       flopcount.addSiteFlops(2*Nc*Ns,s);

       ret.resid = rNorm;

       ret.n_count = k;

     }

     else {

       convP = false;

     }


   }

   swatch.stop();

   if( k >= MaxBiCGStab ) {

     QDPIO::cerr << "Nonconvergence of reliable BiCGStab. MaxIters = " << MaxBiCGStab << " exceeded" << std::endl;

     QDP_abort(1);

   }

   else {

     QDPIO::cout << "reliable_bicgstab: n_count " << ret.n_count << " r-updates: " << rupdates << " xr-updates: " << xupdates  << std::endl;

     flopcount.report("reliable_bicgstab", swatch.getTimeInSeconds());

   }


   BiCGStabKernels::finishKernels();

   return ret;


 }


 // Single-precision entry point: the same operator A and the same vector
 // type are used for both the residual recomputation and the recurrence.
 SystemSolverResults_t
 InvBiCGStabReliable(const LinearOperator<LatticeFermionF>& A,
                     const LatticeFermionF& chi,
                     LatticeFermionF& psi,
                     const Real& RsdBiCGStab,
                     const Real& Delta,
                     int MaxBiCGStab,
                     enum PlusMinus isign)
 {
   typedef LatticeFermionF Vec;

   SystemSolverResults_t res =
     RelInvBiCGStab_a<Vec, Vec, ComplexF>(A, A,
                                          chi, psi,
                                          RsdBiCGStab, Delta,
                                          MaxBiCGStab, isign);
   return res;
 }


   // Pure double

 // Double-precision entry point: the same operator A and the same vector
 // type are used for both the residual recomputation and the recurrence.
 SystemSolverResults_t
 InvBiCGStabReliable(const LinearOperator<LatticeFermionD>& A,
                     const LatticeFermionD& chi,
                     LatticeFermionD& psi,
                     const Real& RsdBiCGStab,
                     const Real& Delta,
                     int MaxBiCGStab,
                     enum PlusMinus isign)
 {
   typedef LatticeFermionD Vec;

   SystemSolverResults_t res =
     RelInvBiCGStab_a<Vec, Vec, ComplexD>(A, A,
                                          chi, psi,
                                          RsdBiCGStab, Delta,
                                          MaxBiCGStab, isign);
   return res;
 }


   // Mixed precision: double-precision operator A, single-precision operator AF

 // Mixed-precision entry point: A acts on LatticeFermionD vectors (psi, chi
 // and the recomputed residual), AF acts on the LatticeFermionF vectors of
 // the recurrence.
 SystemSolverResults_t
 InvBiCGStabReliable(const LinearOperator<LatticeFermionD>& A,
                     const LinearOperator<LatticeFermionF>& AF,
                     const LatticeFermionD& chi,
                     LatticeFermionD& psi,
                     const Real& RsdBiCGStab,
                     const Real& Delta,
                     int MaxBiCGStab,
                     enum PlusMinus isign)
 {
   typedef LatticeFermionD OuterVec;
   typedef LatticeFermionF InnerVec;

   SystemSolverResults_t res =
     RelInvBiCGStab_a<OuterVec, InnerVec, ComplexF>(A, AF,
                                                    chi, psi,
                                                    RsdBiCGStab, Delta,
                                                    MaxBiCGStab, isign);
   return res;
 }


 #if 0


 #endif


 }  // end namespace Chroma

bicgstab_kernels.h

chromabase.h
Primary include file for CHROMA library code.

Chroma::LinearOperator
Linear Operator.
Definition: linearop.h:27

Chroma::InvBiCGStabReliable
SystemSolverResults_t InvBiCGStabReliable(const LinearOperator< LatticeFermionF > &A, const LatticeFermionF &chi, LatticeFermionF &psi, const Real &RsdBiCGStab, const Real &Delta, int MaxBiCGStab, enum PlusMinus isign)
Bi-CG stabilized.
Definition: reliable_bicgstab.cc:295

x
int x
Definition: meslate.cc:34

t
int t
Definition: meslate.cc:37

Chroma::BiCGStabKernels::xpaypbz
void xpaypbz(T &x, T &y, T &z, C &a, C &b, const Subset &s)
Definition: bicgstab_kernels_naive.h:38

Chroma::BiCGStabKernels::xmay_normx_cdotzx
void xmay_normx_cdotzx(T &x, const T &y, const T &z, C &a, Double &normx, DComplex &cdotzx, const Subset &s)
Definition: bicgstab_kernels_naive.h:46

Chroma::BiCGStabKernels::xymz_normx
void xymz_normx(T &x, const T &y, const T &z, Double &x_norm, const Subset &s)
Definition: bicgstab_kernels_naive.h:15

Chroma::BiCGStabKernels::norm2x_cdotxy
void norm2x_cdotxy(const T &x, const T &y, Double &norm2x, DComplex &cdotxy, const Subset &s)
Definition: bicgstab_kernels_naive.h:31

Chroma::BiCGStabKernels::cxmay
void cxmay(T &x, const T &y, const C &a, const Subset &s)
Definition: bicgstab_kernels_naive.h:55

Chroma::BiCGStabKernels::yxpaymabz
void yxpaymabz(T &x, T &y, T &z, const C &a, const C &b, const Subset &s)
Definition: bicgstab_kernels_naive.h:23

Chroma::BiCGStabKernels::finishKernels
void finishKernels()
Definition: bicgstab_kernels.h:31

Chroma::BiCGStabKernels::initKernels
void initKernels()
Definition: bicgstab_kernels.h:20

Chroma::InlinePropAndMatElemDistillation2Env::local::innerProduct
BinaryReturn< C1, C2, FnInnerProduct >::Type_t innerProduct(const QDPSubType< T1, C1 > &s1, const QDPType< T2, C2 > &s2)
Definition: inline_prop_and_matelem_distillation2_w.cc:463

Chroma::StagPhases::beta
static const LatticeInteger & beta(const int dim)
Definition: stag_phases_s.h:47

Chroma::StagPhases::alpha
static const LatticeInteger & alpha(const int dim)
Definition: stag_phases_s.h:43

Chroma
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10

Chroma::p
p
Definition: invbicg.cc:157

Chroma::tmp
LatticeFermion tmp
Definition: mespbg5p_w.cc:36

Chroma::T
LinOpSysSolverMGProtoClover::T T
Definition: syssolver_linop_clover_mg_proto.cc:63

Chroma::rsd_sq
Real rsd_sq
Definition: invbicg.cc:121

Chroma::PlusMinus
PlusMinus
Definition: chromabase.h:45

Chroma::chi
multi1d< LatticeFermion > chi(Ncb)

Chroma::r
r
Definition: invbicg.cc:137

Chroma::RelInvBiCGStab_a
SystemSolverResults_t RelInvBiCGStab_a(const LinearOperator< T > &A, const LinearOperator< TF > &AF, const T &chi, T &psi, const Real &RsdBiCGStab, const Real &Delta, int MaxBiCGStab, enum PlusMinus isign)
Definition: reliable_bicgstab.cc:16

Chroma::omega
Complex omega
Definition: invbicg.cc:97

Chroma::psi
LatticeFermion psi
Definition: mespbg5p_w.cc:35

Chroma::A
A(A, psi, r, Ncb, PLUS)

Chroma::b
Complex b
Definition: invbicg.cc:96

Chroma::zero
Double zero
Definition: invbicg.cc:106

Chroma::k
int k
Definition: invbicg.cc:119

Chroma::s
multi1d< LatticeFermion > s(Ncb)

Chroma::isign
isign
Definition: pbg5p_w.cc:58

testing::internal::Double
FloatingPoint< double > Double
Definition: gtest.h:7351

r0
int r0
Definition: qtopcor.cc:41

reliable_bicgstab.h
BiCGStab Solver with reliable updates.

Chroma::SystemSolverResults_t
Holds return info from SystemSolver call.
Definition: syssolver.h:17

Chroma::SystemSolverResults_t::n_count
int n_count
Definition: syssolver.h:20

Chroma::SystemSolverResults_t::resid
Real resid
Definition: syssolver.h:21

TF
LatticeFermionF TF
Definition: t_quda_tprec.cc:17