6 #ifndef __syssolver_mdagm_clover_qphix_iter_refine_h__
7 #define __syssolver_mdagm_clover_qphix_iter_refine_h__
30 #include "qphix/geometry.h"
31 #include "qphix/qdp_packer.h"
32 #include "qphix/clover.h"
33 #include "qphix/invbicgstab.h"
34 #include "qphix/inv_richardson_multiprec.h"
45 namespace MdagMSysSolverQPhiXCloverIterRefineEnv
57 using namespace QPhiXVecTraits;
59 template<
typename T,
typename U>
60 class MdagMSysSolverQPhiXCloverIterRefine :
public MdagMSystemSolver<T>
64 using REALT =
typename WordType<T>::Type_t;
66 using InnerReal = CHROMA_QPHIX_INNER_TYPE;
68 static const int OuterVec = MixedVecTraits<REALT,InnerReal>::Vec;
69 static const int OuterSoa = MixedVecTraits<REALT,InnerReal>::Soa;
70 static const int InnerVec = MixedVecTraits<REALT,InnerReal>::VecInner;
71 static const int InnerSoa = MixedVecTraits<REALT,InnerReal>::SoaInner;
72 static const bool comp12 = MixedVecTraits<REALT,InnerReal>::compress12;
74 using OuterGeom = QPhiX::Geometry<REALT,OuterVec,OuterSoa,comp12>;
75 using InnerGeom = QPhiX::Geometry<InnerReal,InnerVec,InnerSoa,comp12>;
77 using QPhiX_Spinor =
typename OuterGeom::FourSpinorBlock;
78 using QPhiX_Gauge =
typename OuterGeom::SU3MatrixBlock;
79 using QPhiX_Clover =
typename OuterGeom::CloverBlock;
81 using QPhiX_InnerSpinor =
typename InnerGeom::FourSpinorBlock;
82 using QPhiX_InnerGauge =
typename InnerGeom::SU3MatrixBlock;
83 using QPhiX_InnerClover =
typename InnerGeom::CloverBlock;
91 MdagMSysSolverQPhiXCloverIterRefine(Handle< LinearOperator<T> > A_,
92 Handle< FermState<T,Q,Q> > state_,
93 const SysSolverQPhiXCloverParams& invParam_) :
98 QDPIO::cout <<
"MdagMSysSolverQPhiXCloverIterRefine:" << std::endl;
100 if ( toBool( invParam.Delta < 0 ) ) {
101 QDPIO::cout <<
"Error: Delta parameter for solve should be set" << std::endl;
105 QDPIO::cout <<
"AntiPeriodicT is: " << invParam.AntiPeriodicT << std::endl;
108 QDPIO::cout <<
"Veclen is " << OuterVec << std::endl;
109 QDPIO::cout <<
"Soalen is " << OuterSoa << std::endl;
110 QDPIO::cout <<
"Inner Veclen is " << InnerVec << std::endl;
111 QDPIO::cout <<
"Inner Soalen is " << InnerSoa << std::endl;
113 if ( OuterSoa > OuterVec ) {
114 QDPIO::cerr <<
"PROBLEM: Soalen > Veclen. Please set soalen appropriately (<=VECLEN) at compile time" << std::endl;
118 if ( InnerSoa > InnerVec ) {
119 QDPIO::cerr <<
"PROBLEM: Inner Soalen > Inner Veclen. Please set soalen appropriately at compile time" << std::endl;
125 u[
mu] = state_->getLinks()[
mu];
129 multi1d<Real> aniso_coeffs(
Nd);
130 for(
int mu=0;
mu <
Nd;
mu++) aniso_coeffs[
mu] = Real(1);
132 bool anisotropy = invParam.CloverParams.anisoParam.anisoP;
134 aniso_coeffs =
makeFermCoeffs( invParam.CloverParams.anisoParam );
137 double t_boundary=(double)(1);
141 if (invParam.AntiPeriodicT) {
142 t_boundary=(double)(-1);
144 u[3] *= where(Layout::latticeCoordinate(3) == (Layout::lattSize()[3]-1),
145 Real(t_boundary), Real(1));
148 cbsize_in_blocks = rb[0].numSiteTable()/OuterSoa;
150 const QPhiX::QPhiXCLIArgs& QPhiXParams = TheQPhiXParams::Instance();
155 int pad_xy = QPhiXParams.getPxy();
156 int pad_xyz = QPhiXParams.getPxyz();
158 n_blas_simt = QPhiXParams.getSy()*QPhiXParams.getSz();
161 geom_outer =
new OuterGeom( Layout::subgridLattSize().slice(),
164 QPhiXParams.getNCores(),
169 QPhiXParams.getMinCt());
172 geom_inner =
new InnerGeom( Layout::subgridLattSize().slice(),
175 QPhiXParams.getNCores(),
180 QPhiXParams.getMinCt());
183 #ifndef QDP_IS_QDPJIT
184 psi_qphix=(QPhiX_Spinor *)geom_outer->allocCBFourSpinor();
185 chi_qphix=(QPhiX_Spinor *)geom_outer->allocCBFourSpinor();
186 tmp_qphix=(QPhiX_Spinor *)geom_outer->allocCBFourSpinor();
195 QDPIO::cout <<
"Packing gauge field..." ;
197 QPhiX_Gauge* packed_gauge_cb0=(QPhiX_Gauge *)geom_outer->allocCBGauge();
198 QPhiX_Gauge* packed_gauge_cb1=(QPhiX_Gauge *)geom_outer->allocCBGauge();
201 QPhiX_InnerGauge* packed_gauge_cb0_inner=(QPhiX_InnerGauge *)geom_inner->allocCBGauge();
202 QPhiX_InnerGauge* packed_gauge_cb1_inner=(QPhiX_InnerGauge *)geom_inner->allocCBGauge();
205 QPhiX::qdp_pack_gauge<>(
u, packed_gauge_cb0,packed_gauge_cb1, *geom_outer);
206 u_packed[0] = packed_gauge_cb0;
207 u_packed[1] = packed_gauge_cb1;
210 QPhiX::qdp_pack_gauge<>(
u, packed_gauge_cb0_inner,packed_gauge_cb1_inner, *geom_inner);
211 u_packed_i[0] = packed_gauge_cb0_inner;
212 u_packed_i[1] = packed_gauge_cb1_inner;
215 QDPIO::cout <<
"Creating Clover Term" << std::endl;
217 clov->
create(state_, invParam.CloverParams);
218 QDPIO::cout <<
"Inverting Clover Term" << std::endl;
219 invclov->create(state_, invParam.CloverParams, (*clov));
220 for(
int cb=0;
cb < 2;
cb++) {
223 QDPIO::cout <<
"Done" << std::endl;
225 QPhiX_Clover* A_cb0=(QPhiX_Clover *)geom_outer->allocCBClov();
226 QPhiX_Clover* A_cb1=(QPhiX_Clover *)geom_outer->allocCBClov();
227 clov_packed[0] = A_cb0;
228 clov_packed[1] = A_cb1;
230 QPhiX_Clover* A_inv_cb0=(QPhiX_Clover *)geom_outer->allocCBClov();
231 QPhiX_Clover* A_inv_cb1=(QPhiX_Clover *)geom_outer->allocCBClov();
232 invclov_packed[0] = A_inv_cb0;
233 invclov_packed[1] = A_inv_cb1;
236 QPhiX_InnerClover* A_cb0_inner=(QPhiX_InnerClover *)geom_inner->allocCBClov();
237 QPhiX_InnerClover* A_cb1_inner=(QPhiX_InnerClover *)geom_inner->allocCBClov();
238 clov_packed_i[0] = A_cb0_inner;
239 clov_packed_i[1] = A_cb1_inner;
241 QPhiX_InnerClover* A_inv_cb0_inner=(QPhiX_InnerClover *)geom_inner->allocCBClov();
242 QPhiX_InnerClover* A_inv_cb1_inner=(QPhiX_InnerClover *)geom_inner->allocCBClov();
243 invclov_packed_i[0] = A_inv_cb0_inner;
244 invclov_packed_i[1] = A_inv_cb1_inner;
247 QDPIO::cout <<
"Packing Clover term..." << std::endl;
250 for(
int cb=0;
cb < 2;
cb++) {
251 QPhiX::qdp_pack_clover<>((*invclov), invclov_packed[
cb], *geom_outer,
cb);
255 for(
int cb=0;
cb < 2;
cb++) {
256 QPhiX::qdp_pack_clover<>((*clov), clov_packed[
cb], *geom_outer,
cb);
260 for(
int cb=0;
cb < 2;
cb++) {
261 QPhiX::qdp_pack_clover<>((*invclov), invclov_packed_i[
cb], *geom_inner,
cb);
265 for(
int cb=0;
cb < 2;
cb++) {
266 QPhiX::qdp_pack_clover<>((*clov), clov_packed_i[
cb], *geom_inner,
cb);
268 QDPIO::cout <<
"Done" << std::endl;
271 M_outer=
new QPhiX::EvenOddCloverOperator<REALT,OuterVec,OuterSoa,comp12>(u_packed,
276 toDouble(aniso_coeffs[0]),
277 toDouble(aniso_coeffs[3]));
279 M_inner=
new QPhiX::EvenOddCloverOperator<InnerReal,InnerVec,InnerSoa,comp12>(u_packed_i,
284 toDouble(aniso_coeffs[0]),
285 toDouble(aniso_coeffs[3]));
288 bicgstab_inner_solver =
new QPhiX::InvBiCGStab<InnerReal, InnerVec,InnerSoa,comp12>((*M_inner), invParam.MaxIter);
290 mixed_solver =
new QPhiX::InvRichardsonMultiPrec<REALT,
291 OuterVec,OuterSoa,comp12,
292 InnerReal,InnerVec,InnerSoa,comp12>((*M_outer), (*bicgstab_inner_solver), toDouble(invParam.Delta), invParam.MaxIter);
298 ~MdagMSysSolverQPhiXCloverIterRefine()
302 QDPIO::cout <<
"Destructing" << std::endl;
304 #ifndef QDP_IS_QDPJIT
305 geom_outer->free(psi_qphix);
306 geom_outer->free(chi_qphix);
307 geom_outer->free(tmp_qphix);
313 geom_outer->free(invclov_packed[0]);
314 geom_outer->free(invclov_packed[1]);
315 invclov_packed[0] =
nullptr;
316 invclov_packed[1] =
nullptr;
319 geom_outer->free(clov_packed[0]);
320 geom_outer->free(clov_packed[1]);
321 clov_packed[0] =
nullptr;
322 clov_packed[1] =
nullptr;
325 geom_outer->free(u_packed[0]);
326 geom_outer->free(u_packed[1]);
327 u_packed[0] =
nullptr;
328 u_packed[1] =
nullptr;
331 geom_inner->free(invclov_packed_i[0]);
332 geom_inner->free(invclov_packed_i[1]);
333 invclov_packed_i[0] =
nullptr;
334 invclov_packed_i[1] =
nullptr;
337 geom_inner->free(clov_packed_i[0]);
338 geom_inner->free(clov_packed_i[1]);
339 clov_packed_i[0] =
nullptr;
340 clov_packed_i[1] =
nullptr;
343 geom_inner->free(u_packed_i[0]);
344 geom_inner->free(u_packed_i[1]);
345 u_packed_i[0] =
nullptr;
346 u_packed_i[1] =
nullptr;
355 const Subset& subset()
const {
return A->subset();}
361 SystemSolverResults_t res;
362 Null4DChronoPredictor not_predicting;
363 res=(*this)(
psi,
chi,not_predicting);
374 SystemSolverResults_t
operator()(
T&
psi,
const T&
chi, AbsChronologicalPredictor4D<T>& predictor)
const
378 swatch.reset(); swatch.start();
380 SystemSolverResults_t res;
381 Handle< LinearOperator<T> >
MdagM(
new MdagMLinOp<T>(
A) );
384 Y[
A->subset() ] =
psi;
388 AbsTwoStepChronologicalPredictor4D<T>& two_step_predictor =
389 dynamic_cast<AbsTwoStepChronologicalPredictor4D<T>&
>(predictor);
393 two_step_predictor.predictY(Y,*
A,
chi);
396 catch( std::bad_cast) {
406 #ifndef QDP_IS_QDPJIT
407 QDPIO::cout <<
"Packing" << std::endl << std::flush ;
408 QPhiX::qdp_pack_cb_spinor<>(Y, tmp_qphix, *geom_outer,1);
409 QPhiX::qdp_pack_cb_spinor<>(
psi, psi_qphix, *geom_outer,1);
410 QPhiX::qdp_pack_cb_spinor<>(
chi, chi_qphix, *geom_outer,1);
412 tmp_qphix = (QPhiX_Spinor *)(Y.getFjit()) + cbsize_in_blocks;
413 psi_qphix = (QPhiX_Spinor *)(
psi.getFjit()) + cbsize_in_blocks;
414 chi_qphix = (QPhiX_Spinor *)(
chi.getFjit()) + cbsize_in_blocks;
416 QDPIO::cout <<
"Done" << std::endl << std::flush;
418 int num_cb_sites = Layout::vol()/2;
420 unsigned long site_flops1=0;
421 unsigned long mv_apps1=0;
424 unsigned long site_flops2=0;
425 unsigned long mv_apps2=0;
428 QDPIO::cout <<
"Starting Y solve" << std::endl << std::flush ;
429 double start = omp_get_wtime();
430 (*mixed_solver)(tmp_qphix,chi_qphix, toDouble(invParam.RsdTarget), n_count1, rsd_final, site_flops1, mv_apps1, -1, invParam.VerboseP);
431 double end = omp_get_wtime();
434 unsigned long total_flops = (site_flops1 + (1320+504+1320+504+48)*mv_apps1)*num_cb_sites;
435 double gflops = (double)(total_flops)/(1.0e9);
436 double total_time = end - start;
438 Double r_final = sqrt(toDouble(rsd_final));
439 QDPIO::cout <<
"QPHIX_CLOVER_ITER_REFINE_BICGSTAB_SOLVER: " << n_count1 <<
" iters, rsd_sq_final=" << rsd_final <<
" ||r||/||b|| (acc) = " << r_final <<std::endl;
440 QDPIO::cout <<
"QPHIX_CLOVER_ITER_REFINE_BICGSTAB_SOLVER: Solver Time="<< total_time <<
" (sec) Performance=" << gflops / total_time <<
" GFLOPS" << std::endl;
442 QDPIO::cout <<
"Starting X solve" << std::endl << std::flush ;
443 start = omp_get_wtime();
444 (*mixed_solver)(psi_qphix,tmp_qphix, toDouble(invParam.RsdTarget), n_count2, rsd_final, site_flops2, mv_apps2, +1, invParam.VerboseP);
445 end = omp_get_wtime();
446 total_flops = (site_flops2 + (1320+504+1320+504+48)*mv_apps2)*num_cb_sites;
447 gflops = (double)(total_flops)/(1.0e9);
448 total_time = end - start;
450 #ifndef QDP_IS_QDPJIT
452 QPhiX::qdp_unpack_cb_spinor<>(tmp_qphix, Y, *geom_outer,1);
455 QPhiX::norm2Spinor(norm2Y, tmp_qphix, *geom_outer, n_blas_simt);
456 r_final = sqrt(toDouble(rsd_final));
458 QDPIO::cout <<
"QPHIX_CLOVER_ITER_REFINE_BICGSTAB_SOLVER: " << n_count2 <<
" iters, rsd_sq_final=" << rsd_final <<
" ||r||/||b|| (acc) = " << r_final << std::endl;
459 QDPIO::cout <<
"QPHIX_CLOVER_ITER_REFINE_BICGSTAB_SOLVER: Solver Time="<< total_time <<
" (sec) Performance=" << gflops / total_time <<
" GFLOPS" << std::endl;
461 #ifndef QDP_IS_QDPJIT
462 QPhiX::qdp_unpack_cb_spinor<>(psi_qphix,
psi, *geom_outer,1);
467 AbsTwoStepChronologicalPredictor4D<T>& two_step_predictor =
468 dynamic_cast<AbsTwoStepChronologicalPredictor4D<T>&
>(predictor);
469 two_step_predictor.newYVector(Y);
470 two_step_predictor.newXVector(
psi);
473 catch( std::bad_cast) {
477 predictor.newVector(
psi);
482 Y[
A->subset() ] =
chi;
488 Y[
A->subset() ] -=
tmp2;
491 Double r2 = norm2(Y,
A->subset());
493 Double rel_resid = sqrt(r2/b2);
495 res.n_count = n_count1 + n_count2;
496 QDPIO::cout <<
"QPHIX_CLOVER_ITER_REFINE_BICGSTAB_SOLVER: total_iters="<<res.n_count<<
" || r || / || b || = " << res.resid << std::endl;
500 if ( !toBool ( rel_resid < invParam.RsdTarget*invParam.RsdToleranceFactor ) ) {
501 QDPIO::cout <<
"SOLVE FAILED" << std::endl;
506 QDPIO::cout <<
"QPHIX_MDAGM_SOLVER: total time: " << swatch.getTimeInSeconds() <<
" (sec)" << std::endl;
519 MdagMSysSolverQPhiXCloverIterRefine() {}
523 Handle< LinearOperator<T> >
A;
526 const SysSolverQPhiXCloverParams invParam;
529 Handle< CloverTermT<T, U> > clov;
530 Handle< CloverTermT<T, U> > invclov;
533 QPhiX::Geometry<REALT,
534 MixedVecTraits<REALT,InnerReal>::Vec,
535 MixedVecTraits<REALT,InnerReal>::Soa,
536 MixedVecTraits<REALT,InnerReal>::compress12>* geom_outer;
539 QPhiX::Geometry<InnerReal,
540 MixedVecTraits<REALT,InnerReal>::VecInner,
541 MixedVecTraits<REALT,InnerReal>::SoaInner,
542 MixedVecTraits<REALT,InnerReal>::compress12>* geom_inner;
545 Handle< QPhiX::EvenOddCloverOperator<REALT,
546 MixedVecTraits<REALT,InnerReal>::Vec,
547 MixedVecTraits<REALT,InnerReal>::Soa,
548 MixedVecTraits<REALT,InnerReal>::compress12> > M_outer;
552 Handle< QPhiX::EvenOddCloverOperator<InnerReal,
553 MixedVecTraits<REALT,InnerReal>::VecInner,
554 MixedVecTraits<REALT,InnerReal>::SoaInner,
555 MixedVecTraits<REALT,InnerReal>::compress12> > M_inner;
559 MixedVecTraits<REALT,InnerReal>::VecInner,
560 MixedVecTraits<REALT,InnerReal>::SoaInner,
561 MixedVecTraits<REALT,InnerReal>::compress12> > bicgstab_inner_solver;
565 Handle< QPhiX::InvRichardsonMultiPrec<REALT,
566 MixedVecTraits<REALT,InnerReal>::Vec,
567 MixedVecTraits<REALT,InnerReal>::Soa,
568 MixedVecTraits<REALT,InnerReal>::compress12,
570 MixedVecTraits<REALT,InnerReal>::VecInner,
571 MixedVecTraits<REALT,InnerReal>::SoaInner,
572 MixedVecTraits<REALT,InnerReal>::compress12> > mixed_solver;
574 QPhiX_Clover* invclov_packed[2];
575 QPhiX_Clover* clov_packed[2];
576 QPhiX_Gauge* u_packed[2];
578 QPhiX_InnerClover* invclov_packed_i[2];
579 QPhiX_InnerClover* clov_packed_i[2];
580 QPhiX_InnerGauge* u_packed_i[2];
583 mutable QPhiX_Spinor* psi_qphix;
584 mutable QPhiX_Spinor* chi_qphix;
585 mutable QPhiX_Spinor* tmp_qphix;
586 size_t cbsize_in_blocks;
void create(Handle< FermState< T, multi1d< U >, multi1d< U > > > fs, const CloverFermActParams ¶m_)
Creation routine.
Include possibly optimized Clover terms.
Even-odd preconditioned Clover fermion linear operator.
Class for counted reference semantics.
M^dag*M composition of a linear operator.
bool registerAll()
Register all the factories.
multi1d< Hadron2PtContraction_t > operator()(const multi1d< LatticeColorMatrix > &u)
Asqtad Staggered-Dirac operator.
static multi1d< LatticeColorMatrix > u
SystemSolverResults_t InvBiCGStab(const LinearOperator< LatticeFermionF > &A, const LatticeFermionF &chi, LatticeFermionF &psi, const Real &RsdBiCGStab, int MaxBiCGStab, enum PlusMinus isign)
QDPCloverTermT< T, U > CloverTermT
multi1d< Real > makeFermCoeffs(const AnisoParam_t &aniso)
Make fermion coefficients.
FloatingPoint< double > Double
Null predictor: Leaves input x0 unchanged.
Periodic ferm state and a creator.
Fermion action factories.
Reunitarize in place a color matrix to SU(N)
Support class for fermion actions and linear operators.
Handle< LinearOperator< T > > MdagM
multi1d< LatticeColorMatrix > U
multi1d< LatticeColorMatrix > Q