6 #ifndef __multi_syssolver_mdagm_clover_qphix_h__
7 #define __multi_syssolver_mdagm_clover_qphix_h__
31 #include "qphix/qphix_config.h"
32 #include "qphix/geometry.h"
33 #include "qphix/dslash_utils.h"
34 #include "qphix/qdp_packer.h"
35 #include "qphix/clover.h"
36 #include "qphix/minvcg.h"
37 #include "qphix/blas_new_c.h"
46 namespace MdagMMultiSysSolverQPhiXCloverEnv
57 using namespace QPhiXVecTraits;
58 template<
typename T,
typename U>
59 class MdagMMultiSysSolverQPhiXClover :
public MdagMMultiSystemSolver<T>
63 typedef typename WordType<T>::Type_t REALT;
64 typedef typename QPhiX::Geometry<REALT,VecTraits<REALT>::Vec,VecTraits<REALT>::Soa,VecTraits<REALT>::compress12>::FourSpinorBlock QPhiX_Spinor;
65 typedef typename QPhiX::Geometry<REALT,VecTraits<REALT>::Vec,VecTraits<REALT>::Soa,VecTraits<REALT>::compress12>::SU3MatrixBlock QPhiX_Gauge;
66 typedef typename QPhiX::Geometry<REALT,VecTraits<REALT>::Vec,VecTraits<REALT>::Soa,VecTraits<REALT>::compress12>::CloverBlock QPhiX_Clover;
74 MdagMMultiSysSolverQPhiXClover(Handle< LinearOperator<T> > A_,
75 Handle< FermState<T,Q,Q> > state_,
76 const MultiSysSolverQPhiXCloverParams& invParam_) :
77 A(A_), invParam(invParam_),
81 QDPIO::cout <<
"MdagMMultiSysSolverQPhiXClover:" << std::endl;
82 QDPIO::cout <<
"AntiPeriodicT is: " << invParam.AntiPeriodicT << std::endl;
85 QDPIO::cout <<
"Veclen is " << VecTraits<REALT>::Vec << std::endl;
86 QDPIO::cout <<
"Soalen is " << VecTraits<REALT>::Soa << std::endl;
87 cbsize_in_blocks = rb[0].numSiteTable()/VecTraits<REALT>::Soa;
89 if ( VecTraits<REALT>::Soa > VecTraits<REALT>::Vec ) {
90 QDPIO::cerr <<
"PROBLEM: Soalen > Veclen. Please set soalen appropriately (<=VECLEN) at compile time" << std::endl;
96 u[
mu] = state_->getLinks()[
mu];
100 multi1d<Real> aniso_coeffs(
Nd);
101 for(
int mu=0;
mu <
Nd;
mu++) aniso_coeffs[
mu] = Real(1);
103 bool anisotropy = invParam.CloverParams.anisoParam.anisoP;
105 aniso_coeffs =
makeFermCoeffs( invParam.CloverParams.anisoParam );
108 double t_boundary=(double)(1);
112 if (invParam.AntiPeriodicT) {
113 t_boundary=(double)(-1);
115 u[3] *= where(Layout::latticeCoordinate(3) == (Layout::lattSize()[3]-1),
116 Real(t_boundary), Real(1));
119 const QPhiX::QPhiXCLIArgs& QPhiXParams = TheQPhiXParams::Instance();
121 QDPIO::cout <<
"About to grap a Dslash" << std::endl;
130 int pad_xy = QPhiXParams.getPxy();
131 int pad_xyz = QPhiXParams.getPxyz();
134 n_blas_simt = QPhiXParams.getSy()*QPhiXParams.getSz();
136 geom =
new QPhiX::Geometry<REALT,
137 VecTraits<REALT>::Vec,
138 VecTraits<REALT>::Soa,
139 VecTraits<REALT>::compress12>(Layout::subgridLattSize().slice(),
142 QPhiXParams.getNCores(),
147 QPhiXParams.getMinCt());
149 QDPIO::cout <<
" Allocating Clover" << std::endl << std::flush ;
150 QPhiX_Clover* A_cb0=(QPhiX_Clover *)geom->allocCBClov();
151 QPhiX_Clover* A_cb1=(QPhiX_Clover *)geom->allocCBClov();
152 clov_packed[0] = A_cb0;
153 clov_packed[1] = A_cb1;
155 QDPIO::cout <<
" Allocating CloverInv " << std::endl << std::flush ;
156 QPhiX_Clover* A_inv_cb0=(QPhiX_Clover *)geom->allocCBClov();
157 QPhiX_Clover* A_inv_cb1=(QPhiX_Clover *)geom->allocCBClov();
158 invclov_packed[0] = A_inv_cb0;
159 invclov_packed[1] = A_inv_cb1;
162 QDPIO::cout <<
"Packing gauge field..." << std::endl << std::flush ;
163 QPhiX_Gauge* packed_gauge_cb0=(QPhiX_Gauge *)geom->allocCBGauge();
164 QPhiX_Gauge* packed_gauge_cb1=(QPhiX_Gauge *)geom->allocCBGauge();
166 QPhiX::qdp_pack_gauge<>(
u, packed_gauge_cb0,packed_gauge_cb1, *geom);
167 u_packed[0] = packed_gauge_cb0;
168 u_packed[1] = packed_gauge_cb1;
171 QDPIO::cout <<
"Creating Clover Term" << std::endl;
173 clov->
create(state_, invParam.CloverParams);
174 QDPIO::cout <<
"Inverting Clover Term" << std::endl;
175 invclov->create(state_, invParam.CloverParams, (*clov));
176 for(
int cb=0;
cb < 2;
cb++) {
179 QDPIO::cout <<
"Done" << std::endl;
180 QDPIO::cout <<
"Packing Clover term..." << std::endl;
182 for(
int cb=0;
cb < 2;
cb++) {
183 QPhiX::qdp_pack_clover<>((*invclov), invclov_packed[
cb], *geom,
cb);
186 for(
int cb=0;
cb < 2;
cb++) {
187 QPhiX::qdp_pack_clover<>((*clov), clov_packed[
cb], *geom,
cb);
189 QDPIO::cout <<
"Done" << std::endl;
191 QDPIO::cout <<
"Creating the Even Odd Operator" << std::endl;
192 M=
new QPhiX::EvenOddCloverOperator<REALT,
193 VecTraits<REALT>::Vec,
194 VecTraits<REALT>::Soa,
195 VecTraits<REALT>::compress12>(u_packed,
200 toDouble(aniso_coeffs[0]),
201 toDouble(aniso_coeffs[3]));
204 mcg_solver =
new QPhiX::MInvCG<REALT,VecTraits<REALT>::Vec, VecTraits<REALT>::Soa, VecTraits<REALT>::compress12>((*M), invParam.MaxIter, invParam.MaxShifts);
209 ~MdagMMultiSysSolverQPhiXClover()
213 QDPIO::cout <<
"Destructing" << std::endl;
215 geom->free(invclov_packed[0]);
216 geom->free(invclov_packed[1]);
217 invclov_packed[0] =
nullptr;
218 invclov_packed[1] =
nullptr;
220 geom->free(clov_packed[0]);
221 geom->free(clov_packed[1]);
222 clov_packed[0] =
nullptr;
223 clov_packed[1] =
nullptr;
225 geom->free(u_packed[0]);
226 geom->free(u_packed[1]);
227 u_packed[0] =
nullptr;
228 u_packed[1] =
nullptr;
236 const Subset& subset()
const {
return A->subset();}
239 const multi1d<Real>& shifts,
242 SystemSolverResults_t res;
246 swatch.reset(); swatch.start();
248 int n_shift = shifts.size();
249 if(
psi.size() != n_shift ) {
254 for(
int s=0;
s < n_shift;++
s) {
259 QDPIO::cout <<
"operator(): n_shift = " << n_shift << std::endl;
262 if (n_shift > invParam.MaxShifts) {
263 QDPIO::cerr <<
"n_shift="<<n_shift<<
" is greater than MaxShifts="
264 << invParam.MaxShifts << std::endl;
270 QPhiX_Spinor* chi_qphix;
272 #ifndef QDP_IS_QDPJIT
274 chi_qphix =(QPhiX_Spinor *)geom->allocCBFourSpinor();
275 if (chi_qphix ==
nullptr) {
276 QDPIO::cout <<
"Unable to allocate chi_qphix" << std::endl;
279 QDPIO::cout <<
"Packing chi" << std::endl;
280 QPhiX::qdp_pack_cb_spinor<>(
chi, chi_qphix, *geom, 1);
283 chi_qphix = (QPhiX_Spinor*)(
chi.getFjit())+cbsize_in_blocks;
287 QPhiX_Spinor** psi_qphix;
288 psi_qphix=(QPhiX_Spinor**)ALIGNED_MALLOC(n_shift*
sizeof(QPhiX_Spinor*),
289 QPHIX_LLC_CACHE_ALIGN);
290 if( psi_qphix ==
nullptr ) {
291 QDPIO::cout <<
"Unable toallocate psi_qphix" << std::endl;
295 for(
int s =0;
s < n_shift;
s++) {
297 #ifndef QDP_IS_QDPJIT
298 psi_qphix[
s] = (QPhiX_Spinor *)geom->allocCBFourSpinor();
299 if( psi_qphix[
s] ==
nullptr ) {
300 QDPIO::cout <<
"Unable to allocate psi_qphix["<<
s<<
"]" << std::endl;
304 psi_qphix[
s]=(QPhiX_Spinor *)(
psi[
s].getFjit()) + cbsize_in_blocks;
309 QDPIO::cout <<
"Zeroing solutions" << std::endl;
311 for(
int s=0;
s < n_shift;
s++) {
312 zeroSpinor(psi_qphix[
s],(*geom),n_blas_simt);
316 multi1d<double> rsd_final(n_shift);
317 multi1d<double> qphix_shifts(n_shift);
318 for(
int s=0;
s < n_shift;
s++) { qphix_shifts[
s] = toDouble(shifts[
s]); }
320 multi1d<double> qphix_rsd_target(n_shift);
322 if( invParam.RsdTarget.size() == n_shift ) {
324 for(
int s=0;
s < n_shift;
s++) {
325 qphix_rsd_target[
s] = toDouble(invParam.RsdTarget[
s]);
330 if( invParam.RsdTarget.size() == 1 ) {
332 for(
int s=0;
s < n_shift;
s++) {
333 qphix_rsd_target[
s] = toDouble(invParam.RsdTarget[0]);
338 QDPIO::cout <<
"Size Mismatch between invParam.rsdTarget (size="
339 << invParam.RsdTarget.size()
340 <<
") and n_shift=" << n_shift << std::endl;
346 unsigned long site_flops=0;
347 unsigned long mv_apps=0;
351 double start = omp_get_wtime();
352 (*mcg_solver)((QPhiX_Spinor **)psi_qphix,
353 (
const QPhiX_Spinor *)chi_qphix,
355 (
const double*)qphix_shifts.slice(),
356 (
const double*)qphix_rsd_target.slice(),
358 (
double *)rsd_final.slice(),
359 (
unsigned long &)site_flops,
360 (
unsigned long &)mv_apps,
362 (
bool)invParam.VerboseP);
363 double end = omp_get_wtime();
369 #ifndef QDP_IS_QDPJIT
372 geom->free(chi_qphix);
375 for(
int s=0;
s < n_shift;
s++) {
383 #ifndef QDP_IS_QDPJIT
384 QPhiX::qdp_unpack_cb_spinor<>(psi_qphix[
s],
psi[
s], *geom, 1);
385 geom->free(psi_qphix[
s]);
388 ALIGNED_FREE(psi_qphix);
394 for (
int s=1;
s < n_shift;
s++) {
395 if ( qphix_shifts[
s] < qphix_shifts[smallest] ) smallest =
s;
397 res.resid = rsd_final[smallest];
399 QDPIO::cout <<
"QPHIX_RESIDUUM_REPORT:" << std::endl;
400 for(
int s=0;
s < n_shift;
s++) {
401 QDPIO::cout <<
"\t Red_Final["<<
s<<
"]=" << rsd_final[
s] <<std::endl;
406 if ( invParam.SolutionCheckP ) {
408 QDPIO::cout <<
"QPHIX_CLOVER_MULTI_SHIFT_CG_MDAGM_SOLVER: Residua Check: " << std::endl;
409 for(
int s=0;
s < n_shift;
s++) {
417 r[
A->subset()] -=
tmp;
419 Double rel_resid = sqrt(r2/b2);
420 QDPIO::cout <<
"\t shift["<<
s<<
"] Actual || r || / || b || = "
421 << rel_resid << std::endl;
422 if ( toDouble(rel_resid) > qphix_rsd_target[
s]*toDouble(invParam.RsdToleranceFactor) ) {
423 QDPIO::cout <<
"SOLVE FAILED: rel_resid=" << rel_resid
424 <<
" target=" << qphix_rsd_target[
s]
425 <<
" tolerance_factor=" << invParam.RsdToleranceFactor
426 <<
" max tolerated=" << qphix_rsd_target[
s]*toDouble(invParam.RsdToleranceFactor) << std::endl;
432 int num_cb_sites = Layout::vol()/2;
433 unsigned long total_flops = (site_flops + (1320+504+1320+504+48)*mv_apps)*num_cb_sites;
434 double gflops = (double)(total_flops)/(1.0e9);
436 double total_time = end - start;
438 QDPIO::cout <<
"QPHIX_CLOVER_MULTI_SHIFT_CG_MDAGM_SOLVER: Iters=" << res.n_count <<
" Solver Time="<< total_time <<
" (sec) Performance=" << gflops / total_time <<
" GFLOPS" << std::endl;
440 QDPIO::cout <<
"QPHIX_CLOVER_MULTI_SHIFT_CG_MDAGM_SOLVER: total time: " << swatch.getTimeInSeconds() <<
" (sec)" << std::endl;
447 MdagMMultiSysSolverQPhiXClover() {}
451 Handle< LinearOperator<T> >
A;
452 const MultiSysSolverQPhiXCloverParams invParam;
453 Handle< CloverTermT<T, U> > clov;
454 Handle< CloverTermT<T, U> > invclov;
456 QPhiX::Geometry<REALT, VecTraits<REALT>::Vec, VecTraits<REALT>::Soa, VecTraits<REALT>::compress12>* geom;
458 Handle< QPhiX::EvenOddCloverOperator<REALT, VecTraits<REALT>::Vec, VecTraits<REALT>::Soa, VecTraits<REALT>::compress12> > M;
460 Handle< QPhiX::MInvCG<REALT,VecTraits<REALT>::Vec, VecTraits<REALT>::Soa, VecTraits<REALT>::compress12> > mcg_solver;
463 QPhiX_Clover* invclov_packed[2];
464 QPhiX_Clover* clov_packed[2];
465 QPhiX_Gauge* u_packed[2];
466 size_t cbsize_in_blocks;
void create(Handle< FermState< T, multi1d< U >, multi1d< U > > > fs, const CloverFermActParams ¶m_)
Creation routine.
Include possibly optimized Clover terms.
Even-odd preconditioned Clover fermion linear operator.
Class for counted reference semantics.
M^dag*M composition of a linear operator.
Disambiguator for multi-shift MdagM system solvers.
bool registerAll()
Register all the factories.
multi1d< Hadron2PtContraction_t > operator()(const multi1d< LatticeColorMatrix > &u)
Asqtad Staggered-Dirac operator.
static multi1d< LatticeColorMatrix > u
LinOpSysSolverMGProtoClover::Q Q
QDPCloverTermT< T, U > CloverTermT
multi1d< Real > makeFermCoeffs(const AnisoParam_t &aniso)
Make fermion coefficients.
FloatingPoint< double > Double
multi1d< LatticeFermion > r(Ncb)
Periodic ferm state and a creator.
Fermion action factories.
Reunitarize in place a color matrix to SU(N)
Support class for fermion actions and linear operators.
multi1d< LatticeColorMatrix > U
multi1d< LatticeColorMatrix > Q