6 #ifndef __multi_syssolver_mdagm_cg_clover_quda_w_h__
7 #define __multi_syssolver_mdagm_cg_clover_quda_w_h__
42 namespace MdagMMultiSysSolverCGQudaCloverEnv
51 class MdagMMultiSysSolverCGQudaClover :
public MdagMMultiSystemSolver<LatticeFermion>
54 typedef LatticeFermion
T;
55 typedef LatticeColorMatrix
U;
56 typedef multi1d<LatticeColorMatrix>
Q;
57 typedef multi1d<LatticeColorMatrix>
P;
59 typedef LatticeFermionF
TF;
60 typedef LatticeColorMatrixF
UF;
61 typedef multi1d<LatticeColorMatrixF>
QF;
62 typedef multi1d<LatticeColorMatrixF>
PF;
64 typedef LatticeFermionD
TD;
65 typedef LatticeColorMatrixD
UD;
66 typedef multi1d<LatticeColorMatrixD>
QD;
67 typedef multi1d<LatticeColorMatrixD>
PD;
69 typedef WordType<T>::Type_t REALT;
75 MdagMMultiSysSolverCGQudaClover(Handle< LinearOperator<T> > M_,
76 Handle< FermState<T,P,Q> > state_,
77 const MultiSysSolverQUDACloverParams& invParam_) :
81 QDPIO::cout <<
"MdagMMultiSysSolverCGQUDAClover: " << std::endl;
85 int s =
sizeof( WordType<T>::Type_t );
87 cpu_prec = QUDA_SINGLE_PRECISION;
90 cpu_prec = QUDA_DOUBLE_PRECISION;
95 switch( invParam.cudaPrecision ) {
97 gpu_prec = QUDA_HALF_PRECISION;
100 gpu_prec = QUDA_SINGLE_PRECISION;
103 gpu_prec = QUDA_DOUBLE_PRECISION;
112 switch( invParam.cudaSloppyPrecision ) {
114 gpu_half_prec = QUDA_HALF_PRECISION;
117 gpu_half_prec = QUDA_SINGLE_PRECISION;
120 gpu_half_prec = QUDA_DOUBLE_PRECISION;
123 gpu_half_prec = gpu_prec;
129 switch( invParam.cudaRefinementPrecision ) {
131 gpu_ref_prec = QUDA_HALF_PRECISION;
134 gpu_ref_prec = QUDA_SINGLE_PRECISION;
137 gpu_ref_prec = QUDA_DOUBLE_PRECISION;
140 gpu_ref_prec = gpu_prec;
146 QDPIO::cout <<
" Calling new QUDA Invert Param" << std::endl;
147 q_gauge_param = newQudaGaugeParam();
148 quda_inv_param = newQudaInvertParam();
151 const multi1d<int>& latdims = Layout::subgridLattSize();
153 q_gauge_param.X[0] = latdims[0];
154 q_gauge_param.X[1] = latdims[1];
155 q_gauge_param.X[2] = latdims[2];
156 q_gauge_param.X[3] = latdims[3];
161 q_gauge_param.type = QUDA_WILSON_LINKS;
163 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
164 q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
167 q_gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
168 q_gauge_param.gauge_order = QUDA_QDPJIT_GAUGE_ORDER;
175 if( invParam.AntiPeriodicT ) {
176 q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
179 q_gauge_param.t_boundary = QUDA_PERIODIC_T;
183 q_gauge_param.cpu_prec = cpu_prec;
184 q_gauge_param.cuda_prec = gpu_prec;
187 switch( invParam.cudaReconstruct ) {
189 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
192 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
195 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
198 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
202 q_gauge_param.cuda_prec_sloppy = gpu_half_prec;
203 q_gauge_param.cuda_prec_precondition = gpu_half_prec;
204 q_gauge_param.cuda_prec_refinement_sloppy = gpu_ref_prec;
206 switch( invParam.cudaSloppyReconstruct ) {
208 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
211 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
214 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
217 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
222 q_gauge_param.reconstruct_precondition = q_gauge_param.reconstruct_sloppy;
228 switch( invParam.cudaRefinementReconstruct ) {
230 q_gauge_param.reconstruct_refinement_sloppy = QUDA_RECONSTRUCT_NO;
233 q_gauge_param.reconstruct_refinement_sloppy = QUDA_RECONSTRUCT_8;
236 q_gauge_param.reconstruct_refinement_sloppy = QUDA_RECONSTRUCT_12;
239 q_gauge_param.reconstruct_refinement_sloppy = QUDA_RECONSTRUCT_12;
252 links_single[
mu] = (state_->getLinks())[
mu];
256 if( invParam.axialGaugeP ) {
257 QDPIO::cout <<
"Fixing Temporal Gauge" << std::endl;
260 links_single[
mu] = GFixMat*(state_->getLinks())[
mu]*adj(shift(GFixMat,
FORWARD,
mu));
262 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
266 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
270 const AnisoParam_t& aniso = invParam.CloverParams.anisoParam;
272 Real gamma_f = aniso.xi_0 / aniso.nu;
273 q_gauge_param.anisotropy = toDouble(gamma_f);
276 q_gauge_param.anisotropy = 1.0;
281 Handle<FermState<T,Q,Q> > fstate(
new PeriodicFermState<T,Q,Q>(links_single));
286 links_single[
mu] *= cf[
mu];
292 quda_inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
293 solver_string =
"MULTI_CG";
294 quda_inv_param.inv_type = QUDA_CG_INVERTER;
295 quda_inv_param.use_alternative_reliable = 1;
304 if( invParam.asymmetricP ) {
305 quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
306 quda_inv_param.kappa = 0.5;
309 quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
310 quda_inv_param.kappa = 0.5;
316 quda_inv_param.clover_coeff = 1.0;
318 quda_inv_param.tol = toDouble(invParam.RsdTarget[0]);
319 quda_inv_param.maxiter = invParam.MaxIter;
320 quda_inv_param.reliable_delta = toDouble(invParam.Delta);
321 quda_inv_param.pipeline = invParam.Pipeline;
324 quda_inv_param.solution_type = QUDA_MATPCDAG_MATPC_SOLUTION;
327 switch( invParam.solverType ) {
329 quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
332 QDPIO::cerr <<
"Only CG Is currently implemented for multi-shift" << std::endl;
339 if ( invParam.asymmetricP ) {
340 QDPIO::cout <<
"Working with asymmetric solution" << std::endl;
341 quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD_ASYMMETRIC;
344 QDPIO::cout <<
"Working with symmetric solution" << std::endl;
345 quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
348 quda_inv_param.dagger = QUDA_DAG_NO;
350 quda_inv_param.cpu_prec = cpu_prec;
351 quda_inv_param.cuda_prec = gpu_prec;
352 quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
353 quda_inv_param.cuda_prec_precondition = gpu_half_prec;
354 quda_inv_param.cuda_prec_refinement_sloppy = gpu_ref_prec;
355 quda_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
356 quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
358 #ifndef BUILD_QUDA_DEVIFACE_SPINOR
359 quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
362 quda_inv_param.dirac_order = QUDA_QDPJIT_DIRAC_ORDER;
363 quda_inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
364 quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
369 if( invParam.tuneDslashP ) {
370 QDPIO::cout <<
"Enabling Dslash Autotuning" << std::endl;
372 quda_inv_param.tune = QUDA_TUNE_YES;
375 QDPIO::cout <<
"Disabling Dslash Autotuning" << std::endl;
377 quda_inv_param.tune = QUDA_TUNE_NO;
384 multi1d<int> face_size(4);
385 face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
386 face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
387 face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
388 face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
390 int max_face = face_size[0];
391 for(
int i=1;
i <=3;
i++) {
392 if ( face_size[
i] > max_face ) {
393 max_face = face_size[
i];
398 q_gauge_param.ga_pad = max_face;
399 quda_inv_param.sp_pad = 0;
400 quda_inv_param.cl_pad = 0;
406 QDPIO::cout <<
"Setting Precondition stuff to defaults for not using" << std::endl;
407 quda_inv_param.inv_type_precondition= QUDA_INVALID_INVERTER;
408 quda_inv_param.tol_precondition = 1.0e-1;
409 quda_inv_param.maxiter_precondition = 1000;
410 quda_inv_param.verbosity_precondition = QUDA_SILENT;
411 quda_inv_param.gcrNkrylov = 1;
414 quda_inv_param.clover_cpu_prec = cpu_prec;
415 quda_inv_param.clover_cuda_prec = gpu_prec;
416 quda_inv_param.clover_cuda_prec_sloppy = gpu_half_prec;
417 quda_inv_param.clover_cuda_prec_precondition = gpu_half_prec;
418 quda_inv_param.clover_cuda_prec_refinement_sloppy = gpu_ref_prec;
420 #ifndef BUILD_QUDA_DEVIFACE_CLOVER
421 quda_inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
423 QDPIO::cout <<
"MULTI MDAGM clover CUDA location\n";
424 quda_inv_param.clover_location = QUDA_CUDA_FIELD_LOCATION;
425 quda_inv_param.clover_order = QUDA_QDPJIT_CLOVER_ORDER;
429 if( invParam.verboseP ) {
430 quda_inv_param.verbosity = QUDA_VERBOSE;
433 quda_inv_param.verbosity = QUDA_SUMMARIZE;
439 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
441 gauge[
mu] = (
void *)&(links_single[
mu].elem(all.start()).elem().elem(0,0).real());
444 GetMemoryPtrGauge(gauge,links_single);
449 loadGaugeQuda((
void *)gauge, &q_gauge_param);
452 QDPIO::cout <<
"Creating CloverTerm" << std::endl;
453 clov->create(fstate, invParam_.CloverParams);
454 invclov->create(fstate, invParam_.CloverParams);
456 QDPIO::cout <<
"Inverting CloverTerm" << std::endl;
461 #ifndef BUILD_QUDA_DEVIFACE_CLOVER
462 multi1d<QUDAPackedClovSite<REALT> > packed_clov;
464 packed_clov.resize(all.siteTable().size());
466 clov->packForQUDA(packed_clov, 0);
467 clov->packForQUDA(packed_clov, 1);
470 multi1d<QUDAPackedClovSite<REALT> > packed_invclov(all.siteTable().size());
471 invclov->packForQUDA(packed_invclov, 0);
472 invclov->packForQUDA(packed_invclov, 1);
474 loadCloverQuda(&(packed_clov[0]), &(packed_invclov[0]),&quda_inv_param);
477 quda_inv_param.clover_location = QUDA_CUDA_FIELD_LOCATION;
478 quda_inv_param.clover_order = QUDA_QDPJIT_CLOVER_ORDER;
483 GetMemoryPtrClover(clov->getOffId(),clov->getDiaId(),invclov->getOffId(),invclov->getDiaId());
486 loadCloverQuda( (
void*)(clover) , (
void*)(cloverInv) ,&quda_inv_param);
492 ~MdagMMultiSysSolverCGQudaClover() {
493 QDPIO::cout <<
"Destructing" << std::endl;
499 const Subset& subset()
const {
return A->subset();}
507 SystemSolverResults_t
operator() (multi1d<T>&
psi,
const multi1d<Real>& shifts,
const T&
chi)
const
513 SystemSolverResults_t res;
516 if( invParam.RsdTarget.size() > 1 ) {
517 if( invParam.RsdTarget.size() != shifts.size()) {
518 QDPIO::cerr <<
"There are: " << invParam.RsdTarget.size() <<
" values for RsdTarget but " << shifts.size() <<
" shifts" << std::endl;
519 QDPIO::cerr <<
"This doesnt match. Aborting" << std::endl;
523 if ( invParam.axialGaugeP ) {
525 multi1d<T> g_psi(
psi.size());
528 QDPIO::cout <<
"Gauge Fixing source and initial guess" << std::endl;
529 g_chi[ rb[1] ] = GFixMat *
chi;
530 for(
int s=0;
s <
psi.size();
s++) {
531 g_psi[
s][ rb[1] ] =
zero;
534 QDPIO::cout <<
"Solving" << std::endl;
535 res = qudaInvertMulti(
539 QDPIO::cout <<
"Untransforming solution." << std::endl;
540 for(
int s=0;
s<
psi.size();
s++) {
541 psi[
s][ rb[1]] = adj(GFixMat)*g_psi[
s];
546 res = qudaInvertMulti(
chi,
552 double time = swatch.getTimeInSeconds();
554 bool abortFound =
false;
555 if (invParam.checkShiftsP ) {
557 multi1d<Double> r_rel(shifts.size());
560 for(
int i=0;
i < shifts.size();
i++) {
561 char normpsi_subset[256];
562 char normpsi_full[256];
563 std::sprintf( normpsi_subset,
"%.*e", DECIMAL_DIG, toDouble(norm2(
psi[
i],
A->subset())) );
564 std::sprintf( normpsi_full,
"%.*e", DECIMAL_DIG, toDouble(norm2(
psi[
i])) );
565 QDPIO::cout <<
"psi[ " <<
i <<
" ] : norm( A->subset() ) = " << normpsi_subset <<
" norm(total) = " << normpsi_full << std::endl;
568 for(
int i=0;
i < shifts.size();
i++) {
575 r_rel[
i] = sqrt(norm2(
r,
A->subset())/chinorm );
577 QDPIO::cout <<
"r[" <<
i <<
"] = " << r_rel[
i] << std::endl;
579 if ( toBool( r_rel[
i] > invParam.RsdTarget[
i]*invParam.RsdToleranceFactor ) ) {
580 QDPIO::cout <<
"Shift " <<
i <<
" has rel. residuum " << r_rel[
i] <<
" exceeding target "
581 << invParam.RsdTarget[
i] <<
" . Aborting! " << std::endl;
586 QDPIO::cout <<
"MULTI_CG_QUDA_CLOVER_SOLVER: " << res.n_count <<
" iterations. Rsd = " << res.resid << std::endl;
587 QDPIO::cout <<
"MULTI_CG_QUDA_CLOVER_SOLVER: "<<time<<
" sec" << std::endl;
588 if( abortFound ) QDP_abort(1);
599 MdagMMultiSysSolverCGQudaClover() {}
601 QudaPrecision_s cpu_prec;
602 QudaPrecision_s gpu_prec;
603 QudaPrecision_s gpu_half_prec;
604 QudaPrecision_s gpu_ref_prec;
606 Handle< LinearOperator<T> >
A;
607 const MultiSysSolverQUDACloverParams invParam;
608 QudaGaugeParam q_gauge_param;
609 mutable QudaInvertParam quda_inv_param;
611 Handle< CloverTermT<T, U> > clov;
612 Handle< CloverTermT<T, U> > invclov;
614 SystemSolverResults_t qudaInvertMulti(
const T& chi_s,
Include possibly optimized Clover terms.
void temporalGauge(multi1d< LatticeColorMatrix > &ug, LatticeColorMatrix &g, int decay_dir)
Temporal gauge fixing.
Class for counted reference semantics.
M^dag*M composition of a linear operator.
multi1d< LatticeColorMatrix > P
bool registerAll()
Register all the factories.
multi1d< Hadron2PtContraction_t > operator()(const multi1d< LatticeColorMatrix > &u)
Asqtad Staggered-Dirac operator.
LinOpSysSolverMGProtoClover::Q Q
LinOpSysSolverMGProtoClover::T T
multi1d< LatticeFermion > chi(Ncb)
QDPCloverTermT< T, U > CloverTermT
multi1d< Real > makeFermCoeffs(const AnisoParam_t &aniso)
Make fermion coefficients.
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Periodic ferm state and a creator.
Reunitarize in place a color matrix to SU(N)
multi1d< LatticeColorMatrix > U
multi1d< LatticeColorMatrix > Q
multi1d< LatticeColorMatrixF > QF
multi1d< LatticeColorMatrixF > PF
multi1d< LatticeColorMatrixD > PD
multi1d< LatticeColorMatrixD > QD