6 #ifndef __syssolver_linop_quda_clover_h__
7 #define __syssolver_linop_quda_clover_h__
37 namespace LinOpSysSolverQUDACloverEnv
50 class LinOpSysSolverQUDAClover :
public LinOpSystemSolver<LatticeFermion>
53 typedef LatticeFermion
T;
54 typedef LatticeColorMatrix
U;
55 typedef multi1d<LatticeColorMatrix>
Q;
57 typedef LatticeFermionF
TF;
58 typedef LatticeColorMatrixF
UF;
59 typedef multi1d<LatticeColorMatrixF>
QF;
61 typedef LatticeFermionF
TD;
62 typedef LatticeColorMatrixF
UD;
63 typedef multi1d<LatticeColorMatrixF>
QD;
65 typedef WordType<T>::Type_t REALT;
71 LinOpSysSolverQUDAClover(Handle< LinearOperator<T> > A_,
72 Handle< FermState<T,Q,Q> > state_,
73 const SysSolverQUDACloverParams& invParam_) :
76 QDPIO::cout <<
"LinOpSysSolverQUDAClover:" << std::endl;
81 int s =
sizeof( WordType<T>::Type_t );
83 cpu_prec = QUDA_SINGLE_PRECISION;
86 cpu_prec = QUDA_DOUBLE_PRECISION;
91 switch( invParam.cudaPrecision ) {
93 gpu_prec = QUDA_HALF_PRECISION;
96 gpu_prec = QUDA_SINGLE_PRECISION;
99 gpu_prec = QUDA_DOUBLE_PRECISION;
108 switch( invParam.cudaSloppyPrecision ) {
110 gpu_half_prec = QUDA_HALF_PRECISION;
113 gpu_half_prec = QUDA_SINGLE_PRECISION;
116 gpu_half_prec = QUDA_DOUBLE_PRECISION;
119 gpu_half_prec = gpu_prec;
124 q_gauge_param = newQudaGaugeParam();
125 quda_inv_param = newQudaInvertParam();
128 const multi1d<int>& latdims = Layout::subgridLattSize();
130 q_gauge_param.X[0] = latdims[0];
131 q_gauge_param.X[1] = latdims[1];
132 q_gauge_param.X[2] = latdims[2];
133 q_gauge_param.X[3] = latdims[3];
138 q_gauge_param.type = QUDA_WILSON_LINKS;
139 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
140 q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
142 QDPIO::cout <<
"MDAGM Using QDP-JIT gauge order" << std::endl;
143 q_gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
144 q_gauge_param.gauge_order = QUDA_QDPJIT_GAUGE_ORDER;
151 if( invParam.AntiPeriodicT ) {
152 q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
155 q_gauge_param.t_boundary = QUDA_PERIODIC_T;
159 q_gauge_param.cpu_prec = cpu_prec;
160 q_gauge_param.cuda_prec = gpu_prec;
163 switch( invParam.cudaReconstruct ) {
165 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
168 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
171 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
174 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
178 q_gauge_param.cuda_prec_sloppy = gpu_half_prec;
182 q_gauge_param.cuda_prec_precondition = gpu_half_prec;
184 switch( invParam.cudaSloppyReconstruct ) {
186 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
189 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
192 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
195 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
200 q_gauge_param.reconstruct_precondition = q_gauge_param.reconstruct_sloppy;
209 links_single[
mu] = (state_->getLinks())[
mu];
213 if( invParam.axialGaugeP ) {
214 QDPIO::cout <<
"Fixing Temporal Gauge" << std::endl;
217 links_single[
mu] = GFixMat*(state_->getLinks())[
mu]*adj(shift(GFixMat,
FORWARD,
mu));
219 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
223 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
227 const AnisoParam_t& aniso = invParam.CloverParams.anisoParam;
229 Real gamma_f = aniso.xi_0 / aniso.nu;
230 q_gauge_param.anisotropy = toDouble(gamma_f);
233 q_gauge_param.anisotropy = 1.0;
238 Handle<FermState<T,Q,Q> > fstate(
new PeriodicFermState<T,Q,Q>(links_single));
243 links_single[
mu] *= cf[
mu];
249 quda_inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
252 switch( invParam.solverType ) {
254 quda_inv_param.inv_type = QUDA_CG_INVERTER;
255 solver_string =
"CG";
258 quda_inv_param.inv_type = QUDA_BICGSTAB_INVERTER;
259 solver_string =
"BICGSTAB";
262 quda_inv_param.inv_type = QUDA_GCR_INVERTER;
263 solver_string =
"GCR";
266 QDPIO::cerr <<
"Unknown SOlver type" << std::endl;
279 quda_inv_param.kappa = 0.5;
284 quda_inv_param.clover_coeff = 1.0;
285 quda_inv_param.tol = toDouble(invParam.RsdTarget);
286 quda_inv_param.maxiter = invParam.MaxIter;
287 quda_inv_param.reliable_delta = toDouble(invParam.Delta);
288 quda_inv_param.pipeline = invParam.Pipeline;
291 quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
294 switch( invParam.solverType ) {
296 quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
299 quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
302 quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
305 quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
308 quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
312 quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
317 if( invParam.asymmetricP ) {
318 QDPIO::cout <<
"Using Asymmetric Linop: A_oo - D A^{-1}_ee D" << std::endl;
319 quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD_ASYMMETRIC;
322 QDPIO::cout <<
"Using Symmetric Linop: 1 - A^{-1}_oo D A^{-1}_ee D" << std::endl;
323 quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
326 quda_inv_param.dagger = QUDA_DAG_NO;
327 quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
329 quda_inv_param.cpu_prec = cpu_prec;
330 quda_inv_param.cuda_prec = gpu_prec;
331 quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
334 quda_inv_param.cuda_prec_precondition = gpu_half_prec;
337 quda_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
338 quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
340 #ifndef BUILD_QUDA_DEVIFACE_SPINOR
341 quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
343 QDPIO::cout <<
"MDAGM Using QDP-JIT spinor order" << std::endl;
344 quda_inv_param.dirac_order = QUDA_QDPJIT_DIRAC_ORDER;
345 quda_inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
346 quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
351 quda_inv_param.clover_cpu_prec = cpu_prec;
352 quda_inv_param.clover_cuda_prec = gpu_prec;
353 quda_inv_param.clover_cuda_prec_sloppy = gpu_half_prec;
356 quda_inv_param.clover_cuda_prec_precondition = gpu_half_prec;
358 #ifndef BUILD_QUDA_DEVIFACE_CLOVER
359 #warning "NOT USING QUDA DEVICE IFACE"
360 quda_inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
362 #warning "USING QUDA DEVICE IFACE"
363 QDPIO::cout <<
"MDAGM clover CUDA location\n";
364 quda_inv_param.clover_location = QUDA_CUDA_FIELD_LOCATION;
365 quda_inv_param.clover_order = QUDA_QDPJIT_CLOVER_ORDER;
369 if( invParam.tuneDslashP ) {
370 QDPIO::cout <<
"Enabling Dslash Autotuning" << std::endl;
372 quda_inv_param.tune = QUDA_TUNE_YES;
375 QDPIO::cout <<
"Disabling Dslash Autotuning" << std::endl;
377 quda_inv_param.tune = QUDA_TUNE_NO;
382 multi1d<int> face_size(4);
383 face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
384 face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
385 face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
386 face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
388 int max_face = face_size[0];
389 for(
int i=1;
i <=3;
i++) {
390 if ( face_size[
i] > max_face ) {
391 max_face = face_size[
i];
396 q_gauge_param.ga_pad = max_face;
398 quda_inv_param.sp_pad = 0;
399 quda_inv_param.cl_pad = 0;
401 if( invParam.innerParamsP ) {
402 QDPIO::cout <<
"Setting inner solver params" << std::endl;
404 const GCRInnerSolverParams& ip = *(invParam.innerParams);
407 switch( ip.precPrecondition ) {
409 quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
410 quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
411 q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
415 quda_inv_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
416 quda_inv_param.clover_cuda_prec_precondition = QUDA_SINGLE_PRECISION;
417 q_gauge_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
421 quda_inv_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
422 quda_inv_param.clover_cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
423 q_gauge_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
426 quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
427 quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
428 q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
432 switch( ip.reconstructPrecondition ) {
434 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
437 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_8;
440 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
443 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
447 quda_inv_param.tol_precondition = toDouble(ip.tolPrecondition);
448 quda_inv_param.maxiter_precondition = ip.maxIterPrecondition;
449 quda_inv_param.gcrNkrylov = ip.gcrNkrylov;
450 switch( ip.schwarzType ) {
452 quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
455 quda_inv_param.schwarz_type = QUDA_MULTIPLICATIVE_SCHWARZ;
458 quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
461 quda_inv_param.precondition_cycle = ip.preconditionCycle;
463 if( ip.verboseInner ) {
464 quda_inv_param.verbosity_precondition = QUDA_VERBOSE;
467 quda_inv_param.verbosity_precondition = QUDA_SILENT;
470 switch( ip.invTypePrecondition ) {
472 quda_inv_param.inv_type_precondition = QUDA_CG_INVERTER;
475 quda_inv_param.inv_type_precondition = QUDA_BICGSTAB_INVERTER;
480 quda_inv_param.inv_type_precondition= QUDA_CA_GCR_INVERTER;
483 quda_inv_param.inv_type_precondition= QUDA_MR_INVERTER;
487 quda_inv_param.inv_type_precondition = QUDA_MR_INVERTER;
492 QDPIO::cout <<
"Setting Precondition stuff to defaults for not using" << std::endl;
493 quda_inv_param.inv_type_precondition= QUDA_INVALID_INVERTER;
494 quda_inv_param.tol_precondition = 1.0e-1;
495 quda_inv_param.maxiter_precondition = 1000;
496 quda_inv_param.verbosity_precondition = QUDA_SILENT;
497 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
498 quda_inv_param.gcrNkrylov = 1;
502 if( invParam.verboseP ) {
503 quda_inv_param.verbosity = QUDA_VERBOSE;
506 quda_inv_param.verbosity = QUDA_SUMMARIZE;
512 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
514 gauge[
mu] = (
void *)&(links_single[
mu].elem(all.start()).elem().elem(0,0).real());
517 GetMemoryPtrGauge(gauge,links_single);
522 loadGaugeQuda((
void *)gauge, &q_gauge_param);
525 QDPIO::cout <<
"Creating CloverTerm" << std::endl;
526 clov->create(fstate, invParam_.CloverParams);
528 invclov->create(fstate, invParam_.CloverParams);
530 QDPIO::cout <<
"Inverting CloverTerm" << std::endl;
535 #ifndef BUILD_QUDA_DEVIFACE_CLOVER
536 multi1d<QUDAPackedClovSite<REALT> > packed_clov;
539 packed_clov.resize(all.siteTable().size());
541 clov->packForQUDA(packed_clov, 0);
542 clov->packForQUDA(packed_clov, 1);
546 multi1d<QUDAPackedClovSite<REALT> > packed_invclov(all.siteTable().size());
547 invclov->packForQUDA(packed_invclov, 0);
548 invclov->packForQUDA(packed_invclov, 1);
551 loadCloverQuda(&(packed_clov[0]), &(packed_invclov[0]),&quda_inv_param);
557 GetMemoryPtrClover(clov->getOffId(),clov->getDiaId(),invclov->getOffId(),invclov->getDiaId());
559 loadCloverQuda( (
void*)(clover) , (
void*)(cloverInv) ,&quda_inv_param);
569 ~LinOpSysSolverQUDAClover()
571 QDPIO::cout <<
"Destructing" << std::endl;
577 const Subset& subset()
const {
return A->subset();}
587 SystemSolverResults_t res;
598 if ( invParam.axialGaugeP ) {
602 QDPIO::cout <<
"Gauge Fixing source and initial guess" << std::endl;
603 g_chi[ rb[1] ] = GFixMat *
chi;
604 g_psi[ rb[1] ] = GFixMat *
psi;
605 QDPIO::cout <<
"Solving" << std::endl;
606 res = qudaInvert(*clov,
610 QDPIO::cout <<
"Untransforming solution." << std::endl;
611 psi[ rb[1]] = adj(GFixMat)*g_psi;
615 res = qudaInvert(*clov,
629 r[
A->subset()] -=
tmp;
630 res.resid = sqrt(norm2(
r,
A->subset()));
633 Double rel_resid = res.resid/sqrt(norm2(
chi,
A->subset()));
635 QDPIO::cout <<
"QUDA_"<< solver_string <<
"_CLOVER_SOLVER: " << res.n_count <<
" iterations. Rsd = " << res.resid <<
" Relative Rsd = " << rel_resid << std::endl;
638 if ( ! invParam.SilentFailP ) {
639 if ( toBool( rel_resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
640 QDPIO::cerr <<
"ERROR: QUDA Solver residuum is outside tolerance: QUDA resid="<< rel_resid <<
" Desired =" << invParam.RsdTarget <<
" Max Tolerated = " << invParam.RsdToleranceFactor*invParam.RsdTarget << std::endl;
652 LinOpSysSolverQUDAClover() {}
659 QudaPrecision_s cpu_prec;
660 QudaPrecision_s gpu_prec;
661 QudaPrecision_s gpu_half_prec;
663 Handle< LinearOperator<T> >
A;
664 const SysSolverQUDACloverParams invParam;
665 QudaGaugeParam q_gauge_param;
666 QudaInvertParam quda_inv_param;
668 Handle< CloverTermT<T, U> > clov;
669 Handle< CloverTermT<T, U> > invclov;
671 SystemSolverResults_t qudaInvert(
const CloverTermT<T, U>& clover,
672 const CloverTermT<T, U>& inv_clov,
Include possibly optimized Clover terms.
void temporalGauge(multi1d< LatticeColorMatrix > &ug, LatticeColorMatrix &g, int decay_dir)
Temporal gauge fixing.
Class for counted reference semantics.
bool registerAll()
Register all the factories.
multi1d< Hadron2PtContraction_t > operator()(const multi1d< LatticeColorMatrix > &u)
Asqtad Staggered-Dirac operator.
LinOpSysSolverMGProtoClover::Q Q
LinOpSysSolverMGProtoClover::T T
multi1d< LatticeFermion > chi(Ncb)
QDPCloverTermT< T, U > CloverTermT
multi1d< Real > makeFermCoeffs(const AnisoParam_t &aniso)
Make fermion coefficients.
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Periodic ferm state and a creator.
Reunitarize in place a color matrix to SU(N)
Support class for fermion actions and linear operators.
multi1d< LatticeColorMatrix > U
multi1d< LatticeColorMatrix > Q
multi1d< LatticeColorMatrixF > QF
multi1d< LatticeColorMatrixD > QD