7 #ifndef __syssolver_linop_quda_multigrid_wilson_h__
8 #define __syssolver_linop_quda_multigrid_wilson_h__
37 namespace LinOpSysSolverQUDAMULTIGRIDWilsonEnv
50 class LinOpSysSolverQUDAMULTIGRIDWilson :
public LinOpSystemSolver<LatticeFermion>
53 typedef LatticeFermion
T;
54 typedef LatticeColorMatrix
U;
55 typedef multi1d<LatticeColorMatrix>
Q;
57 typedef LatticeFermionF
TF;
58 typedef LatticeColorMatrixF
UF;
59 typedef multi1d<LatticeColorMatrixF>
QF;
61 typedef LatticeFermionF
TD;
62 typedef LatticeColorMatrixF
UD;
63 typedef multi1d<LatticeColorMatrixF>
QD;
65 typedef WordType<T>::Type_t REALT;
71 LinOpSysSolverQUDAMULTIGRIDWilson(Handle< LinearOperator<T> > A_,
72 Handle< FermState<T,Q,Q> > state_,
73 const SysSolverQUDAMULTIGRIDWilsonParams& invParam_) :
74 A(A_), invParam(invParam_)
76 QDPIO::cout <<
"LinOpSysSolverQUDAMULTIGRIDWilson:" << std::endl;
81 int s =
sizeof( WordType<T>::Type_t );
83 cpu_prec = QUDA_SINGLE_PRECISION;
86 cpu_prec = QUDA_DOUBLE_PRECISION;
91 switch( invParam.cudaPrecision ) {
93 gpu_prec = QUDA_HALF_PRECISION;
96 gpu_prec = QUDA_SINGLE_PRECISION;
99 gpu_prec = QUDA_DOUBLE_PRECISION;
108 switch( invParam.cudaSloppyPrecision ) {
110 gpu_half_prec = QUDA_HALF_PRECISION;
113 gpu_half_prec = QUDA_SINGLE_PRECISION;
116 gpu_half_prec = QUDA_DOUBLE_PRECISION;
119 gpu_half_prec = gpu_prec;
124 q_gauge_param = newQudaGaugeParam();
125 quda_inv_param = newQudaInvertParam();
126 mg_inv_param = newQudaInvertParam();
127 mg_param = newQudaMultigridParam();
131 const multi1d<int>& latdims = Layout::subgridLattSize();
133 q_gauge_param.X[0] = latdims[0];
134 q_gauge_param.X[1] = latdims[1];
135 q_gauge_param.X[2] = latdims[2];
136 q_gauge_param.X[3] = latdims[3];
141 q_gauge_param.type = QUDA_WILSON_LINKS;
142 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
143 q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
145 QDPIO::cout <<
"MDAGM Using QDP-JIT gauge order" << std::endl;
146 q_gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
147 q_gauge_param.gauge_order = QUDA_QDPJIT_GAUGE_ORDER;
154 if( invParam.AntiPeriodicT ) {
155 q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
158 q_gauge_param.t_boundary = QUDA_PERIODIC_T;
162 q_gauge_param.cpu_prec = cpu_prec;
163 q_gauge_param.cuda_prec = gpu_prec;
166 switch( invParam.cudaReconstruct ) {
168 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
171 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
174 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
177 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
181 q_gauge_param.cuda_prec_sloppy = gpu_half_prec;
183 switch( invParam.cudaSloppyReconstruct ) {
185 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
188 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
191 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
194 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
206 links_single[
mu] = (state_->getLinks())[
mu];
210 if( invParam.axialGaugeP ) {
211 QDPIO::cout <<
"Fixing Temporal Gauge" << std::endl;
214 links_single[
mu] = GFixMat*(state_->getLinks())[
mu]*adj(shift(GFixMat,
FORWARD,
mu));
216 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
220 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
224 const AnisoParam_t& aniso = invParam.WilsonParams.anisoParam;
226 Real gamma_f = aniso.xi_0 / aniso.nu;
227 q_gauge_param.anisotropy = toDouble(gamma_f);
230 q_gauge_param.anisotropy = 1.0;
235 Handle<FermState<T,Q,Q> > fstate(
new PeriodicFermState<T,Q,Q>(links_single));
240 links_single[
mu] *= cf[
mu];
248 QDPIO::cout <<
"Remember for production to reset quda_inv_param.dslash_typeto QUDA_CLOVER_WILSON_DSLASH" << std::endl;
249 quda_inv_param.dslash_type = QUDA_WILSON_DSLASH;
250 mg_inv_param.dslash_type = QUDA_WILSON_DSLASH;
253 switch( invParam.solverType ) {
255 quda_inv_param.inv_type = QUDA_CG_INVERTER;
256 solver_string =
"CG";
259 quda_inv_param.inv_type = QUDA_BICGSTAB_INVERTER;
260 solver_string =
"BICGSTAB";
263 quda_inv_param.inv_type = QUDA_GCR_INVERTER;
264 solver_string =
"GCR";
267 QDPIO::cerr <<
"Unknown Solver type" << std::endl;
272 mg_inv_param.inv_type = QUDA_GCR_INVERTER;
273 mg_inv_param.tol = 1e-10;
274 mg_inv_param.maxiter = 10000;
275 mg_inv_param.reliable_delta = 1e-10;
276 mg_inv_param.verbosity = QUDA_VERBOSE;
277 mg_inv_param.verbosity_precondition = QUDA_VERBOSE;
284 auto wlparams = invParam.WilsonParams;
286 auto aniso = wlparams.anisoParam;
288 Real ff = where(aniso.anisoP, aniso.nu / aniso.xi_0, Real(1));
289 diag_mass = 1 + (
Nd-1)*ff + wlparams.Mass;
293 quda_inv_param.kappa =
static_cast<double>(1)/(
static_cast<double>(2)*toDouble(diag_mass));
296 quda_inv_param.tol = toDouble(invParam.RsdTarget);
297 quda_inv_param.maxiter = invParam.MaxIter;
298 quda_inv_param.reliable_delta = toDouble(invParam.Delta);
299 quda_inv_param.pipeline = invParam.Pipeline;
304 quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
305 quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
307 QDPIO::cout <<
"Using Symmetric Linop: 1 - A^{-1}_oo D A^{-1}_ee D" << std::endl;
308 quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
310 quda_inv_param.dagger = QUDA_DAG_NO;
311 quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
313 quda_inv_param.cpu_prec = cpu_prec;
314 quda_inv_param.cuda_prec = gpu_prec;
315 quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
317 mg_inv_param.cpu_prec = cpu_prec;
318 mg_inv_param.cuda_prec = gpu_prec;
319 mg_inv_param.cuda_prec_sloppy = gpu_half_prec;
324 mg_inv_param.clover_cpu_prec = cpu_prec;
325 mg_inv_param.clover_cuda_prec = gpu_prec;
326 mg_inv_param.clover_cuda_prec_sloppy = gpu_half_prec;
327 mg_inv_param.clover_cuda_prec_precondition = gpu_prec;
328 mg_inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
331 quda_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
332 quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
334 #ifndef BUILD_QUDA_DEVIFACE_SPINOR
335 quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
337 QDPIO::cout <<
"MDAGM Using QDP-JIT spinor order" << std::endl;
338 quda_inv_param.dirac_order = QUDA_QDPJIT_DIRAC_ORDER;
339 quda_inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
340 quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
344 if( invParam.tuneDslashP ) {
345 QDPIO::cout <<
"Enabling Dslash Autotuning" << std::endl;
347 quda_inv_param.tune = QUDA_TUNE_YES;
348 mg_inv_param.tune = QUDA_TUNE_YES;
351 QDPIO::cout <<
"Disabling Dslash Autotuning" << std::endl;
353 quda_inv_param.tune = QUDA_TUNE_NO;
354 mg_inv_param.tune = QUDA_TUNE_NO;
359 multi1d<int> face_size(4);
360 face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
361 face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
362 face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
363 face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
365 int max_face = face_size[0];
366 for(
int i=1;
i <=3;
i++) {
367 if ( face_size[
i] > max_face ) {
368 max_face = face_size[
i];
373 q_gauge_param.ga_pad = max_face;
375 quda_inv_param.sp_pad = 0;
376 quda_inv_param.cl_pad = 0;
378 if( invParam.MULTIGRIDParamsP ) {
379 QDPIO::cout <<
"Setting MULTIGRID solver params" << std::endl;
381 const MULTIGRIDSolverParams& ip = *(invParam.MULTIGRIDParams);
386 mg_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
387 quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
388 mg_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
389 quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
390 q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
394 mg_inv_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
395 mg_inv_param.clover_cuda_prec_precondition = QUDA_SINGLE_PRECISION;
396 quda_inv_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
397 quda_inv_param.clover_cuda_prec_precondition = QUDA_SINGLE_PRECISION;
398 q_gauge_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
402 mg_inv_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
403 mg_inv_param.clover_cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
404 quda_inv_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
405 quda_inv_param.clover_cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
406 q_gauge_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
409 mg_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
410 mg_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
411 quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
412 quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
413 q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
417 switch( ip.reconstruct ) {
419 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
422 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_8;
425 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
428 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
435 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
437 gauge[
mu] = (
void *)&(links_single[
mu].elem(all.start()).elem().elem(0,0).real());
440 GetMemoryPtrGauge(gauge,links_single);
445 loadGaugeQuda((
void *)gauge, &q_gauge_param);
448 MULTIGRIDSolverParams ip = *(invParam.MULTIGRIDParams);
450 quda_inv_param.tol_precondition = toDouble(ip.tol[0]);
451 quda_inv_param.maxiter_precondition = ip.maxIterations[0];
452 quda_inv_param.gcrNkrylov = ip.outer_gcr_nkrylov;
453 mg_inv_param.gcrNkrylov = ip.precond_gcr_nkrylov;
455 switch( ip.schwarzType ) {
457 quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
460 quda_inv_param.schwarz_type = QUDA_MULTIPLICATIVE_SCHWARZ;
463 quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
466 quda_inv_param.precondition_cycle = 1;
469 quda_inv_param.verbosity_precondition = QUDA_VERBOSE;
472 quda_inv_param.inv_type_precondition = QUDA_MG_INVERTER;
475 mg_inv_param.sp_pad = 0;
476 mg_inv_param.cl_pad = 0;
478 mg_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
479 mg_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
480 mg_inv_param.dirac_order = QUDA_DIRAC_ORDER;
482 mg_inv_param.input_location = QUDA_CPU_FIELD_LOCATION;
483 mg_inv_param.output_location = QUDA_CPU_FIELD_LOCATION;
485 mg_inv_param.kappa = quda_inv_param.kappa;
487 mg_inv_param.dagger = QUDA_DAG_NO;
488 mg_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
490 mg_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
491 mg_inv_param.solution_type = QUDA_MAT_SOLUTION;
492 mg_inv_param.solve_type = QUDA_DIRECT_SOLVE;
494 mg_param.invert_param = &mg_inv_param;
497 quda_inv_param.Ls = 1;
500 mg_param.run_verify = QUDA_BOOLEAN_NO;
502 mg_param.n_level = ip.mg_levels;
504 for (
int i=0;
i<mg_param.n_level-1; ++
i) {
505 if( ip.setup_on_gpu[
i] ) {
506 mg_param.setup_location[
i] = QUDA_CUDA_FIELD_LOCATION;
509 mg_param.setup_location[
i] = QUDA_CPU_FIELD_LOCATION;
514 for (
int i=0;
i<mg_param.n_level;
i++) {
515 for (
int j=0;
j<
Nd;
j++) {
516 if(
i < mg_param.n_level-1 ) {
517 mg_param.geo_block_size[
i][
j] = ip.blocking[
i][
j];
520 mg_param.geo_block_size[
i][
j] = 4;
523 mg_param.spin_block_size[
i] = 1;
524 if(
i < mg_param.n_level-1) {
525 mg_param.n_vec[
i] = ip.nvec[
i];
526 mg_param.nu_pre[
i] = ip.nu_pre[
i];
527 mg_param.nu_post[
i] = ip.nu_post[
i];
529 mg_param.smoother_tol[
i] = toDouble(ip.smootherTol[
i]);
530 mg_param.global_reduction[
i] = QUDA_BOOLEAN_YES;
532 switch( ip.smootherType[
i] ) {
534 mg_param.smoother[
i] = QUDA_MR_INVERTER;
535 mg_param.omega[
i] = 0.85;
538 mg_param.smoother[
i] = QUDA_CA_GCR_INVERTER;
539 mg_param.omega[
i] = 0.85;
542 QDPIO::cout <<
"Unknown or no smother type specified, no smoothing inverter will be used." << std::endl;
543 mg_param.smoother[
i] = QUDA_INVALID_INVERTER;
546 mg_param.location[
i] = QUDA_CUDA_FIELD_LOCATION;
547 mg_param.smoother_solve_type[
i] = QUDA_DIRECT_PC_SOLVE;
548 mg_param.coarse_grid_solution_type[
i] = QUDA_MATPC_SOLUTION;
552 mg_param.spin_block_size[0] = 2;
555 mg_param.smoother[ip.mg_levels-1] = QUDA_GCR_INVERTER;
557 mg_param.compute_null_vector = ip.generate_nullspace ? QUDA_COMPUTE_NULL_VECTOR_YES
558 : QUDA_COMPUTE_NULL_VECTOR_NO;
560 for(
int l=0;
l < ip.mg_levels;
l++) {
561 mg_param.vec_infile[
l][0] =
'\0';
562 mg_param.vec_outfile[
l][0] =
'\0';
565 QDPIO::cout<<
"Basic MULTIGRID params copied."<<std::endl;
566 quda_inv_param.verbosity = QUDA_VERBOSE;
571 void *mg_preconditioner = newMultigridQuda(&mg_param);
572 QDPIO::cout<<
"NewMultigridQuda state initialized."<<std::endl;
573 quda_inv_param.preconditioner = mg_preconditioner;
574 QDPIO::cout<<
"MULTIGRID preconditioner set."<<std::endl;
582 ~LinOpSysSolverQUDAMULTIGRIDWilson()
584 QDPIO::cout <<
"Destructing" << std::endl;
589 destroyMultigridQuda(quda_inv_param.preconditioner);
593 const Subset& subset()
const {
return A->subset();}
603 SystemSolverResults_t res;
616 if ( invParam.axialGaugeP ) {
620 QDPIO::cout <<
"Gauge Fixing source and initial guess" << std::endl;
621 g_chi[ rb[1] ] = GFixMat *
chi;
622 g_psi[ rb[1] ] = GFixMat *
psi;
623 QDPIO::cout <<
"Solving" << std::endl;
624 res = qudaInvert( g_chi,g_psi);
625 QDPIO::cout <<
"Untransforming solution." << std::endl;
626 psi[ rb[1]] = adj(GFixMat)*g_psi;
630 res = qudaInvert(
chi,
psi);
641 r[
A->subset()] -=
tmp;
642 res.resid = sqrt(norm2(
r,
A->subset()));
645 Double rel_resid = res.resid/sqrt(norm2(
chi,
A->subset()));
647 QDPIO::cout <<
"QUDA_MULTIGRID_"<< solver_string <<
"_WILSON_SOLVER: " << res.n_count <<
" iterations. Rsd = " << res.resid <<
" Relative Rsd = " << rel_resid << std::endl;
650 if ( ! invParam.SilentFailP ) {
651 if ( toBool( rel_resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
652 QDPIO::cerr <<
"ERROR: QUDA MULTIGRID Solver residuum is outside tolerance: QUDA resid="<< rel_resid <<
" Desired =" << invParam.RsdTarget <<
" Max Tolerated = " << invParam.RsdToleranceFactor*invParam.RsdTarget << std::endl;
664 LinOpSysSolverQUDAMULTIGRIDWilson() {}
671 QudaPrecision_s cpu_prec;
672 QudaPrecision_s gpu_prec;
673 QudaPrecision_s gpu_half_prec;
675 Handle< LinearOperator<T> >
A;
676 const SysSolverQUDAMULTIGRIDWilsonParams invParam;
677 QudaGaugeParam q_gauge_param;
678 QudaInvertParam quda_inv_param;
679 QudaInvertParam mg_inv_param;
680 QudaMultigridParam mg_param;
682 SystemSolverResults_t qudaInvert(
const T& chi_s,
void temporalGauge(multi1d< LatticeColorMatrix > &ug, LatticeColorMatrix &g, int decay_dir)
Temporal gauge fixing.
Class for counted reference semantics.
bool registerAll()
Register all the factories.
multi1d< Hadron2PtContraction_t > operator()(const multi1d< LatticeColorMatrix > &u)
Asqtad Staggered-Dirac operator.
LinOpSysSolverMGProtoClover::Q Q
LinOpSysSolverMGProtoClover::T T
multi1d< LatticeFermion > chi(Ncb)
multi1d< Real > makeFermCoeffs(const AnisoParam_t &aniso)
Make fermion coefficients.
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Periodic ferm state and a creator.
Reunitarize in place a color matrix to SU(N)
Support class for fermion actions and linear operators.
multi1d< LatticeColorMatrix > U
multi1d< LatticeColorMatrix > Q
multi1d< LatticeColorMatrixF > QF
multi1d< LatticeColorMatrixD > QD