6 #ifndef __syssolver_linop_quda_wilson_h__
7 #define __syssolver_linop_quda_wilson_h__
34 namespace LinOpSysSolverQUDAWilsonEnv
47 class LinOpSysSolverQUDAWilson :
public LinOpSystemSolver<LatticeFermion>
50 typedef LatticeFermion
T;
51 typedef LatticeColorMatrix
U;
52 typedef multi1d<LatticeColorMatrix>
Q;
54 typedef LatticeFermionF
TF;
55 typedef LatticeColorMatrixF
UF;
56 typedef multi1d<LatticeColorMatrixF>
QF;
58 typedef LatticeFermionF
TD;
59 typedef LatticeColorMatrixF
UD;
60 typedef multi1d<LatticeColorMatrixF>
QD;
62 typedef WordType<T>::Type_t REALT;
68 LinOpSysSolverQUDAWilson(Handle< LinearOperator<T> > A_,
69 Handle< FermState<T,Q,Q> > state_,
70 const SysSolverQUDAWilsonParams& invParam_) :
71 A(A_), invParam(invParam_)
73 QDPIO::cout <<
"LinOpSysSolverQUDAWilson:" << std::endl;
78 int s =
sizeof( WordType<T>::Type_t );
80 cpu_prec = QUDA_SINGLE_PRECISION;
83 cpu_prec = QUDA_DOUBLE_PRECISION;
88 switch( invParam.cudaPrecision ) {
90 gpu_prec = QUDA_HALF_PRECISION;
93 gpu_prec = QUDA_SINGLE_PRECISION;
96 gpu_prec = QUDA_DOUBLE_PRECISION;
105 switch( invParam.cudaSloppyPrecision ) {
107 gpu_half_prec = QUDA_HALF_PRECISION;
110 gpu_half_prec = QUDA_SINGLE_PRECISION;
113 gpu_half_prec = QUDA_DOUBLE_PRECISION;
116 gpu_half_prec = gpu_prec;
121 q_gauge_param = newQudaGaugeParam();
122 quda_inv_param = newQudaInvertParam();
125 const multi1d<int>& latdims = Layout::subgridLattSize();
127 q_gauge_param.X[0] = latdims[0];
128 q_gauge_param.X[1] = latdims[1];
129 q_gauge_param.X[2] = latdims[2];
130 q_gauge_param.X[3] = latdims[3];
135 q_gauge_param.type = QUDA_WILSON_LINKS;
136 q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
142 if( invParam.AntiPeriodicT ) {
143 q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
146 q_gauge_param.t_boundary = QUDA_PERIODIC_T;
150 q_gauge_param.cpu_prec = cpu_prec;
151 q_gauge_param.cuda_prec = gpu_prec;
154 switch( invParam.cudaReconstruct ) {
156 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
159 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
162 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
165 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
169 q_gauge_param.cuda_prec_sloppy = gpu_half_prec;
171 switch( invParam.cudaSloppyReconstruct ) {
173 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
176 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
179 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
182 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
194 links_single[
mu] = (state_->getLinks())[
mu];
198 if( invParam.axialGaugeP ) {
199 QDPIO::cout <<
"Fixing Temporal Gauge" << std::endl;
202 links_single[
mu] = GFixMat*(state_->getLinks())[
mu]*adj(shift(GFixMat,
FORWARD,
mu));
204 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
208 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
212 const AnisoParam_t& aniso = invParam.WilsonParams.anisoParam;
214 Real gamma_f = aniso.xi_0 / aniso.nu;
215 q_gauge_param.anisotropy = toDouble(gamma_f);
218 q_gauge_param.anisotropy = 1.0;
223 Handle<FermState<T,Q,Q> > fstate(
new PeriodicFermState<T,Q,Q>(links_single));
228 links_single[
mu] *= cf[
mu];
234 quda_inv_param.dslash_type = QUDA_WILSON_DSLASH;
237 switch( invParam.solverType ) {
239 quda_inv_param.inv_type = QUDA_CG_INVERTER;
240 solver_string =
"CG";
243 quda_inv_param.inv_type = QUDA_BICGSTAB_INVERTER;
244 solver_string =
"BICGSTAB";
247 quda_inv_param.inv_type = QUDA_GCR_INVERTER;
248 solver_string =
"GCR";
251 QDPIO::cerr <<
"Unknown SOlver type" << std::endl;
258 Real massParam = Real(1) + Real(3)/Real(q_gauge_param.anisotropy) + invParam.WilsonParams.Mass;
263 quda_inv_param.kappa = 1.0/(2*toDouble(massParam));
264 quda_inv_param.clover_coeff = 0.0;
266 quda_inv_param.tol = toDouble(invParam.RsdTarget);
267 quda_inv_param.maxiter = invParam.MaxIter;
268 quda_inv_param.reliable_delta = toDouble(invParam.Delta);
269 quda_inv_param.pipeline = invParam.Pipeline;
272 quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
275 switch( invParam.solverType ) {
277 quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
280 quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
284 quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
287 quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
290 quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
293 quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
298 if( invParam.asymmetricP ) {
299 QDPIO::cout <<
"Using Asymmetric Linop: A_oo - D A^{-1}_ee D" << std::endl;
300 quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
304 QDPIO::cout <<
"Using Symmetric Linop: 1 - A^{-1}_oo D A^{-1}_ee D" << std::endl;
305 quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
308 quda_inv_param.dagger = QUDA_DAG_NO;
309 quda_inv_param.mass_normalization = QUDA_ASYMMETRIC_MASS_NORMALIZATION;
312 quda_inv_param.cpu_prec = cpu_prec;
313 quda_inv_param.cuda_prec = gpu_prec;
314 quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
315 quda_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_YES;
316 quda_inv_param.use_init_guess = QUDA_USE_INIT_GUESS_NO;
317 quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
318 quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
320 if( invParam.tuneDslashP ) {
321 QDPIO::cout <<
"Enabling Dslash Autotuning" << std::endl;
323 quda_inv_param.tune = QUDA_TUNE_YES;
326 QDPIO::cout <<
"Disabling Dslash Autotuning" << std::endl;
328 quda_inv_param.tune = QUDA_TUNE_NO;
333 multi1d<int> face_size(4);
334 face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
335 face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
336 face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
337 face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
339 int max_face = face_size[0];
340 for(
int i=1;
i <=3;
i++) {
341 if ( face_size[
i] > max_face ) {
342 max_face = face_size[
i];
347 q_gauge_param.ga_pad = max_face;
349 quda_inv_param.sp_pad = 0;
350 quda_inv_param.cl_pad = 0;
352 if( invParam.innerParamsP ) {
353 QDPIO::cout <<
"Setting inner solver params" << std::endl;
355 const GCRInnerSolverParams& ip = *(invParam.innerParams);
358 switch( ip.precPrecondition ) {
360 quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
361 q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
365 quda_inv_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
366 q_gauge_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
370 quda_inv_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
371 q_gauge_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
374 quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
375 q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
379 switch( ip.reconstructPrecondition ) {
381 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
384 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_8;
387 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
390 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
394 quda_inv_param.tol_precondition = toDouble(ip.tolPrecondition);
395 quda_inv_param.maxiter_precondition = ip.maxIterPrecondition;
396 quda_inv_param.gcrNkrylov = ip.gcrNkrylov;
397 switch( ip.schwarzType ) {
399 quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
402 quda_inv_param.schwarz_type = QUDA_MULTIPLICATIVE_SCHWARZ;
405 quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
408 quda_inv_param.precondition_cycle = ip.preconditionCycle;
410 if( ip.verboseInner ) {
411 quda_inv_param.verbosity_precondition = QUDA_VERBOSE;
414 quda_inv_param.verbosity_precondition = QUDA_SILENT;
417 switch( ip.invTypePrecondition ) {
419 quda_inv_param.inv_type_precondition = QUDA_CG_INVERTER;
422 quda_inv_param.inv_type_precondition = QUDA_BICGSTAB_INVERTER;
425 quda_inv_param.inv_type_precondition= QUDA_CA_GCR_INVERTER;
428 quda_inv_param.inv_type_precondition= QUDA_MR_INVERTER;
432 quda_inv_param.inv_type_precondition = QUDA_MR_INVERTER;
437 QDPIO::cout <<
"Setting Precondition stuff to defaults for not using" << std::endl;
438 quda_inv_param.inv_type_precondition= QUDA_INVALID_INVERTER;
439 quda_inv_param.tol_precondition = 1.0e-1;
440 quda_inv_param.maxiter_precondition = 1000;
441 quda_inv_param.verbosity_precondition = QUDA_SILENT;
442 quda_inv_param.gcrNkrylov = 1;
447 if( invParam.verboseP ) {
448 quda_inv_param.verbosity = QUDA_VERBOSE;
451 quda_inv_param.verbosity = QUDA_SUMMARIZE;
458 gauge[
mu] = (
void *)&(links_single[
mu].elem(all.start()).elem().elem(0,0).real());
462 loadGaugeQuda((
void *)gauge, &q_gauge_param);
470 ~LinOpSysSolverQUDAWilson()
472 QDPIO::cout <<
"Destructing" << std::endl;
477 const Subset& subset()
const {
return A->subset();}
487 SystemSolverResults_t res;
494 chiResc[
A->subset()] = invMassParam *
chi;
500 if ( invParam.axialGaugeP ) {
504 QDPIO::cout <<
"Gauge Fixing source and initial guess" << std::endl;
505 g_chi[ rb[1] ] = GFixMat * chiResc;
506 g_psi[ rb[1] ] = GFixMat *
psi;
507 QDPIO::cout <<
"Solving" << std::endl;
508 res = qudaInvert(g_chi,
510 QDPIO::cout <<
"Untransforming solution." << std::endl;
511 psi[ rb[1]] = adj(GFixMat)*g_psi;
515 res = qudaInvert(chiResc,
527 r[
A->subset()] -=
tmp;
528 res.resid = sqrt(norm2(
r,
A->subset()));
531 Double rel_resid = res.resid/sqrt(norm2(
chi,
A->subset()));
533 QDPIO::cout <<
"QUDA_"<< solver_string <<
"_WILSON_SOLVER: " << res.n_count <<
" iterations. Rsd = " << res.resid <<
" Relative Rsd = " << rel_resid << std::endl;
536 if ( ! invParam.SilentFailP ) {
537 if ( toBool( rel_resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
538 QDPIO::cerr <<
"ERROR: QUDA Solver residuum is outside tolerance: QUDA resid="<< rel_resid <<
" Desired =" << invParam.RsdTarget <<
" Max Tolerated = " << invParam.RsdToleranceFactor*invParam.RsdTarget << std::endl;
550 LinOpSysSolverQUDAWilson() {}
557 QudaPrecision_s cpu_prec;
558 QudaPrecision_s gpu_prec;
559 QudaPrecision_s gpu_half_prec;
561 Handle< LinearOperator<T> >
A;
562 const SysSolverQUDAWilsonParams invParam;
564 QudaGaugeParam q_gauge_param;
565 QudaInvertParam quda_inv_param;
567 SystemSolverResults_t qudaInvert(
const T& chi_s,
void temporalGauge(multi1d< LatticeColorMatrix > &ug, LatticeColorMatrix &g, int decay_dir)
Temporal gauge fixing.
Class for counted reference semantics.
bool registerAll()
Register all the factories.
multi1d< Hadron2PtContraction_t > operator()(const multi1d< LatticeColorMatrix > &u)
Asqtad Staggered-Dirac operator.
LinOpSysSolverMGProtoClover::Q Q
LinOpSysSolverMGProtoClover::T T
multi1d< LatticeFermion > chi(Ncb)
multi1d< Real > makeFermCoeffs(const AnisoParam_t &aniso)
Make fermion coefficients.
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Periodic ferm state and a creator.
Reunitarize in place a color matrix to SU(N)
Support class for fermion actions and linear operators.
multi1d< LatticeColorMatrix > U
multi1d< LatticeColorMatrix > Q
multi1d< LatticeColorMatrixF > QF
multi1d< LatticeColorMatrixD > QD