6 #ifndef __syssolver_linop_quda_multigrid_clover_h__
7 #define __syssolver_linop_quda_multigrid_clover_h__
38 namespace LinOpSysSolverQUDAMULTIGRIDCloverEnv
49 class LinOpSysSolverQUDAMULTIGRIDClover :
public LinOpSystemSolver<LatticeFermion>
52 typedef LatticeFermion
T;
53 typedef LatticeColorMatrix
U;
54 typedef multi1d<LatticeColorMatrix>
Q;
56 typedef LatticeFermionF
TF;
57 typedef LatticeColorMatrixF
UF;
58 typedef multi1d<LatticeColorMatrixF>
QF;
60 typedef LatticeFermionF
TD;
61 typedef LatticeColorMatrixF
UD;
62 typedef multi1d<LatticeColorMatrixF>
QD;
64 typedef WordType<T>::Type_t REALT;
70 LinOpSysSolverQUDAMULTIGRIDClover(Handle< LinearOperator<T> > A_,
71 Handle< FermState<T,Q,Q> > state_,
72 const SysSolverQUDAMULTIGRIDCloverParams& invParam_) :
75 StopWatch init_swatch;
76 init_swatch.reset(); init_swatch.start();
79 std::ostringstream solver_string_stream;
80 solver_string_stream <<
"QUDA_MULTIGRID_CLOVER_LINOP_SOLVER( "
81 << invParam.SaveSubspaceID <<
" ): ";
82 solver_string = solver_string_stream.str();
85 QDPIO::cout << solver_string <<
"Initializing" << std::endl;
90 int s =
sizeof( WordType<T>::Type_t );
92 cpu_prec = QUDA_SINGLE_PRECISION;
95 cpu_prec = QUDA_DOUBLE_PRECISION;
99 switch( invParam.cudaPrecision ) {
101 gpu_prec = QUDA_HALF_PRECISION;
104 gpu_prec = QUDA_SINGLE_PRECISION;
107 gpu_prec = QUDA_DOUBLE_PRECISION;
116 switch( invParam.cudaSloppyPrecision ) {
118 gpu_half_prec = QUDA_HALF_PRECISION;
121 gpu_half_prec = QUDA_SINGLE_PRECISION;
124 gpu_half_prec = QUDA_DOUBLE_PRECISION;
127 gpu_half_prec = gpu_prec;
132 q_gauge_param = newQudaGaugeParam();
133 quda_inv_param = newQudaInvertParam();
136 const multi1d<int>& latdims = Layout::subgridLattSize();
138 q_gauge_param.X[0] = latdims[0];
139 q_gauge_param.X[1] = latdims[1];
140 q_gauge_param.X[2] = latdims[2];
141 q_gauge_param.X[3] = latdims[3];
146 q_gauge_param.type = QUDA_WILSON_LINKS;
147 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
148 q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
150 q_gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
151 q_gauge_param.gauge_order = QUDA_QDPJIT_GAUGE_ORDER;
158 if( invParam.AntiPeriodicT ) {
159 q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
162 q_gauge_param.t_boundary = QUDA_PERIODIC_T;
166 q_gauge_param.cpu_prec = cpu_prec;
167 q_gauge_param.cuda_prec = gpu_prec;
169 switch( invParam.cudaReconstruct ) {
171 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
174 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
177 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
180 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
184 q_gauge_param.cuda_prec_sloppy = gpu_half_prec;
187 q_gauge_param.cuda_prec_precondition = gpu_half_prec;
189 switch( invParam.cudaSloppyReconstruct ) {
191 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
194 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
197 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
200 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
203 q_gauge_param.reconstruct_precondition = q_gauge_param.reconstruct_sloppy;
212 links_single[
mu] = (state_->getLinks())[
mu];
216 if( invParam.axialGaugeP ) {
219 links_single[
mu] = GFixMat*(state_->getLinks())[
mu]*adj(shift(GFixMat,
FORWARD,
mu));
221 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
225 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
229 const AnisoParam_t& aniso = invParam.CloverParams.anisoParam;
231 Real gamma_f = aniso.xi_0 / aniso.nu;
232 q_gauge_param.anisotropy = toDouble(gamma_f);
235 q_gauge_param.anisotropy = 1.0;
240 Handle<FermState<T,Q,Q> > fstate(
new PeriodicFermState<T,Q,Q>(links_single));
245 links_single[
mu] *= cf[
mu];
251 quda_inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
254 quda_inv_param.inv_type = QUDA_GCR_INVERTER;
257 quda_inv_param.kappa = 0.5;
258 quda_inv_param.clover_coeff = 1.0;
259 quda_inv_param.Ls = 1;
261 quda_inv_param.tol = toDouble(invParam.RsdTarget);
262 quda_inv_param.maxiter = invParam.MaxIter;
263 quda_inv_param.reliable_delta = toDouble(invParam.Delta);
264 quda_inv_param.pipeline = invParam.Pipeline;
269 quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
270 quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
294 quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
296 quda_inv_param.dagger = QUDA_DAG_NO;
297 quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
299 quda_inv_param.cpu_prec = cpu_prec;
300 quda_inv_param.cuda_prec = gpu_prec;
301 quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
302 quda_inv_param.cuda_prec_precondition = gpu_half_prec;
306 quda_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
307 quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
309 #ifndef BUILD_QUDA_DEVIFACE_SPINOR
310 quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
311 quda_inv_param.input_location = QUDA_CPU_FIELD_LOCATION;
312 quda_inv_param.output_location = QUDA_CPU_FIELD_LOCATION;
315 quda_inv_param.dirac_order = QUDA_QDPJIT_DIRAC_ORDER;
316 quda_inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
317 quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
321 if( invParam.tuneDslashP ) {
322 quda_inv_param.tune = QUDA_TUNE_YES;
325 quda_inv_param.tune = QUDA_TUNE_NO;
330 multi1d<int> face_size(4);
331 face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
332 face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
333 face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
334 face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
336 int max_face = face_size[0];
337 for(
int i=1;
i <=3;
i++) {
338 if ( face_size[
i] > max_face ) {
339 max_face = face_size[
i];
343 q_gauge_param.ga_pad = max_face;
345 quda_inv_param.sp_pad = 0;
346 quda_inv_param.cl_pad = 0;
349 quda_inv_param.clover_cpu_prec = cpu_prec;
350 quda_inv_param.clover_cuda_prec = gpu_prec;
351 quda_inv_param.clover_cuda_prec_sloppy = gpu_half_prec;
352 quda_inv_param.clover_cuda_prec_precondition = gpu_half_prec;
353 if( invParam.MULTIGRIDParamsP ) {
354 const MULTIGRIDSolverParams& ip = *(invParam.MULTIGRIDParams);
359 quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
360 quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
361 q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
365 quda_inv_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
366 quda_inv_param.clover_cuda_prec_precondition = QUDA_SINGLE_PRECISION;
367 q_gauge_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
371 quda_inv_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
372 quda_inv_param.clover_cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
373 q_gauge_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
376 quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
377 quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
378 q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
382 switch( ip.reconstruct ) {
384 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
387 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_8;
390 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
393 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
400 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
402 gauge[
mu] = (
void *)&(links_single[
mu].elem(all.start()).elem().elem(0,0).real());
406 GetMemoryPtrGauge(gauge,links_single);
409 loadGaugeQuda((
void *)gauge, &q_gauge_param);
411 MULTIGRIDSolverParams ip = *(invParam.MULTIGRIDParams);
413 quda_inv_param.tol_precondition = toDouble(ip.tol[0]);
414 quda_inv_param.maxiter_precondition = ip.maxIterations[0];
415 quda_inv_param.gcrNkrylov = ip.outer_gcr_nkrylov;
416 quda_inv_param.residual_type =
static_cast<QudaResidualType
>(QUDA_L2_RELATIVE_RESIDUAL);
420 switch( ip.schwarzType ) {
422 quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
425 quda_inv_param.schwarz_type = QUDA_MULTIPLICATIVE_SCHWARZ;
428 quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
431 quda_inv_param.precondition_cycle = 1;
435 if( invParam.verboseP ) {
436 quda_inv_param.verbosity = QUDA_VERBOSE;
439 quda_inv_param.verbosity = QUDA_SUMMARIZE;
442 quda_inv_param.verbosity_precondition = QUDA_SILENT;
444 quda_inv_param.inv_type_precondition = QUDA_MG_INVERTER;
446 QDPIO::cout<< solver_string <<
"Basic MULTIGRID params copied."<<std::endl;
449 QDPIO::cout << solver_string <<
"Creating CloverTerm" << std::endl;
450 clov->create(fstate, invParam_.CloverParams);
453 invclov->create(fstate, invParam_.CloverParams);
455 QDPIO::cout << solver_string <<
"Inverting CloverTerm" << std::endl;
459 #ifndef BUILD_QUDA_DEVIFACE_CLOVER
460 #warning "NOT USING QUDA DEVICE IFACE"
461 quda_inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
463 multi1d<QUDAPackedClovSite<REALT> > packed_clov;
466 packed_clov.resize(all.siteTable().size());
468 clov->packForQUDA(packed_clov, 0);
469 clov->packForQUDA(packed_clov, 1);
472 multi1d<QUDAPackedClovSite<REALT> > packed_invclov(all.siteTable().size());
473 invclov->packForQUDA(packed_invclov, 0);
474 invclov->packForQUDA(packed_invclov, 1);
476 loadCloverQuda(&(packed_clov[0]), &(packed_invclov[0]), &quda_inv_param);
480 #warning "USING QUDA DEVICE IFACE"
481 quda_inv_param.clover_location = QUDA_CUDA_FIELD_LOCATION;
482 quda_inv_param.clover_order = QUDA_QDPJIT_CLOVER_ORDER;
487 GetMemoryPtrClover(clov->getOffId(),clov->getDiaId(),invclov->getOffId(),invclov->getDiaId());
489 loadCloverQuda( (
void*)(clover), (
void *)(cloverInv), &quda_inv_param);
493 quda_inv_param.omega = toDouble(ip.relaxationOmegaOuter);
499 StopWatch update_swatch;
500 update_swatch.reset(); update_swatch.start();
502 QDPIO::cout<< solver_string <<
"Recovering subspace..."<<std::endl;
503 subspace_pointers =
TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
504 for(
int j=0;
j < ip.mg_levels-1;++
j) {
505 (subspace_pointers->mg_param).setup_maxiter_refresh[
j] = 0;
507 updateMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
508 update_swatch.stop();
510 QDPIO::cout << solver_string <<
" subspace_update_time = "
511 << update_swatch.getTimeInSeconds() <<
" sec. " << std::endl;
516 StopWatch create_swatch;
517 create_swatch.reset(); create_swatch.start();
518 QDPIO::cout << solver_string <<
"Creating Subspace" << std::endl;
519 subspace_pointers = QUDAMGUtils::create_subspace<T>(invParam);
520 XMLBufferWriter file_xml;
521 push(file_xml,
"FileXML");
526 XMLBufferWriter record_xml;
527 push(record_xml,
"RecordXML");
528 write(record_xml,
"foo", foo);
536 TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID) = subspace_pointers;
537 create_swatch.stop();
538 QDPIO::cout << solver_string <<
" subspace_create_time = "
539 << create_swatch.getTimeInSeconds() <<
" sec. " << std::endl;
542 quda_inv_param.preconditioner = subspace_pointers->preconditioner;
546 QDPIO::cout << solver_string <<
"init_time = "
547 << init_swatch.getTimeInSeconds() <<
" sec. " << std::endl;
552 ~LinOpSysSolverQUDAMULTIGRIDClover()
554 QDPIO::cout << solver_string <<
"Destructing" << std::endl;
555 quda_inv_param.preconditioner =
nullptr;
556 subspace_pointers =
nullptr;
563 const Subset& subset()
const {
return A->subset();}
573 SystemSolverResults_t res;
585 if ( invParam.axialGaugeP ) {
589 g_chi[ rb[1] ] = GFixMat *
chi;
590 g_psi[ rb[1] ] = GFixMat *
psi;
591 res = qudaInvert(*clov,
595 psi[ rb[1]] = adj(GFixMat)*g_psi;
599 res = qudaInvert(*clov,
608 if( invParam.SolutionCheckP ) {
615 r[
A->subset()] -=
tmp;
616 res.resid = sqrt(norm2(
r,
A->subset()));
619 rel_resid = res.resid/sqrt(norm2(
chi,
A->subset()));
621 QDPIO::cout << solver_string << res.n_count <<
" iterations. Rsd = " << res.resid <<
" Relative Rsd = " << rel_resid << std::endl;
626 rel_resid = res.resid;
630 if ( ! invParam.SilentFailP ) {
631 if ( toBool( rel_resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
632 QDPIO::cerr << solver_string <<
"ERROR: Solver residuum is outside tolerance: QUDA resid="<< rel_resid <<
" Desired =" << invParam.RsdTarget <<
" Max Tolerated = " << invParam.RsdToleranceFactor*invParam.RsdTarget << std::endl;
643 LinOpSysSolverQUDAMULTIGRIDClover() {}
650 QudaPrecision_s cpu_prec;
651 QudaPrecision_s gpu_prec;
652 QudaPrecision_s gpu_half_prec;
654 Handle< LinearOperator<T> >
A;
655 const SysSolverQUDAMULTIGRIDCloverParams invParam;
656 QudaGaugeParam q_gauge_param;
657 QudaInvertParam quda_inv_param;
658 mutable QUDAMGUtils::MGSubspacePointers* subspace_pointers;
660 Handle< CloverTermT<T, U> > clov;
661 Handle< CloverTermT<T, U> > invclov;
663 SystemSolverResults_t qudaInvert(
const CloverTermT<T, U>& clover,
664 const CloverTermT<T, U>& inv_clov,
Include possibly optimized Clover terms.
void write(XMLWriter &xml, const std::string &path, const AsqtadFermActParams ¶m)
Writer parameters.
void temporalGauge(multi1d< LatticeColorMatrix > &ug, LatticeColorMatrix &g, int decay_dir)
Temporal gauge fixing.
Class for counted reference semantics.
bool registerAll()
Register all the factories.
multi1d< Hadron2PtContraction_t > operator()(const multi1d< LatticeColorMatrix > &u)
Asqtad Staggered-Dirac operator.
LinOpSysSolverMGProtoClover::Q Q
push(xml_out,"Condensates")
LinOpSysSolverMGProtoClover::T T
multi1d< LatticeFermion > chi(Ncb)
QDPCloverTermT< T, U > CloverTermT
multi1d< Real > makeFermCoeffs(const AnisoParam_t &aniso)
Make fermion coefficients.
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
Periodic ferm state and a creator.
Reunitarize in place a color matrix to SU(N)
Support class for fermion actions and linear operators.
multi1d< LatticeColorMatrix > U
multi1d< LatticeColorMatrix > Q
multi1d< LatticeColorMatrixF > QF
multi1d< LatticeColorMatrixD > QD