6 #ifndef __syssolver_mdagm_quda_multigrid_clover_h__
7 #define __syssolver_mdagm_quda_multigrid_clover_h__
50 namespace MdagMSysSolverQUDAMULTIGRIDCloverEnv
57 class MdagMSysSolverQUDAMULTIGRIDClover :
public MdagMSystemSolver<LatticeFermion>
60 typedef LatticeFermion
T;
61 typedef LatticeColorMatrix
U;
62 typedef multi1d<LatticeColorMatrix>
Q;
64 typedef LatticeFermionF
TF;
65 typedef LatticeColorMatrixF
UF;
66 typedef multi1d<LatticeColorMatrixF>
QF;
68 typedef LatticeFermionF
TD;
69 typedef LatticeColorMatrixF
UD;
70 typedef multi1d<LatticeColorMatrixF>
QD;
72 typedef WordType<T>::Type_t REALT;
76 MdagMSysSolverQUDAMULTIGRIDClover(Handle< LinearOperator<T> > A_,
77 Handle< FermState<T,Q,Q> > state_,
78 const SysSolverQUDAMULTIGRIDCloverParams& invParam_) :
81 StopWatch init_swatch;
82 init_swatch.reset(); init_swatch.start();
86 std::ostringstream solver_string_stream;
87 solver_string_stream <<
"QUDA_MULTIGRID_CLOVER_MDAGM_SOLVER( Mass = " << invParam.CloverParams.Mass <<
" , Id = "
88 << invParam.SaveSubspaceID <<
" ): ";
89 solver_string = solver_string_stream.str();
92 QDPIO::cout << solver_string <<
"Initializing" << std::endl;
96 QDPIO::cout << solver_string <<
"MEMCHECK: free mem = " << free_mem << std::endl;
101 int s =
sizeof( WordType<T>::Type_t );
103 cpu_prec = QUDA_SINGLE_PRECISION;
106 cpu_prec = QUDA_DOUBLE_PRECISION;
110 switch( invParam.cudaPrecision ) {
112 gpu_prec = QUDA_HALF_PRECISION;
115 gpu_prec = QUDA_SINGLE_PRECISION;
118 gpu_prec = QUDA_DOUBLE_PRECISION;
127 switch( invParam.cudaSloppyPrecision ) {
129 gpu_half_prec = QUDA_HALF_PRECISION;
132 gpu_half_prec = QUDA_SINGLE_PRECISION;
135 gpu_half_prec = QUDA_DOUBLE_PRECISION;
138 gpu_half_prec = gpu_prec;
143 q_gauge_param = newQudaGaugeParam();
144 quda_inv_param = newQudaInvertParam();
147 const multi1d<int>& latdims = Layout::subgridLattSize();
149 q_gauge_param.X[0] = latdims[0];
150 q_gauge_param.X[1] = latdims[1];
151 q_gauge_param.X[2] = latdims[2];
152 q_gauge_param.X[3] = latdims[3];
157 q_gauge_param.type = QUDA_WILSON_LINKS;
158 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
159 q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
161 q_gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
162 q_gauge_param.gauge_order = QUDA_QDPJIT_GAUGE_ORDER;
169 if( invParam.AntiPeriodicT ) {
170 q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
173 q_gauge_param.t_boundary = QUDA_PERIODIC_T;
177 q_gauge_param.cpu_prec = cpu_prec;
178 q_gauge_param.cuda_prec = gpu_prec;
180 switch( invParam.cudaReconstruct ) {
182 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
185 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
188 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
191 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
195 q_gauge_param.cuda_prec_sloppy = gpu_half_prec;
196 q_gauge_param.cuda_prec_precondition = gpu_half_prec;
198 switch( invParam.cudaSloppyReconstruct ) {
200 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
203 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
206 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
209 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
214 q_gauge_param.reconstruct_precondition=q_gauge_param.reconstruct_sloppy;
224 links_single[
mu] = (state_->getLinks())[
mu];
228 if( invParam.axialGaugeP ) {
231 links_single[
mu] = GFixMat*(state_->getLinks())[
mu]*adj(shift(GFixMat,
FORWARD,
mu));
233 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
237 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
241 const AnisoParam_t& aniso = invParam.CloverParams.anisoParam;
243 Real gamma_f = aniso.xi_0 / aniso.nu;
244 q_gauge_param.anisotropy = toDouble(gamma_f);
247 q_gauge_param.anisotropy = 1.0;
252 Handle<FermState<T,Q,Q> > fstate(
new PeriodicFermState<T,Q,Q>(links_single));
257 links_single[
mu] *= cf[
mu];
263 quda_inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
266 quda_inv_param.inv_type = QUDA_GCR_INVERTER;
267 quda_inv_param.compute_true_res = 0;
269 quda_inv_param.kappa = 0.5;
270 quda_inv_param.clover_coeff = 1.0;
272 quda_inv_param.tol = toDouble(invParam.RsdTarget);
273 quda_inv_param.maxiter = invParam.MaxIter;
274 quda_inv_param.reliable_delta = toDouble(invParam.Delta);
275 quda_inv_param.pipeline = invParam.Pipeline;
277 quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
278 quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
281 quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
283 quda_inv_param.dagger = QUDA_DAG_NO;
284 quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
286 quda_inv_param.cpu_prec = cpu_prec;
287 quda_inv_param.cuda_prec = gpu_prec;
288 quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
289 quda_inv_param.cuda_prec_precondition = gpu_half_prec;
290 quda_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
291 quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
293 #ifndef BUILD_QUDA_DEVIFACE_SPINOR
294 quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
295 quda_inv_param.input_location = QUDA_CPU_FIELD_LOCATION;
296 quda_inv_param.output_location = QUDA_CPU_FIELD_LOCATION;
299 quda_inv_param.dirac_order = QUDA_QDPJIT_DIRAC_ORDER;
300 quda_inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
301 quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
305 if( invParam.tuneDslashP ) {
306 quda_inv_param.tune = QUDA_TUNE_YES;
309 quda_inv_param.tune = QUDA_TUNE_NO;
313 multi1d<int> face_size(4);
314 face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
315 face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
316 face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
317 face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
319 int max_face = face_size[0];
320 for(
int i=1;
i <=3;
i++) {
321 if ( face_size[
i] > max_face ) {
322 max_face = face_size[
i];
326 q_gauge_param.ga_pad = max_face;
328 quda_inv_param.sp_pad = 0;
329 quda_inv_param.cl_pad = 0;
332 quda_inv_param.clover_cpu_prec = cpu_prec;
333 quda_inv_param.clover_cuda_prec = gpu_prec;
334 quda_inv_param.clover_cuda_prec_sloppy = gpu_half_prec;
335 quda_inv_param.clover_cuda_prec_precondition = gpu_half_prec;
337 if( !invParam.MULTIGRIDParamsP ) {
338 QDPIO::cout << solver_string <<
"ERROR: MG Solver had MULTIGRIDParamsP set to false" << std::endl;
343 const MULTIGRIDSolverParams& ip = *(invParam.MULTIGRIDParams);
349 quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
350 quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
351 q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
355 quda_inv_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
356 quda_inv_param.clover_cuda_prec_precondition = QUDA_SINGLE_PRECISION;
357 q_gauge_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
361 quda_inv_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
362 quda_inv_param.clover_cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
363 q_gauge_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
366 quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
367 quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
368 q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
372 switch( ip.reconstruct ) {
374 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
377 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_8;
380 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
383 q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
390 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
392 gauge[
mu] = (
void *)&(links_single[
mu].elem(all.start()).elem().elem(0,0).real());
395 GetMemoryPtrGauge(gauge,links_single);
404 loadGaugeQuda((
void *)gauge, &q_gauge_param);
407 quda_inv_param.tol_precondition = toDouble(ip.tol[0]);
408 quda_inv_param.maxiter_precondition = ip.maxIterations[0];
409 quda_inv_param.gcrNkrylov = ip.outer_gcr_nkrylov;
410 quda_inv_param.residual_type =
static_cast<QudaResidualType
>(QUDA_L2_RELATIVE_RESIDUAL);
413 switch( ip.schwarzType ) {
415 quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
418 quda_inv_param.schwarz_type = QUDA_MULTIPLICATIVE_SCHWARZ;
421 quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
424 quda_inv_param.precondition_cycle = 1;
428 if( invParam.verboseP ) {
429 quda_inv_param.verbosity = QUDA_VERBOSE;
432 quda_inv_param.verbosity = QUDA_SUMMARIZE;
435 quda_inv_param.verbosity_precondition = QUDA_SILENT;
437 quda_inv_param.inv_type_precondition = QUDA_MG_INVERTER;
440 QDPIO::cout <<solver_string <<
"Creating CloverTerm" << std::endl;
441 clov->create(fstate, invParam_.CloverParams);
444 invclov->create(fstate, invParam_.CloverParams);
446 QDPIO::cout <<solver_string<<
"Inverting CloverTerm" << std::endl;
450 #ifndef BUILD_QUDA_DEVIFACE_CLOVER
451 #warning "NOT USING QUDA DEVICE IFACE"
452 quda_inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
454 multi1d<QUDAPackedClovSite<REALT> > packed_clov;
456 packed_clov.resize(all.siteTable().size());
458 clov->packForQUDA(packed_clov, 0);
459 clov->packForQUDA(packed_clov, 1);
462 multi1d<QUDAPackedClovSite<REALT> > packed_invclov(all.siteTable().size());
463 invclov->packForQUDA(packed_invclov, 0);
464 invclov->packForQUDA(packed_invclov, 1);
466 loadCloverQuda(&(packed_clov[0]), &(packed_invclov[0]), &quda_inv_param);
470 #warning "USING QUDA DEVICE IFACE"
472 quda_inv_param.clover_location = QUDA_CUDA_FIELD_LOCATION;
473 quda_inv_param.clover_order = QUDA_QDPJIT_CLOVER_ORDER;
478 GetMemoryPtrClover(clov->getOffId(),clov->getDiaId(),invclov->getOffId(),invclov->getDiaId());
480 loadCloverQuda( (
void*)(clover), (
void *)(cloverInv), &quda_inv_param);
483 quda_inv_param.omega = toDouble(ip.relaxationOmegaOuter);
486 threshold_counts = invParam.ThresholdCount;
488 if(TheNamedObjMap::Instance().check(invParam.SaveSubspaceID))
490 StopWatch update_swatch;
491 update_swatch.reset(); update_swatch.start();
493 QDPIO::cout<< solver_string <<
"Recovering subspace..."<<std::endl;
494 subspace_pointers = TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
495 for(
int j=0;
j < ip.mg_levels-1;++
j) {
496 (subspace_pointers->mg_param).setup_maxiter_refresh[
j] = 0;
498 updateMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
499 update_swatch.stop();
501 QDPIO::cout << solver_string <<
" subspace_update_time = "
502 << update_swatch.getTimeInSeconds() <<
" sec. " << std::endl;
507 StopWatch create_swatch;
508 create_swatch.reset(); create_swatch.start();
509 QDPIO::cout << solver_string <<
"Creating Subspace" << std::endl;
510 subspace_pointers = QUDAMGUtils::create_subspace<T>(invParam);
511 XMLBufferWriter file_xml;
512 push(file_xml,
"FileXML");
517 XMLBufferWriter record_xml;
518 push(record_xml,
"RecordXML");
519 write(record_xml,
"foo", foo);
523 TheNamedObjMap::Instance().create< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
524 TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setFileXML(file_xml);
525 TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setRecordXML(record_xml);
527 TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID) = subspace_pointers;
528 create_swatch.stop();
529 QDPIO::cout << solver_string <<
" subspace_create_time = "
530 << create_swatch.getTimeInSeconds() <<
" sec. " << std::endl;
533 quda_inv_param.preconditioner = subspace_pointers->preconditioner;
536 QDPIO::cout << solver_string <<
" init_time = "
537 << init_swatch.getTimeInSeconds() <<
" sec. "
543 ~MdagMSysSolverQUDAMULTIGRIDClover()
546 quda_inv_param.preconditioner =
nullptr;
547 subspace_pointers =
nullptr;
553 const Subset& subset()
const {
return A->subset();}
563 SystemSolverResults_t res1;
564 SystemSolverResults_t res2;
565 SystemSolverResults_t res;
575 ZeroGuess4DChronoPredictor dummy_predictor;
576 res = (*this)(
psi,
chi, dummy_predictor);
593 MULTIGRIDSolverParams& ip = *(invParam.MULTIGRIDParams);
595 Double norm2chi=sqrt(norm2(
chi,
A->subset()));
598 QudaUseInitGuess old_guess_policy = quda_inv_param.use_init_guess;
599 quda_inv_param.use_init_guess = QUDA_USE_INIT_GUESS_YES;
602 SystemSolverResults_t res;
603 SystemSolverResults_t res1;
604 SystemSolverResults_t res2;
607 Handle< LinearOperator<T> >
MdagM(
new MdagMLinOp<T>(
A) );
610 QDPIO::cout << solver_string <<
"Two Step Solve" << std::endl;
614 StopWatch X_prediction_timer; X_prediction_timer.reset();
615 StopWatch Y_prediction_timer; Y_prediction_timer.reset();
616 StopWatch Y_solve_timer; Y_solve_timer.reset();
617 StopWatch X_solve_timer; X_solve_timer.reset();
618 StopWatch Y_predictor_add_timer; Y_predictor_add_timer.reset();
619 StopWatch X_predictor_add_timer; X_predictor_add_timer.reset();
620 StopWatch X_refresh_timer; X_refresh_timer.reset();
621 StopWatch Y_refresh_timer; Y_refresh_timer.reset();
623 QDPIO::cout << solver_string <<
"Predicting Y" << std::endl;
624 Y_prediction_timer.start();
631 Y_prime = Gamma(
Nd*
Nd-1)*tmp_vec;
633 Y_prediction_timer.stop();
639 Y_solve_timer.start();
644 g5chi[rb[1]]= Gamma(
Nd*
Nd-1)*
chi;
647 quda_inv_param.tol = toDouble(Real(0.5)*invParam.RsdTarget);
648 if( invParam.asymmetricP ==
true ) {
649 res1 = qudaInvert(*clov,
653 Y[rb[1]] = Gamma(
Nd*
Nd -1)*Y_prime;
659 res1 = qudaInvert(*clov,
665 char Y_prime_norm[256];
666 char Y_prime_norm_full[256];
667 std::sprintf(Y_prime_norm,
"%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime,
A->subset())));
668 std::sprintf(Y_prime_norm_full,
"%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime)));
669 QDPIO::cout <<
"Y solution: norm2(subset) = " << Y_prime_norm <<
" norm(full) = " << Y_prime_norm_full << std::endl;
672 tmp[rb[1]] = Gamma(
Nd*
Nd-1)*Y_prime;
678 bool solution_good =
true;
686 r[
A->subset()] -=
tmp;
688 res1.resid = sqrt(norm2(
r,
A->subset()));
689 QDPIO::cout <<
"Y-solve: ||r||=" << res1.resid <<
" ||r||/||b||="
690 << res1.resid/sqrt(norm2(
chi,rb[1])) << std::endl;
691 if ( toBool( res1.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
692 solution_good =
false;
696 if ( solution_good ) {
697 if( res1.n_count >= threshold_counts ) {
698 QDPIO::cout << solver_string <<
"Iteration Threshold Exceeded! Y Solver iters = " << res1.n_count <<
" Threshold=" << threshold_counts << std::endl;
699 QDPIO::cout << solver_string <<
"Refreshing Subspace" << std::endl;
701 Y_refresh_timer.start();
704 for(
int j=0;
j < ip.mg_levels-1;
j++) {
705 (subspace_pointers->mg_param).setup_maxiter_refresh[
j] = ip.maxIterSubspaceRefresh[
j];
707 updateMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
708 for(
int j=0;
j < ip.mg_levels-1;
j++) {
709 (subspace_pointers->mg_param).setup_maxiter_refresh[
j] = 0;
711 Y_refresh_timer.stop();
712 QDPIO::cout << solver_string <<
"Subspace Refresh Time = " << Y_refresh_timer.getTimeInSeconds() <<
" secs\n";
716 QDPIO::cout << solver_string <<
"Y-Solve failed (seq: "<< seqno <<
"). Blowing away and reiniting subspace" << std::endl;
717 StopWatch reinit_timer; reinit_timer.reset();
718 reinit_timer.start();
724 bool saved_value = ip.check_multigrid_setup;
725 ip.check_multigrid_setup =
true;
726 subspace_pointers = QUDAMGUtils::create_subspace<T>(invParam);
727 ip.check_multigrid_setup = saved_value;
730 XMLBufferWriter file_xml;
731 push(file_xml,
"FileXML");
735 XMLBufferWriter record_xml;
736 push(record_xml,
"RecordXML");
737 write(record_xml,
"foo", foo);
742 TheNamedObjMap::Instance().create< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
743 TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setFileXML(file_xml);
744 TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setRecordXML(record_xml);
747 TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID) = subspace_pointers;
748 quda_inv_param.preconditioner = subspace_pointers->preconditioner;
751 QDPIO::cout << solver_string <<
"Subspace Reinit Time: " << reinit_timer.getTimeInSeconds() <<
" sec." << std::endl;
754 QDPIO::cout << solver_string <<
"Re-Solving for Y with zero guess" << std::endl;
755 SystemSolverResults_t res_tmp;
758 if( invParam.asymmetricP ==
true ) {
759 res_tmp = qudaInvert(*clov,
763 Y[rb[1]] = Gamma(
Nd*
Nd -1)*Y_prime;
768 res_tmp = qudaInvert(*clov,
775 char Y_prime_norm[256];
776 char Y_prime_norm_full[256];
777 std::sprintf(Y_prime_norm,
"%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime,
A->subset())));
778 std::sprintf(Y_prime_norm_full,
"%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime)));
779 QDPIO::cout <<
"Y solution: norm2(subset) = " << Y_prime_norm <<
" norm(full) = " << Y_prime_norm_full << std::endl;
783 tmp[rb[1]] = Gamma(
Nd*
Nd-1)*Y_prime;
794 r[
A->subset()] -=
tmp;
796 res_tmp.resid = sqrt(norm2(
r,
A->subset()));
797 if ( toBool( res_tmp.resid/sqrt(norm2(
chi)) > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
798 QDPIO::cout << solver_string <<
"Re Solve for Y Failed (seq: " << seqno <<
" ) Rsd = " << res_tmp.resid/norm2chi <<
" RsdTarget = " << invParam.RsdTarget << std::endl;
799 QDPIO::cout << solver_string <<
"Throwing Exception! This will ABORT" << std::endl;
801 dumpYSolver(g5chi,Y_prime);
803 MGSolverException convergence_fail(invParam.CloverParams.Mass,
804 invParam.SaveSubspaceID,
806 Real(res_tmp.resid/norm2chi),
807 invParam.RsdTarget*invParam.RsdToleranceFactor);
808 throw convergence_fail;
813 res1.n_count += res_tmp.n_count;
814 res1.resid = res_tmp.resid;
817 Y_solve_timer.stop();
820 Y_predictor_add_timer.start();
822 Y_predictor_add_timer.stop();
824 X_prediction_timer.start();
827 X_prediction_timer.stop();
830 quda_inv_param.tol = toDouble(invParam.RsdTarget);
831 X_solve_timer.start();
833 res2 = qudaInvert(*clov,
839 char X_prime_norm[256];
840 char X_prime_norm_full[256];
841 std::sprintf(X_prime_norm,
"%.*e", DECIMAL_DIG, toDouble(norm2(
psi,
A->subset())));
842 std::sprintf(X_prime_norm_full,
"%.*e", DECIMAL_DIG, toDouble(norm2(
psi)));
843 QDPIO::cout <<
"X solution: norm2(subset) = " << X_prime_norm <<
" norm(full) = " << X_prime_norm_full << std::endl;
846 solution_good =
true;
854 r[
A->subset()] -=
tmp;
856 res2.resid = sqrt(norm2(
r,
A->subset()));
857 if ( toBool( res2.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
858 solution_good =
false;
862 if( solution_good ) {
863 if( res2.n_count >= threshold_counts ) {
864 QDPIO::cout << solver_string <<
"Threshold Reached! X Solver iters = " << res2.n_count <<
" Threshold=" << threshold_counts << std::endl;
865 QDPIO::cout << solver_string <<
"Refreshing Subspace" << std::endl;
867 X_refresh_timer.start();
871 for(
int j=0;
j < ip.mg_levels-1;
j++) {
872 (subspace_pointers->mg_param).setup_maxiter_refresh[
j] = ip.maxIterSubspaceRefresh[
j];
874 updateMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
875 for(
int j=0;
j < ip.mg_levels-1;
j++) {
876 (subspace_pointers->mg_param).setup_maxiter_refresh[
j] = 0;
878 X_refresh_timer.stop();
880 QDPIO::cout << solver_string <<
"X Subspace Refresh Time = " << X_refresh_timer.getTimeInSeconds() <<
" secs\n";
885 QDPIO::cout << solver_string <<
"X-Solve failed (seq: "<<seqno<<
") . Blowing away and reiniting subspace" << std::endl;
886 StopWatch reinit_timer; reinit_timer.reset();
887 reinit_timer.start();
893 bool saved_value = ip.check_multigrid_setup;
894 ip.check_multigrid_setup =
true;
895 subspace_pointers = QUDAMGUtils::create_subspace<T>(invParam);
896 ip.check_multigrid_setup = saved_value;
900 XMLBufferWriter file_xml;
901 push(file_xml,
"FileXML");
905 XMLBufferWriter record_xml;
906 push(record_xml,
"RecordXML");
907 write(record_xml,
"foo", foo);
912 TheNamedObjMap::Instance().create< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
913 TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setFileXML(file_xml);
914 TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setRecordXML(record_xml);
917 TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID) = subspace_pointers;
918 quda_inv_param.preconditioner = subspace_pointers->preconditioner;
920 QDPIO::cout << solver_string <<
"Subspace Reinit Time: " << reinit_timer.getTimeInSeconds() <<
" sec." << std::endl;
923 QDPIO::cout << solver_string <<
"Re-Solving for X with zero guess" << std::endl;
924 SystemSolverResults_t res_tmp;
926 res_tmp = qudaInvert(*clov,
932 char X_prime_norm[256];
933 char X_prime_norm_full[256];
934 std::sprintf(X_prime_norm,
"%.*e", DECIMAL_DIG, toDouble(norm2(
psi,
A->subset())));
935 std::sprintf(X_prime_norm_full,
"%.*e", DECIMAL_DIG, toDouble(norm2(
psi)));
936 QDPIO::cout <<
"X solution: norm2(subset) = " << X_prime_norm <<
" norm(full) = " << X_prime_norm_full << std::endl;
947 r[
A->subset()] -=
tmp;
949 res_tmp.resid = sqrt(norm2(
r,
A->subset()));
950 if ( toBool( res_tmp.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
951 QDPIO::cout << solver_string <<
"Re Solve for X Failed (seq: " << seqno <<
" ) Rsd = " << res_tmp.resid/norm2chi <<
" RsdTarget = " << invParam.RsdTarget << std::endl;
952 QDPIO::cout << solver_string <<
"Throwing Exception! This will ABORT" << std::endl;
956 MGSolverException convergence_fail(invParam.CloverParams.Mass,
957 invParam.SaveSubspaceID,
959 Real(res_tmp.resid/norm2chi),
960 invParam.RsdTarget*invParam.RsdToleranceFactor);
961 throw convergence_fail;
967 res2.n_count += res_tmp.n_count;
968 res2.resid = res_tmp.resid;
971 X_solve_timer.stop();
973 X_predictor_add_timer.start();
975 X_predictor_add_timer.stop();
977 double time = swatch.getTimeInSeconds();
979 res.n_count = res1.n_count + res2.n_count;
980 res.resid = res2.resid;
982 Double rel_resid = res.resid/norm2chi;
984 QDPIO::cout << solver_string <<
" seq: " << (seqno++) <<
" iterations: " << res1.n_count <<
" + "
985 << res2.n_count <<
" = " << res.n_count
986 <<
" Rsd = " << res.resid <<
" Relative Rsd = " << rel_resid << std::endl;
988 QDPIO::cout <<solver_string <<
"Time: Y_predict: " << Y_prediction_timer.getTimeInSeconds() <<
" (s) "
989 <<
"Y_solve: " << Y_solve_timer.getTimeInSeconds() <<
" (s) "
990 <<
"Y_register: " << Y_predictor_add_timer.getTimeInSeconds() <<
" (s) "
991 <<
"X_predict: " << X_prediction_timer.getTimeInSeconds() <<
" (s) "
992 <<
"X_solve: " << X_solve_timer.getTimeInSeconds() <<
" (s) "
993 <<
"X_register: " << X_predictor_add_timer.getTimeInSeconds() <<
" (s) "
994 <<
"Total time: " << time <<
"(s)" << std::endl;
996 quda_inv_param.use_init_guess = old_guess_policy;
1010 MULTIGRIDSolverParams& ip = *(invParam.MULTIGRIDParams);
1012 Double norm2chi=sqrt(norm2(
chi,
A->subset()));
1015 QudaUseInitGuess old_guess_policy = quda_inv_param.use_init_guess;
1016 quda_inv_param.use_init_guess = QUDA_USE_INIT_GUESS_YES;
1019 SystemSolverResults_t res;
1020 SystemSolverResults_t res1;
1021 SystemSolverResults_t res2;
1024 Handle< LinearOperator<T> >
MdagM(
new MdagMLinOp<T>(
A) );
1027 QDPIO::cout << solver_string <<
"Two Step Solve" << std::endl;
1031 StopWatch Y_solve_timer; Y_solve_timer.reset();
1032 StopWatch X_solve_timer; X_solve_timer.reset();
1033 StopWatch Y_refresh_timer; Y_refresh_timer.reset();
1034 StopWatch X_refresh_timer; X_refresh_timer.reset();
1038 QDPIO::cout <<
"Two Step Solve using QUDA predictor: (Y_index,X_index) = ( " << Y_index <<
" , " << X_index <<
" ) \n";
1045 quda_inv_param.chrono_max_dim = predictor.
getMaxChrono();
1046 quda_inv_param.chrono_index = Y_index;
1047 quda_inv_param.chrono_make_resident =
true;
1048 quda_inv_param.chrono_use_resident =
true;
1049 quda_inv_param.chrono_replace_last =
false;
1052 quda_inv_param.tol = toDouble(Real(0.5)*invParam.RsdTarget);
1054 QDPIO::cout <<
"Setting Default Chrono precision of " << cpu_prec << std::endl;
1055 quda_inv_param.chrono_precision = cpu_prec;
1058 quda_inv_param.chrono_precision = theChromaToQudaPrecisionTypeMap::Instance()[ predictor.
getChronoPrecision() ];
1059 QDPIO::cout <<
"Setting Chrono precision of " << quda_inv_param.chrono_precision << std::endl;
1068 Y_solve_timer.start();
1070 g5chi[rb[1]]= Gamma(
Nd*
Nd-1)*
chi;
1071 if( invParam.asymmetricP ==
true ) {
1072 res1 = qudaInvert(*clov,
1076 Y[rb[1]] = Gamma(
Nd*
Nd -1)*Y_prime;
1082 res1 = qudaInvert(*clov,
1089 char Y_prime_norm[256];
1090 char Y_prime_norm_full[256];
1091 std::sprintf(Y_prime_norm,
"%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime,
A->subset())));
1092 std::sprintf(Y_prime_norm_full,
"%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime)));
1093 QDPIO::cout <<
"Y solution: norm2(subset) = " << Y_prime_norm <<
" norm(full) = " << Y_prime_norm_full << std::endl;
1097 tmp[rb[1]] = Gamma(
Nd*
Nd-1)*Y_prime;
1102 bool solution_good =
true;
1110 r[
A->subset()] -=
tmp;
1112 res1.resid = sqrt(norm2(
r,
A->subset()));
1113 if ( toBool( res1.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
1114 solution_good =
false;
1118 if ( solution_good ) {
1119 if( res1.n_count >= threshold_counts ) {
1120 QDPIO::cout << solver_string <<
"Iteration Threshold Exceeded:Y Solver iters = " << res1.n_count <<
" Threshold=" << threshold_counts << std::endl;
1121 QDPIO::cout << solver_string <<
"Refreshing Subspace" << std::endl;
1123 Y_refresh_timer.start();
1126 for(
int j=0;
j < ip.mg_levels-1;
j++) {
1127 (subspace_pointers->mg_param).setup_maxiter_refresh[
j] = ip.maxIterSubspaceRefresh[
j];
1129 updateMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
1130 for(
int j=0;
j < ip.mg_levels-1;
j++) {
1131 (subspace_pointers->mg_param).setup_maxiter_refresh[
j] = 0;
1133 Y_refresh_timer.stop();
1134 QDPIO::cout << solver_string <<
"Y Subspace Refresh Time = " << Y_refresh_timer.getTimeInSeconds() <<
" secs\n";
1138 QDPIO::cout << solver_string <<
"Y-Solve failed (seq: "<<seqno<<
"). Blowing away and reiniting subspace" << std::endl;
1139 StopWatch reinit_timer; reinit_timer.reset();
1140 reinit_timer.start();
1145 bool saved_value = ip.check_multigrid_setup;
1146 ip.check_multigrid_setup =
true;
1147 subspace_pointers = QUDAMGUtils::create_subspace<T>(invParam);
1148 ip.check_multigrid_setup = saved_value;
1152 XMLBufferWriter file_xml;
1153 push(file_xml,
"FileXML");
1157 XMLBufferWriter record_xml;
1158 push(record_xml,
"RecordXML");
1159 write(record_xml,
"foo", foo);
1164 TheNamedObjMap::Instance().create< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
1165 TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setFileXML(file_xml);
1166 TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setRecordXML(record_xml);
1169 TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID) = subspace_pointers;
1170 quda_inv_param.preconditioner = subspace_pointers->preconditioner;
1171 reinit_timer.stop();
1172 QDPIO::cout << solver_string <<
"Subspace Reinit Time: " << reinit_timer.getTimeInSeconds() <<
" sec." << std::endl;
1177 quda_inv_param.chrono_use_resident =
false;
1181 quda_inv_param.chrono_replace_last =
true;
1183 QDPIO::cout << solver_string <<
"Re-Solving for Y (zero guess)" << std::endl;
1184 SystemSolverResults_t res_tmp;
1187 if( invParam.asymmetricP ==
true ) {
1188 res_tmp = qudaInvert(*clov,
1192 Y[rb[1]] = Gamma(
Nd*
Nd -1)*Y_prime;
1198 res_tmp = qudaInvert(*clov,
1205 char Y_prime_norm[256];
1206 char Y_prime_norm_full[256];
1207 std::sprintf(Y_prime_norm,
"%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime,
A->subset())));
1208 std::sprintf(Y_prime_norm_full,
"%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime)));
1209 QDPIO::cout <<
"Y solution: norm2(subset) = " << Y_prime_norm <<
" norm(full) = " << Y_prime_norm_full << std::endl;
1212 tmp[rb[1]] = Gamma(
Nd*
Nd-1)*Y_prime;
1222 r[
A->subset()] -=
tmp;
1224 res_tmp.resid = sqrt(norm2(
r,
A->subset()));
1225 if ( toBool( res_tmp.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
1227 QDPIO::cout << solver_string <<
"Re Solve for Y Failed (seq: " << seqno <<
" ) Rsd = " << res_tmp.resid/norm2chi <<
" RsdTarget = " << invParam.RsdTarget << std::endl;
1229 dumpYSolver(g5chi,Y_prime);
1231 QDPIO::cout << solver_string <<
"Throwing Exception! This will ABORT" << std::endl;
1233 MGSolverException convergence_fail(invParam.CloverParams.Mass,
1234 invParam.SaveSubspaceID,
1236 Real(res_tmp.resid/norm2chi),
1237 invParam.RsdTarget*invParam.RsdToleranceFactor);
1238 throw convergence_fail;
1243 res1.n_count += res_tmp.n_count;
1244 res1.resid = res_tmp.resid;
1247 Y_solve_timer.stop();
1256 quda_inv_param.chrono_max_dim = predictor.
getMaxChrono();
1257 quda_inv_param.chrono_index = X_index;
1258 quda_inv_param.chrono_make_resident =
true;
1259 quda_inv_param.chrono_use_resident =
true;
1260 quda_inv_param.chrono_replace_last =
false;
1263 quda_inv_param.tol = toDouble(invParam.RsdTarget);
1264 X_solve_timer.start();
1268 res2 = qudaInvert(*clov,
1274 char X_prime_norm[256];
1275 char X_prime_norm_full[256];
1276 std::sprintf(X_prime_norm,
"%.*e", DECIMAL_DIG, toDouble(norm2(
psi,
A->subset())));
1277 std::sprintf(X_prime_norm_full,
"%.*e", DECIMAL_DIG, toDouble(norm2(
psi)));
1278 QDPIO::cout <<
"X solution: norm2(subset) = " << X_prime_norm <<
" norm(full) = " << X_prime_norm_full << std::endl;
1283 solution_good =
true;
1291 r[
A->subset() ] -=
tmp;
1292 Double resid_MXY = sqrt(norm2(
r,
A->subset()));
1293 Double normY = sqrt(norm2(Y,
A->subset()));
1294 QDPIO::cout <<
"X solve: || Y - MX || / || Y || = " << resid_MXY/normY << std::endl;
1297 r[
A->subset()] -=
tmp;
1299 res2.resid = sqrt(norm2(
r,
A->subset()));
1300 if ( toBool( res2.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
1301 solution_good =
false;
1306 if( solution_good ) {
1307 if( res2.n_count >= threshold_counts ) {
1308 QDPIO::cout << solver_string <<
"Threshold Reached: X Solver iters = " << res2.n_count <<
" Threshold=" << threshold_counts << std::endl;
1309 QDPIO::cout << solver_string <<
"Refreshing Subspace" << std::endl;
1311 X_refresh_timer.start();
1315 for(
int j=0;
j < ip.mg_levels-1;
j++) {
1316 (subspace_pointers->mg_param).setup_maxiter_refresh[
j] = ip.maxIterSubspaceRefresh[
j];
1318 updateMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
1319 for(
int j=0;
j < ip.mg_levels-1;
j++) {
1320 (subspace_pointers->mg_param).setup_maxiter_refresh[
j] = 0;
1322 X_refresh_timer.stop();
1324 QDPIO::cout << solver_string <<
"Subspace Refresh Time = " << X_refresh_timer.getTimeInSeconds() <<
" secs\n";
1329 QDPIO::cout << solver_string <<
"X-Solve failed (seq: "<<seqno<<
") Blowing away and reiniting subspace" << std::endl;
1330 StopWatch reinit_timer; reinit_timer.reset();
1331 reinit_timer.start();
1337 bool saved_value = ip.check_multigrid_setup;
1338 ip.check_multigrid_setup =
true;
1339 subspace_pointers = QUDAMGUtils::create_subspace<T>(invParam);
1340 ip.check_multigrid_setup = saved_value;
1343 XMLBufferWriter file_xml;
1344 push(file_xml,
"FileXML");
1348 XMLBufferWriter record_xml;
1349 push(record_xml,
"RecordXML");
1350 write(record_xml,
"foo", foo);
1355 TheNamedObjMap::Instance().create< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
1356 TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setFileXML(file_xml);
1357 TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setRecordXML(record_xml);
1360 TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID) = subspace_pointers;
1361 quda_inv_param.preconditioner = subspace_pointers->preconditioner;
1362 reinit_timer.stop();
1363 QDPIO::cout << solver_string <<
"Subspace Reinit Time: " << reinit_timer.getTimeInSeconds() <<
" sec." << std::endl;
1368 quda_inv_param.chrono_use_resident =
false;
1372 quda_inv_param.chrono_replace_last =
true;
1374 QDPIO::cout << solver_string <<
"Re-Solving for X (zero guess)" << std::endl;
1376 SystemSolverResults_t res_tmp;
1380 res_tmp = qudaInvert(*clov,
1387 char X_prime_norm[256];
1388 char X_prime_norm_full[256];
1389 std::sprintf(X_prime_norm,
"%.*e", DECIMAL_DIG, toDouble(norm2(
psi,
A->subset())));
1390 std::sprintf(X_prime_norm_full,
"%.*e", DECIMAL_DIG, toDouble(norm2(
psi)));
1391 QDPIO::cout <<
"X solution: norm2(subset) = " << X_prime_norm <<
" norm(full) = " << X_prime_norm_full << std::endl;
1401 r[
A->subset() ] -=
tmp;
1402 Double resid_MXY = sqrt(norm2(
r,
A->subset()));
1403 Double normY = sqrt(norm2(Y,
A->subset()));
1404 QDPIO::cout <<
"X re-solve: || Y - MX || / || Y || = " << resid_MXY/normY << std::endl;
1407 r[
A->subset()] -=
tmp;
1409 res_tmp.resid = sqrt(norm2(
r,
A->subset()));
1410 if ( toBool( res_tmp.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
1411 QDPIO::cout << solver_string <<
"Re Solve for X Failed (seq: " << seqno <<
" ) Rsd = " << res_tmp.resid/norm2chi <<
" RsdTarget = " << invParam.RsdTarget << std::endl;
1413 QDPIO::cout <<
"Dumping state (solve seqno : " << seqno <<
" ) " << std::endl;
1417 QDPIO::cout << solver_string <<
"Throwing Exception! This will ABORT" << std::endl;
1418 MGSolverException convergence_fail(invParam.CloverParams.Mass,
1419 invParam.SaveSubspaceID,
1421 Real(res_tmp.resid/norm2chi),
1422 invParam.RsdTarget*invParam.RsdToleranceFactor);
1423 throw convergence_fail;
1427 res2.n_count += res_tmp.n_count;
1428 res2.resid = res_tmp.resid;
1431 X_solve_timer.stop();
1433 double time = swatch.getTimeInSeconds();
1438 res.n_count = res1.n_count + res2.n_count;
1439 res.resid = res2.resid;
1441 Double rel_resid = res.resid/norm2chi;
1443 QDPIO::cout << solver_string <<
" seq: " << (seqno++) <<
" iterations: " << res1.n_count <<
" + "
1444 << res2.n_count <<
" = " << res.n_count
1445 <<
" Rsd = " << res.resid <<
" Relative Rsd = " << rel_resid << std::endl;
1447 QDPIO::cout <<
"Y_solve: " << Y_solve_timer.getTimeInSeconds() <<
" (s) "
1448 <<
"X_solve: " << X_solve_timer.getTimeInSeconds() <<
" (s) "
1449 <<
"Total time: " << time <<
"(s)" << std::endl;
1451 quda_inv_param.use_init_guess = old_guess_policy;
1454 quda_inv_param.chrono_make_resident =
false;
1455 quda_inv_param.chrono_use_resident =
false;
1456 quda_inv_param.chrono_replace_last =
false;
1465 SystemSolverResults_t res;
1472 res = (*this)(
psi,
chi,quda_pred);
1475 catch(MGSolverException &e) {
1479 QDPIO::cout <<
"Failed to cast predictor to QUDA predictor"
1488 res = (*this)(
psi,
chi,two_step_pred);
1491 catch(MGSolverException &e) {
1495 QDPIO::cout <<
"Failed to cast predictor to QUDA or Two Step predictor"
1504 MdagMSysSolverQUDAMULTIGRIDClover() {}
1511 QudaPrecision_s cpu_prec;
1512 QudaPrecision_s gpu_prec;
1513 QudaPrecision_s gpu_half_prec;
1515 Handle< LinearOperator<T> >
A;
1516 Handle< FermState<T,Q,Q> > gstate;
1517 mutable SysSolverQUDAMULTIGRIDCloverParams invParam;
1518 QudaGaugeParam q_gauge_param;
1519 mutable QudaInvertParam quda_inv_param;
1520 mutable QUDAMGUtils::MGSubspacePointers* subspace_pointers;
1523 Handle< CloverTermT<T, U> > clov;
1524 Handle< CloverTermT<T, U> > invclov;
1526 SystemSolverResults_t qudaInvert(
const CloverTermT<T, U>& clover,
1527 const CloverTermT<T, U>& inv_clov,
1533 int threshold_counts;
1535 void dumpYSolver(
const LatticeFermion&
chi,
1536 const LatticeFermion& Y)
const;
1538 void dumpXSolver(
const LatticeFermion&
chi,
1539 const LatticeFermion& Y,
1540 const LatticeFermion& X)
const;
1542 static unsigned long seqno;
Primary include file for CHROMA library code.
Abstract interface for a Chronological Solution predictor.
Abstract interface for a Chronological Solution predictor.
virtual void predictY(T &Y, const T &chi, const Subset &s) const
virtual void newYVector(const T &Y)=0
virtual void predictX(T &X, const T &chi, const Subset &s) const
virtual void newXVector(const T &X)=0
Zero initial guess predictor.
QudaPrecisionType getChronoPrecision() const
Include possibly optimized Clover terms.
void write(XMLWriter &xml, const std::string &path, const AsqtadFermActParams ¶m)
Writer parameters.
void temporalGauge(multi1d< LatticeColorMatrix > &ug, LatticeColorMatrix &g, int decay_dir)
Temporal gauge fixing.
Class for counted reference semantics.
M^dag*M composition of a linear operator.
Named object function std::map.
bool registerAll()
Register all the factories.
void delete_subspace(const std::string SubspaceID)
size_t getCUDAFreeMem(void)
multi1d< Hadron2PtContraction_t > operator()(const multi1d< LatticeColorMatrix > &u)
Asqtad Staggered-Dirac operator.
LinOpSysSolverMGProtoClover::Q Q
LinOpSysSolverMGProtoClover::T T
QDPCloverTermT< T, U > CloverTermT
multi1d< Real > makeFermCoeffs(const AnisoParam_t &aniso)
Make fermion coefficients.
FloatingPoint< double > Double
multi1d< LatticeFermion > r(Ncb)
Periodic ferm state and a creator.
Pick channel for QUDA Predictor.
Reunitarize in place a color matrix to SU(N)
Support class for fermion actions and linear operators.
Handle< LinearOperator< T > > MdagM
multi1d< LatticeColorMatrix > U
multi1d< LatticeColorMatrix > Q
multi1d< LatticeColorMatrixF > QF
multi1d< LatticeColorMatrixD > QD
push(xml_out,"Cooled_Topology")
Zero initial guess predictor.