CHROMA
syssolver_mdagm_clover_quda_multigrid_w.h
Go to the documentation of this file.
1 // -*- C++ -*-
2 /*! \file
3  * \QUDA MULTIGRID MdagM Clover solver.
4  */
5 
6 #ifndef __syssolver_mdagm_quda_multigrid_clover_h__
7 #define __syssolver_mdagm_quda_multigrid_clover_h__
8 
9 #include "chroma_config.h"
10 #include "chromabase.h"
11 #include <cfloat>
12 #include <cstdio>
13 
14 using namespace QDP;
15 
16 
17 
18 #include "handle.h"
19 #include "state.h"
20 #include "syssolver.h"
21 #include "linearop.h"
27 #include "io/aniso_io.h"
28 #include <string>
29 #include <sstream>
30 
31 #include "lmdagm.h"
32 #include "util/gauge/reunit.h"
35 
36 //#include <util_quda.h>
37 #ifdef BUILD_QUDA
38 #include <quda.h>
39 #ifdef QDP_IS_QDPJIT
41 #endif
42 
46 
47 namespace Chroma
48 {
49 
50 namespace MdagMSysSolverQUDAMULTIGRIDCloverEnv
51 {
52 //! Register the syssolver
53 bool registerAll();
54 
55 }
56 
57 class MdagMSysSolverQUDAMULTIGRIDClover : public MdagMSystemSolver<LatticeFermion>
58 {
59 public:
60  typedef LatticeFermion T;
61  typedef LatticeColorMatrix U;
62  typedef multi1d<LatticeColorMatrix> Q;
63 
64  typedef LatticeFermionF TF;
65  typedef LatticeColorMatrixF UF;
66  typedef multi1d<LatticeColorMatrixF> QF;
67 
68  typedef LatticeFermionF TD;
69  typedef LatticeColorMatrixF UD;
70  typedef multi1d<LatticeColorMatrixF> QD;
71 
72  typedef WordType<T>::Type_t REALT;
73 
74 
75 
76  MdagMSysSolverQUDAMULTIGRIDClover(Handle< LinearOperator<T> > A_,
77  Handle< FermState<T,Q,Q> > state_,
78  const SysSolverQUDAMULTIGRIDCloverParams& invParam_) :
79  A(A_), gstate(state_), invParam(invParam_), clov(new CloverTermT<T, U>() ), invclov(new CloverTermT<T, U>())
80  {
81  StopWatch init_swatch;
82  init_swatch.reset(); init_swatch.start();
83 
84  // Set the solver string
85  {
86  std::ostringstream solver_string_stream;
87  solver_string_stream << "QUDA_MULTIGRID_CLOVER_MDAGM_SOLVER( Mass = " << invParam.CloverParams.Mass <<" , Id = "
88  << invParam.SaveSubspaceID << " ): ";
89  solver_string = solver_string_stream.str();
90 
91  }
92  QDPIO::cout << solver_string << "Initializing" << std::endl;
93 
94  // Check free mem
95  size_t free_mem = QUDAMGUtils::getCUDAFreeMem();
96  QDPIO::cout << solver_string << "MEMCHECK: free mem = " << free_mem << std::endl;
97 
98  // FOLLOWING INITIALIZATION in test QUDA program
99 
100  // 1) work out cpu_prec, cuda_prec, cuda_prec_sloppy
101  int s = sizeof( WordType<T>::Type_t );
102  if (s == 4) {
103  cpu_prec = QUDA_SINGLE_PRECISION;
104  }
105  else {
106  cpu_prec = QUDA_DOUBLE_PRECISION;
107  }
108 
109  // Work out GPU precision
110  switch( invParam.cudaPrecision ) {
111  case HALF:
112  gpu_prec = QUDA_HALF_PRECISION;
113  break;
114  case SINGLE:
115  gpu_prec = QUDA_SINGLE_PRECISION;
116  break;
117  case DOUBLE:
118  gpu_prec = QUDA_DOUBLE_PRECISION;
119  break;
120  default:
121  gpu_prec = cpu_prec;
122  break;
123  }
124 
125  // Work out GPU Sloppy precision
126  // Default: No Sloppy
127  switch( invParam.cudaSloppyPrecision ) {
128  case HALF:
129  gpu_half_prec = QUDA_HALF_PRECISION;
130  break;
131  case SINGLE:
132  gpu_half_prec = QUDA_SINGLE_PRECISION;
133  break;
134  case DOUBLE:
135  gpu_half_prec = QUDA_DOUBLE_PRECISION;
136  break;
137  default:
138  gpu_half_prec = gpu_prec;
139  break;
140  }
141 
142  // 2) pull 'new; GAUGE and Invert params
143  q_gauge_param = newQudaGaugeParam();
144  quda_inv_param = newQudaInvertParam();
145 
146  // 3) set lattice size
147  const multi1d<int>& latdims = Layout::subgridLattSize();
148 
149  q_gauge_param.X[0] = latdims[0];
150  q_gauge_param.X[1] = latdims[1];
151  q_gauge_param.X[2] = latdims[2];
152  q_gauge_param.X[3] = latdims[3];
153 
154  // 4) - deferred (anisotropy)
155 
156  // 5) - set QUDA_WILSON_LINKS, QUDA_GAUGE_ORDER
157  q_gauge_param.type = QUDA_WILSON_LINKS;
158 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
159  q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER; // gauge[mu], p
160 #else
161  q_gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
162  q_gauge_param.gauge_order = QUDA_QDPJIT_GAUGE_ORDER;
163 #endif
164 
165  // 6) - set t_boundary
166  // Convention: BC has to be applied already
167  // This flag just tells QUDA that this is so,
168  // so that QUDA can take care in the reconstruct
169  if( invParam.AntiPeriodicT ) {
170  q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
171  }
172  else {
173  q_gauge_param.t_boundary = QUDA_PERIODIC_T;
174  }
175 
176  // Set cpu_prec, cuda_prec, reconstruct and sloppy versions
177  q_gauge_param.cpu_prec = cpu_prec;
178  q_gauge_param.cuda_prec = gpu_prec;
179 
180  switch( invParam.cudaReconstruct ) {
181  case RECONS_NONE:
182  q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
183  break;
184  case RECONS_8:
185  q_gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
186  break;
187  case RECONS_12:
188  q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
189  break;
190  default:
191  q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
192  break;
193  };
194 
195  q_gauge_param.cuda_prec_sloppy = gpu_half_prec;
196  q_gauge_param.cuda_prec_precondition = gpu_half_prec;
197 
198  switch( invParam.cudaSloppyReconstruct ) {
199  case RECONS_NONE:
200  q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
201  break;
202  case RECONS_8:
203  q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
204  break;
205  case RECONS_12:
206  q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
207  break;
208  default:
209  q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
210  break;
211  };
212 
213  // This may be overridden later
214  q_gauge_param.reconstruct_precondition=q_gauge_param.reconstruct_sloppy;
215 
216  // Gauge fixing:
217 
218  // These are the links
219  // They may be smeared and the BC's may be applied
220  Q links_single(Nd);
221 
222  // Now downcast to single prec fields.
223  for(int mu=0; mu < Nd; mu++) {
224  links_single[mu] = (state_->getLinks())[mu];
225  }
226 
227  // GaugeFix
228  if( invParam.axialGaugeP ) {
229  temporalGauge(links_single, GFixMat, Nd-1);
230  for(int mu=0; mu < Nd; mu++) {
231  links_single[mu] = GFixMat*(state_->getLinks())[mu]*adj(shift(GFixMat, FORWARD, mu));
232  }
233  q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
234  }
235  else {
236  // No GaugeFix
237  q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;// No Gfix yet
238  }
239 
240  // deferred 4) Gauge Anisotropy
241  const AnisoParam_t& aniso = invParam.CloverParams.anisoParam;
242  if( aniso.anisoP ) { // Anisotropic case
243  Real gamma_f = aniso.xi_0 / aniso.nu;
244  q_gauge_param.anisotropy = toDouble(gamma_f);
245  }
246  else {
247  q_gauge_param.anisotropy = 1.0;
248  }
249 
250  // MAKE FSTATE BEFORE RESCALING links_single
251  // Because the clover term expects the unrescaled links...
252  Handle<FermState<T,Q,Q> > fstate( new PeriodicFermState<T,Q,Q>(links_single));
253 
254  if( aniso.anisoP ) { // Anisotropic case
255  multi1d<Real> cf=makeFermCoeffs(aniso);
256  for(int mu=0; mu < Nd; mu++) {
257  links_single[mu] *= cf[mu];
258  }
259  }
260 
261  // Now onto the inv param:
262  // Dslash type
263  quda_inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
264 
265  // Hardwire to GCR
266  quda_inv_param.inv_type = QUDA_GCR_INVERTER;
267  quda_inv_param.compute_true_res = 0;
268 
269  quda_inv_param.kappa = 0.5;
270  quda_inv_param.clover_coeff = 1.0; // Dummy, not used
271  quda_inv_param.Ls=1;
272  quda_inv_param.tol = toDouble(invParam.RsdTarget);
273  quda_inv_param.maxiter = invParam.MaxIter;
274  quda_inv_param.reliable_delta = toDouble(invParam.Delta);
275  quda_inv_param.pipeline = invParam.Pipeline;
276 
277  quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
278  quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
279 
280 
281  quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
282 
283  quda_inv_param.dagger = QUDA_DAG_NO;
284  quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
285 
286  quda_inv_param.cpu_prec = cpu_prec;
287  quda_inv_param.cuda_prec = gpu_prec;
288  quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
289  quda_inv_param.cuda_prec_precondition = gpu_half_prec;
290  quda_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
291  quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
292 
293 #ifndef BUILD_QUDA_DEVIFACE_SPINOR
294  quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
295  quda_inv_param.input_location = QUDA_CPU_FIELD_LOCATION;
296  quda_inv_param.output_location = QUDA_CPU_FIELD_LOCATION;
297 
298 #else
299  quda_inv_param.dirac_order = QUDA_QDPJIT_DIRAC_ORDER;
300  quda_inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
301  quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
302 #endif
303 
304  // Autotuning
305  if( invParam.tuneDslashP ) {
306  quda_inv_param.tune = QUDA_TUNE_YES;
307  }
308  else {
309  quda_inv_param.tune = QUDA_TUNE_NO;
310  }
311 
312  // Setup padding
313  multi1d<int> face_size(4);
314  face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
315  face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
316  face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
317  face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
318 
319  int max_face = face_size[0];
320  for(int i=1; i <=3; i++) {
321  if ( face_size[i] > max_face ) {
322  max_face = face_size[i];
323  }
324  }
325 
326  q_gauge_param.ga_pad = max_face;
327  // PADDING
328  quda_inv_param.sp_pad = 0;
329  quda_inv_param.cl_pad = 0;
330 
331  // Clover precision and order
332  quda_inv_param.clover_cpu_prec = cpu_prec;
333  quda_inv_param.clover_cuda_prec = gpu_prec;
334  quda_inv_param.clover_cuda_prec_sloppy = gpu_half_prec;
335  quda_inv_param.clover_cuda_prec_precondition = gpu_half_prec;
336 
337  if( !invParam.MULTIGRIDParamsP ) {
338  QDPIO::cout << solver_string << "ERROR: MG Solver had MULTIGRIDParamsP set to false" << std::endl;
339  QDP_abort(1);
340  }
341 
342  // Dereference handle
343  const MULTIGRIDSolverParams& ip = *(invParam.MULTIGRIDParams);
344 
345 
346  // Set preconditioner precision
347  switch( ip.prec ) {
348  case HALF:
349  quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
350  quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
351  q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
352  break;
353 
354  case SINGLE:
355  quda_inv_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
356  quda_inv_param.clover_cuda_prec_precondition = QUDA_SINGLE_PRECISION;
357  q_gauge_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
358  break;
359 
360  case DOUBLE:
361  quda_inv_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
362  quda_inv_param.clover_cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
363  q_gauge_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
364  break;
365  default:
366  quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
367  quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
368  q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
369  break;
370  }
371 
372  switch( ip.reconstruct ) {
373  case RECONS_NONE:
374  q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
375  break;
376  case RECONS_8:
377  q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_8;
378  break;
379  case RECONS_12:
380  q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
381  break;
382  default:
383  q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
384  break;
385  };
386 
387  // Set up the links
388  void* gauge[4];
389 
390 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
391  for(int mu=0; mu < Nd; mu++) {
392  gauge[mu] = (void *)&(links_single[mu].elem(all.start()).elem().elem(0,0).real());
393  }
394 #else
395  GetMemoryPtrGauge(gauge,links_single);
396  // std::vector<int> ids;
397  // for(int mu=0; mu < Nd; mu++)
398  // ids.push_back( links_single[mu].getId() );
399  // std::vector<void*> dev_ptr = GetMemoryPtr( ids );
400  // for(int mu=0; mu < Nd; mu++)
401  // gauge[mu] = dev_ptr[mu];
402 #endif
403 
404  loadGaugeQuda((void *)gauge, &q_gauge_param);
405 
406 
407  quda_inv_param.tol_precondition = toDouble(ip.tol[0]);
408  quda_inv_param.maxiter_precondition = ip.maxIterations[0];
409  quda_inv_param.gcrNkrylov = ip.outer_gcr_nkrylov;
410  quda_inv_param.residual_type = static_cast<QudaResidualType>(QUDA_L2_RELATIVE_RESIDUAL);
411 
412  //Replacing above with what's in the invert test.
413  switch( ip.schwarzType ) {
414  case ADDITIVE_SCHWARZ :
415  quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
416  break;
418  quda_inv_param.schwarz_type = QUDA_MULTIPLICATIVE_SCHWARZ;
419  break;
420  default:
421  quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
422  break;
423  }
424  quda_inv_param.precondition_cycle = 1;
425  //Invert test always sets this to 1.
426 
427 
428  if( invParam.verboseP ) {
429  quda_inv_param.verbosity = QUDA_VERBOSE;
430  }
431  else {
432  quda_inv_param.verbosity = QUDA_SUMMARIZE;
433  }
434 
435  quda_inv_param.verbosity_precondition = QUDA_SILENT;
436 
437  quda_inv_param.inv_type_precondition = QUDA_MG_INVERTER;
438 
439  // Setup the clover term...
440  QDPIO::cout <<solver_string << "Creating CloverTerm" << std::endl;
441  clov->create(fstate, invParam_.CloverParams);
442 
443  // Don't recompute, just copy
444  invclov->create(fstate, invParam_.CloverParams);
445 
446  QDPIO::cout <<solver_string<< "Inverting CloverTerm" << std::endl;
447  invclov->choles(0);
448  invclov->choles(1);
449 
450 #ifndef BUILD_QUDA_DEVIFACE_CLOVER
451 #warning "NOT USING QUDA DEVICE IFACE"
452  quda_inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
453 
454  multi1d<QUDAPackedClovSite<REALT> > packed_clov;
455 
456  packed_clov.resize(all.siteTable().size());
457 
458  clov->packForQUDA(packed_clov, 0);
459  clov->packForQUDA(packed_clov, 1);
460 
461  // Always need inverse
462  multi1d<QUDAPackedClovSite<REALT> > packed_invclov(all.siteTable().size());
463  invclov->packForQUDA(packed_invclov, 0);
464  invclov->packForQUDA(packed_invclov, 1);
465 
466  loadCloverQuda(&(packed_clov[0]), &(packed_invclov[0]), &quda_inv_param);
467 
468 #else
469 
470 #warning "USING QUDA DEVICE IFACE"
471 
472  quda_inv_param.clover_location = QUDA_CUDA_FIELD_LOCATION;
473  quda_inv_param.clover_order = QUDA_QDPJIT_CLOVER_ORDER;
474 
475  void *clover[2];
476  void *cloverInv[2];
477 
478  GetMemoryPtrClover(clov->getOffId(),clov->getDiaId(),invclov->getOffId(),invclov->getDiaId());
479 
480  loadCloverQuda( (void*)(clover), (void *)(cloverInv), &quda_inv_param);
481 #endif
482 
483  quda_inv_param.omega = toDouble(ip.relaxationOmegaOuter);
484 
485 // Copy ThresholdCount from invParams into threshold_counts.
486 threshold_counts = invParam.ThresholdCount;
487 
488 if(TheNamedObjMap::Instance().check(invParam.SaveSubspaceID))
489 {
490  StopWatch update_swatch;
491  update_swatch.reset(); update_swatch.start();
492  // Subspace ID exists add it to mg_state
493  QDPIO::cout<< solver_string <<"Recovering subspace..."<<std::endl;
494  subspace_pointers = TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
495  for(int j=0; j < ip.mg_levels-1;++j) {
496  (subspace_pointers->mg_param).setup_maxiter_refresh[j] = 0;
497  }
498  updateMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
499  update_swatch.stop();
500 
501  QDPIO::cout << solver_string << " subspace_update_time = "
502  << update_swatch.getTimeInSeconds() << " sec. " << std::endl;
503 }
504 else
505 {
506  // Create the subspace.
507  StopWatch create_swatch;
508  create_swatch.reset(); create_swatch.start();
509  QDPIO::cout << solver_string << "Creating Subspace" << std::endl;
510  subspace_pointers = QUDAMGUtils::create_subspace<T>(invParam);
511  XMLBufferWriter file_xml;
512  push(file_xml, "FileXML");
513  pop(file_xml);
514 
515  int foo = 5;
516 
517  XMLBufferWriter record_xml;
518  push(record_xml, "RecordXML");
519  write(record_xml, "foo", foo);
520  pop(record_xml);
521 
522 
523  TheNamedObjMap::Instance().create< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
524  TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setFileXML(file_xml);
525  TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setRecordXML(record_xml);
526 
527  TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID) = subspace_pointers;
528  create_swatch.stop();
529  QDPIO::cout << solver_string << " subspace_create_time = "
530  << create_swatch.getTimeInSeconds() << " sec. " << std::endl;
531 
532 }
533 quda_inv_param.preconditioner = subspace_pointers->preconditioner;
534 
535 init_swatch.stop();
536 QDPIO::cout << solver_string << " init_time = "
537  << init_swatch.getTimeInSeconds() << " sec. "
538  << std::endl;
539 
540  }
541 
542  //! Destructor is not automatic
543  ~MdagMSysSolverQUDAMULTIGRIDClover()
544  {
545 
546  quda_inv_param.preconditioner = nullptr;
547  subspace_pointers = nullptr;
548  freeGaugeQuda();
549  freeCloverQuda();
550  }
551 
552  //! Return the subset on which the operator acts
553  const Subset& subset() const {return A->subset();}
554 
555  //! Solver the linear system
556  /*!
557  * \param psi solution ( Modify )
558  * \param chi source ( Read )
559  * \return syssolver results
560  */
561  SystemSolverResults_t operator() (T& psi, const T& chi) const
562  {
563  SystemSolverResults_t res1;
564  SystemSolverResults_t res2;
565  SystemSolverResults_t res;
566 
567  START_CODE();
568  StopWatch swatch;
569  swatch.start();
570 
571  // I want to use the predictor versions of the code as they have been made robust.
572  // So I should use either a null predictor or a zero guess predictor here.
573  // The MG two step solve logic is quite complicated and may need to reinit the fields.
574  // I don't want to triplicate that logic so I'll just use a dummy predictor and call through.
575  ZeroGuess4DChronoPredictor dummy_predictor;
576  res = (*this)(psi, chi, dummy_predictor);
577 
578 
579  END_CODE();
580  return res;
581  }
582 
583 
584 
585  SystemSolverResults_t operator() (T& psi, const T& chi, Chroma::AbsTwoStepChronologicalPredictor4D<T>& predictor ) const
586  {
587 
588  START_CODE();
589 
590  StopWatch swatch;
591  swatch.start();
592 
593  MULTIGRIDSolverParams& ip = *(invParam.MULTIGRIDParams);
594  // Use this in residuum checks.
595  Double norm2chi=sqrt(norm2(chi, A->subset()));
596 
597  // Allow QUDA to use initial guess
598  QudaUseInitGuess old_guess_policy = quda_inv_param.use_init_guess;
599  quda_inv_param.use_init_guess = QUDA_USE_INIT_GUESS_YES;
600 
601 
602  SystemSolverResults_t res;
603  SystemSolverResults_t res1;
604  SystemSolverResults_t res2;
605 
606  // Create MdagM op
607  Handle< LinearOperator<T> > MdagM( new MdagMLinOp<T>(A) );
608 
609 
610  QDPIO::cout << solver_string <<"Two Step Solve" << std::endl;
611 
612 
613  // Try to cast the predictor to a two step predictor
614  StopWatch X_prediction_timer; X_prediction_timer.reset();
615  StopWatch Y_prediction_timer; Y_prediction_timer.reset();
616  StopWatch Y_solve_timer; Y_solve_timer.reset();
617  StopWatch X_solve_timer; X_solve_timer.reset();
618  StopWatch Y_predictor_add_timer; Y_predictor_add_timer.reset();
619  StopWatch X_predictor_add_timer; X_predictor_add_timer.reset();
620  StopWatch X_refresh_timer; X_refresh_timer.reset();
621  StopWatch Y_refresh_timer; Y_refresh_timer.reset();
622 
623  QDPIO::cout << solver_string << "Predicting Y" << std::endl;
624  Y_prediction_timer.start();
625  T Y_prime = zero;
626  {
627  T tmp_vec = psi;
628  predictor.predictY(tmp_vec, *A, chi); // Predicts for M^\dagger Y = chi
629 
630  // We are going to solve M \gamma
631  Y_prime = Gamma(Nd*Nd-1)*tmp_vec;
632  }
633  Y_prediction_timer.stop();
634 
635  // Y solve: M^\dagger Y = chi
636  // g_5 M g_5 Y = chi
637  // => M Y' = chi' with chi' = gamma_5*chi
638 
639  Y_solve_timer.start();
640 
641 
642  T g5chi = zero;
643  T Y = zero;
644  g5chi[rb[1]]= Gamma(Nd*Nd-1)*chi;
645 
646  // Y solve at 0.5 * Target Residuum -- Evan's bound
647  quda_inv_param.tol = toDouble(Real(0.5)*invParam.RsdTarget);
648  if( invParam.asymmetricP == true ) {
649  res1 = qudaInvert(*clov,
650  *invclov,
651  g5chi,
652  Y_prime);
653  Y[rb[1]] = Gamma(Nd*Nd -1)*Y_prime;
654  }
655  else {
656  T tmp = zero;
657  invclov->apply(tmp,g5chi,MINUS,1);
658 
659  res1 = qudaInvert(*clov,
660  *invclov,
661  tmp,
662  Y_prime);
663 #ifdef QUDA_DEBUG
664  {
665  char Y_prime_norm[256];
666  char Y_prime_norm_full[256];
667  std::sprintf(Y_prime_norm, "%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime, A->subset())));
668  std::sprintf(Y_prime_norm_full, "%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime)));
669  QDPIO::cout << "Y solution: norm2(subset) = " << Y_prime_norm << " norm(full) = " << Y_prime_norm_full << std::endl;
670  }
671 #endif
672  tmp[rb[1]] = Gamma(Nd*Nd-1)*Y_prime;
673  clov->apply(Y,tmp,MINUS,1);
674 
675  }
676 
677 
678  bool solution_good = true;
679 
680  // Check solution
681  {
682  T r=zero;
683  r[A->subset()]=chi;
684  T tmp;
685  (*A)(tmp, Y, MINUS);
686  r[A->subset()] -= tmp;
687 
688  res1.resid = sqrt(norm2(r, A->subset()));
689  QDPIO::cout << "Y-solve: ||r||=" << res1.resid << " ||r||/||b||="
690  << res1.resid/sqrt(norm2(chi,rb[1])) << std::endl;
691  if ( toBool( res1.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
692  solution_good = false;
693  }
694  }
695 
696  if ( solution_good ) {
697  if( res1.n_count >= threshold_counts ) {
698  QDPIO::cout << solver_string << "Iteration Threshold Exceeded! Y Solver iters = " << res1.n_count << " Threshold=" << threshold_counts << std::endl;
699  QDPIO::cout << solver_string << "Refreshing Subspace" << std::endl;
700 
701  Y_refresh_timer.start();
702  // refresh the subspace
703  // Setup the number of subspace Iterations
704  for(int j=0; j < ip.mg_levels-1; j++) {
705  (subspace_pointers->mg_param).setup_maxiter_refresh[j] = ip.maxIterSubspaceRefresh[j];
706  }
707  updateMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
708  for(int j=0; j < ip.mg_levels-1; j++) {
709  (subspace_pointers->mg_param).setup_maxiter_refresh[j] = 0;
710  }
711  Y_refresh_timer.stop();
712  QDPIO::cout << solver_string << "Subspace Refresh Time = " << Y_refresh_timer.getTimeInSeconds() << " secs\n";
713  }
714  }
715  else {
716  QDPIO::cout << solver_string << "Y-Solve failed (seq: "<< seqno <<"). Blowing away and reiniting subspace" << std::endl;
717  StopWatch reinit_timer; reinit_timer.reset();
718  reinit_timer.start();
719 
720  // Delete the saved subspace completely
721  QUDAMGUtils::delete_subspace(invParam.SaveSubspaceID);
722 
723  // Recreate the subspace
724  bool saved_value = ip.check_multigrid_setup;
725  ip.check_multigrid_setup = true;
726  subspace_pointers = QUDAMGUtils::create_subspace<T>(invParam);
727  ip.check_multigrid_setup = saved_value;
728 
729  // Make subspace XML snippets
730  XMLBufferWriter file_xml;
731  push(file_xml, "FileXML");
732  pop(file_xml);
733 
734  int foo = 5;
735  XMLBufferWriter record_xml;
736  push(record_xml, "RecordXML");
737  write(record_xml, "foo", foo);
738  pop(record_xml);
739 
740 
741  // Create named object entry.
742  TheNamedObjMap::Instance().create< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
743  TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setFileXML(file_xml);
744  TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setRecordXML(record_xml);
745 
746  // Assign the pointer into the named object
747  TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID) = subspace_pointers;
748  quda_inv_param.preconditioner = subspace_pointers->preconditioner;
749 
750  reinit_timer.stop();
751  QDPIO::cout << solver_string << "Subspace Reinit Time: " << reinit_timer.getTimeInSeconds() << " sec." << std::endl;
752 
753  // Re-solve
754  QDPIO::cout << solver_string << "Re-Solving for Y with zero guess" << std::endl;
755  SystemSolverResults_t res_tmp;
756 
757  Y_prime = zero;
758  if( invParam.asymmetricP == true ) {
759  res_tmp = qudaInvert(*clov,
760  *invclov,
761  g5chi,
762  Y_prime);
763  Y[rb[1]] = Gamma(Nd*Nd -1)*Y_prime;
764  }
765  else {
766  T tmp = zero;
767  invclov->apply(tmp,g5chi,MINUS,1);
768  res_tmp = qudaInvert(*clov,
769  *invclov,
770  tmp,
771  Y_prime);
772 
773 #ifdef QUDA_DEBUG
774  {
775  char Y_prime_norm[256];
776  char Y_prime_norm_full[256];
777  std::sprintf(Y_prime_norm, "%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime, A->subset())));
778  std::sprintf(Y_prime_norm_full, "%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime)));
779  QDPIO::cout << "Y solution: norm2(subset) = " << Y_prime_norm << " norm(full) = " << Y_prime_norm_full << std::endl;
780  }
781 #endif
782 
783  tmp[rb[1]] = Gamma(Nd*Nd-1)*Y_prime;
784  clov->apply(Y,tmp,MINUS,1);
785 
786  }
787 
788  // Check solution
789  {
790  T r=zero;
791  r[A->subset()]=chi;
792  T tmp;
793  (*A)(tmp, Y,MINUS);
794  r[A->subset()] -= tmp;
795 
796  res_tmp.resid = sqrt(norm2(r, A->subset()));
797  if ( toBool( res_tmp.resid/sqrt(norm2(chi)) > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
798  QDPIO::cout << solver_string << "Re Solve for Y Failed (seq: " << seqno << " ) Rsd = " << res_tmp.resid/norm2chi << " RsdTarget = " << invParam.RsdTarget << std::endl;
799  QDPIO::cout << solver_string << "Throwing Exception! This will ABORT" << std::endl;
800 
801  dumpYSolver(g5chi,Y_prime);
802 
803  MGSolverException convergence_fail(invParam.CloverParams.Mass,
804  invParam.SaveSubspaceID,
805  res_tmp.n_count,
806  Real(res_tmp.resid/norm2chi),
807  invParam.RsdTarget*invParam.RsdToleranceFactor);
808  throw convergence_fail;
809  }
810  } // Check solution
811 
812  // threhold count is good, and solution is good
813  res1.n_count += res_tmp.n_count; // Add resolve iterations
814  res1.resid = res_tmp.resid; // Copy new residuum.
815 
816  }
817  Y_solve_timer.stop();
818 
819  // At this point we should have a good solution.
820  Y_predictor_add_timer.start();
821  predictor.newYVector(Y);
822  Y_predictor_add_timer.stop();
823 
824  X_prediction_timer.start();
825  // Can predict psi in the usual way without reference to Y
826  predictor.predictX(psi, (*MdagM), chi);
827  X_prediction_timer.stop();
828 
829  // Restore resid target for X solve
830  quda_inv_param.tol = toDouble(invParam.RsdTarget);
831  X_solve_timer.start();
832  // Solve for psi
833  res2 = qudaInvert(*clov,
834  *invclov,
835  Y,
836  psi);
837 #ifdef QUDA_DEBUG
838  {
839  char X_prime_norm[256];
840  char X_prime_norm_full[256];
841  std::sprintf(X_prime_norm, "%.*e", DECIMAL_DIG, toDouble(norm2(psi, A->subset())));
842  std::sprintf(X_prime_norm_full, "%.*e", DECIMAL_DIG, toDouble(norm2(psi)));
843  QDPIO::cout << "X solution: norm2(subset) = " << X_prime_norm << " norm(full) = " << X_prime_norm_full << std::endl;
844  }
845 #endif
846  solution_good = true;
847 
848  // Check solution
849  {
850  T r;
851  r[A->subset()]=chi;
852  T tmp;
853  (*MdagM)(tmp, psi, PLUS);
854  r[A->subset()] -= tmp;
855 
856  res2.resid = sqrt(norm2(r, A->subset()));
857  if ( toBool( res2.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
858  solution_good = false;
859  }
860  }
861 
862  if( solution_good ) {
863  if( res2.n_count >= threshold_counts ) {
864  QDPIO::cout << solver_string <<"Threshold Reached! X Solver iters = " << res2.n_count << " Threshold=" << threshold_counts << std::endl;
865  QDPIO::cout << solver_string << "Refreshing Subspace" << std::endl;
866 
867  X_refresh_timer.start();
868  // refresh the subspace
869  // Regenerate space. Destroy and recreate
870  // Setup the number of subspace Iterations
871  for(int j=0; j < ip.mg_levels-1; j++) {
872  (subspace_pointers->mg_param).setup_maxiter_refresh[j] = ip.maxIterSubspaceRefresh[j];
873  }
874  updateMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
875  for(int j=0; j < ip.mg_levels-1; j++) {
876  (subspace_pointers->mg_param).setup_maxiter_refresh[j] = 0;
877  }
878  X_refresh_timer.stop();
879 
880  QDPIO::cout << solver_string << "X Subspace Refresh Time = " << X_refresh_timer.getTimeInSeconds() << " secs\n";
881  }
882  }
883  else {
884 
885  QDPIO::cout << solver_string << "X-Solve failed (seq: "<<seqno<<") . Blowing away and reiniting subspace" << std::endl;
886  StopWatch reinit_timer; reinit_timer.reset();
887  reinit_timer.start();
888 
889  // Delete the saved subspace completely
890  QUDAMGUtils::delete_subspace(invParam.SaveSubspaceID);
891 
892  // Recreate the subspace
893  bool saved_value = ip.check_multigrid_setup;
894  ip.check_multigrid_setup = true;
895  subspace_pointers = QUDAMGUtils::create_subspace<T>(invParam);
896  ip.check_multigrid_setup = saved_value;
897 
898 
899  // Make subspace XML snippets
900  XMLBufferWriter file_xml;
901  push(file_xml, "FileXML");
902  pop(file_xml);
903 
904  int foo = 5;
905  XMLBufferWriter record_xml;
906  push(record_xml, "RecordXML");
907  write(record_xml, "foo", foo);
908  pop(record_xml);
909 
910 
911  // Create named object entry.
912  TheNamedObjMap::Instance().create< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
913  TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setFileXML(file_xml);
914  TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setRecordXML(record_xml);
915 
916  // Assign the pointer into the named object
917  TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID) = subspace_pointers;
918  quda_inv_param.preconditioner = subspace_pointers->preconditioner;
919  reinit_timer.stop();
920  QDPIO::cout << solver_string << "Subspace Reinit Time: " << reinit_timer.getTimeInSeconds() << " sec." << std::endl;
921 
922  // Re-solve
923  QDPIO::cout << solver_string << "Re-Solving for X with zero guess" << std::endl;
924  SystemSolverResults_t res_tmp;
925  psi = zero;
926  res_tmp = qudaInvert(*clov,
927  *invclov,
928  Y,
929  psi);
930 #ifdef QUDA_DEBUG
931  {
932  char X_prime_norm[256];
933  char X_prime_norm_full[256];
934  std::sprintf(X_prime_norm, "%.*e", DECIMAL_DIG, toDouble(norm2(psi, A->subset())));
935  std::sprintf(X_prime_norm_full, "%.*e", DECIMAL_DIG, toDouble(norm2(psi)));
936  QDPIO::cout << "X solution: norm2(subset) = " << X_prime_norm << " norm(full) = " << X_prime_norm_full << std::endl;
937  }
938 #endif
939 
940 
941  // Check solution
942  {
943  T r;
944  r[A->subset()]=chi;
945  T tmp;
946  (*MdagM)(tmp, psi, PLUS);
947  r[A->subset()] -= tmp;
948 
949  res_tmp.resid = sqrt(norm2(r, A->subset()));
950  if ( toBool( res_tmp.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
951  QDPIO::cout << solver_string << "Re Solve for X Failed (seq: " << seqno << " ) Rsd = " << res_tmp.resid/norm2chi << " RsdTarget = " << invParam.RsdTarget << std::endl;
952  QDPIO::cout << solver_string << "Throwing Exception! This will ABORT" << std::endl;
953 
954  dumpXSolver(chi,Y,psi);
955 
956  MGSolverException convergence_fail(invParam.CloverParams.Mass,
957  invParam.SaveSubspaceID,
958  res_tmp.n_count,
959  Real(res_tmp.resid/norm2chi),
960  invParam.RsdTarget*invParam.RsdToleranceFactor);
961  throw convergence_fail;
962 
963  QDP_abort(1);
964  }
965  }
966  // At this point the solution is good
967  res2.n_count += res_tmp.n_count;
968  res2.resid = res_tmp.resid;
969 
970  }
971  X_solve_timer.stop();
972 
973  X_predictor_add_timer.start();
974  predictor.newXVector(psi);
975  X_predictor_add_timer.stop();
976  swatch.stop();
977  double time = swatch.getTimeInSeconds();
978 
979  res.n_count = res1.n_count + res2.n_count;
980  res.resid = res2.resid;
981 
982  Double rel_resid = res.resid/norm2chi;
983 
984  QDPIO::cout << solver_string << " seq: " << (seqno++) << " iterations: " << res1.n_count << " + "
985  << res2.n_count << " = " << res.n_count
986  << " Rsd = " << res.resid << " Relative Rsd = " << rel_resid << std::endl;
987 
988  QDPIO::cout <<solver_string << "Time: Y_predict: " << Y_prediction_timer.getTimeInSeconds() << " (s) "
989  << "Y_solve: " << Y_solve_timer.getTimeInSeconds() << " (s) "
990  << "Y_register: " << Y_predictor_add_timer.getTimeInSeconds() << " (s) "
991  << "X_predict: " << X_prediction_timer.getTimeInSeconds() << " (s) "
992  << "X_solve: " << X_solve_timer.getTimeInSeconds() << " (s) "
993  << "X_register: " << X_predictor_add_timer.getTimeInSeconds() << " (s) "
994  << "Total time: " << time << "(s)" << std::endl;
995 
996  quda_inv_param.use_init_guess = old_guess_policy;
997 
998  return res;
999  }
1000 
1001  SystemSolverResults_t operator() (T& psi, const T& chi, Chroma::QUDA4DChronoPredictor& predictor ) const
1002  {
1003 
1004 
1005  START_CODE();
1006 
1007  StopWatch swatch;
1008  swatch.start();
1009 
1010  MULTIGRIDSolverParams& ip = *(invParam.MULTIGRIDParams);
1011  // Use this in residuum checks.
1012  Double norm2chi=sqrt(norm2(chi, A->subset()));
1013 
1014  // Allow QUDA to use initial guess
1015  QudaUseInitGuess old_guess_policy = quda_inv_param.use_init_guess;
1016  quda_inv_param.use_init_guess = QUDA_USE_INIT_GUESS_YES;
1017 
1018 
1019  SystemSolverResults_t res;
1020  SystemSolverResults_t res1;
1021  SystemSolverResults_t res2;
1022 
1023  // Create MdagM op
1024  Handle< LinearOperator<T> > MdagM( new MdagMLinOp<T>(A) );
1025 
1026 
1027  QDPIO::cout << solver_string <<"Two Step Solve" << std::endl;
1028 
1029 
1030  // Try to cast the predictor to a two step predictor
1031  StopWatch Y_solve_timer; Y_solve_timer.reset();
1032  StopWatch X_solve_timer; X_solve_timer.reset();
1033  StopWatch Y_refresh_timer; Y_refresh_timer.reset();
1034  StopWatch X_refresh_timer; X_refresh_timer.reset();
1035  int X_index=predictor.getXIndex();
1036  int Y_index=predictor.getYIndex();
1037 
1038  QDPIO::cout << "Two Step Solve using QUDA predictor: (Y_index,X_index) = ( " << Y_index << " , " << X_index << " ) \n";
1039 
1040 
1041  // Select the channel for QUDA's predictor here.
1042  //
1043  //
1044 
1045  quda_inv_param.chrono_max_dim = predictor.getMaxChrono();
1046  quda_inv_param.chrono_index = Y_index;
1047  quda_inv_param.chrono_make_resident = true;
1048  quda_inv_param.chrono_use_resident = true;
1049  quda_inv_param.chrono_replace_last = false;
1050 
1051  // Y solve is at 0.5*RsdTarget -- Evan's analysis
1052  quda_inv_param.tol = toDouble(Real(0.5)*invParam.RsdTarget);
1053  if ( predictor.getChronoPrecision() == DEFAULT ) {
1054  QDPIO::cout << "Setting Default Chrono precision of " << cpu_prec << std::endl;
1055  quda_inv_param.chrono_precision = cpu_prec;
1056  }
1057  else {
1058  quda_inv_param.chrono_precision = theChromaToQudaPrecisionTypeMap::Instance()[ predictor.getChronoPrecision() ];
1059  QDPIO::cout << "Setting Chrono precision of " << quda_inv_param.chrono_precision << std::endl;
1060  }
1061 
1062  /// channel set done
1063  T Y_prime = zero;
1064  T Y = zero;
1065  // Y solve: M^\dagger Y = chi
1066  // g_5 M g_5 Y = chi
1067  // => M Y' = chi' with chi' = gamma_5*chi
1068  Y_solve_timer.start();
1069  T g5chi = zero;
1070  g5chi[rb[1]]= Gamma(Nd*Nd-1)*chi;
1071  if( invParam.asymmetricP == true ) {
1072  res1 = qudaInvert(*clov,
1073  *invclov,
1074  g5chi,
1075  Y_prime);
1076  Y[rb[1]] = Gamma(Nd*Nd -1)*Y_prime;
1077  }
1078  else {
1079  T tmp = zero;
1080  invclov->apply(tmp,g5chi,MINUS,1);
1081 
1082  res1 = qudaInvert(*clov,
1083  *invclov,
1084  tmp,
1085  Y_prime);
1086 
1087 #ifdef QUDA_DEBUG
1088  {
1089  char Y_prime_norm[256];
1090  char Y_prime_norm_full[256];
1091  std::sprintf(Y_prime_norm, "%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime, A->subset())));
1092  std::sprintf(Y_prime_norm_full, "%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime)));
1093  QDPIO::cout << "Y solution: norm2(subset) = " << Y_prime_norm << " norm(full) = " << Y_prime_norm_full << std::endl;
1094  }
1095 #endif
1096 
1097  tmp[rb[1]] = Gamma(Nd*Nd-1)*Y_prime;
1098  clov->apply(Y,tmp,MINUS,1);
1099 
1100  }
1101 
1102  bool solution_good = true;
1103 
1104  // Check solution
1105  {
1106  T r;
1107  r[A->subset()]=chi;
1108  T tmp;
1109  (*A)(tmp, Y, MINUS);
1110  r[A->subset()] -= tmp;
1111 
1112  res1.resid = sqrt(norm2(r, A->subset()));
1113  if ( toBool( res1.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
1114  solution_good = false;
1115  }
1116  }
1117 
1118  if ( solution_good ) {
1119  if( res1.n_count >= threshold_counts ) {
1120  QDPIO::cout << solver_string << "Iteration Threshold Exceeded:Y Solver iters = " << res1.n_count << " Threshold=" << threshold_counts << std::endl;
1121  QDPIO::cout << solver_string << "Refreshing Subspace" << std::endl;
1122 
1123  Y_refresh_timer.start();
1124  // refresh the subspace
1125  // Setup the number of subspace Iterations
1126  for(int j=0; j < ip.mg_levels-1; j++) {
1127  (subspace_pointers->mg_param).setup_maxiter_refresh[j] = ip.maxIterSubspaceRefresh[j];
1128  }
1129  updateMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
1130  for(int j=0; j < ip.mg_levels-1; j++) {
1131  (subspace_pointers->mg_param).setup_maxiter_refresh[j] = 0;
1132  }
1133  Y_refresh_timer.stop();
1134  QDPIO::cout << solver_string << "Y Subspace Refresh Time = " << Y_refresh_timer.getTimeInSeconds() << " secs\n";
1135  }
1136  }
1137  else {
1138  QDPIO::cout << solver_string << "Y-Solve failed (seq: "<<seqno<<"). Blowing away and reiniting subspace" << std::endl;
1139  StopWatch reinit_timer; reinit_timer.reset();
1140  reinit_timer.start();
1141  // BLow away subspace, re-set it up and then re-solve
1142  QUDAMGUtils::delete_subspace(invParam.SaveSubspaceID);
1143 
1144  // Recreate the subspace
1145  bool saved_value = ip.check_multigrid_setup;
1146  ip.check_multigrid_setup = true;
1147  subspace_pointers = QUDAMGUtils::create_subspace<T>(invParam);
1148  ip.check_multigrid_setup = saved_value;
1149 
1150 
1151  // Make subspace XML snippets
1152  XMLBufferWriter file_xml;
1153  push(file_xml, "FileXML");
1154  pop(file_xml);
1155 
1156  int foo = 5;
1157  XMLBufferWriter record_xml;
1158  push(record_xml, "RecordXML");
1159  write(record_xml, "foo", foo);
1160  pop(record_xml);
1161 
1162 
1163  // Create named object entry.
1164  TheNamedObjMap::Instance().create< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
1165  TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setFileXML(file_xml);
1166  TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setRecordXML(record_xml);
1167 
1168  // Assign the pointer into the named object
1169  TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID) = subspace_pointers;
1170  quda_inv_param.preconditioner = subspace_pointers->preconditioner;
1171  reinit_timer.stop();
1172  QDPIO::cout << solver_string << "Subspace Reinit Time: " << reinit_timer.getTimeInSeconds() << " sec." << std::endl;
1173 
1174  // Re-solve
1175  // This is a re-solve. So use_resident=false means used my initial guess
1176  // (do not repredict)
1177  quda_inv_param.chrono_use_resident = false;
1178 
1179  // The last solve, stored a chrono vector. We will overwrite this
1180  // thanks to the setting below
1181  quda_inv_param.chrono_replace_last = true;
1182 
1183  QDPIO::cout << solver_string << "Re-Solving for Y (zero guess)" << std::endl;
1184  SystemSolverResults_t res_tmp;
1185  Y_prime = zero;
1186 
1187  if( invParam.asymmetricP == true ) {
1188  res_tmp = qudaInvert(*clov,
1189  *invclov,
1190  g5chi,
1191  Y_prime);
1192  Y[rb[1]] = Gamma(Nd*Nd -1)*Y_prime;
1193  }
1194  else {
1195  T tmp = zero;
1196  invclov->apply(tmp,g5chi,MINUS,1);
1197 
1198  res_tmp = qudaInvert(*clov,
1199  *invclov,
1200  tmp,
1201  Y_prime);
1202 
1203 #ifdef QUDA_DEBUG
1204  {
1205  char Y_prime_norm[256];
1206  char Y_prime_norm_full[256];
1207  std::sprintf(Y_prime_norm, "%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime, A->subset())));
1208  std::sprintf(Y_prime_norm_full, "%.*e", DECIMAL_DIG, toDouble(norm2(Y_prime)));
1209  QDPIO::cout << "Y solution: norm2(subset) = " << Y_prime_norm << " norm(full) = " << Y_prime_norm_full << std::endl;
1210  }
1211 #endif
1212  tmp[rb[1]] = Gamma(Nd*Nd-1)*Y_prime;
1213  clov->apply(Y,tmp,MINUS,1);
1214  }
1215 
1216  // Check solution
1217  {
1218  T r;
1219  r[A->subset()]=chi;
1220  T tmp;
1221  (*A)(tmp, Y, MINUS);
1222  r[A->subset()] -= tmp;
1223 
1224  res_tmp.resid = sqrt(norm2(r, A->subset()));
1225  if ( toBool( res_tmp.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
1226  // If we fail on the resolve then barf
1227  QDPIO::cout << solver_string << "Re Solve for Y Failed (seq: " << seqno << " ) Rsd = " << res_tmp.resid/norm2chi << " RsdTarget = " << invParam.RsdTarget << std::endl;
1228 
1229  dumpYSolver(g5chi,Y_prime);
1230 
1231  QDPIO::cout << solver_string << "Throwing Exception! This will ABORT" << std::endl;
1232 
1233  MGSolverException convergence_fail(invParam.CloverParams.Mass,
1234  invParam.SaveSubspaceID,
1235  res_tmp.n_count,
1236  Real(res_tmp.resid/norm2chi),
1237  invParam.RsdTarget*invParam.RsdToleranceFactor);
1238  throw convergence_fail;
1239  }
1240  }
1241 
1242  // At this point solution should be good again and subspace should be reinited
1243  res1.n_count += res_tmp.n_count; // Add resolve iterations
1244  res1.resid = res_tmp.resid; // Copy new residuum.
1245 
1246  }
1247  Y_solve_timer.stop();
1248 
1249  // At this point we should have a good solution.
1250  // After the good solve, solution will be added to the right channel
1251  // by QUDA
1252  // Some diagnostics would be nice
1253 
1254 
1255  // Now select QUDA Chrono Index here
1256  quda_inv_param.chrono_max_dim = predictor.getMaxChrono();
1257  quda_inv_param.chrono_index = X_index;
1258  quda_inv_param.chrono_make_resident = true;
1259  quda_inv_param.chrono_use_resident = true;
1260  quda_inv_param.chrono_replace_last = false;
1261 
1262  // Reset Target Residuum for X solve
1263  quda_inv_param.tol = toDouble(invParam.RsdTarget);
1264  X_solve_timer.start();
1265  //psi[A->subset()]=zero;
1266  psi = zero;
1267  // Solve for psi
1268  res2 = qudaInvert(*clov,
1269  *invclov,
1270  Y,
1271  psi);
1272 #ifdef QUDA_DEBUG
1273  {
1274  char X_prime_norm[256];
1275  char X_prime_norm_full[256];
1276  std::sprintf(X_prime_norm, "%.*e", DECIMAL_DIG, toDouble(norm2(psi, A->subset())));
1277  std::sprintf(X_prime_norm_full, "%.*e", DECIMAL_DIG, toDouble(norm2(psi)));
1278  QDPIO::cout << "X solution: norm2(subset) = " << X_prime_norm << " norm(full) = " << X_prime_norm_full << std::endl;
1279  }
1280 #endif
1281 
1282 
1283  solution_good = true;
1284  // Check solution
1285  {
1286  T r=zero;
1287  r[A->subset()]=Y;
1288  T tmp=zero;
1289  // Checkin MX = Y solve
1290  (*A)(tmp, psi, PLUS);
1291  r[ A->subset() ] -= tmp;
1292  Double resid_MXY = sqrt(norm2(r,A->subset()));
1293  Double normY = sqrt(norm2(Y,A->subset()));
1294  QDPIO::cout << "X solve: || Y - MX || / || Y || = " << resid_MXY/normY << std::endl;
1295  r[A->subset()]=chi;
1296  (*MdagM)(tmp, psi, PLUS);
1297  r[A->subset()] -= tmp;
1298 
1299  res2.resid = sqrt(norm2(r, A->subset()));
1300  if ( toBool( res2.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
1301  solution_good = false;
1302  }
1303  }
1304 
1305 
1306  if( solution_good ) {
1307  if( res2.n_count >= threshold_counts ) {
1308  QDPIO::cout << solver_string <<"Threshold Reached: X Solver iters = " << res2.n_count << " Threshold=" << threshold_counts << std::endl;
1309  QDPIO::cout << solver_string << "Refreshing Subspace" << std::endl;
1310 
1311  X_refresh_timer.start();
1312  // refresh the subspace
1313  // Regenerate space. Destroy and recreate
1314  // Setup the number of subspace Iterations
1315  for(int j=0; j < ip.mg_levels-1; j++) {
1316  (subspace_pointers->mg_param).setup_maxiter_refresh[j] = ip.maxIterSubspaceRefresh[j];
1317  }
1318  updateMultigridQuda(subspace_pointers->preconditioner, &(subspace_pointers->mg_param));
1319  for(int j=0; j < ip.mg_levels-1; j++) {
1320  (subspace_pointers->mg_param).setup_maxiter_refresh[j] = 0;
1321  }
1322  X_refresh_timer.stop();
1323 
1324  QDPIO::cout << solver_string << "Subspace Refresh Time = " << X_refresh_timer.getTimeInSeconds() << " secs\n";
1325  }
1326  }
1327  else {
1328 
1329  QDPIO::cout << solver_string << "X-Solve failed (seq: "<<seqno<<") Blowing away and reiniting subspace" << std::endl;
1330  StopWatch reinit_timer; reinit_timer.reset();
1331  reinit_timer.start();
1332 
1333  // Delete the saved subspace completely
1334  QUDAMGUtils::delete_subspace(invParam.SaveSubspaceID);
1335 
1336  // Recreate the subspace
1337  bool saved_value = ip.check_multigrid_setup;
1338  ip.check_multigrid_setup = true;
1339  subspace_pointers = QUDAMGUtils::create_subspace<T>(invParam);
1340  ip.check_multigrid_setup = saved_value;
1341 
1342  // Make subspace XML snippets
1343  XMLBufferWriter file_xml;
1344  push(file_xml, "FileXML");
1345  pop(file_xml);
1346 
1347  int foo = 5;
1348  XMLBufferWriter record_xml;
1349  push(record_xml, "RecordXML");
1350  write(record_xml, "foo", foo);
1351  pop(record_xml);
1352 
1353 
1354  // Create named object entry.
1355  TheNamedObjMap::Instance().create< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID);
1356  TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setFileXML(file_xml);
1357  TheNamedObjMap::Instance().get(invParam.SaveSubspaceID).setRecordXML(record_xml);
1358 
1359  // Assign the pointer into the named object
1360  TheNamedObjMap::Instance().getData< QUDAMGUtils::MGSubspacePointers* >(invParam.SaveSubspaceID) = subspace_pointers;
1361  quda_inv_param.preconditioner = subspace_pointers->preconditioner;
1362  reinit_timer.stop();
1363  QDPIO::cout << solver_string << "Subspace Reinit Time: " << reinit_timer.getTimeInSeconds() << " sec." << std::endl;
1364 
1365  // Re-solve
1366  // This is a re-solve. So use_resident=false means used my initial guess
1367  // (do not repredict)
1368  quda_inv_param.chrono_use_resident = false;
1369 
1370  // The last solve, stored a chrono vector. We will overwrite this
1371  // thanks to the setting below
1372  quda_inv_param.chrono_replace_last = true;
1373 
1374  QDPIO::cout << solver_string << "Re-Solving for X (zero guess)" << std::endl;
1375 
1376  SystemSolverResults_t res_tmp;
1377 
1378  // psi[rb[1]] = zero;
1379  psi = zero;
1380  res_tmp = qudaInvert(*clov,
1381  *invclov,
1382  Y,
1383  psi);
1384 
1385 #ifdef QUDA_DEBUG
1386  {
1387  char X_prime_norm[256];
1388  char X_prime_norm_full[256];
1389  std::sprintf(X_prime_norm, "%.*e", DECIMAL_DIG, toDouble(norm2(psi, A->subset())));
1390  std::sprintf(X_prime_norm_full, "%.*e", DECIMAL_DIG, toDouble(norm2(psi)));
1391  QDPIO::cout << "X solution: norm2(subset) = " << X_prime_norm << " norm(full) = " << X_prime_norm_full << std::endl;
1392  }
1393 #endif
1394  // Check solution
1395  {
1396  T r=zero;
1397  r[A->subset()]=Y;
1398  T tmp=zero;
1399  // Checkin MX = Y solve
1400  (*A)(tmp, psi, PLUS);
1401  r[ A->subset() ] -= tmp;
1402  Double resid_MXY = sqrt(norm2(r,A->subset()));
1403  Double normY = sqrt(norm2(Y,A->subset()));
1404  QDPIO::cout << "X re-solve: || Y - MX || / || Y || = " << resid_MXY/normY << std::endl;
1405  r[A->subset()]=chi;
1406  (*MdagM)(tmp, psi, PLUS);
1407  r[A->subset()] -= tmp;
1408 
1409  res_tmp.resid = sqrt(norm2(r, A->subset()));
1410  if ( toBool( res_tmp.resid/norm2chi > invParam.RsdToleranceFactor * invParam.RsdTarget ) ) {
1411  QDPIO::cout << solver_string << "Re Solve for X Failed (seq: " << seqno << " ) Rsd = " << res_tmp.resid/norm2chi << " RsdTarget = " << invParam.RsdTarget << std::endl;
1412 
1413  QDPIO::cout << "Dumping state (solve seqno : " << seqno << " ) " << std::endl;
1414  dumpXSolver(chi,Y,psi);
1415 
1416 
1417  QDPIO::cout << solver_string << "Throwing Exception! This will ABORT" << std::endl;
1418  MGSolverException convergence_fail(invParam.CloverParams.Mass,
1419  invParam.SaveSubspaceID,
1420  res_tmp.n_count,
1421  Real(res_tmp.resid/norm2chi),
1422  invParam.RsdTarget*invParam.RsdToleranceFactor);
1423  throw convergence_fail;
1424  }
1425  }
1426  // At this point the solution is good
1427  res2.n_count += res_tmp.n_count;
1428  res2.resid = res_tmp.resid;
1429 
1430  }
1431  X_solve_timer.stop();
1432  swatch.stop();
1433  double time = swatch.getTimeInSeconds();
1434 
1435 
1436 
1437  // Stats and done
1438  res.n_count = res1.n_count + res2.n_count;
1439  res.resid = res2.resid;
1440 
1441  Double rel_resid = res.resid/norm2chi;
1442 
1443  QDPIO::cout << solver_string << " seq: " << (seqno++) << " iterations: " << res1.n_count << " + "
1444  << res2.n_count << " = " << res.n_count
1445  << " Rsd = " << res.resid << " Relative Rsd = " << rel_resid << std::endl;
1446 
1447  QDPIO::cout << "Y_solve: " << Y_solve_timer.getTimeInSeconds() << " (s) "
1448  << "X_solve: " << X_solve_timer.getTimeInSeconds() << " (s) "
1449  << "Total time: " << time << "(s)" << std::endl;
1450 
1451  quda_inv_param.use_init_guess = old_guess_policy;
1452 
1453  // Turn off chrono. Next solve can turn it on again
1454  quda_inv_param.chrono_make_resident = false;
1455  quda_inv_param.chrono_use_resident = false;
1456  quda_inv_param.chrono_replace_last = false;
1457 
1458 
1459  return res;
1460  }
1461 
1462 
1463  SystemSolverResults_t operator() (T& psi, const T& chi, Chroma::AbsChronologicalPredictor4D<T>& predictor ) const
1464  {
1465  SystemSolverResults_t res;
1466 
1467  // Try using QUDA predictor
1468  try {
1469  Chroma::QUDA4DChronoPredictor& quda_pred =
1470  dynamic_cast<Chroma::QUDA4DChronoPredictor&>(predictor);
1471 
1472  res = (*this)(psi,chi,quda_pred);
1473  return res;
1474  }
1475  catch(MGSolverException &e) {
1476  throw;
1477  }
1478  catch(...) {
1479  QDPIO::cout << "Failed to cast predictor to QUDA predictor"
1480  << std::endl;
1481  }
1482 
1483  // QUDA Predictor failed -- Try abs 2 step
1484  try {
1486  dynamic_cast< Chroma::AbsTwoStepChronologicalPredictor4D<T>&>(predictor);
1487 
1488  res = (*this)(psi,chi,two_step_pred);
1489  return res;
1490  }
1491  catch(MGSolverException &e) {
1492  throw;
1493  }
1494  catch(...) {
1495  QDPIO::cout << "Failed to cast predictor to QUDA or Two Step predictor"
1496  << std::endl;
1497  QDP_abort(1);
1498  }
1499  }
1500 
1501 
1502 private:
1503  // Hide default constructor
1504  MdagMSysSolverQUDAMULTIGRIDClover() {}
1505 
1506 #if 1
1507  Q links_orig;
1508 #endif
1509 
1510  U GFixMat;
1511  QudaPrecision_s cpu_prec;
1512  QudaPrecision_s gpu_prec;
1513  QudaPrecision_s gpu_half_prec;
1514 
1515  Handle< LinearOperator<T> > A;
1516  Handle< FermState<T,Q,Q> > gstate;
1517  mutable SysSolverQUDAMULTIGRIDCloverParams invParam;
1518  QudaGaugeParam q_gauge_param;
1519  mutable QudaInvertParam quda_inv_param;
1520  mutable QUDAMGUtils::MGSubspacePointers* subspace_pointers;
1521 
1522 
1523  Handle< CloverTermT<T, U> > clov;
1524  Handle< CloverTermT<T, U> > invclov;
1525 
1526  SystemSolverResults_t qudaInvert(const CloverTermT<T, U>& clover,
1527  const CloverTermT<T, U>& inv_clov,
1528  const T& chi_s,
1529  T& psi_s
1530  )const;
1531 
1532  std::string solver_string;
1533  int threshold_counts;
1534 
1535  void dumpYSolver(const LatticeFermion& chi,
1536  const LatticeFermion& Y) const;
1537 
1538  void dumpXSolver(const LatticeFermion& chi,
1539  const LatticeFermion& Y,
1540  const LatticeFermion& X) const;
1541 
1542  static unsigned long seqno;
1543 
1544 };
1545 
1546 } // End namespace
1547 
1548 #endif // BUILD_QUDA
1549 #endif
1550 
Anisotropy parameters.
Primary include file for CHROMA library code.
#define END_CODE()
Definition: chromabase.h:65
#define START_CODE()
Definition: chromabase.h:64
Abstract interface for a Chronological Solution predictor.
Abstract interface for a Chronological Solution predictor.
virtual void predictY(T &Y, const T &chi, const Subset &s) const
virtual void newYVector(const T &Y)=0
virtual void predictX(T &X, const T &chi, const Subset &s) const
virtual void newXVector(const T &X)=0
Zero initial guess predictor.
QudaPrecisionType getChronoPrecision() const
Include possibly optimized Clover terms.
int mu
Definition: cool.cc:24
void write(XMLWriter &xml, const std::string &path, const AsqtadFermActParams &param)
Writer parameters.
void temporalGauge(multi1d< LatticeColorMatrix > &ug, LatticeColorMatrix &g, int decay_dir)
Temporal gauge fixing.
Class for counted reference semantics.
unsigned s
Definition: ldumul_w.cc:37
unsigned j
Definition: ldumul_w.cc:35
unsigned i
Definition: ldumul_w.cc:34
Linear Operators.
M^dag*M composition of a linear operator.
Double tmp
Definition: meslate.cc:60
Nd
Definition: meslate.cc:74
Named object function std::map.
bool registerAll()
Register all the factories.
void delete_subspace(const std::string SubspaceID)
size_t getCUDAFreeMem(void)
multi1d< Hadron2PtContraction_t > operator()(const multi1d< LatticeColorMatrix > &u)
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
LinOpSysSolverMGProtoClover::Q Q
LinOpSysSolverMGProtoClover::T T
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
QDPCloverTermT< T, U > CloverTermT
Definition: clover_term_w.h:97
A(A, psi, r, Ncb, PLUS)
Double zero
Definition: invbicg.cc:106
multi1d< Real > makeFermCoeffs(const AnisoParam_t &aniso)
Make fermion coefficients.
Definition: aniso_io.cc:63
@ ADDITIVE_SCHWARZ
Definition: enum_quda_io.h:103
@ MULTIPLICATIVE_SCHWARZ
Definition: enum_quda_io.h:104
@ RECONS_12
Definition: enum_quda_io.h:80
@ RECONS_NONE
Definition: enum_quda_io.h:78
@ RECONS_8
Definition: enum_quda_io.h:79
FloatingPoint< double > Double
Definition: gtest.h:7351
::std::string string
Definition: gtest.h:1979
multi1d< LatticeFermion > r(Ncb)
chi
Definition: pade_trln_w.cc:24
psi
Definition: pade_trln_w.cc:191
Periodic ferm state and a creator.
#define FORWARD
Definition: primitives.h:82
Pick channel for QUDA Predictor.
Reunitarize in place a color matrix to SU(N)
Simple fermionic BC.
Support class for fermion actions and linear operators.
Linear system solvers.
Handle< LinearOperator< T > > MdagM
multi1d< LatticeColorMatrix > U
LatticeFermion T
Definition: t_clover.cc:11
multi1d< LatticeColorMatrix > Q
Definition: t_clover.cc:12
multi1d< LatticeColorMatrixF > QF
Definition: t_quda_tprec.cc:19
LatticeColorMatrixF UF
Definition: t_quda_tprec.cc:18
LatticeFermionD TD
Definition: t_quda_tprec.cc:22
LatticeColorMatrixD UD
Definition: t_quda_tprec.cc:23
LatticeFermionF TF
Definition: t_quda_tprec.cc:17
multi1d< LatticeColorMatrixD > QD
Definition: t_quda_tprec.cc:24
Axial gauge fixing.
push(xml_out,"Cooled_Topology")
pop(xml_out)
Zero initial guess predictor.
#define HALF