CHROMA
syssolver_mdagm_clover_quda_w.h
Go to the documentation of this file.
1 // -*- C++ -*-
2 /*! \file
3  * \brief Solve a MdagM*psi=chi linear system by BiCGStab
4  */
5 
6 #ifndef __syssolver_mdagm_quda_clover_w_h__
7 #define __syssolver_mdagm_quda_clover_w_h__
8 
9 #include "chroma_config.h"
10 
11 
12 #ifdef BUILD_QUDA
13 
14 #include "handle.h"
15 #include "state.h"
16 #include "syssolver.h"
17 #include "linearop.h"
18 #include "lmdagm.h"
24 #include "io/aniso_io.h"
25 #include <string>
28 #include "util/gauge/reunit.h"
29 
30 #include <quda.h>
31 #ifdef QDP_IS_QDPJIT
33 #endif
34 
35 //#undef BUILD_QUDA_DEVIFACE_GAUGE
36 //#undef BUILD_QUDA_DEVIFACE_SPINOR
37 //#undef BUILD_QUDA_DEVIFACE_CLOVER
38 
39 //#include <util_quda.h>
40 
41 namespace Chroma
42 {
43 
44 //! Richardson system solver namespace
45 namespace MdagMSysSolverQUDACloverEnv
46 {
47 //! Register the syssolver
48 bool registerAll();
49 }
50 
51 
52 
53 //! Solve a Clover Fermion System using the QUDA inverter
54 /*! \ingroup invert
55  *** WARNING THIS SOLVER WORKS FOR Clover FERMIONS ONLY ***
56  */
57 
58 class MdagMSysSolverQUDAClover : public MdagMSystemSolver<LatticeFermion>
59 {
60 public:
61  typedef LatticeFermion T;
62  typedef LatticeColorMatrix U;
63  typedef multi1d<LatticeColorMatrix> Q;
64 
65  typedef LatticeFermionF TF;
66  typedef LatticeColorMatrixF UF;
67  typedef multi1d<LatticeColorMatrixF> QF;
68 
69  typedef LatticeFermionF TD;
70  typedef LatticeColorMatrixF UD;
71  typedef multi1d<LatticeColorMatrixF> QD;
72 
73  typedef WordType<T>::Type_t REALT;
74  //! Constructor
75  /*!
76  * \param M_ Linear operator ( Read )
77  * \param invParam inverter parameters ( Read )
78  */
79  MdagMSysSolverQUDAClover(Handle< LinearOperator<T> > A_,
80  Handle< FermState<T,Q,Q> > state_,
81  const SysSolverQUDACloverParams& invParam_) :
82  A(A_), state(state_), invParam(invParam_), clov(new CloverTermT<T, U>()), invclov(new CloverTermT<T, U>())
83  {
84  QDPIO::cout << "MdagMSysSolverQUDAClover:" << std::endl;
85 
86  // FOLLOWING INITIALIZATION in test QUDA program
87 
88  // 1) work out cpu_prec, cuda_prec, cuda_prec_sloppy
89  int s = sizeof( WordType<T>::Type_t );
90  if (s == 4) {
91  cpu_prec = QUDA_SINGLE_PRECISION;
92  }
93  else {
94  cpu_prec = QUDA_DOUBLE_PRECISION;
95  }
96 
97 
98  // Work out GPU precision
99  switch( invParam.cudaPrecision ) {
100  case HALF:
101  gpu_prec = QUDA_HALF_PRECISION;
102  break;
103  case SINGLE:
104  gpu_prec = QUDA_SINGLE_PRECISION;
105  break;
106  case DOUBLE:
107  gpu_prec = QUDA_DOUBLE_PRECISION;
108  break;
109  default:
110  gpu_prec = cpu_prec;
111  break;
112  }
113 
114  // Work out GPU Sloppy precision
115  // Default: No Sloppy
116  switch( invParam.cudaSloppyPrecision ) {
117  case HALF:
118  gpu_half_prec = QUDA_HALF_PRECISION;
119  break;
120  case SINGLE:
121  gpu_half_prec = QUDA_SINGLE_PRECISION;
122  break;
123  case DOUBLE:
124  gpu_half_prec = QUDA_DOUBLE_PRECISION;
125  break;
126  default:
127  gpu_half_prec = gpu_prec;
128  break;
129  }
130 
131  // 2) pull 'new; GAUGE and Invert params
132  q_gauge_param = newQudaGaugeParam();
133  quda_inv_param = newQudaInvertParam();
134 
135  // 3) set lattice size
136  const multi1d<int>& latdims = Layout::subgridLattSize();
137 
138  q_gauge_param.X[0] = latdims[0];
139  q_gauge_param.X[1] = latdims[1];
140  q_gauge_param.X[2] = latdims[2];
141  q_gauge_param.X[3] = latdims[3];
142 
143  // 4) - deferred (anisotropy)
144 
145  // 5) - set QUDA_WILSON_LINKS, QUDA_GAUGE_ORDER
146  q_gauge_param.type = QUDA_WILSON_LINKS;
147 
148 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
149  q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER; // gauge[mu], p
150 #else
151  //QDPIO::cout << "MDAGM Using QDP-JIT gauge order" << std::endl;
152  q_gauge_param.location = QUDA_CUDA_FIELD_LOCATION;
153  q_gauge_param.gauge_order = QUDA_QDPJIT_GAUGE_ORDER;
154 #endif
155 
156  // 6) - set t_boundary
157  // Convention: BC has to be applied already
158  // This flag just tells QUDA that this is so,
159  // so that QUDA can take care in the reconstruct
160  if( invParam.AntiPeriodicT ) {
161  q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
162  }
163  else {
164  q_gauge_param.t_boundary = QUDA_PERIODIC_T;
165  }
166 
167  // Set cpu_prec, cuda_prec, reconstruct and sloppy versions
168  q_gauge_param.cpu_prec = cpu_prec;
169  q_gauge_param.cuda_prec = gpu_prec;
170 
171 
172  switch( invParam.cudaReconstruct ) {
173  case RECONS_NONE:
174  q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
175  break;
176  case RECONS_8:
177  q_gauge_param.reconstruct = QUDA_RECONSTRUCT_8;
178  break;
179  case RECONS_12:
180  q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
181  break;
182  default:
183  q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
184  break;
185  };
186 
187  q_gauge_param.cuda_prec_sloppy = gpu_half_prec;
188 
189  // Default for no preconditioning.
190  // Data read from the innner preconditioner struct
191  // can overwrite this later
192  q_gauge_param.cuda_prec_precondition = gpu_half_prec;
193 
194  switch( invParam.cudaSloppyReconstruct ) {
195  case RECONS_NONE:
196  q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_NO;
197  break;
198  case RECONS_8:
199  q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_8;
200  break;
201  case RECONS_12:
202  q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
203  break;
204  default:
205  q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
206  break;
207  };
208 
209  // Default: may be overwritten later
210  q_gauge_param.reconstruct_precondition=q_gauge_param.reconstruct_sloppy;
211  // Gauge fixing:
212 
213  // These are the links
214  // They may be smeared and the BC's may be applied
215  Q links_single(Nd);
216 
217  // Now downcast to single prec fields.
218  for(int mu=0; mu < Nd; mu++) {
219  links_single[mu] = (state_->getLinks())[mu];
220  }
221 
222  // GaugeFix
223  if( invParam.axialGaugeP ) {
224  QDPIO::cout << "Fixing Temporal Gauge" << std::endl;
225  temporalGauge(links_single, GFixMat, Nd-1);
226  for(int mu=0; mu < Nd; mu++){
227  links_single[mu] = GFixMat*(state_->getLinks())[mu]*adj(shift(GFixMat, FORWARD, mu));
228  }
229  q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_YES;
230  }
231  else {
232  // No GaugeFix
233  q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO; // No Gfix yet
234  }
235 
236  // deferred 4) Gauge Anisotropy
237  const AnisoParam_t& aniso = invParam.CloverParams.anisoParam;
238  if( aniso.anisoP ) { // Anisotropic case
239  Real gamma_f = aniso.xi_0 / aniso.nu;
240  q_gauge_param.anisotropy = toDouble(gamma_f);
241  }
242  else {
243  q_gauge_param.anisotropy = 1.0;
244  }
245 
246  // MAKE FSTATE BEFORE RESCALING links_single
247  // Because the clover term expects the unrescaled links...
248  Handle<FermState<T,Q,Q> > fstate( new PeriodicFermState<T,Q,Q>(links_single));
249 
250  if( aniso.anisoP ) { // Anisotropic case
251  multi1d<Real> cf=makeFermCoeffs(aniso);
252  for(int mu=0; mu < Nd; mu++) {
253  links_single[mu] *= cf[mu];
254  }
255  }
256 
257  // Now onto the inv param:
258  // Dslash type
259  quda_inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
260 
261  // Invert type:
262  switch( invParam.solverType ) {
263  case CG:
264  quda_inv_param.inv_type = QUDA_CG_INVERTER;
265  solver_string = "CG";
266  break;
267  case BICGSTAB:
268  quda_inv_param.inv_type = QUDA_BICGSTAB_INVERTER;
269  solver_string = "BICGSTAB";
270  break;
271  case GCR:
272  quda_inv_param.inv_type = QUDA_GCR_INVERTER;
273  solver_string = "GCR";
274  break;
275  default:
276  quda_inv_param.inv_type = QUDA_CG_INVERTER;
277  solver_string = "CG";
278  break;
279  }
280 
281  // Turn off true residuum calculation for MdagM solves
282  // Since Chroma will check on this.
283  quda_inv_param.compute_true_res = 0;
284 
285  // Mass
286 
287  // Fiendish idea from Ron. Set the kappa=1/2 and use
288  // unmodified clover term, and ask for Kappa normalization
289  // This should give us A - (1/2)D as the unpreconditioned operator
290  // and probabl 1 - {1/4} A^{-1} D A^{-1} D as the preconditioned
291  // op. Apart from the A_oo stuff on the antisymmetric we have
292  // nothing to do...
293  quda_inv_param.kappa = 0.5;
294 
295  // Dummy parameter to pass check_params
296  // FIXME: If we ever use QUDA to compute our clover term we have to sort out anisotropy
297  // RIght now this won't do anything since we pass in the clover term
298  quda_inv_param.clover_coeff = 1.0;
299  quda_inv_param.tol = toDouble(invParam.RsdTarget);
300  quda_inv_param.maxiter = invParam.MaxIter;
301  quda_inv_param.reliable_delta = toDouble(invParam.Delta);
302  quda_inv_param.pipeline = invParam.Pipeline;
303 
304  // Solution type
305  quda_inv_param.solution_type = QUDA_MATPCDAG_MATPC_SOLUTION;
306 
307  // Solve type
308  switch( invParam.solverType ) {
309  case CG:
310  quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
311  break;
312  case BICGSTAB:
313  quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
314  break;
315 
316  case GCR:
317  quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
318  break;
319  case CA_GCR:
320  quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
321  break;
322  case MR:
323  quda_inv_param.solve_type = QUDA_DIRECT_PC_SOLVE;
324  break;
325 
326  default:
327  quda_inv_param.solve_type = QUDA_NORMOP_PC_SOLVE;
328 
329  break;
330  }
331 
332 
333  if( invParam.asymmetricP ) {
334  QDPIO::cout << "Working with Asymmetric LinOp" << std::endl;
335  quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD_ASYMMETRIC;
336  }
337  else {
338  QDPIO::cout << "Working with Symmetric LinOp" << std::endl;
339  quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
340  }
341 
342  quda_inv_param.dagger = QUDA_DAG_NO;
343  quda_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
344 
345  quda_inv_param.cpu_prec = cpu_prec;
346  quda_inv_param.cuda_prec = gpu_prec;
347  quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
348 
349  // Default for no preconditioining
350  // Data read from inner parameters can override this later.
351  quda_inv_param.cuda_prec_precondition = gpu_half_prec;
352 
353  quda_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
354  quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
355 
356 #ifndef BUILD_QUDA_DEVIFACE_SPINOR
357  quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
358  quda_inv_param.input_location = QUDA_CPU_FIELD_LOCATION;
359  quda_inv_param.output_location = QUDA_CPU_FIELD_LOCATION;
360 #else
361  //QDPIO::cout << "MDAGM Using QDP-JIT spinor order" << std::endl;
362  quda_inv_param.dirac_order = QUDA_QDPJIT_DIRAC_ORDER;
363  quda_inv_param.input_location = QUDA_CUDA_FIELD_LOCATION;
364  quda_inv_param.output_location = QUDA_CUDA_FIELD_LOCATION;
365 #endif
366 
367  // Clover precision and order
368  quda_inv_param.clover_cpu_prec = cpu_prec;
369  quda_inv_param.clover_cuda_prec = gpu_prec;
370  quda_inv_param.clover_cuda_prec_sloppy = gpu_half_prec;
371 
372  // Default for no preconditioning
373  // Data read from iner parameters can overwrite this later.
374  quda_inv_param.clover_cuda_prec_precondition = gpu_half_prec;
375 
376 #ifndef BUILD_QUDA_DEVIFACE_CLOVER
377  quda_inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
378 #else
379  //QDPIO::cout << "MDAGM Clover CUDA location\n";
380  quda_inv_param.clover_location = QUDA_CUDA_FIELD_LOCATION;
381  quda_inv_param.clover_order = QUDA_QDPJIT_CLOVER_ORDER;
382 #endif
383 
384 
385  // Autotuning
386  if( invParam.tuneDslashP ) {
387  QDPIO::cout << "Enabling Dslash Autotuning" << std::endl;
388 
389  quda_inv_param.tune = QUDA_TUNE_YES;
390  }
391  else {
392  QDPIO::cout << "Disabling Dslash Autotuning" << std::endl;
393 
394  quda_inv_param.tune = QUDA_TUNE_NO;
395  }
396 
397 
398  // Setup padding
399  multi1d<int> face_size(4);
400  face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
401  face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
402  face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
403  face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
404 
405  int max_face = face_size[0];
406  for(int i=1; i <=3; i++) {
407  if ( face_size[i] > max_face ) {
408  max_face = face_size[i];
409  }
410  }
411 
412 
413  q_gauge_param.ga_pad = max_face;
414  quda_inv_param.sp_pad = 0;
415  quda_inv_param.cl_pad = 0;
416 
417  if( invParam.innerParamsP ) {
418  QDPIO::cout << "Setting inner solver params" << std::endl;
419  // Dereference handle
420  GCRInnerSolverParams ip = *(invParam.innerParams);
421 
422  switch( ip.precPrecondition ) {
423  case HALF:
424  quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
425  quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
426  q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
427  break;
428 
429  case SINGLE:
430  quda_inv_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
431  quda_inv_param.clover_cuda_prec_precondition = QUDA_SINGLE_PRECISION;
432  q_gauge_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
433  break;
434 
435  case DOUBLE:
436  quda_inv_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
437  quda_inv_param.clover_cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
438  q_gauge_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
439  break;
440  default:
441  quda_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
442  quda_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
443  q_gauge_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
444  break;
445  }
446 
447  switch( ip.reconstructPrecondition ) {
448  case RECONS_NONE:
449  q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_NO;
450  break;
451  case RECONS_8:
452  q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_8;
453  break;
454  case RECONS_12:
455  q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
456  break;
457  default:
458  q_gauge_param.reconstruct_precondition = QUDA_RECONSTRUCT_12;
459  break;
460  };
461 
462  quda_inv_param.tol_precondition = toDouble(ip.tolPrecondition);
463  quda_inv_param.maxiter_precondition = ip.maxIterPrecondition;
464  quda_inv_param.gcrNkrylov = ip.gcrNkrylov;
465  switch( ip.schwarzType ) {
466  case ADDITIVE_SCHWARZ :
467  quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
468  break;
470  quda_inv_param.schwarz_type = QUDA_MULTIPLICATIVE_SCHWARZ;
471  break;
472  default:
473  quda_inv_param.schwarz_type = QUDA_ADDITIVE_SCHWARZ;
474  break;
475  }
476  quda_inv_param.precondition_cycle = ip.preconditionCycle;
477 
478  if( ip.verboseInner ) {
479  quda_inv_param.verbosity_precondition = QUDA_VERBOSE;
480  }
481  else {
482  quda_inv_param.verbosity_precondition = QUDA_SILENT;
483  }
484 
485  switch( ip.invTypePrecondition ) {
486  case CG:
487  quda_inv_param.inv_type_precondition = QUDA_CG_INVERTER;
488  break;
489  case BICGSTAB:
490  quda_inv_param.inv_type_precondition = QUDA_BICGSTAB_INVERTER;
491  break;
492  case CA_GCR:
493  quda_inv_param.inv_type_precondition= QUDA_CA_GCR_INVERTER;
494  break;
495  case MR:
496  quda_inv_param.inv_type_precondition= QUDA_MR_INVERTER;
497  break;
498 
499  default:
500  quda_inv_param.inv_type_precondition = QUDA_MR_INVERTER;
501  break;
502  }
503  }
504  else {
505  QDPIO::cout << "Setting Precondition stuff to defaults for not using" << std::endl;
506  quda_inv_param.inv_type_precondition= QUDA_INVALID_INVERTER;
507  quda_inv_param.tol_precondition = 1.0e-1;
508  quda_inv_param.maxiter_precondition = 1000;
509  quda_inv_param.verbosity_precondition = QUDA_SILENT;
510  quda_inv_param.gcrNkrylov = 1;
511 
512  }
513 
514  if( invParam.verboseP ) {
515  quda_inv_param.verbosity = QUDA_VERBOSE;
516  }
517  else {
518  quda_inv_param.verbosity = QUDA_SUMMARIZE;
519  }
520 
521  // Set up the links
522  void* gauge[4];
523 
524 #ifndef BUILD_QUDA_DEVIFACE_GAUGE
525  for(int mu=0; mu < Nd; mu++) {
526  gauge[mu] = (void *)&(links_single[mu].elem(all.start()).elem().elem(0,0).real());
527  }
528 #else
529  GetMemoryPtrGauge(gauge,links_single);
530  //gauge[mu] = GetMemoryPtr( links_single[mu].getId() );
531  //std::cout << "MDAGM CUDA gauge[" << mu << "] in = " << gauge[mu] << "\n";
532 #endif
533 
534 
535  loadGaugeQuda((void *)gauge, &q_gauge_param);
536 
537  // Setup Clover Term
538  QDPIO::cout << "Creating CloverTerm" << std::endl;
539  clov->create(fstate, invParam_.CloverParams);
540  // Don't recompute, just copy
541  invclov->create(fstate, invParam_.CloverParams);
542 
543  QDPIO::cout << "Inverting CloverTerm" << std::endl;
544  invclov->choles(0);
545  invclov->choles(1);
546 
547 #ifndef BUILD_QUDA_DEVIFACE_CLOVER
548  multi1d<QUDAPackedClovSite<REALT> > packed_clov;
549 
550  packed_clov.resize(all.siteTable().size());
551 
552  clov->packForQUDA(packed_clov, 0);
553  clov->packForQUDA(packed_clov, 1);
554 
555  // Always need inverse
556  multi1d<QUDAPackedClovSite<REALT> > packed_invclov(all.siteTable().size());
557  invclov->packForQUDA(packed_invclov, 0);
558  invclov->packForQUDA(packed_invclov, 1);
559 
560  loadCloverQuda(&(packed_clov[0]), &(packed_invclov[0]),&quda_inv_param);
561 #else
562  void *clover[2];
563  void *cloverInv[2];
564 
565  GetMemoryPtrClover(clov->getOffId(),clov->getDiaId(),invclov->getOffId(),invclov->getDiaId());
566 
567 
568  loadCloverQuda( (void*)(clover) , (void*)(cloverInv) ,&quda_inv_param);
569 #endif
570 
571  }
572 
573 
574  //! Destructor is automatic
575  ~MdagMSysSolverQUDAClover()
576  {
577  QDPIO::cout << "Destructing" << std::endl;
578  freeGaugeQuda();
579  freeCloverQuda();
580  }
581 
582  //! Return the subset on which the operator acts
583  const Subset& subset() const {return A->subset();}
584 
585  //! Solver the linear system
586  /*!
587  * \param psi solution ( Modify )
588  * \param chi source ( Read )
589  * \return syssolver results
590  */
591  SystemSolverResults_t operator() (T& psi, const T& chi ) const
592  {
593  SystemSolverResults_t res;
594  Null4DChronoPredictor not_predicting;
595  (*this)(psi,chi, not_predicting);
596  START_CODE();
597  END_CODE();
598  return res;
599  }
600 
601 
602  SystemSolverResults_t operator() (T& psi, const T& chi, Chroma::QUDA4DChronoPredictor& predictor ) const
603  {
604  SystemSolverResults_t res;
605  SystemSolverResults_t res1;
606  SystemSolverResults_t res2;
607 
608  START_CODE();
609  // This is a solve with initial guess. So reset the policy (quda_inv_param is mutable)
610  QudaUseInitGuess old_guess_policy = quda_inv_param.use_init_guess;
611  quda_inv_param.use_init_guess = QUDA_USE_INIT_GUESS_YES;
612 
613 
614  StopWatch swatch;
615  swatch.start();
616 
617  // Create MdagM op
618  Handle< LinearOperator<T> > MdagM( new MdagMLinOp<T>(A) );
619 
620  StopWatch Y_solve_timer; Y_solve_timer.reset();
621  StopWatch X_solve_timer; X_solve_timer.reset();
622 
623 
624  // TWO STEP SOLVE
625  QDPIO::cout << "Two Step Solve using QUDA predictor: (X_index,Y_index) = ( " << predictor.getXIndex() << " , " << predictor.getYIndex() << " ) \n";
626  quda_inv_param.chrono_max_dim = predictor.getMaxChrono();
627  quda_inv_param.chrono_index = predictor.getYIndex();
628  quda_inv_param.chrono_make_resident = true;
629  quda_inv_param.chrono_use_resident = true;
630  quda_inv_param.chrono_replace_last = false;
631 
632  T Y;
633  Y[ A->subset() ] = psi; // Y is initial guess
634 
635  // Set up prediction for Y in QUDA here.
636 
637  Y_solve_timer.start();
638  // Now want to solve with M^\dagger on this.
639  quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
640  quda_inv_param.dagger = QUDA_DAG_YES;
641  res1 = qudaInvert(*clov,
642  *invclov,
643  chi,
644  Y);
645  Y_solve_timer.stop();
646 
647 
648  // Set up prediction for X in QUDA here
649  quda_inv_param.chrono_index = predictor.getXIndex();
650  quda_inv_param.chrono_make_resident = true;
651  quda_inv_param.chrono_use_resident = true;
652  quda_inv_param.chrono_replace_last = false;
653 
654 
655  // Step 2: Solve M X = Y
656  // Predict X
657 
658  X_solve_timer.start();
659  quda_inv_param.dagger = QUDA_DAG_NO; // Solve without dagger
660  res2 = qudaInvert(*clov,
661  *invclov,
662  Y,
663  psi);
664  X_solve_timer.stop();
665  swatch.stop();
666  double time = swatch.getTimeInSeconds();
667 
668  // reset init guess policy
669 
670  // Check solution
671  {
672  T r;
673  r[A->subset()]=chi;
674  T tmp;
675  (*MdagM)(tmp, psi, PLUS);
676  r[A->subset()] -= tmp;
677  res.resid = sqrt(norm2(r, A->subset()));
678  }
679 
680  Double rel_resid = res.resid/sqrt(norm2(chi,A->subset()));
681  res.n_count = res1.n_count + res2.n_count;
682 
683  QDPIO::cout << "QUDA_"<< solver_string <<"_CLOVER_SOLVER: " << res.n_count << " iterations. Rsd = " << res.resid << " Relative Rsd = " << rel_resid << std::endl
684  ;
685 
686  QDPIO::cout << "QUDA_"<< solver_string <<"_CLOVER_SOLVER: Time: "
687  << "Y_solve: " << Y_solve_timer.getTimeInSeconds() << " (s) "
688  << "X_solve: " << X_solve_timer.getTimeInSeconds() << " (s) "
689  << "Total time: " << time << "(s)" << std::endl;
690 
691  quda_inv_param.use_init_guess = old_guess_policy;
692  quda_inv_param.chrono_make_resident = false;
693  quda_inv_param.chrono_use_resident = false;
694  quda_inv_param.chrono_replace_last = false;
695 
696  // Check for failure
697  if ( toBool( rel_resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
698  QDPIO::cout << "QUDA_" << solver_string << "_CLOVER_SOLVER: SOLVE Failed to converge" << std::endl;
699  QDP_abort(1);
700  }
701 
702 
703 
704  END_CODE();
705  return res;
706 
707  }
708 
709 
710  SystemSolverResults_t operator() (T& psi, const T& chi, Chroma::AbsTwoStepChronologicalPredictor4D<T>& predictor ) const
711  {
712  SystemSolverResults_t res;
713  SystemSolverResults_t res1;
714  SystemSolverResults_t res2;
715 
716  START_CODE();
717  // This is a solve with initial guess. So reset the policy (quda_inv_param is mutable)
718  QudaUseInitGuess old_guess_policy = quda_inv_param.use_init_guess;
719  quda_inv_param.use_init_guess = QUDA_USE_INIT_GUESS_YES;
720 
721 
722  StopWatch swatch;
723  swatch.start();
724 
725  // This is a solver whic will use an initial guess:
726 
727  // Create MdagM op
728  Handle< LinearOperator<T> > MdagM( new MdagMLinOp<T>(A) );
729 
730  StopWatch X_prediction_timer; X_prediction_timer.reset();
731  StopWatch Y_prediction_timer; Y_prediction_timer.reset();
732  StopWatch Y_solve_timer; Y_solve_timer.reset();
733  StopWatch X_solve_timer; X_solve_timer.reset();
734  StopWatch Y_predictor_add_timer; Y_predictor_add_timer.reset();
735  StopWatch X_predictor_add_timer; X_predictor_add_timer.reset();
736 
737  QDPIO::cout << "Two Step Solve" << std::endl;
738 
739  T Y;
740  Y[ A->subset() ] = psi; // Y is initial guess
741 
742  // Predict Y
743  Y_prediction_timer.start();
744  predictor.predictY(Y,*A,chi);
745  Y_prediction_timer.stop();
746 
747  Y_solve_timer.start();
748  // Now want to solve with M^\dagger on this.
749  quda_inv_param.solution_type = QUDA_MATPC_SOLUTION;
750  quda_inv_param.dagger = QUDA_DAG_YES;
751  res1 = qudaInvert(*clov,
752  *invclov,
753  chi,
754  Y);
755  Y_solve_timer.stop();
756 
757  Y_predictor_add_timer.start();
758  predictor.newYVector(Y);
759  Y_predictor_add_timer.stop();
760 
761 
762  // Step 2: Solve M X = Y
763  // Predict X
764  X_prediction_timer.start();
765  predictor.predictX(psi,*MdagM, chi);
766  X_prediction_timer.stop();
767  X_solve_timer.start();
768  quda_inv_param.dagger = QUDA_DAG_NO; // Solve without dagger
769  res2 = qudaInvert(*clov,
770  *invclov,
771  Y,
772  psi);
773  X_solve_timer.stop();
774  X_predictor_add_timer.start();
775  predictor.newXVector(psi);
776  X_predictor_add_timer.stop();
777 
778  res.n_count = res1.n_count + res2.n_count; // Two step solve so combine iteration count
779 
780  swatch.stop();
781  double time = swatch.getTimeInSeconds();
782 
783  // reset init guess policy
784  quda_inv_param.use_init_guess = old_guess_policy;
785 
786  // Check solution
787  {
788  T r;
789  r[A->subset()]=chi;
790  T tmp;
791  (*MdagM)(tmp, psi, PLUS);
792  r[A->subset()] -= tmp;
793  res.resid = sqrt(norm2(r, A->subset()));
794  }
795 
796  Double rel_resid = res.resid/sqrt(norm2(chi,A->subset()));
797 
798  QDPIO::cout << "QUDA_"<< solver_string <<"_CLOVER_SOLVER: " << res.n_count << " iterations. Rsd = " << res.resid << " Relative Rsd = " << rel_resid << std::endl
799  ;
800 
801 
802 
803 
804  QDPIO::cout << "QUDA_"<< solver_string <<"_CLOVER_SOLVER: Time: Y_predict: " << Y_prediction_timer.getTimeInSeconds() << " (s) "
805  << "Y_solve: " << Y_solve_timer.getTimeInSeconds() << " (s) "
806  << "Y_register: " << Y_predictor_add_timer.getTimeInSeconds() << " (s) "
807  << "X_predict: " << X_prediction_timer.getTimeInSeconds() << " (s) "
808  << "X_solve: " << X_solve_timer.getTimeInSeconds() << " (s) "
809  << "X_register: " << X_predictor_add_timer.getTimeInSeconds() << " (s) "
810  << "Total time: " << time << "(s)" << std::endl;
811 
812  if ( toBool( rel_resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
813  QDPIO::cout << "QUDA_"<< solver_string <<"_CLOVER_SOLVER: Solver Failed to Converge! Aborting" << std::endl;
814  QDP_abort(1);
815  }
816 
817 
818  END_CODE();
819  return res;
820  }
821 
822  SystemSolverResults_t operator() (T& psi, const T& chi, Chroma::AbsChronologicalPredictor4D<T>& predictor ) const
823  {
824  SystemSolverResults_t res;
825  START_CODE();
826 
827 
828  StopWatch swatch;
829 
830  // Determine if 2 step solve is needed
831  bool two_step=false;
832  switch( invParam.solverType ) {
833  case BICGSTAB:
834  two_step = true;
835  break;
836  case GCR:
837  two_step = true;
838  break;
839  case CA_GCR:
840  two_step = true;
841  break;
842  case MR:
843  two_step = true;
844  break;
845  case CG:
846  two_step = false;
847  break;
848  default:
849  two_step = false;
850  break;
851  };
852 
853  // This is a solver whic will use an initial guess:
854 
855 
856  if ( two_step ) {
857  // User must supply a two step predictor. This can be
858  // a general one or a specific one. Try to cast to the specific one first.
859  try {
860  QUDA4DChronoPredictor& quda_predictor =
861  dynamic_cast<QUDA4DChronoPredictor&>(predictor);
862 
863  res = (*this)(psi,chi,quda_predictor);
864  END_CODE();
865  return res;
866 
867  }
868  catch(std::bad_cast) {} // We have another to try
869 
870  // Try a general two step predictor cast
871  try {
872  AbsTwoStepChronologicalPredictor4D<T>& abs_two_step_predictor =
873  dynamic_cast<AbsTwoStepChronologicalPredictor4D<T>&>(predictor);
874  res = (*this)(psi,chi,abs_two_step_predictor);
875  END_CODE();
876  return res;
877  }
878  catch(std::bad_cast) {
879  // Now we are stuck.
880  QDPIO::cout << "QUDA_"<< solver_string <<"_CLOVER_SOLVER: Two Step Solver with two step incapable predictor" << std::endl;
881  QDP_abort(1);
882  }
883 
884  }
885 
886 
887  StopWatch X_prediction_timer; X_prediction_timer.reset();
888  StopWatch X_solve_timer; X_solve_timer.reset();
889  StopWatch X_predictor_add_timer; X_predictor_add_timer.reset();
890 
891  // This is a solve with initial guess. So reset the policy (quda_inv_param is mutable)
892  QudaUseInitGuess old_guess_policy = quda_inv_param.use_init_guess;
893  quda_inv_param.use_init_guess = QUDA_USE_INIT_GUESS_YES;
894 
895 
896  // Create MdagM op
897  Handle< LinearOperator<T> > MdagM( new MdagMLinOp<T>(A) );
898 
899  // If we are here it is not a two step.
900  swatch.start();
901 
902  // Single Step Solve
903  QDPIO::cout << "Single Step Solve" << std::endl;
904  X_prediction_timer.start();
905  predictor(psi, (*MdagM), chi);
906  X_prediction_timer.stop();
907 
908  X_solve_timer.start();
909  res = qudaInvert(*clov,
910  *invclov,
911  chi,
912  psi);
913  X_solve_timer.stop();
914 
915  X_predictor_add_timer.start();
916  predictor.newVector(psi);
917  X_predictor_add_timer.stop();
918 
919  // reset init guess policy
920  quda_inv_param.use_init_guess = old_guess_policy;
921 
922  // Check solution
923  {
924  T r;
925  r[A->subset()]=chi;
926  T tmp;
927  (*MdagM)(tmp, psi, PLUS);
928  r[A->subset()] -= tmp;
929  res.resid = sqrt(norm2(r, A->subset()));
930  }
931  swatch.stop();
932  double time=swatch.getTimeInSeconds();
933  Double rel_resid = res.resid/sqrt(norm2(chi,A->subset()));
934 
935  QDPIO::cout << "QUDA_"<< solver_string <<"_CLOVER_SOLVER: " << res.n_count << " iterations. Rsd = " << res.resid << " Relative Rsd = " << rel_resid << std::endl
936  ;
937  QDPIO::cout << "QUDA_"<< solver_string <<"_CLOVER_SOLVER: Time: X_predict: " << X_prediction_timer.getTimeInSeconds() << " (s) "
938  << "X_solve: " << X_solve_timer.getTimeInSeconds() << " (s) "
939  << "X_register: " << X_predictor_add_timer.getTimeInSeconds() << " (s) "
940  << "Total time: " << time << "(s)" << std::endl;
941 
942  if ( toBool( rel_resid > invParam.RsdToleranceFactor*invParam.RsdTarget) ) {
943  QDPIO::cout << "QUDA_"<< solver_string <<"_CLOVER_SOLVER: Solver Failed to Converge! Aborting" << std::endl;
944  QDP_abort(1);
945  }
946 
947  END_CODE();
948  return res;
949 
950  }
951 
952 
953 private:
954  // Hide default constructor
955  MdagMSysSolverQUDAClover() {}
956 
957  Q links_orig;
958 
959  U GFixMat;
960  QudaPrecision_s cpu_prec;
961  QudaPrecision_s gpu_prec;
962  QudaPrecision_s gpu_half_prec;
963 
964  Handle< LinearOperator<T> > A;
965  Handle< FermState<T,Q,Q> > state;
966  const SysSolverQUDACloverParams invParam;
967  QudaGaugeParam q_gauge_param;
968  mutable QudaInvertParam quda_inv_param;
969 
970  Handle< CloverTermT<T, U> > clov;
971  Handle< CloverTermT<T, U> > invclov;
972 
973  SystemSolverResults_t qudaInvert(const CloverTermT<T, U>& clover,
974  const CloverTermT<T, U>& inv_clov,
975  const T& chi_s,
976  T& psi_s
977  )const ;
978 
979  std::string solver_string;
980 };
981 
982 
983 } // End namespace
984 
985 #endif // BUILD_QUDA
986 #endif
987 
Anisotropy parameters.
Abstract interface for a Chronological Solution predictor.
virtual void newVector(const T &psi)=0
Abstract interface for a Chronological Solution predictor.
virtual void predictY(T &Y, const T &chi, const Subset &s) const
virtual void newYVector(const T &Y)=0
virtual void predictX(T &X, const T &chi, const Subset &s) const
virtual void newXVector(const T &X)=0
Zero initial guess predictor.
Include possibly optimized Clover terms.
int mu
Definition: cool.cc:24
void temporalGauge(multi1d< LatticeColorMatrix > &ug, LatticeColorMatrix &g, int decay_dir)
Temporal gauge fixing.
Class for counted reference semantics.
Linear Operators.
M^dag*M composition of a linear operator.
Nd
Definition: meslate.cc:74
bool registerAll()
Register all the factories.
multi1d< Hadron2PtContraction_t > operator()(const multi1d< LatticeColorMatrix > &u)
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
LinOpSysSolverMGProtoClover::Q Q
LatticeFermion tmp
Definition: mespbg5p_w.cc:36
LinOpSysSolverMGProtoClover::T T
int i
Definition: pbg5p_w.cc:55
@ PLUS
Definition: chromabase.h:45
multi1d< LatticeFermion > chi(Ncb)
LatticeFermion psi
Definition: mespbg5p_w.cc:35
QDPCloverTermT< T, U > CloverTermT
Definition: clover_term_w.h:97
START_CODE()
A(A, psi, r, Ncb, PLUS)
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > state
Definition: pbg5p_w.cc:28
multi1d< Real > makeFermCoeffs(const AnisoParam_t &aniso)
Make fermion coefficients.
Definition: aniso_io.cc:63
multi1d< LatticeFermion > s(Ncb)
@ ADDITIVE_SCHWARZ
Definition: enum_quda_io.h:103
@ MULTIPLICATIVE_SCHWARZ
Definition: enum_quda_io.h:104
@ RECONS_12
Definition: enum_quda_io.h:80
@ RECONS_NONE
Definition: enum_quda_io.h:78
@ RECONS_8
Definition: enum_quda_io.h:79
FloatingPoint< double > Double
Definition: gtest.h:7351
::std::string string
Definition: gtest.h:1979
Null predictor: Leaves input x0 unchanged.
Periodic ferm state and a creator.
#define FORWARD
Definition: primitives.h:82
Pick channel for QUDA Predictor.
Reunitarize in place a color matrix to SU(N)
Simple fermionic BC.
Support class for fermion actions and linear operators.
Linear system solvers.
Handle< LinearOperator< T > > MdagM
multi1d< LatticeColorMatrix > U
LatticeFermion T
Definition: t_clover.cc:11
multi1d< LatticeColorMatrix > Q
Definition: t_clover.cc:12
multi1d< LatticeColorMatrixF > QF
Definition: t_quda_tprec.cc:19
LatticeColorMatrixF UF
Definition: t_quda_tprec.cc:18
LatticeFermionD TD
Definition: t_quda_tprec.cc:22
LatticeColorMatrixD UD
Definition: t_quda_tprec.cc:23
LatticeFermionF TF
Definition: t_quda_tprec.cc:17
multi1d< LatticeColorMatrixD > QD
Definition: t_quda_tprec.cc:24
Axial gauge fixing.