CHROMA
quda_mg_utils.h
Go to the documentation of this file.
1 /*
2  * quda_mg_utils.h
3  *
4  * Created on: Oct 18, 2016
5  * Author: bjoo
6  */
7 
8 #ifndef LIB_ACTIONS_FERM_INVERT_QUDA_SOLVERS_QUDA_MG_UTILS_H_
9 #define LIB_ACTIONS_FERM_INVERT_QUDA_SOLVERS_QUDA_MG_UTILS_H_
10 
11 #include "chromabase.h"
12 
13 #include <quda.h>
16 
17 #include <cuda_runtime_api.h>
18 
19 namespace Chroma {
20 
21  namespace QUDAMGUtils {
22 
23  // A Strut for holding pointers essentially
25  QudaInvertParam mg_inv_param;
26  QudaMultigridParam mg_param;
29  mg_inv_param = newQudaInvertParam();
30  mg_param = newQudaMultigridParam();
31  preconditioner = nullptr;
32  }
33  };
34 
35 
36  template<typename T>
38  {
39  MGSubspacePointers* ret_val = new MGSubspacePointers();
40 
41  // References so I can keep code below
42  QudaInvertParam& mg_inv_param = ret_val->mg_inv_param;
43  QudaMultigridParam& mg_param = ret_val->mg_param;
44 
45  QudaPrecision_s cpu_prec;
46  QudaPrecision_s gpu_prec;
47  QudaPrecision_s gpu_half_prec;
48  int s = sizeof( typename WordType<T>::Type_t );
49  if (s == 4) {
50  cpu_prec = QUDA_SINGLE_PRECISION;
51  }
52  else {
53  cpu_prec = QUDA_DOUBLE_PRECISION;
54  }
55 
56  // CUDA_PRECISION
57  {
58  auto found = theChromaToQudaPrecisionTypeMap::Instance().find(invParam.cudaPrecision);
59  if ( found != theChromaToQudaPrecisionTypeMap::Instance().end() ) {
60  gpu_prec = found->second;
61  }
62  else {
63  // Not found
64  gpu_prec = cpu_prec;
65  }
66  }
67 
68  #if 0
69  // Work out GPU precision
70  switch( invParam.cudaPrecision ) {
71  case HALF:
72  gpu_prec = QUDA_HALF_PRECISION;
73  break;
74  case SINGLE:
75  gpu_prec = QUDA_SINGLE_PRECISION;
76  break;
77  case DOUBLE:
78  gpu_prec = QUDA_DOUBLE_PRECISION;
79  break;
80  default:
81  gpu_prec = cpu_prec;
82  break;
83  }
84 #endif
85 
86  // CUDA_PRECISION
87  {
89  if ( found != theChromaToQudaPrecisionTypeMap::Instance().end() ) {
90  gpu_half_prec = found->second;
91  }
92  else {
93  // Not found
94  gpu_half_prec = gpu_prec;
95  }
96  }
97 
98 #if 0
99  // Work out GPU Sloppy precision
100  // Default: No Sloppy
101  switch( invParam.cudaSloppyPrecision ) {
102  case HALF:
103  gpu_half_prec = QUDA_HALF_PRECISION;
104  break;
105  case SINGLE:
106  gpu_half_prec = QUDA_SINGLE_PRECISION;
107  break;
108  case DOUBLE:
109  gpu_half_prec = QUDA_DOUBLE_PRECISION;
110  break;
111  default:
112  gpu_half_prec = gpu_prec;
113  break;
114  }
115 #endif
116 
117  QDPIO::cout<<"Creating MG subspace."<<std::endl;
118  //Taken from various places in the old constructor.
119 
120  mg_inv_param.dslash_type = QUDA_CLOVER_WILSON_DSLASH;
121  mg_inv_param.inv_type = QUDA_GCR_INVERTER;
122  mg_inv_param.tol = 1e-10;
123  mg_inv_param.maxiter = 10000;
124  mg_inv_param.reliable_delta = 1e-10;
125  mg_inv_param.cpu_prec = cpu_prec;
126  mg_inv_param.cuda_prec = gpu_prec;
127  mg_inv_param.cuda_prec_sloppy = gpu_half_prec;
128  mg_inv_param.cuda_prec_precondition = gpu_half_prec;
129  //Clover stuff
130  mg_inv_param.clover_cpu_prec = cpu_prec;
131  mg_inv_param.clover_cuda_prec = gpu_prec;
132  mg_inv_param.clover_cuda_prec_sloppy = gpu_half_prec;
133  mg_inv_param.clover_cuda_prec_precondition = gpu_half_prec;
134  mg_inv_param.clover_order = QUDA_PACKED_CLOVER_ORDER;
135  //
136  //Done...
137  // Autotuning
138  if( invParam.tuneDslashP ) {
139  QDPIO::cout << "Enabling MG Dslash Autotuning" << std::endl;
140  mg_inv_param.tune = QUDA_TUNE_YES;
141  }
142  else {
143  QDPIO::cout << "Disabling MG Dslash Autotuning" << std::endl;
144  mg_inv_param.tune = QUDA_TUNE_NO;
145  }
146  if( invParam.MULTIGRIDParamsP ) {
147  QDPIO::cout << "Setting MULTIGRID solver params" << std::endl;
148  // Dereference handle
149  const MULTIGRIDSolverParams& ip = *(invParam.MULTIGRIDParams);
150 
151  // CUDA_PRECISION
152  {
153  auto found = theChromaToQudaPrecisionTypeMap::Instance().find(ip.prec);
154  if ( found != theChromaToQudaPrecisionTypeMap::Instance().end() ) {
155  mg_inv_param.cuda_prec_precondition = found->second;
156  mg_inv_param.clover_cuda_prec_precondition = found->second;
157  }
158  else {
159  // Default:
160  mg_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
161  mg_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
162  }
163  }
164 
165 #if 0
166  // Set preconditioner precision
167  switch( ip.prec ) {
168  case HALF:
169  mg_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
170  mg_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
171  break;
172  case SINGLE:
173  mg_inv_param.cuda_prec_precondition = QUDA_SINGLE_PRECISION;
174  mg_inv_param.clover_cuda_prec_precondition = QUDA_SINGLE_PRECISION;
175  break;
176  case DOUBLE:
177  mg_inv_param.cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
178  mg_inv_param.clover_cuda_prec_precondition = QUDA_DOUBLE_PRECISION;
179  break;
180  default:
181  mg_inv_param.cuda_prec_precondition = QUDA_HALF_PRECISION;
182  mg_inv_param.clover_cuda_prec_precondition = QUDA_HALF_PRECISION;
183  break;
184  }
185 #endif
186  mg_inv_param.gcrNkrylov = ip.precond_gcr_nkrylov;
187  if( ip.verbosity == true ) {
188  mg_inv_param.verbosity = QUDA_VERBOSE;
189  }
190  else {
191  mg_inv_param.verbosity = QUDA_SUMMARIZE;
192  }
193 
194 
195 
196 
197  mg_inv_param.verbosity_precondition = QUDA_SILENT;
198 
199 
200 
201  mg_inv_param.sp_pad = 0;
202  mg_inv_param.cl_pad = 0;
203  mg_inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO;
204  mg_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
205  mg_inv_param.dirac_order = QUDA_DIRAC_ORDER;
206  mg_inv_param.input_location = QUDA_CPU_FIELD_LOCATION;
207  mg_inv_param.output_location = QUDA_CPU_FIELD_LOCATION;
208  // mg_inv_param.use_init_guess = QUDA_USE_INIT_GUESS_NO;
209  mg_inv_param.dagger = QUDA_DAG_NO;
210  mg_inv_param.kappa = 0.5;
211  mg_inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION;
212  mg_inv_param.clover_coeff = 1.0; // Dummy not used
213  mg_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
214  mg_inv_param.solution_type = QUDA_MAT_SOLUTION;
215  mg_inv_param.solve_type = QUDA_DIRECT_SOLVE;
216  mg_param.invert_param = &mg_inv_param;
217  mg_inv_param.Ls = 1;
218  mg_param.n_level = ip.mg_levels;
219 
220  if (ip.check_multigrid_setup == true ) {
221  mg_param.run_verify = QUDA_BOOLEAN_YES;
222  }
223  else {
224  mg_param.run_verify = QUDA_BOOLEAN_NO;
225  }
226 
227  for (int i=0; i<mg_param.n_level-1; ++i) {
228  if( ip.setup_on_gpu[i] ) {
229  mg_param.setup_location[i] = QUDA_CUDA_FIELD_LOCATION;
230  }
231  else {
232  mg_param.setup_location[i] = QUDA_CPU_FIELD_LOCATION;
233  }
234 
235  }
236 
237  for (int i=0; i<mg_param.n_level; i++) {
238  for (int j=0; j< Nd; j++) {
239  if( i < mg_param.n_level-1 ) {
240  mg_param.geo_block_size[i][j] = ip.blocking[i][j];
241  }
242  else {
243  mg_param.geo_block_size[i][j] = 4;
244  }
245  }
246 
247  QDPIO::cout << "Set Level " << i << " blocking as: ";
248  for(int j=0; j < 4; ++j) {
249  QDPIO::cout << " " << mg_param.geo_block_size[i][j];
250  }
251  QDPIO::cout << std::endl;
252 
253 
254  mg_param.spin_block_size[i] = 1;
255  // FIXME: Elevate ip.nvec, ip.nu_pre, ip.nu_post, ip.tol to arrays in the XML
256  if ( i < mg_param.n_level-1) {
257  mg_param.n_vec[i] = ip.nvec[i];
258  mg_param.nu_pre[i] = ip.nu_pre[i];
259  mg_param.nu_post[i] = ip.nu_post[i];
260  }
261 
262  mg_param.mu_factor[i] = 1.0; // default is one in QUDA test program
263 
264  // Hardwire setup solver now
265  if ( i < mg_param.n_level-1) {
266  mg_param.setup_inv_type[i] = theChromaToQudaSolverTypeMap::Instance()[ ip.subspaceSolver[i]];
267  mg_param.setup_tol[i] = toDouble(ip.rsdTargetSubspaceCreate[i]);
268  mg_param.setup_maxiter[i] = ip.maxIterSubspaceCreate[i];
269  mg_param.setup_maxiter_refresh[i] = ip.maxIterSubspaceRefresh[i]; // Will set this from outside...
270  mg_param.num_setup_iter[i] =1; // 1 refine for now
271  mg_param.precision_null[i] = mg_inv_param.cuda_prec_precondition;
272  }
273 
274  // FIXME: Allow coarse_solver as an XML Parameter
275  // FIXME: What about bottom solver. Always GCR or should I make it BiCGStab?
276 
277  if ( i > 0 ) {
278 
279  switch(ip.coarseSolverType[i-1]) {
280  case GCR:
281  mg_param.coarse_solver[i] = QUDA_GCR_INVERTER;
282  break;
283  case CA_GCR:
284  mg_param.coarse_solver[i] = QUDA_CA_GCR_INVERTER;
285  if ( i != mg_param.n_level-1 ) {
286  QDPIO::cout << "ERROR: Right now CA_GCR is only allowed on the bottom level as a coarse solver"
287  << std::endl;
288  QDP_abort(1);
289 
290  }
291  break;
292  default:
293  QDPIO::cout << "Invalid coarse solver. Right now only GCR and CA_GCR solvers are allowed" << std::endl;
294  break;
295  };
296  }
297  else {
298  // Level 0 isolver is the outer solver. So this parameter is ignored. Set it to something sensible.
299  mg_param.coarse_solver[0] = QUDA_GCR_INVERTER;
300 
301  }
302 
303  // However maxiter and tol are used on level 0 to set precond_tol and precond_maxiter
304  mg_param.coarse_solver_tol[i] = toDouble(ip.tol[i]);
305  mg_param.coarse_solver_maxiter[i] = ip.maxIterations[i];
306 
307 
308  // Smoother Type is needed for every level as we may want to use a smoother
309  // as a preconditioner on the coarsest level too (even tho we don't recurse to yet another leve0
310  switch( ip.smootherType[i] ) {
311  case MR:
312  mg_param.smoother[i] = QUDA_MR_INVERTER;
313  mg_param.smoother_tol[i] = toDouble(ip.smootherTol[i]);
314  mg_param.smoother_solve_type[i] = QUDA_DIRECT_PC_SOLVE;
315  mg_param.omega[i] = toDouble(ip.relaxationOmegaMG[i]);
316  mg_param.smoother_schwarz_type[i] = theChromaToQudaSchwarzTypeMap::Instance()[ ip.smootherSchwarzType[i] ];
317  mg_param.smoother_schwarz_cycle[i] = ip.smootherSchwarzCycle[i];
318  break;
319  case CA_GCR:
320  mg_param.smoother[i] = QUDA_CA_GCR_INVERTER;
321  mg_param.smoother_tol[i] = toDouble(ip.smootherTol[i]);
322  mg_param.smoother_solve_type[i] = QUDA_DIRECT_PC_SOLVE;
323  mg_param.omega[i] = toDouble(ip.relaxationOmegaMG[i]);
324  mg_param.smoother_schwarz_type[i] = theChromaToQudaSchwarzTypeMap::Instance()[ ip.smootherSchwarzType[i] ];
325  mg_param.smoother_schwarz_cycle[i] = ip.smootherSchwarzCycle[i];
326  break;
327  default:
328  QDPIO::cout << "Unknown or no smother type specified, no smoothing inverter will be used." << std::endl;
329  mg_param.smoother[i] = QUDA_INVALID_INVERTER;
330  QDP_abort(1);
331  break;
332  }
333 
334  // if the value is DEFAULT -- leave the smoother halo precision unset.
335  if( ip.smootherHaloPrecision[i] != DEFAULT ) {
336  mg_param.smoother_halo_precision[i] = (theChromaToQudaPrecisionTypeMap::Instance())[ip.smootherHaloPrecision[i]];
337  }
338 #if 0
339  if( ip.smootherHaloPrecision[i] != DEFAULT ) {
340 
341  switch( ip.smootherHaloPrecision[i] ) {
342  case QUARTER :
343  mg_param.smoother_halo_precision[i] = QUDA_QUARTER_PRECISION;
344  break;
345  case HALF :
346  mg_param.smoother_halo_precision[i] = QUDA_HALF_PRECISION;
347  break;
348  case SINGLE :
349  mg_param.smoother_halo_precision[i] = QUDA_SINGLE_PRECISION;
350  break;
351  case DOUBLE :
352  mg_param.smoother_halo_precision[i] = QUDA_DOUBLE_PRECISION;
353  break;
354  default:
355  // Leave unset -- default behaviour -- should never be reached
356  break;
357  }
358  }
359  QDPIO::cout << "CKheckopoint 2" << std::endl << std::flush;
360 #endif
361  mg_param.global_reduction[i] = (mg_param.smoother_schwarz_type[i] == QUDA_INVALID_SCHWARZ) ? QUDA_BOOLEAN_YES : QUDA_BOOLEAN_NO;
362  mg_param.location[i] = QUDA_CUDA_FIELD_LOCATION;
363 
364  if ( ip.cycle_type == "MG_VCYCLE" ) {
365  mg_param.cycle_type[i] = QUDA_MG_CYCLE_VCYCLE;
366  } else if (ip.cycle_type == "MG_RECURSIVE" ) {
367  mg_param.cycle_type[i] = QUDA_MG_CYCLE_RECURSIVE;
368  } else {
369  QDPIO::cout << "Unknown Cycle Type" << ip.cycle_type << std::endl;
370  QDP_abort(1);
371  }
372 
373 
374  switch( mg_param.cycle_type[i] ) {
375  case QUDA_MG_CYCLE_RECURSIVE :
376  mg_param.coarse_grid_solution_type[i] = QUDA_MATPC_SOLUTION;
377  break;
378  case QUDA_MG_CYCLE_VCYCLE :
379  mg_param.coarse_grid_solution_type[i] = QUDA_MAT_SOLUTION;
380  break;
381  default:
382  QDPIO::cerr << "Should never get here" << std::endl;
383  QDP_abort(1);
384  break;
385  }
386  }
387  mg_param.setup_type = QUDA_NULL_VECTOR_SETUP;
388  mg_param.pre_orthonormalize = QUDA_BOOLEAN_NO;
389  mg_param.post_orthonormalize = QUDA_BOOLEAN_YES;
390 
391  // LEvel 0 must always be matpc
392  mg_param.coarse_grid_solution_type[0] = QUDA_MATPC_SOLUTION;
393  // only coarsen the spin on the first restriction
394  mg_param.spin_block_size[0] = 2;
395  // coarse grid solver is GCR
396  mg_param.smoother[ip.mg_levels-1] = QUDA_GCR_INVERTER;
397  mg_param.compute_null_vector = ip.generate_nullspace ? QUDA_COMPUTE_NULL_VECTOR_YES
398  : QUDA_COMPUTE_NULL_VECTOR_NO;
399  mg_param.generate_all_levels = ip.generate_all_levels ? QUDA_BOOLEAN_YES
400  : QUDA_BOOLEAN_NO;
401  for(int l=0; l < ip.mg_levels; ++l) {
402  mg_param.vec_infile[l][0] = '\0';
403  mg_param.vec_outfile[l][0] = '\0';
404  }
405  QDPIO::cout<<"Basic MULTIGRID params copied."<<std::endl;
406  }
407  // setup the multigrid solver
408  // this allocates memory
409  QDPIO::cout << "About to Call newMultigridQuda" << std::endl;
410 
411  ret_val->preconditioner = newMultigridQuda(&mg_param);
412  QDPIO::cout<<"NewMultigridQuda state initialized."<<std::endl;
413  QDPIO::cout<<"MULTIGRID preconditioner set."<<std::endl;
414  QDPIO::cout << "After call to newMultigridQuda" <<std::endl;
415 
416  for(int i=0; i < mg_param.n_level; ++i) {
417  QDPIO::cout << "Set Level " << i << " blocking as: ";
418 
419  for(int j=0; j < 4; ++j) {
420  QDPIO::cout << " " << mg_param.geo_block_size[i][j];
421  }
422  QDPIO::cout << std::endl;
423  }
424 
425 #if 1
426  QDPIO::cout <<"MULTIGrid Param Dump" << std::endl;
427  printQudaMultigridParam(&mg_param);
428 #endif
429  // We have just refreshed so not due for refresh.
430  return ret_val;
431  }
432 
433  inline
434  void delete_subspace(const std::string SubspaceID)
435  {
436  // REcover pointer to subspace pointers array
437  MGSubspacePointers* subspace_pointers = TheNamedObjMap::Instance().getData< MGSubspacePointers* >(SubspaceID);
438 
439  // Nuke the preconditioner
440  destroyMultigridQuda(subspace_pointers->preconditioner);
441 
442  // Delete the storage (this automatically takes care the mg_params and mg_inv_param
443  // Which are just member structs.
444  delete subspace_pointers;
445 
446  // Erase the entry
447  TheNamedObjMap::Instance().erase(SubspaceID);
448 
449  }
450 
451  inline
452  size_t getCUDAFreeMem(void)
453  {
454  cudaError_t ret;
455  size_t free, total;
456  ret = cudaMemGetInfo( &free, &total );
457  if ( ret != cudaSuccess ) {
458  QDPIO::cout << "cudaMemGetInfo() returned unsuccesful status: " << ret << std::endl;
459  QDP_abort(1);
460  }
461  return free;
462  }
463 
464 
465 
466  } // MG Utils
467 } // Chroma
468 
469 
470 
471 
472 #endif /* LIB_ACTIONS_FERM_INVERT_QUDA_SOLVERS_QUDA_MG_UTILS_H_ */
Primary include file for CHROMA library code.
static T & Instance()
Definition: singleton.h:432
unsigned j
Definition: ldumul_w.cc:35
Nd
Definition: meslate.cc:74
Named object function std::map.
MGSubspacePointers * create_subspace(const SysSolverQUDAMULTIGRIDCloverParams &invParam)
Definition: quda_mg_utils.h:37
void delete_subspace(const std::string SubspaceID)
size_t getCUDAFreeMem(void)
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
int i
Definition: pbg5p_w.cc:55
multi1d< LatticeFermion > s(Ncb)
::std::string string
Definition: gtest.h:1979
int l
Definition: pade_trln_w.cc:111
multi1d< QudaSchwarzMethod > smootherSchwarzType
multi1d< QudaSolverType > subspaceSolver
multi1d< QudaPrecisionType > smootherHaloPrecision
multi1d< QudaSolverType > coarseSolverType
multi1d< QudaSolverType > smootherType
multi1d< multi1d< int > > blocking