CHROMA
multi_syssolver_mdagm_cg_clover_qphix_w.h
Go to the documentation of this file.
1 // -*- C++ -*-
2 /*! \file
3  * \brief Solve a M*psi=chi linear system by BiCGStab
4  */
5 
6 #ifndef __multi_syssolver_mdagm_clover_qphix_h__
7 #define __multi_syssolver_mdagm_clover_qphix_h__
8 
9 #include "chroma_config.h"
10 
11 #ifdef BUILD_QPHIX
12 #include "handle.h"
13 #include "state.h"
14 #include "syssolver.h"
15 #include "linearop.h"
16 #include "lmdagm.h"
23 #include "io/aniso_io.h"
24 #include <string>
25 
26 #include "chroma_config.h"
27 #include "util/gauge/reunit.h"
28 #include "io/aniso_io.h"
29 
30 // Header files from the dslash package
31 #include "qphix/qphix_config.h"
32 #include "qphix/geometry.h"
33 #include "qphix/dslash_utils.h"
34 #include "qphix/qdp_packer.h"
35 #include "qphix/clover.h"
36 #include "qphix/minvcg.h"
37 #include "qphix/blas_new_c.h"
39 #include "qphix_singleton.h"
40 using namespace QDP;
41 
42 namespace Chroma
43 {
44 
45 //! Richardson system solver namespace
46 namespace MdagMMultiSysSolverQPhiXCloverEnv
47 {
48 //! Register the syssolver
49 bool registerAll();
50 }
51 
52 
53 //! Solve a Clover Fermion System using the QPhiX inverter
54 /*! \ingroup invert
55  *** WARNING THIS SOLVER WORKS FOR Clover FERMIONS ONLY ***
56  */
57 using namespace QPhiXVecTraits;
58 template<typename T, typename U>
59 class MdagMMultiSysSolverQPhiXClover : public MdagMMultiSystemSolver<T>
60 {
61 public:
62  typedef multi1d<U> Q;
63  typedef typename WordType<T>::Type_t REALT;
64  typedef typename QPhiX::Geometry<REALT,VecTraits<REALT>::Vec,VecTraits<REALT>::Soa,VecTraits<REALT>::compress12>::FourSpinorBlock QPhiX_Spinor;
65  typedef typename QPhiX::Geometry<REALT,VecTraits<REALT>::Vec,VecTraits<REALT>::Soa,VecTraits<REALT>::compress12>::SU3MatrixBlock QPhiX_Gauge;
66  typedef typename QPhiX::Geometry<REALT,VecTraits<REALT>::Vec,VecTraits<REALT>::Soa,VecTraits<REALT>::compress12>::CloverBlock QPhiX_Clover;
67 
68 
69  //! Constructor
70  /*!
71  * \param M_ Linear operator ( Read )
72  * \param invParam inverter parameters ( Read )
73  */
74  MdagMMultiSysSolverQPhiXClover(Handle< LinearOperator<T> > A_,
75  Handle< FermState<T,Q,Q> > state_,
76  const MultiSysSolverQPhiXCloverParams& invParam_) :
77  A(A_), invParam(invParam_),
78  clov(new CloverTermT<T, U>()),
79  invclov(new CloverTermT<T, U>())
80  {
81  QDPIO::cout << "MdagMMultiSysSolverQPhiXClover:" << std::endl;
82  QDPIO::cout << "AntiPeriodicT is: " << invParam.AntiPeriodicT << std::endl;
83 
84 
85  QDPIO::cout << "Veclen is " << VecTraits<REALT>::Vec << std::endl;
86  QDPIO::cout << "Soalen is " << VecTraits<REALT>::Soa << std::endl;
87  cbsize_in_blocks = rb[0].numSiteTable()/VecTraits<REALT>::Soa;
88 
89  if ( VecTraits<REALT>::Soa > VecTraits<REALT>::Vec ) {
90  QDPIO::cerr << "PROBLEM: Soalen > Veclen. Please set soalen appropriately (<=VECLEN) at compile time" << std::endl;
91  QDP_abort(1);
92  }
93 
94  Q u(Nd);
95  for(int mu=0; mu < Nd; mu++) {
96  u[mu] = state_->getLinks()[mu];
97  }
98 
99  // Set up aniso coefficients
100  multi1d<Real> aniso_coeffs(Nd);
101  for(int mu=0; mu < Nd; mu++) aniso_coeffs[mu] = Real(1);
102 
103  bool anisotropy = invParam.CloverParams.anisoParam.anisoP;
104  if( anisotropy ) {
105  aniso_coeffs = makeFermCoeffs( invParam.CloverParams.anisoParam );
106  }
107 
108  double t_boundary=(double)(1);
109  // NB: In this case, the state will have boundaries applied.
110  // So we only need to apply our own boundaries if Compression is enabled
111  //
112  if (invParam.AntiPeriodicT) {
113  t_boundary=(double)(-1);
114  // Flip off the boundaries -- Dslash expects them off..
115  u[3] *= where(Layout::latticeCoordinate(3) == (Layout::lattSize()[3]-1),
116  Real(t_boundary), Real(1));
117  }
118 
119  const QPhiX::QPhiXCLIArgs& QPhiXParams = TheQPhiXParams::Instance();
120 
121  QDPIO::cout << "About to grap a Dslash" << std::endl;
122  // For normal QDP++ we need to pack stuff.
123  // For QDP-JIT/LLVM we need to make sure data layout is conformant,
124  // so use 0 padding
125 
126 #ifdef QDP_IS_QDPJIT
127  int pad_xy = 0;
128  int pad_xyz = 0;
129 #else
130  int pad_xy = QPhiXParams.getPxy();
131  int pad_xyz = QPhiXParams.getPxyz();
132 #endif
133 
134  n_blas_simt = QPhiXParams.getSy()*QPhiXParams.getSz(); // The number of SIMTs
135 
136  geom = new QPhiX::Geometry<REALT,
137  VecTraits<REALT>::Vec,
138  VecTraits<REALT>::Soa,
139  VecTraits<REALT>::compress12>(Layout::subgridLattSize().slice(),
140  QPhiXParams.getBy(),
141  QPhiXParams.getBz(),
142  QPhiXParams.getNCores(),
143  QPhiXParams.getSy(),
144  QPhiXParams.getSz(),
145  pad_xy,
146  pad_xyz,
147  QPhiXParams.getMinCt());
148 
149  QDPIO::cout << " Allocating Clover" << std::endl << std::flush ;
150  QPhiX_Clover* A_cb0=(QPhiX_Clover *)geom->allocCBClov();
151  QPhiX_Clover* A_cb1=(QPhiX_Clover *)geom->allocCBClov();
152  clov_packed[0] = A_cb0;
153  clov_packed[1] = A_cb1;
154 
155  QDPIO::cout << " Allocating CloverInv " << std::endl << std::flush ;
156  QPhiX_Clover* A_inv_cb0=(QPhiX_Clover *)geom->allocCBClov();
157  QPhiX_Clover* A_inv_cb1=(QPhiX_Clover *)geom->allocCBClov();
158  invclov_packed[0] = A_inv_cb0;
159  invclov_packed[1] = A_inv_cb1;
160 
161  // Pack the gauge field
162  QDPIO::cout << "Packing gauge field..." << std::endl << std::flush ;
163  QPhiX_Gauge* packed_gauge_cb0=(QPhiX_Gauge *)geom->allocCBGauge();
164  QPhiX_Gauge* packed_gauge_cb1=(QPhiX_Gauge *)geom->allocCBGauge();
165 
166  QPhiX::qdp_pack_gauge<>(u, packed_gauge_cb0,packed_gauge_cb1, *geom);
167  u_packed[0] = packed_gauge_cb0;
168  u_packed[1] = packed_gauge_cb1;
169 
170 
171  QDPIO::cout << "Creating Clover Term" << std::endl;
172  CloverTerm clov_qdp;
173  clov->create(state_, invParam.CloverParams);
174  QDPIO::cout << "Inverting Clover Term" << std::endl;
175  invclov->create(state_, invParam.CloverParams, (*clov));
176  for(int cb=0; cb < 2; cb++) {
177  invclov->choles(cb);
178  }
179  QDPIO::cout << "Done" << std::endl;
180  QDPIO::cout << "Packing Clover term..." << std::endl;
181 
182  for(int cb=0; cb < 2; cb++) {
183  QPhiX::qdp_pack_clover<>((*invclov), invclov_packed[cb], *geom, cb);
184  }
185 
186  for(int cb=0; cb < 2; cb++) {
187  QPhiX::qdp_pack_clover<>((*clov), clov_packed[cb], *geom, cb);
188  }
189  QDPIO::cout << "Done" << std::endl;
190 
191  QDPIO::cout << "Creating the Even Odd Operator" << std::endl;
192  M=new QPhiX::EvenOddCloverOperator<REALT,
193  VecTraits<REALT>::Vec,
194  VecTraits<REALT>::Soa,
195  VecTraits<REALT>::compress12>(u_packed,
196  clov_packed[1],
197  invclov_packed[0],
198  geom,
199  t_boundary,
200  toDouble(aniso_coeffs[0]),
201  toDouble(aniso_coeffs[3]));
202 
203 
204  mcg_solver = new QPhiX::MInvCG<REALT,VecTraits<REALT>::Vec, VecTraits<REALT>::Soa, VecTraits<REALT>::compress12>((*M), invParam.MaxIter, invParam.MaxShifts);
205  }
206 
207 
208  //! Destructor is automatic
209  ~MdagMMultiSysSolverQPhiXClover()
210  {
211 
212  // Need to unalloc all the memory...
213  QDPIO::cout << "Destructing" << std::endl;
214 
215  geom->free(invclov_packed[0]);
216  geom->free(invclov_packed[1]);
217  invclov_packed[0] = nullptr;
218  invclov_packed[1] = nullptr;
219 
220  geom->free(clov_packed[0]);
221  geom->free(clov_packed[1]);
222  clov_packed[0] = nullptr;
223  clov_packed[1] = nullptr;
224 
225  geom->free(u_packed[0]);
226  geom->free(u_packed[1]);
227  u_packed[0] = nullptr;
228  u_packed[1] = nullptr;
229 
230  delete geom;
231 
232  // Evertyhing else freed as handles freed.
233  }
234 
235  //! Return the subset on which the operator acts
236  const Subset& subset() const {return A->subset();}
237 
238  virtual SystemSolverResults_t operator()(multi1d<T>& psi,
239  const multi1d<Real>& shifts,
240  const T& chi) const
241  {
242  SystemSolverResults_t res;
243 
244  START_CODE();
245  StopWatch swatch;
246  swatch.reset(); swatch.start();
247 
248  int n_shift = shifts.size();
249  if( psi.size() != n_shift ) {
250  psi.resize(n_shift);
251  }
252 
253 #if 0
254  for(int s=0; s < n_shift;++s) {
255  psi[s] = zero;
256  }
257 #endif
258 
259  QDPIO::cout << "operator(): n_shift = " << n_shift << std::endl;
260 
261  // Sanity check 1:
262  if (n_shift > invParam.MaxShifts) {
263  QDPIO::cerr << "n_shift="<<n_shift<<" is greater than MaxShifts="
264  << invParam.MaxShifts << std::endl;
265  QDP_abort(1);
266  }
267 
268 
269  // Allocate and pack the RHS
270  QPhiX_Spinor* chi_qphix;
271 
272 #ifndef QDP_IS_QDPJIT
273  // Non QDP-JIT -- we need to allocate and pack a chi
274  chi_qphix =(QPhiX_Spinor *)geom->allocCBFourSpinor();
275  if (chi_qphix == nullptr) {
276  QDPIO::cout << "Unable to allocate chi_qphix" << std::endl;
277  QDP_abort(1);
278  }
279  QDPIO::cout << "Packing chi" << std::endl;
280  QPhiX::qdp_pack_cb_spinor<>(chi, chi_qphix, *geom, 1);
281 #else
282  // QDP_JIT -- we need to grab the pointer to cb=1
283  chi_qphix = (QPhiX_Spinor*)(chi.getFjit())+cbsize_in_blocks;
284 #endif
285 
286  // Allocate the solutions
287  QPhiX_Spinor** psi_qphix;
288  psi_qphix=(QPhiX_Spinor**)ALIGNED_MALLOC(n_shift*sizeof(QPhiX_Spinor*),
289  QPHIX_LLC_CACHE_ALIGN);
290  if( psi_qphix == nullptr ) {
291  QDPIO::cout << "Unable toallocate psi_qphix" << std::endl;
292  QDP_abort(1);
293  }
294 
295  for(int s =0; s < n_shift; s++) {
296 
297 #ifndef QDP_IS_QDPJIT
298  psi_qphix[s] = (QPhiX_Spinor *)geom->allocCBFourSpinor();
299  if( psi_qphix[s] == nullptr ) {
300  QDPIO::cout << "Unable to allocate psi_qphix["<<s<<"]" << std::endl;
301  QDP_abort(1);
302  }
303 #else
304  psi_qphix[s]=(QPhiX_Spinor *)(psi[s].getFjit()) + cbsize_in_blocks;
305 #endif
306  }
307  // Initialize the solutions to zero.
308  {
309  QDPIO::cout << "Zeroing solutions" << std::endl;
310 
311  for(int s=0; s < n_shift; s++) {
312  zeroSpinor(psi_qphix[s],(*geom),n_blas_simt);
313  }
314  }
315 
316  multi1d<double> rsd_final(n_shift);
317  multi1d<double> qphix_shifts(n_shift);
318  for(int s=0; s < n_shift; s++) { qphix_shifts[s] = toDouble(shifts[s]); }
319 
320  multi1d<double> qphix_rsd_target(n_shift);
321 
322  if( invParam.RsdTarget.size() == n_shift ) {
323  // There is a target for each pole. Just copy them over.
324  for(int s=0; s < n_shift; s++) {
325  qphix_rsd_target[s] = toDouble(invParam.RsdTarget[s]);
326  }
327  }
328  else {
329 
330  if( invParam.RsdTarget.size() == 1 ) {
331  // There is only 1 target, for all the poles. Replicate
332  for(int s=0; s < n_shift; s++) {
333  qphix_rsd_target[s] = toDouble(invParam.RsdTarget[0]);
334  }
335  }
336  else{
337  // Blow up!
338  QDPIO::cout << "Size Mismatch between invParam.rsdTarget (size="
339  << invParam.RsdTarget.size()
340  << ") and n_shift=" << n_shift << std::endl;
341  QDP_abort(1);
342  }
343  }
344 
345  // Zero counters
346  unsigned long site_flops=0;
347  unsigned long mv_apps=0;
348  int my_isign=1;
349 
350  // Call solver
351  double start = omp_get_wtime();
352  (*mcg_solver)((QPhiX_Spinor **)psi_qphix,
353  (const QPhiX_Spinor *)chi_qphix,
354  (const int)n_shift,
355  (const double*)qphix_shifts.slice(),
356  (const double*)qphix_rsd_target.slice(),
357  (int &)res.n_count,
358  (double *)rsd_final.slice(),
359  (unsigned long &)site_flops,
360  (unsigned long &)mv_apps,
361  (int)my_isign,
362  (bool)invParam.VerboseP);
363  double end = omp_get_wtime();
364 
365  // For non-QDPJIT where the layout is not conformant
366  // we malloced this chi_qphix spinor, so need to free it
367  // For QDPJIT, we used a raw pointer from an OLattice
368  // So no need to free
369 #ifndef QDP_IS_QDPJIT
370  // Non QDPJIT case:
371  // delete chi_qphix -- we are done with it.
372  geom->free(chi_qphix);
373 #endif
374 
375  for(int s=0; s < n_shift; s++) {
376 
377  // For no-QDPJIT where the layout is non-conformant
378  // we needed to allocate psi_qphix[s]
379  // in that case we need to unpack the data and free psi_qphix[s]
380  //
381  // But in the QDP-JIT case, these pointers come from OLattice<>::getFjit()
382  // and we don't need to unpack or free
383 #ifndef QDP_IS_QDPJIT
384  QPhiX::qdp_unpack_cb_spinor<>(psi_qphix[s],psi[s], *geom, 1);
385  geom->free(psi_qphix[s]); // done with it.
386 #endif
387  }
388  ALIGNED_FREE(psi_qphix); // done with it.
389 
390  // Report the residuum for the smallest shift
391  // tho since other shifts can have looser criteria, this is
392  // somewhat arbitrary
393  int smallest = 0;
394  for (int s=1; s < n_shift; s++) {
395  if ( qphix_shifts[s] < qphix_shifts[smallest] ) smallest = s;
396  }
397  res.resid = rsd_final[smallest];
398 
399  QDPIO::cout << "QPHIX_RESIDUUM_REPORT:" << std::endl;
400  for(int s=0; s < n_shift; s++) {
401  QDPIO::cout << "\t Red_Final["<<s<<"]=" << rsd_final[s] <<std::endl;
402  }
403 
404  // check results -- if native code is slow, this may be slow
405  // so I give the option to turn it off...
406  if ( invParam.SolutionCheckP ) {
407  Double b2 = norm2(chi, A->subset());
408  QDPIO::cout << "QPHIX_CLOVER_MULTI_SHIFT_CG_MDAGM_SOLVER: Residua Check: " << std::endl;
409  for( int s=0; s < n_shift; s++) {
410  T r = chi;
411  T tmp,tmp2;
412 
413  (*A)(tmp, psi[s], PLUS);
414  (*A)(tmp2, tmp, MINUS);
415  tmp[A->subset()] = shifts[s]*psi[s] + tmp2; // Shift it.
416 
417  r[ A->subset()] -= tmp;
418  Double r2 = norm2(r,A->subset());
419  Double rel_resid = sqrt(r2/b2);
420  QDPIO::cout << "\t shift["<<s<<"] Actual || r || / || b || = "
421  << rel_resid << std::endl;
422  if ( toDouble(rel_resid) > qphix_rsd_target[s]*toDouble(invParam.RsdToleranceFactor) ) {
423  QDPIO::cout << "SOLVE FAILED: rel_resid=" << rel_resid
424  << " target=" << qphix_rsd_target[s]
425  << " tolerance_factor=" << invParam.RsdToleranceFactor
426  << " max tolerated=" << qphix_rsd_target[s]*toDouble(invParam.RsdToleranceFactor) << std::endl;
427  QDP_abort(1);
428  }
429  }
430  }
431 
432  int num_cb_sites = Layout::vol()/2;
433  unsigned long total_flops = (site_flops + (1320+504+1320+504+48)*mv_apps)*num_cb_sites;
434  double gflops = (double)(total_flops)/(1.0e9);
435 
436  double total_time = end - start;
437 
438  QDPIO::cout << "QPHIX_CLOVER_MULTI_SHIFT_CG_MDAGM_SOLVER: Iters=" << res.n_count << " Solver Time="<< total_time <<" (sec) Performance=" << gflops / total_time << " GFLOPS" << std::endl;
439  swatch.stop();
440  QDPIO::cout << "QPHIX_CLOVER_MULTI_SHIFT_CG_MDAGM_SOLVER: total time: " << swatch.getTimeInSeconds() << " (sec)" << std::endl;
441  END_CODE();
442  return res;
443  }
444 
445 private:
446  // Hide default constructor
447  MdagMMultiSysSolverQPhiXClover() {}
448 
449 
450 
451  Handle< LinearOperator<T> > A;
452  const MultiSysSolverQPhiXCloverParams invParam;
453  Handle< CloverTermT<T, U> > clov;
454  Handle< CloverTermT<T, U> > invclov;
455 
456  QPhiX::Geometry<REALT, VecTraits<REALT>::Vec, VecTraits<REALT>::Soa, VecTraits<REALT>::compress12>* geom;
457 
458  Handle< QPhiX::EvenOddCloverOperator<REALT, VecTraits<REALT>::Vec, VecTraits<REALT>::Soa, VecTraits<REALT>::compress12> > M;
459 
460  Handle< QPhiX::MInvCG<REALT,VecTraits<REALT>::Vec, VecTraits<REALT>::Soa, VecTraits<REALT>::compress12> > mcg_solver;
461 
462 
463  QPhiX_Clover* invclov_packed[2];
464  QPhiX_Clover* clov_packed[2];
465  QPhiX_Gauge* u_packed[2];
466  size_t cbsize_in_blocks;
467  int n_blas_simt;
468 
469 };
470 
471 
472 } // End namespace
473 
474 #endif // BUILD_QPHIX
475 #endif
476 
Anisotropy parameters.
#define END_CODE()
Definition: chromabase.h:65
#define START_CODE()
Definition: chromabase.h:64
void create(Handle< FermState< T, multi1d< U >, multi1d< U > > > fs, const CloverFermActParams &param_)
Creation routine.
Include possibly optimized Clover terms.
int mu
Definition: cool.cc:24
Even-odd preconditioned Clover fermion linear operator.
Class for counted reference semantics.
unsigned s
Definition: ldumul_w.cc:37
Linear Operators.
M^dag*M composition of a linear operator.
Double tmp
Definition: meslate.cc:60
Nd
Definition: meslate.cc:74
Double tmp2
Definition: mesq.cc:30
Disambiguator for multi-shift MdagM system solvers.
bool registerAll()
Register all the factories.
multi1d< Hadron2PtContraction_t > operator()(const multi1d< LatticeColorMatrix > &u)
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
static multi1d< LatticeColorMatrix > u
LinOpSysSolverMGProtoClover::Q Q
QDPCloverTerm CloverTerm
Definition: clover_term_w.h:92
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
QDPCloverTermT< T, U > CloverTermT
Definition: clover_term_w.h:97
A(A, psi, r, Ncb, PLUS)
int cb
Definition: invbicg.cc:120
Double zero
Definition: invbicg.cc:106
multi1d< Real > makeFermCoeffs(const AnisoParam_t &aniso)
Make fermion coefficients.
Definition: aniso_io.cc:63
FloatingPoint< double > Double
Definition: gtest.h:7351
multi1d< LatticeFermion > r(Ncb)
chi
Definition: pade_trln_w.cc:24
psi
Definition: pade_trln_w.cc:191
Periodic ferm state and a creator.
Fermion action factories.
Reunitarize in place a color matrix to SU(N)
Simple fermionic BC.
Support class for fermion actions and linear operators.
Linear system solvers.
multi1d< LatticeColorMatrix > U
LatticeFermion T
Definition: t_clover.cc:11
multi1d< LatticeColorMatrix > Q
Definition: t_clover.cc:12