CHROMA
eoprec_nef_linop_array_w.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief 4D-style even-odd preconditioned NEF domain-wall linear operator
3  */
4 
5 #include "chromabase.h"
7 
8 using namespace QDP::Hints;
9 
10 
11 namespace Chroma
12 {
13 
14  //! Creation routine
15  /*! \ingroup fermact
16  *
17  * \param fs gauge field (Read)
18  * \param WilsonMass_ DWF height (Read)
19  * \param b5_ NEF parameter (Read)
20  * \param c5_ NEF parameter (Read)
21  * \param m_q_ quark mass (Read)
22  * \param N5_ extent of 5D (Read)
23  */
24  void
25  EvenOddPrecNEFDWLinOpArray::create(Handle< FermState<T,P,Q> > fs,
26  const Real& WilsonMass_, const Real &b5_,
27  const Real &c5_, const Real& m_q_, int N5_)
28  {
29  START_CODE();
30 
31  WilsonMass = WilsonMass_;
32  m_q = m_q_;
33  b5 = b5_;
34  c5 = c5_;
35  N5 = N5_;
36 
37  D.create(fs, N5);
38 
39 
40  c5InvTwoKappa = 1.0 - c5*(Nd-WilsonMass) ;
41  c5TwoKappa = 1.0 / c5InvTwoKappa ;
42 
43  b5InvTwoKappa = 1.0 + b5*(Nd-WilsonMass) ;
44  b5TwoKappa = 1.0 / b5InvTwoKappa ;
45 
46  //InvTwoKappa = b5InvTwoKappa/c5InvTwoKappa ;
47  TwoKappa = c5InvTwoKappa/b5InvTwoKappa ;
48  Kappa = TwoKappa/2.0 ;
49 
50  invDfactor =1.0/(1.0 + m_q*pow(TwoKappa,N5)) ;
51 
52 
53  END_CODE();
54  }
55 
56 
57  //! Apply the even-even (odd-odd) coupling piece of the domain-wall fermion operator
58  /*!
59  * \ingroup linop
60  *
61  * The operator acts on the entire lattice.
62  * Total flopcount: 6 *N5*Nc*Ns flops/site
63  *
64  * \param psi Pseudofermion field (Read)
65  * \param isign Flag ( PLUS | MINUS ) (Read)
66  * \param cb checkerboard ( 0 | 1 ) (Read)
67  */
68  void
69  EvenOddPrecNEFDWLinOpArray::applyDiag(multi1d<LatticeFermion>& chi,
70  const multi1d<LatticeFermion>& psi,
71  enum PlusMinus isign,
72  const int cb) const
73  {
74  START_CODE();
75 
76  if( chi.size() != N5 ) chi.resize(N5);
77 
78  // Real c5Fact(0.5*c5InvTwoKappa) ; // The 0.5 is for the P+ and P-
79 
80  Real c5InvTwoKappamf = m_q*c5InvTwoKappa;
81  switch ( isign ) {
82 
83  case PLUS:
84  {
85  // Total flopcount:6 N5 Nc Ns flops/site
86 
87  // This loop (N5-2) * 6 Nc Ns flops/site
88  for(int s(1);s<N5-1;s++) { // 1/2k psi[s] - P_- * psi[s+1] - P_+ * psi[s-1]
89 
90 
91  // chi[s][rb[cb]] = b5InvTwoKappa*psi[s] -
92  // c5Fact*( psi[s+1] + psi[s-1] + GammaConst<Ns,Ns*Ns-1>()*(psi[s-1] - psi[s+1]) ) ;
93 
94  // Recoded using chiralProject and BLASology
95  // 4Nc Ns flops/site
96  chi[s][rb[cb]] = b5InvTwoKappa*psi[s] - c5InvTwoKappa*chiralProjectPlus(psi[s-1]);
97  // 2Nc Ns flops/site
98  chi[s][rb[cb]] -= c5InvTwoKappa*chiralProjectMinus(psi[s+1]);
99 
100  }
101 
102 
103 
104  //s=0 -- 1/2k psi[0] - P_- * psi[1] + mf* P_+ * psi[N5-1]
105  // chi[0][rb[cb]] = b5InvTwoKappa*psi[0] -
106  // c5Fact*( psi[1] - m_q*psi[N5m1] - GammaConst<Ns,Ns*Ns-1>()*(m_q*psi[N5m1] + psi[1]) ) ;
107 
108  // Recoded using chiralProject with BLAS-ology. c5Fact-s factor of
109  // 1/2 absorbed by projectors
110 
111  // These two lines 6Nc Ns flops
112  // 4Nc Ns flops/site
113  chi[0][rb[cb]] = b5InvTwoKappa*psi[0] - c5InvTwoKappa*chiralProjectMinus(psi[1]);
114  // 2Nc Ns flops/site
115  chi[0][rb[cb]] += c5InvTwoKappamf*chiralProjectPlus(psi[N5-1]);
116 
117 
118  //s=N5-1 -- 1/2k psi[N5-1] +mf* P_- * psi[0] - P_+ * psi[N5-2]
119  // chi[N5m1][rb[cb]] = b5InvTwoKappa*psi[N5m1] -
120  // c5Fact*( psi[N5m2] - m_q *psi[0] + GammaConst<Ns,Ns*Ns-1>()*(psi[N5m2] + m_q * psi[0]) );
121 
122  // Recoded with chiral projector and BLAS ology
123  // These two lines: 6Nc Ns flops /site
124  // 4Nc Ns flops/site
125  chi[N5-1][rb[cb]] = b5InvTwoKappa*psi[N5-1] - c5InvTwoKappa*chiralProjectPlus(psi[N5-2]);
126 
127  // 2Nc Ns flops/site
128  chi[N5-1][rb[cb]] += c5InvTwoKappamf*chiralProjectMinus(psi[0]);
129  }
130  break ;
131 
132  case MINUS:
133  {
134  for(int s(1);s<N5-1;s++) { // 1/2k psi[s] - P_+ * psi[s+1] - P_- * psi[s-1]
135  // chi[s][rb[cb]] = b5InvTwoKappa*psi[s] -
136  // c5Fact*( psi[s+1] + psi[s-1] + GammaConst<Ns,Ns*Ns-1>()*(psi[s+1] - psi[s-1]) ) ;
137 
138  // Recoded using chiral projector and BLASology
139  chi[s][rb[cb]] = b5InvTwoKappa*psi[s] - c5InvTwoKappa*chiralProjectPlus(psi[s+1]);
140  chi[s][rb[cb]] -= c5InvTwoKappa*chiralProjectMinus(psi[s-1]);
141  }
142 
143  //s=0 -- 1/2k psi[0] - P_+ * psi[1] + mf* P_- * psi[N5-1]
144  // chi[0][rb[cb]] = b5InvTwoKappa*psi[0] -
145  // c5Fact*( psi[1] - m_q*psi[N5-1] + GammaConst<Ns,Ns*Ns-1>()*( psi[1]+m_q*psi[N5-1]) ) ;
146  chi[0][rb[cb]] = b5InvTwoKappa*psi[0] - c5InvTwoKappa*chiralProjectPlus(psi[1]);
147  chi[0][rb[cb]] += c5InvTwoKappamf * chiralProjectMinus(psi[N5-1]);
148 
149 
150 
151  //s=N5-1 -- 1/2k psi[N5-1] + mf* P_+ * psi[0] - P_- * psi[N5-2]
152  // chi[N5-1][rb[cb]] = b5InvTwoKappa*psi[N5-1] -
153  // c5Fact*( psi[N5-2] - m_q *psi[0] - GammaConst<Ns,Ns*Ns-1>()*(psi[N5-2] + m_q * psi[0]) );
154 
155  // Recoded using Chiral Projector and BLAS ology
156  chi[N5-1][rb[cb]] = b5InvTwoKappa*psi[N5-1] - c5InvTwoKappa*chiralProjectMinus(psi[N5-2]);
157  chi[N5-1][rb[cb]] += c5InvTwoKappamf*chiralProjectPlus(psi[0]);
158  }
159  break ;
160  }
161 
162  END_CODE();
163  }
164 
165 
166  //! Apply the inverse even-even (odd-odd) coupling piece of the domain-wall fermion operator
167  /*!
168  * \ingroup linop
169  *
170  * The operator acts on the entire lattice
171  * Total flopcount: (10*N5 - 8)*Nc*Ns flops
172  *
173  * \param psi Pseudofermion field (Read)
174  * \param isign Flag ( PLUS | MINUS ) (Read)
175  * \param cb checkerboard ( 0 | 1 ) (Read)
176  */
177  void
178  EvenOddPrecNEFDWLinOpArray::applyDiagInv(multi1d<LatticeFermion>& chi,
179  const multi1d<LatticeFermion>& psi,
180  enum PlusMinus isign,
181  const int cb) const
182  {
183  START_CODE();
184 
185  if( chi.size() != N5 ) chi.resize(N5);
186 
187 
188  switch ( isign ) {
189 
190  case PLUS:
191  {
192 
193  // Copying and scaling, application of Lm^{-1} and L^{-1} and D^{-1}
194  // Coalesced into a fused loop.
195 
196  Real fact = m_q*TwoKappa*b5TwoKappa*invDfactor;
197  Real invDTwoKappa = invDfactor*TwoKappa;
198  Real invDb5TwoKappa = invDfactor*b5TwoKappa;
199 
200  // 2Nc Ns flops/site
201  chi[0][rb[cb]] = b5TwoKappa*psi[0];
202 
203  // 4Nc Ns flops/site
204  chi[N5-1][rb[cb]] = invDb5TwoKappa*psi[N5-1]
205  - fact * chiralProjectMinus(psi[0]);
206 
207  fact *= TwoKappa;
208 
209  // (N5-2)*6NcNs flops/site
210  for(int s = 1; s < N5-1; s++) {
211 
212  // 2Nc Ns flops/site
213  chi[s][rb[cb]] = b5TwoKappa * psi[s] ;
214  // 2Nc Ns flops/site
215  chi[s][rb[cb]] += TwoKappa*chiralProjectPlus(chi[s-1]);
216  // 2Nc Ns flops/site
217  chi[N5-1][rb[cb]] -= fact * chiralProjectMinus(psi[s]);
218  fact *= TwoKappa ;
219 
220  }
221 
222  // 2NcNs flops/site
223  chi[N5-1][rb[cb]] += invDTwoKappa*chiralProjectPlus(chi[N5-2]);
224 
225 
226  //The inverse of R. Back substitution...... Getting there!
227  // (N5-1)*2Nc*Ns flops /site total
228  for(int s(N5-2);s>-1;s--)
229  // 2Nc Ns2 flops/site
230  chi[s][rb[cb]] += TwoKappa*chiralProjectMinus(chi[s+1]);
231 
232  //Finally the inverse of Rm
233  fact = m_q*TwoKappa;
234 
235  // (N5-1)*2NcNs flops/site total
236  for(int s(0);s<N5-1;s++){
237  // 2Nc Ns flops
238  chi[s][rb[cb]] -= fact*chiralProjectPlus(chi[N5-1]);
239  fact *= TwoKappa;
240  }
241  }
242  break ;
243 
244  case MINUS:
245  {
246 
247  // Copy and scale by TwoKappa (1/M0)
248  // First apply the inverse of Lm
249  // Now apply the inverse of L. Forward elimination
250  // The inverse of D now
251  // All fused.
252 
253  Real fact = m_q*TwoKappa*b5TwoKappa*invDfactor ;
254  Real invDTwoKappa = invDfactor*TwoKappa;
255  Real invDb5TwoKappa = invDfactor*b5TwoKappa;
256 
257  chi[0][rb[cb]] = b5TwoKappa*psi[0];
258  chi[N5-1][rb[cb]] = invDb5TwoKappa*psi[N5-1];
259  chi[N5-1][rb[cb]] -= fact*chiralProjectPlus(psi[0]);
260  fact *= TwoKappa;
261 
262  for(int s(1);s<N5-1;s++) {
263  // 2Nc Ns flops/sit
264  chi[s][rb[cb]] = b5TwoKappa * psi[s] ;
265  chi[s][rb[cb]] += TwoKappa*chiralProjectMinus(chi[s-1]);
266  chi[N5-1][rb[cb]] -= fact * chiralProjectPlus(psi[s]);
267  fact *= TwoKappa ;
268  }
269 
270  chi[N5-1][rb[cb]] += invDTwoKappa*chiralProjectMinus(chi[N5-2]);
271 
272  // That was easy....
273 
274  //The inverse of R. Back substitution...... Getting there!
275  for(int s(N5-2);s>-1;s--)
276  chi[s][rb[cb]] += TwoKappa*chiralProjectPlus(chi[s+1]);
277 
278  //Finally the inverse of Rm
279  fact = m_q*TwoKappa;
280 
281  for(int s(0);s<N5-1;s++){
282  chi[s][rb[cb]] -= fact*chiralProjectMinus(chi[N5-1]);
283  fact *= TwoKappa ;
284  }
285  }
286  break ;
287  }
288 
289  //Done! That was not that bad after all....
290  //See, I told you so...
291  END_CODE();
292  }
293 
294  //! Apply the even-odd (odd-even) coupling piece of the NEF operator
295  /*!
296  * \ingroup linop
297  *
298  * The operator acts on the entire lattice
299  * Total flopcount 6 N5 Nc Ns + N5 * checkerboarded Dslash flops per site
300  *
301  * \param psi Pseudofermion field (Read)
302  * \param isign Flag ( PLUS | MINUS ) (Read)
303  * \param cb checkerboard ( 0 | 1 ) (Read)
304  */
305  void
306  EvenOddPrecNEFDWLinOpArray::applyOffDiag(multi1d<LatticeFermion>& chi,
307  const multi1d<LatticeFermion>& psi,
308  enum PlusMinus isign,
309  const int cb) const
310  {
311  START_CODE();
312 
313  Real fb5 = -Real(0.5)*b5 ;
314 
315  // Recoding with chiral projectors, a former factor of 0.5 is absorbed
316  // into the projector
317  Real fc5 = -Real(0.5)*c5 ;
318  Real fc5mf = fc5*m_q;
319 
320  if( chi.size() != N5 ) chi.resize(N5);
321 
322  switch ( isign )
323  {
324  case PLUS:
325  {
326  multi1d<LatticeFermion> tmp(N5); moveToFastMemoryHint(tmp);
327  int otherCB = (cb + 1)%2 ;
328 
329 
330  // (N5 -2 )*6 *Nc*Ns flops
331  for(int s = 1; s < N5-1; s++){
332  // tmp[s][rb[otherCB]] = fb5*psi[s] +
333  // fc5*(psi[s+1] + psi[s-1] +
334  // GammaConst<Ns,Ns*Ns-1>()*(psi[s-1]-psi[s+1]));
335  //
336  // Recoded with chiral projectors and BLAS ology.
337  // 4Nc Ns flops
338  tmp[s][rb[otherCB]] = fb5*psi[s] + fc5*chiralProjectPlus(psi[s-1]);
339 
340  // 2Nc Ns flops
341  tmp[s][rb[otherCB]]+= fc5*chiralProjectMinus(psi[s+1]);
342  }
343 
344 
345  // tmp[0][rb[otherCB]] = fb5*psi[0] +
346  // fc5*(psi[1] - m_q*psi[N5-1]
347  // - GammaConst<Ns,Ns*Ns-1>()*(m_q*psi[N5-1] + psi[1]));
348  //
349  // Recoded with chiralProjec and BLAS ology
350  // 6Nc Ns flops/site
351  //
352  // 4Nc Ns flops/site
353  tmp[0][rb[otherCB]] = fb5*psi[0] +fc5*chiralProjectMinus(psi[1]);
354  // 2Nc Ns flops/site
355  tmp[0][rb[otherCB]] -= fc5mf*chiralProjectPlus(psi[N5-1]);
356 
357  // tmp[N5-1][rb[otherCB]] = fb5*psi[N5-1] +
358  // fc5*( psi[N5-2] - m_q *psi[0] +
359  // GammaConst<Ns,Ns*Ns-1>()*(psi[N5-2] + m_q * psi[0]));
360 
361  // 6Nc Ns flops:
362  // 4Nc Ns flops
363  tmp[N5-1][rb[otherCB]] = fb5*psi[N5-1] + fc5*chiralProjectPlus(psi[N5-2]);
364  // 2Nc Ns flops
365  tmp[N5-1][rb[otherCB]] -= fc5mf*chiralProjectMinus(psi[0]);
366 
367 
368 
369  // Replace this with a std::vector Dslash in time -- done
370  D.apply(chi,tmp, isign, cb);
371 
372  }
373  break ;
374 
375  case MINUS:
376  {
377  multi1d<LatticeFermion> tmp(N5) ; moveToFastMemoryHint(tmp);
378 
379  // Replace this with a std::vector Dslash in time -- done
380  D.apply(tmp,psi,isign,cb);
381 
382 
383  for(int s(1);s<N5-1;s++){
384  // chi[s][rb[cb]] = fb5*tmp[s] +
385  // fc5*(tmp[s+1] + tmp[s-1] -
386  // GammaConst<Ns,Ns*Ns-1>()*(tmp[s-1]-tmp[s+1]));
387  //
388  // Recoded using chiralProject and BLAS ology
389  chi[s][rb[cb]] = fb5*tmp[s] + fc5*chiralProjectPlus(tmp[s+1]);
390  chi[s][rb[cb]] += fc5*chiralProjectMinus(tmp[s-1]);
391  }
392 
393  // chi[0][rb[cb]] = fb5*tmp[0] +
394  // fc5*(tmp[1] - m_q*tmp[N5-1] +
395  // GammaConst<Ns,Ns*Ns-1>()*(m_q*tmp[N5-1] + tmp[1]));
396  //
397  // Recoded using chiralProject and BLAS ology
398  chi[0][rb[cb]] = fb5*tmp[0] + fc5*chiralProjectPlus(tmp[1]);
399  chi[0][rb[cb]] -= fc5mf*chiralProjectMinus(tmp[N5-1]);
400 
401 
402  // chi[N5-1][rb[cb]] = fb5*tmp[N5-1] +
403  // fc5*( tmp[N5-2] - m_q *tmp[0] -
404  // GammaConst<Ns,Ns*Ns-1>()*(tmp[N5-2] + m_q * tmp[0]));
405  chi[N5-1][rb[cb]] = fb5*tmp[N5-1] + fc5*chiralProjectMinus(tmp[N5-2]);
406  chi[N5-1][rb[cb]] -= fc5mf*chiralProjectPlus(tmp[0]);
407 
408  }
409  break ;
410  }
411 
412  //Done! That was not that bad after all....
413  //See, I told you so...
414  END_CODE();
415  }
416 
417 
418 
419  //! Apply the Dminus operator on a lattice fermion. See my notes ;-)
420  void
421  EvenOddPrecNEFDWLinOpArray::Dminus(LatticeFermion& chi,
422  const LatticeFermion& psi,
423  enum PlusMinus isign,
424  int s5) const
425  {
426  LatticeFermion tt ; moveToFastMemoryHint(tt);
427  D.apply(tt,psi,isign,0);
428  D.apply(tt,psi,isign,1);
429  chi = c5InvTwoKappa*psi + (0.5*c5)*tt ;//It really is -(-0.5*c5)D
430  }
431 
432 
433 } // End Namespace Chroma
434 
Primary include file for CHROMA library code.
#define END_CODE()
Definition: chromabase.h:65
#define START_CODE()
Definition: chromabase.h:64
Support class for fermion actions and linear operators.
Definition: state.h:94
Class for counted reference semantics.
Definition: handle.h:33
4D Even Odd preconditioned NEF domain-wall fermion linear operator
unsigned s
Definition: ldumul_w.cc:37
Double tmp
Definition: meslate.cc:60
Nd
Definition: meslate.cc:74
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
int cb
Definition: invbicg.cc:120
chi
Definition: pade_trln_w.cc:24
psi
Definition: pade_trln_w.cc:191