CHROMA
eoprec_ovext_linop_array_w.cc
Go to the documentation of this file.
1 /* $Id: eoprec_ovext_linop_array_w.cc,v 3.2 2007-09-01 23:44:10 uid3790 Exp $
2 * \file
3 * \brief EvenOddPreconditioned extended-Overlap (5D) (Naryanan&Neuberger) linear operator
4 */
5 
6 #include "chromabase.h"
8 
9 namespace Chroma
10 {
11  //! Creation routine
12  /*! \ingroup fermact
13  *
14  * \param fs gauge field (Read)
15  * \param WilsonMass_ DWF height (Read)
16  * \param m_q_ quark mass (Read)
17  */
18  void
20  const int Npoles_,
21  const Real& coeffP_,
22  const multi1d<Real>& resP_,
23  const multi1d<Real>& rootQ_,
24  const multi1d<Real>& beta_,
25  const Real& OverMass_,
26  const Real& Mass_,
27  const Real& b5_,
28  const Real& c5_)
29  {
30 
31  START_CODE();
32 
33 
34  Npoles = Npoles_;
35  N5 = 2*Npoles_ + 1;
36 
37  Dslash.create(fs,N5);
38 
39  Real R = (Real(1) + Mass_)/(Real(1)-Mass_);
40  Real alpha = b5_ + c5_;
41  Real a5 = b5_ - c5_;
42 
43  Real QQ = Nd - OverMass_;
44  A = -alpha*QQ;
45  E = Real(2)*R + (R*a5 + alpha*coeffP_)*QQ;
46 
47  Aprime = Real(0.5)*alpha;
48  Eprime = -Real(0.5)*(R *a5 + coeffP_*alpha);
49 
50  B.resize(Npoles);
51  C.resize(Npoles);
52  D.resize(Npoles);
53  Bprime.resize(Npoles);
54  Cprime.resize(Npoles);
55  Dprime.resize(Npoles);
56 
57  for(int p=0; p < Npoles; p++) {
58  B[p] = sqrt( rootQ_[p] * beta_[p] )* ( Real(2) + a5*QQ );
59  D[p] = sqrt( resP_[p] * beta_[p] ) * ( Real(2) + a5*QQ );
60  C[p] = alpha*beta_[p]*QQ;
61 
62  Bprime[p] = -Real(0.5)*a5*sqrt( rootQ_[p] * beta_[p] );
63  Dprime[p] = -Real(0.5)*a5*sqrt( resP_[p] * beta_[p] );
64  Cprime[p] = -Real(0.5)*alpha*beta_[p];
65  }
66 
67  multi1d<Real> detBlock(Npoles);
68  for(int p=0; p < Npoles; p++) {
69  detBlock[p] = A*C[p] - B[p]*B[p];
70  }
71 
72  Atilde.resize(Npoles);
73  Btilde.resize(Npoles);
74  Ctilde.resize(Npoles);
75 
76  for(int p=0; p < Npoles; p++) {
77  Atilde[p] = A/detBlock[p];
78  Btilde[p] = B[p]/detBlock[p];
79  Ctilde[p] = C[p]/detBlock[p];
80  }
81 
82  S = E;
83  for(int p=0; p < Npoles; p++) {
84  S += D[p]*D[p]*Atilde[p];
85  }
86 
87  D_bd_inv.resize(2*Npoles);
88 
89  int pole=0;
90  for(int i=0; i < 2*Npoles; i+=2, pole++) {
91  D_bd_inv[i] = D[pole]*Btilde[pole];
92  D_bd_inv[i+1] = D[pole]*Atilde[pole];
93  }
94 
95  END_CODE();
96  }
97 
98  //! Apply the even-even (odd-odd) coupling piece of the domain-wall fermion operator
99  /*!
100  * \param chi result (Modify)
101  * \param psi source (Read)
102  * \param isign Flag ( PLUS | MINUS ) (Read)
103  * \param cb checkerboard ( 0 | 1 ) (Read)
104  *
105  * Flopcount = 10*Nc*Ns*(N5-1) + 2*Nc*Ns
106  * = 10*Nc*Ns*N5 - 8*Nc*Ns = (10N5 - 8)*Nc*Ns
107  */
108  void
109  EvenOddPrecOvExtLinOpArray::applyDiag(multi1d<LatticeFermion>& chi,
110  const multi1d<LatticeFermion>& psi,
111  enum PlusMinus isign,
112  const int cb) const
113  {
114  START_CODE();
115 
116  if( chi.size() != N5 ) chi.resize(N5);
117  int G5 = Ns*Ns - 1;
118 
119  switch( isign ) {
120  case PLUS:
121  {
122  // Lowest corner
123  // 2*Nc*Ns flops/cbsite
124  chi[N5-1][rb[cb]] = E* ( GammaConst<Ns,Ns*Ns-1>()*psi[N5-1] );
125 
126 
127  int p=0;
128  // ((N5-1)/2)*20*Nc*Ns flops/cbsite = 10*Nc*Ns*(N5-1) flops/cbsite
129  for(int i=0; i < N5-1; i+=2, p++) {
130  // 6*Nc*Ns flops/cbsite
131  chi[i][rb[cb]] = B[p]*psi[i+1] + A*(GammaConst<Ns,Ns*Ns-1>()*psi[i]);
132 
133  // 6*Nc*Ns flops/cbsite
134  chi[i+1][rb[cb]] = B[p]*psi[i] + C[p]*(GammaConst<Ns,Ns*Ns-1>()*psi[i+1]);
135  // 4*Nc*Ns flops/cbsite
136  chi[i+1][rb[cb]] += D[p]*psi[N5-1];
137 
138  // 4*Nc*Ns flops/cbsite
139  chi[N5-1][rb[cb]] -= D[p]*psi[i+1];
140  }
141  }
142  break;
143  case MINUS:
144  {
145  // Lowest corner
146  // 2*Nc*Ns cbsite flops
147  chi[N5-1][rb[cb]] = E*(GammaConst<Ns,Ns*Ns-1>()*psi[N5-1]);
148 
149  int p=0;
150  // 10 Nc*Ns*(N5-1) cbsite flops for loop
151  for(int i=0; i < N5-1; i+=2, p++) {
152  // 6*Nc*Ns cbsite flops
153  chi[i][rb[cb]] = B[p]*psi[i+1] + A*(GammaConst<Ns,Ns*Ns-1>()*psi[i]);
154 
155  // 6*Nc*Ns cbsite flops
156  chi[i+1][rb[cb]] = B[p]*psi[i] + C[p]*(GammaConst<Ns,Ns*Ns-1>()*psi[i+1]);
157  // 4*Nc*Ns cbsite flops
158  chi[i+1][rb[cb]] -= D[p]*psi[N5-1];
159 
160  // 4*Nc*Ns cbsite flops
161  chi[N5-1][rb[cb]] += D[p]*psi[i+1];
162  }
163  }
164  break;
165  default:
166  QDPIO::cerr << "Unknown value for PLUS /MINUS: " << isign << std::endl;
167  QDP_abort(1);
168  };
169 
170  END_CODE();
171  }
172 
173 
174  /* Flopcount = N5*1320 + (10*N5-8)*Nc*Ns cbsite flops */
175  void
177  const multi1d<LatticeFermion>& psi,
178  enum PlusMinus isign,
179  const int cb) const
180  {
181  START_CODE();
182 
183  if( chi.size() != N5 ) chi.resize(N5);
184  int G5 = Ns*Ns - 1;
185 
186  switch( isign ) {
187  case PLUS:
188  {
189  multi1d<LatticeFermion> Dpsi(N5); moveToFastMemoryHint(Dpsi);
190 
191  // N5 * Dslash flops = N5 * 1320 cbsite flops
192  Dslash.apply(Dpsi,psi,PLUS,cb);
193 
194 
195  // Lowest corner
196  // 2*Nc*Ns*cbsite flops
197  chi[N5-1][rb[cb]] = Eprime*(GammaConst<Ns,Ns*Ns-1>()*Dpsi[N5-1]);
198 
199  int p=0;
200  // Total flops for loop: 10*(N5-1)*Nc*Ns cbsite flops
201  for(int i=0; i < N5-1; i+=2, p++) {
202 
203  // 6*Nc*Ns*cbsite flops
204  chi[i][rb[cb]] = Bprime[p]*Dpsi[i+1] + Aprime*(GammaConst<Ns,Ns*Ns-1>()*Dpsi[i]);
205 
206  // 6*Nc*Ns*cbsite flops
207  chi[i+1][rb[cb]] = Bprime[p]*Dpsi[i] + Cprime[p]*(GammaConst<Ns,Ns*Ns-1>()*Dpsi[i+1]);
208 
209  // 4*Nc*Ns*cbsite flops
210  chi[i+1][rb[cb]] += Dprime[p]*Dpsi[N5-1];
211 
212  // 4*Nc*Ns*cbsite flops
213  chi[N5-1][rb[cb]] -= Dprime[p]*Dpsi[i+1];
214  }
215  }
216  break;
217  case MINUS:
218  {
219  multi1d<LatticeFermion> tmp(N5); moveToFastMemoryHint(tmp);
220 
221  int otherCB = (cb + 1)%2;
222 
223 
224  // Lowest corner
225  // 2*Nc*Ns cbsite flops
226  tmp[N5-1][rb[otherCB]] = Eprime*(GammaConst<Ns,Ns*Ns-1>()*psi[N5-1]);
227 
228  int p=0;
229  for(int i=0; i < N5-1; i+=2, p++) {
230  // 6*Nc*Ns cbsite flops
231  tmp[i][rb[otherCB]] = Bprime[p]*psi[i+1] + Aprime*(GammaConst<Ns,Ns*Ns-1>()*psi[i]);
232 
233  // 6*Nc*Ns cbsite flops
234  tmp[i+1][rb[otherCB]] = Bprime[p]*psi[i] + Cprime[p]*(GammaConst<Ns,Ns*Ns-1>()*psi[i+1]);
235 
236  // 4*Nc*Ns cbsite flops
237  tmp[i+1][rb[otherCB]] -= Dprime[p]*psi[N5-1];
238 
239  // 4*Nc*Ns cbsite flops
240  tmp[N5-1][rb[otherCB]] += Dprime[p]*psi[i+1];
241  }
242 
243  // N5 *1320 cbsite flops
245  }
246  break;
247  default:
248  QDPIO::cerr << "Unknown value for PLUS /MINUS: " << isign << std::endl;
249  QDP_abort(1);
250  };
251 
252  END_CODE();
253  }
254 
255  /* Flopcount: Npoles*10*Nc*Ns cbsite flops
256  + Npoles*12*Nc*Ns cbsite flops
257  + 2*Nc*Ns cbsite flops
258  + Npoles*8*Nc*Ns cbsite flops
259  = Npoles*30*Nc*Ns + 2Nc*Ns cbsite flops
260  = (30Npoles + 2)*Nc*Ns cbsite flops
261  = (30(N5-1)/2 + 2)*Nc*Ns cbsite flops
262  = (15N5-13)Nc*Nc cbsite flops
263  */
264 
265  void
267  const multi1d<LatticeFermion>& chi,
268  enum PlusMinus isign,
269  const int cb) const
270  {
271  START_CODE();
272 
273  multi1d<LatticeFermion> tmp5(N5); moveToFastMemoryHint(tmp5);
274 
275 
276  LatticeFermion tmp4; moveToFastMemoryHint(tmp4);
277 
278  Real ftmp;
279  Real ftmp2;
280 
281  if( psi.size() != N5 ) { psi.resize(N5); }
282 
283  // Apply tmp5 = L^{-1} chi
284  // L only has components along the last row, so this is a wholesale
285  // copy.
286  // The last row looks like:
287  //
288  // [ (+/-) D_p Btilde_p, (-/+) D_p Atilde_p g_5, ... , 1 ]
289  //=[ (+/-) D_bd_inv[0], (-/+) D_bd_inv[1] g_5 ,.. , 1 ]
290  //
291  // (+/-) <=> isign=(PLUS, MINUS)
292  //
293  // And we subtract it ie:
294  // tmp5[N5-1] = chi[N5-1] - (+/-) term1 - (-/+) term2 ...
295  // = - sgn term1 + sgn term2
296  //
297  // with sgn = (+1/-1) <=> isign = PLUS/MINUS
298 
299  // Start off.
300  tmp5[N5-1][rb[cb]] = chi[N5-1];
301 
302  Real sign;
303  switch (isign) {
304  case PLUS:
305  sign = Real(1);
306  break;
307  case MINUS:
308  sign = Real(-1);
309  break;
310  }
311 
312 
313  // Npoles * 10*Nc*Ns flops
314  for(int i=0; i < 2*Npoles; i+=2) {
315  // Copy
316  tmp5[i][rb[cb]] = chi[i];
317  tmp5[i+1][rb[cb]] = chi[i+1];
318 
319 
320  // Fixup last component
321  // 6Nc*Ns cbsite flops
322  ftmp = sign*D_bd_inv[i];
323  ftmp2 = sign*D_bd_inv[i+1];
324 
325  tmp4[rb[cb]] =ftmp *chi[i] - ftmp2*(GammaConst<Ns,Ns*Ns-1>()*chi[i+1]);
326 
327  // 4Nc*Ns cbsite flops
328  tmp5[N5-1][rb[cb]] -= tmp4;
329 
330  }
331 
332  // Apply Inverse of Block Diagonal Matrix to tmp5
333  //
334  // [ A g_5 B_0 ]
335  // [ B_0 C_0 g_5 ]
336  // [ A g_5 B_1 ]
337  // [ B_0 C_1 g_5 ]
338  // [ ... ]
339  // [ S g_5 ]
340  //
341  // which is
342  // [ Ctilde_0 g_5 - Btilde_0 ]
343  // [ -Btilde_0 Atilde_0 g_5 ]
344  // [ Ctilde_1 g5 -Btilde_1 ]
345  // [ -Btilde_1 Atilde_1 g5 ]
346  // [ ... ]
347  // [ (1/S)g_5 ]
348  //
349  // with Atilde_p = (1/d_p) A
350  // Btilde_p = (1/d_p) B_p, Ctilde_p = (1/d_p)C_p
351  //
352  // S = E + sum_p D^2_p A tilde_p
353  //
354  // d_p = det [ A g_5 B_p ]
355  // [ B_p C_p g_5 ]
356  //
357  // = A C_p - B_p^2
358  //
359  // Atilde_p, Btilde_p, Ctilde_p and S are precomputed in the constructor.
360  //
361  // This matrix is Hermitian.
362 
363 
364  int p=0;
365  // Npoles*12*Nc*Ns flops
366  for(int i=0; i < 2*Npoles; i+=2, p++) {
367  // Do all the blocks
368  // 6Nc*Ns flops
369  ftmp=-Btilde[p];
370  psi[i][rb[cb]] = ftmp*tmp5[i+1] + Ctilde[p]*(GammaConst<Ns,Ns*Ns-1>()*tmp5[i]);
371 
372  // 6Nc*Ns flops
373  psi[i+1][rb[cb]] = ftmp*tmp5[i] + Atilde[p]*(GammaConst<Ns,Ns*Ns-1>()*tmp5[i+1]);
374  }
375 
376  // Do the last bit
377  ftmp = Real(1)/S;
378 
379  // 2*Nc*Ns flops
380  psi[N5-1][rb[cb]] = ftmp*(GammaConst<Ns,Ns*Ns-1>()*tmp5[N5-1]);
381 
382 
383 
384  // Now apply: [ 1 ..... (-/+) Btilde_0 D_0 ] ^ {-1}
385  // [ 0 1 (+/-) Atilde_0 D_0 G5 ]
386  // [ 0 0 1 (-/+) Btilde_1 D_1 ]
387  // [ 0 0 0 1 (+/-) Atilde_1 D_1 G5 ]
388  // [ ... ... ]
389  // [ 1 ]
390  //
391  // to tmp5_2 with backsubstitution. The elements in the rightmost
392  // row are stored in D_bd_inv, but the sign is flipped compared to
393  // the forward substitution case. In either case we subtract
394  // the term from the rightmost col ie:
395  // psi[ i ] = tmp5_2[i] - (-/+) term * psi[N5-1];
396  // = tmp5_2[i] + sign * term * psi[N5-1];
397  //
398  // psi[i+1] = tmp5_2[i+1] - (+/-) term * psi[N5-1];
399  // = tmp5_2[i+1] - sign * term * psi[N5-1];
400  //
401  // with sign = 1/-1 <=> isign = PLUS/MINUS
402  // (ie signs flip in opposite order to forward subsitution)
403 
404 
405  // Backsubstitution:
406  // First piece already done
407  // Only need this so precompute it out here.
408 
409  tmp4[rb[cb]] = Real(1)*(GammaConst<Ns,Ns*Ns-1>()*psi[N5-1]);
410 
411  // Npoles * 8Nc*Ns flops
412  for(int i=0; i < 2*Npoles; i+=2) {
413  ftmp = sign*D_bd_inv[i];
414  ftmp2 = sign*D_bd_inv[i+1];
415 
416  // 4*Nc*Ns flops
417  psi[i][rb[cb]] += ftmp*psi[N5-1];
418 
419  // 4*Nc*Ns flops
420  psi[i+1][rb[cb]] -= ftmp2*tmp4;
421 
422  }
423 
424 
425 
426  // And we are done. That was not so bad now was it?
427  END_CODE();
428  }
429 
430  void
431  EvenOddPrecOvExtLinOpArray::applyDerivOffDiag(multi1d<LatticeColorMatrix>& ds_u,
432  const multi1d<LatticeFermion>& chi,
433  const multi1d<LatticeFermion>& psi,
434  enum PlusMinus isign,
435  int cb) const
436  {
437  START_CODE();
438 
439  QDPIO::cout << "Not yet implemented " << std::endl;
440  QDP_abort(1);
441 
442  END_CODE();
443  }
444 
445 } // End Namespace Chroma
446 
Primary include file for CHROMA library code.
void applyDerivOffDiag(multi1d< LatticeColorMatrix > &ds_u, const multi1d< LatticeFermion > &chi, const multi1d< LatticeFermion > &psi, enum PlusMinus isign, int cb) const
Apply the even-odd (odd-even) coupling piece of the NEF operator.
void applyDiagInv(multi1d< LatticeFermion > &chi, const multi1d< LatticeFermion > &psi, enum PlusMinus isign, const int cb) const
Apply the inverse even-even (odd-odd) coupling piece of the domain-wall fermion operator.
void applyOffDiag(multi1d< LatticeFermion > &chi, const multi1d< LatticeFermion > &psi, enum PlusMinus isign, const int cb) const
Apply the off diagonal block.
void applyDiag(multi1d< LatticeFermion > &chi, const multi1d< LatticeFermion > &psi, enum PlusMinus isign, const int cb) const
Apply the even-even (odd-odd) coupling piece of the domain-wall fermion operator.
Support class for fermion actions and linear operators.
Definition: state.h:94
Class for counted reference semantics.
Definition: handle.h:33
Unpreconditioned extended-Overlap (5D) (Naryanan&Neuberger) linear operator.
void create(Handle< FermState< T, P, Q > > fs, const int Npoles_, const Real &coeffP_, const multi1d< Real > &resP_, const multi1d< Real > &rootQ_, const multi1d< Real > &beta_, const Real &OverMass_, const Real &m_q_, const Real &b5_, const Real &c5_)
Creation routine.
void apply(multi1d< LatticeFermion > &chi, const multi1d< LatticeFermion > &psi, enum PlusMinus isign, int cb) const
General Wilson-Dirac dslash.
void create(Handle< FermState< T, P, Q > > state, int N5_)
Creation routine.
Nd
Definition: meslate.cc:74
static const LatticeInteger & alpha(const int dim)
Definition: stag_phases_s.h:43
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
int G5
Definition: pbg5p_w.cc:57
LatticeFermion tmp
Definition: mespbg5p_w.cc:36
int i
Definition: pbg5p_w.cc:55
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
multi1d< LatticeFermion > chi(Ncb)
LatticeFermion psi
Definition: mespbg5p_w.cc:35
START_CODE()
int cb
Definition: invbicg.cc:120
Double ftmp2
Definition: topol.cc:30