CHROMA
clover_term_bagel_clover.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Clover term linear operator
3  *
4  * This particular implementation is specific to a scalar-like
5  * architecture
6  */
7 
8 #include "chromabase.h"
10 #include "meas/glue/mesfield.h"
11 
12 #include <bagel_clover.h>
13 
14 
15 namespace Chroma
16 {
17 
18 #if 0
19  //! XML output
20  inline
21  XMLWriter& operator<<(XMLWriter& xml, const PrimitiveClovTriang& d)
22  {
23  xml.openTag("PrimClovTriang");
24 
25  XMLWriterAPI::AttributeList alist;
26 
27  xml.openTag("Diag");
28  for(int i=0; i < 2; ++i)
29  {
30  for(int j=0; j < 2*Nc; ++j)
31  {
32  alist.clear();
33  alist.push_back(XMLWriterAPI::Attribute("block", i));
34  alist.push_back(XMLWriterAPI::Attribute("col", j));
35 
36  xml.openTag("elem", alist);
37  xml << d.diag[i][j];
38  xml.closeTag();
39  }
40  }
41  xml.closeTag(); // Diag
42 
43  xml.openTag("Offd");
44  for(int i=0; i < 2; ++i)
45  {
46  for(int j=0; j < 2*Nc*Nc-Nc; ++j)
47  {
48  alist.clear();
49  alist.push_back(XMLWriterAPI::Attribute("block", i));
50  alist.push_back(XMLWriterAPI::Attribute("col", j));
51 
52  xml.openTag("elem", alist);
53  xml << d.offd[i][j];
54  xml.closeTag();
55  }
56  }
57  xml.closeTag(); // Offd
58 
59  xml.closeTag(); // PrimClovTriang
60  return xml;
61  }
62 
63 
64  // Reader/writers
65  void read(XMLReader& xml, const std::string& path, PrimitiveClovTriang& param)
66  {
67  QDP_error_exit("clover reader not implemented");
68  }
69 
70  void write(XMLWriter& xml, const std::string& path, const PrimitiveClovTriang& param)
71  {
72  push(xml,path);
73  xml << param;
74  pop(xml);
75  }
76 #endif
77 
78  // Empty constructor. Must use create later
80  bool retry = false;
81 
82  // Allocate the arrays, with proper alignment
83 
84  try {
85  tri_diag = (PrimitiveClovDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovDiag) , QDP::Allocator::FAST );
86  }
87  catch( std::bad_alloc ) {
88  retry = true;
89  }
90 
91  if( retry ) {
92  try {
93  tri_diag = (PrimitiveClovDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovDiag) , QDP::Allocator::DEFAULT );
94  }
95  catch( std::bad_alloc ) {
96  QDPIO::cerr << "Failed to allocate the tri_diag" << std::endl << std::flush ;
97  QDP_abort(1);
98  }
99  }
100 
101 
102  retry = false;
103  try {
104  tri_off_diag = (PrimitiveClovOffDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovOffDiag) , QDP::Allocator::FAST );
105  }
106  catch( std::bad_alloc ) {
107  retry = true;
108  }
109 
110  if( retry ) {
111  try {
112  tri_off_diag = (PrimitiveClovOffDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovOffDiag) , QDP::Allocator::DEFAULT );
113  }
114  catch( std::bad_alloc ) {
115  QDPIO::cerr << "Failed to allocate the tri_off_diag" << std::endl << std::flush ;
116  QDP_abort(1);
117  }
118  }
119 
120 
121  }
122 
123 
125  if ( tri_diag != 0x0 ) {
126  QDP::Allocator::theQDPAllocator::Instance().free(tri_diag);
127  }
128 
129  if ( tri_off_diag != 0x0 ) {
130  QDP::Allocator::theQDPAllocator::Instance().free(tri_off_diag);
131  }
132  }
133 
134  //! Creation routine
136  const CloverFermActParams& param_)
137  {
138  START_CODE();
139 
140  u = fs->getLinks();
141  fbc = fs->getFermBC();
142  param = param_;
143 
144  // Sanity check
145  if (fbc.operator->() == 0)
146  {
147  QDPIO::cerr << "BAGELCloverTerm: error: fbc is null" << std::endl;
148  QDP_abort(1);
149  }
150 
151  {
152  Real ff = where(param.anisoParam.anisoP, Real(1) / param.anisoParam.xi_0, Real(1));
153  param.clovCoeffR *= Real(0.5) * ff;
154  param.clovCoeffT *= Real(0.5);
155  }
156 
157  //
158  // Yuk. Some bits of knowledge of the dslash term are buried in the
159  // effective mass term. They show up here. If I wanted some more
160  // complicated dslash then this will have to be fixed/adjusted.
161  //
162  Real diag_mass;
163  {
164  Real ff = where(param.anisoParam.anisoP, param.anisoParam.nu / param.anisoParam.xi_0, Real(1));
165  diag_mass = 1 + (Nd-1)*ff + param.Mass;
166  }
167 
168  /* Calculate F(mu,nu) */
169  multi1d<LatticeColorMatrix> f;
170  mesField(f, u);
171  makeClov(f, diag_mass);
172 
173  choles_done.resize(rb.numSubsets());
174  for(int i=0; i < rb.numSubsets(); i++) {
175  choles_done[i] = false;
176  }
177 
178 #if 0
179  // Testing code
180  LatticeFermion foo;
181  LatticeFermion res1=zero;
182  LatticeFermion res2=zero;
183  gaussian(foo);
184  apply(res1,foo,PLUS, 0);
185  applySite(res2,foo,PLUS, 0);
186 
187  LatticeFermion diff=res1-res2;
188  XMLFileWriter fred("fred");
189  push(fred, "stuff");
190  write(fred, "diff", diff);
191  pop(fred);
192 
193  QDPIO::cout << "sqrt( norm2( diff))= " << sqrt(norm2(diff)) << std::endl << std::flush;
194  QDP_abort(1);
195 #endif
196 
197  END_CODE();
198  }
199 
200  // Now copy
202  const CloverFermActParams& param_,
203  const BAGELCloverTerm& from_)
204  {
205  START_CODE();
206 
207  const BAGELCloverTerm& from = dynamic_cast<const BAGELCloverTerm&>(from_);
208  u = fs->getLinks();
209  fbc = fs->getFermBC();
210  param = param_;
211 
212  // Sanity check
213  if (fbc.operator->() == 0) {
214  QDPIO::cerr << "BAGELCloverTerm: error: fbc is null" << std::endl;
215  QDP_abort(1);
216  }
217 
218  {
219  Real ff = where(param.anisoParam.anisoP, Real(1) / param.anisoParam.xi_0, Real(1));
220  param.clovCoeffR *= Real(0.5) * ff;
221  param.clovCoeffT *= Real(0.5);
222  }
223 
224  //
225  // Yuk. Some bits of knowledge of the dslash term are buried in the
226  // effective mass term. They show up here. If I wanted some more
227  // complicated dslash then this will have to be fixed/adjusted.
228  //
229  Real diag_mass;
230  {
231  Real ff = where(param.anisoParam.anisoP, param.anisoParam.nu / param.anisoParam.xi_0, Real(1));
232  diag_mass = 1 + (Nd-1)*ff + param.Mass;
233  }
234 
235 
236  /* Calculate F(mu,nu) */
237  //multi1d<LatticeColorMatrix> f;
238  //mesField(f, u);
239  //makeClov(f, diag_mass);
240 
241  choles_done.resize(rb.numSubsets());
242  for(int i=0; i < rb.numSubsets(); i++) {
243  choles_done[i] = from.choles_done[i];
244  }
245 
246  tr_log_diag_ = from.tr_log_diag_;
247 
248  // Should be allocated by constructor.
249  // tri_diag.resize(from.tri_diag.size());
250  // tri_off_diag.resize(from.tri_off_diag.size());
251  for(int site=0; site < Layout::sitesOnNode(); site++) {
252  for(int block = 0; block < 2; block++) {
253  for(int comp=0; comp< 6; comp++) {
254  tri_diag[site][block][comp] = from.tri_diag[site][block][comp];
255  }
256  for(int comp=0; comp < 15; comp++) {
257  tri_off_diag[site][block][comp] = from.tri_off_diag[site][block][comp];
258  }
259  }
260  }
261 
262  END_CODE();
263  }
264 
265 
266  /*
267  * MAKCLOV
268  *
269  * In this routine, MAKCLOV calculates
270 
271  * 1 - (1/4)*sigma(mu,nu) F(mu,nu)
272 
273  * using F from mesfield
274 
275  * F(mu,nu) = (1/4) sum_p (1/2) [ U_p(x) - U^dag_p(x) ]
276 
277  * using basis of SPPROD and stores in a lower triangular matrix
278  * (no diagonal) plus real diagonal
279 
280  * where
281  * U_1 = u(x,mu)*u(x+mu,nu)*u_dag(x+nu,mu)*u_dag(x,nu)
282  * U_2 = u(x,nu)*u_dag(x-mu+nu,mu)*u_dag(x-mu,nu)*u(x-mu,mu)
283  * U_3 = u_dag(x-mu,mu)*u_dag(x-mu-nu,nu)*u(x-mu-nu,mu)*u(x-nu,nu)
284  * U_4 = u_dag(x-nu,nu)*u(x-nu,mu)*u(x-nu+mu,nu)*u_dag(x,mu)
285 
286  * and
287 
288  * | sigF(1) sigF(3) 0 0 |
289  * sigF = | sigF(5) -sigF(1) 0 0 |
290  * | 0 0 -sigF(0) -sigF(2) |
291  * | 0 0 -sigF(4) sigF(0) |
292  * where
293  * sigF(i) is a color matrix
294 
295  * sigF(0) = i*(ClovT*E_z + ClovR*B_z)
296  * = i*(ClovT*F(3,2) + ClovR*F(1,0))
297  * sigF(1) = i*(ClovT*E_z - ClovR*B_z)
298  * = i*(ClovT*F(3,2) - ClovR*F(1,0))
299  * sigF(2) = i*(E_+ + B_+)
300  * sigF(3) = i*(E_+ - B_+)
301  * sigF(4) = i*(E_- + B_-)
302  * sigF(5) = i*(E_- - B_-)
303  * i*E_+ = (i*ClovT*E_x - ClovT*E_y)
304  * = (i*ClovT*F(3,0) - ClovT*F(3,1))
305  * i*E_- = (i*ClovT*E_x + ClovT*E_y)
306  * = (i*ClovT*F(3,0) + ClovT*F(3,1))
307  * i*B_+ = (i*ClovR*B_x - ClovR*B_y)
308  * = (i*ClovR*F(2,1) + ClovR*F(2,0))
309  * i*B_- = (i*ClovR*B_x + ClovR*B_y)
310  * = (i*ClovR*F(2,1) - ClovR*F(2,0))
311 
312  * NOTE: I am using i*F of the usual F defined by UKQCD, Heatlie et.al.
313 
314  * NOTE: the above definitions assume that the time direction, t_dir,
315  * is 3. In general F(k,j) is multiplied with ClovT if either
316  * k=t_dir or j=t_dir, and with ClovR otherwise.
317 
318  *+++
319  * Here are some notes on the origin of this routine. NOTE, ClovCoeff or u0
320  * are not actually used in MAKCLOV.
321  *
322  * The clover mass term is suppose to act on a std::vector like
323  *
324  * chi = (1 - (ClovCoeff/u0^3) * kappa/4 * sum_mu sum_nu F(mu,nu)*sigma(mu,nu)) * psi
325 
326  * Definitions used here (NOTE: no "i")
327  * sigma(mu,nu) = gamma(mu)*gamma(nu) - gamma(nu)*gamma(mu)
328  * = 2*gamma(mu)*gamma(nu) for mu != nu
329  *
330  * chi = sum_mu sum_nu F(mu,nu)*gamma(mu)*gamma(nu)*psi for mu < nu
331  * = (1/2) * sum_mu sum_nu F(mu,nu)*gamma(mu)*gamma(nu)*psi for mu != nu
332  * = (1/4) * sum_mu sum_nu F(mu,nu)*sigma(mu,nu)*psi
333  *
334  *
335  * chi = (1 - (ClovCoeff/u0^3) * kappa/4 * sum_mu sum_nu F(mu,nu)*sigma(mu,nu)) * psi
336  * = psi - (ClovCoeff/u0^3) * kappa * chi
337  * == psi - kappa * chi
338  *
339  * We have absorbed ClovCoeff/u0^3 into kappa. A u0 was previously absorbed into kappa
340  * for compatibility to ancient conventions.
341  *---
342 
343  * Arguments:
344  * \param f field strength tensor F(cb,mu,nu) (Read)
345  * \param diag_mass effective mass term (Read)
346  */
347  void BAGELCloverTerm::makeClov(const multi1d<LatticeColorMatrix>& f, const Real& diag_mass)
348  {
349  START_CODE();
350 
351  LatticeColorMatrix f0;
352  LatticeColorMatrix f1;
353  LatticeColorMatrix f2;
354  LatticeColorMatrix f3;
355  LatticeColorMatrix f4;
356  LatticeColorMatrix f5;
357 
358  const int nodeSites = QDP::Layout::sitesOnNode();
359 
360  START_CODE();
361 
362  if ( Nd != 4 )
363  QDP_error_exit("expecting Nd == 4", Nd);
364 
365  if ( Ns != 4 )
366  QDP_error_exit("expecting Ns == 4", Ns);
367 
368  f0 = f[0] * getCloverCoeff(0,1);
369  f1 = f[1] * getCloverCoeff(0,2);
370  f2 = f[2] * getCloverCoeff(0,3);
371  f3 = f[3] * getCloverCoeff(1,2);
372  f4 = f[4] * getCloverCoeff(1,3);
373  f5 = f[5] * getCloverCoeff(2,3);
374 
375  /* Multiply in the appropriate clover coefficient */
376  /*
377  switch (param.anisoParam.t_dir)
378  {
379  case 1:
380  f0 = f[0] * param.clovCoeffT;
381  f1 = f[1] * param.clovCoeffR;
382  f2 = f[2] * param.clovCoeffR;
383  f3 = f[3] * param.clovCoeffT;
384  f4 = f[4] * param.clovCoeffT;
385  f5 = f[5] * param.clovCoeffR;
386  break;
387 
388  case 2:
389  f0 = f[0] * param.clovCoeffR;
390  f1 = f[1] * param.clovCoeffT;
391  f2 = f[2] * param.clovCoeffR;
392  f3 = f[3] * param.clovCoeffT;
393  f4 = f[4] * param.clovCoeffR;
394  f5 = f[5] * param.clovCoeffT;
395  break;
396 
397  case 3:
398  f0 = f[0] * param.clovCoeffR;
399  f1 = f[1] * param.clovCoeffR;
400  f2 = f[2] * param.clovCoeffT;
401  f3 = f[3] * param.clovCoeffR;
402  f4 = f[4] * param.clovCoeffT;
403  f5 = f[5] * param.clovCoeffT;
404  break;
405 
406  default:
407  QDPIO::cerr << __func__ << ": invalid time direction: t_dir= "
408  << param.anisoParam.t_dir << std::endl;
409  QDP_abort(1);
410  }
411  */
412 
413  // These are now preallocated
414  //tri_diag.resize(nodeSites); // hold local lattice
415  // tri_off_diag.resize(nodeSites);
416 
417  /*# Construct diagonal */
418  for(int site = 0; site < nodeSites; ++site)
419  {
420 
421  for(int jj = 0; jj < 2; jj++)
422  {
423  for(int ii = 0; ii < 2*Nc; ii++)
424  {
425  tri_diag[site][jj][ii] = diag_mass.elem().elem().elem();
426  }
427  }
428  }
429 
430 
431 
432  /* The appropriate clover coeffients are already included in the
433  field strengths F(mu,nu)! */
434  for(int site = 0; site < nodeSites; ++site)
435  {
436  RComplex<REAL> E_minus;
437  RComplex<REAL> B_minus;
438  RComplex<REAL> ctmp_0;
439  RComplex<REAL> ctmp_1;
440  RScalar<REAL> rtmp_0;
441  RScalar<REAL> rtmp_1;
442 
443  for(int i = 0; i < Nc; ++i)
444  {
445  /*# diag_L(i,0) = 1 - i*diag(E_z - B_z) */
446  /*# = 1 - i*diag(F(3,2) - F(1,0)) */
447  ctmp_0 = f5.elem(site).elem().elem(i,i);
448  ctmp_0 -= f0.elem(site).elem().elem(i,i);
449  rtmp_0 = imag(ctmp_0);
450  tri_diag[site][0][i] += rtmp_0;
451 
452  /*# diag_L(i+Nc,0) = 1 + i*diag(E_z - B_z) */
453  /*# = 1 + i*diag(F(3,2) - F(1,0)) */
454  tri_diag[site][0][i+Nc] -= rtmp_0;
455 
456  /*# diag_L(i,1) = 1 + i*diag(E_z + B_z) */
457  /*# = 1 + i*diag(F(3,2) + F(1,0)) */
458  ctmp_1 = f5.elem(site).elem().elem(i,i);
459  ctmp_1 += f0.elem(site).elem().elem(i,i);
460  rtmp_1 = imag(ctmp_1);
461  tri_diag[site][1][i] -= rtmp_1;
462 
463  /*# diag_L(i+Nc,1) = 1 - i*diag(E_z + B_z) */
464  /*# = 1 - i*diag(F(3,2) + F(1,0)) */
465  tri_diag[site][1][i+Nc] += rtmp_1;
466  }
467 
468  /*# Construct lower triangular portion */
469  /*# Block diagonal terms */
470  for(int i = 1; i < Nc; ++i)
471  {
472  for(int j = 0; j < i; ++j)
473  {
474  int elem_ij = i*(i-1)/2 + j;
475  int elem_tmp = (i+Nc)*(i+Nc-1)/2 + j+Nc;
476 
477  /*# L(i,j,0) = -i*(E_z - B_z)[i,j] */
478  /*# = -i*(F(3,2) - F(1,0)) */
479  ctmp_0 = f0.elem(site).elem().elem(i,j);
480  ctmp_0 -= f5.elem(site).elem().elem(i,j);
481  tri_off_diag[site][0][elem_ij] = timesI(ctmp_0);
482 
483  /*# L(i+Nc,j+Nc,0) = +i*(E_z - B_z)[i,j] */
484  /*# = +i*(F(3,2) - F(1,0)) */
485  tri_off_diag[site][0][elem_tmp] = -tri_off_diag[site][0][elem_ij];
486 
487  /*# L(i,j,1) = i*(E_z + B_z)[i,j] */
488  /*# = i*(F(3,2) + F(1,0)) */
489  ctmp_1 = f5.elem(site).elem().elem(i,j);
490  ctmp_1 += f0.elem(site).elem().elem(i,j);
491  tri_off_diag[site][1][elem_ij] = timesI(ctmp_1);
492 
493  /*# L(i+Nc,j+Nc,1) = -i*(E_z + B_z)[i,j] */
494  /*# = -i*(F(3,2) + F(1,0)) */
495  tri_off_diag[site][1][elem_tmp] = -tri_off_diag[site][1][elem_ij];
496  }
497  }
498 
499  /*# Off-diagonal */
500  for(int i = 0; i < Nc; ++i)
501  {
502  for(int j = 0; j < Nc; ++j)
503  {
504  // Flipped index
505  // by swapping i <-> j. In the past i would run slow
506  // and now j runs slow
507  int elem_ij = (i+Nc)*(i+Nc-1)/2 + j;
508 
509  /*# i*E_- = (i*E_x + E_y) */
510  /*# = (i*F(3,0) + F(3,1)) */
511  E_minus = timesI(f2.elem(site).elem().elem(i,j));
512  E_minus += f4.elem(site).elem().elem(i,j);
513 
514  /*# i*B_- = (i*B_x + B_y) */
515  /*# = (i*F(2,1) - F(2,0)) */
516  B_minus = timesI(f3.elem(site).elem().elem(i,j));
517  B_minus -= f1.elem(site).elem().elem(i,j);
518 
519  /*# L(i+Nc,j,0) = -i*(E_- - B_-) */
520  tri_off_diag[site][0][elem_ij] = B_minus - E_minus;
521 
522  /*# L(i+Nc,j,1) = +i*(E_- + B_-) */
523  tri_off_diag[site][1][elem_ij] = E_minus + B_minus;
524  }
525  }
526 
527 
528  }
529 
530 
531  END_CODE();
532  }
533 
534 
535  //! Invert
536  /*!
537  * Computes the inverse of the term on cb using Cholesky
538  */
540  {
541  START_CODE();
542 
543  // When you are doing the cholesky - also fill out the trace_log_diag piece)
544  // chlclovms(tr_log_diag_, cb);
546 
547  END_CODE();
548  }
549 
550 
551  //! Invert
552  /*!
553  * Computes the inverse of the term on cb using Cholesky
554  *
555  * \return logarithm of the determinant
556  */
558  {
559  START_CODE();
560 
561  if( choles_done[cb] == false )
562  {
563  QDPIO::cout << "Error: YOu have not done the Cholesky.on this operator on this subset" << std::endl;
564  QDPIO::cout << "You sure you shouldn't be asking invclov?" << std::endl;
565  QDP_abort(1);
566  }
567 
568  END_CODE();
569 
570  return sum(tr_log_diag_, rb[cb]);
571  }
572 
573  /*! An LDL^\dag decomposition and inversion? */
574  void BAGELCloverTerm::ldagdlinv(LatticeReal& tr_log_diag, int cb)
575  {
576  START_CODE();
577 
578  if ( 2*Nc < 3 )
579  QDP_error_exit("Matrix is too small", Nc, Ns);
580 
581  // Zero trace log
582  tr_log_diag[rb[cb]] = zero;
583  RScalar<REAL> zip=0;
584 
585  int N = 2*Nc;
586 
587  // Loop through the sites.
588  for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite)
589  {
590  int site = rb[cb].siteTable()[ssite];
591 
592  int site_neg_logdet=0;
593  // Loop through the blocks on the site.
594  for(int block=0; block < 2; block++) {
595 
596  // Triangular storage
597  // Big arrays to get good alignment...
598 
599  RScalar<REAL> inv_d[8] QDP_ALIGN16;
600  RComplex<REAL> inv_offd[16] QDP_ALIGN16;
601  RComplex<REAL> v[8] QDP_ALIGN16;
602  RScalar<REAL> diag_g[8] QDP_ALIGN16;
603  // Algorithm 4.1.2 LDL^\dagger Decomposition
604  // From Golub, van Loan 3rd ed, page 139
605  for(int i=0; i < N; i++) {
606  inv_d[i] = tri_diag[site][block][i];
607  }
608  for(int i=0; i < 15; i++) {
609  inv_offd[i] =tri_off_diag[site][block][i];
610  }
611 
612  for(int j=0; j < N; ++j) {
613 
614  // Compute v(0:j-1)
615  //
616  // for i=0:j-2
617  // v(i) = A(j,i) A(i,i)
618  // end
619 
620 
621  for(int i=0; i < j; i++) {
622  int elem_ji = j*(j-1)/2 + i;
623 
624  RComplex<REAL> A_ii = cmplx( inv_d[i], zip );
625  v[i] = A_ii*adj(inv_offd[elem_ji]);
626  }
627 
628  // v(j) = A(j,j) - A(j, 0:j-2) v(0:j-2)
629  // ^ This is done with a loop over k ie:
630  //
631  // v(j) = A(j,j) - sum_k A*(j,k) v(k) k=0...j-2
632  //
633  // = A(j,j) - sum_k A*(j,k) A(j,k) A(k,k)
634  // = A(j,j) - sum_k | A(j,k) |^2 A(k,k)
635 
636  v[j] = cmplx(inv_d[j],zip);
637 
638  for(int k=0; k < j; k++) {
639  int elem_jk = j*(j-1)/2 + k;
640  v[j] -= inv_offd[elem_jk]*v[k];
641  }
642 
643 
644  // At this point in time v[j] has to be real, since
645  // A(j,j) is from diag ie real and all | A(j,k) |^2 is real
646  // as is A(k,k)
647 
648  // A(j,j) is the diagonal element - so store it.
649  inv_d[j] = real( v[j] );
650 
651  // Last line of algorithm:
652  // A( j+1 : n, j) = ( A(j+1:n, j) - A(j+1:n, 1:j-1)v(1:k-1) ) / v(j)
653  //
654  // use k as first colon notation and l as second so
655  //
656  // for k=j+1 < n-1
657  // A(k,j) = A(k,j) ;
658  // for l=0 < j-1
659  // A(k,j) -= A(k, l) v(l)
660  // end
661  // A(k,j) /= v(j);
662  //
663  for(int k=j+1; k < N; k++) {
664  int elem_kj = k*(k-1)/2 + j;
665  for(int l=0; l < j; l++) {
666  int elem_kl = k*(k-1)/2 + l;
667  inv_offd[elem_kj] -= inv_offd[elem_kl] * v[l];
668  }
669  inv_offd[elem_kj] /= v[j];
670  }
671  }
672 
673  // Now fix up the inverse
674  RScalar<REAL> one;
675  one.elem() = (REAL)1;
676 
677  for(int i=0; i < N; i++) {
678  diag_g[i] = one/inv_d[i];
679 
680  // Compute the trace log
681  // NB we are always doing trace log | A |
682  // (because we are always working with actually A^\dagger A
683  // even in one flavour case where we square root)
684  tr_log_diag.elem(site).elem().elem().elem() += log(fabs(inv_d[i].elem()));
685  // However, it is worth counting just the no of negative logdets
686  // on site
687  if( inv_d[i].elem() < 0 ) {
688  site_neg_logdet++;
689  }
690  }
691  // Now we need to invert the L D L^\dagger
692  // We can do this by solving:
693  //
694  // L D L^\dagger M^{-1} = 1
695  //
696  // This can be done by solving L D X = 1 (X = L^\dagger M^{-1})
697  //
698  // Then solving L^\dagger M^{-1} = X
699  //
700  // LD is lower diagonal and so X will also be lower diagonal.
701  // LD X = 1 can be solved by forward substitution.
702  //
703  // Likewise L^\dagger is strictly upper triagonal and so
704  // L^\dagger M^{-1} = X can be solved by forward substitution.
705  RComplex<REAL> sum;
706  for(int k = 0; k < N; ++k)
707  {
708  for(int i = 0; i < k; ++i) {
709  zero_rep(v[i]);
710  }
711 
712  /*# Forward substitution */
713 
714  // The first element is the inverse of the diagonal
715  v[k] = cmplx(diag_g[k],zip);
716 
717  for(int i = k+1; i < N; ++i) {
718  zero_rep(v[i]);
719 
720  for(int j = k; j < i; ++j) {
721  int elem_ij = i*(i-1)/2+j;
722 
723  // subtract l_ij*d_j*x_{kj}
724  v[i] -= inv_offd[elem_ij] *inv_d[j]*v[j];
725 
726  }
727 
728  // scale out by 1/d_i
729  v[i] *= diag_g[i];
730  }
731 
732  /*# Backward substitution */
733  // V[N-1] remains unchanged
734  // Start from V[N-2]
735 
736  for(int i = N-2; (int)i >= (int)k; --i) {
737  for(int j = i+1; j < N; ++j) {
738  int elem_ji = j*(j-1)/2 + i;
739  // Subtract terms of typ (l_ji)*x_kj
740  v[i] -= adj(inv_offd[elem_ji]) * v[j];
741  }
742  }
743 
744  /*# Overwrite column k of invcl.offd */
745  inv_d[k] = real(v[k]);
746  for(int i = k+1; i < N; ++i)
747  {
748  int elem_ik = i*(i-1)/2+k;
749  inv_offd[elem_ik] = v[i];
750  }
751  }
752 
753 
754  // Overwrite original data
755  for(int i=0; i < N; i++) {
756  tri_diag[site][block][i] = inv_d[i];
757  }
758  for(int i=0; i < 15; i++) {
759  tri_off_diag[site][block][i] = inv_offd[i];
760  }
761  }
762 
763  if( site_neg_logdet != 0 ) {
764  // Report if site had odd number of negative terms. (-ve def)
765  std::cout << "WARNING: Odd number of negative terms in Clover DET ("
766  << site_neg_logdet<< ") at site: " << site << std::endl;
767  }
768  }
769 
770  // This comes from the days when we used to do Cholesky
771  choles_done[cb] = true;
772  END_CODE();
773  }
774 
775 
776 
777 
778  /*! CHLCLOVMS - Cholesky decompose the clover mass term and uses it to
779  * compute lower(A^-1) = lower((L.L^dag)^-1)
780  * Adapted from Golub and Van Loan, Matrix Computations, 2nd, Sec 4.2.4
781  *
782  * Arguments:
783  *
784  * \param DetP flag whether to compute determinant (Read)
785  * \param logdet logarithm of the determinant (Write)
786  * \param cb checkerboard of work (Read)
787  */
788  void BAGELCloverTerm::chlclovms(LatticeReal& tr_log_diag, int cb)
789  {
790  START_CODE();
791 
792  if ( 2*Nc < 3 )
793  QDP_error_exit("Matrix is too small", Nc, Ns);
794 
795  tr_log_diag = zero;
796 
797  int n = 2*Nc;
798 
799  /*# Cholesky decompose A = L.L^dag */
800  /*# NOTE!!: I can store this matrix in invclov, but will need a */
801  /*# temporary diag */
802  for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite)
803  {
804  int site = rb[cb].siteTable()[ssite];
805 
806  PrimitiveClovDiag invclov_diag;
807  PrimitiveClovOffDiag invclov_off_diag;
808 
809  multi1d< RScalar<REAL> > diag_g(n);
810  multi1d< RComplex<REAL> > v1(n);
811  RComplex<REAL> sum;
812  RScalar<REAL> one;
813  RScalar<REAL> zero;
814  RScalar<REAL> lrtmp;
815 
816  one = 1;
817  zero = 0;
818 
819  for(int s = 0; s < 2; ++s)
820  {
821  int elem_jk = 0;
822  int elem_ij;
823 
824  for(int j = 0; j < n; ++j)
825  {
826  /*# Multiply clover mass term against basis std::vector. */
827  /*# Actually, I need a column of the lower triang matrix clov. */
828  v1[j] = cmplx(tri_diag[site][s][j],zero);
829 
830  elem_ij = elem_jk + 2*j;
831 
832  for(int i = j+1; i < n; ++i)
833  {
834  v1[i] = tri_off_diag[site][s][elem_ij];
835  elem_ij += i;
836  }
837 
838  /*# Back to cholesky */
839  /*# forward substitute */
840  for(int k = 0; k < j; ++k)
841  {
842  int elem_ik = elem_jk;
843 
844  for(int i = j; i < n; ++i)
845  {
846  v1[i] -= adj(invclov_off_diag[s][elem_jk]) * invclov_off_diag[s][elem_ik];
847  elem_ik += i;
848  }
849  elem_jk++;
850  }
851 
852  /*# The diagonal is (should be!!) real and positive */
853  diag_g[j] = real(v1[j]);
854 
855  /*#+ */
856  /*# Squeeze in computation of the trace log of the diagonal term */
857  /*#- */
858  if ( diag_g[j].elem() > 0 )
859  {
860  lrtmp = log(diag_g[j]);
861  }
862  else
863  {
864  // Make sure any node can print this message
865  std::cerr << "Clover term has negative diagonal element: "
866  << "diag_g[" << j << "]= " << diag_g[j]
867  << " at site: " << site << std::endl;
868  QDP_abort(1);
869  }
870 
871  tr_log_diag.elem(site).elem().elem() += lrtmp;
872 
873  diag_g[j] = sqrt(diag_g[j]);
874  diag_g[j] = one / diag_g[j];
875 
876  /*# backward substitute */
877  elem_ij = elem_jk + j;
878  for(int i = j+1; i < n; ++i)
879  {
880  invclov_off_diag[s][elem_ij] = v1[i] * diag_g[j];
881  elem_ij += i;
882  }
883  }
884 
885  /*# Use forward and back substitution to construct invcl.offd = lower(A^-1) */
886  for(int k = 0; k < n; ++k)
887  {
888  for(int i = 0; i < k; ++i)
889  zero_rep(v1[i]);
890 
891  /*# Forward substitution */
892  v1[k] = cmplx(diag_g[k],zero);
893 
894  for(int i = k+1; i < n; ++i)
895  {
896  zero_rep(sum);
897  elem_ij = i*(i-1)/2+k;
898  for(int j = k; j < i; ++j)
899  {
900  sum -= invclov_off_diag[s][elem_ij] * v1[j];
901  elem_ij++;
902  }
903 
904  v1[i] = sum * diag_g[i];
905  }
906 
907  /*# Backward substitution */
908  v1[n-1] = v1[n-1] * diag_g[n-1];
909 
910  for(int i = n-2; (int)i >= (int)k; --i)
911  {
912  sum = v1[i];
913 
914  int elem_ji = ((i+1)*i)/2+i;
915  for(int j = i+1; j < n; ++j)
916  {
917  sum -= adj(invclov_off_diag[s][elem_ji]) * v1[j];
918  elem_ji += j;
919  }
920  v1[i] = sum * diag_g[i];
921  }
922 
923  /*# Overwrite column k of invcl.offd */
924  invclov_diag[s][k] = real(v1[k]);
925 
926  int elem_ik = ((k+1)*k)/2+k;
927 
928  for(int i = k+1; i < n; ++i)
929  {
930  invclov_off_diag[s][elem_ik] = v1[i];
931  elem_ik += i;
932  }
933  }
934  }
935 
936  // Overwrite original element
937  for(int s=0; s < 2; s++) {
938  for(int i=0; i < 6; i++) {
939  tri_diag[site][s][i] = invclov_diag[s][i];
940  }
941  for(int i=0; i < 15; i++) {
942  tri_off_diag[site][s][i] = invclov_off_diag[s][i];
943  }
944  }
945  }
946 
947 
948  choles_done[cb] = true;
949  END_CODE();
950  }
951 
952 
953  /**
954  * Apply a dslash
955  *
956  * Performs the operation
957  *
958  * chi <- (L + D + L^dag) . psi
959  *
960  * where
961  * L is a lower triangular matrix
962  * D is the real diagonal. (stored together in type TRIANG)
963  *
964  * Arguments:
965  * \param chi result (Write)
966  * \param psi source (Read)
967  * \param isign D'^dag or D' ( MINUS | PLUS ) resp. (Read)
968  * \param cb Checkerboard of OUTPUT std::vector (Read)
969  */
970  void BAGELCloverTerm::apply(LatticeFermion& chi, const LatticeFermion& psi,
971  enum PlusMinus isign, int cb) const
972  {
973  START_CODE();
974 
975  if ( Ns != 4 )
976  QDP_error_exit("code requires Ns == 4", Ns);
977 
978  int n = 2*Nc;
979 
980 
981  if( rb[cb].hasOrderedRep() ) {
982  // unsigned int start = rb[cb].start();
983  // unsigned long n_sites = rb[cb].siteTable().size();
984  int start = rb[cb].start();
985  unsigned long n_sites=rb[cb].siteTable().size();
986 #if 0
987  //Testing code do only one site
988  unsigned long n_sites =1;
989 #endif
990  // Need to unroll over sites, so instead of having : site, struct { diag, offdiag }
991  // we have site,diag, and site, offdiag arrays
992  //
993 
994  BAGELCloverFloat* chiptr = (BAGELCloverFloat *)&( chi.elem(start).elem(0).elem(0).real());
995  const BAGELCloverFloat* psiptr = (const BAGELCloverFloat *)&(psi.elem(start).elem(0).elem(0).real());
996  const BAGELCloverFloat* offd = (const BAGELCloverFloat *)&(tri_off_diag[start][0][0].real());
997  const BAGELCloverFloat* diag = (const BAGELCloverFloat *)&(tri_diag[start][0][0].elem());
998  bagel_clover(diag, offd, psiptr, chiptr, n_sites);
999 
1000  }
1001  else {
1002  const multi1d<int>& tab = rb[cb].siteTable();
1003 
1004 
1005  for(int ssite=0; ssite < tab.size(); ++ssite) {
1006 
1007  int site = tab[ssite];
1008  unsigned long n_sites=1;
1009  // RComplex<REAL>* cchi = (RComplex<REAL>*)&(chi.elem(site).elem(0).elem(0));
1010  // const RComplex<REAL>* ppsi = (const RComplex<REAL>*)&(psi.elem(site).elem(0).elem(0));
1011 
1012  BAGELCloverFloat* chiptr = (BAGELCloverFloat *)&( chi.elem(site).elem(0).elem(0).real());
1013  const BAGELCloverFloat* psiptr = (const BAGELCloverFloat *)&(psi.elem(site).elem(0).elem(0).real());
1014  const BAGELCloverFloat* offd = (const BAGELCloverFloat *)&(tri_off_diag[site][0][0].real());
1015  const BAGELCloverFloat* diag = (const BAGELCloverFloat *)&(tri_diag[site][0][0].elem());
1016  bagel_clover(diag, offd, psiptr, chiptr, n_sites);
1017 
1018  }
1019  }
1020 
1021  getFermBC().modifyF(chi, QDP::rb[cb]);
1022 
1023  END_CODE();
1024  }
1025 
1026  /**
1027  * Apply a dslash
1028  *
1029  * Performs the operation
1030  *
1031  * chi <- (L + D + L^dag) . psi
1032  *
1033  * where
1034  * L is a lower triangular matrix
1035  * D is the real diagonal. (stored together in type TRIANG)
1036  *
1037  * Arguments:
1038  * \param chi result (Write)
1039  * \param psi source (Read)
1040  * \param isign D'^dag or D' ( MINUS | PLUS ) resp. (Read)
1041  * \param cb Checkerboard of OUTPUT std::vector (Read)
1042  */
1043  void BAGELCloverTerm::applySite(LatticeFermion& chi, const LatticeFermion& psi,
1044  enum PlusMinus isign, int site) const
1045  {
1046  START_CODE();
1047 
1048  if ( Ns != 4 )
1049  QDP_error_exit("code requires Ns == 4", Ns);
1050 
1051  int n = 2*Nc;
1052 
1053  RComplex<REAL>* cchi = (RComplex<REAL>*)&(chi.elem(site).elem(0).elem(0));
1054  const RComplex<REAL>* ppsi = (const RComplex<REAL>*)&(psi.elem(site).elem(0).elem(0));
1055 
1056 
1057  cchi[ 0] = tri_diag[site][0][ 0] * ppsi[ 0]
1058  + conj(tri_off_diag[site][0][ 0]) * ppsi[ 1]
1059  + conj(tri_off_diag[site][0][ 1]) * ppsi[ 2]
1060  + conj(tri_off_diag[site][0][ 3]) * ppsi[ 3]
1061  + conj(tri_off_diag[site][0][ 6]) * ppsi[ 4]
1062  + conj(tri_off_diag[site][0][10]) * ppsi[ 5];
1063 
1064  cchi[ 1] = tri_diag[site][0][ 1] * ppsi[ 1]
1065  + tri_off_diag[site][0][ 0] * ppsi[ 0]
1066  + conj(tri_off_diag[site][0][ 2]) * ppsi[ 2]
1067  + conj(tri_off_diag[site][0][ 4]) * ppsi[ 3]
1068  + conj(tri_off_diag[site][0][ 7]) * ppsi[ 4]
1069  + conj(tri_off_diag[site][0][11]) * ppsi[ 5];
1070 
1071  cchi[ 2] = tri_diag[site][0][ 2] * ppsi[ 2]
1072  + tri_off_diag[site][0][ 1] * ppsi[ 0]
1073  + tri_off_diag[site][0][ 2] * ppsi[ 1]
1074  + conj(tri_off_diag[site][0][ 5]) * ppsi[ 3]
1075  + conj(tri_off_diag[site][0][ 8]) * ppsi[ 4]
1076  + conj(tri_off_diag[site][0][12]) * ppsi[ 5];
1077 
1078  cchi[ 3] = tri_diag[site][0][ 3] * ppsi[ 3]
1079  + tri_off_diag[site][0][ 3] * ppsi[ 0]
1080  + tri_off_diag[site][0][ 4] * ppsi[ 1]
1081  + tri_off_diag[site][0][ 5] * ppsi[ 2]
1082  + conj(tri_off_diag[site][0][ 9]) * ppsi[ 4]
1083  + conj(tri_off_diag[site][0][13]) * ppsi[ 5];
1084 
1085  cchi[ 4] = tri_diag[site][0][ 4] * ppsi[ 4]
1086  + tri_off_diag[site][0][ 6] * ppsi[ 0]
1087  + tri_off_diag[site][0][ 7] * ppsi[ 1]
1088  + tri_off_diag[site][0][ 8] * ppsi[ 2]
1089  + tri_off_diag[site][0][ 9] * ppsi[ 3]
1090  + conj(tri_off_diag[site][0][14]) * ppsi[ 5];
1091 
1092  cchi[ 5] = tri_diag[site][0][ 5] * ppsi[ 5]
1093  + tri_off_diag[site][0][10] * ppsi[ 0]
1094  + tri_off_diag[site][0][11] * ppsi[ 1]
1095  + tri_off_diag[site][0][12] * ppsi[ 2]
1096  + tri_off_diag[site][0][13] * ppsi[ 3]
1097  + tri_off_diag[site][0][14] * ppsi[ 4];
1098 
1099  cchi[ 6] = tri_diag[site][1][ 0] * ppsi[ 6]
1100  + conj(tri_off_diag[site][1][ 0]) * ppsi[ 7]
1101  + conj(tri_off_diag[site][1][ 1]) * ppsi[ 8]
1102  + conj(tri_off_diag[site][1][ 3]) * ppsi[ 9]
1103  + conj(tri_off_diag[site][1][ 6]) * ppsi[10]
1104  + conj(tri_off_diag[site][1][10]) * ppsi[11];
1105 
1106  cchi[ 7] = tri_diag[site][1][ 1] * ppsi[ 7]
1107  + tri_off_diag[site][1][ 0] * ppsi[ 6]
1108  + conj(tri_off_diag[site][1][ 2]) * ppsi[ 8]
1109  + conj(tri_off_diag[site][1][ 4]) * ppsi[ 9]
1110  + conj(tri_off_diag[site][1][ 7]) * ppsi[10]
1111  + conj(tri_off_diag[site][1][11]) * ppsi[11];
1112 
1113  cchi[ 8] = tri_diag[site][1][ 2] * ppsi[ 8]
1114  + tri_off_diag[site][1][ 1] * ppsi[ 6]
1115  + tri_off_diag[site][1][ 2] * ppsi[ 7]
1116  + conj(tri_off_diag[site][1][ 5]) * ppsi[ 9]
1117  + conj(tri_off_diag[site][1][ 8]) * ppsi[10]
1118  + conj(tri_off_diag[site][1][12]) * ppsi[11];
1119 
1120  cchi[ 9] = tri_diag[site][1][ 3] * ppsi[ 9]
1121  + tri_off_diag[site][1][ 3] * ppsi[ 6]
1122  + tri_off_diag[site][1][ 4] * ppsi[ 7]
1123  + tri_off_diag[site][1][ 5] * ppsi[ 8]
1124  + conj(tri_off_diag[site][1][ 9]) * ppsi[10]
1125  + conj(tri_off_diag[site][1][13]) * ppsi[11];
1126 
1127  cchi[10] = tri_diag[site][1][ 4] * ppsi[10]
1128  + tri_off_diag[site][1][ 6] * ppsi[ 6]
1129  + tri_off_diag[site][1][ 7] * ppsi[ 7]
1130  + tri_off_diag[site][1][ 8] * ppsi[ 8]
1131  + tri_off_diag[site][1][ 9] * ppsi[ 9]
1132  + conj(tri_off_diag[site][1][14]) * ppsi[11];
1133 
1134  cchi[11] = tri_diag[site][1][ 5] * ppsi[11]
1135  + tri_off_diag[site][1][10] * ppsi[ 6]
1136  + tri_off_diag[site][1][11] * ppsi[ 7]
1137  + tri_off_diag[site][1][12] * ppsi[ 8]
1138  + tri_off_diag[site][1][13] * ppsi[ 9]
1139  + tri_off_diag[site][1][14] * ppsi[10];
1140 
1141 
1142  END_CODE();
1143  }
1144 
1145 
1146 
1147 
1148  //! TRIACNTR
1149  /*!
1150  * \ingroup linop
1151  *
1152  * Calculates
1153  * Tr_D ( Gamma_mat L )
1154  *
1155  * This routine is specific to Wilson fermions!
1156  *
1157  * the trace over the Dirac indices for one of the 16 Gamma matrices
1158  * and a hermitian color x spin matrix A, stored as a block diagonal
1159  * complex lower triangular matrix L and a real diagonal diag_L.
1160 
1161  * Here 0 <= mat <= 15 and
1162  * if mat = mat_1 + mat_2 * 2 + mat_3 * 4 + mat_4 * 8
1163  *
1164  * Gamma(mat) = gamma(1)^(mat_1) * gamma(2)^(mat_2) * gamma(3)^(mat_3)
1165  * * gamma(4)^(mat_4)
1166  *
1167  * Further, in basis for the Gamma matrices used, A is of the form
1168  *
1169  * | A_0 | 0 |
1170  * A = | --------- |
1171  * | 0 | A_1 |
1172  *
1173  *
1174  * Arguments:
1175  *
1176  * \param B the resulting SU(N) color matrix (Write)
1177  * \param clov clover term (Read)
1178  * \param mat label of the Gamma matrix (Read)
1179  */
1180  void BAGELCloverTerm::triacntr(LatticeColorMatrix& B, int mat, int cb) const
1181  {
1182  START_CODE();
1183 
1184  B = zero;
1185 
1186  if ( mat < 0 || mat > 15 )
1187  {
1188  QDPIO::cerr << __func__ << ": Gamma out of range: mat = " << mat << std::endl;
1189  QDP_abort(1);
1190  }
1191 
1192  switch( mat )
1193  {
1194  case 0:
1195  /*# gamma( 0) 1 0 0 0 # ( 0000 ) --> 0 */
1196  /*# 0 1 0 0 */
1197  /*# 0 0 1 0 */
1198  /*# 0 0 0 1 */
1199  /*# From diagonal part */
1200  for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite)
1201  {
1202  int site = rb[cb].siteTable()[ssite];
1203 
1204  RComplex<REAL> lctmp0;
1205  RScalar<REAL> lr_zero0;
1206  RScalar<REAL> lrtmp0;
1207 
1208  lr_zero0 = 0;
1209 
1210  for(int i0 = 0; i0 < Nc; ++i0)
1211  {
1212  lrtmp0 = tri_diag[site][0][i0];
1213  lrtmp0 += tri_diag[site][0][i0+Nc];
1214  lrtmp0 += tri_diag[site][1][i0];
1215  lrtmp0 += tri_diag[site][1][i0+Nc];
1216  B.elem(site).elem().elem(i0,i0) = cmplx(lrtmp0,lr_zero0);
1217  }
1218 
1219  /*# From lower triangular portion */
1220  int elem_ij0 = 0;
1221  for(int i0 = 1; i0 < Nc; ++i0)
1222  {
1223  int elem_ijb0 = (i0+Nc)*(i0+Nc-1)/2 + Nc;
1224 
1225  for(int j0 = 0; j0 < i0; ++j0)
1226  {
1227  lctmp0 = tri_off_diag[site][0][elem_ij0];
1228  lctmp0 += tri_off_diag[site][0][elem_ijb0];
1229  lctmp0 += tri_off_diag[site][1][elem_ij0];
1230  lctmp0 += tri_off_diag[site][1][elem_ijb0];
1231 
1232  B.elem(site).elem().elem(j0,i0) = lctmp0;
1233  B.elem(site).elem().elem(i0,j0) = adj(lctmp0);
1234 
1235 
1236  elem_ij0++;
1237  elem_ijb0++;
1238  }
1239  }
1240  }
1241  break;
1242 
1243  case 3:
1244  /*# gamma( 12) -i 0 0 0 # ( 0011 ) --> 3 */
1245  /*# 0 i 0 0 */
1246  /*# 0 0 -i 0 */
1247  /*# 0 0 0 i */
1248  /*# From diagonal part */
1249  for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite)
1250  {
1251  int site = rb[cb].siteTable()[ssite];
1252 
1253  RComplex<REAL> lctmp3;
1254  RScalar<REAL> lr_zero3;
1255  RScalar<REAL> lrtmp3;
1256 
1257  lr_zero3 = 0;
1258 
1259  for(int i3 = 0; i3 < Nc; ++i3)
1260  {
1261  lrtmp3 = tri_diag[site][0][i3+Nc];
1262  lrtmp3 -= tri_diag[site][0][i3];
1263  lrtmp3 -= tri_diag[site][1][i3];
1264  lrtmp3 += tri_diag[site][1][i3+Nc];
1265  B.elem(site).elem().elem(i3,i3) = cmplx(lr_zero3,lrtmp3);
1266  }
1267 
1268  /*# From lower triangular portion */
1269  int elem_ij3 = 0;
1270  for(int i3 = 1; i3 < Nc; ++i3)
1271  {
1272  int elem_ijb3 = (i3+Nc)*(i3+Nc-1)/2 + Nc;
1273 
1274  for(int j3 = 0; j3 < i3; ++j3)
1275  {
1276  lctmp3 = tri_off_diag[site][0][elem_ijb3];
1277  lctmp3 -= tri_off_diag[site][0][elem_ij3];
1278  lctmp3 -= tri_off_diag[site][1][elem_ij3];
1279  lctmp3 += tri_off_diag[site][1][elem_ijb3];
1280 
1281  B.elem(site).elem().elem(j3,i3) = timesI(adj(lctmp3));
1282  B.elem(site).elem().elem(i3,j3) = timesI(lctmp3);
1283 
1284  elem_ij3++;
1285  elem_ijb3++;
1286  }
1287  }
1288  }
1289  break;
1290 
1291  case 5:
1292  /*# gamma( 13) 0 -1 0 0 # ( 0101 ) --> 5 */
1293  /*# 1 0 0 0 */
1294  /*# 0 0 0 -1 */
1295  /*# 0 0 1 0 */
1296  for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite)
1297  {
1298  int site = rb[cb].siteTable()[ssite];
1299 
1300  RComplex<REAL> lctmp5;
1301  RScalar<REAL> lrtmp5;
1302 
1303  for(int i5 = 0; i5 < Nc; ++i5)
1304  {
1305  int elem_ij5 = (i5+Nc)*(i5+Nc-1)/2;
1306 
1307  for(int j5 = 0; j5 < Nc; ++j5)
1308  {
1309  int elem_ji5 = (j5+Nc)*(j5+Nc-1)/2 + i5;
1310 
1311 
1312  lctmp5 = adj(tri_off_diag[site][0][elem_ji5]);
1313  lctmp5 -= tri_off_diag[site][0][elem_ij5];
1314  lctmp5 += adj(tri_off_diag[site][1][elem_ji5]);
1315  lctmp5 -= tri_off_diag[site][1][elem_ij5];
1316 
1317 
1318  B.elem(site).elem().elem(i5,j5) = lctmp5;
1319 
1320  elem_ij5++;
1321  }
1322  }
1323  }
1324  break;
1325 
1326  case 6:
1327  /*# gamma( 23) 0 -i 0 0 # ( 0110 ) --> 6 */
1328  /*# -i 0 0 0 */
1329  /*# 0 0 0 -i */
1330  /*# 0 0 -i 0 */
1331  for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite)
1332  {
1333  int site = rb[cb].siteTable()[ssite];
1334 
1335  RComplex<REAL> lctmp6;
1336  RScalar<REAL> lrtmp6;
1337 
1338  for(int i6 = 0; i6 < Nc; ++i6)
1339  {
1340  int elem_ij6 = (i6+Nc)*(i6+Nc-1)/2;
1341 
1342  for(int j6 = 0; j6 < Nc; ++j6)
1343  {
1344  int elem_ji6 = (j6+Nc)*(j6+Nc-1)/2 + i6;
1345 
1346  lctmp6 = adj(tri_off_diag[site][0][elem_ji6]);
1347  lctmp6 += tri_off_diag[site][0][elem_ij6];
1348  lctmp6 += adj(tri_off_diag[site][1][elem_ji6]);
1349  lctmp6 += tri_off_diag[site][1][elem_ij6];
1350 
1351  B.elem(site).elem().elem(i6,j6) = timesMinusI(lctmp6);
1352 
1353  elem_ij6++;
1354  }
1355  }
1356  }
1357  break;
1358 
1359  case 9:
1360  /*# gamma( 14) 0 i 0 0 # ( 1001 ) --> 9 */
1361  /*# i 0 0 0 */
1362  /*# 0 0 0 -i */
1363  /*# 0 0 -i 0 */
1364  for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite)
1365  {
1366  int site = rb[cb].siteTable()[ssite];
1367 
1368  RComplex<REAL> lctmp9;
1369  RScalar<REAL> lrtmp9;
1370 
1371  for(int i9 = 0; i9 < Nc; ++i9)
1372  {
1373  int elem_ij9 = (i9+Nc)*(i9+Nc-1)/2;
1374 
1375  for(int j9 = 0; j9 < Nc; ++j9)
1376  {
1377  int elem_ji9 = (j9+Nc)*(j9+Nc-1)/2 + i9;
1378 
1379  lctmp9 = adj(tri_off_diag[site][0][elem_ji9]);
1380  lctmp9 += tri_off_diag[site][0][elem_ij9];
1381  lctmp9 -= adj(tri_off_diag[site][1][elem_ji9]);
1382  lctmp9 -= tri_off_diag[site][1][elem_ij9];
1383 
1384  B.elem(site).elem().elem(i9,j9) = timesI(lctmp9);
1385 
1386  elem_ij9++;
1387  }
1388  }
1389  }
1390  break;
1391 
1392  case 10:
1393  /*# gamma( 24) 0 -1 0 0 # ( 1010 ) --> 10 */
1394  /*# 1 0 0 0 */
1395  /*# 0 0 0 1 */
1396  /*# 0 0 -1 0 */
1397  for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite)
1398  {
1399  int site = rb[cb].siteTable()[ssite];
1400 
1401  RComplex<REAL> lctmp10;
1402  RScalar<REAL> lrtmp10;
1403 
1404  for(int i10 = 0; i10 < Nc; ++i10)
1405  {
1406  int elem_ij10 = (i10+Nc)*(i10+Nc-1)/2;
1407 
1408  for(int j10 = 0; j10 < Nc; ++j10)
1409  {
1410  int elem_ji10 = (j10+Nc)*(j10+Nc-1)/2 + i10;
1411 
1412  lctmp10 = adj(tri_off_diag[site][0][elem_ji10]);
1413  lctmp10 -= tri_off_diag[site][0][elem_ij10];
1414  lctmp10 -= adj(tri_off_diag[site][1][elem_ji10]);
1415  lctmp10 += tri_off_diag[site][1][elem_ij10];
1416 
1417  B.elem(site).elem().elem(i10,j10) = lctmp10;
1418 
1419  elem_ij10++;
1420  }
1421  }
1422  }
1423  break;
1424 
1425  case 12:
1426  /*# gamma( 34) i 0 0 0 # ( 1100 ) --> 12 */
1427  /*# 0 -i 0 0 */
1428  /*# 0 0 -i 0 */
1429  /*# 0 0 0 i */
1430  /*# From diagonal part */
1431  for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite)
1432  {
1433  int site = rb[cb].siteTable()[ssite];
1434 
1435  RComplex<REAL> lctmp12;
1436  RScalar<REAL> lr_zero12;
1437  RScalar<REAL> lrtmp12;
1438 
1439  lr_zero12 = 0;
1440 
1441  for(int i12 = 0; i12 < Nc; ++i12)
1442  {
1443  lrtmp12 = tri_diag[site][0][i12];
1444  lrtmp12 -= tri_diag[site][0][i12+Nc];
1445  lrtmp12 -= tri_diag[site][1][i12];
1446  lrtmp12 += tri_diag[site][1][i12+Nc];
1447  B.elem(site).elem().elem(i12,i12) = cmplx(lr_zero12,lrtmp12);
1448  }
1449 
1450  /*# From lower triangular portion */
1451  int elem_ij12 = 0;
1452  for(int i12 = 1; i12 < Nc; ++i12)
1453  {
1454  int elem_ijb12 = (i12+Nc)*(i12+Nc-1)/2 + Nc;
1455 
1456  for(int j12 = 0; j12 < i12; ++j12)
1457  {
1458  lctmp12 = tri_off_diag[site][0][elem_ij12];
1459  lctmp12 -= tri_off_diag[site][0][elem_ijb12];
1460  lctmp12 -= tri_off_diag[site][1][elem_ij12];
1461  lctmp12 += tri_off_diag[site][1][elem_ijb12];
1462 
1463  B.elem(site).elem().elem(i12,j12) = timesI(lctmp12);
1464  B.elem(site).elem().elem(j12,i12) = timesI(adj(lctmp12));
1465 
1466  elem_ij12++;
1467  elem_ijb12++;
1468  }
1469  }
1470  }
1471  break;
1472 
1473  default:
1474  {
1475  B = zero;
1476  QDPIO::cout << "BAD DEFAULT CASE HIT" << std::endl;
1477  }
1478  }
1479 
1480 
1481  END_CODE();
1482  }
1483 
1484  //! Returns the appropriate clover coefficient for indices mu and nu
1485  Real BAGELCloverTerm::getCloverCoeff(int mu, int nu) const
1486  {
1487  START_CODE();
1488 
1489  if( param.anisoParam.anisoP )
1490  {
1491  if (mu==param.anisoParam.t_dir || nu == param.anisoParam.t_dir) {
1492  return param.clovCoeffT;
1493  }
1494  else {
1495  // Otherwise return the spatial coeff
1496  return param.clovCoeffR;
1497  }
1498  }
1499  else {
1500  // If there is no anisotropy just return the spatial one, it will
1501  // be the same as the temporal one
1502  return param.clovCoeffR;
1503  }
1504 
1505  END_CODE();
1506  }
1507 
1508 }
1509 
Primary include file for CHROMA library code.
void makeClov(const multi1d< LatticeColorMatrix > &f, const Real &diag_mass)
Create the clover term on cb.
void ldagdlinv(LatticeReal &tr_log_diag, int cb)
Invert the clover term on cb using LDL^\dagger decomp.
~BAGELCloverTerm()
Free the internals.
Real getCloverCoeff(int mu, int nu) const
Calculates Tr_D ( Gamma_mat L )
void apply(LatticeFermion &chi, const LatticeFermion &psi, enum PlusMinus isign, int cb) const
Handle< FermBC< T, P, Q > > fbc
const FermBC< T, P, Q > & getFermBC() const
Return the fermion BC object for this linear operator.
multi1d< LatticeColorMatrix > u
void create(Handle< FermState< T, P, Q > > fs, const CloverFermActParams &param_, const BAGELCloverTerm &from)
Create from another.
PrimitiveClovOffDiag * tri_off_diag
void chlclovms(LatticeReal &log_diag, int cb)
Invert the clover term on cb using Cholesky decomp.
BAGELCloverTerm()
Empty constructor. Must use create later.
Double cholesDet(int cb) const
Computes the inverse of the term on cb using Cholesky.
void choles(int cb)
Computes the inverse of the term on cb using Cholesky.
void applySite(LatticeFermion &chi, const LatticeFermion &psi, enum PlusMinus isign, int site) const
Support class for fermion actions and linear operators.
Definition: state.h:94
Class for counted reference semantics.
Definition: handle.h:33
Clover term linear operator.
int mu
Definition: cool.cc:24
int nu
Definition: cool.cc:25
void read(XMLReader &xml, const std::string &path, AsqtadFermActParams &param)
Read parameters.
void write(XMLWriter &xml, const std::string &path, const AsqtadFermActParams &param)
Writer parameters.
void block(LatticeColorMatrix &u_block, const multi1d< LatticeColorMatrix > &u, int mu, int bl_level, const Real &BlkAccu, int BlkMax, int j_decay)
Construct block links.
Definition: block.cc:42
void triacntr(LatticeColorMatrix &B, int mat, int cb) const
Calculates Tr_D ( Gamma_mat L )
unsigned elem_ij
Definition: ldumul_w.cc:38
unsigned j
Definition: ldumul_w.cc:35
unsigned elem_ji
Definition: ldumul_w.cc:39
unsigned n
Definition: ldumul_w.cc:36
Calculates the antihermitian field strength tensor iF(mu,nu)
Nd
Definition: meslate.cc:74
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
gaussian(aux)
QDP::StandardOutputStream & operator<<(QDP::StandardOutputStream &s, const multi1d< int > &d)
Definition: npr_vertex_w.cc:12
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
Double one
Definition: invbicg.cc:105
push(xml_out,"Condensates")
int i
Definition: pbg5p_w.cc:55
@ PLUS
Definition: chromabase.h:45
multi1d< LatticeFermion > chi(Ncb)
RScalar< REAL > PrimitiveClovDiag[2][8]
Special structure used for triangular objects.
void mesField(multi1d< LatticeColorMatrixF > &f, const multi1d< LatticeColorMatrixF > &u)
Calculates the antihermitian field strength tensor iF(mu,nu)
Definition: mesfield.cc:80
LatticeFermion psi
Definition: mespbg5p_w.cc:35
pop(xml_out)
DComplex d
Definition: invbicg.cc:99
RComplex< REAL > PrimitiveClovOffDiag[2][16]
START_CODE()
int cb
Definition: invbicg.cc:120
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > s(Ncb)
const T1 const T2 const T3 & f3
Definition: gtest.h:1327
const T1 const T2 & f2
Definition: gtest.h:1321
const T1 & f1
Definition: gtest.h:1316
const T1 const T2 const T3 const T4 const T5 & f5
Definition: gtest.h:1339
const T1 const T2 const T3 const T4 & f4
Definition: gtest.h:1333
FloatingPoint< double > Double
Definition: gtest.h:7351
::std::string string
Definition: gtest.h:1979
int l
Definition: pade_trln_w.cc:111
Double sum
Definition: qtopcor.cc:37
Params for clover ferm acts.