CHROMA
clover_term_ssed.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Clover term linear operator
3  *
4  * This particular implementation is specific to a scalar-like
5  * architecture
6  */
7 
8 #include "chromabase.h"
10 #include "meas/glue/mesfield.h"
11 
12 
13 namespace Chroma
14 {
15  //extern void ssed_clover_apply(REAL64* diag, REAL64* offd, REAL64* psiptr, REAL64* chiptr, int n_sites);
16 
17  // Empty constructor. Must use create later
19  bool retry = false;
20 
21  // Allocate the arrays, with proper alignment
22 
23  try {
24  tri_diag = (PrimitiveClovDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovDiag) , QDP::Allocator::FAST );
25  }
26  catch( std::bad_alloc ) {
27  retry = true;
28  }
29 
30  if( retry ) {
31  try {
32  tri_diag = (PrimitiveClovDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovDiag) , QDP::Allocator::DEFAULT );
33  }
34  catch( std::bad_alloc ) {
35  QDPIO::cerr << "Failed to allocate the tri_diag" << std::endl << std::flush ;
36  QDP_abort(1);
37  }
38  }
39 
40 
41  retry = false;
42  try {
43  tri_off_diag = (PrimitiveClovOffDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovOffDiag) , QDP::Allocator::FAST );
44  }
45  catch( std::bad_alloc ) {
46  retry = true;
47  }
48 
49  if( retry ) {
50  try {
51  tri_off_diag = (PrimitiveClovOffDiag *)QDP::Allocator::theQDPAllocator::Instance().allocate( Layout::sitesOnNode()*sizeof(PrimitiveClovOffDiag) , QDP::Allocator::DEFAULT );
52  }
53  catch( std::bad_alloc ) {
54  QDPIO::cerr << "Failed to allocate the tri_off_diag" << std::endl << std::flush ;
55  QDP_abort(1);
56  }
57  }
58 
59 
60  }
61 
62 
64  if ( tri_diag != 0x0 ) {
65  QDP::Allocator::theQDPAllocator::Instance().free(tri_diag);
66  }
67 
68  if ( tri_off_diag != 0x0 ) {
69  QDP::Allocator::theQDPAllocator::Instance().free(tri_off_diag);
70  }
71  }
72 
73  //! Creation routine
75  const CloverFermActParams& param_)
76  {
77  START_CODE();
78  u = fs->getLinks();
79  fbc = fs->getFermBC();
80  param = param_;
81 
82  // Sanity check
83  if (fbc.operator->() == 0)
84  {
85  QDPIO::cerr << "SSEDCloverTerm: error: fbc is null" << std::endl;
86  QDP_abort(1);
87  }
88 
89  {
90  Real ff = where(param.anisoParam.anisoP, Real(1) / param.anisoParam.xi_0, Real(1));
91  param.clovCoeffR *= Real(0.5) * ff;
92  param.clovCoeffT *= Real(0.5);
93  }
94 
95  //
96  // Yuk. Some bits of knowledge of the dslash term are buried in the
97  // effective mass term. They show up here. If I wanted some more
98  // complicated dslash then this will have to be fixed/adjusted.
99  //
100  Real diag_mass;
101  {
102  Real ff = where(param.anisoParam.anisoP, param.anisoParam.nu / param.anisoParam.xi_0, Real(1));
103  diag_mass = 1 + (Nd-1)*ff + param.Mass;
104  }
105 
106  /* Calculate F(mu,nu) */
107  multi1d<LatticeColorMatrix> f;
108  mesField(f, u);
109  makeClov(f, diag_mass);
110 
111  choles_done.resize(rb.numSubsets());
112  for(int i=0; i < rb.numSubsets(); i++) {
113  choles_done[i] = false;
114  }
115 
116 #if 0
117  // Testing code
118  LatticeFermion foo;
119  LatticeFermion res1=zero;
120  LatticeFermion res2=zero;
121  gaussian(foo);
122  apply(res1,foo,PLUS, 0);
123  applySite(res2,foo,PLUS, 0);
124 
125  LatticeFermion diff=res1-res2;
126  XMLFileWriter fred("fred");
127  push(fred, "stuff");
128  write(fred, "diff", diff);
129  pop(fred);
130 
131  QDPIO::cout << "sqrt( norm2( diff))= " << sqrt(norm2(diff)) << std::endl << std::flush;
132  QDP_abort(1);
133 #endif
134 
135  END_CODE();
136  }
137 
138  // Now copy
140  const CloverFermActParams& param_,
141  const SSEDCloverTerm& from_)
142  {
143  START_CODE();
144 
145  const SSEDCloverTerm& from = dynamic_cast<const SSEDCloverTerm&>(from_);
146  u = fs->getLinks();
147  fbc = fs->getFermBC();
148  param = param_;
149 
150  // Sanity check
151  if (fbc.operator->() == 0) {
152  QDPIO::cerr << "SSEDCloverTerm: error: fbc is null" << std::endl;
153  QDP_abort(1);
154  }
155 
156  {
157  Real ff = where(param.anisoParam.anisoP, Real(1) / param.anisoParam.xi_0, Real(1));
158  param.clovCoeffR *= Real(0.5) * ff;
159  param.clovCoeffT *= Real(0.5);
160  }
161 
162  //
163  // Yuk. Some bits of knowledge of the dslash term are buried in the
164  // effective mass term. They show up here. If I wanted some more
165  // complicated dslash then this will have to be fixed/adjusted.
166  //
167  Real diag_mass;
168  {
169  Real ff = where(param.anisoParam.anisoP, param.anisoParam.nu / param.anisoParam.xi_0, Real(1));
170  diag_mass = 1 + (Nd-1)*ff + param.Mass;
171  }
172 
173 
174  /* Calculate F(mu,nu) */
175  //multi1d<LatticeColorMatrix> f;
176  //mesField(f, u);
177  //makeClov(f, diag_mass);
178 
179  choles_done.resize(rb.numSubsets());
180  for(int i=0; i < rb.numSubsets(); i++) {
181  choles_done[i] = from.choles_done[i];
182  }
183 
184  tr_log_diag_ = from.tr_log_diag_;
185 
186  // Should be allocated by constructor.
187  // tri_diag.resize(from.tri_diag.size());
188  // tri_off_diag.resize(from.tri_off_diag.size());
189 
190  // This I claim is so performance uncritical, that
191  // for now I won't parallelize it...
192  for(int site=0; site < Layout::sitesOnNode(); site++) {
193  for(int block = 0; block < 2; block++) {
194  for(int comp=0; comp< 6; comp++) {
195  tri_diag[site][block][comp] = from.tri_diag[site][block][comp];
196  }
197  for(int comp=0; comp < 15; comp++) {
198  tri_off_diag[site][block][comp] = from.tri_off_diag[site][block][comp];
199  }
200  }
201  }
202 
203  END_CODE();
204  }
205 
206 
207  /*
208  * MAKCLOV
209  *
210  * In this routine, MAKCLOV calculates
211 
212  * 1 - (1/4)*sigma(mu,nu) F(mu,nu)
213 
214  * using F from mesfield
215 
216  * F(mu,nu) = (1/4) sum_p (1/2) [ U_p(x) - U^dag_p(x) ]
217 
218  * using basis of SPPROD and stores in a lower triangular matrix
219  * (no diagonal) plus real diagonal
220 
221  * where
222  * U_1 = u(x,mu)*u(x+mu,nu)*u_dag(x+nu,mu)*u_dag(x,nu)
223  * U_2 = u(x,nu)*u_dag(x-mu+nu,mu)*u_dag(x-mu,nu)*u(x-mu,mu)
224  * U_3 = u_dag(x-mu,mu)*u_dag(x-mu-nu,nu)*u(x-mu-nu,mu)*u(x-nu,nu)
225  * U_4 = u_dag(x-nu,nu)*u(x-nu,mu)*u(x-nu+mu,nu)*u_dag(x,mu)
226 
227  * and
228 
229  * | sigF(1) sigF(3) 0 0 |
230  * sigF = | sigF(5) -sigF(1) 0 0 |
231  * | 0 0 -sigF(0) -sigF(2) |
232  * | 0 0 -sigF(4) sigF(0) |
233  * where
234  * sigF(i) is a color matrix
235 
236  * sigF(0) = i*(ClovT*E_z + ClovR*B_z)
237  * = i*(ClovT*F(3,2) + ClovR*F(1,0))
238  * sigF(1) = i*(ClovT*E_z - ClovR*B_z)
239  * = i*(ClovT*F(3,2) - ClovR*F(1,0))
240  * sigF(2) = i*(E_+ + B_+)
241  * sigF(3) = i*(E_+ - B_+)
242  * sigF(4) = i*(E_- + B_-)
243  * sigF(5) = i*(E_- - B_-)
244  * i*E_+ = (i*ClovT*E_x - ClovT*E_y)
245  * = (i*ClovT*F(3,0) - ClovT*F(3,1))
246  * i*E_- = (i*ClovT*E_x + ClovT*E_y)
247  * = (i*ClovT*F(3,0) + ClovT*F(3,1))
248  * i*B_+ = (i*ClovR*B_x - ClovR*B_y)
249  * = (i*ClovR*F(2,1) + ClovR*F(2,0))
250  * i*B_- = (i*ClovR*B_x + ClovR*B_y)
251  * = (i*ClovR*F(2,1) - ClovR*F(2,0))
252 
253  * NOTE: I am using i*F of the usual F defined by UKQCD, Heatlie et.al.
254 
255  * NOTE: the above definitions assume that the time direction, t_dir,
256  * is 3. In general F(k,j) is multiplied with ClovT if either
257  * k=t_dir or j=t_dir, and with ClovR otherwise.
258 
259  *+++
260  * Here are some notes on the origin of this routine. NOTE, ClovCoeff or u0
261  * are not actually used in MAKCLOV.
262  *
263  * The clover mass term is suppose to act on a std::vector like
264  *
265  * chi = (1 - (ClovCoeff/u0^3) * kappa/4 * sum_mu sum_nu F(mu,nu)*sigma(mu,nu)) * psi
266 
267  * Definitions used here (NOTE: no "i")
268  * sigma(mu,nu) = gamma(mu)*gamma(nu) - gamma(nu)*gamma(mu)
269  * = 2*gamma(mu)*gamma(nu) for mu != nu
270  *
271  * chi = sum_mu sum_nu F(mu,nu)*gamma(mu)*gamma(nu)*psi for mu < nu
272  * = (1/2) * sum_mu sum_nu F(mu,nu)*gamma(mu)*gamma(nu)*psi for mu != nu
273  * = (1/4) * sum_mu sum_nu F(mu,nu)*sigma(mu,nu)*psi
274  *
275  *
276  * chi = (1 - (ClovCoeff/u0^3) * kappa/4 * sum_mu sum_nu F(mu,nu)*sigma(mu,nu)) * psi
277  * = psi - (ClovCoeff/u0^3) * kappa * chi
278  * == psi - kappa * chi
279  *
280  * We have absorbed ClovCoeff/u0^3 into kappa. A u0 was previously absorbed into kappa
281  * for compatibility to ancient conventions.
282  *---
283 
284  * Arguments:
285  * \param f field strength tensor F(cb,mu,nu) (Read)
286  * \param diag_mass effective mass term (Read)
287  */
288 
289  namespace SSEDCloverEnv {
290  struct MakeClovArgs {
291  LatticeColorMatrix& f0;
292  LatticeColorMatrix& f1;
293  LatticeColorMatrix& f2;
294  LatticeColorMatrix& f3;
295  LatticeColorMatrix& f4;
296  LatticeColorMatrix& f5;
299  const Real& diag_mass;
300  };
301 
302  inline
303  void makeClovSiteLoop(int lo, int hi, int myId, MakeClovArgs* a)
304  {
305  LatticeColorMatrix& f0=a->f0;
306  LatticeColorMatrix& f1=a->f1;
307  LatticeColorMatrix& f2=a->f2;
308  LatticeColorMatrix& f3=a->f3;
309  LatticeColorMatrix& f4=a->f4;
310  LatticeColorMatrix& f5=a->f5;
311  PrimitiveClovDiag* tri_diag=a->tri_diag;
312  PrimitiveClovOffDiag* tri_off_diag=a->tri_off_diag;
313  const Real& diag_mass = a->diag_mass;
314 
315  for(int site = lo; site < hi; ++site) {
316 
317  for(int jj = 0; jj < 2; jj++) {
318 
319  for(int ii = 0; ii < 2*Nc; ii++) {
320 
321  tri_diag[site][jj][ii] = diag_mass.elem().elem().elem();
322  }
323  }
324 
325 
326  /* The appropriate clover coeffients are already included in the
327  field strengths F(mu,nu)! */
328 
329  RComplex<REAL> E_minus;
330  RComplex<REAL> B_minus;
331  RComplex<REAL> ctmp_0;
332  RComplex<REAL> ctmp_1;
333  RScalar<REAL> rtmp_0;
334  RScalar<REAL> rtmp_1;
335 
336  for(int i = 0; i < Nc; ++i) {
337 
338  /*# diag_L(i,0) = 1 - i*diag(E_z - B_z) */
339  /*# = 1 - i*diag(F(3,2) - F(1,0)) */
340  ctmp_0 = f5.elem(site).elem().elem(i,i);
341  ctmp_0 -= f0.elem(site).elem().elem(i,i);
342  rtmp_0 = imag(ctmp_0);
343  tri_diag[site][0][i] += rtmp_0;
344 
345  /*# diag_L(i+Nc,0) = 1 + i*diag(E_z - B_z) */
346  /*# = 1 + i*diag(F(3,2) - F(1,0)) */
347  tri_diag[site][0][i+Nc] -= rtmp_0;
348 
349  /*# diag_L(i,1) = 1 + i*diag(E_z + B_z) */
350  /*# = 1 + i*diag(F(3,2) + F(1,0)) */
351  ctmp_1 = f5.elem(site).elem().elem(i,i);
352  ctmp_1 += f0.elem(site).elem().elem(i,i);
353  rtmp_1 = imag(ctmp_1);
354  tri_diag[site][1][i] -= rtmp_1;
355 
356  /*# diag_L(i+Nc,1) = 1 - i*diag(E_z + B_z) */
357  /*# = 1 - i*diag(F(3,2) + F(1,0)) */
358  tri_diag[site][1][i+Nc] += rtmp_1;
359  }
360 
361  /*# Construct lower triangular portion */
362  /*# Block diagonal terms */
363  for(int i = 1; i < Nc; ++i) {
364 
365  for(int j = 0; j < i; ++j) {
366 
367  int elem_ij = i*(i-1)/2 + j;
368  int elem_tmp = (i+Nc)*(i+Nc-1)/2 + j+Nc;
369 
370  /*# L(i,j,0) = -i*(E_z - B_z)[i,j] */
371  /*# = -i*(F(3,2) - F(1,0)) */
372  ctmp_0 = f0.elem(site).elem().elem(i,j);
373  ctmp_0 -= f5.elem(site).elem().elem(i,j);
374  tri_off_diag[site][0][elem_ij] = timesI(ctmp_0);
375 
376  /*# L(i+Nc,j+Nc,0) = +i*(E_z - B_z)[i,j] */
377  /*# = +i*(F(3,2) - F(1,0)) */
378  tri_off_diag[site][0][elem_tmp] = -tri_off_diag[site][0][elem_ij];
379 
380  /*# L(i,j,1) = i*(E_z + B_z)[i,j] */
381  /*# = i*(F(3,2) + F(1,0)) */
382  ctmp_1 = f5.elem(site).elem().elem(i,j);
383  ctmp_1 += f0.elem(site).elem().elem(i,j);
384  tri_off_diag[site][1][elem_ij] = timesI(ctmp_1);
385 
386  /*# L(i+Nc,j+Nc,1) = -i*(E_z + B_z)[i,j] */
387  /*# = -i*(F(3,2) + F(1,0)) */
388  tri_off_diag[site][1][elem_tmp] = -tri_off_diag[site][1][elem_ij];
389  }
390  }
391 
392  /*# Off-diagonal */
393  for(int i = 0; i < Nc; ++i) {
394 
395  for(int j = 0; j < Nc; ++j) {
396 
397  // Flipped index
398  // by swapping i <-> j. In the past i would run slow
399  // and now j runs slow
400  int elem_ij = (i+Nc)*(i+Nc-1)/2 + j;
401 
402  /*# i*E_- = (i*E_x + E_y) */
403  /*# = (i*F(3,0) + F(3,1)) */
404  E_minus = timesI(f2.elem(site).elem().elem(i,j));
405  E_minus += f4.elem(site).elem().elem(i,j);
406 
407  /*# i*B_- = (i*B_x + B_y) */
408  /*# = (i*F(2,1) - F(2,0)) */
409  B_minus = timesI(f3.elem(site).elem().elem(i,j));
410  B_minus -= f1.elem(site).elem().elem(i,j);
411 
412  /*# L(i+Nc,j,0) = -i*(E_- - B_-) */
413  tri_off_diag[site][0][elem_ij] = B_minus - E_minus;
414 
415  /*# L(i+Nc,j,1) = +i*(E_- + B_-) */
416  tri_off_diag[site][1][elem_ij] = E_minus + B_minus;
417  }
418  }
419  } // End Site Loop
420  } // End Function
421  }; // End Namespace
422 
423 
424  void SSEDCloverTerm::makeClov(const multi1d<LatticeColorMatrix>& f, const Real& diag_mass)
425  {
426  START_CODE();
427 
428  LatticeColorMatrix f0;
429  LatticeColorMatrix f1;
430  LatticeColorMatrix f2;
431  LatticeColorMatrix f3;
432  LatticeColorMatrix f4;
433  LatticeColorMatrix f5;
434 
435  const int nodeSites = QDP::Layout::sitesOnNode();
436 
437  START_CODE();
438 
439  if ( Nd != 4 )
440  QDP_error_exit("expecting Nd == 4", Nd);
441 
442  if ( Ns != 4 )
443  QDP_error_exit("expecting Ns == 4", Ns);
444 
445  f0 = f[0] * getCloverCoeff(0,1);
446  f1 = f[1] * getCloverCoeff(0,2);
447  f2 = f[2] * getCloverCoeff(0,3);
448  f3 = f[3] * getCloverCoeff(1,2);
449  f4 = f[4] * getCloverCoeff(1,3);
450  f5 = f[5] * getCloverCoeff(2,3);
451 
453  dispatch_to_threads(nodeSites, a, SSEDCloverEnv::makeClovSiteLoop);
454 
455  END_CODE();
456  }
457 
458 
459  //! Invert
460  /*!
461  * Computes the inverse of the term on cb using Cholesky
462  */
464  {
465  START_CODE();
466 
467  // When you are doing the cholesky - also fill out the trace_log_diag piece)
468  //chlclovms(tr_log_diag_, cb);
470 
471  END_CODE();
472  }
473 
474 
475  //! Invert
476  /*!
477  * Computes the inverse of the term on cb using Cholesky
478  *
479  * \return logarithm of the determinant
480  */
482  {
483  START_CODE();
484 
485  if( choles_done[cb] == false )
486  {
487  QDPIO::cout << "Error: YOu have not done the Cholesky.on this operator on this subset" << std::endl;
488  QDPIO::cout << "You sure you shouldn't be asking invclov?" << std::endl;
489  QDP_abort(1);
490  }
491 
492 
493 
494  END_CODE();
495 
496  return sum(tr_log_diag_, rb[cb]);
497  }
498 
499 
500  namespace SSEDCloverEnv {
501  struct LDagDLInvArgs {
502  LatticeReal& tr_log_diag;
505  int cb;
506  };
507 
508  inline
509  void lDagDLInvSiteLoop(int lo, int hi, int myId, LDagDLInvArgs *a)
510  {
511  LatticeReal& tr_log_diag = a->tr_log_diag;
512  PrimitiveClovDiag* tri_diag = a->tri_diag;
513  PrimitiveClovOffDiag* tri_off_diag = a->tri_off_diag;
514  int cb = a->cb;
515 
516  RScalar<REAL> zip=0;
517 
518  int N = 2*Nc;
519 
520  // Loop through the sites.
521  for(int ssite=lo; ssite < hi; ++ssite) {
522 
523  int site = rb[cb].siteTable()[ssite];
524 
525  int site_neg_logdet=0;
526  // Loop through the blocks on the site.
527  for(int block=0; block < 2; block++) {
528 
529  // Triangular storage
530  // Big arrays to get good alignment...
531 
532  RScalar<REAL> inv_d[8] QDP_ALIGN16;
533  RComplex<REAL> inv_offd[16] QDP_ALIGN16;
534  RComplex<REAL> v[8] QDP_ALIGN16;
535  RScalar<REAL> diag_g[8] QDP_ALIGN16;
536  // Algorithm 4.1.2 LDL^\dagger Decomposition
537  // From Golub, van Loan 3rd ed, page 139
538  for(int i=0; i < N; i++) {
539  inv_d[i] = tri_diag[site][block][i];
540  }
541  for(int i=0; i < 15; i++) {
542  inv_offd[i] =tri_off_diag[site][block][i];
543  }
544 
545  for(int j=0; j < N; ++j) {
546 
547  // Compute v(0:j-1)
548  //
549  // for i=0:j-2
550  // v(i) = A(j,i) A(i,i)
551  // end
552 
553 
554  for(int i=0; i < j; i++) {
555  int elem_ji = j*(j-1)/2 + i;
556 
557  RComplex<REAL> A_ii = cmplx( inv_d[i], zip );
558  v[i] = A_ii*adj(inv_offd[elem_ji]);
559  }
560 
561  // v(j) = A(j,j) - A(j, 0:j-2) v(0:j-2)
562  // ^ This is done with a loop over k ie:
563  //
564  // v(j) = A(j,j) - sum_k A*(j,k) v(k) k=0...j-2
565  //
566  // = A(j,j) - sum_k A*(j,k) A(j,k) A(k,k)
567  // = A(j,j) - sum_k | A(j,k) |^2 A(k,k)
568 
569  v[j] = cmplx(inv_d[j],zip);
570 
571  for(int k=0; k < j; k++) {
572  int elem_jk = j*(j-1)/2 + k;
573  v[j] -= inv_offd[elem_jk]*v[k];
574  }
575 
576 
577  // At this point in time v[j] has to be real, since
578  // A(j,j) is from diag ie real and all | A(j,k) |^2 is real
579  // as is A(k,k)
580 
581  // A(j,j) is the diagonal element - so store it.
582  inv_d[j] = real( v[j] );
583 
584  // Last line of algorithm:
585  // A( j+1 : n, j) = ( A(j+1:n, j) - A(j+1:n, 1:j-1)v(1:k-1) ) / v(j)
586  //
587  // use k as first colon notation and l as second so
588  //
589  // for k=j+1 < n-1
590  // A(k,j) = A(k,j) ;
591  // for l=0 < j-1
592  // A(k,j) -= A(k, l) v(l)
593  // end
594  // A(k,j) /= v(j);
595  //
596  for(int k=j+1; k < N; k++) {
597  int elem_kj = k*(k-1)/2 + j;
598  for(int l=0; l < j; l++) {
599  int elem_kl = k*(k-1)/2 + l;
600  inv_offd[elem_kj] -= inv_offd[elem_kl] * v[l];
601  }
602  inv_offd[elem_kj] /= v[j];
603  }
604  }
605 
606  // Now fix up the inverse
607  RScalar<REAL> one;
608  one.elem() = (REAL)1;
609 
610  for(int i=0; i < N; i++) {
611  diag_g[i] = one/inv_d[i];
612 
613  // Compute the trace log
614  // NB we are always doing trace log | A |
615  // (because we are always working with actually A^\dagger A
616  // even in one flavour case where we square root)
617  tr_log_diag.elem(site).elem().elem().elem() += log(fabs(inv_d[i].elem()));
618  // However, it is worth counting just the no of negative logdets
619  // on site
620  if( inv_d[i].elem() < 0 ) {
621  site_neg_logdet++;
622  }
623  }
624  // Now we need to invert the L D L^\dagger
625  // We can do this by solving:
626  //
627  // L D L^\dagger M^{-1} = 1
628  //
629  // This can be done by solving L D X = 1 (X = L^\dagger M^{-1})
630  //
631  // Then solving L^\dagger M^{-1} = X
632  //
633  // LD is lower diagonal and so X will also be lower diagonal.
634  // LD X = 1 can be solved by forward substitution.
635  //
636  // Likewise L^\dagger is strictly upper triagonal and so
637  // L^\dagger M^{-1} = X can be solved by forward substitution.
638  RComplex<REAL> sum;
639  for(int k = 0; k < N; ++k) {
640 
641  for(int i = 0; i < k; ++i) {
642  zero_rep(v[i]);
643  }
644 
645  /*# Forward substitution */
646 
647  // The first element is the inverse of the diagonal
648  v[k] = cmplx(diag_g[k],zip);
649 
650  for(int i = k+1; i < N; ++i) {
651  zero_rep(v[i]);
652 
653  for(int j = k; j < i; ++j) {
654  int elem_ij = i*(i-1)/2+j;
655 
656  // subtract l_ij*d_j*x_{kj}
657  v[i] -= inv_offd[elem_ij] *inv_d[j]*v[j];
658 
659  }
660 
661  // scale out by 1/d_i
662  v[i] *= diag_g[i];
663  }
664 
665  /*# Backward substitution */
666  // V[N-1] remains unchanged
667  // Start from V[N-2]
668 
669  for(int i = N-2; (int)i >= (int)k; --i) {
670  for(int j = i+1; j < N; ++j) {
671  int elem_ji = j*(j-1)/2 + i;
672  // Subtract terms of typ (l_ji)*x_kj
673  v[i] -= adj(inv_offd[elem_ji]) * v[j];
674  }
675  }
676 
677  /*# Overwrite column k of invcl.offd */
678  inv_d[k] = real(v[k]);
679  for(int i = k+1; i < N; ++i) {
680 
681  int elem_ik = i*(i-1)/2+k;
682  inv_offd[elem_ik] = v[i];
683  }
684  }
685 
686 
687  // Overwrite original data
688  for(int i=0; i < N; i++) {
689  tri_diag[site][block][i] = inv_d[i];
690  }
691  for(int i=0; i < 15; i++) {
692  tri_off_diag[site][block][i] = inv_offd[i];
693  }
694  }
695 
696  if( site_neg_logdet != 0 ) {
697  // Report if site had odd number of negative terms. (-ve def)
698  std::cout << "WARNING: Odd number of negative terms in Clover DET ("
699  << site_neg_logdet<< ") at site: " << site << std::endl;
700  }
701  } // End Site Loop
702  } // End Function
703  } // End Namespace
704 
705  /*! An LDL^\dag decomposition and inversion? */
706  void SSEDCloverTerm::ldagdlinv(LatticeReal& tr_log_diag, int cb)
707  {
708  START_CODE();
709 
710  if ( 2*Nc < 3 )
711  QDP_error_exit("Matrix is too small", Nc, Ns);
712 
713  // Zero trace log
714  tr_log_diag[rb[cb]] = zero;
715 
717  dispatch_to_threads(rb[cb].numSiteTable(), a, SSEDCloverEnv::lDagDLInvSiteLoop);
718 
719 
720  // This comes from the days when we used to do Cholesky
721  choles_done[cb] = true;
722  END_CODE();
723  }
724 
725 
726  /**
727  * Apply a dslash
728  *
729  * Performs the operation
730  *
731  * chi <- (L + D + L^dag) . psi
732  *
733  * where
734  * L is a lower triangular matrix
735  * D is the real diagonal. (stored together in type TRIANG)
736  *
737  * Arguments:
738  * \param chi result (Write)
739  * \param psi source (Read)
740  * \param isign D'^dag or D' ( MINUS | PLUS ) resp. (Read)
741  * \param cb Checkerboard of OUTPUT std::vector (Read)
742  */
743  extern void ssed_clover_apply(REAL64* diag, REAL64* offd, REAL64* psiptr, REAL64* chiptr, int n_sites);
744 
745  namespace SSEDCloverEnv{
746  struct ApplyArgs {
747  LatticeFermion& chi;
748  const LatticeFermion& psi;
751  int cb;
752  };
753 
754  // Dispatch function for threading
755  inline
756  void orderedApplySiteLoop(int lo, int hi, int myID,
757  ApplyArgs* a)
758  {
759  int n_4vec=hi-lo;
760  int start=rb[ a->cb ].start()+lo;
761 
762  REAL64* chiptr = (REAL64 *)&( a->chi.elem(start).elem(0).elem(0).real());
763  REAL64* psiptr = (REAL64 *)&( a->psi.elem(start).elem(0).elem(0).real());
764  REAL64* offd = (REAL64 *)&(a->tri_off[start][0][0].real());
765  REAL64* diag = (REAL64 *)&(a->tri_diag[start][0][0].elem());
766  ssed_clover_apply(diag, offd, psiptr, chiptr, n_4vec);
767  }
768 
769  inline
770  void unorderedApplySiteLoop(int lo, int hi, int myId, ApplyArgs* a)
771  {
772  int cb = a->cb;
773  const multi1d<int>& tab = rb[cb].siteTable();
774 
775 
776  for(int ssite=lo; ssite < hi; ++ssite) {
777 
778  int site = tab[ssite];
779  unsigned long n_sites=1;
780 
781  REAL64* chiptr = (REAL64 *)&( a->chi.elem(site).elem(0).elem(0).real());
782  REAL64* psiptr = (REAL64 *)&(a->psi.elem(site).elem(0).elem(0).real());
783  REAL64* offd = (REAL64 *)&(a->tri_off[site][0][0].real());
784  REAL64* diag = (REAL64 *)&(a->tri_diag[site][0][0].elem());
785  ssed_clover_apply(diag, offd, psiptr, chiptr, n_sites);
786 
787  }
788  }
789  }
790 
791  void SSEDCloverTerm::apply(LatticeFermion& chi, const LatticeFermion& psi,
792  enum PlusMinus isign, int cb) const
793  {
794  START_CODE();
795 
796  if ( Ns != 4 )
797  QDP_error_exit("code requires Ns == 4", Ns);
798 
799  int n = 2*Nc;
800 
802 
803  if( rb[cb].hasOrderedRep() ) {
804 
805  dispatch_to_threads(rb[cb].siteTable().size(), a,
807 
808 
809  }
810  else {
811  dispatch_to_threads(rb[cb].siteTable().size(), a,
813  }
814 
815  getFermBC().modifyF(chi, QDP::rb[cb]);
816 
817  END_CODE();
818  }
819 
820  /**
821  * Apply a dslash
822  *
823  * Performs the operation
824  *
825  * chi <- (L + D + L^dag) . psi
826  *
827  * where
828  * L is a lower triangular matrix
829  * D is the real diagonal. (stored together in type TRIANG)
830  *
831  * Arguments:
832  * \param chi result (Write)
833  * \param psi source (Read)
834  * \param isign D'^dag or D' ( MINUS | PLUS ) resp. (Read)
835  * \param cb Checkerboard of OUTPUT std::vector (Read)
836  */
837  void SSEDCloverTerm::applySite(LatticeFermion& chi, const LatticeFermion& psi,
838  enum PlusMinus isign, int site) const
839  {
840  START_CODE();
841 
842  if ( Ns != 4 )
843  QDP_error_exit("code requires Ns == 4", Ns);
844 
845  int n = 2*Nc;
846 
847  RComplex<REAL>* cchi = (RComplex<REAL>*)&(chi.elem(site).elem(0).elem(0));
848  const RComplex<REAL>* ppsi = (const RComplex<REAL>*)&(psi.elem(site).elem(0).elem(0));
849 
850 
851  cchi[ 0] = tri_diag[site][0][ 0] * ppsi[ 0]
852  + conj(tri_off_diag[site][0][ 0]) * ppsi[ 1]
853  + conj(tri_off_diag[site][0][ 1]) * ppsi[ 2]
854  + conj(tri_off_diag[site][0][ 3]) * ppsi[ 3]
855  + conj(tri_off_diag[site][0][ 6]) * ppsi[ 4]
856  + conj(tri_off_diag[site][0][10]) * ppsi[ 5];
857 
858  cchi[ 1] = tri_diag[site][0][ 1] * ppsi[ 1]
859  + tri_off_diag[site][0][ 0] * ppsi[ 0]
860  + conj(tri_off_diag[site][0][ 2]) * ppsi[ 2]
861  + conj(tri_off_diag[site][0][ 4]) * ppsi[ 3]
862  + conj(tri_off_diag[site][0][ 7]) * ppsi[ 4]
863  + conj(tri_off_diag[site][0][11]) * ppsi[ 5];
864 
865  cchi[ 2] = tri_diag[site][0][ 2] * ppsi[ 2]
866  + tri_off_diag[site][0][ 1] * ppsi[ 0]
867  + tri_off_diag[site][0][ 2] * ppsi[ 1]
868  + conj(tri_off_diag[site][0][ 5]) * ppsi[ 3]
869  + conj(tri_off_diag[site][0][ 8]) * ppsi[ 4]
870  + conj(tri_off_diag[site][0][12]) * ppsi[ 5];
871 
872  cchi[ 3] = tri_diag[site][0][ 3] * ppsi[ 3]
873  + tri_off_diag[site][0][ 3] * ppsi[ 0]
874  + tri_off_diag[site][0][ 4] * ppsi[ 1]
875  + tri_off_diag[site][0][ 5] * ppsi[ 2]
876  + conj(tri_off_diag[site][0][ 9]) * ppsi[ 4]
877  + conj(tri_off_diag[site][0][13]) * ppsi[ 5];
878 
879  cchi[ 4] = tri_diag[site][0][ 4] * ppsi[ 4]
880  + tri_off_diag[site][0][ 6] * ppsi[ 0]
881  + tri_off_diag[site][0][ 7] * ppsi[ 1]
882  + tri_off_diag[site][0][ 8] * ppsi[ 2]
883  + tri_off_diag[site][0][ 9] * ppsi[ 3]
884  + conj(tri_off_diag[site][0][14]) * ppsi[ 5];
885 
886  cchi[ 5] = tri_diag[site][0][ 5] * ppsi[ 5]
887  + tri_off_diag[site][0][10] * ppsi[ 0]
888  + tri_off_diag[site][0][11] * ppsi[ 1]
889  + tri_off_diag[site][0][12] * ppsi[ 2]
890  + tri_off_diag[site][0][13] * ppsi[ 3]
891  + tri_off_diag[site][0][14] * ppsi[ 4];
892 
893  cchi[ 6] = tri_diag[site][1][ 0] * ppsi[ 6]
894  + conj(tri_off_diag[site][1][ 0]) * ppsi[ 7]
895  + conj(tri_off_diag[site][1][ 1]) * ppsi[ 8]
896  + conj(tri_off_diag[site][1][ 3]) * ppsi[ 9]
897  + conj(tri_off_diag[site][1][ 6]) * ppsi[10]
898  + conj(tri_off_diag[site][1][10]) * ppsi[11];
899 
900  cchi[ 7] = tri_diag[site][1][ 1] * ppsi[ 7]
901  + tri_off_diag[site][1][ 0] * ppsi[ 6]
902  + conj(tri_off_diag[site][1][ 2]) * ppsi[ 8]
903  + conj(tri_off_diag[site][1][ 4]) * ppsi[ 9]
904  + conj(tri_off_diag[site][1][ 7]) * ppsi[10]
905  + conj(tri_off_diag[site][1][11]) * ppsi[11];
906 
907  cchi[ 8] = tri_diag[site][1][ 2] * ppsi[ 8]
908  + tri_off_diag[site][1][ 1] * ppsi[ 6]
909  + tri_off_diag[site][1][ 2] * ppsi[ 7]
910  + conj(tri_off_diag[site][1][ 5]) * ppsi[ 9]
911  + conj(tri_off_diag[site][1][ 8]) * ppsi[10]
912  + conj(tri_off_diag[site][1][12]) * ppsi[11];
913 
914  cchi[ 9] = tri_diag[site][1][ 3] * ppsi[ 9]
915  + tri_off_diag[site][1][ 3] * ppsi[ 6]
916  + tri_off_diag[site][1][ 4] * ppsi[ 7]
917  + tri_off_diag[site][1][ 5] * ppsi[ 8]
918  + conj(tri_off_diag[site][1][ 9]) * ppsi[10]
919  + conj(tri_off_diag[site][1][13]) * ppsi[11];
920 
921  cchi[10] = tri_diag[site][1][ 4] * ppsi[10]
922  + tri_off_diag[site][1][ 6] * ppsi[ 6]
923  + tri_off_diag[site][1][ 7] * ppsi[ 7]
924  + tri_off_diag[site][1][ 8] * ppsi[ 8]
925  + tri_off_diag[site][1][ 9] * ppsi[ 9]
926  + conj(tri_off_diag[site][1][14]) * ppsi[11];
927 
928  cchi[11] = tri_diag[site][1][ 5] * ppsi[11]
929  + tri_off_diag[site][1][10] * ppsi[ 6]
930  + tri_off_diag[site][1][11] * ppsi[ 7]
931  + tri_off_diag[site][1][12] * ppsi[ 8]
932  + tri_off_diag[site][1][13] * ppsi[ 9]
933  + tri_off_diag[site][1][14] * ppsi[10];
934 
935 
936  END_CODE();
937  }
938 
939 
940 
941 
942  //! TRIACNTR
943  /*!
944  * \ingroup linop
945  *
946  * Calculates
947  * Tr_D ( Gamma_mat L )
948  *
949  * This routine is specific to Wilson fermions!
950  *
951  * the trace over the Dirac indices for one of the 16 Gamma matrices
952  * and a hermitian color x spin matrix A, stored as a block diagonal
953  * complex lower triangular matrix L and a real diagonal diag_L.
954 
955  * Here 0 <= mat <= 15 and
956  * if mat = mat_1 + mat_2 * 2 + mat_3 * 4 + mat_4 * 8
957  *
958  * Gamma(mat) = gamma(1)^(mat_1) * gamma(2)^(mat_2) * gamma(3)^(mat_3)
959  * * gamma(4)^(mat_4)
960  *
961  * Further, in basis for the Gamma matrices used, A is of the form
962  *
963  * | A_0 | 0 |
964  * A = | --------- |
965  * | 0 | A_1 |
966  *
967  *
968  * Arguments:
969  *
970  * \param B the resulting SU(N) color matrix (Write)
971  * \param clov clover term (Read)
972  * \param mat label of the Gamma matrix (Read)
973  */
974 
975  namespace SSEDCloverEnv {
976  struct TriaCntrArgs {
977  LatticeColorMatrix& B;
980  int mat;
981  int cb;
982  };
983 
984 
985  inline
986  void triaCntrSiteLoop(int lo, int hi, int myId, TriaCntrArgs* a)
987  {
988  PrimitiveClovDiag* tri_diag = a->tri_diag;
989  PrimitiveClovOffDiag* tri_off_diag = a->tri_off;
990  LatticeColorMatrix& B = a->B;
991  int mat = a->mat;
992  int cb = a->cb;
993 
994 
995  for(int ssite=0; ssite < rb[cb].numSiteTable(); ++ssite) {
996 
997  int site = rb[cb].siteTable()[ssite];
998 
999  switch( mat ) {
1000 
1001  case 0:
1002  /*# gamma( 0) 1 0 0 0 # ( 0000 ) --> 0 */
1003  /*# 0 1 0 0 */
1004  /*# 0 0 1 0 */
1005  /*# 0 0 0 1 */
1006  /*# From diagonal part */
1007  {
1008  RComplex<REAL> lctmp0;
1009  RScalar<REAL> lr_zero0;
1010  RScalar<REAL> lrtmp0;
1011 
1012  lr_zero0 = 0;
1013 
1014  for(int i0 = 0; i0 < Nc; ++i0) {
1015  lrtmp0 = tri_diag[site][0][i0];
1016  lrtmp0 += tri_diag[site][0][i0+Nc];
1017  lrtmp0 += tri_diag[site][1][i0];
1018  lrtmp0 += tri_diag[site][1][i0+Nc];
1019  B.elem(site).elem().elem(i0,i0) = cmplx(lrtmp0,lr_zero0);
1020  }
1021 
1022  /*# From lower triangular portion */
1023  int elem_ij0 = 0;
1024  for(int i0 = 1; i0 < Nc; ++i0) {
1025 
1026  int elem_ijb0 = (i0+Nc)*(i0+Nc-1)/2 + Nc;
1027 
1028  for(int j0 = 0; j0 < i0; ++j0) {
1029 
1030  lctmp0 = tri_off_diag[site][0][elem_ij0];
1031  lctmp0 += tri_off_diag[site][0][elem_ijb0];
1032  lctmp0 += tri_off_diag[site][1][elem_ij0];
1033  lctmp0 += tri_off_diag[site][1][elem_ijb0];
1034 
1035  B.elem(site).elem().elem(j0,i0) = lctmp0;
1036  B.elem(site).elem().elem(i0,j0) = adj(lctmp0);
1037 
1038 
1039  elem_ij0++;
1040  elem_ijb0++;
1041  }
1042  }
1043  }
1044  break;
1045 
1046  case 3:
1047  /*# gamma( 12) -i 0 0 0 # ( 0011 ) --> 3 */
1048  /*# 0 i 0 0 */
1049  /*# 0 0 -i 0 */
1050  /*# 0 0 0 i */
1051  /*# From diagonal part */
1052  {
1053 
1054  RComplex<REAL> lctmp3;
1055  RScalar<REAL> lr_zero3;
1056  RScalar<REAL> lrtmp3;
1057 
1058  lr_zero3 = 0;
1059 
1060  for(int i3 = 0; i3 < Nc; ++i3) {
1061 
1062  lrtmp3 = tri_diag[site][0][i3+Nc];
1063  lrtmp3 -= tri_diag[site][0][i3];
1064  lrtmp3 -= tri_diag[site][1][i3];
1065  lrtmp3 += tri_diag[site][1][i3+Nc];
1066  B.elem(site).elem().elem(i3,i3) = cmplx(lr_zero3,lrtmp3);
1067  }
1068 
1069  /*# From lower triangular portion */
1070  int elem_ij3 = 0;
1071  for(int i3 = 1; i3 < Nc; ++i3) {
1072 
1073  int elem_ijb3 = (i3+Nc)*(i3+Nc-1)/2 + Nc;
1074 
1075  for(int j3 = 0; j3 < i3; ++j3) {
1076 
1077  lctmp3 = tri_off_diag[site][0][elem_ijb3];
1078  lctmp3 -= tri_off_diag[site][0][elem_ij3];
1079  lctmp3 -= tri_off_diag[site][1][elem_ij3];
1080  lctmp3 += tri_off_diag[site][1][elem_ijb3];
1081 
1082  B.elem(site).elem().elem(j3,i3) = timesI(adj(lctmp3));
1083  B.elem(site).elem().elem(i3,j3) = timesI(lctmp3);
1084 
1085  elem_ij3++;
1086  elem_ijb3++;
1087  }
1088  }
1089  }
1090  break;
1091 
1092  case 5:
1093  /*# gamma( 13) 0 -1 0 0 # ( 0101 ) --> 5 */
1094  /*# 1 0 0 0 */
1095  /*# 0 0 0 -1 */
1096  /*# 0 0 1 0 */
1097  {
1098 
1099 
1100  RComplex<REAL> lctmp5;
1101  RScalar<REAL> lrtmp5;
1102 
1103  for(int i5 = 0; i5 < Nc; ++i5) {
1104 
1105  int elem_ij5 = (i5+Nc)*(i5+Nc-1)/2;
1106 
1107  for(int j5 = 0; j5 < Nc; ++j5) {
1108 
1109  int elem_ji5 = (j5+Nc)*(j5+Nc-1)/2 + i5;
1110 
1111 
1112  lctmp5 = adj(tri_off_diag[site][0][elem_ji5]);
1113  lctmp5 -= tri_off_diag[site][0][elem_ij5];
1114  lctmp5 += adj(tri_off_diag[site][1][elem_ji5]);
1115  lctmp5 -= tri_off_diag[site][1][elem_ij5];
1116 
1117 
1118  B.elem(site).elem().elem(i5,j5) = lctmp5;
1119 
1120  elem_ij5++;
1121  }
1122  }
1123  }
1124  break;
1125 
1126  case 6:
1127  /*# gamma( 23) 0 -i 0 0 # ( 0110 ) --> 6 */
1128  /*# -i 0 0 0 */
1129  /*# 0 0 0 -i */
1130  /*# 0 0 -i 0 */
1131  {
1132 
1133  RComplex<REAL> lctmp6;
1134  RScalar<REAL> lrtmp6;
1135 
1136  for(int i6 = 0; i6 < Nc; ++i6) {
1137 
1138  int elem_ij6 = (i6+Nc)*(i6+Nc-1)/2;
1139 
1140  for(int j6 = 0; j6 < Nc; ++j6) {
1141 
1142  int elem_ji6 = (j6+Nc)*(j6+Nc-1)/2 + i6;
1143 
1144  lctmp6 = adj(tri_off_diag[site][0][elem_ji6]);
1145  lctmp6 += tri_off_diag[site][0][elem_ij6];
1146  lctmp6 += adj(tri_off_diag[site][1][elem_ji6]);
1147  lctmp6 += tri_off_diag[site][1][elem_ij6];
1148 
1149  B.elem(site).elem().elem(i6,j6) = timesMinusI(lctmp6);
1150 
1151  elem_ij6++;
1152  }
1153  }
1154  }
1155  break;
1156 
1157  case 9:
1158  /*# gamma( 14) 0 i 0 0 # ( 1001 ) --> 9 */
1159  /*# i 0 0 0 */
1160  /*# 0 0 0 -i */
1161  /*# 0 0 -i 0 */
1162  {
1163  RComplex<REAL> lctmp9;
1164  RScalar<REAL> lrtmp9;
1165 
1166  for(int i9 = 0; i9 < Nc; ++i9) {
1167 
1168  int elem_ij9 = (i9+Nc)*(i9+Nc-1)/2;
1169 
1170  for(int j9 = 0; j9 < Nc; ++j9) {
1171 
1172  int elem_ji9 = (j9+Nc)*(j9+Nc-1)/2 + i9;
1173 
1174  lctmp9 = adj(tri_off_diag[site][0][elem_ji9]);
1175  lctmp9 += tri_off_diag[site][0][elem_ij9];
1176  lctmp9 -= adj(tri_off_diag[site][1][elem_ji9]);
1177  lctmp9 -= tri_off_diag[site][1][elem_ij9];
1178 
1179  B.elem(site).elem().elem(i9,j9) = timesI(lctmp9);
1180 
1181  elem_ij9++;
1182  }
1183  }
1184  }
1185  break;
1186 
1187  case 10:
1188  /*# gamma( 24) 0 -1 0 0 # ( 1010 ) --> 10 */
1189  /*# 1 0 0 0 */
1190  /*# 0 0 0 1 */
1191  /*# 0 0 -1 0 */
1192  {
1193 
1194  RComplex<REAL> lctmp10;
1195  RScalar<REAL> lrtmp10;
1196 
1197  for(int i10 = 0; i10 < Nc; ++i10) {
1198 
1199  int elem_ij10 = (i10+Nc)*(i10+Nc-1)/2;
1200 
1201  for(int j10 = 0; j10 < Nc; ++j10) {
1202 
1203  int elem_ji10 = (j10+Nc)*(j10+Nc-1)/2 + i10;
1204 
1205  lctmp10 = adj(tri_off_diag[site][0][elem_ji10]);
1206  lctmp10 -= tri_off_diag[site][0][elem_ij10];
1207  lctmp10 -= adj(tri_off_diag[site][1][elem_ji10]);
1208  lctmp10 += tri_off_diag[site][1][elem_ij10];
1209 
1210  B.elem(site).elem().elem(i10,j10) = lctmp10;
1211 
1212  elem_ij10++;
1213  }
1214  }
1215  }
1216 
1217  break;
1218 
1219  case 12:
1220  /*# gamma( 34) i 0 0 0 # ( 1100 ) --> 12 */
1221  /*# 0 -i 0 0 */
1222  /*# 0 0 -i 0 */
1223  /*# 0 0 0 i */
1224  /*# From diagonal part */
1225  {
1226 
1227 
1228  RComplex<REAL> lctmp12;
1229  RScalar<REAL> lr_zero12;
1230  RScalar<REAL> lrtmp12;
1231 
1232  lr_zero12 = 0;
1233 
1234  for(int i12 = 0; i12 < Nc; ++i12) {
1235 
1236  lrtmp12 = tri_diag[site][0][i12];
1237  lrtmp12 -= tri_diag[site][0][i12+Nc];
1238  lrtmp12 -= tri_diag[site][1][i12];
1239  lrtmp12 += tri_diag[site][1][i12+Nc];
1240  B.elem(site).elem().elem(i12,i12) = cmplx(lr_zero12,lrtmp12);
1241  }
1242 
1243  /*# From lower triangular portion */
1244  int elem_ij12 = 0;
1245  for(int i12 = 1; i12 < Nc; ++i12) {
1246 
1247  int elem_ijb12 = (i12+Nc)*(i12+Nc-1)/2 + Nc;
1248 
1249  for(int j12 = 0; j12 < i12; ++j12) {
1250 
1251  lctmp12 = tri_off_diag[site][0][elem_ij12];
1252  lctmp12 -= tri_off_diag[site][0][elem_ijb12];
1253  lctmp12 -= tri_off_diag[site][1][elem_ij12];
1254  lctmp12 += tri_off_diag[site][1][elem_ijb12];
1255 
1256  B.elem(site).elem().elem(i12,j12) = timesI(lctmp12);
1257  B.elem(site).elem().elem(j12,i12) = timesI(adj(lctmp12));
1258 
1259  elem_ij12++;
1260  elem_ijb12++;
1261  }
1262  }
1263  }
1264  break;
1265 
1266  default:
1267  {
1268  B = zero;
1269  QDPIO::cout << "BAD DEFAULT CASE HIT" << std::endl;
1270  }
1271  } // End Switch
1272 
1273  } // End Site Loop.
1274  } // End Function
1275  } // End Namespace
1276 
1277  void SSEDCloverTerm::triacntr(LatticeColorMatrix& B, int mat, int cb) const
1278  {
1279  START_CODE();
1280 
1281  B = zero;
1282 
1283  if ( mat < 0 || mat > 15 )
1284  {
1285  QDPIO::cerr << __func__ << ": Gamma out of range: mat = " << mat << std::endl;
1286  QDP_abort(1);
1287  }
1288 
1290  dispatch_to_threads(rb[cb].numSiteTable(), a,
1292 
1293 
1294  END_CODE();
1295  }
1296 
1297  //! Returns the appropriate clover coefficient for indices mu and nu
1298  Real SSEDCloverTerm::getCloverCoeff(int mu, int nu) const
1299  {
1300  START_CODE();
1301 
1302  if( param.anisoParam.anisoP )
1303  {
1304  if (mu==param.anisoParam.t_dir || nu == param.anisoParam.t_dir) {
1305  return param.clovCoeffT;
1306  }
1307  else {
1308  // Otherwise return the spatial coeff
1309  return param.clovCoeffR;
1310  }
1311  }
1312  else {
1313  // If there is no anisotropy just return the spatial one, it will
1314  // be the same as the temporal one
1315  return param.clovCoeffR;
1316  }
1317 
1318  END_CODE();
1319  }
1320 }
1321 
1322 
1323 
1324 
Primary include file for CHROMA library code.
Support class for fermion actions and linear operators.
Definition: state.h:94
Class for counted reference semantics.
Definition: handle.h:33
Double cholesDet(int cb) const
Computes the inverse of the term on cb using Cholesky.
~SSEDCloverTerm()
Free the internals.
PrimitiveClovOffDiag * tri_off_diag
void triacntr(LatticeColorMatrixD3 &B, int mat, int cb) const
Calculates Tr_D ( Gamma_mat L )
multi1d< bool > choles_done
void applySite(LatticeDiracFermionD3 &chi, const LatticeDiracFermionD3 &psi, enum PlusMinus isign, int site) const
PrimitiveClovDiag * tri_diag
void makeClov(const multi1d< LatticeColorMatrixD3 > &f, const Real &diag_mass)
Create the clover term on cb.
const FermBC< T, P, Q > & getFermBC() const
Return the fermion BC object for this linear operator.
void apply(LatticeDiracFermionD3 &chi, const LatticeDiracFermionD3 &psi, enum PlusMinus isign, int cb) const
Handle< FermBC< T, P, Q > > fbc
CloverFermActParams param
void create(Handle< FermState< T, P, Q > > fs, const CloverFermActParams &param_, const SSEDCloverTerm &from)
Create from another.
multi1d< LatticeColorMatrixD3 > u
void ldagdlinv(LatticeDouble &tr_log_diag, int cb)
Invert the clover term on cb using LDL^\dagger decomp.
Real getCloverCoeff(int mu, int nu) const
Calculates Tr_D ( Gamma_mat L )
SSEDCloverTerm()
Empty constructor. Must use create later.
void choles(int cb)
Computes the inverse of the term on cb using Cholesky.
Clover term linear operator.
int mu
Definition: cool.cc:24
int nu
Definition: cool.cc:25
void write(XMLWriter &xml, const std::string &path, const AsqtadFermActParams &param)
Writer parameters.
void block(LatticeColorMatrix &u_block, const multi1d< LatticeColorMatrix > &u, int mu, int bl_level, const Real &BlkAccu, int BlkMax, int j_decay)
Construct block links.
Definition: block.cc:42
unsigned elem_ij
Definition: ldumul_w.cc:38
unsigned j
Definition: ldumul_w.cc:35
unsigned elem_ji
Definition: ldumul_w.cc:39
unsigned n
Definition: ldumul_w.cc:36
Calculates the antihermitian field strength tensor iF(mu,nu)
Nd
Definition: meslate.cc:74
void triaCntrSiteLoop(int lo, int hi, int myId, TriaCntrArgs *a)
void makeClovSiteLoop(int lo, int hi, int myId, MakeClovArgs *a)
void orderedApplySiteLoop(int lo, int hi, int myID, ApplyArgs *a)
void unorderedApplySiteLoop(int lo, int hi, int myId, ApplyArgs *a)
void lDagDLInvSiteLoop(int lo, int hi, int myId, LDagDLInvArgs *a)
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
gaussian(aux)
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
Double one
Definition: invbicg.cc:105
push(xml_out,"Condensates")
int i
Definition: pbg5p_w.cc:55
@ PLUS
Definition: chromabase.h:45
multi1d< LatticeFermion > chi(Ncb)
RScalar< REAL > PrimitiveClovDiag[2][8]
Special structure used for triangular objects.
void mesField(multi1d< LatticeColorMatrixF > &f, const multi1d< LatticeColorMatrixF > &u)
Calculates the antihermitian field strength tensor iF(mu,nu)
Definition: mesfield.cc:80
Complex a
Definition: invbicg.cc:95
LatticeFermion psi
Definition: mespbg5p_w.cc:35
pop(xml_out)
void ssed_clover_apply(REAL64 *diag, REAL64 *offd, REAL64 *psiptr, REAL64 *chiptr, int n_sites)
RComplex< REAL > PrimitiveClovOffDiag[2][16]
START_CODE()
int cb
Definition: invbicg.cc:120
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
const T1 const T2 const T3 & f3
Definition: gtest.h:1327
const T1 const T2 & f2
Definition: gtest.h:1321
const T1 & f1
Definition: gtest.h:1316
const T1 const T2 const T3 const T4 const T5 & f5
Definition: gtest.h:1339
const T1 const T2 const T3 const T4 & f4
Definition: gtest.h:1333
FloatingPoint< double > Double
Definition: gtest.h:7351
int l
Definition: pade_trln_w.cc:111
Double sum
Definition: qtopcor.cc:37
Params for clover ferm acts.