26 catch( std::bad_alloc ) {
34 catch( std::bad_alloc ) {
35 QDPIO::cerr <<
"Failed to allocate the tri_diag" << std::endl << std::flush ;
45 catch( std::bad_alloc ) {
53 catch( std::bad_alloc ) {
54 QDPIO::cerr <<
"Failed to allocate the tri_off_diag" << std::endl << std::flush ;
65 QDP::Allocator::theQDPAllocator::Instance().free(
tri_diag);
69 QDP::Allocator::theQDPAllocator::Instance().free(
tri_off_diag);
79 fbc = fs->getFermBC();
83 if (
fbc.operator->() == 0)
85 QDPIO::cerr <<
"SSEDCloverTerm: error: fbc is null" << std::endl;
107 multi1d<LatticeColorMatrix> f;
112 for(
int i=0;
i < rb.numSubsets();
i++) {
119 LatticeFermion res1=
zero;
120 LatticeFermion res2=
zero;
125 LatticeFermion diff=res1-res2;
126 XMLFileWriter fred(
"fred");
128 write(fred,
"diff", diff);
131 QDPIO::cout <<
"sqrt( norm2( diff))= " << sqrt(norm2(diff)) << std::endl << std::flush;
147 fbc = fs->getFermBC();
151 if (
fbc.operator->() == 0) {
152 QDPIO::cerr <<
"SSEDCloverTerm: error: fbc is null" << std::endl;
180 for(
int i=0;
i < rb.numSubsets();
i++) {
192 for(
int site=0; site < Layout::sitesOnNode(); site++) {
194 for(
int comp=0; comp< 6; comp++) {
197 for(
int comp=0; comp < 15; comp++) {
289 namespace SSEDCloverEnv {
291 LatticeColorMatrix&
f0;
292 LatticeColorMatrix&
f1;
293 LatticeColorMatrix&
f2;
294 LatticeColorMatrix&
f3;
295 LatticeColorMatrix&
f4;
296 LatticeColorMatrix&
f5;
305 LatticeColorMatrix& f0=
a->f0;
306 LatticeColorMatrix&
f1=
a->f1;
307 LatticeColorMatrix&
f2=
a->f2;
308 LatticeColorMatrix&
f3=
a->f3;
309 LatticeColorMatrix&
f4=
a->f4;
310 LatticeColorMatrix&
f5=
a->f5;
313 const Real& diag_mass =
a->diag_mass;
315 for(
int site = lo; site < hi; ++site) {
317 for(
int jj = 0; jj < 2; jj++) {
319 for(
int ii = 0; ii < 2*Nc; ii++) {
321 tri_diag[site][jj][ii] = diag_mass.elem().elem().elem();
329 RComplex<REAL> E_minus;
330 RComplex<REAL> B_minus;
331 RComplex<REAL> ctmp_0;
332 RComplex<REAL> ctmp_1;
333 RScalar<REAL> rtmp_0;
334 RScalar<REAL> rtmp_1;
336 for(
int i = 0;
i < Nc; ++
i) {
340 ctmp_0 =
f5.elem(site).elem().elem(
i,
i);
341 ctmp_0 -= f0.elem(site).elem().elem(
i,
i);
342 rtmp_0 = imag(ctmp_0);
343 tri_diag[site][0][
i] += rtmp_0;
347 tri_diag[site][0][
i+Nc] -= rtmp_0;
351 ctmp_1 =
f5.elem(site).elem().elem(
i,
i);
352 ctmp_1 += f0.elem(site).elem().elem(
i,
i);
353 rtmp_1 = imag(ctmp_1);
354 tri_diag[site][1][
i] -= rtmp_1;
358 tri_diag[site][1][
i+Nc] += rtmp_1;
363 for(
int i = 1;
i < Nc; ++
i) {
365 for(
int j = 0;
j <
i; ++
j) {
368 int elem_tmp = (
i+Nc)*(
i+Nc-1)/2 +
j+Nc;
372 ctmp_0 = f0.elem(site).elem().elem(
i,
j);
373 ctmp_0 -=
f5.elem(site).elem().elem(
i,
j);
374 tri_off_diag[site][0][
elem_ij] = timesI(ctmp_0);
378 tri_off_diag[site][0][elem_tmp] = -tri_off_diag[site][0][
elem_ij];
382 ctmp_1 =
f5.elem(site).elem().elem(
i,
j);
383 ctmp_1 += f0.elem(site).elem().elem(
i,
j);
384 tri_off_diag[site][1][
elem_ij] = timesI(ctmp_1);
388 tri_off_diag[site][1][elem_tmp] = -tri_off_diag[site][1][
elem_ij];
393 for(
int i = 0;
i < Nc; ++
i) {
395 for(
int j = 0;
j < Nc; ++
j) {
404 E_minus = timesI(
f2.elem(site).elem().elem(
i,
j));
405 E_minus +=
f4.elem(site).elem().elem(
i,
j);
409 B_minus = timesI(
f3.elem(site).elem().elem(
i,
j));
410 B_minus -=
f1.elem(site).elem().elem(
i,
j);
413 tri_off_diag[site][0][
elem_ij] = B_minus - E_minus;
416 tri_off_diag[site][1][
elem_ij] = E_minus + B_minus;
428 LatticeColorMatrix f0;
429 LatticeColorMatrix
f1;
430 LatticeColorMatrix
f2;
431 LatticeColorMatrix
f3;
432 LatticeColorMatrix
f4;
433 LatticeColorMatrix
f5;
435 const int nodeSites = QDP::Layout::sitesOnNode();
487 QDPIO::cout <<
"Error: YOu have not done the Cholesky.on this operator on this subset" << std::endl;
488 QDPIO::cout <<
"You sure you shouldn't be asking invclov?" << std::endl;
500 namespace SSEDCloverEnv {
511 LatticeReal& tr_log_diag =
a->tr_log_diag;
521 for(
int ssite=lo; ssite < hi; ++ssite) {
523 int site = rb[
cb].siteTable()[ssite];
525 int site_neg_logdet=0;
532 RScalar<REAL> inv_d[8] QDP_ALIGN16;
533 RComplex<REAL> inv_offd[16] QDP_ALIGN16;
534 RComplex<REAL> v[8] QDP_ALIGN16;
535 RScalar<REAL> diag_g[8] QDP_ALIGN16;
538 for(
int i=0;
i < N;
i++) {
539 inv_d[
i] = tri_diag[site][
block][
i];
541 for(
int i=0;
i < 15;
i++) {
542 inv_offd[
i] =tri_off_diag[site][
block][
i];
545 for(
int j=0;
j < N; ++
j) {
554 for(
int i=0;
i <
j;
i++) {
557 RComplex<REAL> A_ii = cmplx( inv_d[
i], zip );
569 v[
j] = cmplx(inv_d[
j],zip);
571 for(
int k=0;
k <
j;
k++) {
572 int elem_jk =
j*(
j-1)/2 +
k;
573 v[
j] -= inv_offd[elem_jk]*v[
k];
582 inv_d[
j] = real( v[
j] );
596 for(
int k=
j+1;
k < N;
k++) {
597 int elem_kj =
k*(
k-1)/2 +
j;
598 for(
int l=0;
l <
j;
l++) {
599 int elem_kl =
k*(
k-1)/2 +
l;
600 inv_offd[elem_kj] -= inv_offd[elem_kl] * v[
l];
602 inv_offd[elem_kj] /= v[
j];
608 one.elem() = (REAL)1;
610 for(
int i=0;
i < N;
i++) {
611 diag_g[
i] =
one/inv_d[
i];
617 tr_log_diag.elem(site).elem().elem().elem() += log(fabs(inv_d[
i].elem()));
620 if( inv_d[
i].elem() < 0 ) {
639 for(
int k = 0;
k < N; ++
k) {
641 for(
int i = 0;
i <
k; ++
i) {
648 v[
k] = cmplx(diag_g[
k],zip);
650 for(
int i =
k+1;
i < N; ++
i) {
653 for(
int j =
k;
j <
i; ++
j) {
669 for(
int i = N-2; (int)
i >= (
int)
k; --
i) {
670 for(
int j =
i+1;
j < N; ++
j) {
678 inv_d[
k] = real(v[
k]);
679 for(
int i =
k+1;
i < N; ++
i) {
681 int elem_ik =
i*(
i-1)/2+
k;
682 inv_offd[elem_ik] = v[
i];
688 for(
int i=0;
i < N;
i++) {
689 tri_diag[site][
block][
i] = inv_d[
i];
691 for(
int i=0;
i < 15;
i++) {
692 tri_off_diag[site][
block][
i] = inv_offd[
i];
696 if( site_neg_logdet != 0 ) {
698 std::cout <<
"WARNING: Odd number of negative terms in Clover DET ("
699 << site_neg_logdet<<
") at site: " << site << std::endl;
714 tr_log_diag[rb[
cb]] =
zero;
743 extern void ssed_clover_apply(REAL64* diag, REAL64* offd, REAL64* psiptr, REAL64* chiptr,
int n_sites);
745 namespace SSEDCloverEnv{
748 const LatticeFermion&
psi;
760 int start=rb[
a->cb ].start()+lo;
762 REAL64* chiptr = (REAL64 *)&(
a->chi.elem(start).elem(0).elem(0).real());
763 REAL64* psiptr = (REAL64 *)&(
a->psi.elem(start).elem(0).elem(0).real());
764 REAL64* offd = (REAL64 *)&(
a->tri_off[start][0][0].real());
765 REAL64* diag = (REAL64 *)&(
a->tri_diag[start][0][0].elem());
773 const multi1d<int>& tab = rb[
cb].siteTable();
776 for(
int ssite=lo; ssite < hi; ++ssite) {
778 int site = tab[ssite];
779 unsigned long n_sites=1;
781 REAL64* chiptr = (REAL64 *)&(
a->chi.elem(site).elem(0).elem(0).real());
782 REAL64* psiptr = (REAL64 *)&(
a->psi.elem(site).elem(0).elem(0).real());
783 REAL64* offd = (REAL64 *)&(
a->tri_off[site][0][0].real());
784 REAL64* diag = (REAL64 *)&(
a->tri_diag[site][0][0].elem());
803 if( rb[
cb].hasOrderedRep() ) {
805 dispatch_to_threads(rb[
cb].siteTable().size(),
a,
811 dispatch_to_threads(rb[
cb].siteTable().size(),
a,
847 RComplex<REAL>* cchi = (RComplex<REAL>*)&(
chi.elem(site).elem(0).elem(0));
848 const RComplex<REAL>* ppsi = (
const RComplex<REAL>*)&(
psi.elem(site).elem(0).elem(0));
851 cchi[ 0] =
tri_diag[site][0][ 0] * ppsi[ 0]
858 cchi[ 1] =
tri_diag[site][0][ 1] * ppsi[ 1]
865 cchi[ 2] =
tri_diag[site][0][ 2] * ppsi[ 2]
872 cchi[ 3] =
tri_diag[site][0][ 3] * ppsi[ 3]
879 cchi[ 4] =
tri_diag[site][0][ 4] * ppsi[ 4]
886 cchi[ 5] =
tri_diag[site][0][ 5] * ppsi[ 5]
893 cchi[ 6] =
tri_diag[site][1][ 0] * ppsi[ 6]
900 cchi[ 7] =
tri_diag[site][1][ 1] * ppsi[ 7]
907 cchi[ 8] =
tri_diag[site][1][ 2] * ppsi[ 8]
914 cchi[ 9] =
tri_diag[site][1][ 3] * ppsi[ 9]
921 cchi[10] =
tri_diag[site][1][ 4] * ppsi[10]
928 cchi[11] =
tri_diag[site][1][ 5] * ppsi[11]
975 namespace SSEDCloverEnv {
977 LatticeColorMatrix&
B;
990 LatticeColorMatrix& B =
a->B;
995 for(
int ssite=0; ssite < rb[
cb].numSiteTable(); ++ssite) {
997 int site = rb[
cb].siteTable()[ssite];
1008 RComplex<REAL> lctmp0;
1009 RScalar<REAL> lr_zero0;
1010 RScalar<REAL> lrtmp0;
1014 for(
int i0 = 0; i0 < Nc; ++i0) {
1015 lrtmp0 = tri_diag[site][0][i0];
1016 lrtmp0 += tri_diag[site][0][i0+Nc];
1017 lrtmp0 += tri_diag[site][1][i0];
1018 lrtmp0 += tri_diag[site][1][i0+Nc];
1019 B.elem(site).elem().elem(i0,i0) = cmplx(lrtmp0,lr_zero0);
1024 for(
int i0 = 1; i0 < Nc; ++i0) {
1026 int elem_ijb0 = (i0+Nc)*(i0+Nc-1)/2 + Nc;
1028 for(
int j0 = 0; j0 < i0; ++j0) {
1030 lctmp0 = tri_off_diag[site][0][elem_ij0];
1031 lctmp0 += tri_off_diag[site][0][elem_ijb0];
1032 lctmp0 += tri_off_diag[site][1][elem_ij0];
1033 lctmp0 += tri_off_diag[site][1][elem_ijb0];
1035 B.elem(site).elem().elem(j0,i0) = lctmp0;
1036 B.elem(site).elem().elem(i0,j0) = adj(lctmp0);
1054 RComplex<REAL> lctmp3;
1055 RScalar<REAL> lr_zero3;
1056 RScalar<REAL> lrtmp3;
1060 for(
int i3 = 0; i3 < Nc; ++i3) {
1062 lrtmp3 = tri_diag[site][0][i3+Nc];
1063 lrtmp3 -= tri_diag[site][0][i3];
1064 lrtmp3 -= tri_diag[site][1][i3];
1065 lrtmp3 += tri_diag[site][1][i3+Nc];
1066 B.elem(site).elem().elem(i3,i3) = cmplx(lr_zero3,lrtmp3);
1071 for(
int i3 = 1; i3 < Nc; ++i3) {
1073 int elem_ijb3 = (i3+Nc)*(i3+Nc-1)/2 + Nc;
1075 for(
int j3 = 0; j3 < i3; ++j3) {
1077 lctmp3 = tri_off_diag[site][0][elem_ijb3];
1078 lctmp3 -= tri_off_diag[site][0][elem_ij3];
1079 lctmp3 -= tri_off_diag[site][1][elem_ij3];
1080 lctmp3 += tri_off_diag[site][1][elem_ijb3];
1082 B.elem(site).elem().elem(j3,i3) = timesI(adj(lctmp3));
1083 B.elem(site).elem().elem(i3,j3) = timesI(lctmp3);
1100 RComplex<REAL> lctmp5;
1101 RScalar<REAL> lrtmp5;
1103 for(
int i5 = 0; i5 < Nc; ++i5) {
1105 int elem_ij5 = (i5+Nc)*(i5+Nc-1)/2;
1107 for(
int j5 = 0; j5 < Nc; ++j5) {
1109 int elem_ji5 = (j5+Nc)*(j5+Nc-1)/2 + i5;
1112 lctmp5 = adj(tri_off_diag[site][0][elem_ji5]);
1113 lctmp5 -= tri_off_diag[site][0][elem_ij5];
1114 lctmp5 += adj(tri_off_diag[site][1][elem_ji5]);
1115 lctmp5 -= tri_off_diag[site][1][elem_ij5];
1118 B.elem(site).elem().elem(i5,j5) = lctmp5;
1133 RComplex<REAL> lctmp6;
1134 RScalar<REAL> lrtmp6;
1136 for(
int i6 = 0; i6 < Nc; ++i6) {
1138 int elem_ij6 = (i6+Nc)*(i6+Nc-1)/2;
1140 for(
int j6 = 0; j6 < Nc; ++j6) {
1142 int elem_ji6 = (j6+Nc)*(j6+Nc-1)/2 + i6;
1144 lctmp6 = adj(tri_off_diag[site][0][elem_ji6]);
1145 lctmp6 += tri_off_diag[site][0][elem_ij6];
1146 lctmp6 += adj(tri_off_diag[site][1][elem_ji6]);
1147 lctmp6 += tri_off_diag[site][1][elem_ij6];
1149 B.elem(site).elem().elem(i6,j6) = timesMinusI(lctmp6);
1163 RComplex<REAL> lctmp9;
1164 RScalar<REAL> lrtmp9;
1166 for(
int i9 = 0; i9 < Nc; ++i9) {
1168 int elem_ij9 = (i9+Nc)*(i9+Nc-1)/2;
1170 for(
int j9 = 0; j9 < Nc; ++j9) {
1172 int elem_ji9 = (j9+Nc)*(j9+Nc-1)/2 + i9;
1174 lctmp9 = adj(tri_off_diag[site][0][elem_ji9]);
1175 lctmp9 += tri_off_diag[site][0][elem_ij9];
1176 lctmp9 -= adj(tri_off_diag[site][1][elem_ji9]);
1177 lctmp9 -= tri_off_diag[site][1][elem_ij9];
1179 B.elem(site).elem().elem(i9,j9) = timesI(lctmp9);
1194 RComplex<REAL> lctmp10;
1195 RScalar<REAL> lrtmp10;
1197 for(
int i10 = 0; i10 < Nc; ++i10) {
1199 int elem_ij10 = (i10+Nc)*(i10+Nc-1)/2;
1201 for(
int j10 = 0; j10 < Nc; ++j10) {
1203 int elem_ji10 = (j10+Nc)*(j10+Nc-1)/2 + i10;
1205 lctmp10 = adj(tri_off_diag[site][0][elem_ji10]);
1206 lctmp10 -= tri_off_diag[site][0][elem_ij10];
1207 lctmp10 -= adj(tri_off_diag[site][1][elem_ji10]);
1208 lctmp10 += tri_off_diag[site][1][elem_ij10];
1210 B.elem(site).elem().elem(i10,j10) = lctmp10;
1228 RComplex<REAL> lctmp12;
1229 RScalar<REAL> lr_zero12;
1230 RScalar<REAL> lrtmp12;
1234 for(
int i12 = 0; i12 < Nc; ++i12) {
1236 lrtmp12 = tri_diag[site][0][i12];
1237 lrtmp12 -= tri_diag[site][0][i12+Nc];
1238 lrtmp12 -= tri_diag[site][1][i12];
1239 lrtmp12 += tri_diag[site][1][i12+Nc];
1240 B.elem(site).elem().elem(i12,i12) = cmplx(lr_zero12,lrtmp12);
1245 for(
int i12 = 1; i12 < Nc; ++i12) {
1247 int elem_ijb12 = (i12+Nc)*(i12+Nc-1)/2 + Nc;
1249 for(
int j12 = 0; j12 < i12; ++j12) {
1251 lctmp12 = tri_off_diag[site][0][elem_ij12];
1252 lctmp12 -= tri_off_diag[site][0][elem_ijb12];
1253 lctmp12 -= tri_off_diag[site][1][elem_ij12];
1254 lctmp12 += tri_off_diag[site][1][elem_ijb12];
1256 B.elem(site).elem().elem(i12,j12) = timesI(lctmp12);
1257 B.elem(site).elem().elem(j12,i12) = timesI(adj(lctmp12));
1269 QDPIO::cout <<
"BAD DEFAULT CASE HIT" << std::endl;
1283 if ( mat < 0 || mat > 15 )
1285 QDPIO::cerr << __func__ <<
": Gamma out of range: mat = " << mat << std::endl;
1290 dispatch_to_threads(rb[
cb].numSiteTable(),
a,
Primary include file for CHROMA library code.
Support class for fermion actions and linear operators.
Class for counted reference semantics.
Double cholesDet(int cb) const
Computes the inverse of the term on cb using Cholesky.
LatticeDouble tr_log_diag_
~SSEDCloverTerm()
Free the internals.
PrimitiveClovOffDiag * tri_off_diag
void triacntr(LatticeColorMatrixD3 &B, int mat, int cb) const
Calculates Tr_D ( Gamma_mat L )
multi1d< bool > choles_done
void applySite(LatticeDiracFermionD3 &chi, const LatticeDiracFermionD3 &psi, enum PlusMinus isign, int site) const
PrimitiveClovDiag * tri_diag
void makeClov(const multi1d< LatticeColorMatrixD3 > &f, const Real &diag_mass)
Create the clover term on cb.
const FermBC< T, P, Q > & getFermBC() const
Return the fermion BC object for this linear operator.
void apply(LatticeDiracFermionD3 &chi, const LatticeDiracFermionD3 &psi, enum PlusMinus isign, int cb) const
Handle< FermBC< T, P, Q > > fbc
CloverFermActParams param
void create(Handle< FermState< T, P, Q > > fs, const CloverFermActParams ¶m_, const SSEDCloverTerm &from)
Create from another.
multi1d< LatticeColorMatrixD3 > u
void ldagdlinv(LatticeDouble &tr_log_diag, int cb)
Invert the clover term on cb using LDL^\dagger decomp.
Real getCloverCoeff(int mu, int nu) const
Calculates Tr_D ( Gamma_mat L )
SSEDCloverTerm()
Empty constructor. Must use create later.
void choles(int cb)
Computes the inverse of the term on cb using Cholesky.
Clover term linear operator.
void write(XMLWriter &xml, const std::string &path, const AsqtadFermActParams ¶m)
Writer parameters.
void block(LatticeColorMatrix &u_block, const multi1d< LatticeColorMatrix > &u, int mu, int bl_level, const Real &BlkAccu, int BlkMax, int j_decay)
Construct block links.
Calculates the antihermitian field strength tensor iF(mu,nu)
void triaCntrSiteLoop(int lo, int hi, int myId, TriaCntrArgs *a)
void makeClovSiteLoop(int lo, int hi, int myId, MakeClovArgs *a)
void orderedApplySiteLoop(int lo, int hi, int myID, ApplyArgs *a)
void unorderedApplySiteLoop(int lo, int hi, int myId, ApplyArgs *a)
void lDagDLInvSiteLoop(int lo, int hi, int myId, LDagDLInvArgs *a)
Asqtad Staggered-Dirac operator.
QDP_error_exit("too many BiCG iterations", n_count, rsd_sq, cp, c, re_rvr, im_rvr, re_a, im_a, re_b, im_b)
push(xml_out,"Condensates")
multi1d< LatticeFermion > chi(Ncb)
RScalar< REAL > PrimitiveClovDiag[2][8]
Special structure used for triangular objects.
void mesField(multi1d< LatticeColorMatrixF > &f, const multi1d< LatticeColorMatrixF > &u)
Calculates the antihermitian field strength tensor iF(mu,nu)
void ssed_clover_apply(REAL64 *diag, REAL64 *offd, REAL64 *psiptr, REAL64 *chiptr, int n_sites)
RComplex< REAL > PrimitiveClovOffDiag[2][16]
const T1 const T2 const T3 & f3
const T1 const T2 const T3 const T4 const T5 & f5
const T1 const T2 const T3 const T4 & f4
FloatingPoint< double > Double
Params for clover ferm acts.
PrimitiveClovDiag * tri_diag
const LatticeFermion & psi
PrimitiveClovOffDiag * tri_off
PrimitiveClovDiag * tri_diag
LatticeReal & tr_log_diag
PrimitiveClovOffDiag * tri_off_diag
PrimitiveClovOffDiag * tri_off_diag
PrimitiveClovDiag * tri_diag
PrimitiveClovOffDiag * tri_off
PrimitiveClovDiag * tri_diag