6 #ifndef __clover_term_base_w_h__
7 #define __clover_term_base_w_h__
21 template<
typename T,
typename U>
31 const Subset&
subset()
const {
return all;}
85 const multi1d<T>&
chi,
const multi1d<T>&
psi,
99 const multi1d<T>&
chi,
const multi1d<T>&
psi,
111 const U& Lambda)
const;
122 virtual const multi1d<U>&
getU()
const = 0;
130 template<
typename T,
typename U>
143 template<
typename T,
typename U>
163 template<
typename T,
typename U>
165 const multi1d<T>&
chi,
const multi1d<T>&
psi,
185 #if defined(CHROMA_FUSED_CLOVER_DERIV_LOOPS) && !defined(BUILD_JIT_CLOVER_TERM)
187 #warning "Using Fused deriv_loops contributed by Jacques Bloch of Regensburg University"
188 template<
typename LCM>
190 void fused_deriv_loops(
const multi1d<LCM>&
u,
191 const int mu,
const int nu,
const int cb,
192 LCM& ds_u_mu,
LCM& ds_u_nu,
const LCM& Lambda)
198 Lambda_xplus_mu = shift(Lambda,
FORWARD,
mu);
209 using MatSU3 = PColorMatrix< RComplex< typename WordType<LCM>::Type_t>, 3>;
210 #define _elem(x,i) (x.elem(i).elem())
218 MatSU3 up_left_corner;
219 MatSU3 up_right_corner;
220 MatSU3 low_right_corner;
221 MatSU3 low_left_corner;
225 const Subset &
s=rb[
cb];
226 const int* tab =
s.siteTable().slice();
227 const int numSiteTable =
s.numSiteTable();
228 LCM &Lambda_xplus_muplusnu=
tmp;
229 Lambda_xplus_muplusnu[
s] = shift(Lambda_xplus_mu,
FORWARD,
nu);
231 #pragma omp parallel for private(staple_for, staple_back, staple_left, staple_right, u_tmp3, up_left_corner, up_right_corner, low_right_corner, low_left_corner)
232 for(
int j=0;
j < numSiteTable; ++
j)
236 up_left_corner = adj(_elem(u_mu_for_nu,
i))*adj(_elem(
u[
nu],
i));
237 low_right_corner = adj(_elem(u_nu_for_mu,
i))*adj(_elem(
u[
mu],
i));
238 low_left_corner = adj(_elem(
u[
mu],
i))*_elem(
u[
nu],
i);
240 u_tmp3 = _elem(u_nu_for_mu,
i)*_elem(Lambda_xplus_muplusnu,
i);
241 _elem(ds_u_mu,
i) = u_tmp3*up_left_corner;
243 u_tmp3 = up_left_corner*_elem(Lambda,
i);
244 _elem(ds_tmp_nu,
i) = u_tmp3*_elem(
u[
mu],
i);
246 u_tmp3 = low_right_corner*_elem(Lambda,
i);
247 _elem(ds_tmp_mu,
i) = u_tmp3*_elem(
u[
nu],
i);
249 u_tmp3 = _elem(u_mu_for_nu,
i)*_elem(Lambda_xplus_muplusnu,
i);
250 _elem(ds_u_nu,
i) = u_tmp3*low_right_corner;
252 staple_for = _elem(u_nu_for_mu,
i)*up_left_corner;
253 staple_right = up_left_corner*_elem(
u[
mu],
i);
254 staple_left = _elem(u_mu_for_nu,
i)*low_right_corner;
255 staple_back = adj(_elem(u_nu_for_mu,
i))*low_left_corner;
257 _elem(ds_u_mu,
i) += staple_for*_elem(Lambda,
i);
258 _elem(ds_tmp_nu,
i) += _elem(Lambda_xplus_muplusnu,
i)*staple_right;
259 _elem(ds_u_nu,
i) += staple_left*_elem(Lambda,
i);
260 _elem(ds_tmp_mu,
i) += _elem(Lambda_xplus_muplusnu,
i)*staple_back;
266 const Subset &
s=rb[1-
cb];
267 const int* tab =
s.siteTable().slice();
268 const int numSiteTable =
s.numSiteTable();
271 Lambda_xplus_nu[
s] = shift(Lambda,
FORWARD,
nu);
273 #pragma omp parallel for private(staple_for, staple_back, staple_left, staple_right, u_tmp3, up_left_corner, up_right_corner, low_right_corner, low_left_corner)
274 for (
int j=0;
j < numSiteTable; ++
j)
278 up_left_corner = adj(_elem(u_mu_for_nu,
i))*adj(_elem(
u[
nu],
i));
279 up_right_corner = _elem(u_nu_for_mu,
i)*adj(_elem(u_mu_for_nu,
i));
280 low_right_corner = adj(_elem(u_nu_for_mu,
i))*adj(_elem(
u[
mu],
i));
281 low_left_corner = adj(_elem(
u[
mu],
i))*_elem(
u[
nu],
i);
283 u_tmp3 = adj(_elem(u_mu_for_nu,
i))*_elem(Lambda_xplus_nu,
i);
284 _elem(ds_tmp_nu,
i) = u_tmp3*adj(low_left_corner);
286 u_tmp3 = _elem(Lambda_xplus_nu,
i)*adj(_elem(
u[
nu],
i));
287 _elem(ds_u_mu,
i) = up_right_corner * u_tmp3;
289 u_tmp3 = adj(_elem(u_nu_for_mu,
i))*_elem(Lambda_xplus_mu,
i);
290 _elem(ds_tmp_mu,
i) = u_tmp3*low_left_corner;
292 u_tmp3 = adj(up_right_corner)*_elem(Lambda_xplus_mu,
i);
293 _elem(ds_u_nu,
i) = u_tmp3*adj(_elem(
u[
mu],
i));
295 staple_for = _elem(u_nu_for_mu,
i)*up_left_corner;
296 staple_right = up_left_corner*_elem(
u[
mu],
i);
297 staple_left = _elem(u_mu_for_nu,
i)*low_right_corner;
298 staple_back = adj(_elem(u_nu_for_mu,
i))*low_left_corner;
300 _elem(ds_tmp_nu,
i) += staple_right*_elem(Lambda_xplus_mu,
i);
301 _elem(ds_u_mu,
i) += _elem(Lambda_xplus_mu,
i)*staple_for;
302 _elem(ds_tmp_mu,
i) += staple_back*_elem(Lambda_xplus_nu,
i);
303 _elem(ds_u_nu,
i) += _elem(Lambda_xplus_nu,
i)*staple_left;
319 template<
typename T,
typename U>
323 const U& Lambda)
const
327 const multi1d<U>&
u = getU();
329 #if defined(CHROMA_FUSED_CLOVER_DERIV_LOOPS) && !defined(BUILD_JIT_CLOVER_TERM)
331 fused_deriv_loops<U>(
u,
mu,
nu,
cb,ds_u_mu,ds_u_nu,Lambda);
362 U Lambda_xplus_mu = shift(Lambda,
FORWARD,
mu);
363 U Lambda_xplus_nu = shift(Lambda,
FORWARD,
nu);
364 U Lambda_xplus_muplusnu = shift(Lambda_xplus_mu,
FORWARD,
nu);
381 up_left_corner = adj(u_mu_for_nu)*adj(
u[
nu]);
390 up_right_corner = u_nu_for_mu*adj(u_mu_for_nu);
397 low_right_corner = adj(u_nu_for_mu)*adj(
u[
mu]);
405 low_left_corner = adj(
u[
mu])*
u[
nu];
422 u_tmp3[rb[
cb]] = u_nu_for_mu*Lambda_xplus_muplusnu;
423 ds_u_mu[rb[
cb]] = u_tmp3*up_left_corner;
434 u_tmp3[rb[1-
cb]] = adj(u_mu_for_nu)*Lambda_xplus_nu;
437 ds_tmp_nu[rb[1-
cb]] = u_tmp3*adj(low_left_corner);
449 u_tmp3[rb[1-
cb]] = Lambda_xplus_nu*adj(
u[
nu]);
450 ds_u_mu[rb[1-
cb]] = up_right_corner * u_tmp3;
461 u_tmp3[rb[
cb]] = up_left_corner*Lambda;
464 ds_tmp_nu[rb[
cb]] = u_tmp3*
u[
mu];
488 u_tmp3[rb[1-
cb]] = adj(u_nu_for_mu)*Lambda_xplus_mu;
491 ds_tmp_mu[rb[1-
cb]] = u_tmp3*low_left_corner;
501 u_tmp3[rb[1-
cb]] = adj(up_right_corner)*Lambda_xplus_mu;
502 ds_u_nu[rb[1-
cb]] = u_tmp3*adj(
u[
mu]);
514 u_tmp3[rb[
cb]] = low_right_corner*Lambda;
517 ds_tmp_mu[rb[
cb]] = u_tmp3*
u[
nu];
529 u_tmp3[rb[
cb]] = u_mu_for_nu*Lambda_xplus_muplusnu;
530 ds_u_nu[rb[
cb]] = u_tmp3*low_right_corner;
548 staple_for = u_nu_for_mu*up_left_corner;
556 staple_right = up_left_corner*
u[
mu];
565 staple_left = u_mu_for_nu*low_right_corner;
575 staple_back = adj(u_nu_for_mu)*low_left_corner;
587 ds_u_mu[rb[
cb]] += staple_for*Lambda;
600 ds_tmp_nu[rb[1-
cb]] += staple_right*Lambda_xplus_mu;
611 ds_u_mu[rb[1-
cb]] += Lambda_xplus_mu*staple_for;
622 ds_tmp_nu[rb[
cb]] += Lambda_xplus_muplusnu * staple_right;
635 ds_tmp_mu[rb[1-
cb]] += staple_back*Lambda_xplus_nu;
645 ds_u_nu[rb[
cb]] += staple_left*Lambda;
656 ds_tmp_mu[rb[
cb]] += Lambda_xplus_muplusnu * staple_back;
666 ds_u_nu[rb[1-
cb]] += Lambda_xplus_nu * staple_left;
690 template<
typename T,
typename U>
699 if( ds_u.size() !=
Nd ) {
722 Real factor = (Real(-1)/Real(8))*getCloverCoeff(
mu,
nu);
727 int mu_nu_index = (1 <<
mu) + (1 <<
nu);
728 T ferm_tmp = Gamma(mu_nu_index)*
psi;
729 U s_xy_dag = traceSpin( outerProduct(ferm_tmp,
chi));
730 s_xy_dag *= Real(factor);
733 deriv_loops(
mu,
nu,
cb, ds_tmp_mu, ds_tmp_nu, s_xy_dag);
736 ds_u[
mu] += ds_tmp_mu;
737 ds_u[
nu] -= ds_tmp_nu;
745 (*this).getFermBC().zero(ds_u);
749 template<
typename T,
typename U>
751 const multi1d<T>&
chi,
const multi1d<T>&
psi,
758 if( ds_u.size() !=
Nd ) {
781 Real factor = (Real(-1)/Real(8))*getCloverCoeff(
mu,
nu);
786 int mu_nu_index = (1 <<
mu) + (1 <<
nu);
790 for(
int i=0;
i <
chi.size();
i++) {
791 T ferm_tmp = Gamma(mu_nu_index)*
psi[
i];
792 s_xy_dag += traceSpin( outerProduct(ferm_tmp,
chi[
i]));
795 s_xy_dag *= Real(factor);
798 deriv_loops(
mu,
nu,
cb, ds_tmp_mu, ds_tmp_nu, s_xy_dag);
801 ds_u[
mu] += ds_tmp_mu;
802 ds_u[
nu] -= ds_tmp_nu;
810 (*this).getFermBC().zero(ds_u);
824 template<
typename T,
typename U>
831 if( ds_u.size() !=
Nd ) {
841 int mu_nu_index = (1 <<
mu) + (1 <<
nu);
844 Real factor = Real(-1)*getCloverCoeff(
mu,
nu)/Real(8);
849 triacntr(sigma_XY_dag, mu_nu_index,
cb);
850 sigma_XY_dag[rb[
cb]] *= factor;
857 deriv_loops(
mu,
nu,
cb, ds_tmp_mu, ds_tmp_nu, sigma_XY_dag);
860 ds_u[
mu] += ds_tmp_mu;
863 ds_u[
nu] -= ds_tmp_nu;
871 (*this).getFermBC().zero(ds_u);
virtual void applySite(T &chi, const T &psi, enum PlusMinus isign, int site) const =0
virtual ~CloverTermBase()
No real need for cleanup here.
void derivMultipole(multi1d< U > &ds_u, const multi1d< T > &chi, const multi1d< T > &psi, enum PlusMinus isign, int cb) const
Take deriv of D.
void derivMultipole(multi1d< U > &ds_u, const multi1d< T > &chi, const multi1d< T > &psi, enum PlusMinus isign) const
Take deriv of D.
virtual void choles(int cb)=0
Invert.
unsigned long nFlops() const
Return flops performed by the operator()
void derivTrLn(multi1d< U > &ds_u, enum PlusMinus isign, int cb) const
Take derivative of TrLn D.
void deriv_loops(const int u, const int mu, const int cb, U &ds_u_mu, U &ds_u_nu, const U &Lambda) const
virtual Real getCloverCoeff(int mu, int nu) const =0
get the clover coefficient
virtual Double cholesDet(int cb) const =0
Invert.
void deriv(multi1d< U > &ds_u, const T &chi, const T &psi, enum PlusMinus isign, int cb) const
Take deriv of D.
const Subset & subset() const
Subset is all here.
void deriv(multi1d< U > &ds_u, const T &chi, const T &psi, enum PlusMinus isign) const
Take deriv of D.
virtual void triacntr(U &B, int mat, int cb) const =0
Calculates Tr_D ( Gamma_mat L )
virtual const multi1d< U > & getU() const =0
Get the u field.
Dslash-like Linear Operator.
Asqtad Staggered-Dirac operator.
static multi1d< LatticeColorMatrix > u
LinOpSysSolverMGProtoClover::T T
multi1d< LatticeFermion > chi(Ncb)
multi1d< LatticeColorMatrix > LCM
multi1d< LatticeFermion > s(Ncb)
FloatingPoint< double > Double
multi1d< LatticeColorMatrix > U
multi1d< LatticeColorMatrix > deriv(const EvenOddPrecLinearOperator< LatticeFermion, multi1d< LatticeColorMatrix >, multi1d< LatticeColorMatrix > > &AP, const LatticeFermion &chi, const LatticeFermion &psi, enum PlusMinus isign)
Apply the operator onto a source std::vector.
multi1d< LatticeColorMatrix > LCM