17 typedef LatticeFermionF
TF;
18 typedef LatticeColorMatrixF
UF;
19 typedef multi1d<LatticeColorMatrixF>
QF;
20 typedef multi1d<LatticeColorMatrixF>
PF;
22 typedef LatticeFermionD
TD;
23 typedef LatticeColorMatrixD
UD;
24 typedef multi1d<LatticeColorMatrixD>
QD;
25 typedef multi1d<LatticeColorMatrixD>
PD;
33 void commDimPartitionedSet(
int);
45 multi1d<int> boundary(4); boundary[0]=boundary[1]=boundary[2]=1;
56 QudaGaugeParam q_gauge_param=newQudaGaugeParam();
57 QudaInvertParam quda_inv_param=newQudaInvertParam();
59 const multi1d<int>& latdims = Layout::subgridLattSize();
60 q_gauge_param.X[0] = latdims[0];
61 q_gauge_param.X[1] = latdims[1];
62 q_gauge_param.X[2] = latdims[2];
63 q_gauge_param.X[3] = latdims[3];
64 int vol=latdims[0]*latdims[1]*latdims[2]*latdims[3];
70 q_gauge_param.anisotropy = 1.0;
72 q_gauge_param.type = QUDA_WILSON_LINKS;
74 q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
78 q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
81 q_gauge_param.t_boundary = QUDA_PERIODIC_T;
84 QudaPrecision_s cpu_prec=QUDA_SINGLE_PRECISION;
85 QudaPrecision_s gpu_prec=QUDA_SINGLE_PRECISION;
86 QudaPrecision_s gpu_half_prec=QUDA_HALF_PRECISION;
88 q_gauge_param.cpu_prec=cpu_prec;
89 q_gauge_param.cuda_prec=gpu_prec;
90 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
91 q_gauge_param.reconstruct_sloppy = q_gauge_param.reconstruct;
92 q_gauge_param.cuda_prec_sloppy=q_gauge_param.cuda_prec;
93 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
95 quda_inv_param.kappa=1;
96 quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
97 quda_inv_param.dagger = (
isign ==
PLUS) ? QUDA_DAG_NO : QUDA_DAG_YES;
100 multi1d<int> face_size(4);
101 face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
102 face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
103 face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
104 face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
106 int max_face = face_size[0];
107 for(
int i=1;
i <=3;
i++) {
108 if ( face_size[
i] > max_face ) {
109 max_face = face_size[
i];
112 q_gauge_param.ga_pad = max_face;
113 quda_inv_param.sp_pad = 0;
114 quda_inv_param.cl_pad = 0;
116 quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
117 quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
119 quda_inv_param.dslash_type = QUDA_WILSON_DSLASH;
120 quda_inv_param.cpu_prec = cpu_prec;
121 quda_inv_param.cuda_prec = gpu_prec;
122 quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
131 links_single[
mu] = fs_handle->getLinks()[
mu];
139 gauge[
mu] = (
void *)&(links_single[
mu].elem(all.start()).elem().elem(0,0).real());
142 loadGaugeQuda((
void *)gauge, &q_gauge_param);
156 unsigned int vol2 = vol/2;
157 unsigned int h_size=4*3*2*vol2;
158 multi1d<float> buffer_in(h_size);
159 multi1d<float> buffer_out(h_size);
168 for(
int site=0; site < rb[1].siteTable().size(); site++) {
169 for(
int spin=0; spin < 4; spin++) {
170 for(
int col=0; col < 3; col++) {
171 buffer_in[lin++] = src.elem(rb[otherCB].siteTable()[site]).elem(spin).elem(col).real();
172 buffer_in[lin++] = src.elem(rb[otherCB].siteTable()[site]).elem(spin).elem(col).imag();
178 dslashQuda((
void *)buffer_out.slice(),
179 (
void *)buffer_in.slice(),
185 for(
int site=0; site < rb[1].siteTable().size(); site++) {
186 for(
int spin=0; spin < 4; spin++) {
187 for(
int col=0; col < 3; col++) {
188 dst2.elem(rb[
cb].siteTable()[site]).elem(spin).elem(col).real() = buffer_out[lin++];
189 dst2.elem(rb[
cb].siteTable()[site]).elem(spin).elem(col).imag() = buffer_out[lin++];
195 diff[rb[
cb]] = dst1-dst2;
199 QDPIO::cout <<
"\t\t diff = " << diff_norm <<
" per site \t";
201 if ( toBool( diff_norm <
Double(1.0e-7) ) ) {
220 multi1d<int> boundary(4); boundary[0]=boundary[1]=boundary[2]=1;
240 RealD diff_norm = sqrt( norm2(diff, rb[
cb]) );
241 QDPIO::cout <<
"diff_norm = " << diff_norm << std::endl;
242 QDPIO::cout <<
"diff_norm / site = " << diff_norm/((double)Layout::vol()/(double)2) << std::endl;
248 QudaGaugeParam q_gauge_param=newQudaGaugeParam();
249 QudaInvertParam quda_inv_param=newQudaInvertParam();
251 QudaPrecision_s cpu_prec=QUDA_DOUBLE_PRECISION;
252 QudaPrecision_s gpu_prec=QUDA_DOUBLE_PRECISION;
253 QudaPrecision_s gpu_half_prec=QUDA_DOUBLE_PRECISION;
257 q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
260 q_gauge_param.t_boundary = QUDA_PERIODIC_T;
264 q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
265 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
266 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
269 q_gauge_param.cpu_prec=cpu_prec;
270 q_gauge_param.cuda_prec=gpu_prec;
271 q_gauge_param.cuda_prec_sloppy=gpu_half_prec;
274 const multi1d<int>& latdims = Layout::subgridLattSize();
275 q_gauge_param.X[0] = latdims[0];
276 q_gauge_param.X[1] = latdims[1];
277 q_gauge_param.X[2] = latdims[2];
278 q_gauge_param.X[3] = latdims[3];
279 int vol=latdims[0]*latdims[1]*latdims[2]*latdims[3];
280 q_gauge_param.type = QUDA_WILSON_LINKS;
281 q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
284 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
287 multi1d<int> face_size(4);
288 face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
289 face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
290 face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
291 face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
293 int max_face = face_size[0];
294 for(
int i=1;
i <=3;
i++) {
295 if ( face_size[
i] > max_face ) {
296 max_face = face_size[
i];
301 q_gauge_param.ga_pad = max_face;
302 quda_inv_param.sp_pad = 0;
303 quda_inv_param.cl_pad = 0;
306 q_gauge_param.anisotropy = 1.0;
318 links_single[
mu] = fs_handle->getLinks()[
mu];
329 void* gauge_minus[4];
333 gauge[
mu] = (
void *)&(links_single[
mu].elem(all.start()).elem().elem(0,0).real());
336 gauge_minus[
mu] = (
void *)&(links_minus[
mu].elem(all.start()).elem().elem(0,0).real());
341 loadGaugeQuda((
void *)gauge, (
void *)gauge_minus, &q_gauge_param);
343 loadGaugeQuda((
void *)gauge, &q_gauge_param);
346 quda_inv_param.dslash_type = QUDA_WILSON_DSLASH;
347 quda_inv_param.cpu_prec = cpu_prec;
348 quda_inv_param.cuda_prec = gpu_prec;
349 quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
350 quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
351 quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
352 quda_inv_param.dagger = (
isign ==
PLUS) ? QUDA_DAG_NO : QUDA_DAG_YES;
362 unsigned int vol2 = vol/2;
363 unsigned int h_size=4*3*2*vol2;
364 multi1d<double> buffer_in(h_size);
365 multi1d<double> buffer_out(h_size);
373 for(
int site=0; site < rb[1].siteTable().size(); site++) {
374 for(
int spin=0; spin < 4; spin++) {
375 for(
int col=0; col < 3; col++) {
376 buffer_in[lin++] = src.elem(rb[otherCB].siteTable()[site]).elem(spin).elem(col).real();
377 buffer_in[lin++] = src.elem(rb[otherCB].siteTable()[site]).elem(spin).elem(col).imag();
383 dslashQuda((
void *)buffer_out.slice(),
384 (
void *)buffer_in.slice(),
391 for(
int site=0; site < rb[1].siteTable().size(); site++) {
392 for(
int spin=0; spin < 4; spin++) {
393 for(
int col=0; col < 3; col++) {
394 dst2.elem(rb[
cb].siteTable()[site]).elem(spin).elem(col).real() = buffer_out[lin++];
395 dst2.elem(rb[
cb].siteTable()[site]).elem(spin).elem(col).imag() = buffer_out[lin++];
401 diff[rb[
cb]] = dst1-dst2;
405 QDPIO::cout <<
"\t\t diff = " << diff_norm <<
" per site \t";
407 if ( toBool( diff_norm <
Double(1.0e-7) ) ) {
423 multi1d<int> boundary(4); boundary[0]=boundary[1]=boundary[2]=1;
430 WilsonDslash3D D_me(fs_handle);
434 QudaGaugeParam q_gauge_param=newQudaGaugeParam();
435 QudaInvertParam quda_inv_param=newQudaInvertParam();
437 QudaPrecision_s cpu_prec=QUDA_SINGLE_PRECISION;
438 QudaPrecision_s gpu_prec=QUDA_SINGLE_PRECISION;
439 QudaPrecision_s gpu_half_prec=QUDA_SINGLE_PRECISION;
443 q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
446 q_gauge_param.t_boundary = QUDA_PERIODIC_T;
450 q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
451 q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
454 q_gauge_param.cpu_prec=cpu_prec;
455 q_gauge_param.cuda_prec=gpu_prec;
456 q_gauge_param.cuda_prec_sloppy=gpu_half_prec;
459 const multi1d<int>& latdims = Layout::subgridLattSize();
460 q_gauge_param.X[0] = latdims[0];
461 q_gauge_param.X[1] = latdims[1];
462 q_gauge_param.X[2] = latdims[2];
463 q_gauge_param.X[3] = latdims[3];
466 q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
471 unsigned int vol = latdims[0]*latdims[1]*latdims[2]*latdims[3];
472 unsigned int vol2 = vol/2;
473 unsigned int padding=latdims[0]*latdims[1]*latdims[2]/2;
474 quda_inv_param.sp_pad = q_gauge_param.ga_pad = quda_inv_param.cl_pad = padding;
478 q_gauge_param.anisotropy = 1.0;
490 tmp_links[
mu] = fs_handle->getLinks()[
mu];
497 for(
int t=0;
t < latdims[3];
t++) {
498 for(
int z=0;
z < latdims[2];
z++) {
499 for(
int y=0;
y < latdims[1];
y++) {
500 for(
int x=0;
x < latdims[0];
x++) {
502 int par=(
x +
y +
z) & 1;
503 int par4d=(
x +
y +
z +
t) & 1;
505 int off=xh+(latdims[0]/2)*(
y + latdims[1]*(
z+latdims[2]*
t));
507 for(
int r=0;
r < Nc;
r++) {
508 for(
int c=0;
c < Nc;
c++) {
509 links_single[
mu].elem(vol2*par+off).elem().elem(
r,
c).real()
510 = tmp_links[
mu].elem(vol2*par4d+off).elem().elem(
r,
c).real();
514 links_single[
mu].elem(vol2*par+off).elem().elem(
r,
c).imag()
515 = tmp_links[
mu].elem(vol2*par4d+off).elem().elem(
r,
c).imag();
536 gauge[
mu] = (
void *)&(links_single[
mu].elem(0).elem().elem(0,0).real());
540 loadGaugeQuda((
void *)gauge, &q_gauge_param);
543 quda_inv_param.dslash_type = QUDA_WILSON_DSLASH;
544 quda_inv_param.cpu_prec = cpu_prec;
545 quda_inv_param.cuda_prec = gpu_prec;
546 quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
547 quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
548 quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
557 unsigned int h_size=4*3*2*vol2;
558 multi1d<float> buffer_in(h_size);
559 multi1d<float> buffer_out(h_size);
568 for(
int t=0;
t < latdims[3];
t++) {
569 for(
int z=0;
z < latdims[2];
z++) {
570 for(
int y=0;
y < latdims[1];
y++) {
571 for(
int x=0;
x < latdims[0];
x++) {
573 int par=(
x +
y +
z) & 1;
574 if( par == otherCB ) {
575 int par4d=(
x +
y +
z +
t) & 1;
577 int off=xh+(latdims[0]/2)*(
y + latdims[1]*(
z+latdims[2]*
t));
579 for(
int spin=0; spin < 4; spin++) {
580 for(
int col=0; col < 3; col++) {
581 buffer_in[24*off+(lin++)] = src.elem(vol2*par4d+off).elem(spin).elem(col).real();
582 buffer_in[24*off+(lin++)] = src.elem(vol2*par4d+off).elem(spin).elem(col).imag();
592 dslash3DQuda((
void *)buffer_out.slice(),
593 (
void *)buffer_in.slice(),
598 for(
int t=0;
t < latdims[3];
t++) {
599 for(
int z=0;
z < latdims[2];
z++) {
600 for(
int y=0;
y < latdims[1];
y++) {
601 for(
int x=0;
x < latdims[0];
x++) {
603 int par=(
x +
y +
z) & 1;
605 int par4d=(
x +
y +
z +
t) & 1;
607 int off=xh+(latdims[0]/2)*(
y + latdims[1]*(
z+latdims[2]*
t));
609 for(
int spin=0; spin < 4; spin++) {
610 for(
int col=0; col < 3; col++) {
611 dst2.elem(vol2*par4d+off).elem(spin).elem(col).real()= buffer_out[24*off+(lin++)] ;
612 dst2.elem(vol2*par4d+off).elem(spin).elem(col).imag()=buffer_out[24*off+(lin++)];
622 diff[rb3[
cb]] = dst1-dst2;
626 QDPIO::cout <<
"\t diff = " << diff_norm <<
" per site \t";
628 if ( toBool( diff_norm <
Double(2.0e-8) ) ) {
650 int main(
int argc,
char **argv)
654 QDPIO::cout <<
"Linkage = " <<
linkageHack() << std::endl;
658 const int lsize[4]={12,12,12,48};
659 multi1d<int> nrow(4);
661 Layout::setLattSize(nrow);
667 XMLReader gauge_file_xml, gauge_xml;
669 inputCfg.cfg_type=WEAK_FIELD;
689 push(xml_out,
"t_invert");
690 push(xml_out,
"params");
691 write(xml_out,
"nrow", nrow);
695 MesPlq(xml_out,
"Observables", ud);
699 QDPIO::cout <<
"Howdy" << std::endl;
704 QDPIO::cout <<
"Test: Dslash PLUS, cb=0";
706 QDPIO::cout <<
"\t FAILED" << std::endl;
710 QDPIO::cout <<
"\t OK" << std::endl;
713 QDPIO::cout <<
"Test: Dslash PLUS, cb=1" ;
715 QDPIO::cout <<
"\t FAILED" << std::endl;
719 QDPIO::cout <<
"\t OK" << std::endl;
723 QDPIO::cout <<
"Test: Dslash MINUS, cb=0";
725 QDPIO::cout <<
"\t FAILED" << std::endl;
729 QDPIO::cout <<
"\t OK" << std::endl;
733 QDPIO::cout <<
"Test: Dslash MINUS, cb=1" ;
735 QDPIO::cout <<
"\t FAILED" << std::endl;
739 QDPIO::cout <<
"\t OK" << std::endl;
747 QDPIO::cout <<
"Test: Dslash PLUS, cb=0";
749 QDPIO::cout <<
"\t FAILED" << std::endl;
753 QDPIO::cout <<
"\t OK" << std::endl;
756 QDPIO::cout <<
"Test: Dslash PLUS, cb=1" ;
758 QDPIO::cout <<
"\t FAILED" << std::endl;
762 QDPIO::cout <<
"\t OK" << std::endl;
766 QDPIO::cout <<
"Test: Dslash MINUS, cb=0";
768 QDPIO::cout <<
"\t FAILED" << std::endl;
772 QDPIO::cout <<
"\t OK" << std::endl;
776 QDPIO::cout <<
"Test: Dslash MINUS, cb=1" ;
778 QDPIO::cout <<
"\t FAILED" << std::endl;
782 QDPIO::cout <<
"\t OK" << std::endl;
790 QDPIO::cout <<
"Test: Dslash3D PLUS, cb=0";
791 if ( ! testQudaDslash3D(
u,
PLUS, 0) ) {
792 QDPIO::cout <<
"\t FAILED" << std::endl;
796 QDPIO::cout <<
"\t OK" << std::endl;
799 QDPIO::cout <<
"Test: Dslash3D MINUS, cb=0";
800 if ( ! testQudaDslash3D(
u,
MINUS, 0) ) {
801 QDPIO::cout <<
"\t FAILED" << std::endl;
805 QDPIO::cout <<
"\t OK" << std::endl;
808 QDPIO::cout <<
"Test: Dslash3D PLUS, cb=1" ;
809 if ( ! testQudaDslash3D(
u,
PLUS, 1) ) {
810 QDPIO::cout <<
"\t FAILED" << std::endl;
814 QDPIO::cout <<
"\t OK" << std::endl;
817 QDPIO::cout <<
"Test: Dslash3D MINUS, cb=1" ;
818 if ( ! testQudaDslash3D(
u,
MINUS, 1) ) {
819 QDPIO::cout <<
"\t FAILED" << std::endl;
823 QDPIO::cout <<
"\t OK" << std::endl;
829 QDPIO::cout <<
"All tests passed" << std::endl;
Primary include file for CHROMA in application codes.
Class for counted reference semantics.
General Wilson-Dirac dslash.
Concrete class for all gauge actions with simple boundary conditions.
Simple version of FermState.
void write(XMLWriter &xml, const std::string &path, const AsqtadFermActParams ¶m)
Writer parameters.
void unitarityCheck(const multi1d< LatticeColorMatrixF3 > &u)
Check the unitarity of color matrix in SU(N)
void gaugeStartup(XMLReader &gauge_file_xml, XMLReader &gauge_xml, multi1d< LatticeColorMatrix > &u, Cfg_t &cfg)
Initialize the gauge fields.
void apply(T &chi, const T &psi, enum PlusMinus isign, int cb) const
General Wilson-Dirac dslash.
bool registerAll()
Register all the factories.
bool registerAll()
Register all the factories.
Asqtad Staggered-Dirac operator.
static multi1d< LatticeColorMatrix > u
push(xml_out,"Condensates")
void initialize(int *argc, char ***argv)
Chroma initialisation routine.
void finalize(void)
Chroma finalization routine.
void reunit(LatticeColorMatrixF3 &xa)
void MesPlq(const multi1d< LatticeColorMatrixF3 > &u, multi2d< Double > &plane_plaq, Double &link)
XMLFileWriter & getXMLOutputInstance()
Get xml output instance.
FloatingPoint< double > Double
multi1d< LatticeColorMatrixF > QF
int main(int argc, char **argv)
bool testQudaDslash(const QF &u, enum PlusMinus isign, int cb)
multi1d< LatticeColorMatrixF > PF
bool testQudaDslashD(const QD &u, enum PlusMinus isign, int cb)
multi1d< LatticeColorMatrixD > PD
multi1d< LatticeColorMatrixD > QD