CHROMA
t_quda_tprec.cc
Go to the documentation of this file.
1 
2 #include <iostream>
3 #include <sstream>
4 #include <iomanip>
5 #include <string>
6 
7 #include <cstdio>
8 
9 #include <stdlib.h>
10 #include <sys/time.h>
11 #include <math.h>
12 
13 #include "chroma.h"
14 #include <string>
15 using namespace Chroma;
16 
17 typedef LatticeFermionF TF;
18 typedef LatticeColorMatrixF UF;
19 typedef multi1d<LatticeColorMatrixF> QF;
20 typedef multi1d<LatticeColorMatrixF> PF;
21 
22 typedef LatticeFermionD TD;
23 typedef LatticeColorMatrixD UD;
24 typedef multi1d<LatticeColorMatrixD> QD;
25 typedef multi1d<LatticeColorMatrixD> PD;
26 
27 
28 #include "quda.h"
29 #if 0
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33  void commDimPartitionedSet(int);
34 #ifdef __cplusplus
35 };
36 #endif
37 #endif
38 
39 
40 
41 bool testQudaDslash(const QF& u, enum PlusMinus isign, int cb)
42 {
43  // I want to test the QUDA dslash
44  // First make a reference dslash: 4D for now
45  multi1d<int> boundary(4); boundary[0]=boundary[1]=boundary[2]=1;
46  boundary[3]=-1;
47 
48  Handle< FermBC<TF,PF,QF> > bc_handle(new SimpleFermBC<TF,PF,QF>(boundary));
49 
50  Handle< FermState<TF,PF,QF> > fs_handle(new SimpleFermState<TF,PF,QF>(bc_handle, u));
51 
52  WilsonDslashF D_me(fs_handle);
53 
54  // Now set up a QUDA Dslash
55  // ----------**************************--------------
56  QudaGaugeParam q_gauge_param=newQudaGaugeParam();
57  QudaInvertParam quda_inv_param=newQudaInvertParam();
58 
59  const multi1d<int>& latdims = Layout::subgridLattSize();
60  q_gauge_param.X[0] = latdims[0];
61  q_gauge_param.X[1] = latdims[1];
62  q_gauge_param.X[2] = latdims[2];
63  q_gauge_param.X[3] = latdims[3];
64  int vol=latdims[0]*latdims[1]*latdims[2]*latdims[3];
65 
66  //
67  // setDims(q_gauge_param.X);
68 
69  // Setup gauge anisotropy
70  q_gauge_param.anisotropy = 1.0;
71 
72  q_gauge_param.type = QUDA_WILSON_LINKS;
73  // Setup Gauge Order
74  q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
75 
76  // Setp Boundaries
77  if(boundary[3]==-1){
78  q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
79  }
80  else {
81  q_gauge_param.t_boundary = QUDA_PERIODIC_T;
82  }
83 
84  QudaPrecision_s cpu_prec=QUDA_SINGLE_PRECISION;
85  QudaPrecision_s gpu_prec=QUDA_SINGLE_PRECISION;
86  QudaPrecision_s gpu_half_prec=QUDA_HALF_PRECISION;
87 
88  q_gauge_param.cpu_prec=cpu_prec;
89  q_gauge_param.cuda_prec=gpu_prec;
90  q_gauge_param.reconstruct = QUDA_RECONSTRUCT_NO;
91  q_gauge_param.reconstruct_sloppy = q_gauge_param.reconstruct;
92  q_gauge_param.cuda_prec_sloppy=q_gauge_param.cuda_prec;
93  q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
94 
95  quda_inv_param.kappa=1;
96  quda_inv_param.matpc_type = QUDA_MATPC_ODD_ODD;
97  quda_inv_param.dagger = (isign == PLUS) ? QUDA_DAG_NO : QUDA_DAG_YES;
98 
99  // Setup padding
100  multi1d<int> face_size(4);
101  face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
102  face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
103  face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
104  face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
105 
106  int max_face = face_size[0];
107  for(int i=1; i <=3; i++) {
108  if ( face_size[i] > max_face ) {
109  max_face = face_size[i];
110  }
111  }
112  q_gauge_param.ga_pad = max_face;
113  quda_inv_param.sp_pad = 0;
114  quda_inv_param.cl_pad = 0;
115 
116  quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
117  quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
118  // Now set up inverter params.
119  quda_inv_param.dslash_type = QUDA_WILSON_DSLASH; // Sets Wilson Matrix
120  quda_inv_param.cpu_prec = cpu_prec;
121  quda_inv_param.cuda_prec = gpu_prec;
122  quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
123 
124  //commDimPartitionedSet(3);
125 
126  // Make the links
127  QF links_single(Nd);
128 
129 
130  for(int mu=0; mu < Nd; mu++) {
131  links_single[mu] = fs_handle->getLinks()[mu];
132  }
133 
134 
135  // Set up the links
136  void* gauge[4];
137 
138  for(int mu=0; mu < Nd; mu++) {
139  gauge[mu] = (void *)&(links_single[mu].elem(all.start()).elem().elem(0,0).real());
140  }
141 
142  loadGaugeQuda((void *)gauge, &q_gauge_param);
143 
144 
145 
146 
147  TF src, dst1,dst2;
148  gaussian(src);
149  //src.elem(rb[1].end()).elem(0).elem(0).real() = 1.0;
150 
151  gaussian(dst1); // Junk these
152  gaussian(dst2);
153  D_me.apply(dst1,src,isign,cb);
154 
155  // Compute buffers for transfer to GPU
156  unsigned int vol2 = vol/2;
157  unsigned int h_size=4*3*2*vol2;
158  multi1d<float> buffer_in(h_size);
159  multi1d<float> buffer_out(h_size);
160 
161 
162 
163  int otherCB=1-cb;
164  int daggerBit = (isign == MINUS ? 1 : 0);
165 
166  // Pack buffer_in: output cb = rb[0], input cb=rb[1]
167  int lin=0;
168  for(int site=0; site < rb[1].siteTable().size(); site++) {
169  for(int spin=0; spin < 4; spin++) {
170  for(int col=0; col < 3; col++) {
171  buffer_in[lin++] = src.elem(rb[otherCB].siteTable()[site]).elem(spin).elem(col).real();
172  buffer_in[lin++] = src.elem(rb[otherCB].siteTable()[site]).elem(spin).elem(col).imag();
173  }
174  }
175  }
176 
177 
178  dslashQuda((void *)buffer_out.slice(),
179  (void *)buffer_in.slice(),
180  &quda_inv_param,
181  (QudaParity)cb); // source parity =1, dst parity=0
182 
183  // Unpack
184  lin=0;
185  for(int site=0; site < rb[1].siteTable().size(); site++) {
186  for(int spin=0; spin < 4; spin++) {
187  for(int col=0; col < 3; col++) {
188  dst2.elem(rb[cb].siteTable()[site]).elem(spin).elem(col).real() = buffer_out[lin++];
189  dst2.elem(rb[cb].siteTable()[site]).elem(spin).elem(col).imag() = buffer_out[lin++];
190  }
191  }
192  }
193 
194  TF diff=zero;
195  diff[rb[cb]] = dst1-dst2;
196 
197  // Free QUDA data structures
198  Double diff_norm = sqrt(norm2(diff,rb[cb]))/Double(vol2);
199  QDPIO::cout << "\t\t diff = " << diff_norm << " per site \t";
200  bool ret_val;
201  if ( toBool( diff_norm < Double(1.0e-7) ) ) {
202  ret_val = true;
203  }
204  else {
205  ret_val = false;
206  }
207 
208  freeGaugeQuda();
209 
210  return ret_val;
211 }
212 
213 
214 
215 // -------DOUBLE PRECISION TEST --------
216 bool testQudaDslashD(const QD& u, enum PlusMinus isign, int cb)
217 {
218  // I want to test the QUDA dslash
219  // First make a reference dslash: 4D for now
220  multi1d<int> boundary(4); boundary[0]=boundary[1]=boundary[2]=1;
221  boundary[3]=-1;
222 
223  Handle< FermBC<TD,PD,QD> > bc_handle(new SimpleFermBC<TD,PD,QD>(boundary));
224 
225  Handle< FermState<TD,PD,QD> > fs_handle(new SimpleFermState<TD,PD,QD>(bc_handle, u));
226 
227  WilsonDslashD D_me(fs_handle);
228 
229 #if 0
230  {
231  QDPWilsonDslashD D_qdp(fs_handle);
232 
233  TD src, dst1, dst2;
234  gaussian(src);
235 
236  D_me.apply(dst1, src, isign, cb);
237  D_qdp.apply(dst2, src, isign, cb);
238 
239  TD diff = dst1-dst2;
240  RealD diff_norm = sqrt( norm2(diff, rb[cb]) );
241  QDPIO::cout << "diff_norm = " << diff_norm << std::endl;
242  QDPIO::cout << "diff_norm / site = " << diff_norm/((double)Layout::vol()/(double)2) << std::endl;
243  }
244 #endif
245 
246  // Now set up a QUDA Dslash
247  // ----------**************************--------------
248  QudaGaugeParam q_gauge_param=newQudaGaugeParam();
249  QudaInvertParam quda_inv_param=newQudaInvertParam();
250 
251  QudaPrecision_s cpu_prec=QUDA_DOUBLE_PRECISION;
252  QudaPrecision_s gpu_prec=QUDA_DOUBLE_PRECISION;
253  QudaPrecision_s gpu_half_prec=QUDA_DOUBLE_PRECISION;
254 
255  // Setup Boundaries
256  if(boundary[3]==-1){
257  q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
258  }
259  else {
260  q_gauge_param.t_boundary = QUDA_PERIODIC_T;
261  }
262 
263  // Setup Gauge Order
264  q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
265  q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
266  q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
267 
268  // Setup precisions
269  q_gauge_param.cpu_prec=cpu_prec;
270  q_gauge_param.cuda_prec=gpu_prec;
271  q_gauge_param.cuda_prec_sloppy=gpu_half_prec;
272 
273  // Setup lattice size
274  const multi1d<int>& latdims = Layout::subgridLattSize();
275  q_gauge_param.X[0] = latdims[0];
276  q_gauge_param.X[1] = latdims[1];
277  q_gauge_param.X[2] = latdims[2];
278  q_gauge_param.X[3] = latdims[3];
279  int vol=latdims[0]*latdims[1]*latdims[2]*latdims[3];
280  q_gauge_param.type = QUDA_WILSON_LINKS;
281  q_gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER;
282 
283  // Setup gauge fixing
284  q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
285 
286  // Setup padding
287  multi1d<int> face_size(4);
288  face_size[0] = latdims[1]*latdims[2]*latdims[3]/2;
289  face_size[1] = latdims[0]*latdims[2]*latdims[3]/2;
290  face_size[2] = latdims[0]*latdims[1]*latdims[3]/2;
291  face_size[3] = latdims[0]*latdims[1]*latdims[2]/2;
292 
293  int max_face = face_size[0];
294  for(int i=1; i <=3; i++) {
295  if ( face_size[i] > max_face ) {
296  max_face = face_size[i];
297  }
298  }
299 
300 
301  q_gauge_param.ga_pad = max_face;
302  quda_inv_param.sp_pad = 0;
303  quda_inv_param.cl_pad = 0;
304 
305  // Setup gauge anisotropy
306  q_gauge_param.anisotropy = 1.0;
307 
308  //commDimPartitionedSet(3);
309 
310  // Make the links
311  QD links_single(Nd);
312 
313 #if 0
314  QD links_minus(Nd);
315 #endif
316 
317  for(int mu=0; mu < Nd; mu++) {
318  links_single[mu] = fs_handle->getLinks()[mu];
319 #if 0
320  links_minus[mu] = shift(links_single[mu], BACKWARD, mu);
321 #endif
322  //gaussian(links_minus[mu]);
323  }
324 
325 
326  // Set up the links
327  void* gauge[4];
328 #if 0
329  void* gauge_minus[4];
330 #endif
331 
332  for(int mu=0; mu < Nd; mu++) {
333  gauge[mu] = (void *)&(links_single[mu].elem(all.start()).elem().elem(0,0).real());
334 
335 #if 0
336  gauge_minus[mu] = (void *)&(links_minus[mu].elem(all.start()).elem().elem(0,0).real());
337 #endif
338  }
339 
340 #if 0
341  loadGaugeQuda((void *)gauge, (void *)gauge_minus, &q_gauge_param);
342 #else
343  loadGaugeQuda((void *)gauge, &q_gauge_param);
344 #endif
345  // Now set up inverter params.
346  quda_inv_param.dslash_type = QUDA_WILSON_DSLASH; // Sets Wilson Matrix
347  quda_inv_param.cpu_prec = cpu_prec;
348  quda_inv_param.cuda_prec = gpu_prec;
349  quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
350  quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
351  quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
352  quda_inv_param.dagger = (isign == PLUS) ? QUDA_DAG_NO : QUDA_DAG_YES;
353  TD src, dst1,dst2;
354  gaussian(src);
355  //src.elem(rb[1].end()).elem(0).elem(0).real() = 1.0;
356 
357  gaussian(dst1); // Junk these
358  gaussian(dst2);
359  D_me.apply(dst1,src,isign,cb);
360 
361  // Compute buffers for transfer to GPU
362  unsigned int vol2 = vol/2;
363  unsigned int h_size=4*3*2*vol2;
364  multi1d<double> buffer_in(h_size);
365  multi1d<double> buffer_out(h_size);
366 
367 
368 
369  int otherCB=1-cb;
370 
371  // Pack buffer_in: output cb = rb[0], input cb=rb[1]
372  int lin=0;
373  for(int site=0; site < rb[1].siteTable().size(); site++) {
374  for(int spin=0; spin < 4; spin++) {
375  for(int col=0; col < 3; col++) {
376  buffer_in[lin++] = src.elem(rb[otherCB].siteTable()[site]).elem(spin).elem(col).real();
377  buffer_in[lin++] = src.elem(rb[otherCB].siteTable()[site]).elem(spin).elem(col).imag();
378  }
379  }
380  }
381 
382 
383  dslashQuda((void *)buffer_out.slice(),
384  (void *)buffer_in.slice(),
385  &quda_inv_param,
386  (QudaParity)cb); // source parity =1, dst parity=0
387 
388 
389  // Unpack
390  lin=0;
391  for(int site=0; site < rb[1].siteTable().size(); site++) {
392  for(int spin=0; spin < 4; spin++) {
393  for(int col=0; col < 3; col++) {
394  dst2.elem(rb[cb].siteTable()[site]).elem(spin).elem(col).real() = buffer_out[lin++];
395  dst2.elem(rb[cb].siteTable()[site]).elem(spin).elem(col).imag() = buffer_out[lin++];
396  }
397  }
398  }
399 
400  TD diff=zero;
401  diff[rb[cb]] = dst1-dst2;
402 
403  // Free QUDA data structures
404  Double diff_norm = sqrt(norm2(diff,rb[cb]))/Double(vol2);
405  QDPIO::cout << "\t\t diff = " << diff_norm << " per site \t";
406  bool ret_val;
407  if ( toBool( diff_norm < Double(1.0e-7) ) ) {
408  ret_val = true;
409  }
410  else {
411  ret_val = false;
412  }
413 
414  freeGaugeQuda();
415  return ret_val;
416 }
417 
418 #if 0
419 bool testQudaDslash3D(const QF& u, enum PlusMinus isign, int cb)
420 {
421  // I want to test the QUDA dslash
422  // First make a reference dslash: 4D for now
423  multi1d<int> boundary(4); boundary[0]=boundary[1]=boundary[2]=1;
424  boundary[3]=+1;
425 
426  Handle< FermBC<TF,PF,QF> > bc_handle(new SimpleFermBC<TF,PF,QF>(boundary));
427 
428  Handle< FermState<TF,PF,QF> > fs_handle(new SimpleFermState<TF,PF,QF>(bc_handle, u));
429 
430  WilsonDslash3D D_me(fs_handle);
431 
432  // Now set up a QUDA Dslash
433  // ----------**************************--------------
434  QudaGaugeParam q_gauge_param=newQudaGaugeParam();
435  QudaInvertParam quda_inv_param=newQudaInvertParam();
436 
437  QudaPrecision_s cpu_prec=QUDA_SINGLE_PRECISION;
438  QudaPrecision_s gpu_prec=QUDA_SINGLE_PRECISION;
439  QudaPrecision_s gpu_half_prec=QUDA_SINGLE_PRECISION;
440 
441  // Setup Boundaries
442  if(boundary[3]==-1){
443  q_gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T;
444  }
445  else {
446  q_gauge_param.t_boundary = QUDA_PERIODIC_T;
447  }
448 
449  // Setup Gauge Order
450  q_gauge_param.reconstruct = QUDA_RECONSTRUCT_12;
451  q_gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12;
452 
453  // Setup precisions
454  q_gauge_param.cpu_prec=cpu_prec;
455  q_gauge_param.cuda_prec=gpu_prec;
456  q_gauge_param.cuda_prec_sloppy=gpu_half_prec;
457 
458  // Setup lattice size
459  const multi1d<int>& latdims = Layout::subgridLattSize();
460  q_gauge_param.X[0] = latdims[0];
461  q_gauge_param.X[1] = latdims[1];
462  q_gauge_param.X[2] = latdims[2];
463  q_gauge_param.X[3] = latdims[3];
464 
465  // Setup gauge fixing
466  q_gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO;
467 
468 
469 
470  // Setup padding
471  unsigned int vol = latdims[0]*latdims[1]*latdims[2]*latdims[3];
472  unsigned int vol2 = vol/2;
473  unsigned int padding=latdims[0]*latdims[1]*latdims[2]/2;
474  quda_inv_param.sp_pad = q_gauge_param.ga_pad = quda_inv_param.cl_pad = padding;
475 
476 
477  // Setup gauge anisotropy
478  q_gauge_param.anisotropy = 1.0;
479 
480 
481  // Make the links
482  QF links_single(Nd);
483  // QF links_minus(Nd);
484 
485  {
486  QF tmp_links(Nd);
487  // QF tmp_links_minus(Nd);
488 
489  for(int mu=0; mu < Nd; mu++){
490  tmp_links[mu] = fs_handle->getLinks()[mu];
491  // tmp_links_minus[mu] = shift(tmp_links[mu], BACKWARD, mu);
492  }
493 
494 
495  // Repack gauge, so that for links_single[mu] has gauge field with 3d cb being contiguous
496  for(int mu=0; mu < Nd; mu++) {
497  for(int t=0; t < latdims[3]; t++) {
498  for(int z=0; z < latdims[2]; z++) {
499  for(int y=0; y < latdims[1]; y++) {
500  for(int x=0; x < latdims[0]; x++) {
501  int xh=x/2;
502  int par=(x + y + z) & 1;
503  int par4d=(x + y + z + t) & 1;
504  // Weirdly, this is the same offset in both 3 and 4d
505  int off=xh+(latdims[0]/2)*(y + latdims[1]*(z+latdims[2]*t));
506 
507  for(int r=0; r < Nc; r++) {
508  for(int c=0; c < Nc; c++) {
509  links_single[mu].elem(vol2*par+off).elem().elem(r,c).real()
510  = tmp_links[mu].elem(vol2*par4d+off).elem().elem(r,c).real();
511 
512 
513 
514  links_single[mu].elem(vol2*par+off).elem().elem(r,c).imag()
515  = tmp_links[mu].elem(vol2*par4d+off).elem().elem(r,c).imag();
516 
517 
518 // links_minus[mu].elem(vol2*par+off).elem().elem(r,c).real()
519 // = tmp_links_minus[mu].elem(vol2*par4d+off).elem().elem(r,c).real();
520 // links_minus[mu].elem(vol2*par+off).elem().elem(r,c).imag()
521 // = tmp_links_minus[mu].elem(vol2*par4d+off).elem().elem(r,c).imag();
522  }
523  }
524  }
525  }
526  }
527  }
528  }
529 
530  }
531  // Set up the links
532  void* gauge[4];
533  // void* gauge_minus[4];
534 
535  for(int mu=0; mu < Nd; mu++) {
536  gauge[mu] = (void *)&(links_single[mu].elem(0).elem().elem(0,0).real());
537  // gauge_minus[mu] = (void *)&(links_minus[mu].elem(0).elem().elem(0,0).real());
538  }
539  // loadGaugeQuda((void *)gauge,(void *)gauge_minus, &q_gauge_param);
540  loadGaugeQuda((void *)gauge, &q_gauge_param);
541 
542  // Now set up inverter params.
543  quda_inv_param.dslash_type = QUDA_WILSON_DSLASH; // Sets Wilson Matrix
544  quda_inv_param.cpu_prec = cpu_prec;
545  quda_inv_param.cuda_prec = gpu_prec;
546  quda_inv_param.cuda_prec_sloppy = gpu_half_prec;
547  quda_inv_param.dirac_order = QUDA_DIRAC_ORDER;
548  quda_inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS;
549  TF src, dst1,dst2;
550  gaussian(src);
551  gaussian(dst1); // Junk these
552  gaussian(dst2);
553  D_me.apply(dst1,src,isign,cb); // fills out dst[rb[0]]
554 
555  // Compute buffers for transfer to GPU
556 
557  unsigned int h_size=4*3*2*vol2;
558  multi1d<float> buffer_in(h_size);
559  multi1d<float> buffer_out(h_size);
560 
561 
562 
563  int otherCB=1-cb;
564  int daggerBit = (isign == MINUS ? 1 : 0);
565 
566  // Pack buffer_in: output cb = rb3[0], input cb=rb3[1]
567  int lin;
568  for(int t=0; t < latdims[3]; t++) {
569  for(int z=0; z < latdims[2]; z++) {
570  for(int y=0; y < latdims[1]; y++) {
571  for(int x=0; x < latdims[0]; x++) {
572  int xh=x/2;
573  int par=(x + y + z) & 1;
574  if( par == otherCB ) {
575  int par4d=(x + y + z + t) & 1;
576  // Weirdly, this is the same offset in both 3 and 4d
577  int off=xh+(latdims[0]/2)*(y + latdims[1]*(z+latdims[2]*t));
578  int lin=0;
579  for(int spin=0; spin < 4; spin++) {
580  for(int col=0; col < 3; col++) {
581  buffer_in[24*off+(lin++)] = src.elem(vol2*par4d+off).elem(spin).elem(col).real();
582  buffer_in[24*off+(lin++)] = src.elem(vol2*par4d+off).elem(spin).elem(col).imag();
583  }
584  }
585  }
586  }
587  }
588  }
589  }
590 
591 
592  dslash3DQuda((void *)buffer_out.slice(),
593  (void *)buffer_in.slice(),
594  &quda_inv_param,
595  cb, // source parity =1, dst parity=0
596  daggerBit); // no dagger
597 
598  for(int t=0; t < latdims[3]; t++) {
599  for(int z=0; z < latdims[2]; z++) {
600  for(int y=0; y < latdims[1]; y++) {
601  for(int x=0; x < latdims[0]; x++) {
602  int xh=x/2;
603  int par=(x + y + z) & 1;
604  if( par == cb ) {
605  int par4d=(x + y + z + t) & 1;
606  // Weirdly, this is the same offset in both 3 and 4d
607  int off=xh+(latdims[0]/2)*(y + latdims[1]*(z+latdims[2]*t));
608  int lin=0;
609  for(int spin=0; spin < 4; spin++) {
610  for(int col=0; col < 3; col++) {
611  dst2.elem(vol2*par4d+off).elem(spin).elem(col).real()= buffer_out[24*off+(lin++)] ;
612  dst2.elem(vol2*par4d+off).elem(spin).elem(col).imag()=buffer_out[24*off+(lin++)];
613  }
614  }
615  }
616  }
617  }
618  }
619  }
620 
621  TF diff=zero;
622  diff[rb3[cb]] = dst1-dst2;
623 
624  // Free QUDA data structures
625  Double diff_norm = sqrt(norm2(diff,rb3[cb]))/Double(vol2);
626  QDPIO::cout << "\t diff = " << diff_norm << " per site \t";
627  bool ret_val;
628  if ( toBool( diff_norm < Double(2.0e-8) ) ) {
629  ret_val = true;
630  }
631  else {
632  ret_val = false;
633  }
634 
635  return ret_val;
636 }
637 #endif
638 
639 bool linkageHack(void)
640 {
641  bool foo = true;
642 
643  // Inline Measurements
645  foo &= GaugeInitEnv::registerAll();
646 
647  return foo;
648 }
649 
650 int main(int argc, char **argv)
651 {
652  // Put the machine into a known state
653  Chroma::initialize(&argc, &argv);
654  QDPIO::cout << "Linkage = " << linkageHack() << std::endl;
655 
656 
657  // AppParams params;
658  const int lsize[4]={12,12,12,48};
659  multi1d<int> nrow(4);
660  nrow=lsize;
661  Layout::setLattSize(nrow);
662  Layout::create();
663 
664  multi1d<UF> u(Nd);
665  multi1d<UD> ud(Nd);
666 #if 0
667  XMLReader gauge_file_xml, gauge_xml;
668  Cft_t inputCfg;
669  inputCfg.cfg_type=WEAK_FIELD;
670  gaugeStartup(gauge_file_xml, gauge_xml, u, params.inputCfg);
671 #endif
672  for(int mu=0; mu < Nd; mu++){
673  gaussian(u[mu]);
674  reunit(u[mu]);
675  gaussian(ud[mu]);
676  reunit(ud[mu]);
677 
678  //u[mu] = Real(1);
679  //ud[mu] = u[mu];
680 
681  }
682 
683  unitarityCheck(u);
684  unitarityCheck(ud);
685 
686  // Setup the lattice
687 
688  XMLFileWriter& xml_out = Chroma::getXMLOutputInstance();
689  push(xml_out,"t_invert");
690  push(xml_out,"params");
691  write(xml_out, "nrow", nrow);
692  pop(xml_out); // Params
693 
694  // Measure the plaquette on the gauge
695  MesPlq(xml_out, "Observables", ud);
696  xml_out.flush();
697 
698  // Write code here?
699  QDPIO::cout << "Howdy" << std::endl;
700  bool result;
701 
702 #if 1
703  // Single Precision tests
704  QDPIO::cout << "Test: Dslash PLUS, cb=0";
705  if ( ! testQudaDslash(u, PLUS, 0) ) {
706  QDPIO::cout << "\t FAILED" << std::endl;
707  // QDP_abort(1);
708  }
709  else {
710  QDPIO::cout << "\t OK" << std::endl;
711  }
712 
713  QDPIO::cout << "Test: Dslash PLUS, cb=1" ;
714  if ( ! testQudaDslash(u, PLUS, 1) ) {
715  QDPIO::cout << "\t FAILED" << std::endl;
716  // QDP_abort(1);
717  }
718  else {
719  QDPIO::cout << "\t OK" << std::endl;
720  }
721 
722 
723  QDPIO::cout << "Test: Dslash MINUS, cb=0";
724  if ( ! testQudaDslash(u, MINUS, 0) ) {
725  QDPIO::cout << "\t FAILED" << std::endl;
726  // QDP_abort(1);
727  }
728  else {
729  QDPIO::cout << "\t OK" << std::endl;
730  }
731 
732 
733  QDPIO::cout << "Test: Dslash MINUS, cb=1" ;
734  if ( ! testQudaDslash(u, MINUS, 1) ) {
735  QDPIO::cout << "\t FAILED" << std::endl;
736  // QDP_abort(1);
737  }
738  else {
739  QDPIO::cout << "\t OK" << std::endl;
740  }
741 
742 #endif
743 
744 
745 #if 1
746  // Double Precision tests
747  QDPIO::cout << "Test: Dslash PLUS, cb=0";
748  if ( ! testQudaDslashD(ud, PLUS, 0) ) {
749  QDPIO::cout << "\t FAILED" << std::endl;
750  // QDP_abort(1);
751  }
752  else {
753  QDPIO::cout << "\t OK" << std::endl;
754  }
755 
756  QDPIO::cout << "Test: Dslash PLUS, cb=1" ;
757  if ( ! testQudaDslashD(ud, PLUS, 1) ) {
758  QDPIO::cout << "\t FAILED" << std::endl;
759  // QDP_abort(1);
760  }
761  else {
762  QDPIO::cout << "\t OK" << std::endl;
763  }
764 
765 
766  QDPIO::cout << "Test: Dslash MINUS, cb=0";
767  if ( ! testQudaDslashD(ud, MINUS, 0) ) {
768  QDPIO::cout << "\t FAILED" << std::endl;
769  //QDP_abort(1);
770  }
771  else {
772  QDPIO::cout << "\t OK" << std::endl;
773  }
774 
775 
776  QDPIO::cout << "Test: Dslash MINUS, cb=1" ;
777  if ( ! testQudaDslashD(ud, MINUS, 1) ) {
778  QDPIO::cout << "\t FAILED" << std::endl;
779  // QDP_abort(1);
780  }
781  else {
782  QDPIO::cout << "\t OK" << std::endl;
783  }
784 #endif
785 
786 
787 #if 0
788  // 3D Dslash testsd
789 
790  QDPIO::cout << "Test: Dslash3D PLUS, cb=0";
791  if ( ! testQudaDslash3D(u, PLUS, 0) ) {
792  QDPIO::cout << "\t FAILED" << std::endl;
793  QDP_abort(1);
794  }
795  else {
796  QDPIO::cout << "\t OK" << std::endl;
797  }
798 
799  QDPIO::cout << "Test: Dslash3D MINUS, cb=0";
800  if ( ! testQudaDslash3D(u, MINUS, 0) ) {
801  QDPIO::cout << "\t FAILED" << std::endl;
802  QDP_abort(1);
803  }
804  else {
805  QDPIO::cout << "\t OK" << std::endl;
806  }
807 
808  QDPIO::cout << "Test: Dslash3D PLUS, cb=1" ;
809  if ( ! testQudaDslash3D(u, PLUS, 1) ) {
810  QDPIO::cout << "\t FAILED" << std::endl;
811  QDP_abort(1);
812  }
813  else {
814  QDPIO::cout << "\t OK" << std::endl;
815  }
816 
817  QDPIO::cout << "Test: Dslash3D MINUS, cb=1" ;
818  if ( ! testQudaDslash3D(u, MINUS, 1) ) {
819  QDPIO::cout << "\t FAILED" << std::endl;
820  QDP_abort(1);
821  }
822  else {
823  QDPIO::cout << "\t OK" << std::endl;
824  }
825 #endif
826 
827 
828 
829  QDPIO::cout << "All tests passed" << std::endl;
830  pop(xml_out);
831  xml_out.close();
832 
834 
835  exit(0);
836 }
Primary include file for CHROMA in application codes.
Class for counted reference semantics.
Definition: handle.h:33
General Wilson-Dirac dslash.
Definition: lwldslash_w.h:48
Concrete class for all gauge actions with simple boundary conditions.
Definition: simple_fermbc.h:42
Simple version of FermState.
int mu
Definition: cool.cc:24
void write(XMLWriter &xml, const std::string &path, const AsqtadFermActParams &param)
Writer parameters.
void unitarityCheck(const multi1d< LatticeColorMatrixF3 > &u)
Check the unitarity of color matrix in SU(N)
Definition: unit_check.cc:20
void gaugeStartup(XMLReader &gauge_file_xml, XMLReader &gauge_xml, multi1d< LatticeColorMatrix > &u, Cfg_t &cfg)
Initialize the gauge fields.
void apply(T &chi, const T &psi, enum PlusMinus isign, int cb) const
General Wilson-Dirac dslash.
Definition: lwldslash_w.h:228
Params params
int y
Definition: meslate.cc:35
int z
Definition: meslate.cc:36
int x
Definition: meslate.cc:34
int t
Definition: meslate.cc:37
Nd
Definition: meslate.cc:74
bool registerAll()
Register all the factories.
bool registerAll()
Register all the factories.
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
gaussian(aux)
static multi1d< LatticeColorMatrix > u
push(xml_out,"Condensates")
Double c
Definition: invbicg.cc:108
int i
Definition: pbg5p_w.cc:55
void initialize(int *argc, char ***argv)
Chroma initialisation routine.
Definition: chroma_init.cc:114
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
void finalize(void)
Chroma finalization routine.
Definition: chroma_init.cc:308
bool linkageHack(void)
Definition: const_hmc.cc:660
void reunit(LatticeColorMatrixF3 &xa)
Definition: reunit.cc:467
pop(xml_out)
void MesPlq(const multi1d< LatticeColorMatrixF3 > &u, multi2d< Double > &plane_plaq, Double &link)
Definition: mesplq.cc:83
int cb
Definition: invbicg.cc:120
XMLFileWriter & getXMLOutputInstance()
Get xml output instance.
Definition: chroma_init.cc:359
Double zero
Definition: invbicg.cc:106
FloatingPoint< double > Double
Definition: gtest.h:7351
#define BACKWARD
Definition: primitives.h:83
multi1d< LatticeColorMatrixF > QF
Definition: t_quda_tprec.cc:19
LatticeColorMatrixF UF
Definition: t_quda_tprec.cc:18
int main(int argc, char **argv)
bool testQudaDslash(const QF &u, enum PlusMinus isign, int cb)
Definition: t_quda_tprec.cc:41
multi1d< LatticeColorMatrixF > PF
Definition: t_quda_tprec.cc:20
LatticeFermionD TD
Definition: t_quda_tprec.cc:22
LatticeColorMatrixD UD
Definition: t_quda_tprec.cc:23
LatticeFermionF TF
Definition: t_quda_tprec.cc:17
bool testQudaDslashD(const QD &u, enum PlusMinus isign, int cb)
multi1d< LatticeColorMatrixD > PD
Definition: t_quda_tprec.cc:25
multi1d< LatticeColorMatrixD > QD
Definition: t_quda_tprec.cc:24