CHROMA
t_lwldslash_array.cc
Go to the documentation of this file.
1 
2 
3 #include "chroma.h"
4 
5 #include <iostream>
6 #include <cstdio>
7 
8 
9 using namespace Chroma;
10 
11 int main(int argc, char **argv)
12 {
13  // Put the machine into a known state
14  Chroma::initialize(&argc, &argv);
15 
16 
17  int N5;
18 
19  // Lattice Size
20  multi1d<int> nrow(Nd);
21  try {
22  // Read parameters
23  XMLReader xml_in(Chroma::getXMLInputFileName());
24  read(xml_in, "/param/nrow", nrow);
25  read(xml_in, "/param/N5", N5);
26  xml_in.close();
27  }
28  catch( const std::string&e ) {
29  QDPIO::cerr << "Caught Exception while reading XML " << e << std::endl;
30  QDP_abort(1);
31  }
32 
33  // Setup the layout
34  Layout::setLattSize(nrow);
35  Layout::create();
36 
37  XMLFileWriter xml(Chroma::getXMLOutputFileName());
38  push(xml,"t_lwldslash_array");
39  proginfo(xml); // Print out basic program info
40 
41  // Make up a random gauge field.
42  multi1d<LatticeColorMatrix> u(Nd);
43  for(int m=0; m < u.size(); ++m)
44  gaussian(u[m]);
45 
46 
47  // Make up a gaussian source and a zero result std::vector
48  LatticeFermion psi, chi, chi2;
49  gaussian(psi);
50  chi = zero;
51 
52  //! Create a linear operator
53  QDPIO::cout << "Constructing naive QDPWilsonDslash" << std::endl;
54 
55  Handle< FermState<LatticeFermion,
56  multi1d<LatticeColorMatrix>,
57  multi1d<LatticeColorMatrix> > > state(new PeriodicFermState<LatticeFermion,
58  multi1d<LatticeColorMatrix>,
59  multi1d<LatticeColorMatrix> >(u));
60 
61  // Naive Dslash
63 
64  QDPIO::cout << "Done" << std::endl;
65 
66 
67  push(xml,"Unoptimized_test");
68 
69  int isign, cb, loop, iter=1;
70 
71  bool first = true;
72  for(isign = 1; isign >= -1; isign -= 2) {
73  for(cb = 0; cb < 2; ++cb) {
74 
75  double mydt;
76  QDP::StopWatch swatch;
77 
78  if (first)
79  {
80  for(iter=1; ; iter <<= 1)
81  {
82  QDPIO::cout << "Applying naive D " << iter << " times" << std::endl;
83 
84  swatch.reset();
85  swatch.start();
86 
87  for(int i=iter; i-- > 0; ) {
88  D.apply(chi, psi, (isign == 1 ? PLUS : MINUS), cb);
89  }
90  swatch.stop();
91 
92  mydt=swatch.getTimeInSeconds();
93  QDPInternal::globalSum(mydt);
94  mydt /= Layout::numNodes();
95 
96  if (mydt > 1) {
97  first = false;
98  break;
99  }
100  }
101  }
102 
103  QDPIO::cout << "Applying naive D for timings" << std::endl;
104 
105  swatch.reset();
106  swatch.start();
107  for(int i=iter; i-- > 0; ) {
108  D.apply(chi, psi, (isign == 1 ? PLUS : MINUS), cb);
109  }
110  swatch.stop();
111 
112  mydt=swatch.getTimeInSeconds();
113  mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
114  QDPInternal::globalSum(mydt);
115  mydt /= Layout::numNodes();
116 
117  float mflops = float(1320.0f/mydt);
118  QDPIO::cout << "cb = " << cb << " isign = " << isign << std::endl;
119  QDPIO::cout << "The time per lattice point is "<< mydt
120  << " micro sec (" << mflops << ") Mflops " << std::endl;
121 
122  push(xml,"Unopt_test");
123  write(xml,"cb",cb);
124  write(xml,"isign",isign);
125  write(xml,"mflops",mflops);
126  pop(xml);
127  }
128  }
129 
130  pop(xml);
131 
132  //! Create a linear operator
133  QDPIO::cout << "Constructing (possibly optimized) WilsonDslash" << std::endl;
134 
135  WilsonDslash D_opt(state);
136 
137  QDPIO::cout << "Done" << std::endl;
138 
139  push(xml,"Optimized_test");
140 
141  first = true;
142  for(isign = 1; isign >= -1; isign -= 2) {
143  for(cb = 0; cb < 2; ++cb) {
144 
145  double mydt= 2.0;
146  QDP::StopWatch swatch;
147 
148  if (first)
149  {
150  for(iter=1; ; iter <<= 1)
151  {
152  QDPIO::cout << "Applying D_opt " << iter << " times" << std::endl;
153 
154  swatch.reset();
155  swatch.start();
156  for(int i=iter; i-- > 0; ) {
157  D_opt.apply(chi, psi, (isign == 1 ? PLUS : MINUS ) , cb); // NOTE: for timings throw away return value
158  }
159  swatch.stop();
160 
161  mydt=swatch.getTimeInSeconds();
162 
163  QDPInternal::globalSum(mydt);
164  mydt /= Layout::numNodes();
165 
166  if (mydt > 1) {
167  first = false;
168  break;
169  }
170  }
171  }
172 
173  QDPIO::cout << "Applying D_opt for timings" << std::endl;
174 
175  swatch.reset();
176  swatch.start();
177  for(int i=iter; i-- > 0; ) {
178  D_opt.apply(chi, psi, (isign == 1 ? PLUS : MINUS ) , cb); // NOTE: for timings throw away return value
179  }
180  swatch.stop();
181 
182  mydt=swatch.getTimeInSeconds();
183  mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
184  QDPInternal::globalSum(mydt);
185  mydt /= Layout::numNodes();
186 
187  float mflops = float(1320.0f/mydt);
188  QDPIO::cout << "cb = " << cb << " isign = " << isign << std::endl;
189  QDPIO::cout << "After " << iter << " calls, the time per lattice point is "<< mydt
190  << " micro sec (" << mflops << ") Mflops " << std::endl;
191 
192  push(xml,"OPT_test");
193  write(xml,"cb",cb);
194  write(xml,"isign",isign);
195  write(xml,"mflops",mflops);
196  pop(xml);
197  }
198  }
199 
200  pop(xml);
201 
202 
203  LatticeFermion chi3;
204  Double n2;
205 
206  gaussian(chi3);
207  gaussian(psi);
208  for(cb = 0; cb < 2; cb++) {
209  for(isign = 1; isign >= -1; isign -= 2) {
210 
211  chi = chi3;
212  chi2 = chi3;
213  D.apply(chi, psi, (isign > 0 ? PLUS : MINUS), cb);
214  D.apply(chi2, psi, (isign > 0 ? PLUS : MINUS), cb);
215 
216  n2 = norm2( chi2 - chi );
217 
218  QDPIO::cout << "Paranoia test: || D(psi, "
219  << (isign > 0 ? "+, " : "-, ") << cb
220  << ") - D(psi, "
221  << (isign > 0 ? "+, " : "-, ") << cb << " ) || = " << n2
222  << std::endl;
223  }
224  }
225 
226  gaussian(chi3);
227  gaussian(psi);
228  for(cb = 0; cb < 2; cb++) {
229  for(isign = 1; isign >= -1; isign -= 2) {
230 
231  chi = chi3;
232  chi2 = chi3;
233  D.apply(chi, psi, (isign > 0 ? PLUS : MINUS), cb);
234  D_opt.apply(chi2, psi, (isign > 0 ? PLUS : MINUS), cb);
235 
236  n2 = norm2( chi2 - chi );
237 
238  QDPIO::cout << "OPT test: || D(psi, "
239  << (isign > 0 ? "+, " : "-, ") << cb
240  << ") - D_opt(psi, "
241  << (isign > 0 ? "+, " : "-, ") << cb << " ) || = " << n2
242  << std::endl;
243 
244  push(xml,"OPT_correctness_test");
245  write(xml,"isign", isign);
246  write(xml,"cb", cb);
247  write(xml,"norm2_diff",n2);
248  pop(xml);
249  }
250  }
251 
252 
253  // Now do the std::vector case:
254  multi1d<LatticeFermion> chis1(N5);
255  multi1d<LatticeFermion> chis2(N5);
256  multi1d<LatticeFermion> chis3(N5);
257  multi1d<LatticeFermion> psis(N5);
258 
259  // Naive Dslash
260  QDPIO::cout << "Consturcting Naive 5D Dslash, N5=" << N5 << std::endl;
262  QDPIO::cout << "Done" << std::endl;
263 
264 
265  push(xml,"Unoptimized_array_test");
266  for(int k=0; k < N5; k++) {
267  gaussian(psis[k]);
268  chis1[k]=zero;
269  chis2[k]=zero;
270  chis3[k]=zero;
271  }
272 
273  iter=1;
274  first = true;
275  for(isign = 1; isign >= -1; isign -= 2) {
276  for(cb = 0; cb < 2; ++cb) {
277 
278  QDP::StopWatch swatch;
279  double mydt=2.0;
280 
281  if (first)
282  {
283  for(iter=1; ; iter <<= 1)
284  {
285  QDPIO::cout << "Applying naive D5 " << iter << " times" << std::endl;
286 
287  swatch.reset();
288  swatch.start();
289  for(int i=iter; i-- > 0; ) {
290  D5.apply(chis1, psis, (isign == 1 ? PLUS : MINUS), cb);
291  }
292  swatch.stop();
293 
294  mydt=swatch.getTimeInSeconds();
295 
296  QDPInternal::globalSum(mydt);
297  mydt /= Layout::numNodes();
298 
299  if (mydt > 1) {
300  first = false;
301  break;
302  }
303  }
304  }
305 
306  QDPIO::cout << "Applying naive D5 for timings" << std::endl;
307 
308  swatch.reset();
309  swatch.start();
310  for(int i=iter; i-- > 0; ) {
311  D5.apply(chis1, psis, (isign == 1 ? PLUS : MINUS), cb);
312  }
313  swatch.stop();
314 
315  mydt=swatch.getTimeInSeconds();
316  mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
317  QDPInternal::globalSum(mydt);
318  mydt /= Layout::numNodes();
319 
320  float mflops = float(1320.0f*N5/mydt);
321  QDPIO::cout << "cb = " << cb << " isign = " << isign << std::endl;
322  QDPIO::cout << "The time per lattice point is "<< mydt
323  << " micro sec (" << mflops << ") Mflops " << std::endl;
324 
325  push(xml,"Unopt_array_test");
326  write(xml,"cb",cb);
327  write(xml,"isign",isign);
328  write(xml,"mflops",mflops);
329  pop(xml);
330  }
331  }
332 
333  pop(xml);
334 
335  //! Naive loop
336  QDPIO::cout << "Constructing (possibly optimized) WilsonDslash to do std::vector operation with a loop" << std::endl;
337  WilsonDslash D_opt_loop(state);
338  QDPIO::cout << "Done" << std::endl;
339 
340  push(xml,"Optimized_loop_test");
341 
342  first = true;
343  for(isign = 1; isign >= -1; isign -= 2) {
344  for(cb = 0; cb < 2; ++cb) {
345 
346  QDP::StopWatch swatch;
347  double mydt= 2.0;
348 
349  if (first)
350  {
351  for(iter=1; ; iter <<= 1)
352  {
353  QDPIO::cout << "Applying D_opt_loop " << iter << " times" << std::endl;
354 
355  swatch.reset();
356  swatch.start();
357  for(int i=iter; i-- > 0; ) {
358  for(int loop=0; loop < N5; loop++) {
359  D_opt_loop.apply(chis1[loop], psis[loop], (isign == 1 ? PLUS : MINUS ) , cb); // NOTE: for timings throw away return value
360  }
361  }
362  swatch.stop();
363 
364  mydt=swatch.getTimeInSeconds();
365  QDPInternal::globalSum(mydt);
366  mydt /= Layout::numNodes();
367 
368  if (mydt > 1) {
369  first = false;
370  break;
371  }
372  }
373  }
374 
375  QDPIO::cout << "Applying D_opt_loop for timings" << std::endl;
376 
377  swatch.reset();
378  swatch.start();
379  for(int i=iter; i-- > 0; ) {
380  for(int loop=0; loop<N5; loop++) {
381  D_opt_loop.apply(chis1[loop], psis[loop], (isign == 1 ? PLUS : MINUS ) , cb); // NOTE: for timings throw away return value
382  }
383  }
384  swatch.stop();
385 
386  mydt=swatch.getTimeInSeconds();
387  mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
388  QDPInternal::globalSum(mydt);
389  mydt /= Layout::numNodes();
390 
391  float mflops = float(1320.0f*N5/mydt);
392  QDPIO::cout << "cb = " << cb << " isign = " << isign << std::endl;
393  QDPIO::cout << "After " << iter << " calls, the time per lattice point is "<< mydt
394  << " micro sec (" << mflops << ") Mflops " << std::endl;
395 
396  push(xml,"OPT_loop_test");
397  write(xml,"cb",cb);
398  write(xml,"isign",isign);
399  write(xml,"mflops",mflops);
400  pop(xml);
401  }
402  }
403 
404  pop(xml);
405 
406  //! Create a linear operator
407  QDPIO::cout << "Constructing (possibly optimized) WilsonDslashArray N5="<< N5 << std::endl;
408 
409  WilsonDslashArray D5_opt(state, N5);
410 
411  QDPIO::cout << "Done" << std::endl;
412 
413  push(xml,"Optimized_array_test");
414 
415  first = true;
416  for(isign = 1; isign >= -1; isign -= 2) {
417  for(cb = 0; cb < 2; ++cb) {
418 
419  double mydt= 2.0;
420  QDP::StopWatch swatch;
421 
422  if (first)
423  {
424  for(iter=1; ; iter <<= 1)
425  {
426  QDPIO::cout << "Applying D5_opt " << iter << " times" << std::endl;
427 
428  swatch.reset();
429  swatch.start();
430  for(int i=iter; i-- > 0; ) {
431  D5_opt.apply(chis1, psis, (isign == 1 ? PLUS : MINUS ) , cb); // NOTE: for timings throw away return value
432  }
433  swatch.stop();
434 
435  mydt=swatch.getTimeInSeconds();
436 
437  QDPInternal::globalSum(mydt);
438  mydt /= Layout::numNodes();
439 
440  if (mydt > 1) {
441  first = false;
442  break;
443  }
444  }
445  }
446 
447  QDPIO::cout << "Applying D5_opt for timings" << std::endl;
448 
449  swatch.reset();
450  swatch.start();
451  for(int i=iter; i-- > 0; ) {
452  D5_opt.apply(chis1, psis, (isign == 1 ? PLUS : MINUS ) , cb); // NOTE: for timings throw away return value
453  }
454  swatch.stop();
455 
456  mydt=swatch.getTimeInSeconds();
457  mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
458  QDPInternal::globalSum(mydt);
459  mydt /= Layout::numNodes();
460 
461  float mflops = float(1320.0f*N5/mydt);
462  QDPIO::cout << "cb = " << cb << " isign = " << isign << std::endl;
463  QDPIO::cout << "After " << iter << " calls, the time per lattice point is "<< mydt
464  << " micro sec (" << mflops << ") Mflops " << std::endl;
465 
466  push(xml,"Opt_array_test");
467  write(xml,"cb",cb);
468  write(xml,"isign",isign);
469  write(xml,"mflops",mflops);
470  pop(xml);
471  }
472  }
473 
474  pop(xml);
475 
476  for(cb = 0; cb < 2; cb++) {
477  for(isign = 1; isign >= -1; isign -= 2) {
478  for(int k=0; k < N5; k++) {
479  chis1[k] = zero;
480  chis2[k] = zero;
481  }
482 
483  D5.apply(chis1, psis, (isign > 0 ? PLUS : MINUS), cb);
484  D5.apply(chis2, psis, (isign > 0 ? PLUS : MINUS), cb);
485 
486  // 5D Norm
487  n2 = Double(0);
488  for(int i=0; i < N5; i++) {
489  n2 += norm2( chis2[i] - chis1[i] );
490  }
491 
492  QDPIO::cout << "Paranoia test: || D5(psi, "
493  << (isign > 0 ? "+, " : "-, ") << cb
494  << ") - D5(psi, "
495  << (isign > 0 ? "+, " : "-, ") << cb << " ) || = " << n2
496  << std::endl;
497  }
498  }
499 
500  for(cb = 0; cb < 2; cb++) {
501  for(isign = 1; isign >= -1; isign -= 2) {
502 
503  for(int k=0; k < N5; k++) {
504  chis1[k] = zero;
505  chis2[k] = zero;
506  }
507 
508  D5.apply(chis1, psis, (isign > 0 ? PLUS : MINUS), cb);
509  D5_opt.apply(chis2, psis, (isign > 0 ? PLUS : MINUS), cb);
510 
511  n2=Double(0);
512  for(int i=0; i < N5; i++) {
513  n2 += norm2( chis2[i] - chis1[i] );
514  }
515  QDPIO::cout << "OPT test: || D5(psi, "
516  << (isign > 0 ? "+, " : "-, ") << cb
517  << ") - D5_opt(psi, "
518  << (isign > 0 ? "+, " : "-, ") << cb << " ) || = " << n2
519  << std::endl;
520 
521  push(xml,"Opt_array_correctness_test");
522  write(xml,"isign", isign);
523  write(xml,"cb", cb);
524  write(xml,"norm2_diff",n2);
525  pop(xml);
526  }
527  }
528 
529  pop(xml);
530 
531 
532 
533  // Time to bolt
535 
536  exit(0);
537 }
Primary include file for CHROMA in application codes.
Support class for fermion actions and linear operators.
Definition: state.h:94
Class for counted reference semantics.
Definition: handle.h:33
Periodic version of FermState.
General Wilson-Dirac dslash of arrays.
General Wilson-Dirac dslash.
General Wilson-Dirac dslash.
Definition: lwldslash_w.h:48
void read(XMLReader &xml, const std::string &path, AsqtadFermActParams &param)
Read parameters.
void write(XMLWriter &xml, const std::string &path, const AsqtadFermActParams &param)
Writer parameters.
void proginfo(XMLWriter &xml)
Print out basic information about this program.
Definition: proginfo.cc:24
void apply(T &chi, const T &psi, enum PlusMinus isign, int cb) const
General Wilson-Dirac dslash.
Definition: lwldslash_w.h:228
void apply(T &chi, const T &psi, enum PlusMinus isign, int cb) const
General Wilson-Dirac dslash.
void apply(multi1d< LatticeFermion > &chi, const multi1d< LatticeFermion > &psi, enum PlusMinus isign, int cb) const
General Wilson-Dirac dslash.
static int m[4]
Definition: make_seeds.cc:16
Nd
Definition: meslate.cc:74
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
gaussian(aux)
std::string getXMLOutputFileName()
Get output file name.
Definition: chroma_init.cc:91
static multi1d< LatticeColorMatrix > u
push(xml_out,"Condensates")
int i
Definition: pbg5p_w.cc:55
void initialize(int *argc, char ***argv)
Chroma initialisation routine.
Definition: chroma_init.cc:114
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
void finalize(void)
Chroma finalization routine.
Definition: chroma_init.cc:308
multi1d< LatticeFermion > chi(Ncb)
std::string getXMLInputFileName()
Get input file name.
Definition: chroma_init.cc:88
LatticeFermion psi
Definition: mespbg5p_w.cc:35
pop(xml_out)
int cb
Definition: invbicg.cc:120
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > state
Definition: pbg5p_w.cc:28
Double zero
Definition: invbicg.cc:106
int k
Definition: invbicg.cc:119
FloatingPoint< double > Double
Definition: gtest.h:7351
::std::string string
Definition: gtest.h:1979
int main(int argc, char **argv)