CHROMA
t_lwldslash_new.cc
Go to the documentation of this file.
1 
2 
3 #include "chroma.h"
4 #include <iostream>
5 #include <cstdio>
6 
7 
8 using namespace Chroma;
9 
10 
11 int main(int argc, char **argv)
12 {
13  // Put the machine into a known state
14  Chroma::initialize(&argc, &argv);
15 
16 
17  // Lattice Size
18  multi1d<int> nrow(Nd);
19 
20  /*
21  try {
22  XMLReader xml_in(Chroma::getXMLInputFileName());
23  read(xml_in, "/param/nrow", nrow);
24  xml_in.close();
25  }
26  catch( const std::string&e ) {
27  QDPIO::cerr << "Caught Exception while reading XML " << e << std::endl;
28  QDP_abort(1);
29  }
30  */
31 
32  // Setup the layout
33  Layout::setLattSize(nrow);
34  Layout::create();
35 
36  XMLFileWriter xml(Chroma::getXMLOutputFileName());
37  push(xml,"t_lwldslash_array");
38  proginfo(xml); // Print out basic program info
39 
40  // Make up a random gauge field.
41  multi1d<LatticeColorMatrix> u(Nd);
42  for(int m=0; m < u.size(); ++m)
43  gaussian(u[m]);
44 
45 
46  // Make up a gaussian source and a zero result std::vector
47  LatticeFermion psi, chi, chi2;
48  gaussian(psi);
49  chi = zero;
50 
51  //! Create a linear operator
52  QDPIO::cout << "Constructing naive QDPWilsonDslash" << std::endl;
53 
54  Handle< FermState<LatticeFermion,
55  multi1d<LatticeColorMatrix>,
56  multi1d<LatticeColorMatrix> > > state(new PeriodicFermState<LatticeFermion,
57  multi1d<LatticeColorMatrix>,
58  multi1d<LatticeColorMatrix> >(u));
59 
60  // Naive Dslash
62 
63  QDPIO::cout << "Done" << std::endl;
64 
65 
66  push(xml,"Unoptimized_test");
67 
68  int isign, cb, loop, iter=1;
69  bool first = true;
70  for(isign = 1; isign >= -1; isign -= 2) {
71  for(cb = 0; cb < 2; ++cb) {
72 
73  double mydt;
74  QDP::StopWatch swatch;
75 
76  if (first)
77  {
78  for(iter=1; ; iter <<= 1)
79  {
80  QDPIO::cout << "Applying naive D " << iter << " times" << std::endl;
81 
82  swatch.reset();
83  swatch.start();
84 
85  for(int i=iter; i-- > 0; ) {
86  D.apply(chi, psi, (isign == 1 ? PLUS : MINUS), cb);
87  }
88  swatch.stop();
89 
90  mydt=swatch.getTimeInSeconds();
91  QDPInternal::globalSum(mydt);
92  mydt /= Layout::numNodes();
93 
94  if (mydt > 1) {
95  first = false;
96  break;
97  }
98  }
99  }
100 
101  QDPIO::cout << "Applying naive D for timings" << std::endl;
102 
103  swatch.reset();
104  swatch.start();
105  for(int i=iter; i-- > 0; ) {
106  D.apply(chi, psi, (isign == 1 ? PLUS : MINUS), cb);
107  }
108  swatch.stop();
109 
110  mydt=swatch.getTimeInSeconds();
111  mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
112  QDPInternal::globalSum(mydt);
113  mydt /= Layout::numNodes();
114 
115  float mflops = float(1320.0f/mydt);
116  QDPIO::cout << "cb = " << cb << " isign = " << isign << std::endl;
117  QDPIO::cout << "The time per lattice point is "<< mydt
118  << " micro sec (" << mflops << ") Mflops " << std::endl;
119 
120  push(xml,"Unopt_test");
121  write(xml,"cb",cb);
122  write(xml,"isign",isign);
123  write(xml,"mflops",mflops);
124  pop(xml);
125  }
126  }
127 
128  pop(xml);
129 
130  //! Create a linear operator
131  QDPIO::cout << "Constructing (possibly optimized) WilsonDslash" << std::endl;
132 
133  QDPWilsonDslashOpt D_opt(state);
134 
135  QDPIO::cout << "Done" << std::endl;
136 
137  push(xml,"Optimized_test");
138 
139  first = true;
140  for(isign = 1; isign >= -1; isign -= 2) {
141  for(cb = 0; cb < 2; ++cb) {
142 
143  double mydt= 2.0;
144  QDP::StopWatch swatch;
145 
146  if (first)
147  {
148  for(iter=1; ; iter <<= 1)
149  {
150  QDPIO::cout << "Applying D_opt " << iter << " times" << std::endl;
151 
152  swatch.reset();
153  swatch.start();
154  for(int i=iter; i-- > 0; ) {
155  D_opt.apply(chi, psi, (isign == 1 ? PLUS : MINUS ) , cb); // NOTE: for timings throw away return value
156  }
157  swatch.stop();
158 
159  mydt=swatch.getTimeInSeconds();
160 
161  QDPInternal::globalSum(mydt);
162  mydt /= Layout::numNodes();
163 
164  if (mydt > 1) {
165  first = false;
166  break;
167  }
168  }
169  }
170 
171  QDPIO::cout << "Applying D_opt for timings" << std::endl;
172 
173  swatch.reset();
174  swatch.start();
175  for(int i=iter; i-- > 0; ) {
176  D_opt.apply(chi, psi, (isign == 1 ? PLUS : MINUS ) , cb); // NOTE: for timings throw away return value
177  }
178  swatch.stop();
179 
180  mydt=swatch.getTimeInSeconds();
181  mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
182  QDPInternal::globalSum(mydt);
183  mydt /= Layout::numNodes();
184 
185  float mflops = float(1320.0f/mydt);
186  QDPIO::cout << "cb = " << cb << " isign = " << isign << std::endl;
187  QDPIO::cout << "After " << iter << " calls, the time per lattice point is "<< mydt
188  << " micro sec (" << mflops << ") Mflops " << std::endl;
189 
190  push(xml,"OPT_test");
191  write(xml,"cb",cb);
192  write(xml,"isign",isign);
193  write(xml,"mflops",mflops);
194  pop(xml);
195  }
196  }
197 
198  pop(xml);
199 
200  LatticeFermion chi3;
201  Double n2;
202 
203  gaussian(chi3);
204  gaussian(psi);
205  for(cb = 0; cb < 2; cb++) {
206  for(isign = 1; isign >= -1; isign -= 2) {
207 
208  chi = chi3;
209  chi2 = chi3;
210  D.apply(chi, psi, (isign > 0 ? PLUS : MINUS), cb);
211  D.apply(chi2, psi, (isign > 0 ? PLUS : MINUS), cb);
212 
213  n2 = norm2( chi2 - chi );
214 
215  QDPIO::cout << "Paranoia test: || D(psi, "
216  << (isign > 0 ? "+, " : "-, ") << cb
217  << ") - D(psi, "
218  << (isign > 0 ? "+, " : "-, ") << cb << " ) || = " << n2
219  << std::endl;
220  }
221  }
222 
223  gaussian(chi3);
224  gaussian(psi);
225  for(cb = 0; cb < 2; cb++) {
226  for(isign = 1; isign >= -1; isign -= 2) {
227 
228  chi = chi3;
229  chi2 = chi3;
230  D.apply(chi, psi, (isign > 0 ? PLUS : MINUS), cb);
231  D_opt.apply(chi2, psi, (isign > 0 ? PLUS : MINUS), cb);
232 
233  n2 = norm2( chi2 - chi );
234 
235  QDPIO::cout << "OPT test: || D(psi, "
236  << (isign > 0 ? "+, " : "-, ") << cb
237  << ") - D_opt(psi, "
238  << (isign > 0 ? "+, " : "-, ") << cb << " ) || = " << n2
239  << std::endl;
240 
241  push(xml,"OPT_correctness_test");
242  write(xml,"isign", isign);
243  write(xml,"cb", cb);
244  write(xml,"norm2_diff",n2);
245  pop(xml);
246  }
247  }
248 
249 
250  pop(xml);
251 
252 
253 
254  // Time to bolt
256 
257  exit(0);
258 }
Primary include file for CHROMA in application codes.
Support class for fermion actions and linear operators.
Definition: state.h:94
Class for counted reference semantics.
Definition: handle.h:33
Periodic version of FermState.
General Wilson-Dirac dslash.
General Wilson-Dirac dslash.
Definition: lwldslash_w.h:48
void write(XMLWriter &xml, const std::string &path, const AsqtadFermActParams &param)
Writer parameters.
void proginfo(XMLWriter &xml)
Print out basic information about this program.
Definition: proginfo.cc:24
void apply(T &chi, const T &psi, enum PlusMinus isign, int cb) const
General Wilson-Dirac dslash.
Definition: lwldslash_w.h:228
void apply(T &chi, const T &psi, enum PlusMinus isign, int cb) const
General Wilson-Dirac dslash.
static int m[4]
Definition: make_seeds.cc:16
Nd
Definition: meslate.cc:74
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
gaussian(aux)
std::string getXMLOutputFileName()
Get output file name.
Definition: chroma_init.cc:91
static multi1d< LatticeColorMatrix > u
push(xml_out,"Condensates")
int i
Definition: pbg5p_w.cc:55
void initialize(int *argc, char ***argv)
Chroma initialisation routine.
Definition: chroma_init.cc:114
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
void finalize(void)
Chroma finalization routine.
Definition: chroma_init.cc:308
multi1d< LatticeFermion > chi(Ncb)
LatticeFermion psi
Definition: mespbg5p_w.cc:35
pop(xml_out)
int cb
Definition: invbicg.cc:120
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > state
Definition: pbg5p_w.cc:28
Double zero
Definition: invbicg.cc:106
FloatingPoint< double > Double
Definition: gtest.h:7351
int main(int argc, char **argv)