CHROMA
t_lwldslash_sse.cc
Go to the documentation of this file.
1 
2 #include "chroma.h"
3 #include <iostream>
4 #include <cstdio>
5 
6 
7 
8 using namespace Chroma;
9 
10 
11 int main(int argc, char **argv)
12 {
13  // Put the machine into a known state
14  Chroma::initialize(&argc, &argv);
15 
16  // Read parameters
17  XMLReader xml_in("input.xml");
18 
19  // Lattice Size
20  multi1d<int> nrow(Nd);
21  read(xml_in, "/param/nrow", nrow);
22 
23  xml_in.close();
24 
25  // Setup the layout
26  Layout::setLattSize(nrow);
27  Layout::create();
28 
29  XMLFileWriter xml("t_lwldslash.xml");
30 
31  push(xml,"t_lwldslash");
32 
33  proginfo(xml); // Print out basic program info
34 
35  // Make up a random gauge field.
36  multi1d<LatticeColorMatrix> u(Nd);
37  for(int m=0; m < u.size(); ++m)
38  gaussian(u[m]);
39 
40 
41  // Make up a gaussian source and a zero result std::vector
42  LatticeFermion psi, chi, chi2;
43  gaussian(psi);
44  chi = zero;
45 
46  //! Create a linear operator
47  QDPIO::cout << "Constructing naive QDPWilsonDslash" << std::endl;
48 
49  Handle< FermState<LatticeFermion,
50  multi1d<LatticeColorMatrix>,
51  multi1d<LatticeColorMatrix> > > state(new PeriodicFermState<LatticeFermion,
52  multi1d<LatticeColorMatrix>,
53  multi1d<LatticeColorMatrix> >(u));
54 
55  // Naive Dslash
57 
58  QDPIO::cout << "Done" << std::endl;
59 
60 
61  push(xml,"Unoptimized_test");
62 
63  int isign, cb, loop, iter=1;
64  bool first = true;
65  for(isign = 1; isign >= -1; isign -= 2) {
66  for(cb = 0; cb < 2; ++cb) {
67 
68  QDP::StopWatch swatch;
69  double mydt;
70 
71  if (first)
72  {
73  for(iter=1; ; iter <<= 1)
74  {
75  QDPIO::cout << "Applying D " << iter << " times" << std::endl;
76 
77  swatch.reset();
78  swatch.start();
79  for(int i=iter; i-- > 0; ) {
80  D.apply(chi, psi, (isign == 1 ? PLUS : MINUS), cb);
81  }
82  swatch.stop();
83  mydt=swatch.getTimeInSeconds();
84 
85  QDPInternal::globalSum(mydt);
86  mydt /= Layout::numNodes();
87 
88  if (mydt > 1) {
89  first = false;
90  break;
91  }
92  }
93  }
94 
95  QDPIO::cout << "Applying D for timings" << std::endl;
96 
97  swatch.reset();
98  swatch.start();
99  for(int i=iter; i-- > 0; ) {
100  D.apply(chi, psi, (isign == 1 ? PLUS : MINUS), cb);
101  }
102  swatch.stop();
103 
104  mydt=swatch.getTimeInSeconds();
105  mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
106  QDPInternal::globalSum(mydt);
107  mydt /= Layout::numNodes();
108 
109  float mflops = float(1320.0f/mydt);
110  QDPIO::cout << "cb = " << cb << " isign = " << isign << std::endl;
111  QDPIO::cout << "The time per lattice point is "<< mydt
112  << " micro sec (" << mflops << ") Mflops " << std::endl;
113 
114  push(xml,"test");
115  write(xml,"cb",cb);
116  write(xml,"isign",isign);
117  write(xml,"mflops",mflops);
118  pop(xml);
119  }
120  }
121 
122  pop(xml);
123 
124  //! Create a linear operator
125  QDPIO::cout << "Constructing (possibly optimized) WilsonDslash" << std::endl;
126 
127  WilsonDslash D_opt(state);
128 
129  QDPIO::cout << "Done" << std::endl;
130 
131  push(xml,"Optimized_test");
132 
133  first = true;
134  for(isign = 1; isign >= -1; isign -= 2) {
135  for(cb = 0; cb < 2; ++cb) {
136 
137  double mydt;
138  QDP::StopWatch swatch;
139 
140  if (first)
141  {
142  for(iter=1; ; iter <<= 1)
143  {
144  QDPIO::cout << "Applying D " << iter << " times" << std::endl;
145 
146  swatch.reset();
147  swatch.start();
148 
149  for(int i=iter; i-- > 0; ) {
150  D_opt.apply(chi, psi, (isign == 1 ? PLUS : MINUS ) , cb); // NOTE: for timings throw away return value
151  }
152  swatch.stop();
153 
154 
155  mydt=swatch.getTimeInSeconds();
156  QDPInternal::globalSum(mydt);
157  mydt /= Layout::numNodes();
158 
159  if (mydt > 1) {
160  first = false;
161  break;
162  }
163  }
164  }
165 
166  QDPIO::cout << "Applying D for timings" << std::endl;
167 
168  swatch.reset();
169  swatch.start();
170  for(int i=iter; i-- > 0; ) {
171  D_opt.apply(chi, psi, (isign == 1 ? PLUS : MINUS ) , cb); // NOTE: for timings throw away return value
172  }
173  swatch.stop();
174  mydt=swatch.getTimeInSeconds();
175  mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
176  QDPInternal::globalSum(mydt);
177  mydt /= Layout::numNodes();
178 
179  float mflops = float(1320.0f/mydt);
180  QDPIO::cout << "cb = " << cb << " isign = " << isign << std::endl;
181  QDPIO::cout << "After " << iter << " calls, the time per lattice point is "<< mydt
182  << " micro sec (" << mflops << ") Mflops " << std::endl;
183 
184  push(xml,"test");
185  write(xml,"cb",cb);
186  write(xml,"isign",isign);
187  write(xml,"mflops",mflops);
188  pop(xml);
189  }
190  }
191 
192  pop(xml);
193 
194  LatticeFermion chi3;
195  Double n2;
196 
197  gaussian(chi3);
198  gaussian(psi);
199  for(cb = 0; cb < 2; cb++) {
200  for(isign = 1; isign >= -1; isign -= 2) {
201 
202  chi = chi3;
203  chi2 = chi3;
204  D.apply(chi, psi, (isign > 0 ? PLUS : MINUS), cb);
205  D.apply(chi2, psi, (isign > 0 ? PLUS : MINUS), cb);
206 
207  n2 = norm2( chi2 - chi );
208 
209  QDPIO::cout << "Paranoia test: || D(psi, "
210  << (isign > 0 ? "+, " : "-, ") << cb
211  << ") - D(psi, "
212  << (isign > 0 ? "+, " : "-, ") << cb << " ) || = " << n2
213  << std::endl;
214  }
215  }
216 
217  gaussian(chi3);
218  gaussian(psi);
219  for(cb = 0; cb < 2; cb++) {
220  for(isign = 1; isign >= -1; isign -= 2) {
221 
222  chi = chi3;
223  chi2 = chi3;
224  D.apply(chi, psi, (isign > 0 ? PLUS : MINUS), cb);
225  D_opt.apply(chi2, psi, (isign > 0 ? PLUS : MINUS), cb);
226 
227  n2 = norm2( chi2 - chi );
228 
229  QDPIO::cout << "OPT test: || D(psi, "
230  << (isign > 0 ? "+, " : "-, ") << cb
231  << ") - D_opt(psi, "
232  << (isign > 0 ? "+, " : "-, ") << cb << " ) || = " << n2
233  << std::endl;
234 
235  push(xml,"OPT test");
236  write(xml,"isign", isign);
237  write(xml,"cb", cb);
238  write(xml,"norm2_diff",n2);
239  pop(xml);
240  }
241  }
242 
243  pop(xml);
244 
245  // Time to bolt
247 
248  exit(0);
249 }
Primary include file for CHROMA in application codes.
Support class for fermion actions and linear operators.
Definition: state.h:94
Class for counted reference semantics.
Definition: handle.h:33
Periodic version of FermState.
General Wilson-Dirac dslash.
Definition: lwldslash_w.h:48
void read(XMLReader &xml, const std::string &path, AsqtadFermActParams &param)
Read parameters.
void write(XMLWriter &xml, const std::string &path, const AsqtadFermActParams &param)
Write parameters.
void proginfo(XMLWriter &xml)
Print out basic information about this program.
Definition: proginfo.cc:24
void apply(T &chi, const T &psi, enum PlusMinus isign, int cb) const
General Wilson-Dirac dslash.
Definition: lwldslash_w.h:228
static int m[4]
Definition: make_seeds.cc:16
Nd
Definition: meslate.cc:74
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
gaussian(aux)
static multi1d< LatticeColorMatrix > u
push(xml_out,"Condensates")
int i
Definition: pbg5p_w.cc:55
void initialize(int *argc, char ***argv)
Chroma initialisation routine.
Definition: chroma_init.cc:114
@ MINUS
Definition: chromabase.h:45
@ PLUS
Definition: chromabase.h:45
void finalize(void)
Chroma finalization routine.
Definition: chroma_init.cc:308
multi1d< LatticeFermion > chi(Ncb)
LatticeFermion psi
Definition: mespbg5p_w.cc:35
pop(xml_out)
int cb
Definition: invbicg.cc:120
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > state
Definition: pbg5p_w.cc:28
Double zero
Definition: invbicg.cc:106
FloatingPoint< double > Double
Definition: gtest.h:7351
int main(int argc, char **argv)