11 int main(
int argc,
char **argv)
20 multi1d<int> nrow(
Nd);
24 read(xml_in,
"/param/nrow", nrow);
25 read(xml_in,
"/param/N5",
N5);
29 QDPIO::cerr <<
"Caught Exception while reading XML " << e << std::endl;
34 Layout::setLattSize(nrow);
38 push(xml,
"t_lwldslash_array");
42 multi1d<LatticeColorMatrix>
u(
Nd);
43 for(
int m=0;
m <
u.size(); ++
m)
48 LatticeFermion
psi,
chi, chi2;
53 QDPIO::cout <<
"Constructing naive QDPWilsonDslash" << std::endl;
56 multi1d<LatticeColorMatrix>,
58 multi1d<LatticeColorMatrix>,
59 multi1d<LatticeColorMatrix> >(
u));
64 QDPIO::cout <<
"Done" << std::endl;
67 push(xml,
"Unoptimized_test");
73 for(
cb = 0;
cb < 2; ++
cb) {
76 QDP::StopWatch swatch;
80 for(iter=1; ; iter <<= 1)
82 QDPIO::cout <<
"Applying naive D " << iter <<
" times" << std::endl;
87 for(
int i=iter;
i-- > 0; ) {
92 mydt=swatch.getTimeInSeconds();
93 QDPInternal::globalSum(mydt);
94 mydt /= Layout::numNodes();
103 QDPIO::cout <<
"Applying naive D for timings" << std::endl;
107 for(
int i=iter;
i-- > 0; ) {
112 mydt=swatch.getTimeInSeconds();
113 mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
114 QDPInternal::globalSum(mydt);
115 mydt /= Layout::numNodes();
117 float mflops = float(1320.0f/mydt);
118 QDPIO::cout <<
"cb = " <<
cb <<
" isign = " <<
isign << std::endl;
119 QDPIO::cout <<
"The time per lattice point is "<< mydt
120 <<
" micro sec (" << mflops <<
") Mflops " << std::endl;
122 push(xml,
"Unopt_test");
125 write(xml,
"mflops",mflops);
133 QDPIO::cout <<
"Constructing (possibly optimized) WilsonDslash" << std::endl;
137 QDPIO::cout <<
"Done" << std::endl;
139 push(xml,
"Optimized_test");
143 for(
cb = 0;
cb < 2; ++
cb) {
146 QDP::StopWatch swatch;
150 for(iter=1; ; iter <<= 1)
152 QDPIO::cout <<
"Applying D_opt " << iter <<
" times" << std::endl;
156 for(
int i=iter;
i-- > 0; ) {
161 mydt=swatch.getTimeInSeconds();
163 QDPInternal::globalSum(mydt);
164 mydt /= Layout::numNodes();
173 QDPIO::cout <<
"Applying D_opt for timings" << std::endl;
177 for(
int i=iter;
i-- > 0; ) {
182 mydt=swatch.getTimeInSeconds();
183 mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
184 QDPInternal::globalSum(mydt);
185 mydt /= Layout::numNodes();
187 float mflops = float(1320.0f/mydt);
188 QDPIO::cout <<
"cb = " <<
cb <<
" isign = " <<
isign << std::endl;
189 QDPIO::cout <<
"After " << iter <<
" calls, the time per lattice point is "<< mydt
190 <<
" micro sec (" << mflops <<
") Mflops " << std::endl;
192 push(xml,
"OPT_test");
195 write(xml,
"mflops",mflops);
208 for(
cb = 0;
cb < 2;
cb++) {
216 n2 = norm2( chi2 -
chi );
218 QDPIO::cout <<
"Paranoia test: || D(psi, "
219 << (
isign > 0 ?
"+, " :
"-, ") <<
cb
221 << (
isign > 0 ?
"+, " :
"-, ") <<
cb <<
" ) || = " << n2
228 for(
cb = 0;
cb < 2;
cb++) {
236 n2 = norm2( chi2 -
chi );
238 QDPIO::cout <<
"OPT test: || D(psi, "
239 << (
isign > 0 ?
"+, " :
"-, ") <<
cb
241 << (
isign > 0 ?
"+, " :
"-, ") <<
cb <<
" ) || = " << n2
244 push(xml,
"OPT_correctness_test");
247 write(xml,
"norm2_diff",n2);
254 multi1d<LatticeFermion> chis1(
N5);
255 multi1d<LatticeFermion> chis2(
N5);
256 multi1d<LatticeFermion> chis3(
N5);
257 multi1d<LatticeFermion> psis(
N5);
260 QDPIO::cout <<
"Consturcting Naive 5D Dslash, N5=" <<
N5 << std::endl;
262 QDPIO::cout <<
"Done" << std::endl;
265 push(xml,
"Unoptimized_array_test");
266 for(
int k=0;
k <
N5;
k++) {
276 for(
cb = 0;
cb < 2; ++
cb) {
278 QDP::StopWatch swatch;
283 for(iter=1; ; iter <<= 1)
285 QDPIO::cout <<
"Applying naive D5 " << iter <<
" times" << std::endl;
289 for(
int i=iter;
i-- > 0; ) {
294 mydt=swatch.getTimeInSeconds();
296 QDPInternal::globalSum(mydt);
297 mydt /= Layout::numNodes();
306 QDPIO::cout <<
"Applying naive D5 for timings" << std::endl;
310 for(
int i=iter;
i-- > 0; ) {
315 mydt=swatch.getTimeInSeconds();
316 mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
317 QDPInternal::globalSum(mydt);
318 mydt /= Layout::numNodes();
320 float mflops = float(1320.0f*
N5/mydt);
321 QDPIO::cout <<
"cb = " <<
cb <<
" isign = " <<
isign << std::endl;
322 QDPIO::cout <<
"The time per lattice point is "<< mydt
323 <<
" micro sec (" << mflops <<
") Mflops " << std::endl;
325 push(xml,
"Unopt_array_test");
328 write(xml,
"mflops",mflops);
336 QDPIO::cout <<
"Constructing (possibly optimized) WilsonDslash to do std::vector operation with a loop" << std::endl;
338 QDPIO::cout <<
"Done" << std::endl;
340 push(xml,
"Optimized_loop_test");
344 for(
cb = 0;
cb < 2; ++
cb) {
346 QDP::StopWatch swatch;
351 for(iter=1; ; iter <<= 1)
353 QDPIO::cout <<
"Applying D_opt_loop " << iter <<
" times" << std::endl;
357 for(
int i=iter;
i-- > 0; ) {
358 for(
int loop=0; loop <
N5; loop++) {
364 mydt=swatch.getTimeInSeconds();
365 QDPInternal::globalSum(mydt);
366 mydt /= Layout::numNodes();
375 QDPIO::cout <<
"Applying D_opt_loop for timings" << std::endl;
379 for(
int i=iter;
i-- > 0; ) {
380 for(
int loop=0; loop<
N5; loop++) {
386 mydt=swatch.getTimeInSeconds();
387 mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
388 QDPInternal::globalSum(mydt);
389 mydt /= Layout::numNodes();
391 float mflops = float(1320.0f*
N5/mydt);
392 QDPIO::cout <<
"cb = " <<
cb <<
" isign = " <<
isign << std::endl;
393 QDPIO::cout <<
"After " << iter <<
" calls, the time per lattice point is "<< mydt
394 <<
" micro sec (" << mflops <<
") Mflops " << std::endl;
396 push(xml,
"OPT_loop_test");
399 write(xml,
"mflops",mflops);
407 QDPIO::cout <<
"Constructing (possibly optimized) WilsonDslashArray N5="<<
N5 << std::endl;
411 QDPIO::cout <<
"Done" << std::endl;
413 push(xml,
"Optimized_array_test");
417 for(
cb = 0;
cb < 2; ++
cb) {
420 QDP::StopWatch swatch;
424 for(iter=1; ; iter <<= 1)
426 QDPIO::cout <<
"Applying D5_opt " << iter <<
" times" << std::endl;
430 for(
int i=iter;
i-- > 0; ) {
435 mydt=swatch.getTimeInSeconds();
437 QDPInternal::globalSum(mydt);
438 mydt /= Layout::numNodes();
447 QDPIO::cout <<
"Applying D5_opt for timings" << std::endl;
451 for(
int i=iter;
i-- > 0; ) {
456 mydt=swatch.getTimeInSeconds();
457 mydt=1.0e6*mydt/double(iter*(Layout::sitesOnNode()/2));
458 QDPInternal::globalSum(mydt);
459 mydt /= Layout::numNodes();
461 float mflops = float(1320.0f*
N5/mydt);
462 QDPIO::cout <<
"cb = " <<
cb <<
" isign = " <<
isign << std::endl;
463 QDPIO::cout <<
"After " << iter <<
" calls, the time per lattice point is "<< mydt
464 <<
" micro sec (" << mflops <<
") Mflops " << std::endl;
466 push(xml,
"Opt_array_test");
469 write(xml,
"mflops",mflops);
476 for(
cb = 0;
cb < 2;
cb++) {
478 for(
int k=0;
k <
N5;
k++) {
488 for(
int i=0;
i <
N5;
i++) {
489 n2 += norm2( chis2[
i] - chis1[
i] );
492 QDPIO::cout <<
"Paranoia test: || D5(psi, "
493 << (
isign > 0 ?
"+, " :
"-, ") <<
cb
495 << (
isign > 0 ?
"+, " :
"-, ") <<
cb <<
" ) || = " << n2
500 for(
cb = 0;
cb < 2;
cb++) {
503 for(
int k=0;
k <
N5;
k++) {
512 for(
int i=0;
i <
N5;
i++) {
513 n2 += norm2( chis2[
i] - chis1[
i] );
515 QDPIO::cout <<
"OPT test: || D5(psi, "
516 << (
isign > 0 ?
"+, " :
"-, ") <<
cb
517 <<
") - D5_opt(psi, "
518 << (
isign > 0 ?
"+, " :
"-, ") <<
cb <<
" ) || = " << n2
521 push(xml,
"Opt_array_correctness_test");
524 write(xml,
"norm2_diff",n2);
Primary include file for CHROMA in application codes.
Support class for fermion actions and linear operators.
Class for counted reference semantics.
Periodic version of FermState.
General Wilson-Dirac dslash of arrays.
General Wilson-Dirac dslash.
General Wilson-Dirac dslash.
void read(XMLReader &xml, const std::string &path, AsqtadFermActParams &param)
Read parameters.
void write(XMLWriter &xml, const std::string &path, const AsqtadFermActParams &param)
Write parameters.
void proginfo(XMLWriter &xml)
Print out basic information about this program.
void apply(T &chi, const T &psi, enum PlusMinus isign, int cb) const
General Wilson-Dirac dslash.
void apply(T &chi, const T &psi, enum PlusMinus isign, int cb) const
General Wilson-Dirac dslash.
void apply(multi1d< LatticeFermion > &chi, const multi1d< LatticeFermion > &psi, enum PlusMinus isign, int cb) const
General Wilson-Dirac dslash.
Asqtad Staggered-Dirac operator.
std::string getXMLOutputFileName()
Get output file name.
static multi1d< LatticeColorMatrix > u
push(xml_out,"Condensates")
void initialize(int *argc, char ***argv)
Chroma initialisation routine.
void finalize(void)
Chroma finalization routine.
multi1d< LatticeFermion > chi(Ncb)
std::string getXMLInputFileName()
Get input file name.
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > state
FloatingPoint< double > Double
int main(int argc, char **argv)