CHROMA
lwldslash_array_pab_w.cc
Go to the documentation of this file.
1 /*! \file
2  * \brief Wilson Dslash linear operator array
3  */
4 
5 #include "chromabase.h"
6 #include "chroma_config.h"
7 // Need this for the refcount namespace
10 
11 #include <wfm.h>
12 
13 namespace Chroma
14 {
15  //! Empty constructor. Must use create later
17  {
18  }
19 
20 
21  //! Full constructor
23  int N5_)
24  {
25  create(state,N5_);
26  }
27 
28 
29  //! Full constructor
31  int N5_,
32  const AnisoParam_t& aniso_)
33  {
34  create(state,N5_,aniso_);
35  }
36 
37 
38  //! Creation routine
40  int N5_)
41  {
42  multi1d<Real> cf(Nd);
43  cf = 1.0;
44  create(state, N5_, cf);
45  }
46 
47 
48  //! Creation routine with anisotropy
50  const AnisoParam_t& anisoParam)
51  {
52  START_CODE();
53 
54  create(state, N5_, makeFermCoeffs(anisoParam));
55 
56  END_CODE();
57  }
58 
59  //! Creation routine
61  const multi1d<Real>& coeffs_)
62  {
63  START_CODE();
64 
65  N5 = N5_;
66  coeffs = coeffs_;
67 
68  // Save a copy of the fermbc
69  fbc = state->getFermBC();
70 
71  // Sanity check
72  if (fbc.operator->() == 0)
73  {
74  QDPIO::cerr << "PABWilsonDslashArray: error: fbc is null" << std::endl;
75  QDP_abort(1);
76  }
77 
78  // Temporary copy - not kept
79  multi1d<LatticeColorMatrix> u = state->getLinks();
80 
81  // Rescale the u fields by the anisotropy
82  for(int mu=0; mu < u.size(); ++mu)
83  {
84  u[mu] *= coeffs[mu];
85  }
86 
87  // Rearrange the gauge
88 
89  // Allocate the packed gauge
90  bool got_fast = true;
91  try {
93  QDP::Allocator::theQDPAllocator::Instance().allocate(
94  Nd*sizeof(PrimitiveSU3Matrix)*Layout::sitesOnNode(),
95  QDP::Allocator::FAST);
96  }
97  catch( std::bad_alloc ) {
98  got_fast = false;
99  }
100 
101  if( got_fast == false ) {
102  try {
104  QDP::Allocator::theQDPAllocator::Instance().allocate(
105  Nd*sizeof(PrimitiveSU3Matrix)*Layout::sitesOnNode(),
107  }
108  catch( std::bad_alloc ) {
109  QDPIO::cerr << "Could not allocate packed gauge in lwldslash_array_pab_w.cc line 67" << std::endl << std::flush;
110  QDP_abort(1);
111  }
112 
113  }
114 
115 
116 
117 
118  // Rearrange gauge from Dir,Site, Matrix
119  // to Site, Dir, Matrix
120  // (and sites are ordered in cb order)
121 
122  int volume=Layout::sitesOnNode();
123  for(int ix=0; ix < volume; ix++) {
124  for(int mu=0; mu < 4; mu++) {
125  packed_gauge[ 4*ix + mu ] = transpose(u[mu].elem(ix).elem());
126  }
127  }
128 
129  // Need this
130 
131  wil_cbsize=Layout::sitesOnNode()/2;
132  if ( PABDslashEnv::refcount == 0 ) {
133  // I probably need the local size here...
134  const multi1d<int>& lsize= Layout::subgridLattSize();
135  const multi1d<int>& machsize = Layout::logicalSize();
136 
137  // Set up the wilson thingie
138  for(int i = 0; i < Nd; i++) {
139  wil.local_latt[i] = lsize[i];
140 
141  if( machsize[i] == 1 ) {
142  wil.local_comm[i] = 1;
143 
144  }
145  else {
146  wil.local_comm[i] = 0;
147  }
148  }
149 
150 #ifdef CHROMA_USE_SLOPPY_BAGEL_DSLASH
151  wil.BGL = true;
152  wil.SloppyPrecision = true;
153 #endif
154 
155  wfm_vec_init(&wil);
156 
157 #ifdef CHROMA_WFM_NO_END
158  // If we never call the end,
159  // the refcount is a lock variable
160  // and we never increase it
162 #endif
163 
164  }
165 
166 #ifndef CHROMA_WFM_NO_END
167  // Correct refcounting
169 #endif
170 
171  END_CODE();
172  }
173 
175  {
176 
177 #ifndef CHROMA_WFM_NO_END
178  // Only deal with refcount defreases if the refcount is at least 1
179  if( PABDslashEnv::refcount > 0 ) {
180 
181  // Always decrease the refcount
183 
184  // Only call free when the refcount == 0
185  if( PABDslashEnv::refcount == 0 ) {
186  wfm_vec_end(&wil);
187  }
188  }
189 #endif
190 
191  // Free the gauge field tho
192  QDP::Allocator::theQDPAllocator::Instance().free(packed_gauge);
193 
194  END_CODE();
195  }
196 
197  //! General Wilson-Dirac dslash
198  /*! \ingroup linop
199  * Wilson dslash
200  *
201  * Arguments:
202  *
203  * \param chi Result (Write)
204  * \param psi Pseudofermion field (Read)
205  * \param isign D'^dag or D' ( MINUS | PLUS ) resp. (Read)
206  * \param cb Checkerboard of OUTPUT std::vector (Read)
207  */
208  void
209  PABWilsonDslashArray::apply (multi1d<LatticeFermion>& chi,
210  const multi1d<LatticeFermion>& psi,
211  enum PlusMinus isign, int cb) const
212  {
213  START_CODE();
214 
215  if( chi.size() != N5 ) chi.resize(N5);
216 
217  multi1d<Float*> chis(N5);
218  multi1d<Float*> psis(N5);
219  multi1d<int> cbs(N5);
220 
221  for(int i=0; i < N5; i++) {
222  chis[i]=(Float*)&(chi[i].elem(wil_cbsize*cb).elem(0).elem(0).real());
223  psis[i]=(Float*)&(psi[i].elem(wil_cbsize*(1-cb)).elem(0).elem(0).real());
224  cbs[i]=1-cb;
225  }
226 
227  wfm_dslash_vec(N5,
228  (Float**)&(chis[0]),
229  (Float *)&(packed_gauge[0]),
230  (Float**)&(psis[0]),
231  (int *)&(cbs[0]),
232  (isign == (enum PlusMinus)PLUS ? 0 : 1));
233 
234 
235  END_CODE();
236  }
237 
238  //! Apply Wilson-Dirac dslash
239  /*! \ingroup linop
240  * Wilson dslash
241  *
242  * Arguments:
243  *
244  * \param chi Result (Write)
245  * \param psi Pseudofermion field (Read)
246  * \param isign D'^dag or D' ( MINUS | PLUS ) resp. (Read)
247  * \param cb Checkerboard of OUTPUT std::vector (Read)
248  */
249  void
250  PABWilsonDslashArray::apply (LatticeFermion& chi, const LatticeFermion& psi,
251  enum PlusMinus isign, int cb) const
252  {
253  START_CODE();
254 
255  /* Pass the right parities.
256  *
257  * SZIN standard is that cb is cb of INPUT
258  * Chroma standard is that the cb is cb of OUTPUT
259  *
260  * Need to invert cb for SZIN style SSE call
261 
262  *
263  *
264  * Pass all the fermion and all the gauge pieces
265  *
266  * NOTE: this breaks usage from SZIN. However, Chroma and SSE dslash can handle
267  * odd subgrid lattice sizes, whereas SZIN cannot. Thus, I must pass all fermion
268  * cbs to support such flexibility.
269  *
270  */
271  /* sse_su3dslash_wilson((SSEREAL *)&(packed_gauge[0]),
272  (SSEREAL *)&(psi.elem(0).elem(0).elem(0).real()),
273  (SSEREAL *)&(chi.elem(0).elem(0).elem(0).real()),
274  (int)isign, (1-cb));
275  */
276 
277  wfm_dslash((Float *)&(chi.elem(wil_cbsize*(cb)).elem(0).elem(0).real()),
278  (Float *)&(packed_gauge[0]),
279  (Float *)&(psi.elem(wil_cbsize*(1-cb)).elem(0).elem(0).real()),
280  1-cb,
281  (isign == (enum PlusMinus)PLUS ? 0 : 1));
282 
283  getFermBC().modifyF(chi, QDP::rb[cb]);
284 
285  END_CODE();
286  }
287 
288 
289 } // End Namespace Chroma
290 
Primary include file for CHROMA library code.
Support class for fermion actions and linear operators.
Definition: state.h:94
Class for counted reference semantics.
Definition: handle.h:33
const FermBC< T, P, Q > & getFermBC() const
Return the fermion BC object for this linear operator.
~PABWilsonDslashArray()
Destroy / deal with refcounting.
void create(Handle< FermState< T, P, Q > > state, int N5_)
Creation routine.
Handle< FermBC< T, P, Q > > fbc
PABWilsonDslashArray()
Empty constructor. Must use create later.
int mu
Definition: cool.cc:24
void apply(multi1d< LatticeFermion > &chi, const multi1d< LatticeFermion > &psi, enum PlusMinus isign, int cb) const
General Wilson-Dirac dslash.
Wilson Dslash linear operator over arrays.
Wilson Dslash linear operator.
Nd
Definition: meslate.cc:74
Asqtad Staggered-Dirac operator.
Definition: klein_gord.cc:10
static multi1d< LatticeColorMatrix > u
void transpose(multi2d< LatticeColorVector > &dist_rep, const multi2d< LatticeColorVector > &prop_rep)
Take transpose of a matrix in (explicit) spin space.
int i
Definition: pbg5p_w.cc:55
@ PLUS
Definition: chromabase.h:45
multi1d< LatticeFermion > chi(Ncb)
LatticeFermion psi
Definition: mespbg5p_w.cc:35
PColorMatrix< RComplex< REAL >, Nc > PrimitiveSU3Matrix
START_CODE()
int cb
Definition: invbicg.cc:120
const WilsonTypeFermAct< multi1d< LatticeFermion > > Handle< const ConnectState > state
Definition: pbg5p_w.cc:28
multi1d< Real > makeFermCoeffs(const AnisoParam_t &aniso)
Make fermion coefficients.
Definition: aniso_io.cc:63
FloatingPoint< float > Float
Definition: gtest.h:7350
Parameters for anisotropy.
Definition: aniso_io.h:24