7 #ifndef QDPJIT_IS_QDPJITPTX
8 #ifndef QDPJIT_IS_QDPJITNVVM
10 #warning "Using QDP-JIT/LLVM stouting routines"
12 void function_get_fs_bs_exec(
const JitFunction&
function,
13 const LatticeColorMatrix&
Q,
14 const LatticeColorMatrix& QQ,
15 multi1d<LatticeComplex>& f,
16 multi1d<LatticeComplex>& b1,
17 multi1d<LatticeComplex>& b2,
20 AddressLeaf addr_leaf(all);
22 addr_leaf.setLit( dobs );
24 int junk_0 = forEach(
Q, addr_leaf, NullCombine());
25 int junk_1 = forEach(QQ, addr_leaf, NullCombine());
26 int junk_2 = forEach(f[0], addr_leaf, NullCombine());
27 int junk_3 = forEach(f[1], addr_leaf, NullCombine());
28 int junk_4 = forEach(f[2], addr_leaf, NullCombine());
29 int junk_5 = forEach(b1[0], addr_leaf, NullCombine());
30 int junk_6 = forEach(b1[1], addr_leaf, NullCombine());
31 int junk_7 = forEach(b1[2], addr_leaf, NullCombine());
32 int junk_8 = forEach(b2[0], addr_leaf, NullCombine());
33 int junk_9 = forEach(b2[1], addr_leaf, NullCombine());
34 int junk_10= forEach(b2[2], addr_leaf, NullCombine());
38 jit_dispatch(
function.
func().at(0),Layout::sitesOnNode(),getDataLayoutInnerSize(),
true,0,addr_leaf);
43 WordREG<REAL> jit_constant(
double f )
45 return WordREG<REAL>(f);
49 void function_get_fs_bs_build( JitFunction&
func,
50 const LatticeColorMatrix&
Q,
51 const LatticeColorMatrix& QQ,
52 multi1d<LatticeComplex>& f,
53 multi1d<LatticeComplex>& b1,
54 multi1d<LatticeComplex>& b2)
60 ParamRef p_dobs = llvm_add_param<bool>();
64 typedef typename LeafFunctor<LatticeColorMatrix, ParamLeaf>::Type_t LCMJIT;
65 typedef typename LeafFunctor<LatticeComplex , ParamLeaf>::Type_t LCJIT;
67 LCMJIT Q_jit(forEach(
Q, param_leaf, TreeCombine()));
68 LCMJIT QQ_jit(forEach(QQ, param_leaf, TreeCombine()));
69 LCJIT f0_jit(forEach(f[0], param_leaf, TreeCombine()));
70 LCJIT f1_jit(forEach(f[1], param_leaf, TreeCombine()));
71 LCJIT f2_jit(forEach(f[2], param_leaf, TreeCombine()));
72 LCJIT b10_jit(forEach(b1[0], param_leaf, TreeCombine()));
73 LCJIT b11_jit(forEach(b1[1], param_leaf, TreeCombine()));
74 LCJIT b12_jit(forEach(b1[2], param_leaf, TreeCombine()));
75 LCJIT b20_jit(forEach(b2[0], param_leaf, TreeCombine()));
76 LCJIT b21_jit(forEach(b2[1], param_leaf, TreeCombine()));
77 LCJIT b22_jit(forEach(b2[2], param_leaf, TreeCombine()));
79 llvm::Value* r_dobs = llvm_derefParam( p_dobs );
81 IndexDomainVector idx = loop.getIdx();
83 typename LCMJIT::Subtype_t& Q_j = Q_jit.elem(JitDeviceLayout::LayoutCoalesced,idx);
84 typename LCMJIT::Subtype_t& QQ_j = QQ_jit.elem(JitDeviceLayout::LayoutCoalesced,idx);
86 typename LCJIT::Subtype_t& f0_j = f0_jit.elem(JitDeviceLayout::LayoutCoalesced,idx);
87 typename LCJIT::Subtype_t& f1_j = f1_jit.elem(JitDeviceLayout::LayoutCoalesced,idx);
88 typename LCJIT::Subtype_t& f2_j = f2_jit.elem(JitDeviceLayout::LayoutCoalesced,idx);
90 typename LCJIT::Subtype_t& b10_j = b10_jit.elem(JitDeviceLayout::LayoutCoalesced,idx);
91 typename LCJIT::Subtype_t& b11_j = b11_jit.elem(JitDeviceLayout::LayoutCoalesced,idx);
92 typename LCJIT::Subtype_t& b12_j = b12_jit.elem(JitDeviceLayout::LayoutCoalesced,idx);
93 typename LCJIT::Subtype_t& b20_j = b20_jit.elem(JitDeviceLayout::LayoutCoalesced,idx);
94 typename LCJIT::Subtype_t& b21_j = b21_jit.elem(JitDeviceLayout::LayoutCoalesced,idx);
95 typename LCJIT::Subtype_t& b22_j = b22_jit.elem(JitDeviceLayout::LayoutCoalesced,idx);
97 llvm::BasicBlock * block_exit = llvm_new_basic_block();
100 PColorMatrixREG< RComplexREG< WordREG<REAL> >, Nc> Q_site = Q_j.elem();
101 PColorMatrixREG< RComplexREG< WordREG<REAL> >, Nc> QQ_site = QQ_j.elem();
102 PColorMatrixREG< RComplexREG< WordREG<REAL> >, Nc> QQQ = QQ_site*Q_site;
112 PScalarREG< RScalarREG< WordREG<REAL> > > trQQQ = realTrace(QQQ);
113 PScalarREG< RScalarREG< WordREG<REAL> > > trQQ = realTrace(QQ_site);
115 WordREG<REAL> c0 = jit_constant((REAL)1/(REAL)3) * trQQQ.elem().elem();
116 WordREG<REAL> c1 = jit_constant((REAL)1/(REAL)2) * trQQ.elem().elem();
118 llvm::BasicBlock * block_c1_lt = llvm_new_basic_block();
119 llvm::BasicBlock * block_c1_nlt = llvm_new_basic_block();
120 llvm_cond_branch( llvm_lt( c1.get_val() , llvm_create_value( 4.0e-3 ) ) , block_c1_lt , block_c1_nlt );
122 llvm_set_insert_point( block_c1_lt );
124 f0_j.elem().elem().real() = jit_constant(1.0) - c0 * c0 / jit_constant(720.0);
125 f0_j.elem().elem().imag() = -( c0 / jit_constant(6.0) )*( jit_constant(1.0) -(c1/jit_constant(20.0))*(jit_constant(1.0)-(c1/jit_constant(42.0)))) ;
127 f1_j.elem().elem().real() = c0/jit_constant(24.0)*(jit_constant(1.0)-c1/jit_constant(15.0)*(jit_constant(1.0)-jit_constant(3.0)*c1/jit_constant(112.0))) ;
128 f1_j.elem().elem().imag() = jit_constant(1.0)-c1/jit_constant(6.0)*(jit_constant(1.0)-c1/jit_constant(20.0)*(jit_constant(1.0)-c1/jit_constant(42.0)))-c0*c0/jit_constant(5040.0);
130 f2_j.elem().elem().real() = jit_constant(0.5)*(jit_constant(-1.0)+c1/jit_constant(12.0)*(jit_constant(1.0)-c1/jit_constant(30.0)*(jit_constant(1.0)-c1/jit_constant(56.0)))+c0*c0/jit_constant(20160.0));
131 f2_j.elem().elem().imag() = jit_constant(0.5)*(c0/jit_constant(60.0)*(jit_constant(1.0)-c1/jit_constant(21.0)*(jit_constant(1.0)-c1/jit_constant(48.0))));
134 llvm::BasicBlock * block_dobs_0 = llvm_new_basic_block();
135 llvm_cond_branch( r_dobs , block_dobs_0 , block_exit );
138 llvm_set_insert_point( block_dobs_0 );
140 b20_j.elem().elem().real() = -c0/jit_constant(360.0);
141 b20_j.elem().elem().imag() = -jit_constant(1.0/6.0)*(jit_constant(1.0)-(c1/jit_constant(20.0))*(jit_constant(1.0)-c1/jit_constant(42.0)));
145 b10_j.elem().elem().real() = jit_constant(0);
146 b10_j.elem().elem().imag() = (c0/jit_constant(120.0))*(jit_constant(1.0)-c1/jit_constant(21.0));
150 b21_j.elem().elem().real() = jit_constant(1.0/24.0)*(jit_constant(1.0)-c1/jit_constant(15.0)*(jit_constant(1.0)-jit_constant(3.0)*c1/jit_constant(112.0)));
151 b21_j.elem().elem().imag() = -c0/jit_constant(2520.0);
155 b11_j.elem().elem().real() = -c0/jit_constant(360.0)*(jit_constant(1.0) - jit_constant(3.0)*c1/jit_constant(56.0) );
156 b11_j.elem().elem().imag() = -jit_constant(1.0/6.0)*(jit_constant(1.0)-c1/jit_constant(10.0)*(jit_constant(1.0)-c1/jit_constant(28.0)));
159 b22_j.elem().elem().real() = jit_constant(0.5)*c0/jit_constant(10080.0);
160 b22_j.elem().elem().imag() = jit_constant(0.5)*( jit_constant(1.0/60.0)*(jit_constant(1.0)-c1/jit_constant(21.0)*(jit_constant(1.0)-c1/jit_constant(48.0))) );
163 b12_j.elem().elem().real() = jit_constant(0.5)*( jit_constant(1.0/12.0)*(jit_constant(1.0)-(jit_constant(2.0)*c1/jit_constant(30.0))*(jit_constant(1.0)-jit_constant(3.0)*c1/jit_constant(112.0))) );
164 b12_j.elem().elem().imag() = jit_constant(0.5)*( -c0/jit_constant(1260.0)*(jit_constant(1.0)-c1/jit_constant(24.0)) );
166 llvm_branch( block_exit );
172 llvm_set_insert_point( block_c1_nlt );
175 llvm::Value* c0_negativeP = llvm_lt( c0.get_val() , llvm_create_value(0.0) );
176 WordREG<REAL> c0abs = fabs(c0);
177 WordREG<REAL> c0max = jit_constant(2.0) * pow( c1 / jit_constant(3.0) , jit_constant(1.5) );
179 WordREG<REAL>
eps = (c0max - c0abs)/c0max;
185 llvm::Value* theta_phi0;
186 llvm::Value* theta_phi1;
187 llvm::Value* theta_phi2;
189 llvm::BasicBlock * block_eps_0 = llvm_new_basic_block();
190 llvm::BasicBlock * block_eps_1 = llvm_new_basic_block();
191 llvm::BasicBlock * block_eps_2 = llvm_new_basic_block();
192 llvm::BasicBlock * block_eps_3 = llvm_new_basic_block();
193 llvm::BasicBlock * block_eps_exit = llvm_new_basic_block();
195 llvm_cond_branch( llvm_lt(
eps.get_val() , llvm_create_value(0.0) ) , block_eps_0 , block_eps_1 );
197 llvm_set_insert_point( block_eps_0 );
198 theta_phi0 = llvm_create_value(0.0);
199 llvm_branch(block_eps_exit);
201 llvm_set_insert_point( block_eps_1 );
202 llvm_cond_branch( llvm_lt(
eps.get_val() , llvm_create_value(1.0e-3) ) , block_eps_2 , block_eps_3 );
204 llvm_set_insert_point( block_eps_2 );
205 WordREG<REAL> sqtwo = sqrt( jit_constant(2.0) );
206 WordREG<REAL> theta_tmp;
210 ( jit_constant(1.0) +
211 ( jit_constant(1/(REAL)12) +
212 ( jit_constant(3/(REAL)160) +
213 ( jit_constant(5/(REAL)896) +
214 ( jit_constant(35/(REAL)18432) +
215 jit_constant(63/(REAL)90112) *
eps ) *
220 theta_phi1 = theta_tmp.get_val();
221 llvm_branch(block_eps_exit);
223 llvm_set_insert_point( block_eps_3 );
224 WordREG<REAL> theta_tmp2;
225 theta_tmp2 = acos( c0abs/c0max );
226 theta_phi2 = theta_tmp2.get_val();
227 llvm_branch(block_eps_exit);
229 llvm_set_insert_point( block_eps_exit );
231 llvm::PHINode* theta_phi = llvm_phi( llvm_type<REAL>::value , 3 );
232 theta_phi->addIncoming( theta_phi0 , block_eps_0 );
233 theta_phi->addIncoming( theta_phi1 , block_eps_2 );
234 theta_phi->addIncoming( theta_phi2 , block_eps_3 );
236 theta.setup( theta_phi );
240 multi1d<WordREG<REAL> > f_site_re(3);
241 multi1d<WordREG<REAL> > f_site_im(3);
243 multi1d<WordREG<REAL> > b1_site_re_phi0(3);
244 multi1d<WordREG<REAL> > b1_site_im_phi0(3);
246 multi1d<WordREG<REAL> > b2_site_re_phi0(3);
247 multi1d<WordREG<REAL> > b2_site_im_phi0(3);
251 WordREG<REAL>
u = sqrt(c1/jit_constant(3.0))*cos(theta/jit_constant(3.0));
252 WordREG<REAL> w = sqrt(c1)*sin(theta/jit_constant(3.0));
254 WordREG<REAL> u_sq =
u*
u;
255 WordREG<REAL> w_sq = w*w;
257 WordREG<REAL> xi0,xi1;
259 llvm::Value *w_smallP = llvm_lt( (fabs( w )).get_val() , llvm_create_value( 0.05 ) );
260 llvm::BasicBlock * block_xi0_exit = llvm_new_basic_block();
262 llvm::BasicBlock * block_xi0_small = llvm_new_basic_block();
263 llvm::BasicBlock * block_xi0_not_small = llvm_new_basic_block();
264 llvm::Value* xi0_phi0;
265 llvm::Value* xi0_phi1;
267 llvm_cond_branch( w_smallP , block_xi0_small , block_xi0_not_small );
269 llvm_set_insert_point( block_xi0_small );
270 WordREG<REAL> xi0_tmp0 =
272 (jit_constant(1.0/6.0)*w_sq*( jit_constant(1.0) -
273 (jit_constant(1.0/20.)*w_sq*( jit_constant(1.0) -
274 (jit_constant(1.0/42.0)*w_sq ) ))));
275 xi0_phi0 = xi0_tmp0.get_val();
277 llvm_branch( block_xi0_exit );
279 llvm_set_insert_point( block_xi0_not_small );
280 WordREG<REAL> xi0_tmp1 = sin(w)/w;
281 xi0_phi1 = xi0_tmp1.get_val();
282 llvm_branch( block_xi0_exit );
284 llvm_set_insert_point( block_xi0_exit );
286 llvm::PHINode* xi0_phi = llvm_phi( llvm_type<REAL>::value , 2 );
287 xi0_phi->addIncoming( xi0_phi0 , block_xi0_small );
288 xi0_phi->addIncoming( xi0_phi1 , block_xi0_not_small );
289 xi0.setup( xi0_phi );
292 llvm::BasicBlock * block_dobs1 = llvm_new_basic_block();
293 llvm::BasicBlock * block_dobs1_exit = llvm_new_basic_block();
294 llvm_cond_branch( r_dobs , block_dobs1 , block_dobs1_exit );
295 llvm_set_insert_point( block_dobs1 );
297 llvm::BasicBlock * block_xi1_small = llvm_new_basic_block();
298 llvm::BasicBlock * block_xi1_not_small = llvm_new_basic_block();
299 llvm::Value* xi1_phi0;
300 llvm::Value* xi1_phi1;
303 llvm_cond_branch( w_smallP , block_xi1_small , block_xi1_not_small );
305 llvm_set_insert_point( block_xi1_small );
306 WordREG<REAL> xi1_tmp0 =
308 ( jit_constant((REAL)1/(REAL)3) -
309 jit_constant((REAL)1/(REAL)30)*w_sq*( jit_constant((REAL)1) -
310 jit_constant((REAL)1/(REAL)28)*w_sq*( jit_constant((REAL)1) -
311 jit_constant((REAL)1/(REAL)54)*w_sq ) ) );
312 xi1_phi0 = xi1_tmp0.get_val();
314 llvm_branch( block_dobs1_exit );
316 llvm_set_insert_point( block_xi1_not_small );
317 WordREG<REAL> xi1_tmp1 = cos(w)/w_sq - sin(w)/(w_sq*w);
318 xi1_phi1 = xi1_tmp1.get_val();
320 llvm_branch( block_dobs1_exit );
326 llvm_set_insert_point( block_dobs1_exit );
328 llvm::PHINode* xi1_phi = llvm_phi( llvm_type<REAL>::value , 3 );
329 xi1_phi->addIncoming( xi1_phi0 , block_xi1_small );
330 xi1_phi->addIncoming( xi1_phi1 , block_xi1_not_small );
331 xi1_phi->addIncoming( llvm_cast( llvm_type<REAL>::value , llvm_create_value(0.0) ) , block_xi0_exit );
332 xi1.setup( xi1_phi );
334 WordREG<REAL> cosu = cos(
u);
335 WordREG<REAL> sinu = sin(
u);
336 WordREG<REAL> cosw = cos(w);
337 WordREG<REAL> sinw = sin(w);
338 WordREG<REAL> sin2u = sin(jit_constant(2.0)*
u);
339 WordREG<REAL> cos2u = cos(jit_constant(2.0)*
u);
340 WordREG<REAL> ucosu =
u*cosu;
341 WordREG<REAL> usinu =
u*sinu;
342 WordREG<REAL> ucos2u =
u*cos2u;
343 WordREG<REAL> usin2u =
u*sin2u;
345 WordREG<REAL> denum = jit_constant(9.0) * u_sq - w_sq;
349 WordREG<REAL> subexp1 = u_sq - w_sq;
350 WordREG<REAL> subexp2 = jit_constant(8.0)*u_sq*cosw;
351 WordREG<REAL> subexp3 = (jit_constant(3.0)*u_sq + w_sq)*xi0;
353 f_site_re[0] = ( (subexp1)*cos2u + cosu*subexp2 + jit_constant(2.0)*usinu*subexp3 ) / denum ;
354 f_site_im[0] = ( (subexp1)*sin2u - sinu*subexp2 + jit_constant(2.0)*ucosu*subexp3 ) / denum ;
358 WordREG<REAL> subexp = (jit_constant(3.0)*u_sq -w_sq)*xi0;
360 f_site_re[1] = (jit_constant(2.0)*(ucos2u - ucosu*cosw)+subexp*sinu)/denum;
361 f_site_im[1] = (jit_constant(2.0)*(usin2u + usinu*cosw)+subexp*cosu)/denum;
366 WordREG<REAL> subexp=jit_constant(3.0)*xi0;
368 f_site_re[2] = (cos2u - cosu*cosw -usinu*subexp) /denum ;
369 f_site_im[2] = (sin2u + sinu*cosw -ucosu*subexp) /denum ;
372 llvm::BasicBlock * block_dobs2 = llvm_new_basic_block();
373 llvm::BasicBlock * block_dobs2_exit = llvm_new_basic_block();
374 llvm_cond_branch( r_dobs , block_dobs2 , block_dobs2_exit );
375 llvm_set_insert_point( block_dobs2 );
378 multi1d<WordREG<REAL> > r_1_re(3);
379 multi1d<WordREG<REAL> > r_1_im(3);
380 multi1d<WordREG<REAL> > r_2_re(3);
381 multi1d<WordREG<REAL> > r_2_im(3);
387 WordREG<REAL> subexp1 = u_sq - w_sq;
388 WordREG<REAL> subexp2 = jit_constant(8.0)*cosw + (jit_constant(3.0)*u_sq + w_sq)*xi0 ;
389 WordREG<REAL> subexp3 = jit_constant(4.0)*u_sq*cosw - (jit_constant(9.0)*u_sq + w_sq)*xi0 ;
391 r_1_re[0] = jit_constant(2.0)*(ucos2u - sin2u *(subexp1)+ucosu*( subexp2 )- sinu*( subexp3 ) );
392 r_1_im[0] = jit_constant(2.0)*(usin2u + cos2u *(subexp1)-usinu*( subexp2 )- cosu*( subexp3 ) );
397 WordREG<REAL> subexp1 = cosw + jit_constant(3.0) * xi0;
398 WordREG<REAL> subexp2 = jit_constant(2.0)*cosw + xi0*(w_sq - jit_constant(3.0)*u_sq);
400 r_1_re[1] = jit_constant(2.0)*((cos2u - jit_constant(2.0)*usin2u) + usinu*( subexp1 )) - cosu*( subexp2 );
401 r_1_im[1] = jit_constant(2.0)*((sin2u + jit_constant(2.0)*ucos2u) + ucosu*( subexp1 )) + sinu*( subexp2 );
407 WordREG<REAL> subexp = cosw - jit_constant(3.0)*xi0;
408 r_1_re[2] = -jit_constant(2.0)*sin2u -jit_constant(3.0)*ucosu*xi0 + sinu*( subexp );
409 r_1_im[2] = jit_constant(2.0)*cos2u +jit_constant(3.0)*usinu*xi0 + cosu*( subexp );
416 WordREG<REAL> subexp = cosw + xi0 + jit_constant(3.0)*u_sq*xi1;
417 r_2_re[0] = -jit_constant(2.0)*(cos2u +
u*( jit_constant(4.0)*ucosu*xi0 - sinu*(subexp )) );
418 r_2_im[0] = -jit_constant(2.0)*(sin2u -
u*( jit_constant(4.0)*usinu*xi0 + cosu*(subexp )) );
425 WordREG<REAL> subexp = cosw + xi0 - jit_constant(3.0)*u_sq*xi1;
426 r_2_re[1] = jit_constant(2.0)*ucosu*xi0 - sinu*( subexp ) ;
427 r_2_im[1] = jit_constant(-2.0)*usinu*xi0 - cosu*( subexp ) ;
432 WordREG<REAL> subexp = jit_constant(3.0)*xi1;
434 r_2_re[2] = cosu*xi0 - usinu*subexp ;
435 r_2_im[2] = -( sinu*xi0 + ucosu*subexp ) ;
438 WordREG<REAL> b_denum=jit_constant(2.0)*denum*denum;
441 for(
int j=0;
j < 3;
j++) {
444 WordREG<REAL> subexp1 = jit_constant(2.0)*
u;
445 WordREG<REAL> subexp2 = jit_constant(3.0)*u_sq - w_sq;
446 WordREG<REAL> subexp3 = jit_constant(2.0)*(jit_constant(15.0)*u_sq + w_sq);
448 b1_site_re_phi0[
j]=( subexp1*r_1_re[
j] + subexp2*r_2_re[
j] - subexp3*f_site_re[
j] )/b_denum;
449 b1_site_im_phi0[
j]=( subexp1*r_1_im[
j] + subexp2*r_2_im[
j] - subexp3*f_site_im[
j] )/b_denum;
453 WordREG<REAL> subexp1 = jit_constant(3.0)*
u;
454 WordREG<REAL> subexp2 = jit_constant(24.0)*
u;
456 b2_site_re_phi0[
j]=( r_1_re[
j]- subexp1*r_2_re[
j] - subexp2 * f_site_re[
j] )/b_denum;
457 b2_site_im_phi0[
j]=( r_1_im[
j] -subexp1*r_2_im[
j] - subexp2 * f_site_im[
j] )/b_denum;
464 llvm::BasicBlock * block_c0neg = llvm_new_basic_block();
465 llvm::BasicBlock * block_c0neg_exit = llvm_new_basic_block();
466 llvm_cond_branch( c0_negativeP , block_c0neg , block_c0neg_exit );
467 llvm_set_insert_point( block_c0neg );
469 multi1d<WordREG<REAL> > b1_site_re_phi1(3);
470 multi1d<WordREG<REAL> > b1_site_im_phi1(3);
472 multi1d<WordREG<REAL> > b2_site_re_phi1(3);
473 multi1d<WordREG<REAL> > b2_site_im_phi1(3);
478 b1_site_im_phi1[0] = b1_site_im_phi0[0] * jit_constant(-1.0);
481 b1_site_re_phi1[1] = b1_site_re_phi0[1] * jit_constant(-1.0);
484 b1_site_im_phi1[2] = b1_site_im_phi0[2] * jit_constant(-1.0);
487 b2_site_re_phi1[0] = b2_site_re_phi0[0] * jit_constant(-1.0);
490 b2_site_im_phi1[1] = b2_site_im_phi0[1] * jit_constant(-1.0);
493 b2_site_re_phi1[2] = b2_site_re_phi0[2] * jit_constant(-1.0);
495 llvm_branch( block_c0neg_exit );
497 llvm_set_insert_point( block_c0neg_exit );
502 multi1d<WordREG<REAL> > f_site_re(3);
503 multi1d<WordREG<REAL> > f_site_im(3);
505 multi1d<WordREG<REAL> > b1_site_re(3);
506 multi1d<WordREG<REAL> > b1_site_im(3);
508 multi1d<WordREG<REAL> > b2_site_re(3);
509 multi1d<WordREG<REAL> > b2_site_im(3);
511 b1_site_im[1] = b1_site_im_phi0[1];
512 b1_site_re[0] = b1_site_re_phi0[0];
513 b1_site_re[2] = b1_site_re_phi0[2];
515 b2_site_re[1] = b2_site_re_phi0[1];
516 b2_site_im[0] = b2_site_im_phi0[0];
517 b2_site_im[2] = b2_site_im_phi0[2];
520 qdpPHI( b1_site_im[0] , b1_site_im_phi1[0] , block_c0neg ,
521 b1_site_im_phi0[0] , block_dobs2 );
522 qdpPHI( b1_site_re[1] , b1_site_re_phi1[1] , block_c0neg ,
523 b1_site_re_phi0[1] , block_dobs2 );
524 qdpPHI( b1_site_im[2] , b1_site_im_phi1[2] , block_c0neg ,
525 b1_site_im_phi0[2] , block_dobs2 );
526 qdpPHI( b2_site_re[0] , b2_site_re_phi1[0] , block_c0neg ,
527 b2_site_re_phi0[0] , block_dobs2 );
528 qdpPHI( b2_site_im[1] , b2_site_im_phi1[1] , block_c0neg ,
529 b2_site_im_phi0[1] , block_dobs2 );
530 qdpPHI( b2_site_re[2] , b2_site_re_phi1[2] , block_c0neg ,
531 b2_site_re_phi0[2] , block_dobs2 );
537 b10_j.elem().elem().real() = b1_site_re[0];
538 b10_j.elem().elem().imag() = b1_site_im[0];
540 b20_j.elem().elem().real() = b2_site_re[0];
541 b20_j.elem().elem().imag() = b2_site_im[0];
543 b11_j.elem().elem().real() = b1_site_re[1];
544 b11_j.elem().elem().imag() = b1_site_im[1];
546 b21_j.elem().elem().real() = b2_site_re[1];
547 b21_j.elem().elem().imag() = b2_site_im[1];
549 b12_j.elem().elem().real() = b1_site_re[2];
550 b12_j.elem().elem().imag() = b1_site_im[2];
552 b22_j.elem().elem().real() = b2_site_re[2];
553 b22_j.elem().elem().imag() = b2_site_im[2];
555 llvm_branch( block_dobs2_exit );
560 llvm_set_insert_point( block_dobs2_exit );
568 f0_j.elem().elem().real() = f_site_re[0];
569 f0_j.elem().elem().imag() = f_site_im[0];
571 f1_j.elem().elem().real() = f_site_re[1];
572 f1_j.elem().elem().imag() = f_site_im[1];
574 f2_j.elem().elem().real() = f_site_re[2];
575 f2_j.elem().elem().imag() = f_site_im[2];
579 llvm::BasicBlock * block_c0neg1 = llvm_new_basic_block();
580 llvm::BasicBlock * block_c0neg1_exit = llvm_new_basic_block();
581 llvm_cond_branch( c0_negativeP , block_c0neg1 , block_c0neg1_exit );
582 llvm_set_insert_point( block_c0neg1 );
587 f0_j.elem().elem().imag() *= jit_constant(-1.0);
591 f1_j.elem().elem().real() *= jit_constant(-1.0);
595 f2_j.elem().elem().imag() *= jit_constant(-1.0);
597 llvm_branch( block_c0neg1_exit );
599 llvm_set_insert_point( block_c0neg1_exit );
605 llvm_branch( block_exit );
606 llvm_set_insert_point( block_exit );
611 func.func().push_back( jit_function_epilogue_get(
"jit_get_fs_bs.ptx") );
static multi1d< LatticeColorMatrix > u
multi1d< LatticeColorMatrix > Q