CHROMA
ord_ib_rxupdate_kernel_generic.h
Go to the documentation of this file.
// 32 BIT Version. Use vector length of 4 (guaranteed OK for LatticeDirac Fermion)
// for easy vectorization (with suitably good compiler).
3 
4 inline
5 void ord_ib_rxupdate_kernel_real32(int lo, int hi, int my_id, ib_rxupdate_arg<REAL32>* a)
6 {
7 
8  int atom=a->atom;
9  int low=atom*lo;
10  int len = atom*(hi - lo);
11 
12  REAL32* s = &(a->s_ptr[low]);
13  REAL32* t = &(a->t_ptr[low]);
14  REAL32* z = &(a->z_ptr[low]);
15  REAL32* r = &(a->r_ptr[low]);
16  REAL32* x = &(a->x_ptr[low]);
17 
18  REAL32 om_re = a->omega_re;
19  REAL32 om_im = a->omega_im;
20 
21  if( len % 4 == 0 ) {
22  for(int count = 0; count < len; count+=4) {
23 
24 
25  r[count] = s[count] - om_re*t[count] + om_im*t[count+1];
26  r[count+1] = s[count+1] - om_re*t[count+1] - om_im*t[count];
27  r[count+2] = s[count+2] - om_re*t[count+2] + om_im*t[count+3];
28  r[count+3] = s[count+3] - om_re*t[count+3] - om_im*t[count+2];
29 
30 
31  x[count] += om_re*s[count] - om_im*s[count+1] + z[count];
32  x[count+1] += om_re*s[count+1] + om_im*s[count] + z[count+1];
33  x[count+2] += om_re*s[count+2] - om_im*s[count+3] + z[count+2];
34  x[count+3] += om_re*s[count+3] + om_im*s[count+2] + z[count+3];
35  }
36  }
37  else {
38  QDPIO::cout << "ord_ib_rxupdate_kernel_generic.h: len not divisible by 4" << std::endl;
39  QDP_abort(1);
40  }
41 }
42 
// 64 BIT Version. Use vector length of 2 (guaranteed OK for Complex Numbers)
// for easy vectorization (with suitably good compiler). Can do function
// overloading so no need to change kernel name
46 
47 inline
48 void ord_ib_rxupdate_kernel_real64(int lo, int hi, int my_id, ib_rxupdate_arg<REAL64>* a)
49 {
50 
51  int atom=a->atom;
52  int low = atom*lo;
53  int len =atom*(hi - lo);
54 
55  REAL64* s = &(a->s_ptr[low]);
56  REAL64* t = &(a->t_ptr[low]);
57  REAL64* z = &(a->z_ptr[low]);
58  REAL64* r = &(a->r_ptr[low]);
59  REAL64* x = &(a->x_ptr[low]);
60 
61  REAL64 om_re = a->omega_re;
62  REAL64 om_im = a->omega_im;
63 
64  if( len % 2 == 0) {
65  for(int count = 0; count < len; count+=2) {
66  r[count] = s[count] - om_re*t[count] + om_im*t[count+1];
67  r[count+1] = s[count+1] - om_re*t[count+1] - om_im*t[count];
68 
69  x[count] += om_re*s[count] - om_im*s[count+1] + z[count];
70  x[count+1] += om_re*s[count+1] + om_im*s[count] + z[count+1];
71 
72  }
73  }
74  else {
75  QDPIO::cout << "ord_ib_rxupdate_kernel_generic.h: len not divisible by 2"<<std::endl;
76  QDP_abort(1);
77  }
78 }
79 
unsigned s
Definition: ldumul_w.cc:37
int z
Definition: meslate.cc:36
int x
Definition: meslate.cc:34
int t
Definition: meslate.cc:37
Complex a
Definition: invbicg.cc:95
int count
Definition: octave.h:14
void ord_ib_rxupdate_kernel_real64(int lo, int hi, int my_id, ib_rxupdate_arg< REAL64 > *a)
void ord_ib_rxupdate_kernel_real32(int lo, int hi, int my_id, ib_rxupdate_arg< REAL32 > *a)
multi1d< LatticeFermion > r(Ncb)