CHROMA
ord_ib_zvupdates_kernel_generic.h
Go to the documentation of this file.
1 // 32 BIT Version: Use vector length of 4 for easy vectorization.
2 // This is guaranteed good for LatticeDiracFermions
3 
4 inline
5 void ord_ib_zvupdates_kernel_real32(int lo, int hi, int my_id, ib_zvupdates_arg<REAL32>* a)
6 {
7 
8  int atom = a->atom;
9  int low = atom*lo;
10  int len = atom*(hi-lo);
11 
12  REAL32* r = &(a->r_ptr[low]);
13  REAL32* z = &(a->z_ptr[low]);
14  REAL32* v = &(a->v_ptr[low]);
15  REAL32* u = &(a->u_ptr[low]);
16  REAL32* q = &(a->q_ptr[low]);
17 
18  REAL32 a_re = a->alpha_re;
19  REAL32 a_im = a->alpha_im;
20 
21  REAL32 arb_re = a->alpha_rat_beta_re;
22  REAL32 arb_im = a->alpha_rat_beta_im;
23 
24  REAL32 ad_re = a->alpha_delta_re;
25  REAL32 ad_im = a->alpha_delta_im;
26 
27  REAL32 b_re = a->beta_re;
28  REAL32 b_im = a->beta_im;
29 
30  REAL32 d_re = a->delta_re;
31  REAL32 d_im = a->delta_im;
32 
33  REAL32 ztmp[4];
34  REAL32 vtmp[4];
35 
36  if( len % 4 == 0 ) {
37  for(int count = 0; count < len; count+=4) {
38 
39  /* z = (alpha_n/alpha_n-1)*beta z */
40  ztmp[0] = z[count];
41  ztmp[1] = z[count+1];
42  ztmp[2] = z[count+2];
43  ztmp[3] = z[count+3];
44 
45  vtmp[0] = v[count];
46  vtmp[1] = v[count+1];
47  vtmp[2] = v[count+2];
48  vtmp[3] = v[count+3];
49 
50  z[count] = arb_re * ztmp[0] - arb_im * ztmp[1];
51  z[count+1] = arb_re * ztmp[1] + arb_im * ztmp[0];
52  z[count+2] = arb_re * ztmp[2] - arb_im * ztmp[3];
53  z[count+3] = arb_re * ztmp[3] + arb_im * ztmp[2];
54 
55 
56  /* z += alpha*r */
57  z[count ] += a_re * r[count];
58  z[count+1] += a_re * r[count+1];
59  z[count+2] += a_re * r[count+2];
60  z[count+3] += a_re * r[count+3];
61 
62  z[count ] -= a_im * r[count+1];
63  z[count+1] += a_im * r[count];
64  z[count+2] -= a_im * r[count+3];
65  z[count+3] += a_im * r[count+2];
66 
67 
68  /* z -= alpha*delta*v */
69  z[count ] -= ad_re * v[count] ;
70  z[count+1] -= ad_re * v[count+1];
71  z[count+2] -= ad_re * v[count+2] ;
72  z[count+3] -= ad_re * v[count+3];
73 
74  z[count ] += ad_im * v[count+1];
75  z[count+1] -= ad_im * v[count];
76  z[count+2] += ad_im * v[count+3];
77  z[count+3] -= ad_im * v[count+2];
78 
79 
80  v[count] = u[count] + b_re*vtmp[0] - b_im*vtmp[1];
81  v[count+1] = u[count+1] + b_re*vtmp[1] + b_im*vtmp[0];
82  v[count+2] = u[count+2] + b_re*vtmp[2] - b_im*vtmp[3];
83  v[count+3] = u[count+3] + b_re*vtmp[3] + b_im*vtmp[2];
84 
85  v[count] -= d_re*q[count];
86  v[count+1] -= d_re*q[count+1];
87  v[count+2] -= d_re*q[count+2];
88  v[count+3] -= d_re*q[count+3];
89 
90  v[count] += d_im*q[count+1];
91  v[count+1] -= d_im*q[count];
92  v[count+2] += d_im*q[count+3];
93  v[count+3] -= d_im*q[count+2];
94 
95  }
96  }
97  else {
98  QDPIO::cout << "ord_ib_zvupdates_kernel_generic.h: len not divisible by 4" << std::endl;
99  QDP_abort(1);
100  }
101 }
102 
103 // 64 BIT Version: Use vector length of 2 for easy vectorization.
104 // This is guaranteed good for LatticeDiracFermions
105 
106 
107 inline
108 void ord_ib_zvupdates_kernel_real64(int lo, int hi, int my_id, ib_zvupdates_arg<REAL64>* a)
109 {
110 
111  int atom = a->atom;
112  int low = atom*lo;
113  int len = atom*(hi-lo);
114 
115  REAL64* r = &(a->r_ptr[low]);
116  REAL64* z = &(a->z_ptr[low]);
117  REAL64* v = &(a->v_ptr[low]);
118  REAL64* u = &(a->u_ptr[low]);
119  REAL64* q = &(a->q_ptr[low]);
120 
121  REAL64 a_re = a->alpha_re;
122  REAL64 a_im = a->alpha_im;
123 
124  REAL64 arb_re = a->alpha_rat_beta_re;
125  REAL64 arb_im = a->alpha_rat_beta_im;
126 
127  REAL64 ad_re = a->alpha_delta_re;
128  REAL64 ad_im = a->alpha_delta_im;
129 
130  REAL64 b_re = a->beta_re;
131  REAL64 b_im = a->beta_im;
132 
133  REAL64 d_re = a->delta_re;
134  REAL64 d_im = a->delta_im;
135 
136  REAL64 ztmp[2];
137  REAL64 vtmp[2];
138 
139  if( len % 2 == 0) {
140  for(int count = 0; count < len; count+=2) {
141 
142  /* z = (alpha_n/alpha_n-1)*beta z */
143  ztmp[0] = z[count];
144  ztmp[1] = z[count+1];
145  vtmp[0] = v[count];
146  vtmp[1] = v[count+1];
147 
148  z[count] = arb_re * ztmp[0] - arb_im * ztmp[1];
149  z[count+1] = arb_re * ztmp[1] + arb_im * ztmp[0];
150 
151  /* z += alpha*r */
152  z[count ] += a_re * r[count];
153  z[count+1] += a_re * r[count+1];
154 
155  z[count ] -= a_im * r[count+1];
156  z[count+1] += a_im * r[count];
157 
158  /* z -= alpha*delta*v */
159  z[count ] -= ad_re * v[count] ;
160  z[count+1] -= ad_re * v[count+1];
161 
162  z[count ] += ad_im * v[count+1];
163  z[count+1] -= ad_im * v[count];
164 
165  v[count] = u[count] + b_re*vtmp[0] - b_im*vtmp[1];
166  v[count+1] = u[count+1] + b_re*vtmp[1] + b_im*vtmp[0];
167 
168  v[count] -= d_re*q[count];
169  v[count+1] -= d_re*q[count+1];
170 
171  v[count] += d_im*q[count+1];
172  v[count+1] -= d_im*q[count];
173 
174 
175  }
176  }
177  else {
178  QDPIO::cout << "ord_ib_zvupdates_kernel_generic.h: len not divisible by 2"<<std::endl;
179  QDP_abort(1);
180  }
181 }
182 
int z
Definition: meslate.cc:36
Double q
Definition: mesq.cc:17
static multi1d< LatticeColorMatrix > u
Complex a
Definition: invbicg.cc:95
int count
Definition: octave.h:14
void ord_ib_zvupdates_kernel_real32(int lo, int hi, int my_id, ib_zvupdates_arg< REAL32 > *a)
void ord_ib_zvupdates_kernel_real64(int lo, int hi, int my_id, ib_zvupdates_arg< REAL64 > *a)
multi1d< LatticeFermion > r(Ncb)