CHROMA
ord_ib_stupdates_kernel_generic.h
Go to the documentation of this file.
1 #include <cstdio>
2 
3 
4 inline
5 void ord_ib_stupdates_kernel_real32(int lo, int hi, int my_id, ib_stupdate_arg<REAL32>* a)
6 {
7  int atom=a->atom;
8  int low = lo*atom;
9  int len = atom*(hi - lo);
10 
11  REAL32 a_r = a->a_r;
12  REAL32 a_i = a->a_i;
13 
14  REAL32* r = &(a->r[low]);
15  REAL32* u = &(a->u[low]);
16  REAL32* v = &(a->v[low]);
17  REAL32* q = &(a->q[low]);
18  REAL32* r0 = &(a->r0[low]);
19  REAL32* f0 = &(a->f0[low]);
20  REAL32* s = &(a->s[low]);
21  REAL32* t = &(a->t[low]);
22  REAL64* norm_array = &(a->norm_space[12*my_id]);
23 
24  // Caller zeroed norm_space
25  if( len % 4 == 0) {
26  for(int count = 0; count < len; count+=4) {
27  // First need s = r - alpha*v
28 
29  // Real Part -(+,-)
30  s[count] = r[count] - a_r*v[count];
31  s[count] += a_i*v[count+1];
32  // Imag Part -(+,+)
33  s[count+1] = r[count+1] - a_r*v[count+1];
34  s[count+1] -= a_i*v[count];
35 
36  // Real Part -(+,-)
37  s[count+2] = r[count+2] - a_r*v[count+2];
38  s[count+2] += a_i*v[count+3];
39  // Imag Part -(+,+)
40  s[count+3] = r[count+3] - a_r*v[count+3];
41  s[count+3] -= a_i*v[count+2];
42 
43  // Now inner products with s.
44  // ** phi=(r0,s)
45  norm_array[0] += r0[count]*s[count];
46  norm_array[0] += r0[count+1]*s[count+1];
47 
48  norm_array[0] += r0[count+2]*s[count+2];
49  norm_array[0] += r0[count+3]*s[count+3];
50 
51  norm_array[1] += r0[count]*s[count+1];
52  norm_array[1] -= r0[count+1]*s[count];
53 
54  norm_array[1] += r0[count+2]*s[count+3];
55  norm_array[1] -= r0[count+3]*s[count+2];
56 
57  // ** gamma =(f0,s)
58  norm_array[2] += f0[count]*s[count];
59  norm_array[2] += f0[count+1]*s[count+1];
60 
61  norm_array[2] += f0[count+2]*s[count+2];
62  norm_array[2] += f0[count+3]*s[count+3];
63 
64  norm_array[3] += f0[count]*s[count+1];
65  norm_array[3] -= f0[count+1]*s[count];
66 
67 
68  norm_array[3] += f0[count+2]*s[count+3];
69  norm_array[3] -= f0[count+3]*s[count+2];
70 
71 
72  // T update: t = u - alpha q
73  // Real Part -(+,-)
74  t[count] = u[count] - a_r*q[count];
75  t[count] += a_i*q[count+1];
76  // Imag Part -(+,+)
77  t[count+1] = u[count+1] - a_r*q[count+1];
78  t[count+1] -= a_i*q[count];
79 
80  // Real Part -(+,-)
81  t[count+2] = u[count+2] - a_r*q[count+2];
82  t[count+2] += a_i*q[count+3];
83  // Imag Part -(+,+)
84  t[count+3] = u[count+3] - a_r*q[count+3];
85  t[count+3] -= a_i*q[count+2];
86 
87 
88  // Now inner products with q.
89  // ** pi=(r0,q)
90  norm_array[4] += r0[count]*q[count];
91  norm_array[4] += r0[count+1]*q[count+1];
92 
93  norm_array[4] += r0[count+2]*q[count+2];
94  norm_array[4] += r0[count+3]*q[count+3];
95 
96 
97  norm_array[5] += r0[count]*q[count+1];
98  norm_array[5] -= r0[count+1]*q[count];
99 
100  norm_array[5] += r0[count+2]*q[count+3];
101  norm_array[5] -= r0[count+3]*q[count+2];
102 
103  // Now inner products with t.
104  // ** eta=(f0,t)
105  norm_array[6] += f0[count]*t[count];
106  norm_array[6] += f0[count+1]*t[count+1];
107 
108  norm_array[6] += f0[count+2]*t[count+2];
109  norm_array[6] += f0[count+3]*t[count+3];
110 
111  norm_array[7] += f0[count]*t[count+1];
112  norm_array[7] -= f0[count+1]*t[count];
113 
114  norm_array[7] += f0[count+2]*t[count+3];
115  norm_array[7] -= f0[count+3]*t[count+2];
116 
117  // ** theta=(t,s)
118  norm_array[8] += t[count]*s[count];
119  norm_array[8] += t[count+1]*s[count+1];
120 
121  norm_array[8] += t[count+2]*s[count+2];
122  norm_array[8] += t[count+3]*s[count+3];
123 
124 
125  norm_array[9] += t[count]*s[count+1];
126  norm_array[9] -= t[count+1]*s[count];
127 
128  norm_array[9] += t[count+2]*s[count+3];
129  norm_array[9] -= t[count+3]*s[count+2];
130 
131  // ** kappa = || t ||^2
132  norm_array[10] += t[count]*t[count];
133  norm_array[10] += t[count+1]*t[count+1];
134 
135  norm_array[10] += t[count+2]*t[count+2];
136  norm_array[10] += t[count+3]*t[count+3];
137 
138  // ** rnorm = || r ||^2
139  norm_array[11] += r[count]*r[count];
140  norm_array[11] += r[count+1]*r[count+1];
141 
142  norm_array[11] += r[count+2]*r[count+2];
143  norm_array[11] += r[count+3]*r[count+3];
144 
145 
146  }
147  }
148  else {
149  QDPIO::cout << "ord_ib_stubdates_kernel_generic.h: len not divisibe by 4" << std::endl;
150  QDP_abort(1);
151  }
152 
153 }
154 
155 inline
156 void ord_ib_stupdates_kernel_real64(int lo, int hi, int my_id, ib_stupdate_arg<REAL64>* a)
157 {
158  int atom = a->atom;
159  int low = atom*lo;
160  int len = atom*(hi-lo);
161 
162  REAL64 a_r = a->a_r;
163  REAL64 a_i = a->a_i;
164 
165  REAL64* r = &(a->r[low]);
166  REAL64* u = &(a->u[low]);
167  REAL64* v = &(a->v[low]);
168  REAL64* q = &(a->q[low]);
169  REAL64* r0 = &(a->r0[low]);
170  REAL64* f0 = &(a->f0[low]);
171  REAL64* s = &(a->s[low]);
172  REAL64* t = &(a->t[low]);
173  REAL64* norm_array = &(a->norm_space[12*my_id]);
174 
175  // Caller zeroed norm_space
176 
177  if (len % 2 == 0 ) {
178  for(int count = 0; count < len; count+=2) {
179  // First need s = r - alpha*v
180 
181  // Real Part -(+,-)
182  s[count] = r[count] - a_r*v[count];
183  s[count] += a_i*v[count+1];
184  // Imag Part -(+,+)
185  s[count+1] = r[count+1] - a_r*v[count+1];
186  s[count+1] -= a_i*v[count];
187 
188 
189  // Now inner products with s.
190  // ** phi=(r0,s)
191  norm_array[0] += r0[count]*s[count];
192  norm_array[0] += r0[count+1]*s[count+1];
193 
194  norm_array[1] += r0[count]*s[count+1];
195  norm_array[1] -= r0[count+1]*s[count];
196 
197  // ** gamma=(f0,s)
198  norm_array[2] += f0[count]*s[count];
199  norm_array[2] += f0[count+1]*s[count+1];
200 
201  norm_array[3] += f0[count]*s[count+1];
202  norm_array[3] -= f0[count+1]*s[count];
203 
204 
205  // T update: t = u - alpha q
206  // Real Part -(+,-)
207  t[count] = u[count] - a_r*q[count];
208  t[count] += a_i*q[count+1];
209  // Imag Part -(+,+)
210  t[count+1] = u[count+1] - a_r*q[count+1];
211  t[count+1] -= a_i*q[count];
212 
213 
214  // Now inner products with q.
215  // ** pi=(r0,q)
216  norm_array[4] += r0[count]*q[count];
217  norm_array[4] += r0[count+1]*q[count+1];
218 
219  norm_array[5] += r0[count]*q[count+1];
220  norm_array[5] -= r0[count+1]*q[count];
221 
222 
223  // Now inner products with t.
224  // ** eta=(f0,t)
225  norm_array[6] += f0[count]*t[count];
226  norm_array[6] += f0[count+1]*t[count+1];
227 
228  norm_array[7] -= f0[count+1]*t[count];
229  norm_array[7] += f0[count]*t[count+1];
230 
231 
232  // ** theta=(t,s)
233  norm_array[8] += t[count]*s[count];
234  norm_array[8] += t[count+1]*s[count+1];
235 
236  norm_array[9] += t[count]*s[count+1];
237  norm_array[9] -= t[count+1]*s[count];
238 
239 
240  // ** kappa = || t ||^2
241  norm_array[10] += t[count]*t[count];
242  norm_array[10] += t[count+1]*t[count+1];
243 
244  // ** rnorm = || r ||^2
245  norm_array[11] += r[count]*r[count];
246  norm_array[11] += r[count+1]*r[count+1];
247 
248  }
249  }
250  else {
251  QDPIO::cout << "ord_bi_stubdates_kernel_generic.h: len not divisible by 2" <<std::endl;
252  QDP_abort(1);
253  }
254 
255 }
unsigned s
Definition: ldumul_w.cc:37
int t
Definition: meslate.cc:37
Double q
Definition: mesq.cc:17
static multi1d< LatticeColorMatrix > u
Complex a
Definition: invbicg.cc:95
int count
Definition: octave.h:14
void ord_ib_stupdates_kernel_real64(int lo, int hi, int my_id, ib_stupdate_arg< REAL64 > *a)
void ord_ib_stupdates_kernel_real32(int lo, int hi, int my_id, ib_stupdate_arg< REAL32 > *a)
multi1d< LatticeFermion > r(Ncb)
int r0
Definition: qtopcor.cc:41