CHROMA
ord_xmay_normx_cdotzx_kernel_generic.h
Go to the documentation of this file.
1 inline
2 void ord_xmay_normx_cdotzx_kernel(int lo, int hi, int my_id, ord_xmay_normx_cdotzx_arg *a)
3 {
4  int atom = a->atom;
5  int low = atom*lo;
6  int len = atom*(hi-lo);
7 
8  REAL32* x_ptr=&(a->x_ptr[low]);
9  REAL32* y_ptr=&(a->y_ptr[low]);
10  REAL32* z_ptr=&(a->z_ptr[low]);
11  REAL32 a_re = a->a_re;
12  REAL32 a_im = a->a_im;
13  REAL64 norm_array[3]={0,0,0};
14 
15  if( len % 4 == 0) {
16  for(int count = 0; count < len; count+=4) {
17 
18  x_ptr[count] -= a_re*y_ptr[count];
19  x_ptr[count] += a_im*y_ptr[count+1];
20 
21  x_ptr[count+1] -= a_im*y_ptr[count];
22  x_ptr[count+1] -= a_re*y_ptr[count+1];
23 
24  x_ptr[count+2] -= a_re*y_ptr[count+2];
25  x_ptr[count+2] += a_im*y_ptr[count+3];
26 
27  x_ptr[count+3] -= a_im*y_ptr[count+2];
28  x_ptr[count+3] -= a_re*y_ptr[count+3];
29 
30  norm_array[0] += x_ptr[count]*x_ptr[count];
31  norm_array[0] += x_ptr[count+1]*x_ptr[count+1];
32  norm_array[0] += x_ptr[count+2]*x_ptr[count+2];
33  norm_array[0] += x_ptr[count+3]*x_ptr[count+3];
34 
35  norm_array[1] += z_ptr[count]*x_ptr[count];
36  norm_array[1] += z_ptr[count+1]*x_ptr[count+1];
37  norm_array[1] += z_ptr[count+2]*x_ptr[count+2];
38  norm_array[1] += z_ptr[count+3]*x_ptr[count+3];
39 
40  norm_array[2] += z_ptr[count]*x_ptr[count+1];
41  norm_array[2] -= z_ptr[count+1]*x_ptr[count];
42  norm_array[2] += z_ptr[count+2]*x_ptr[count+3];
43  norm_array[2] -= z_ptr[count+3]*x_ptr[count+2];
44 
45  }
46  a->norm_space[3*my_id]=norm_array[0];
47  a->norm_space[3*my_id+1]=norm_array[1];
48  a->norm_space[3*my_id+2]=norm_array[2];
49  }
50  else {
51  QDPIO::cout << "ord_xmay_normx_cdotzx_kernel_generic.h: len not divisible by 4" << std::endl;
52  QDP_abort(1);
53  }
54 
55 }
Complex a
Definition: invbicg.cc:95
int count
Definition: octave.h:14
void ord_xmay_normx_cdotzx_kernel(int lo, int hi, int my_id, ord_xmay_normx_cdotzx_arg *a)