CHROMA
ord_cxmayf_kernel_sse.h
Go to the documentation of this file.
1 
2 #include <xmmintrin.h>
3 #include <pmmintrin.h>
4 
5 inline
6 void ord_cxmayf_kernel(int lo, int hi, int my_id, ord_cxmayf_arg* arg)
7 {
8  int atom=arg->atom;
9  int low = atom*lo;
10  int len = atom*(hi - lo);
11 
12  REAL32* x_ptr = &(arg->x_ptr[low]);
13  REAL32* y_ptr = &(arg->y_ptr[low]);
14 
15  REAL32 a_re = arg->a_re;
16  REAL32 a_im = arg->a_im;
17 
18  __m128 av_re = _mm_set_ps(a_re, a_re, a_re, a_re);
19  __m128 av_im = _mm_set_ps(-a_im,a_im,-a_im, a_im);
20 
21  if( len % 4 == 0){
22  for(int count=0; count < len; count+=4) {
23  __m128 xv = _mm_load_ps(&x_ptr[count]);
24  __m128 yv = _mm_load_ps(&y_ptr[count]);
25  __m128 yv2 = _mm_shuffle_ps( yv,yv, 0xb1);
26 
27  __m128 t1 = _mm_mul_ps(av_re, yv);
28  __m128 t2 = _mm_sub_ps(xv, t1);
29  __m128 t3 = _mm_mul_ps(av_im, yv2);
30  xv = _mm_add_ps(t2, t3);
31 
32  _mm_store_ps(&x_ptr[count], xv);
33 
34  }
35  }
36  else {
37  QDPIO::cout << "ord_cxmayf_kernel_sse.h: len not divisible by 4 " << std::endl;
38  QDP_abort(1);
39  }
40 
41 }
int count
Definition: octave.h:14
void ord_cxmayf_kernel(int lo, int hi, int my_id, ord_cxmayf_arg *arg)