/* Actual source code: dot.h */

  1: /* $Id: dot.h,v 1.21 2001/09/07 20:07:42 bsmith Exp $ */

  3: #ifndef DOT
 4:  #include petsc.h

  6: EXTERN_C_BEGIN

  8: #if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
  9: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 10: #define fortranmdot4_      FORTRANMDOT4
 11: #define fortranmdot3_      FORTRANMDOT3
 12: #define fortranmdot2_      FORTRANMDOT2
 13: #define fortranmdot1_      FORTRANMDOT1
 14: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 15: #define fortranmdot4_      fortranmdot4
 16: #define fortranmdot3_      fortranmdot3
 17: #define fortranmdot2_      fortranmdot2
 18: #define fortranmdot1_      fortranmdot1
 19: #endif
 20: EXTERN void fortranmdot4_(void *,void *,void *,void *,void *,int *,
 21:                            void *,void *,void *,void *);
 22: EXTERN void fortranmdot3_(void *,void *,void *,void *,int *,
 23:                            void *,void *,void *);
 24: EXTERN void fortranmdot2_(void *,void *,void *,int *,
 25:                            void *,void *);
 26: EXTERN void fortranmdot1_(void *,void *,int *,
 27:                            void *);
 28: #endif

 30: #if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
 31: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 32: #define fortrannormsqr_    FORTRANNORMSQR
 33: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 34: #define fortrannormsqr_    fortrannormsqr
 35: #endif
 36: EXTERN void fortrannormsqr_(void *,int *,void *);
 37: #endif

 39: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJ)
 40: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 41: #define fortranmultaij_    FORTRANMULTAIJ
 42: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 43: #define fortranmultaij_    fortranmultaij
 44: #endif
 45: EXTERN void fortranmultaij_(int *,void*,int *,int *,void *,void*);
 46: #endif

 48: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTTRANSPOSEAIJ)
 49: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 50: #define fortranmulttransposeaddaij_    FORTRANMULTTRANSPOSEADDAIJ
 51: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 52: #define fortranmulttransposeaddaij_    fortranmulttransposeaddaij
 53: #endif
 54: EXTERN void fortranmulttransposeaddaij_(int *,void*,int *,int *,void *,void*);
 55: #endif

 57: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJ)
 58: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 59: #define fortranmultaddaij_ FORTRANMULTADDAIJ
 60: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 61: #define fortranmultaddaij_ fortranmultaddaij
 62: #endif
 63: EXTERN void fortranmultaddaij_(int *,void*,int *,int *,void *,void*,void*);
 64: #endif

 66: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
 67: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 68: #define fortransolveaij_   FORTRANSOLVEAIJ
 69: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 70: #define fortransolveaij_   fortransolveaij
 71: #endif
 72: EXTERN void fortransolveaij_(int *,void*,int *,int *,int*,void *,void*);
 73: #endif

 75: #if defined(PETSC_USE_FORTRAN_KERNEL_RELAXAIJ)
 76: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 77: #define fortranrelaxaijforward_   FORTRANRELAXAIJFORWARD
 78: #define fortranrelaxaijbackward_   FORTRANRELAXAIJBACKWARD
 79: #define fortranrelaxaijforwardzero_   FORTRANRELAXAIJFORWARDZERO
 80: #define fortranrelaxaijbackwardzero_   FORTRANRELAXAIJBACKWARDZERO
 81: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 82: #define fortranrelaxaijforward_   fortranrelaxaijforward
 83: #define fortranrelaxaijbackward_   fortranrelaxaijbackward
 84: #define fortranrelaxaijforwardzero_   fortranrelaxaijforwardzero
 85: #define fortranrelaxaijbackwardzero_   fortranrelaxaijbackwardzero
 86: #endif
 87: EXTERN void fortranrelaxaijforward_(int *,PetscReal*,void*,int *,int *,int*,void *,void*);
 88: EXTERN void fortranrelaxaijbackward_(int *,PetscReal*,void*,int *,int *,int*,void *,void*);
 89: EXTERN void fortranrelaxaijforwardzero_(int *,PetscReal*,void*,int *,int *,int*,void *,void*);
 90: EXTERN void fortranrelaxaijbackwardzero_(int *,PetscReal*,void*,int *,int *,int*,void *,void*);
 91: #endif

 93: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ)
 94: #if defined(PETSC_HAVE_FORTRAN_CAPS)
 95: #define fortransolvebaij4_         FORTRANSOLVEBAIJ4
 96: #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
 97: #define fortransolvebaij4_          fortransolvebaij4
 98: #endif
 99: EXTERN void fortransolvebaij4_(int *,void*,int *,int *,int*,void *,void*,void *);
100: #endif

/*
   Unrolled BAIJ solve kernel, declared only when
   PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL is set.  It takes one argument
   fewer than fortransolvebaij4_.  The trailing-underscore name is remapped to
   upper case or to the bare lower-case name depending on the
   PETSC_HAVE_FORTRAN_* settings.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortransolvebaij4unroll_ FORTRANSOLVEBAIJ4UNROLL
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortransolvebaij4unroll_ fortransolvebaij4unroll
#  endif
EXTERN void fortransolvebaij4unroll_(int *, void *, int *, int *, int *, void *, void *);
#endif

/*
   BLAS-flavoured BAIJ solve kernel, declared only when
   PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS is set.  The trailing-underscore name
   is remapped to upper case or to the bare lower-case name depending on the
   PETSC_HAVE_FORTRAN_* settings.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS
#  ifdef PETSC_HAVE_FORTRAN_CAPS
#    define fortransolvebaij4blas_ FORTRANSOLVEBAIJ4BLAS
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortransolvebaij4blas_ fortransolvebaij4blas
#  endif
EXTERN void fortransolvebaij4blas_(int *, void *, int *, int *, int *, void *, void *, void *);
#endif

/*
   "x times y" kernel, declared only when PETSC_USE_FORTRAN_KERNEL_XTIMESY is
   set.  The trailing-underscore name is remapped to upper case or to the bare
   lower-case name depending on the PETSC_HAVE_FORTRAN_* settings.
*/
#ifdef PETSC_USE_FORTRAN_KERNEL_XTIMESY
#  if defined(PETSC_HAVE_FORTRAN_CAPS)
#    define fortranxtimesy_ FORTRANXTIMESY
#  elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#    define fortranxtimesy_ fortranxtimesy
#  endif
EXTERN void fortranxtimesy_(void *, void *, void *, int *);
#endif

129: EXTERN_C_END

131: /* ------------------------------------------------------------------- */


134: #if !defined(PETSC_USE_COMPLEX)

136: #ifdef PETSC_USE_UNROLL_KERNELS
/*
   DOT - adds the dot product of two real arrays to sum, four entries per pass.

   sum  - accumulator; the products are added to whatever it already holds
   x,y  - pointers to the two arrays; advanced as entries are consumed
   n    - number of entries; used as the loop counter and left <= 0

   The switch first consumes the (n & 0x3) leftover entries by falling through
   its cases; the while loop then handles the remainder in groups of four.
   Every argument must be a modifiable lvalue because the macro updates it.
   Each line of the body ends in a backslash so that it stays part of the macro.
*/
#define DOT(sum,x,y,n) {\
  switch (n & 0x3) {\
    case 3: sum += *x++ * *y++; /* fall through */\
    case 2: sum += *x++ * *y++; /* fall through */\
    case 1: sum += *x++ * *y++;\
            n -= 4;\
    case 0: break;\
  }\
  while (n > 0) {\
    sum += x[0]*y[0] + x[1]*y[1] + x[2]*y[2] + x[3]*y[3];\
    x += 4; y += 4;\
    n -= 4;\
  }\
}
/*
   DOT2 - adds x.y1 to sum1 and x.y2 to sum2 in one sweep over x (real
   scalars, two entries per pass).

   An odd leading entry is handled first; the while loop then consumes pairs.
   x, y1, y2 are advanced and n is counted down to 0, so every argument must be
   a modifiable lvalue.  Each line of the body ends in a backslash so that it
   stays part of the macro.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  if (n & 0x1) {\
    sum1 += *x * *y1++;\
    sum2 += *x++ * *y2++;\
    n--;\
  }\
  while (n > 0) {\
    sum1 += x[0]*y1[0] + x[1]*y1[1];\
    sum2 += x[0]*y2[0] + x[1]*y2[1];\
    x += 2; y1 += 2; y2 += 2;\
    n -= 2;\
  }\
}
/*
   SQR - adds the sum of squares of a real array to sum, four entries per pass.

   The switch consumes the (n & 0x3) leftover entries by falling through its
   cases; the while loop then handles the remainder in groups of four.  x is
   advanced and n is left <= 0, so both must be modifiable lvalues.  Each line
   of the body ends in a backslash so that it stays part of the macro.
*/
#define SQR(sum,x,n) {\
  switch (n & 0x3) {\
    case 3: sum += *x * *x; x++; /* fall through */\
    case 2: sum += *x * *x; x++; /* fall through */\
    case 1: sum += *x * *x; x++;\
            n -= 4;\
    case 0: break;\
  }\
  while (n > 0) {\
    sum += x[0]*x[0] + x[1]*x[1] + x[2]*x[2] + x[3]*x[3];\
    x += 4;\
    n -= 4;\
  }\
}

158: #elif defined(PETSC_USE_WHILE_KERNELS)
/*
   DOT - adds the dot product of two real arrays to sum using a counted-down
   while loop.  x and y are advanced past the data and n ends at -1, so every
   argument must be a modifiable lvalue.  The first line ends in a backslash
   so that the loop stays part of the macro.
*/
#define DOT(sum,x,y,n) {\
  while (n--) sum += *x++ * *y++;\
}
/*
   DOT2 - adds x.y1 to sum1 and x.y2 to sum2 using a counted-down while loop
   (real scalars).  The pointers are advanced and n ends at -1, so every
   argument must be a modifiable lvalue.  The first line ends in a backslash
   so that the loop stays part of the macro.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  while (n--) {\
    sum1 += *x * *y1++;\
    sum2 += *x++ * *y2++;\
  }\
}
/*
   SQR - adds the sum of squares of a real array to sum using a counted-down
   while loop.  x is advanced and n ends at -1, so both must be modifiable
   lvalues.  The first line ends in a backslash so that the loop stays part of
   the macro.
*/
#define SQR(sum,x,n) {\
  while (n--) {\
    sum += *x * *x;\
    x++;\
  }\
}

166: #elif defined(PETSC_USE_BLAS_KERNELS)
167: EXTERN double ddot_();
168: #define DOT(sum,x,y,n) {int one=1;
169: sum=ddot_(&n,x,&one,y,&one);}
/*
   DOT2 - adds x.y1 to sum1 and x.y2 to sum2 with a plain indexed loop (real
   scalars).  No argument is modified apart from the two accumulators.
   Arguments are parenthesized so that expressions such as `a+1` can be passed.
   The first line ends in a backslash so that the loop stays part of the macro.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {int __i;\
  for (__i=0; __i<(n); __i++) {\
    (sum1) += (x)[__i]*(y1)[__i];\
    (sum2) += (x)[__i]*(y2)[__i];\
  }\
}
/*
   SQR - adds the sum of squares of a real array to sum by calling ddot_ with
   x as both vectors and both strides set to 1.

   The address of n is taken, so n must be an int lvalue.  The result is
   accumulated with +=, the same as every other SQR variant in this header.
   The first line ends in a backslash so that the call stays part of the macro.
*/
#define SQR(sum,x,n)   {int one=1;\
  sum += ddot_(&n,x,&one,x,&one);\
}

175: #else
/*
   DOT - adds the dot product of two real arrays to sum with a plain indexed
   loop.  Only sum is modified.  Arguments are parenthesized so that
   expressions such as `a+1` can be passed.  The first line ends in a
   backslash so that the loop stays part of the macro.
*/
#define DOT(sum,x,y,n) {int __i;\
  for (__i=0; __i<(n); __i++) (sum) += (x)[__i]*(y)[__i];\
}
/*
   DOT2 - adds x.y1 to sum1 and x.y2 to sum2 with a plain indexed loop (real
   scalars).  Only the two accumulators are modified.  Arguments are
   parenthesized so that expressions such as `a+1` can be passed.  The first
   line ends in a backslash so that the loop stays part of the macro.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {int __i;\
  for (__i=0; __i<(n); __i++) {\
    (sum1) += (x)[__i]*(y1)[__i];\
    (sum2) += (x)[__i]*(y2)[__i];\
  }\
}
/*
   SQR - adds the sum of squares of a real array to sum with a plain indexed
   loop.  Only sum is modified.  Arguments are parenthesized so that
   expressions such as `a+1` can be passed.  The first line ends in a
   backslash so that the loop stays part of the macro.
*/
#define SQR(sum,x,n)   {int __i;\
  for (__i=0; __i<(n); __i++) (sum) += (x)[__i]*(x)[__i];\
}
182: #endif

184: #else

186: #ifdef PETSC_USE_UNROLL_KERNELS
/*
   DOT - adds sum_i x[i]*conj(y[i]) to sum for complex scalars, four entries
   per pass.

   The switch consumes the (n & 0x3) leftover entries by falling through its
   cases; the while loop then handles the remainder in groups of four.  x and
   y are advanced and n is left <= 0, so every argument must be a modifiable
   lvalue.  Each line of the body ends in a backslash so that it stays part of
   the macro.
*/
#define DOT(sum,x,y,n) {\
  switch (n & 0x3) {\
    case 3: sum += *x * conj(*y); x++; y++; /* fall through */\
    case 2: sum += *x * conj(*y); x++; y++; /* fall through */\
    case 1: sum += *x * conj(*y); x++; y++;\
            n -= 4;\
    case 0: break;\
  }\
  while (n > 0) {\
    sum += x[0]*conj(y[0]) + x[1]*conj(y[1]) + x[2]*conj(y[2]) + x[3]*conj(y[3]);\
    x += 4; y += 4;\
    n -= 4;\
  }\
}
/*
   DOT2 - adds sum_i x[i]*conj(y1[i]) to sum1 and sum_i x[i]*conj(y2[i]) to
   sum2 for complex scalars, two entries per pass.

   An odd leading entry is handled first; the while loop then consumes pairs.
   The odd-entry step advances the pointers with separate statements: applying
   ++ to the value returned by conj() is not valid because that value is not an
   lvalue.  x, y1, y2 are advanced and n is counted down to 0, so every
   argument must be a modifiable lvalue.  Each line of the body ends in a
   backslash so that it stays part of the macro.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  if (n & 0x1) {\
    sum1 += *x * conj(*y1);\
    sum2 += *x * conj(*y2);\
    x++; y1++; y2++;\
    n--;\
  }\
  while (n > 0) {\
    sum1 += x[0]*conj(y1[0]) + x[1]*conj(y1[1]);\
    sum2 += x[0]*conj(y2[0]) + x[1]*conj(y2[1]);\
    x += 2; y1 += 2; y2 += 2;\
    n -= 2;\
  }\
}
/*
   SQR - adds sum_i x[i]*conj(x[i]) to sum for complex scalars, four entries
   per pass.

   The switch consumes the (n & 0x3) leftover entries by falling through its
   cases; the while loop then handles the remainder in groups of four.  x is
   advanced and n is left <= 0, so both must be modifiable lvalues.  Each line
   of the body ends in a backslash so that it stays part of the macro.
*/
#define SQR(sum,x,n) {\
  switch (n & 0x3) {\
    case 3: sum += *x * conj(*x); x++; /* fall through */\
    case 2: sum += *x * conj(*x); x++; /* fall through */\
    case 1: sum += *x * conj(*x); x++;\
            n -= 4;\
    case 0: break;\
  }\
  while (n > 0) {\
    sum += x[0]*conj(x[0]) + x[1]*conj(x[1]) + x[2]*conj(x[2]) + x[3]*conj(x[3]);\
    x += 4;\
    n -= 4;\
  }\
}

208: #elif defined(PETSC_USE_WHILE_KERNELS)
/*
   DOT - adds sum_i x[i]*conj(y[i]) to sum for complex scalars using a
   counted-down while loop.  x and y are advanced and n ends at -1, so every
   argument must be a modifiable lvalue.  The first line ends in a backslash
   so that the loop stays part of the macro.
*/
#define DOT(sum,x,y,n) {\
  while (n--) sum += *x++ * conj(*y++);\
}
/*
   DOT2 - adds sum_i x[i]*conj(y1[i]) to sum1 and sum_i x[i]*conj(y2[i]) to
   sum2 for complex scalars using a counted-down while loop.  The pointers are
   advanced and n ends at -1, so every argument must be a modifiable lvalue.
   The first line ends in a backslash so that the loop stays part of the macro.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  while (n--) {\
    sum1 += *x * conj(*y1);\
    sum2 += *x * conj(*y2);\
    x++; y1++; y2++;\
  }\
}
/*
   SQR - adds sum_i x[i]*conj(x[i]) to sum for complex scalars using a
   counted-down while loop.  x is advanced and n ends at -1, so both must be
   modifiable lvalues.  The first line ends in a backslash so that the loop
   stays part of the macro.
*/
#define SQR(sum,x,n)   {\
  while (n--) {\
    sum += *x * conj(*x);\
    x++;\
  }\
}

216: #else
/*
   DOT - adds sum_i x[i]*conj(y[i]) to sum for complex scalars with a plain
   indexed loop.  Only sum is modified.  Arguments are parenthesized so that
   expressions such as `a+1` can be passed.  The first line ends in a
   backslash so that the loop stays part of the macro.
*/
#define DOT(sum,x,y,n) {int __i;\
  for (__i=0; __i<(n); __i++) (sum) += (x)[__i]*conj((y)[__i]);\
}
/*
   DOT2 - adds sum_i x[i]*conj(y1[i]) to sum1 and sum_i x[i]*conj(y2[i]) to
   sum2 for complex scalars with a plain indexed loop.  Only the two
   accumulators are modified.  Arguments are parenthesized so that expressions
   such as `a+1` can be passed.  The first line ends in a backslash so that
   the loop stays part of the macro.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {int __i;\
  for (__i=0; __i<(n); __i++) {\
    (sum1) += (x)[__i]*conj((y1)[__i]);\
    (sum2) += (x)[__i]*conj((y2)[__i]);\
  }\
}
/*
   SQR - adds sum_i x[i]*conj(x[i]) to sum for complex scalars with a plain
   indexed loop.  Only sum is modified.  Arguments are parenthesized so that
   expressions such as `a+1` can be passed.  The first line ends in a
   backslash so that the loop stays part of the macro.
*/
#define SQR(sum,x,n)   {int __i;\
  for (__i=0; __i<(n); __i++) (sum) += (x)[__i]*conj((x)[__i]);\
}
223: #endif

225: #endif

227: #endif