/* Actual source code: dot.h */
/* $Id: dot.h,v 1.21 2001/09/07 20:07:42 bsmith Exp $ */
3: #ifndef DOT
#include "petsc.h"
6: EXTERN_C_BEGIN
/*
   Fortran multi-dot kernels fortranmdot1_ .. fortranmdot4_, declared only
   when PETSC_USE_FORTRAN_KERNEL_MDOT is defined.

   The C-side names end in an underscore and are remapped as follows:
     - PETSC_HAVE_FORTRAN_CAPS defined:            upper case, no underscore
     - else PETSC_HAVE_FORTRAN_UNDERSCORE missing: lower case, no underscore
     - otherwise:                                  name used as written

   NOTE(review): the meaning of each argument is fixed by the Fortran
   implementation, which is not visible from this header.
*/
#if defined(PETSC_USE_FORTRAN_KERNEL_MDOT)
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define fortranmdot4_ FORTRANMDOT4
#define fortranmdot3_ FORTRANMDOT3
#define fortranmdot2_ FORTRANMDOT2
#define fortranmdot1_ FORTRANMDOT1
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define fortranmdot4_ fortranmdot4
#define fortranmdot3_ fortranmdot3
#define fortranmdot2_ fortranmdot2
#define fortranmdot1_ fortranmdot1
#endif
EXTERN void fortranmdot4_(void *,void *,void *,void *,void *,int *,
                          void *,void *,void *,void *);
EXTERN void fortranmdot3_(void *,void *,void *,void *,int *,
                          void *,void *,void *);
EXTERN void fortranmdot2_(void *,void *,void *,int *,
                          void *,void *);
EXTERN void fortranmdot1_(void *,void *,int *,
                          void *);
#endif
/*
   Fortran kernel fortrannormsqr_, declared only when
   PETSC_USE_FORTRAN_KERNEL_NORM is defined.

   Name remapping: upper case without underscore under
   PETSC_HAVE_FORTRAN_CAPS; lower case without underscore when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined; unchanged otherwise.

   NOTE(review): argument meanings come from the Fortran source, not shown here.
*/
#if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define fortrannormsqr_ FORTRANNORMSQR
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define fortrannormsqr_ fortrannormsqr
#endif
EXTERN void fortrannormsqr_(void *,int *,void *);
#endif
/*
   Fortran kernel fortranmultaij_, declared only when
   PETSC_USE_FORTRAN_KERNEL_MULTAIJ is defined.

   Name remapping: upper case without underscore under
   PETSC_HAVE_FORTRAN_CAPS; lower case without underscore when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined; unchanged otherwise.

   NOTE(review): argument meanings come from the Fortran source, not shown here.
*/
#if defined(PETSC_USE_FORTRAN_KERNEL_MULTAIJ)
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define fortranmultaij_ FORTRANMULTAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define fortranmultaij_ fortranmultaij
#endif
EXTERN void fortranmultaij_(int *,void *,int *,int *,void *,void *);
#endif
/*
   Fortran kernel fortranmulttransposeaddaij_, declared only when
   PETSC_USE_FORTRAN_KERNEL_MULTTRANSPOSEAIJ is defined.

   Name remapping: upper case without underscore under
   PETSC_HAVE_FORTRAN_CAPS; lower case without underscore when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined; unchanged otherwise.

   NOTE(review): argument meanings come from the Fortran source, not shown here.
*/
#if defined(PETSC_USE_FORTRAN_KERNEL_MULTTRANSPOSEAIJ)
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define fortranmulttransposeaddaij_ FORTRANMULTTRANSPOSEADDAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define fortranmulttransposeaddaij_ fortranmulttransposeaddaij
#endif
EXTERN void fortranmulttransposeaddaij_(int *,void *,int *,int *,void *,void *);
#endif
/*
   Fortran kernel fortranmultaddaij_, declared only when
   PETSC_USE_FORTRAN_KERNEL_MULTADDAIJ is defined.

   Name remapping: upper case without underscore under
   PETSC_HAVE_FORTRAN_CAPS; lower case without underscore when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined; unchanged otherwise.

   NOTE(review): argument meanings come from the Fortran source, not shown here.
*/
#if defined(PETSC_USE_FORTRAN_KERNEL_MULTADDAIJ)
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define fortranmultaddaij_ FORTRANMULTADDAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define fortranmultaddaij_ fortranmultaddaij
#endif
EXTERN void fortranmultaddaij_(int *,void *,int *,int *,void *,void *,void *);
#endif
/*
   Fortran kernel fortransolveaij_, declared only when
   PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ is defined.

   Name remapping: upper case without underscore under
   PETSC_HAVE_FORTRAN_CAPS; lower case without underscore when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined; unchanged otherwise.

   NOTE(review): argument meanings come from the Fortran source, not shown here.
*/
#if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define fortransolveaij_ FORTRANSOLVEAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define fortransolveaij_ fortransolveaij
#endif
EXTERN void fortransolveaij_(int *,void *,int *,int *,int *,void *,void *);
#endif
/*
   Fortran relaxation kernels (forward, backward, and their "zero"
   counterparts), declared only when PETSC_USE_FORTRAN_KERNEL_RELAXAIJ is
   defined. All four share one argument list.

   Name remapping: upper case without underscore under
   PETSC_HAVE_FORTRAN_CAPS; lower case without underscore when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined; unchanged otherwise.

   NOTE(review): argument meanings come from the Fortran source, not shown here.
*/
#if defined(PETSC_USE_FORTRAN_KERNEL_RELAXAIJ)
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define fortranrelaxaijforward_ FORTRANRELAXAIJFORWARD
#define fortranrelaxaijbackward_ FORTRANRELAXAIJBACKWARD
#define fortranrelaxaijforwardzero_ FORTRANRELAXAIJFORWARDZERO
#define fortranrelaxaijbackwardzero_ FORTRANRELAXAIJBACKWARDZERO
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define fortranrelaxaijforward_ fortranrelaxaijforward
#define fortranrelaxaijbackward_ fortranrelaxaijbackward
#define fortranrelaxaijforwardzero_ fortranrelaxaijforwardzero
#define fortranrelaxaijbackwardzero_ fortranrelaxaijbackwardzero
#endif
EXTERN void fortranrelaxaijforward_(int *,PetscReal *,void *,int *,int *,int *,void *,void *);
EXTERN void fortranrelaxaijbackward_(int *,PetscReal *,void *,int *,int *,int *,void *,void *);
EXTERN void fortranrelaxaijforwardzero_(int *,PetscReal *,void *,int *,int *,int *,void *,void *);
EXTERN void fortranrelaxaijbackwardzero_(int *,PetscReal *,void *,int *,int *,int *,void *,void *);
#endif
/*
   Fortran kernel fortransolvebaij4_, declared only when
   PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ is defined.

   Name remapping: upper case without underscore under
   PETSC_HAVE_FORTRAN_CAPS; lower case without underscore when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined; unchanged otherwise.

   NOTE(review): argument meanings come from the Fortran source, not shown here.
*/
#if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJ)
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define fortransolvebaij4_ FORTRANSOLVEBAIJ4
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define fortransolvebaij4_ fortransolvebaij4
#endif
EXTERN void fortransolvebaij4_(int *,void *,int *,int *,int *,void *,void *,void *);
#endif
/*
   Fortran kernel fortransolvebaij4unroll_, declared only when
   PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL is defined.

   Name remapping: upper case without underscore under
   PETSC_HAVE_FORTRAN_CAPS; lower case without underscore when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined; unchanged otherwise.

   NOTE(review): argument meanings come from the Fortran source, not shown here.
*/
#if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJUNROLL)
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define fortransolvebaij4unroll_ FORTRANSOLVEBAIJ4UNROLL
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define fortransolvebaij4unroll_ fortransolvebaij4unroll
#endif
EXTERN void fortransolvebaij4unroll_(int *,void *,int *,int *,int *,void *,void *);
#endif
/*
   Fortran kernel fortransolvebaij4blas_, declared only when
   PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS is defined.

   Name remapping: upper case without underscore under
   PETSC_HAVE_FORTRAN_CAPS; lower case without underscore when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined; unchanged otherwise.

   NOTE(review): argument meanings come from the Fortran source, not shown here.
*/
#if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEBAIJBLAS)
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define fortransolvebaij4blas_ FORTRANSOLVEBAIJ4BLAS
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define fortransolvebaij4blas_ fortransolvebaij4blas
#endif
EXTERN void fortransolvebaij4blas_(int *,void *,int *,int *,int *,void *,void *,void *);
#endif
/*
   Fortran kernel fortranxtimesy_, declared only when
   PETSC_USE_FORTRAN_KERNEL_XTIMESY is defined.

   Name remapping: upper case without underscore under
   PETSC_HAVE_FORTRAN_CAPS; lower case without underscore when
   PETSC_HAVE_FORTRAN_UNDERSCORE is not defined; unchanged otherwise.

   NOTE(review): argument meanings come from the Fortran source, not shown here.
*/
#if defined(PETSC_USE_FORTRAN_KERNEL_XTIMESY)
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define fortranxtimesy_ FORTRANXTIMESY
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define fortranxtimesy_ fortranxtimesy
#endif
EXTERN void fortranxtimesy_(void *,void *,void *,int *);
#endif
129: EXTERN_C_END
/* ------------------------------------------------------------------- */
134: #if !defined(PETSC_USE_COMPLEX)
136: #ifdef PETSC_USE_UNROLL_KERNELS
/*
   DOT(sum,x,y,n) - adds x[0]*y[0] + ... + x[n-1]*y[n-1] to sum
   (real, 4-way unrolled variant).

   All four arguments must be modifiable lvalues: x and y are advanced past
   the entries consumed and n is left <= 0. n is expected to be non-negative.
   The expansion is a brace block, not an expression.
*/
#define DOT(sum,x,y,n) {\
  switch ((n) & 0x3) {                 /* peel off the n mod 4 leading entries */\
  case 3: sum += *x++ * *y++;          /* fall through */\
  case 2: sum += *x++ * *y++;          /* fall through */\
  case 1: sum += *x++ * *y++;\
          n -= 4;                      /* so the loop below runs exactly n/4 times */\
  case 0: break;\
  }\
  while (n > 0) {\
    sum += x[0]*y[0]+x[1]*y[1]+x[2]*y[2]+x[3]*y[3];\
    x += 4; y += 4;\
    n -= 4;\
  }\
}
/*
   DOT2(sum1,sum2,x,y1,y2,n) - adds the dot product of x with y1 to sum1 and
   of x with y2 to sum2, over n entries (real, 2-way unrolled variant).

   All arguments must be modifiable lvalues: x, y1 and y2 are advanced past
   the entries consumed and n is left at 0 for non-negative n.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  if ((n) & 0x1) {                     /* odd count: handle one entry first */\
    sum1 += *x * *y1++;\
    sum2 += *x++ * *y2++;\
    n--;\
  }\
  while (n > 0) {\
    sum1 += x[0]*y1[0]+x[1]*y1[1];\
    sum2 += x[0]*y2[0]+x[1]*y2[1];\
    x += 2; y1 += 2; y2 += 2;\
    n -= 2;\
  }\
}
/*
   SQR(sum,x,n) - adds x[0]*x[0] + ... + x[n-1]*x[n-1] to sum
   (real, 4-way unrolled variant).

   sum, x and n must be modifiable lvalues: x is advanced past the entries
   consumed and n is left <= 0. n is expected to be non-negative.
*/
#define SQR(sum,x,n) {\
  switch ((n) & 0x3) {                 /* peel off the n mod 4 leading entries */\
  case 3: sum += *x * *x; x++;         /* fall through */\
  case 2: sum += *x * *x; x++;         /* fall through */\
  case 1: sum += *x * *x; x++;\
          n -= 4;                      /* so the loop below runs exactly n/4 times */\
  case 0: break;\
  }\
  while (n > 0) {\
    sum += x[0]*x[0]+x[1]*x[1]+x[2]*x[2]+x[3]*x[3];\
    x += 4;\
    n -= 4;\
  }\
}
158: #elif defined(PETSC_USE_WHILE_KERNELS)
/*
   DOT(sum,x,y,n) - adds x[0]*y[0] + ... + x[n-1]*y[n-1] to sum
   (real, while-loop variant).

   All four arguments must be modifiable lvalues: x and y are advanced by n
   entries and n is left at -1 when it started non-negative. The "> 0" test
   makes a negative n a no-op instead of a runaway loop.
*/
#define DOT(sum,x,y,n) {\
  while (n-- > 0) sum += *x++ * *y++;\
}
/*
   DOT2(sum1,sum2,x,y1,y2,n) - adds the dot product of x with y1 to sum1 and
   of x with y2 to sum2, over n entries (real, while-loop variant).

   All arguments must be modifiable lvalues: the pointers are advanced by n
   entries and n is left at -1 when it started non-negative. The "> 0" test
   makes a negative n a no-op instead of a runaway loop.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  while (n-- > 0) {\
    sum1 += *x * *y1++;\
    sum2 += *x++ * *y2++;\
  }\
}
/*
   SQR(sum,x,n) - adds x[0]*x[0] + ... + x[n-1]*x[n-1] to sum
   (real, while-loop variant).

   sum, x and n must be modifiable lvalues: x is advanced by n entries and n
   is left at -1 when it started non-negative. The "> 0" test makes a
   negative n a no-op instead of a runaway loop.
*/
#define SQR(sum,x,n) {\
  while (n-- > 0) { sum += *x * *x; x++; }\
}
166: #elif defined(PETSC_USE_BLAS_KERNELS)
167: EXTERN double ddot_();
168: #define DOT(sum,x,y,n) {int one=1;
169: sum=ddot_(&n,x,&one,y,&one);}
170: #define DOT2(sum1,sum2,x,y1,y2,n) {int __i;
171: for(__i=0;__i<n;__i++){sum1+=x[__i]*y1[__i];sum2+=x[__i]*y2[__i];}}
172: #define SQR(sum,x,n) {int one=1;
173: sum=ddot_(&n,x,&one,x,&one);}
175: #else
/*
   DOT(sum,x,y,n) - adds x[0]*y[0] + ... + x[n-1]*y[n-1] to sum
   (real, plain indexed-loop variant).

   Only sum is modified; x, y and n are read. The arguments are parenthesised
   so that expressions such as "a+1" index correctly. Nothing is done when
   n <= 0.
*/
#define DOT(sum,x,y,n) {int __i;\
  for (__i=0; __i<(n); __i++) (sum) += (x)[__i]*(y)[__i];}
/*
   DOT2(sum1,sum2,x,y1,y2,n) - adds the dot product of x with y1 to sum1 and
   of x with y2 to sum2, over n entries (real, plain indexed-loop variant).

   Only sum1 and sum2 are modified. The arguments are parenthesised so that
   expressions such as "a+1" index correctly. Nothing is done when n <= 0.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {int __i;\
  for (__i=0; __i<(n); __i++) {\
    (sum1) += (x)[__i]*(y1)[__i];\
    (sum2) += (x)[__i]*(y2)[__i];\
  }}
/*
   SQR(sum,x,n) - adds x[0]*x[0] + ... + x[n-1]*x[n-1] to sum
   (real, plain indexed-loop variant).

   Only sum is modified. The arguments are parenthesised so that expressions
   such as "a+1" index correctly. Nothing is done when n <= 0.
*/
#define SQR(sum,x,n) {int __i;\
  for (__i=0; __i<(n); __i++) (sum) += (x)[__i]*(x)[__i];}
182: #endif
184: #else
186: #ifdef PETSC_USE_UNROLL_KERNELS
/*
   DOT(sum,x,y,n) - adds x[0]*conj(y[0]) + ... + x[n-1]*conj(y[n-1]) to sum
   (complex, 4-way unrolled variant).

   All four arguments must be modifiable lvalues: x and y are advanced past
   the entries consumed and n is left <= 0. n is expected to be non-negative.
*/
#define DOT(sum,x,y,n) {\
  switch ((n) & 0x3) {                 /* peel off the n mod 4 leading entries */\
  case 3: sum += *x * conj(*y); x++; y++;   /* fall through */\
  case 2: sum += *x * conj(*y); x++; y++;   /* fall through */\
  case 1: sum += *x * conj(*y); x++; y++;\
          n -= 4;                      /* so the loop below runs exactly n/4 times */\
  case 0: break;\
  }\
  while (n > 0) {\
    sum += x[0]*conj(y[0])+x[1]*conj(y[1])+x[2]*conj(y[2])+x[3]*conj(y[3]);\
    x += 4; y += 4;\
    n -= 4;\
  }\
}
/*
   DOT2(sum1,sum2,x,y1,y2,n) - adds the dot product of x with conj(y1) to
   sum1 and of x with conj(y2) to sum2, over n entries (complex, 2-way
   unrolled variant).

   All arguments must be modifiable lvalues: x, y1 and y2 are advanced past
   the entries consumed and n is left at 0 for non-negative n.

   The odd-count step increments y1 and y2 as separate statements; the
   previous form applied ++ to the result of conj(), which is not an lvalue
   and never advanced the pointers.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  if ((n) & 0x1) {                     /* odd count: handle one entry first */\
    sum1 += *x * conj(*y1);\
    sum2 += *x * conj(*y2);\
    x++; y1++; y2++;\
    n--;\
  }\
  while (n > 0) {\
    sum1 += x[0]*conj(y1[0])+x[1]*conj(y1[1]);\
    sum2 += x[0]*conj(y2[0])+x[1]*conj(y2[1]);\
    x += 2; y1 += 2; y2 += 2;\
    n -= 2;\
  }\
}
/*
   SQR(sum,x,n) - adds x[0]*conj(x[0]) + ... + x[n-1]*conj(x[n-1]) to sum
   (complex, 4-way unrolled variant).

   sum, x and n must be modifiable lvalues: x is advanced past the entries
   consumed and n is left <= 0. n is expected to be non-negative.
*/
#define SQR(sum,x,n) {\
  switch ((n) & 0x3) {                 /* peel off the n mod 4 leading entries */\
  case 3: sum += *x * conj(*x); x++;   /* fall through */\
  case 2: sum += *x * conj(*x); x++;   /* fall through */\
  case 1: sum += *x * conj(*x); x++;\
          n -= 4;                      /* so the loop below runs exactly n/4 times */\
  case 0: break;\
  }\
  while (n > 0) {\
    sum += x[0]*conj(x[0])+x[1]*conj(x[1])+x[2]*conj(x[2])+x[3]*conj(x[3]);\
    x += 4;\
    n -= 4;\
  }\
}
208: #elif defined(PETSC_USE_WHILE_KERNELS)
/*
   DOT(sum,x,y,n) - adds x[0]*conj(y[0]) + ... + x[n-1]*conj(y[n-1]) to sum
   (complex, while-loop variant).

   All four arguments must be modifiable lvalues: x and y are advanced by n
   entries and n is left at -1 when it started non-negative.
   The pointer increments are kept outside the conj() argument so the result
   does not depend on conj() evaluating its argument exactly once, and the
   "> 0" test makes a negative n a no-op instead of a runaway loop.
*/
#define DOT(sum,x,y,n) {\
  while (n-- > 0) { sum += *x * conj(*y); x++; y++; }\
}
/*
   DOT2(sum1,sum2,x,y1,y2,n) - adds the dot product of x with conj(y1) to
   sum1 and of x with conj(y2) to sum2, over n entries (complex, while-loop
   variant).

   All arguments must be modifiable lvalues: the pointers are advanced by n
   entries and n is left at -1 when it started non-negative. The "> 0" test
   makes a negative n a no-op instead of a runaway loop.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {\
  while (n-- > 0) {\
    sum1 += *x * conj(*y1);\
    sum2 += *x * conj(*y2);\
    x++; y1++; y2++;\
  }\
}
/*
   SQR(sum,x,n) - adds x[0]*conj(x[0]) + ... + x[n-1]*conj(x[n-1]) to sum
   (complex, while-loop variant).

   sum, x and n must be modifiable lvalues: x is advanced by n entries and n
   is left at -1 when it started non-negative. The "> 0" test makes a
   negative n a no-op instead of a runaway loop.
*/
#define SQR(sum,x,n) {\
  while (n-- > 0) { sum += *x * conj(*x); x++; }\
}
216: #else
/*
   DOT(sum,x,y,n) - adds x[0]*conj(y[0]) + ... + x[n-1]*conj(y[n-1]) to sum
   (complex, plain indexed-loop variant).

   Only sum is modified; x, y and n are read. The arguments are parenthesised
   so that expressions such as "a+1" index correctly. Nothing is done when
   n <= 0.
*/
#define DOT(sum,x,y,n) {int __i;\
  for (__i=0; __i<(n); __i++) (sum) += (x)[__i]*conj((y)[__i]);}
/*
   DOT2(sum1,sum2,x,y1,y2,n) - adds the dot product of x with conj(y1) to
   sum1 and of x with conj(y2) to sum2, over n entries (complex, plain
   indexed-loop variant).

   Only sum1 and sum2 are modified. The arguments are parenthesised so that
   expressions such as "a+1" index correctly. Nothing is done when n <= 0.
*/
#define DOT2(sum1,sum2,x,y1,y2,n) {int __i;\
  for (__i=0; __i<(n); __i++) {\
    (sum1) += (x)[__i]*conj((y1)[__i]);\
    (sum2) += (x)[__i]*conj((y2)[__i]);\
  }}
/*
   SQR(sum,x,n) - adds x[0]*conj(x[0]) + ... + x[n-1]*conj(x[n-1]) to sum
   (complex, plain indexed-loop variant).

   Only sum is modified. The arguments are parenthesised so that expressions
   such as "a+1" index correctly. Nothing is done when n <= 0.
*/
#define SQR(sum,x,n) {int __i;\
  for (__i=0; __i<(n); __i++) (sum) += (x)[__i]*conj((x)[__i]);}
223: #endif
225: #endif
227: #endif