Actual source code: spops.h

  1: /* $Id: spops.h,v 1.5 2000/01/11 20:58:48 bsmith Exp $ */

  3: #ifndef SPARSEDENSESMAXPY


  6: /* take (x,i) into dense vector r; there are nnz entries in (x,i)
  7:    r(xi) -= alpha * xv */
  8: #ifdef PETSC_USE_UNROLL_KERNELS
  9: #define SPARSEDENSESMAXPY(r,alpha,xv,xi,nnz) {int __noff;
 10: __noff = nnz & 0x3;
 11: switch (__noff) {
 12: case 3: r[xi[2]] -= alpha * xv[2];
 13: case 2: r[xi[1]] -= alpha * xv[1];
 14: case 1: r[xi[0]] -= alpha * xv[0];
 15: nnz -= 4;xi+=__noff;xv+=__noff;
 16: }
 17: while (nnz > 0) {
 18: r[xi[0]] -= alpha * xv[0];r[xi[1]] -= alpha * xv[1];
 19: r[xi[2]] -= alpha * xv[2];r[xi[3]] -= alpha * xv[3];
 20: xi  += 4;xv  += 4;nnz -= 4;}}/*}*/

 22: #elif defined(PETSC_USE_WHILE_KERNELS)
 23: #define SPARSEDENSESMAXPY(r,alpha,xv,xi,nnz) {
 24: while (nnz-->0) r[*xi++] -= alpha * *xv++;}

 26: #elif defined(PETSC_USE_FOR_KERNELS)
 27: #define SPARSEDENSESMAXPY(r,alpha,xv,xi,nnz) {
 28:  int __i,__j1,__j2; double __s1,__s2;
 29: for(__i=0;__i<nnz-1;__i+=2){__j1=xi[__i];__j2=xi[__i+1];__s1=alpha*xv[__i];
 30: __s2=alpha*xv[__i+1];__s1=r[__j1]-__s1;__s2=r[__j2]-__s2;
 31: r[__j1]=__s1;r[__j2]=__s2;}
 32: if (nnz & 0x1) r[xi[__i]] -= alpha * xv[__i];}

 34: #else
 35: #define SPARSEDENSESMAXPY(r,alpha,xv,xi,nnz) {
 36: int __i;
 37: for(__i=0;__i<nnz;__i++)r[xi[__i]] -= alpha * xv[__i];}
 38: #endif

 40: /* Form sum -= r[xi] * xv; */
 41: #ifdef PETSC_USE_UNROLL_KERNELS
 42: #define SPARSEDENSEMDOT(sum,r,xv,xi,nnz) {
 43: if (nnz > 0) {
 44: switch (nnz & 0x3) {
 45: case 3: sum -= *xv++ * r[*xi++];
 46: case 2: sum -= *xv++ * r[*xi++];
 47: case 1: sum -= *xv++ * r[*xi++];
 48: nnz -= 4;}
 49: while (nnz > 0) {
 50: sum = sum - xv[0] * r[xi[0]] - xv[1] * r[xi[1]] -
 51:         xv[2] * r[xi[2]] - xv[3] * r[xi[3]];
 52: xv  += 4; xi += 4; nnz -= 4; }}}

 54: #elif defined(PETSC_USE_WHILE_KERNELS)
 55: #define SPARSEDENSEMDOT(sum,r,xv,xi,nnz) {
 56: while (nnz--) sum -= *xv++ * r[*xi++];}

 58: #elif defined(PETSC_USE_FOR_KERNELS) && defined(MEMQUEST)
 59: #define SPARSEDENSEMDOT(sum,r,xv,xi,nnz) {
 60: int __i,__i1,__i2;
 61: for(__i=0;__i<nnz-1;__i+=2) {__i1 = xi[__i]; __i2=xi[__i+1];
 62: sum -= (xv[__i]*r[__i1] + xv[__i+1]*r[__i2]);}
 63: if (nnz & 0x1) sum -= xv[__i] * r[xi[__i]];}

 65: #else
 66: #define SPARSEDENSEMDOT(sum,r,xv,xi,nnz) {
 67: int __i;
 68: for(__i=0;__i<nnz;__i++) sum -= xv[__i] * r[xi[__i]];}
 69: #endif



 73: /* Form sum += r[xi] * xv; */
 74: #ifdef PETSC_USE_UNROLL_KERNELS
 75: #define SPARSEDENSEDOT(sum,r,xv,xi,nnz) {
 76: if (nnz > 0) {
 77: switch (nnz & 0x3) {
 78: case 3: sum += *xv++ * r[*xi++];
 79: case 2: sum += *xv++ * r[*xi++];
 80: case 1: sum += *xv++ * r[*xi++];
 81: nnz -= 4;}
 82: while (nnz > 0) {
 83: sum = sum + xv[0] * r[xi[0]] + xv[1] * r[xi[1]] +
 84:         xv[2] * r[xi[2]] + xv[3] * r[xi[3]];
 85: xv  += 4; xi += 4; nnz -= 4; }}}

 87: #elif defined(PETSC_USE_WHILE_KERNELS)
 88: #define SPARSEDENSEDOT(sum,r,xv,xi,nnz) {
 89: while (nnz--) sum += *xv++ * r[*xi++];}

 91: #elif defined(PETSC_USE_FOR_KERNELS) && defined(MEMQUEST)
 92: #define SPARSEDENSEDOT(sum,r,xv,xi,nnz) {
 93: int __i,__i1,__i2;
 94: for(__i=0;__i<nnz-1;__i+=2) {__i1 = xi[__i]; __i2=xi[__i+1];
 95: sum += (xv[__i]*r[__i1] + xv[__i+1]*r[__i2]);}
 96: if (nnz & 0x1) sum += xv[__i] * r[xi[__i]];}

 98: #else
 99: #define SPARSEDENSEDOT(sum,r,xv,xi,nnz) {
100:  int __i;
101: for(__i=0;__i<nnz;__i++) sum += xv[__i] * r[xi[__i]];}
102: #endif

104: /* Form sum += r[map[xi]] * xv; */
105: #ifdef PETSC_USE_UNROLL_KERNELS
106: #define SPARSEDENSEMAPDOT(sum,r,xv,xi,map,nnz) {
107: if (nnz > 0) {
108: switch (nnz & 0x3) {
109: case 3: sum += *xv++ * r[map[*xi++]];
110: case 2: sum += *xv++ * r[map[*xi++]];
111: case 1: sum += *xv++ * r[map[*xi++]];
112: nnz -= 4;}
113: while (nnz > 0) {
114: sum = sum + xv[0] * r[map[xi[0]]] + xv[1] * r[map[xi[1]]] +
115:         xv[2] * r[map[xi[2]]] + xv[3] * r[map[xi[3]]];
116: xv  += 4; xi += 4; nnz -= 4; }}}

118: #elif defined(PETSC_USE_WHILE_KERNELS)
119: #define SPARSEDENSEMAPDOT(sum,r,xv,xi,map,nnz) {
120: while (nnz--) sum += *xv++ * r[map[*xi++]];}

122: #else
123: #define SPARSEDENSEMAPDOT(sum,r,xv,xi,map,nnz) {
124: int __i;
125: for(__i=0;__i<nnz;__i++) sum += xv[__i] * r[map[xi[__i]]];}
126: #endif

128: /* Gather xv = r[xi] */
129: #ifdef PETSC_USE_UNROLL_KERNELS
130: #define GATHER(xv,xi,r,nz) {int __noff;
131: if (nz > 0) {
132: __noff = nz & 0x3;
133: switch (nz & 0x3) {
134: case 3: xv[2] = r[xi[2]];
135: case 2: xv[1] = r[xi[1]];
136: case 1: xv[0] = r[xi[0]];
137: nz -= 4;xv+=__noff;xi+=__noff;}
138: while (nz > 0) {
139: xv[0] = r[xi[0]]; xv[1] = r[xi[1]]; xv[2] = r[xi[2]]; xv[3] = r[xi[3]];
140: xi  += 4;xv  += 4;nz -= 4;}}}

142: #elif defined(PETSC_USE_WHILE_KERNELS)
143: #define GATHER(xv,xi,r,nz) while (nz--) *xv++ = r[*xi++];

145: #elif defined(PETSC_USE_FOR_KERNELS)
146: #define GATHER(xv,xi,r,nz) {int __i; double __s1,__s2;
147: for(__i=0;__i<nz-1;__i+=2){__s1=r[xi[__i]];__s2=r[xi[__i+1]];
148: xv[__i]=__s1;xv[__i+1]=__s2;}if ((nz)&0x1) xv[__i]=r[xi[__i]];}

150: #else
151: #define GATHER(xv,xi,r,nz) {int __i;for(__i=0;__i<nz;__i++)xv[__i]=r[xi[__i]];}
152: #endif

154: /* Scatter r[xi] = xv */
155: #ifdef PETSC_USE_UNROLL_KERNELS
156: #define SCATTER(xv,xi,r,nz) 
157: if (nz > 0) {
158: switch (nz & 0x3) {
159: case 3: r[*xi++] = *xv++;
160: case 2: r[*xi++] = *xv++;
161: case 1: r[*xi++] = *xv++;
162: nz -= 4;}
163: while (nz > 0) {
164: r[xi[0]]=xv[0]; r[xi[1]]=xv[1]; r[xi[2]]=xv[2]; r[xi[3]]=xv[3];
165: xi  += 4;xv  += 4;nz -= 4;}}

167: #elif defined(PETSC_USE_WHILE_KERNELS)
168: #define SCATTER(xv,xi,r,nz) while (nz--) r[*xi++]=*xv++;

170: #elif defined(PETSC_USE_FOR_KERNELS)
171: #define SCATTER(xv,xi,r,nz) {int __i; double __s1,__s2;
172: for(__i=0;__i<nz-1;__i+=2){__s1=xv[__i];__s2=xv[__i+1];
173: r[xi[__i]]=__s1;r[xi[__i+1]]=__s2;}if ((nz)&0x1)r[xi[__i]]=xv[__i];}

175: #else
176: #define SCATTER(xv,xi,r,nz) {int __i;
177: for(__i=0;__i<nz;__i++)r[xi[__i]]=xv[__i];}
178: #endif

180: /* Scatter r[xi] = val */
181: #ifdef PETSC_USE_UNROLL_KERNELS
182: #define SCATTERVAL(r,xi,n,val) 
183: switch (n & 0x3) {
184: case 3: r[*xi++] = val;
185: case 2: r[*xi++] = val;
186: case 1: r[*xi++] = val;
187: n -= 4;}
188: while (n > 0) {
189: r[xi[0]]=val; r[xi[1]]=val; r[xi[2]]=val; r[xi[3]]=val;xi  += 4;n -= 4;}

191: #elif defined(PETSC_USE_WHILE_KERNELS)
192: #define SCATTERVAL(r,xi,n,val) while (n--) r[*xi++]=val;

194: #else
195: #define SCATTERVAL(r,xi,n,val) {int __i;for(__i=0;__i<n;__i++)r[xi[__i]]=val;}
196: #endif

198: /* Copy vo[xi] = vi[xi] */
199: #ifdef PETSC_USE_UNROLL_KERNELS
200: #define COPYPERM(vo,xi,vi,n) 
201: switch (n & 0x3) {
202: case 3: vo[*xi] = vi[*xi];xi++;
203: case 2: vo[*xi] = vi[*xi];xi++;
204: case 1: vo[*xi] = vi[*xi];xi++;
205: n -= 4;}
206: while (n > 0) {
207: vo[xi[0]]=vi[xi[0]]; vo[xi[1]]=vi[xi[1]]; vo[xi[2]]=vi[xi[2]]; 
208: vo[xi[3]]=vi[xi[3]];xi  += 4;n -= 4;}}

210: #elif defined(PETSC_USE_WHILE_KERNELS)
211: #define COPYPERM(vo,xi,vi,n) while (n--) {vo[*xi]=vi[*xi];xi++;}

213: #else
214: #define COPYPERM(vo,xi,vi,n) {int __i;
215: for(__i=0;__i<n;__i++)vo[xi[__i]]=vi[xi[__i]];}
216: #endif

218: /* Scale sparse vector v[xi] *= a */
219: #ifdef PETSC_USE_UNROLL_KERNELS
220: #define SPARSESCALE(v,xi,n,val) {
221: switch (n & 0x3) {
222: case 3: v[*xi++] *= val;
223: case 2: v[*xi++] *= val;
224: case 1: v[*xi++] *= val;
225: n -= 4;}
226: while (n > 0) {
227: v[xi[0]]*=val;vo[xi[1]]*=val; vo[xi[2]]*=val;vo[xi[3]]*=val;xi  += 4;n -= 4;}}}

229: #elif defined(PETSC_USE_WHILE_KERNELS)
230: #define SPARSESCALE(v,xi,n,val) {
231: while (n--) v[*xi++] *= val;}

233: #else
234: #define SPARSESCALE(v,xi,n,val) {int __i;
235: for(__i=0;__i<n;__i++)v[xi[__i]*=val;}
236: #endif

238: /* sparse dot sum = sum(a[xi] * b[xi]) */
239: #ifdef PETSC_USE_UNROLL_KERNELS
240: #define SPARSEDOT(sum,a,b,xi,n) {
241: switch (n & 0x3) {
242: case 3: sum += a[*xi] * b[*xi]; xi++;
243: case 2: sum += a[*xi] * b[*xi]; xi++;
244: case 1: sum += a[*xi] * b[*xi]; xi++;
245: n -= 4;}
246: while (n > 0) {
247: sum+=a[xi[0]]*b[xi[0]]+a[xi[1]]*b[xi[1]]+a[xi[2]]*b[xi[2]]+a[xi[3]]*b[xi[3]];
248: xi  += 4;n -= 4;}}}

250: #elif defined(PETSC_USE_WHILE_KERNELS)
251: #define SPARSEDOT(sum,a,b,xi,n) {
252: while (n--) {sum += a[*xi]*b[*xi];xi++;}}

254: #else
255: #define SPARSEDOT(sum,a,b,xi,n) {
256: int __i;
257: for(__i=0;__i<n;__i++)sum+= a[xi[__i]]*b[xi[__i]];}
258: #endif


261: /* Scatter r[xi] += xv */
262: #ifdef PETSC_USE_UNROLL_KERNELS
263: #define SCATTERADD(xv,xi,r,nz) {
264: if (nz > 0) {
265: switch (nz & 0x3) {
266: case 3: r[*xi++] += *xv++;
267: case 2: r[*xi++] += *xv++;
268: case 1: r[*xi++] += *xv++;
269: nz -= 4;}
270: while (nz > 0) {
271: r[xi[0]]+=xv[0]; r[xi[1]]+=xv[1]; r[xi[2]]+=xv[2]; r[xi[3]]+=xv[3];
272: xi  += 4;xv  += 4;nz -= 4;}}}

274: #elif defined(PETSC_USE_WHILE_KERNELS)
275: #define SCATTERADD(xv,xi,r,nz) {
276: while (nz--) r[*xi++]+= *xv++;}

278: #elif defined(PETSC_USE_FOR_KERNELS)
279: #define SCATTERADD(xv,xi,r,nz) { double __s1,__s2;
280:  int __i,__i1,__i2;
281: for(__i=0;__i<nz-1;__i+=2){__i1 = xi[__i]; __i2 = xi[__i+1];
282: __s1 = r[__i1]; __s2 = r[__i2]; __s1 += xv[__i]; __s2 += xv[__i+1];
283: r[__i1]=__s1;r[__i2]=__s2;}if ((nz)&0x1)r[xi[__i]]+=xv[__i];}

285: #else
286: #define SCATTERADD(xv,xi,r,nz) {
287: int __i;
288: for(__i=0;__i<nz;__i++) r[xi[__i]]+= xv[__i];}
289: #endif

291: /* Gather xv += r[xi] */
292: #ifdef PETSC_USE_UNROLL_KERNELS
293: #define GATHERADD(xv,xi,r,nz) {
294: if (nz > 0) {
295: switch (nz & 0x3) {
296: case 3: *xv++ += r[*xi++];
297: case 2: *xv++ += r[*xi++];
298: case 1: *xv++ += r[*xi++];
299: nz -= 4;}
300: while (nz > 0) {
301: xv[0] += r[xi[0]]; xv[1] += r[xi[1]]; xv[2] += r[xi[2]]; xv[3] += r[xi[3]];
302: xi  += 4;xv  += 4;nz -= 4;}}}

304: #elif defined(PETSC_USE_WHILE_KERNELS)
305: #define GATHERADD(xv,xi,r,nz) {
306: while (nz--) *xv++ += r[*xi++];}

308: #else
309: #define GATHERADD(xv,xi,r,nz) {
310: int __i;
311: for(__i=0;__i<nz;__i++)xv[__i] += r[xi[__i]];}
312: #endif

314: #endif