CHROMA
zolotarev_coeffs.cc
Go to the documentation of this file.
1 /* -*- Mode: C; comment-column: 22; fill-column: 79; compile-command: "gcc -o zolotarev zolotarev.c -ansi -pedantic -lm -DTEST"; -*- */
2 #define VERSION Source Time-stamp: <19-OCT-2004 09:33:22.00 adk@MISSCONTRARY>
3 
4 /* These C routines evalute the optimal rational approximation to the signum
5  * function for epsilon < |x| < 1 using Zolotarev's theorem.
6  *
7  * To obtain reliable results for high degree approximations (large n) it is
8  * necessary to compute using sufficiently high precision arithmetic. To this
9  * end the code has been parameterised to work with the preprocessor names
10  * INTERNAL_PRECISION and PRECISION set to float, double, or long double as
11  * appropriate. INTERNAL_PRECISION is used in computing the Zolotarev
12  * coefficients, which are converted to PRECISION before being returned to the
13  * caller. Presumably even higher precision could be obtained using GMP or
14  * similar package, but bear in mind that rounding errors might also be
15  * significant in evaluating the resulting polynomial. The convergence criteria
16  * have been written in a precision-independent form. */
17 
18 #include <math.h>
19 #include <stdlib.h>
20 #include <stdio.h>
21 
22 #define MAX(a,b) ((a) > (b) ? (a) : (b))
23 #define MIN(a,b) ((a) < (b) ? (a) : (b))
24 
25 #ifndef INTERNAL_PRECISION
26 #define INTERNAL_PRECISION long double
27 #endif
28 
29 #include "zolotarev.h"
30 #define ZOLOTAREV_INTERNAL
31 #undef ZOLOTAREV_DATA
32 #define ZOLOTAREV_DATA izd
33 #undef ZPRECISION
34 #define ZPRECISION INTERNAL_PRECISION
35 #include "zolotarev.h"
36 #undef ZOLOTAREV_INTERNAL
37 
38 /* The ANSI standard appears not to know what pi is */
39 
40 #ifndef M_PI
41 #define M_PI ((INTERNAL_PRECISION) 3.141592653589793238462643383279502884197\
42 169399375105820974944592307816406286208998628034825342117068)
43 #endif
44 
45 #define ZERO ((INTERNAL_PRECISION) 0)
46 #define ONE ((INTERNAL_PRECISION) 1)
47 #define TWO ((INTERNAL_PRECISION) 2)
48 #define THREE ((INTERNAL_PRECISION) 3)
49 #define FOUR ((INTERNAL_PRECISION) 4)
50 #define HALF (ONE/TWO)
51 
52 /* The following obscenity seems to be the simplest (?) way to coerce the C
53  * preprocessor to convert the value of a preprocessor token into a std::string. */
54 
55 #define PP2(x) #x
56 #define PP1(a,b,c) a ## b(c)
57 #define STRINGIFY(name) PP1(PP,2,name)
58 
59 /* Compute the partial fraction expansion coefficients (alpha) from the
60  * factored form */
61 
62 static void construct_partfrac(izd *z) {
63  int dn = z -> dn, dd = z -> dd, type = z -> type;
64  int j, k, da = dd + 1 + type;
65  INTERNAL_PRECISION A = z -> A, *a = z -> a, *ap = z -> ap, *alpha;
66  alpha = (INTERNAL_PRECISION*) malloc(da * sizeof(INTERNAL_PRECISION));
67  for (j = 0; j < dd; j++)
68  for (k = 0, alpha[j] = A; k < dd; k++)
69  alpha[j] *=
70  (k < dn ? ap[j] - a[k] : ONE) / (k == j ? ONE : ap[j] - ap[k]);
71  if(type == 1) /* implicit pole at zero? */
72  for (k = 0, alpha[dd] = A * (dn > dd ? - a[dd] : ONE); k < dd; k++) {
73  alpha[dd] *= a[k] / ap[k];
74  alpha[k] *= (dn > dd ? ap[k] - a[dd] : ONE) / ap[k];
75  }
76  alpha[da-1] = dn == da - 1 ? A : ZERO;
77  z -> alpha = alpha;
78  z -> da = da;
79  return;
80 }
81 
82 /* Convert factored polynomial into dense polynomial. The input is the overall
83  * factor A and the roots a[i], such that p = A product(x - a[i], i = 1..d) */
84 
87  int d) {
89  int i, j;
90  p = (INTERNAL_PRECISION *) malloc((d + 2) * sizeof(INTERNAL_PRECISION));
91  p[0] = A;
92  for (i = 0; i < d; i++) {
93  p[i+1] = p[i];
94  for (j = i; j > 0; j--) p[j] = p[j-1] - a[i]*p[j];
95  p[0] *= - a[i];
96  }
97  return p;
98 }
99 
100 /* Convert a rational function of the form R0(x) = x p(x^2)/q(x^2) (type 0) or
101  * R1(x) = p(x^2)/[x q(x^2)] (type 1) into its continued fraction
102  * representation. We assume that 0 <= deg(q) - deg(p) <= 1 for type 0 and 0 <=
103  * deg(p) - deg(q) <= 1 for type 1. On input p and q are in factored form, and
104  * deg(q) = dq, deg(p) = dp. The output is the continued fraction coefficients
105  * beta, where R(x) = beta[0] x + 1/(beta[1] x + 1/(...)).
106  *
107  * The method used is as follows. There are four cases to consider:
108  *
109  * 0.i. Type 0, deg p = deg q
110  *
111  * 0.ii. Type 0, deg p = deg q - 1
112  *
113  * 1.i. Type 1, deg p = deg q
114  *
115  * 1.ii. Type 1, deg p = deg q + 1
116  *
117  * and these are connected by two transformations:
118  *
119  * A. To obtain a continued fraction expansion of type 1 we use a single-step
120  * polynomial division we find beta and r(x) such that p(x) = beta x q(x) +
121  * r(x), with deg(r) = deg(q). This implies that p(x^2) = beta x^2 q(x^2) +
122  * r(x^2), and thus R1(x) = x beta + r(x^2)/(x q(x^2)) = x beta + 1/R0(x)
123  * with R0(x) = x q(x^2)/r(x^2).
124  *
125  * B. A continued fraction expansion of type 0 is obtained in a similar, but
126  * not identical, manner. We use the polynomial division algorithm to compute
127  * the quotient beta and the remainder r that satisfy p(x) = beta q(x) + r(x)
128  * with deg(r) = deg(q) - 1. We thus have x p(x^2) = x beta q(x^2) + x r(x^2),
129  * so R0(x) = x beta + x r(x^2)/q(x^2) = x beta + 1/R1(x) with R1(x) = q(x^2) /
130  * (x r(x^2)).
131  *
132  * Note that the deg(r) must be exactly deg(q) for (A) and deg(q) - 1 for (B)
133  * because p and q have disjoint roots all of multiplicity 1. This means that
134  * the division algorithm requires only a single polynomial subtraction step.
135  *
136  * The transformations between the cases form the following finite state
137  * automaton:
138  *
139  * +------+ +------+ +------+ +------+
140  * | | | | ---(A)---> | | | |
141  * | 0.ii | ---(B)---> | 1.ii | | 0.i | <---(A)--- | 1.i |
142  * | | | | <---(B)--- | | | |
143  * +------+ +------+ +------+ +------+
144  */
145 
149  INTERNAL_PRECISION *, int, int);
150 
154  INTERNAL_PRECISION *, int, int);
155 
156 static void construct_contfrac(izd *z){
157  INTERNAL_PRECISION *r, A = z -> A, *p = z -> a, *q = z -> ap;
158  int dp = z -> dn, dq = z -> dd, type = z -> type;
159 
160  z -> db = 2 * dq + 1 + type;
161  z -> beta = (INTERNAL_PRECISION *)
162  malloc(z -> db * sizeof(INTERNAL_PRECISION));
163  p = poly_factored_to_dense(A, p, dp);
164  q = poly_factored_to_dense(ONE, q, dq);
165  r = (INTERNAL_PRECISION *) malloc((MAX(dp,dq) + 1) *
166  sizeof(INTERNAL_PRECISION));
167  if (type == 0) (void) contfrac_B(z -> beta, p, q, r, dp, dq);
168  else (void) contfrac_A(z -> beta, p, q, r, dp, dq);
169  free(p); free(q); free(r);
170  return;
171 }
172 
176  INTERNAL_PRECISION *r, int dp, int dq) {
177  INTERNAL_PRECISION quot, *rb;
178  int j;
179 
180  /* p(x) = x beta q(x) + r(x); dp = dq or dp = dq + 1 */
181 
182  quot = dp == dq ? ZERO : p[dp] / q[dq];
183  r[0] = p[0];
184  for (j = 1; j <= dp; j++) r[j] = p[j] - quot * q[j-1];
185 #ifdef DEBUG
186  printf("%s: Continued Fraction form: deg p = %2d, deg q = %2d, beta = %g\n",
187  __FUNCTION__, dp, dq, (float) quot);
188  for (j = 0; j <= dq + 1; j++)
189  printf("\tp[%2d] = %14.6g\tq[%2d] = %14.6g\tr[%2d] = %14.6g\n",
190  j, (float) (j > dp ? ZERO : p[j]),
191  j, (float) (j == 0 ? ZERO : q[j-1]),
192  j, (float) (j == dp ? ZERO : r[j]));
193 #endif /* DEBUG */
194  *(rb = contfrac_B(beta, q, r, p, dq, dq)) = quot;
195  return rb + 1;
196 }
197 
201  INTERNAL_PRECISION *r, int dp, int dq) {
202  INTERNAL_PRECISION quot, *rb;
203  int j;
204 
205  /* p(x) = beta q(x) + r(x); dp = dq or dp = dq - 1 */
206 
207  quot = dp == dq ? p[dp] / q[dq] : ZERO;
208  for (j = 0; j < dq; j++) r[j] = p[j] - quot * q[j];
209 #ifdef DEBUG
210  printf("%s: Continued Fraction form: deg p = %2d, deg q = %2d, beta = %g\n",
211  __FUNCTION__, dp, dq, (float) quot);
212  for (j = 0; j <= dq; j++)
213  printf("\tp[%2d] = %14.6g\tq[%2d] = %14.6g\tr[%2d] = %14.6g\n",
214  j, (float) (j > dp ? ZERO : p[j]),
215  j, (float) q[j],
216  j, (float) (j == dq ? ZERO : r[j]));
217 #endif /* DEBUG */
218  *(rb = dq > 0 ? contfrac_A(beta, q, r, p, dq, dq-1) : beta) = quot;
219  return rb + 1;
220 }
221 
222 /* The global variable U is used to hold the argument u throughout the AGM
223  * recursion. The global variables F and K are set in the innermost
224  * instantiation of the recursive function AGM to the values of the elliptic
225  * integrals F(u,k) and K(k) respectively. They must be made thread local to
226  * make this code thread-safe in a multithreaded environment. */
227 
228 static INTERNAL_PRECISION U, F, K; /* THREAD LOCAL */
229 
230 /* Recursive implementation of Gauss' arithmetico-geometric mean, which is the
231  * kernel of the method used to compute the Jacobian elliptic functions
232  * sn(u,k), cn(u,k), and dn(u,k) with parameter k (where 0 < k < 1), as well
233  * as the elliptic integral F(s,k) satisfying F(sn(u,k)) = u and the complete
234  * elliptic integral K(k).
235  *
236  * The algorithm used is a recursive implementation of the Gauss (Landen)
237  * transformation.
238  *
239  * The function returns the value of sn(u,k'), where k' is the dual parameter,
240  * and also sets the values of the global variables F and K. The latter is
241  * used to determine the sign of cn(u,k').
242  *
243  * The algorithm is deemed to have converged when b ceases to increase. This
244  * works whatever INTERNAL_PRECISION is specified. */
245 
249  static INTERNAL_PRECISION pb = -ONE;
250  INTERNAL_PRECISION c, d, xi;
251 
252  if (b <= pb) {
253  pb = -ONE;
254  F = asin(s) / a; /* Here, a is the AGM */
255  K = M_PI / (TWO * a);
256  return sin(U * a);
257  }
258  pb = b;
259  c = a - b;
260  d = a + b;
261  xi = AGM(HALF*d, sqrt(a*b), ONE + c*c == ONE ?
262  HALF*s*d/a : (a - sqrt(a*a - s*s*c*d))/(c*s));
263  return 2*a*xi / (d + c*xi*xi);
264 }
265 
266 /* Computes sn(u,k), cn(u,k), dn(u,k), F(u,k), and K(k). It is essentially a
267  * wrapper for the routine AGM. The sign of cn(u,k) is defined to be -1 if
268  * K(k) < u < 3*K(k) and +1 otherwise, and thus sign is computed by some quite
269  * unnecessarily obfuscated bit manipulations. */
270 
274  INTERNAL_PRECISION* elK) {
275  int sgn;
276  U = u;
277  *sn = AGM(ONE, sqrt(ONE - k*k), u);
278  sgn = ((int) (fabs(u) / K)) % 4; /* sgn = 0, 1, 2, 3 */
279  sgn ^= sgn >> 1; /* (sgn & 1) = 0, 1, 1, 0 */
280  sgn = 1 - ((sgn & 1) << 1); /* sgn = 1, -1, -1, 1 */
281  *cn = ((INTERNAL_PRECISION) sgn) * sqrt(ONE - *sn * *sn);
282  *dn = sqrt(ONE - k*k* *sn * *sn);
283  *elF = F;
284  *elK = K;
285 }
286 
287 /* Compute the coefficients for the optimal rational approximation R(x) to
288  * sgn(x) of degree n over the interval epsilon < |x| < 1 using Zolotarev's
289  * formula.
290  *
291  * Set type = 0 for the Zolotarev approximation, which is zero at x = 0, and
292  * type = 1 for the approximation which is infinite at x = 0. */
293 
294 zolotarev_data* zolotarev(PRECISION epsilon, int n, int type) {
295  INTERNAL_PRECISION A, c, cp, kp, ksq, sn, cn, dn, Kp, Kj, z, z0, t, M, F,
296  l, invlambda, xi, xisq, *tv, s, opl;
297  int m, czero, ts;
298  zolotarev_data *zd;
299  izd *d = (izd*) malloc(sizeof(izd));
300 
301  d -> type = type;
303  d -> n = n;
304  d -> dd = n / 2;
305  d -> dn = d -> dd - 1 + n % 2; /* n even: dn = dd - 1, n odd: dn = dd */
306  d -> deg_denom = 2 * d -> dd;
307  d -> deg_num = 2 * d -> dn + 1;
308 
309  d -> a = (INTERNAL_PRECISION*) malloc((1 + d -> dn) *
310  sizeof(INTERNAL_PRECISION));
311  d -> ap = (INTERNAL_PRECISION*) malloc(d -> dd *
312  sizeof(INTERNAL_PRECISION));
313  ksq = d -> epsilon * d -> epsilon;
314  kp = sqrt(ONE - ksq);
315  sncndnFK(ZERO, kp, &sn, &cn, &dn, &F, &Kp); /* compute Kp = K(kp) */
316  z0 = TWO * Kp / (INTERNAL_PRECISION) n;
317  M = ONE;
318  A = ONE / d -> epsilon;
319 
320  sncndnFK(HALF * z0, kp, &sn, &cn, &dn, &F, &Kj); /* compute xi */
321  xi = ONE / dn;
322  xisq = xi * xi;
323  invlambda = xi;
324 
325  for (m = 0; m < d -> dd; m++) {
326  czero = 2 * (m + 1) == n; /* n even and m = dd -1 */
327 
328  z = z0 * ((INTERNAL_PRECISION) m + ONE);
329  sncndnFK(z, kp, &sn, &cn, &dn, &F, &Kj);
330  t = cn / sn;
331  c = - t*t;
332  if (!czero) (d -> a)[d -> dn - 1 - m] = ksq / c;
333 
334  z = z0 * ((INTERNAL_PRECISION) m + HALF);
335  sncndnFK(z, kp, &sn, &cn, &dn, &F, &Kj);
336  t = cn / sn;
337  cp = - t*t;
338  (d -> ap)[d -> dd - 1 - m] = ksq / cp;
339 
340  M *= (ONE - c) / (ONE - cp);
341  A *= (czero ? -ksq : c) * (ONE - cp) / (cp * (ONE - c));
342  invlambda *= (ONE - c*xisq) / (ONE - cp*xisq);
343  }
344  invlambda /= M;
345  d -> A = TWO / (ONE + invlambda) * A;
346  d -> Delta = (invlambda - ONE) / (invlambda + ONE);
347 
348  d -> gamma = (INTERNAL_PRECISION*) malloc((1 + d -> n) *
349  sizeof(INTERNAL_PRECISION));
350  l = ONE / invlambda;
351  opl = ONE + l;
352  sncndnFK(sqrt( d -> type == 1
353  ? (THREE + l) / (FOUR * opl)
354  : (ONE + THREE*l) / (opl*opl*opl)
355  ), sqrt(ONE - l*l), &sn, &cn, &dn, &F, &Kj);
356  s = M * F;
357  for (m = 0; m < d -> n; m++) {
358  sncndnFK(s + TWO*Kp*m/n, kp, &sn, &cn, &dn, &F, &Kj);
359  d -> gamma[m] = d -> epsilon / dn;
360  }
361 
362  /* If R(x) is a Zolotarev rational approximation of degree (n,m) with maximum
363  * error Delta, then (1 - Delta^2) / R(x) is also an optimal Chebyshev
364  * approximation of degree (m,n) */
365 
366  if (d -> type == 1) {
367  d -> A = (ONE - d -> Delta * d -> Delta) / d -> A;
368  tv = d -> a; d -> a = d -> ap; d -> ap = tv;
369  ts = d -> dn; d -> dn = d -> dd; d -> dd = ts;
370  ts = d -> deg_num; d -> deg_num = d -> deg_denom; d -> deg_denom = ts;
371  }
372 
375 
376  /* Converting everything to PRECISION for external use only */
377 
378  zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
379  zd -> A = (PRECISION) d -> A;
380  zd -> Delta = (PRECISION) d -> Delta;
381  zd -> epsilon = (PRECISION) d -> epsilon;
382  zd -> n = d -> n;
383  zd -> type = d -> type;
384  zd -> dn = d -> dn;
385  zd -> dd = d -> dd;
386  zd -> da = d -> da;
387  zd -> db = d -> db;
388  zd -> deg_num = d -> deg_num;
389  zd -> deg_denom = d -> deg_denom;
390 
391  zd -> a = (PRECISION*) malloc(zd -> dn * sizeof(PRECISION));
392  for (m = 0; m < zd -> dn; m++) zd -> a[m] = (PRECISION) d -> a[m];
393  free(d -> a);
394 
395  zd -> ap = (PRECISION*) malloc(zd -> dd * sizeof(PRECISION));
396  for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (PRECISION) d -> ap[m];
397  free(d -> ap);
398 
399  zd -> alpha = (PRECISION*) malloc(zd -> da * sizeof(PRECISION));
400  for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (PRECISION) d -> alpha[m];
401  free(d -> alpha);
402 
403  zd -> beta = (PRECISION*) malloc(zd -> db * sizeof(PRECISION));
404  for (m = 0; m < zd -> db; m++) zd -> beta[m] = (PRECISION) d -> beta[m];
405  free(d -> beta);
406 
407  zd -> gamma = (PRECISION*) malloc(zd -> n * sizeof(PRECISION));
408  for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (PRECISION) d -> gamma[m];
409  free(d -> gamma);
410 
411  free(d);
412  return zd;
413 }
414 
415 zolotarev_data* higham(PRECISION epsilon, int n) {
416  INTERNAL_PRECISION A, M, c, cp, z, z0, t, epssq;
417  int m, czero;
418  zolotarev_data *zd;
419  izd *d = (izd*) malloc(sizeof(izd));
420 
421  d -> type = 0;
423  d -> n = n;
424  d -> dd = n / 2;
425  d -> dn = d -> dd - 1 + n % 2; /* n even: dn = dd - 1, n odd: dn = dd */
426  d -> deg_denom = 2 * d -> dd;
427  d -> deg_num = 2 * d -> dn + 1;
428 
429  d -> a = (INTERNAL_PRECISION*) malloc((1 + d -> dn) *
430  sizeof(INTERNAL_PRECISION));
431  d -> ap = (INTERNAL_PRECISION*) malloc(d -> dd *
432  sizeof(INTERNAL_PRECISION));
433  A = (INTERNAL_PRECISION) n;
434  z0 = M_PI / A;
435  A = n % 2 == 0 ? A : ONE / A;
436  M = d -> epsilon * A;
437  epssq = d -> epsilon * d -> epsilon;
438 
439  for (m = 0; m < d -> dd; m++) {
440  czero = 2 * (m + 1) == n; /* n even and m = dd - 1*/
441 
442  if (!czero) {
443  z = z0 * ((INTERNAL_PRECISION) m + ONE);
444  t = tan(z);
445  c = - t*t;
446  (d -> a)[d -> dn - 1 - m] = c;
447  M *= epssq - c;
448  }
449 
450  z = z0 * ((INTERNAL_PRECISION) m + HALF);
451  t = tan(z);
452  cp = - t*t;
453  (d -> ap)[d -> dd - 1 - m] = cp;
454  M /= epssq - cp;
455  }
456 
457  d -> gamma = (INTERNAL_PRECISION*) malloc((1 + d -> n) *
458  sizeof(INTERNAL_PRECISION));
459  for (m = 0; m < d -> n; m++) d -> gamma[m] = ONE;
460 
461  d -> A = A;
462  d -> Delta = ONE - M;
463 
466 
467  /* Converting everything to PRECISION for external use only */
468 
469  zd = (zolotarev_data*) malloc(sizeof(zolotarev_data));
470  zd -> A = (PRECISION) d -> A;
471  zd -> Delta = (PRECISION) d -> Delta;
472  zd -> epsilon = (PRECISION) d -> epsilon;
473  zd -> n = d -> n;
474  zd -> type = d -> type;
475  zd -> dn = d -> dn;
476  zd -> dd = d -> dd;
477  zd -> da = d -> da;
478  zd -> db = d -> db;
479  zd -> deg_num = d -> deg_num;
480  zd -> deg_denom = d -> deg_denom;
481 
482  zd -> a = (PRECISION*) malloc(zd -> dn * sizeof(PRECISION));
483  for (m = 0; m < zd -> dn; m++) zd -> a[m] = (PRECISION) d -> a[m];
484  free(d -> a);
485 
486  zd -> ap = (PRECISION*) malloc(zd -> dd * sizeof(PRECISION));
487  for (m = 0; m < zd -> dd; m++) zd -> ap[m] = (PRECISION) d -> ap[m];
488  free(d -> ap);
489 
490  zd -> alpha = (PRECISION*) malloc(zd -> da * sizeof(PRECISION));
491  for (m = 0; m < zd -> da; m++) zd -> alpha[m] = (PRECISION) d -> alpha[m];
492  free(d -> alpha);
493 
494  zd -> beta = (PRECISION*) malloc(zd -> db * sizeof(PRECISION));
495  for (m = 0; m < zd -> db; m++) zd -> beta[m] = (PRECISION) d -> beta[m];
496  free(d -> beta);
497 
498  zd -> gamma = (PRECISION*) malloc(zd -> n * sizeof(PRECISION));
499  for (m = 0; m < zd -> n; m++) zd -> gamma[m] = (PRECISION) d -> gamma[m];
500  free(d -> gamma);
501 
502  free(d);
503  return zd;
504 }
505 
506 /* Cleanup */
507 
508 void zolotarev_free(zolotarev_data* rdata)
509 {
510  free( rdata->a );
511  free( rdata->ap );
512  free( rdata->alpha );
513  free( rdata->beta );
514  free( rdata->gamma );
515  free( rdata );
516 }
517 
518 
519 #ifdef TEST
520 
521 #undef ZERO
522 #define ZERO ((PRECISION) 0)
523 #undef ONE
524 #define ONE ((PRECISION) 1)
525 #undef TWO
526 #define TWO ((PRECISION) 2)
527 
528 /* Evaluate the rational approximation R(x) using the factored form */
529 
530 static PRECISION zolotarev_eval(PRECISION x, zolotarev_data* rdata) {
531  int m;
532  PRECISION R;
533 
534  if (rdata -> type == 0) {
535  R = rdata -> A * x;
536  for (m = 0; m < rdata -> deg_denom/2; m++)
537  R *= (2*(m+1) > rdata -> deg_num ? ONE : x*x - rdata -> a[m]) /
538  (x*x - rdata -> ap[m]);
539  } else {
540  R = rdata -> A / x;
541  for (m = 0; m < rdata -> deg_num/2; m++)
542  R *= (x*x - rdata -> a[m]) /
543  (2*(m+1) > rdata -> deg_denom ? ONE : x*x - rdata -> ap[m]);
544  }
545  return R;
546 }
547 
548 /* Evaluate the rational approximation R(x) using the partial fraction form */
549 
550 static PRECISION zolotarev_partfrac_eval(PRECISION x, zolotarev_data* rdata) {
551  int m;
552  PRECISION R = rdata -> alpha[rdata -> da - 1];
553  for (m = 0; m < rdata -> dd; m++)
554  R += rdata -> alpha[m] / (x * x - rdata -> ap[m]);
555  if (rdata -> type == 1) R += rdata -> alpha[rdata -> dd] / (x * x);
556  return R * x;
557 }
558 
559 /* Evaluate the rational approximation R(x) using continued fraction form.
560  *
561  * If x = 0 and type = 1 then the result should be INF, whereas if x = 0 and
562  * type = 0 then the result should be 0, but division by zero will occur at
563  * intermediate stages of the evaluation. For IEEE implementations with
564  * non-signalling overflow this will work correctly since 1/(1/0) = 1/INF = 0,
565  * but with signalling overflow you will get an error message. */
566 
567 static PRECISION zolotarev_contfrac_eval(PRECISION x, zolotarev_data* rdata) {
568  int m;
569  PRECISION R = rdata -> beta[0] * x;
570  for (m = 1; m < rdata -> db; m++) R = rdata -> beta[m] * x + ONE / R;
571  return R;
572 }
573 
574 /* Evaluate the rational approximation R(x) using Cayley form */
575 
576 static PRECISION zolotarev_cayley_eval(PRECISION x, zolotarev_data* rdata) {
577  int m;
578  PRECISION T;
579 
580  T = rdata -> type == 0 ? ONE : -ONE;
581  for (m = 0; m < rdata -> n; m++)
582  T *= (rdata -> gamma[m] - x) / (rdata -> gamma[m] + x);
583  return (ONE - T) / (ONE + T);
584 }
585 
586 
587 /* Test program. Apart from printing out the parameters for R(x) it produces
588  * the following data files for plotting (unless NPLOT is defined):
589  *
590  * zolotarev-fn is a plot of R(x) for |x| < 1.2. This should look like sgn(x).
591  *
592  * zolotarev-err is a plot of the error |R(x) - sgn(x)| scaled by 1/Delta. This
593  * should oscillate deg_num + deg_denom + 2 times between +1 and -1 over the
594  * domain epsilon <= |x| <= 1.
595  *
596  * If ALLPLOTS is defined then zolotarev-partfrac (zolotarev-contfrac) is a
597  * plot of the difference between the values of R(x) computed using the
598  * factored form and the partial fraction (continued fraction) form, scaled by
599  * 1/Delta. It should be zero everywhere. */
600 
601 int main(int argc, char** argv) {
602 
603  int m, n, plotpts = 5000, type = 0;
604  float eps, x, ypferr, ycferr, ycaylerr, maxypferr, maxycferr, maxycaylerr;
605  zolotarev_data *rdata;
606  PRECISION y;
607  FILE *plot_function, *plot_error,
608  *plot_partfrac, *plot_contfrac, *plot_cayley;
609 
610  if (argc < 3 || argc > 4) {
611  fprintf(stderr, "Usage: %s epsilon n [type]\n", *argv);
612  exit(EXIT_FAILURE);
613  }
614  sscanf(argv[1], "%g", &eps); /* First argument is epsilon */
615  sscanf(argv[2], "%d", &n); /* Second argument is n */
616  if (argc == 4) sscanf(argv[3], "%d", &type); /* Third argument is type */
617 
618  if (type < 0 || type > 2) {
619  fprintf(stderr, "%s: type must be 0 (Zolotarev R(0) = 0),\n"
620  "\t\t1 (Zolotarev R(0) = Inf, or 2 (Higham)\n", *argv);
621  exit(EXIT_FAILURE);
622  }
623 
624  rdata = type == 2
625  ? higham((PRECISION) eps, n)
626  : zolotarev((PRECISION) eps, n, type);
627 
628  printf("Zolotarev Test: R(epsilon = %g, n = %d, type = %d)\n\t"
630  "\n\tINTERNAL_PRECISION = " STRINGIFY(INTERNAL_PRECISION)
631  "\tPRECISION = " STRINGIFY(PRECISION)
632  "\n\n\tRational approximation of degree (%d,%d), %s at x = 0\n"
633  "\tDelta = %g (maximum error)\n\n"
634  "\tA = %g (overall factor)\n",
635  (float) rdata -> epsilon, rdata -> n, type,
636  rdata -> deg_num, rdata -> deg_denom,
637  rdata -> type == 1 ? "infinite" : "zero",
638  (float) rdata -> Delta, (float) rdata -> A);
639  for (m = 0; m < MIN(rdata -> dd, rdata -> dn); m++)
640  printf("\ta[%2d] = %14.8g\t\ta'[%2d] = %14.8g\n",
641  m + 1, (float) rdata -> a[m], m + 1, (float) rdata -> ap[m]);
642  if (rdata -> dd > rdata -> dn)
643  printf("\t\t\t\t\ta'[%2d] = %14.8g\n",
644  rdata -> dn + 1, (float) rdata -> ap[rdata -> dn]);
645  if (rdata -> dd < rdata -> dn)
646  printf("\ta[%2d] = %14.8g\n",
647  rdata -> dd + 1, (float) rdata -> a[rdata -> dd]);
648 
649  printf("\n\tPartial fraction coefficients\n");
650  printf("\talpha[ 0] = %14.8g\n",
651  (float) rdata -> alpha[rdata -> da - 1]);
652  for (m = 0; m < rdata -> dd; m++)
653  printf("\talpha[%2d] = %14.8g\ta'[%2d] = %14.8g\n",
654  m + 1, (float) rdata -> alpha[m], m + 1, (float) rdata -> ap[m]);
655  if (rdata -> type == 1)
656  printf("\talpha[%2d] = %14.8g\ta'[%2d] = %14.8g\n",
657  rdata -> dd + 1, (float) rdata -> alpha[rdata -> dd],
658  rdata -> dd + 1, (float) ZERO);
659 
660  printf("\n\tContinued fraction coefficients\n");
661  for (m = 0; m < rdata -> db; m++)
662  printf("\tbeta[%2d] = %14.8g\n", m, (float) rdata -> beta[m]);
663 
664  printf("\n\tCayley transform coefficients\n");
665  for (m = 0; m < rdata -> n; m++)
666  printf("\tgamma[%2d] = %14.8g\n", m, (float) rdata -> gamma[m]);
667 
668 #ifndef NPLOT
669  plot_function = fopen("zolotarev-fn.dat", "w");
670  plot_error = fopen("zolotarev-err.dat", "w");
671 #ifdef ALLPLOTS
672  plot_partfrac = fopen("zolotarev-partfrac.dat", "w");
673  plot_contfrac = fopen("zolotarev-contfrac.dat", "w");
674  plot_cayley = fopen("zolotarev-cayley.dat", "w");
675 #endif /* ALLPLOTS */
676  for (m = 0, maxypferr = maxycferr = maxycaylerr = 0.0; m <= plotpts; m++) {
677  x = 2.4 * (float) m / plotpts - 1.2;
678  if (rdata -> type == 0 || fabs(x) * (float) plotpts > 1.0) {
679  /* skip x = 0 for type 1, as R(0) is singular */
680  y = zolotarev_eval((PRECISION) x, rdata);
681  fprintf(plot_function, "%g %g\n", x, (float) y);
682  fprintf(plot_error, "%g %g\n",
683  x, (float)((y - ((x > 0.0 ? ONE : -ONE))) / rdata -> Delta));
684  ypferr = (float)((zolotarev_partfrac_eval((PRECISION) x, rdata) - y)
685  / rdata -> Delta);
686  ycferr = (float)((zolotarev_contfrac_eval((PRECISION) x, rdata) - y)
687  / rdata -> Delta);
688  ycaylerr = (float)((zolotarev_cayley_eval((PRECISION) x, rdata) - y)
689  / rdata -> Delta);
690  if (fabs(x) < 1.0 && fabs(x) > rdata -> epsilon) {
691  maxypferr = MAX(maxypferr, fabs(ypferr));
692  maxycferr = MAX(maxycferr, fabs(ycferr));
693  maxycaylerr = MAX(maxycaylerr, fabs(ycaylerr));
694  }
695 #ifdef ALLPLOTS
696  fprintf(plot_partfrac, "%g %g\n", x, ypferr);
697  fprintf(plot_contfrac, "%g %g\n", x, ycferr);
698  fprintf(plot_cayley, "%g %g\n", x, ycaylerr);
699 #endif /* ALLPLOTS */
700  }
701  }
702 #ifdef ALLPLOTS
703  fclose(plot_cayley);
704  fclose(plot_contfrac);
705  fclose(plot_partfrac);
706 #endif /* ALLPLOTS */
707  fclose(plot_error);
708  fclose(plot_function);
709 
710  printf("\n\tMaximum PF error = %g (relative to Delta)\n", maxypferr);
711  printf("\tMaximum CF error = %g (relative to Delta)\n", maxycferr);
712  printf("\tMaximum Cayley error = %g (relative to Delta)\n", maxycaylerr);
713 #endif /* NPLOT */
714 
715  free(rdata -> a);
716  free(rdata -> ap);
717  free(rdata -> alpha);
718  free(rdata -> beta);
719  free(rdata -> gamma);
720  free(rdata);
721 
722  return EXIT_SUCCESS;
723 }
724 #endif /* TEST */
GTEST_API_ int main(int argc, char **argv)
Definition: gtest_main.cc:34
unsigned s
Definition: ldumul_w.cc:37
unsigned j
Definition: ldumul_w.cc:35
unsigned i
Definition: ldumul_w.cc:34
unsigned n
Definition: ldumul_w.cc:36
static int m[4]
Definition: make_seeds.cc:16
int y
Definition: meslate.cc:35
int z
Definition: meslate.cc:36
int c
Definition: meslate.cc:61
int x
Definition: meslate.cc:34
int t
Definition: meslate.cc:37
Double q
Definition: mesq.cc:17
int epsilon(int i, int j, int k)
static const LatticeInteger & beta(const int dim)
Definition: stag_phases_s.h:47
static const LatticeInteger & alpha(const int dim)
Definition: stag_phases_s.h:43
static multi1d< LatticeColorMatrix > u
LinOpSysSolverMGProtoClover::T T
Double cp
Definition: invbicg.cc:107
Complex a
Definition: invbicg.cc:95
DComplex d
Definition: invbicg.cc:99
A(A, psi, r, Ncb, PLUS)
Complex b
Definition: invbicg.cc:96
int k
Definition: invbicg.cc:119
multi1d< LatticeFermion > r(Ncb)
int l
Definition: pade_trln_w.cc:111
multi1d< LatticeColorMatrix > U
LatticeFermion T
Definition: t_clover.cc:11
#define PRECISION
Definition: zolotarev.h:6
#define HVERSION
Definition: zolotarev.h:2
#define THREE
#define VERSION
#define ONE
#define INTERNAL_PRECISION
static INTERNAL_PRECISION K
#define HALF
#define MIN(a, b)
#define FOUR
static INTERNAL_PRECISION U
zolotarev_data * higham(PRECISION epsilon, int n)
static INTERNAL_PRECISION * contfrac_B(INTERNAL_PRECISION *, INTERNAL_PRECISION *, INTERNAL_PRECISION *, INTERNAL_PRECISION *, int, int)
static void sncndnFK(INTERNAL_PRECISION u, INTERNAL_PRECISION k, INTERNAL_PRECISION *sn, INTERNAL_PRECISION *cn, INTERNAL_PRECISION *dn, INTERNAL_PRECISION *elF, INTERNAL_PRECISION *elK)
#define TWO
static INTERNAL_PRECISION * contfrac_A(INTERNAL_PRECISION *, INTERNAL_PRECISION *, INTERNAL_PRECISION *, INTERNAL_PRECISION *, int, int)
static INTERNAL_PRECISION * poly_factored_to_dense(INTERNAL_PRECISION A, INTERNAL_PRECISION *a, int d)
void zolotarev_free(zolotarev_data *rdata)
static INTERNAL_PRECISION F
zolotarev_data * zolotarev(PRECISION epsilon, int n, int type)
#define ZERO
static void construct_contfrac(izd *z)
static INTERNAL_PRECISION AGM(INTERNAL_PRECISION a, INTERNAL_PRECISION b, INTERNAL_PRECISION s)
#define M_PI
#define STRINGIFY(name)
#define MAX(a, b)
static void construct_partfrac(izd *z)