/* statlib.c -- Statistical functions for testing the randomness of
number sequences. */
/* Copyright 1999, 2000 Free Software Foundation, Inc.
This file is part of the GNU MP Library test suite.
The GNU MP Library test suite is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
The GNU MP Library test suite is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */
/* The theories for these functions are taken from D. Knuth's "The Art of Computer Programming: Volume 2, Seminumerical Algorithms", Third
Edition, Addison Wesley, 1998. */
/* Implementation notes.
The Kolmogorov-Smirnov test.
Eq. (13) in Knuth, p. 50, says that if X1, X2, ..., Xn are independent observations arranged into ascending order
Kp = sqr(n) * max(j/n - F(Xj)) for all 1<=j<=n Km = sqr(n) * max(F(Xj) - (j-1)/n)) for all 1<=j<=n
where F(x) = Pr(X <= x) = probability that (X <= x), which for a uniformly distributed random real number between zero and one is exactly the number itself (x).
The answer to exercise 23 gives the following implementation, which doesn't need the observations to be sorted in ascending order:
for (k = 0; k < m; k++) a[k] = 1.0 b[k] = 0.0 c[k] = 0
for (each observation Xj) Y = F(Xj) k = floor (m * Y) a[k] = min (a[k], Y) b[k] = max (b[k], Y) c[k] += 1
j = 0 rp = rm = 0 for (k = 0; k < m; k++) if (c[k] > 0) rm = max (rm, a[k] - j/n) j += c[k] rp = max (rp, j/n - b[k])
/* ks (Kp, Km, X, P, n) -- Perform a Kolmogorov-Smirnov test on the N real numbers between zero and one in vector X. P is the distribution function, called for each entry in X, which should calculate the probability of X being greater than or equal to any number in the sequence. (For a uniformly distributed sequence of real numbers between zero and one, this is simply equal to X.) The
result is put in Kp and Km. */
/* ks_table(val, n) -- calculate probability for Kp/Km less than or
equal to VAL with N observations. See [Knuth section 3.3.1] */
void
ks_table (mpf_t p, mpf_t val, constunsignedint n)
{ /* We use Eq. (27), Knuth p.58, skipping O(1/n) for simplicity. This shortcut will result in too high probabilities, especially when n is small.
/* x2_table (t, v, n) -- return chi-square table row for V in T[]. */ void
x2_table (double t[], unsignedint v)
{ int f;
/* FIXME: Do a table lookup for v <= 30 since the following formula
[Knuth, vol 2, 3.3.1] is only good for v > 30. */
/* value = v + sqrt(2*v) * X[p] + (2/3) * X[p]^2 - 2/3 + O(1/sqrt(t) */ /* NOTE: The O() term is ignored for simplicity. */
for (f = 0; f < 7; f++)
t[f] =
v +
sqrt (2 * v) * x2_table_X[0][f] +
_2D3 * x2_table_X[1][f] - _2D3;
}
/* P(p, x) -- Distribution function. Calculate the probability of X being greater than or equal to any number in the sequence. For a random real number between zero and one given by a uniformly
distributed random number generator, this is simply equal to X. */
/* mpf_freqt() -- Frequency test using KS on N real numbers between zero
and one. See [Knuth vol 2, p.61]. */ void
mpf_freqt (mpf_t Kp,
mpf_t Km,
mpf_t X[], constunsignedlongint n)
{
ks (Kp, Km, X, P, n);
}
/* The Chi-square test. Eq. (8) in Knuth vol. 2 says that if Y[] holds the observations and p[] is the probability for.. (to be continued!)
V = 1/n * sum((s=1 to k) Y[s]^2 / p[s]) - n */
void
x2 (mpf_t V, /* result */ unsignedlongint X[], /* data */ unsignedint k, /* #of categories */ void (P) (mpf_t, unsignedlongint, void *), /* probability func */ void *x, /* extra user data passed to P() */ unsignedlongint n) /* #of samples */
{ unsignedint f;
mpf_t f_t, f_t2; /* temp floats */
/* Pzf(p, s, x) -- Probability for category S in mpz_freqt(). It's
1/d for all S. X is a pointer to an unsigned int holding 'd'. */ staticvoid
Pzf (mpf_t p, unsignedlongint s, void *x)
{
mpf_set_ui (p, 1);
mpf_div_ui (p, p, *((unsignedint *) x));
}
/* mpz_freqt(V, X, imax, n) -- Frequency test on integers. [Knuth, vol 2, 3.3.2]. Keep IMAX low on this one, since we loop from 0 to IMAX. 128 or 256 could be nice.
X[] must not contain numbers outside the range 0 <= X <= IMAX.
Return value is number of observations actually used, after discarding entries out of range.
Since X[] contains integers between zero and IMAX, inclusive, we have IMAX+1 categories.
Note that N should be at least 5*IMAX. Result is put in V and can
be compared to output from x2_table (v=IMAX). */
v = (unsignedlongint *) calloc (imax + 1, sizeof (unsignedlongint)); if (NULL == v)
{
fprintf (stderr, "mpz_freqt(): out of memory\n"); exit (1);
}
/* count */
usedn = n; /* actual number of observations */ for (f = 0; f < n; f++)
{
uitemp = mpz_get_ui(X[f]); if (uitemp > imax) /* sanity check */
{ if (g_debug)
fprintf (stderr, "mpz_freqt(): warning: input insanity: %u, "\ "ignored.\n", uitemp);
usedn--; continue;
}
v[uitemp]++;
}
if (g_debug > DEBUG_2)
{
fprintf (stderr, "counts:\n"); for (f = 0; f <= imax; f++)
fprintf (stderr, "%u:\t%lu\n", f, v[f]);
}
/* chi-square with k=imax+1 and P(x)=1/(imax+1) for all x.*/
x2 (V, v, d, Pzf, (void *) &d, usedn);
free (v); return (usedn);
}
/* debug dummy to drag in dump funcs */ void
foo_debug ()
{ if (0)
{
mpf_dump (0); #ifndef OLDGMP
mpz_dump (0); #endif
}
}
/* merit (rop, t, v, m) -- calculate merit for spectral test result in dimension T, see Knuth p. 105. BUGS: Only valid for 2 <= T <=
6. */ void
merit (mpf_t rop, unsignedint t, mpf_t v, mpz_t m)
{ int f;
mpf_t f_m, f_const, f_pi;
/* From Knuth p. 104: "The exhaustive search in steps S8-S10
reduces the value of s only rarely." */ #ifdef DO_SEARCH /* S7 [Prepare for search.] */ /* Find minimum in (x[1], ..., x[t]) satisfying condition
x[k]^2 <= f(y[1], ...,y[t]) * dot(V[k],V[k]) */
ui_k = ui_t; if (g_debug > DEBUG_2)
{
printf ("searching..."); /*for (f = 0; f < ui_t*/
fflush (stdout);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.