diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2021-11-28 21:32:36 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2021-11-28 21:33:07 -0500 |
commit | 3804539e48e794781c6145c7f988f5d507418fa8 (patch) | |
tree | 317904b43ca8c1d510b23cb8fdd7b05a75e971bc /src/backend/utils/misc/sampling.c | |
parent | f44ceb46ec2d8da48f6e145bf462d5620c25e079 (diff) | |
download | postgresql-3804539e48e794781c6145c7f988f5d507418fa8.tar.gz postgresql-3804539e48e794781c6145c7f988f5d507418fa8.zip |
Replace random(), pg_erand48(), etc with a better PRNG API and algorithm.
Standardize on xoroshiro128** as our basic PRNG algorithm, eliminating
a bunch of platform dependencies as well as fundamentally-obsolete PRNG
code. In addition, this API replacement will ease replacing the
algorithm again in future, should that become necessary.
xoroshiro128** is a few percent slower than the drand48 family,
but it can produce full-width 64-bit random values not only 48-bit,
and it should be much more trustworthy. It's likely to be noticeably
faster than the platform's random(), depending on which platform you
are thinking about; and we can have non-global state vectors easily,
unlike with random(). It is not cryptographically strong, but neither
are the functions it replaces.
Fabien Coelho, reviewed by Dean Rasheed, Aleksander Alekseev, and myself
Discussion: https://postgr.es/m/alpine.DEB.2.22.394.2105241211230.165418@pseudo
Diffstat (limited to 'src/backend/utils/misc/sampling.c')
-rw-r--r-- | src/backend/utils/misc/sampling.c | 52 |
1 files changed, 30 insertions, 22 deletions
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c index 0c327e823f7..d1a2416e8b7 100644 --- a/src/backend/utils/misc/sampling.c +++ b/src/backend/utils/misc/sampling.c @@ -37,7 +37,7 @@ */ BlockNumber BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize, - long randseed) + uint32 randseed) { bs->N = nblocks; /* measured table size */ @@ -49,7 +49,7 @@ BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize, bs->t = 0; /* blocks scanned so far */ bs->m = 0; /* blocks selected so far */ - sampler_random_init_state(randseed, bs->randstate); + sampler_random_init_state(randseed, &bs->randstate); return Min(bs->n, bs->N); } @@ -98,7 +98,7 @@ BlockSampler_Next(BlockSampler bs) * less than k, which means that we cannot fail to select enough blocks. *---------- */ - V = sampler_random_fract(bs->randstate); + V = sampler_random_fract(&bs->randstate); p = 1.0 - (double) k / (double) K; while (V < p) { @@ -136,10 +136,11 @@ reservoir_init_selection_state(ReservoirState rs, int n) * Reservoir sampling is not used anywhere where it would need to return * repeatable results so we can initialize it randomly. */ - sampler_random_init_state(random(), rs->randstate); + sampler_random_init_state(pg_prng_uint32(&pg_global_prng_state), + &rs->randstate); /* Initial value of W (for use when Algorithm Z is first applied) */ - rs->W = exp(-log(sampler_random_fract(rs->randstate)) / n); + rs->W = exp(-log(sampler_random_fract(&rs->randstate)) / n); } double @@ -154,7 +155,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n) double V, quot; - V = sampler_random_fract(rs->randstate); /* Generate V */ + V = sampler_random_fract(&rs->randstate); /* Generate V */ S = 0; t += 1; /* Note: "num" in Vitter's code is always equal to t - n */ @@ -186,7 +187,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n) tmp; /* Generate U and X */ - U = sampler_random_fract(rs->randstate); + U = sampler_random_fract(&rs->randstate); X = t * (W - 1.0); S = floor(X); /* S is tentatively set to floor(X) */ /* Test if U <= h(S)/cg(X) in the manner of (6.3) */ @@ -215,7 +216,7 @@ reservoir_get_next_S(ReservoirState rs, double t, int n) y *= numer / denom; denom -= 1; } - W = exp(-log(sampler_random_fract(rs->randstate)) / n); /* Generate W in advance */ + W = exp(-log(sampler_random_fract(&rs->randstate)) / n); /* Generate W in advance */ if (exp(log(y) / n) <= (t + X) / t) break; } @@ -230,24 +231,22 @@ reservoir_get_next_S(ReservoirState rs, double t, int n) *---------- */ void -sampler_random_init_state(long seed, SamplerRandomState randstate) +sampler_random_init_state(uint32 seed, pg_prng_state *randstate) { - randstate[0] = 0x330e; /* same as pg_erand48, but could be anything */ - randstate[1] = (unsigned short) seed; - randstate[2] = (unsigned short) (seed >> 16); + pg_prng_seed(randstate, (uint64) seed); } /* Select a random value R uniformly distributed in (0 - 1) */ double -sampler_random_fract(SamplerRandomState randstate) +sampler_random_fract(pg_prng_state *randstate) { double res; - /* pg_erand48 returns a value in [0.0 - 1.0), so we must reject 0 */ + /* pg_prng_double returns a value in [0.0 - 1.0), so we must reject 0.0 */ do { - res = pg_erand48(randstate); - } while (res == 0.0); + res = pg_prng_double(randstate); + } while (unlikely(res == 0.0)); return res; } @@ -261,27 +260,36 @@ sampler_random_fract(SamplerRandomState randstate) * except that a common random state is used across all callers. */ static ReservoirStateData oldrs; +static bool oldrs_initialized = false; double anl_random_fract(void) { /* initialize if first time through */ - if (oldrs.randstate[0] == 0) - sampler_random_init_state(random(), oldrs.randstate); + if (unlikely(!oldrs_initialized)) + { + sampler_random_init_state(pg_prng_uint32(&pg_global_prng_state), + &oldrs.randstate); + oldrs_initialized = true; + } /* and compute a random fraction */ - return sampler_random_fract(oldrs.randstate); + return sampler_random_fract(&oldrs.randstate); } double anl_init_selection_state(int n) { /* initialize if first time through */ - if (oldrs.randstate[0] == 0) - sampler_random_init_state(random(), oldrs.randstate); + if (unlikely(!oldrs_initialized)) + { + sampler_random_init_state(pg_prng_uint32(&pg_global_prng_state), + &oldrs.randstate); + oldrs_initialized = true; + } /* Initial value of W (for use when Algorithm Z is first applied) */ - return exp(-log(sampler_random_fract(oldrs.randstate)) / n); + return exp(-log(sampler_random_fract(&oldrs.randstate)) / n); } double |