aboutsummaryrefslogtreecommitdiff
path: root/contrib/intarray
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2017-05-13 15:14:39 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2017-05-13 15:14:39 -0400
commit9aab83fc5039d83e84144b7bed3fb1d62a74ae78 (patch)
treeaaa84d0e3027193133fa023ce536ba7379327e17 /contrib/intarray
parent1848b73d4576e30c89ba450ad9f169774a6819bf (diff)
downloadpostgresql-9aab83fc5039d83e84144b7bed3fb1d62a74ae78.tar.gz
postgresql-9aab83fc5039d83e84144b7bed3fb1d62a74ae78.zip
Redesign get_attstatsslot()/free_attstatsslot() for more safety and speed.
The mess cleaned up in commit da0759600 is clear evidence that it's a bug hazard to expect the caller of get_attstatsslot()/free_attstatsslot() to provide the correct type OID for the array elements in the slot. Moreover, we weren't even getting any performance benefit from that, since get_attstatsslot() was extracting the real type OID from the array anyway. So we ought to get rid of that requirement; indeed, it would make more sense for get_attstatsslot() to pass back the type OID it found, in case the caller isn't sure what to expect, which is likely in binary- compatible-operator cases. Another problem with the current implementation is that if the stats array element type is pass-by-reference, we incur a palloc/memcpy/pfree cycle for each element. That seemed acceptable when the code was written because we were targeting O(10) array sizes --- but these days, stats arrays are almost always bigger than that, sometimes much bigger. We can save a significant number of cycles by doing one palloc/memcpy/pfree of the whole array. Indeed, in the now-probably-common case where the array is toasted, that happens anyway so this method is basically free. (Note: although the catcache code will inline any out-of-line toasted values, it doesn't decompress them. At the other end of the size range, it doesn't expand short-header datums either. In either case, DatumGetArrayTypeP would have to make a copy. We do end up using an extra array copy step if the element type is pass-by-value and the array length is neither small enough for a short header nor large enough to have suffered compression. But that seems like a very acceptable price for winning in pass-by-ref cases.) Hence, redesign to take these insights into account. While at it, convert to an API in which we fill a struct rather than passing a bunch of pointers to individual output arguments. That will make it less painful if we ever want further expansion of what get_attstatsslot can pass back. It's certainly arguable that this is new development and not something to push post-feature-freeze. However, I view it as primarily bug-proofing and therefore something that's better to have sooner not later. Since we aren't quite at beta phase yet, let's put it in. Discussion: https://postgr.es/m/16364.1494520862@sss.pgh.pa.us
Diffstat (limited to 'contrib/intarray')
-rw-r--r--contrib/intarray/_int_selfuncs.c31
1 files changed, 15 insertions, 16 deletions
diff --git a/contrib/intarray/_int_selfuncs.c b/contrib/intarray/_int_selfuncs.c
index 9b4a22fe341..3d92025ba56 100644
--- a/contrib/intarray/_int_selfuncs.c
+++ b/contrib/intarray/_int_selfuncs.c
@@ -136,11 +136,7 @@ _int_matchsel(PG_FUNCTION_ARGS)
int nmcelems = 0;
float4 minfreq = 0.0;
float4 nullfrac = 0.0;
- Form_pg_statistic stats;
- Datum *values = NULL;
- int nvalues = 0;
- float4 *numbers = NULL;
- int nnumbers = 0;
+ AttStatsSlot sslot;
/*
* If expression is not "variable @@ something" or "something @@ variable"
@@ -193,6 +189,8 @@ _int_matchsel(PG_FUNCTION_ARGS)
*/
if (HeapTupleIsValid(vardata.statsTuple))
{
+ Form_pg_statistic stats;
+
stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
nullfrac = stats->stanullfrac;
@@ -200,29 +198,30 @@ _int_matchsel(PG_FUNCTION_ARGS)
* For an int4 array, the default array type analyze function will
* collect a Most Common Elements list, which is an array of int4s.
*/
- if (get_attstatsslot(vardata.statsTuple,
- INT4OID, -1,
+ if (get_attstatsslot(&sslot, vardata.statsTuple,
STATISTIC_KIND_MCELEM, InvalidOid,
- NULL,
- &values, &nvalues,
- &numbers, &nnumbers))
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
{
+ Assert(sslot.valuetype == INT4OID);
+
/*
* There should be three more Numbers than Values, because the
* last three (for intarray) cells are taken for minimal, maximal
* and nulls frequency. Punt if not.
*/
- if (nnumbers == nvalues + 3)
+ if (sslot.nnumbers == sslot.nvalues + 3)
{
/* Grab the lowest frequency. */
- minfreq = numbers[nnumbers - (nnumbers - nvalues)];
+ minfreq = sslot.numbers[sslot.nnumbers - (sslot.nnumbers - sslot.nvalues)];
- mcelems = values;
- mcefreqs = numbers;
- nmcelems = nvalues;
+ mcelems = sslot.values;
+ mcefreqs = sslot.numbers;
+ nmcelems = sslot.nvalues;
}
}
}
+ else
+ memset(&sslot, 0, sizeof(sslot));
/* Process the logical expression in the query, using the stats */
selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
@@ -231,7 +230,7 @@ _int_matchsel(PG_FUNCTION_ARGS)
/* MCE stats count only non-null rows, so adjust for null rows. */
selec *= (1.0 - nullfrac);
- free_attstatsslot(INT4OID, values, nvalues, numbers, nnumbers);
+ free_attstatsslot(&sslot);
ReleaseVariableStats(vardata);
CLAMP_PROBABILITY(selec);