aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us>, 2011-02-17 19:01:01 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us>, 2011-02-17 19:01:01 -0500
commit: 2b3a0630b54ff9970a7cd2c78a686015f9a53c0c (patch)
tree: 5dfcc711292be33f0a8097cfa848f5c86ef22131 /src/backend
parent: 42e663cc4139bba218efccfb53293cd6e6fa43da (diff)
download: postgresql-2b3a0630b54ff9970a7cd2c78a686015f9a53c0c.tar.gz
download: postgresql-2b3a0630b54ff9970a7cd2c78a686015f9a53c0c.zip
Fix tsmatchsel() to account properly for null rows.
ts_typanalyze.c computes MCE (most-common-element) statistics as fractions of the non-null rows, which seems fairly reasonable, and anyway changing it in released versions wouldn't be a good idea. But then ts_selfuncs.c has to account for that. Failure to do so results in overestimates in columns with a significant fraction of null documents.

Back-patch to 8.4 where this stuff was introduced.

Patch by Jesper Krogh.
Diffstat (limited to 'src/backend')
-rw-r--r-- src/backend/tsearch/ts_selfuncs.c | 6
1 file changed, 6 insertions, 0 deletions
diff --git a/src/backend/tsearch/ts_selfuncs.c b/src/backend/tsearch/ts_selfuncs.c
index 1f0a42d9b12..709d48c6178 100644
--- a/src/backend/tsearch/ts_selfuncs.c
+++ b/src/backend/tsearch/ts_selfuncs.c
@@ -188,11 +188,17 @@ tsquerysel(VariableStatData *vardata, Datum constval)
/* No most-common-elements info, so do without */
selec = tsquery_opr_selec_no_stats(query);
}
+
+ /*
+ * MCE stats count only non-null rows, so adjust for null rows.
+ */
+ selec *= (1.0 - stats->stanullfrac);
}
else
{
/* No stats at all, so do without */
selec = tsquery_opr_selec_no_stats(query);
+ /* we assume no nulls here, so no stanullfrac correction */
}
return selec;