aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/selfuncs.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2012-07-08 23:51:19 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2012-07-08 23:51:19 -0400
commitb6108fe59b997aafe6a8003b38a34d91c073618c (patch)
tree9f3a36dc1d7aca10a1a62eac33d70ec95dc351db /src/backend/utils/adt/selfuncs.c
parent3f35526479f388d1e3ffee7cf6c84e3212a8ec1a (diff)
downloadpostgresql-b6108fe59b997aafe6a8003b38a34d91c073618c.tar.gz
postgresql-b6108fe59b997aafe6a8003b38a34d91c073618c.zip
Fix planner to pass correct collation to operator selectivity estimators.
We can do this without creating an API break for estimation functions by passing the collation using the existing fmgr functionality for passing an input collation as a hidden parameter. The need for this was foreseen at the outset, but we didn't get around to making it happen in 9.1 because of the decision to sort all pg_statistic histograms according to the database's default collation. That meant that selectivity estimators generally need to use the default collation too, even if they're estimating for an operator that will do something different. The reason it's suddenly become more interesting is that regexp interpretation also uses a collation (for its LC_TYPE not LC_COLLATE property), and we no longer want to use the wrong collation when examining regexps during planning. It's not that the selectivity estimate is likely to change much from this; rather that we are thinking of caching compiled regexps during planner estimation, and we won't get the intended benefit if we cache them with a different collation than the executor will use. Back-patch to 9.1, both because the regexp change is likely to get back-patched and because we might as well get this right in all collation-supporting branches, in case any third-party code wants to rely on getting the collation. The patch turns out to be minuscule now that I've done it ...
Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r--src/backend/utils/adt/selfuncs.c96
1 files changed, 59 insertions, 37 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index d700b86b934..acbdad95624 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -83,6 +83,15 @@
* joins, however, the selectivity is defined as the fraction of the left-hand
* side relation's rows that are expected to have a match (ie, at least one
* row with a TRUE result) in the right-hand side.
+ *
+ * For both oprrest and oprjoin functions, the operator's input collation OID
+ * (if any) is passed using the standard fmgr mechanism, so that the estimator
+ * function can fetch it with PG_GET_COLLATION(). Note, however, that all
+ * statistics in pg_statistic are currently built using the database's default
+ * collation. Thus, in most cases where we are looking at statistics, we
+ * should ignore the actual operator collation and use DEFAULT_COLLATION_OID.
+ * We expect that the error induced by doing this is usually not large enough
+ * to justify complicating matters.
*----------
*/
@@ -1087,6 +1096,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
+ Oid collation = PG_GET_COLLATION();
VariableStatData vardata;
Node *other;
bool varonleft;
@@ -1187,12 +1197,15 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
}
/*
- * Divide pattern into fixed prefix and remainder. XXX we have to assume
- * default collation here, because we don't have access to the actual
- * input collation for the operator. FIXME ...
+ * Divide pattern into fixed prefix and remainder. Unlike many of the
+ * other functions in this file, we use the pattern operator's actual
+ * collation for this step. This is not because we expect the collation
+ * to make a big difference in the selectivity estimate (it seldom would),
+ * but because we want to be sure we cache compiled regexps under the
+ * right cache key, so that they can be re-used at runtime.
*/
patt = (Const *) other;
- pstatus = pattern_fixed_prefix(patt, ptype, DEFAULT_COLLATION_OID,
+ pstatus = pattern_fixed_prefix(patt, ptype, collation,
&prefix, &rest);
/*
@@ -1776,18 +1789,20 @@ scalararraysel(PlannerInfo *root,
elem_nulls[i],
elmbyval));
if (is_join_clause)
- s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
- PointerGetDatum(root),
- ObjectIdGetDatum(operator),
- PointerGetDatum(args),
- Int16GetDatum(jointype),
- PointerGetDatum(sjinfo)));
+ s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int16GetDatum(jointype),
+ PointerGetDatum(sjinfo)));
else
- s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
- PointerGetDatum(root),
- ObjectIdGetDatum(operator),
- PointerGetDatum(args),
- Int32GetDatum(varRelid)));
+ s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int32GetDatum(varRelid)));
if (useOr)
s1 = s1 + s2 - s1 * s2;
else
@@ -1818,18 +1833,20 @@ scalararraysel(PlannerInfo *root,
*/
args = list_make2(leftop, elem);
if (is_join_clause)
- s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
- PointerGetDatum(root),
- ObjectIdGetDatum(operator),
- PointerGetDatum(args),
- Int16GetDatum(jointype),
- PointerGetDatum(sjinfo)));
+ s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int16GetDatum(jointype),
+ PointerGetDatum(sjinfo)));
else
- s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
- PointerGetDatum(root),
- ObjectIdGetDatum(operator),
- PointerGetDatum(args),
- Int32GetDatum(varRelid)));
+ s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int32GetDatum(varRelid)));
if (useOr)
s1 = s1 + s2 - s1 * s2;
else
@@ -1854,18 +1871,20 @@ scalararraysel(PlannerInfo *root,
dummyexpr->collation = clause->inputcollid;
args = list_make2(leftop, dummyexpr);
if (is_join_clause)
- s2 = DatumGetFloat8(FunctionCall5(&oprselproc,
- PointerGetDatum(root),
- ObjectIdGetDatum(operator),
- PointerGetDatum(args),
- Int16GetDatum(jointype),
- PointerGetDatum(sjinfo)));
+ s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int16GetDatum(jointype),
+ PointerGetDatum(sjinfo)));
else
- s2 = DatumGetFloat8(FunctionCall4(&oprselproc,
- PointerGetDatum(root),
- ObjectIdGetDatum(operator),
- PointerGetDatum(args),
- Int32GetDatum(varRelid)));
+ s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int32GetDatum(varRelid)));
s1 = useOr ? 0.0 : 1.0;
/*
@@ -1937,6 +1956,7 @@ rowcomparesel(PlannerInfo *root,
{
Selectivity s1;
Oid opno = linitial_oid(clause->opnos);
+ Oid inputcollid = linitial_oid(clause->inputcollids);
List *opargs;
bool is_join_clause;
@@ -1977,6 +1997,7 @@ rowcomparesel(PlannerInfo *root,
/* Estimate selectivity for a join clause. */
s1 = join_selectivity(root, opno,
opargs,
+ inputcollid,
jointype,
sjinfo);
}
@@ -1985,6 +2006,7 @@ rowcomparesel(PlannerInfo *root,
/* Estimate selectivity for a restriction clause. */
s1 = restriction_selectivity(root, opno,
opargs,
+ inputcollid,
varRelid);
}