diff options
Diffstat (limited to 'src/backend/utils/adt/selfuncs.c')
-rw-r--r-- | src/backend/utils/adt/selfuncs.c | 170 |
1 files changed, 115 insertions, 55 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index b67897da886..f9a2c96b0e4 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -88,11 +88,7 @@ * (if any) is passed using the standard fmgr mechanism, so that the estimator * function can fetch it with PG_GET_COLLATION(). Note, however, that all * statistics in pg_statistic are currently built using the relevant column's - * collation. Thus, in most cases where we are looking at statistics, we - * should ignore the operator collation and use the stats entry's collation. - * We expect that the error induced by doing this is usually not large enough - * to justify complicating matters. In any case, doing otherwise would yield - * entirely garbage results for ordered stats data such as histograms. + * collation. *---------- */ @@ -148,14 +144,14 @@ get_relation_stats_hook_type get_relation_stats_hook = NULL; get_index_stats_hook_type get_index_stats_hook = NULL; static double eqsel_internal(PG_FUNCTION_ARGS, bool negate); -static double eqjoinsel_inner(Oid opfuncoid, +static double eqjoinsel_inner(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, AttStatsSlot *sslot1, AttStatsSlot *sslot2, Form_pg_statistic stats1, Form_pg_statistic stats2, bool have_mcvs1, bool have_mcvs2); -static double eqjoinsel_semi(Oid opfuncoid, +static double eqjoinsel_semi(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, @@ -193,10 +189,11 @@ static double convert_timevalue_to_scalar(Datum value, Oid typid, static void examine_simple_variable(PlannerInfo *root, Var *var, VariableStatData *vardata); static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata, - Oid sortop, Datum *min, Datum *max); + Oid sortop, Oid collation, + Datum *min, Datum *max); static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, - Oid sortop, + Oid sortop, Oid collation, Datum *min, Datum *max); static bool get_actual_variable_endpoint(Relation heapRel, Relation indexRel, @@ -234,6 +231,7 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate) Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); + Oid collation = PG_GET_COLLATION(); VariableStatData vardata; Node *other; bool varonleft; @@ -267,10 +265,10 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate) * in the query.) */ if (IsA(other, Const)) - selec = var_eq_const(&vardata, operator, - ((Const *) other)->constvalue, - ((Const *) other)->constisnull, - varonleft, negate); + selec = var_eq_const_ext(&vardata, operator, collation, + ((Const *) other)->constvalue, + ((Const *) other)->constisnull, + varonleft, negate); else selec = var_eq_non_const(&vardata, operator, other, varonleft, negate); @@ -290,6 +288,16 @@ var_eq_const(VariableStatData *vardata, Oid operator, Datum constval, bool constisnull, bool varonleft, bool negate) { + return var_eq_const_ext(vardata, operator, DEFAULT_COLLATION_OID, + constval, constisnull, + varonleft, negate); +} + +double +var_eq_const_ext(VariableStatData *vardata, Oid operator, Oid collation, + Datum constval, bool constisnull, + bool varonleft, bool negate) +{ double selec; double nullfrac = 0.0; bool isdefault; @@ -353,12 +361,12 @@ var_eq_const(VariableStatData *vardata, Oid operator, /* be careful to apply operator right way 'round */ if (varonleft) match = DatumGetBool(FunctionCall2Coll(&eqproc, - sslot.stacoll, + collation, sslot.values[i], constval)); else match = DatumGetBool(FunctionCall2Coll(&eqproc, - sslot.stacoll, + collation, constval, sslot.values[i])); if (match) @@ -555,6 +563,7 @@ neqsel(PG_FUNCTION_ARGS) */ static double scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, + Oid collation, VariableStatData *vardata, Datum constval, Oid consttype) { Form_pg_statistic stats; @@ -654,16 +663,17 @@ scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq, * to the result selectivity. Also add up the total fraction represented * by MCV entries. */ - mcv_selec = mcv_selectivity(vardata, &opproc, constval, true, - &sumcommon); + mcv_selec = mcv_selectivity_ext(vardata, &opproc, collation, constval, true, + &sumcommon); /* * If there is a histogram, determine which bin the constant falls in, and * compute the resulting contribution to selectivity. */ - hist_selec = ineq_histogram_selectivity(root, vardata, - &opproc, isgt, iseq, - constval, consttype); + hist_selec = ineq_histogram_selectivity_ext(root, vardata, + &opproc, isgt, iseq, + collation, + constval, consttype); /* * Now merge the results from the MCV and histogram calculations, @@ -708,6 +718,15 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, double *sumcommonp) { + return mcv_selectivity_ext(vardata, opproc, DEFAULT_COLLATION_OID, + constval, varonleft, sumcommonp); +} + +double +mcv_selectivity_ext(VariableStatData *vardata, FmgrInfo *opproc, Oid collation, + Datum constval, bool varonleft, + double *sumcommonp) +{ double mcv_selec, sumcommon; AttStatsSlot sslot; @@ -726,11 +745,11 @@ mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, { if (varonleft ? DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, + collation, sslot.values[i], constval)) : DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, + collation, constval, sslot.values[i]))) mcv_selec += sslot.numbers[i]; @@ -781,6 +800,20 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, int min_hist_size, int n_skip, int *hist_size) { + return histogram_selectivity_ext(vardata, + opproc, DEFAULT_COLLATION_OID, + constval, varonleft, + min_hist_size, n_skip, + hist_size); +} + +double +histogram_selectivity_ext(VariableStatData *vardata, + FmgrInfo *opproc, Oid collation, + Datum constval, bool varonleft, + int min_hist_size, int n_skip, + int *hist_size) +{ double result; AttStatsSlot sslot; @@ -804,11 +837,11 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, { if (varonleft ? DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, + collation, sslot.values[i], constval)) : DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, + collation, constval, sslot.values[i]))) nmatch++; @@ -849,6 +882,19 @@ ineq_histogram_selectivity(PlannerInfo *root, FmgrInfo *opproc, bool isgt, bool iseq, Datum constval, Oid consttype) { + return ineq_histogram_selectivity_ext(root, vardata, + opproc, isgt, iseq, + DEFAULT_COLLATION_OID, + constval, consttype); +} + +double +ineq_histogram_selectivity_ext(PlannerInfo *root, + VariableStatData *vardata, + FmgrInfo *opproc, bool isgt, bool iseq, + Oid collation, + Datum constval, Oid consttype) +{ double hist_selec; AttStatsSlot sslot; @@ -860,9 +906,11 @@ ineq_histogram_selectivity(PlannerInfo *root, * column type. However, to make that work we will need to figure out * which staop to search for --- it's not necessarily the one we have at * hand! (For example, we might have a '<=' operator rather than the '<' - * operator that will appear in staop.) For now, assume that whatever - * appears in pg_statistic is sorted the same way our operator sorts, or - * the reverse way if isgt is true. + * operator that will appear in staop.) The collation might not agree + * either. For now, just assume that whatever appears in pg_statistic is + * sorted the same way our operator sorts, or the reverse way if isgt is + * true. This could result in a bogus estimate, but it still seems better + * than falling back to the default estimate. */ if (HeapTupleIsValid(vardata->statsTuple) && statistic_proc_security_check(vardata, opproc->fn_oid) && @@ -908,6 +956,7 @@ ineq_histogram_selectivity(PlannerInfo *root, have_end = get_actual_variable_range(root, vardata, sslot.staop, + collation, &sslot.values[0], &sslot.values[1]); @@ -925,17 +974,19 @@ ineq_histogram_selectivity(PlannerInfo *root, have_end = get_actual_variable_range(root, vardata, sslot.staop, + collation, &sslot.values[0], NULL); else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2) have_end = get_actual_variable_range(root, vardata, sslot.staop, + collation, NULL, &sslot.values[probe]); ltcmp = DatumGetBool(FunctionCall2Coll(opproc, - sslot.stacoll, + collation, sslot.values[probe], constval)); if (isgt) @@ -1020,7 +1071,7 @@ ineq_histogram_selectivity(PlannerInfo *root, * values to a uniform comparison scale, and do a linear * interpolation within this bin. */ - if (convert_to_scalar(constval, consttype, sslot.stacoll, + if (convert_to_scalar(constval, consttype, collation, &val, sslot.values[i - 1], sslot.values[i], vardata->vartype, @@ -1160,6 +1211,7 @@ scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq) Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); + Oid collation = PG_GET_COLLATION(); VariableStatData vardata; Node *other; bool varonleft; @@ -1212,7 +1264,7 @@ scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq) } /* The rest of the work is done by scalarineqsel(). */ - selec = scalarineqsel(root, operator, isgt, iseq, + selec = scalarineqsel(root, operator, isgt, iseq, collation, &vardata, constval, consttype); ReleaseVariableStats(vardata); @@ -1277,8 +1329,8 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid) * A boolean variable V is equivalent to the clause V = 't', so we * compute the selectivity as if that is what we have. */ - selec = var_eq_const(&vardata, BooleanEqualOperator, - BoolGetDatum(true), false, true, false); + selec = var_eq_const_ext(&vardata, BooleanEqualOperator, InvalidOid, + BoolGetDatum(true), false, true, false); } else { @@ -2003,6 +2055,7 @@ eqjoinsel(PG_FUNCTION_ARGS) JoinType jointype = (JoinType) PG_GETARG_INT16(3); #endif SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); + Oid collation = PG_GET_COLLATION(); double selec; double selec_inner; VariableStatData vardata1; @@ -2053,7 +2106,7 @@ eqjoinsel(PG_FUNCTION_ARGS) } /* We need to compute the inner-join selectivity in all cases */ - selec_inner = eqjoinsel_inner(opfuncoid, + selec_inner = eqjoinsel_inner(opfuncoid, collation, &vardata1, &vardata2, nd1, nd2, isdefault1, isdefault2, @@ -2080,7 +2133,7 @@ eqjoinsel(PG_FUNCTION_ARGS) inner_rel = find_join_input_rel(root, sjinfo->min_righthand); if (!join_is_reversed) - selec = eqjoinsel_semi(opfuncoid, + selec = eqjoinsel_semi(opfuncoid, collation, &vardata1, &vardata2, nd1, nd2, isdefault1, isdefault2, @@ -2093,7 +2146,7 @@ eqjoinsel(PG_FUNCTION_ARGS) Oid commop = get_commutator(operator); Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid; - selec = eqjoinsel_semi(commopfuncoid, + selec = eqjoinsel_semi(commopfuncoid, collation, &vardata2, &vardata1, nd2, nd1, isdefault2, isdefault1, @@ -2141,7 +2194,7 @@ eqjoinsel(PG_FUNCTION_ARGS) * that it's worth trying to distinguish them here. */ static double -eqjoinsel_inner(Oid opfuncoid, +eqjoinsel_inner(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, @@ -2203,7 +2256,7 @@ eqjoinsel_inner(Oid opfuncoid, if (hasmatch2[j]) continue; if (DatumGetBool(FunctionCall2Coll(&eqproc, - sslot1->stacoll, + collation, sslot1->values[i], sslot2->values[j]))) { @@ -2321,7 +2374,7 @@ eqjoinsel_inner(Oid opfuncoid, * Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid. */ static double -eqjoinsel_semi(Oid opfuncoid, +eqjoinsel_semi(Oid opfuncoid, Oid collation, VariableStatData *vardata1, VariableStatData *vardata2, double nd1, double nd2, bool isdefault1, bool isdefault2, @@ -2415,7 +2468,7 @@ eqjoinsel_semi(Oid opfuncoid, if (hasmatch2[j]) continue; if (DatumGetBool(FunctionCall2Coll(&eqproc, - sslot1->stacoll, + collation, sslot1->values[i], sslot2->values[j]))) { @@ -2635,6 +2688,7 @@ mergejoinscansel(PlannerInfo *root, Node *clause, Oid op_lefttype; Oid op_righttype; Oid opno, + collation, lsortop, rsortop, lstatop, @@ -2659,6 +2713,7 @@ mergejoinscansel(PlannerInfo *root, Node *clause, if (!is_opclause(clause)) return; /* shouldn't happen */ opno = ((OpExpr *) clause)->opno; + collation = ((OpExpr *) clause)->inputcollid; left = get_leftop((Expr *) clause); right = get_rightop((Expr *) clause); if (!right) @@ -2792,20 +2847,20 @@ mergejoinscansel(PlannerInfo *root, Node *clause, /* Try to get ranges of both inputs */ if (!isgt) { - if (!get_variable_range(root, &leftvar, lstatop, + if (!get_variable_range(root, &leftvar, lstatop, collation, &leftmin, &leftmax)) goto fail; /* no range available from stats */ - if (!get_variable_range(root, &rightvar, rstatop, + if (!get_variable_range(root, &rightvar, rstatop, collation, &rightmin, &rightmax)) goto fail; /* no range available from stats */ } else { /* need to swap the max and min */ - if (!get_variable_range(root, &leftvar, lstatop, + if (!get_variable_range(root, &leftvar, lstatop, collation, &leftmax, &leftmin)) goto fail; /* no range available from stats */ - if (!get_variable_range(root, &rightvar, rstatop, + if (!get_variable_range(root, &rightvar, rstatop, collation, &rightmax, &rightmin)) goto fail; /* no range available from stats */ } @@ -2815,13 +2870,13 @@ mergejoinscansel(PlannerInfo *root, Node *clause, * fraction that's <= the right-side maximum value. But only believe * non-default estimates, else stick with our 1.0. */ - selec = scalarineqsel(root, leop, isgt, true, &leftvar, + selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar, rightmax, op_righttype); if (selec != DEFAULT_INEQ_SEL) *leftend = selec; /* And similarly for the right variable. */ - selec = scalarineqsel(root, revleop, isgt, true, &rightvar, + selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar, leftmax, op_lefttype); if (selec != DEFAULT_INEQ_SEL) *rightend = selec; @@ -2845,13 +2900,13 @@ mergejoinscansel(PlannerInfo *root, Node *clause, * minimum value. But only believe non-default estimates, else stick with * our own default. */ - selec = scalarineqsel(root, ltop, isgt, false, &leftvar, + selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar, rightmin, op_righttype); if (selec != DEFAULT_INEQ_SEL) *leftstart = selec; /* And similarly for the right variable. */ - selec = scalarineqsel(root, revltop, isgt, false, &rightvar, + selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar, leftmin, op_lefttype); if (selec != DEFAULT_INEQ_SEL) *rightstart = selec; @@ -5124,9 +5179,11 @@ get_variable_numdistinct(VariableStatData *vardata, bool *isdefault) * * sortop is the "<" comparison operator to use. This should generally * be "<" not ">", as only the former is likely to be found in pg_statistic. + * The collation must be specified too. */ static bool -get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, +get_variable_range(PlannerInfo *root, VariableStatData *vardata, + Oid sortop, Oid collation, Datum *min, Datum *max) { Datum tmin = 0; @@ -5146,7 +5203,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, * before enabling this. */ #ifdef NOT_USED - if (get_actual_variable_range(root, vardata, sortop, min, max)) + if (get_actual_variable_range(root, vardata, sortop, collation, min, max)) return true; #endif @@ -5174,7 +5231,7 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, * * If there is a histogram that is sorted with some other operator than * the one we want, fail --- this suggests that there is data we can't - * use. + * use. XXX consider collation too. */ if (get_attstatsslot(&sslot, vardata->statsTuple, STATISTIC_KIND_HISTOGRAM, sortop, @@ -5221,14 +5278,14 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, continue; } if (DatumGetBool(FunctionCall2Coll(&opproc, - sslot.stacoll, + collation, sslot.values[i], tmin))) { tmin = sslot.values[i]; tmin_is_mcv = true; } if (DatumGetBool(FunctionCall2Coll(&opproc, - sslot.stacoll, + collation, tmax, sslot.values[i]))) { tmax = sslot.values[i]; @@ -5258,10 +5315,11 @@ get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop, * If no data available, return false. * * sortop is the "<" comparison operator to use. + * collation is the required collation. */ static bool get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, - Oid sortop, + Oid sortop, Oid collation, Datum *min, Datum *max) { bool have_data = false; @@ -5301,9 +5359,11 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata, continue; /* - * The first index column must match the desired variable and sort - * operator --- but we can use a descending-order index. + * The first index column must match the desired variable, sortop, and + * collation --- but we can use a descending-order index. */ + if (collation != index->indexcollations[0]) + continue; /* test first 'cause it's cheapest */ if (!match_index_to_operand(vardata->var, 0, index)) continue; switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0])) |