diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2019-02-09 18:32:23 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2019-02-09 18:32:23 -0500 |
commit | a391ff3c3d418e404a2c6e4ff0865a107752827b (patch) | |
tree | f076c5785d06e7ccf082b08c1651b17d95af4563 /src/backend/utils | |
parent | 1fb57af92069ee104c09e2016af9e0e620681be3 (diff) | |
download | postgresql-a391ff3c3d418e404a2c6e4ff0865a107752827b.tar.gz postgresql-a391ff3c3d418e404a2c6e4ff0865a107752827b.zip |
Build out the planner support function infrastructure.
Add support function requests for estimating the selectivity, cost,
and number of result rows (if a SRF) of the target function.
The lack of a way to estimate selectivity of a boolean-returning
function in WHERE has been a recognized deficiency of the planner
since Berkeley days. This commit finally fixes it.
In addition, non-constant estimates of cost and number of output
rows are now possible. We still fall back to looking at procost
and prorows if the support function doesn't service the request,
of course.
To make concrete use of the possibility of estimating output rowcount
for SRFs, this commit adds support functions for array_unnest(anyarray)
and the integer variants of generate_series; the lack of plausible
rowcount estimates for those, even when it's obvious to a human,
has been a repeated subject of complaints. Obviously, much more
could now be done in this line, but I'm mostly just trying to get
the infrastructure in place.
Discussion: https://postgr.es/m/15193.1548028093@sss.pgh.pa.us
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- | src/backend/utils/adt/arrayfuncs.c | 34 | ||||
-rw-r--r-- | src/backend/utils/adt/int.c | 74 | ||||
-rw-r--r-- | src/backend/utils/adt/int8.c | 73 | ||||
-rw-r--r-- | src/backend/utils/adt/selfuncs.c | 13 | ||||
-rw-r--r-- | src/backend/utils/cache/lsyscache.c | 45 |
5 files changed, 198 insertions, 41 deletions
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index a785361fd07..5b2917d1594 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -22,12 +22,16 @@ #include "catalog/pg_type.h" #include "funcapi.h" #include "libpq/pqformat.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "optimizer/optimizer.h" #include "utils/array.h" #include "utils/arrayaccess.h" #include "utils/builtins.h" #include "utils/datum.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/selfuncs.h" #include "utils/typcache.h" @@ -6025,6 +6029,36 @@ array_unnest(PG_FUNCTION_ARGS) } } +/* + * Planner support function for array_unnest(anyarray) + */ +Datum +array_unnest_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestRows)) + { + /* Try to estimate the number of rows returned */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (is_funcclause(req->node)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1; + + /* We can use estimated argument values here */ + arg1 = estimate_expression_value(req->root, linitial(args)); + + req->rows = estimate_array_length(arg1); + ret = (Node *) req; + } + } + + PG_RETURN_POINTER(ret); +} + /* * array_replace/array_remove support diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c index ad8e6d02ee4..04825fc77de 100644 --- a/src/backend/utils/adt/int.c +++ b/src/backend/utils/adt/int.c @@ -30,11 +30,15 @@ #include <ctype.h> #include <limits.h> +#include <math.h> #include "catalog/pg_type.h" #include "common/int.h" #include "funcapi.h" #include "libpq/pqformat.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "optimizer/optimizer.h" #include "utils/array.h" #include "utils/builtins.h" @@ -1427,3 +1431,73 @@ generate_series_step_int4(PG_FUNCTION_ARGS) /* do when there is no more left */ SRF_RETURN_DONE(funcctx); } + +/* + * Planner support function for generate_series(int4, int4 [, int4]) + */ +Datum +generate_series_int4_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestRows)) + { + /* Try to estimate the number of rows returned */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (is_funcclause(req->node)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1, + *arg2, + *arg3; + + /* We can use estimated argument values here */ + arg1 = estimate_expression_value(req->root, linitial(args)); + arg2 = estimate_expression_value(req->root, lsecond(args)); + if (list_length(args) >= 3) + arg3 = estimate_expression_value(req->root, lthird(args)); + else + arg3 = NULL; + + /* + * If any argument is constant NULL, we can safely assume that + * zero rows are returned. Otherwise, if they're all non-NULL + * constants, we can calculate the number of rows that will be + * returned. Use double arithmetic to avoid overflow hazards. + */ + if ((IsA(arg1, Const) && + ((Const *) arg1)->constisnull) || + (IsA(arg2, Const) && + ((Const *) arg2)->constisnull) || + (arg3 != NULL && IsA(arg3, Const) && + ((Const *) arg3)->constisnull)) + { + req->rows = 0; + ret = (Node *) req; + } + else if (IsA(arg1, Const) && + IsA(arg2, Const) && + (arg3 == NULL || IsA(arg3, Const))) + { + double start, + finish, + step; + + start = DatumGetInt32(((Const *) arg1)->constvalue); + finish = DatumGetInt32(((Const *) arg2)->constvalue); + step = arg3 ? DatumGetInt32(((Const *) arg3)->constvalue) : 1; + + /* This equation works for either sign of step */ + if (step != 0) + { + req->rows = floor((finish - start + step) / step); + ret = (Node *) req; + } + } + } + } + + PG_RETURN_POINTER(ret); +} diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index d16cc9e574b..0ff9394a2fb 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -20,6 +20,9 @@ #include "common/int.h" #include "funcapi.h" #include "libpq/pqformat.h" +#include "nodes/nodeFuncs.h" +#include "nodes/supportnodes.h" +#include "optimizer/optimizer.h" #include "utils/int8.h" #include "utils/builtins.h" @@ -1373,3 +1376,73 @@ generate_series_step_int8(PG_FUNCTION_ARGS) /* do when there is no more left */ SRF_RETURN_DONE(funcctx); } + +/* + * Planner support function for generate_series(int8, int8 [, int8]) + */ +Datum +generate_series_int8_support(PG_FUNCTION_ARGS) +{ + Node *rawreq = (Node *) PG_GETARG_POINTER(0); + Node *ret = NULL; + + if (IsA(rawreq, SupportRequestRows)) + { + /* Try to estimate the number of rows returned */ + SupportRequestRows *req = (SupportRequestRows *) rawreq; + + if (is_funcclause(req->node)) /* be paranoid */ + { + List *args = ((FuncExpr *) req->node)->args; + Node *arg1, + *arg2, + *arg3; + + /* We can use estimated argument values here */ + arg1 = estimate_expression_value(req->root, linitial(args)); + arg2 = estimate_expression_value(req->root, lsecond(args)); + if (list_length(args) >= 3) + arg3 = estimate_expression_value(req->root, lthird(args)); + else + arg3 = NULL; + + /* + * If any argument is constant NULL, we can safely assume that + * zero rows are returned. Otherwise, if they're all non-NULL + * constants, we can calculate the number of rows that will be + * returned. Use double arithmetic to avoid overflow hazards. + */ + if ((IsA(arg1, Const) && + ((Const *) arg1)->constisnull) || + (IsA(arg2, Const) && + ((Const *) arg2)->constisnull) || + (arg3 != NULL && IsA(arg3, Const) && + ((Const *) arg3)->constisnull)) + { + req->rows = 0; + ret = (Node *) req; + } + else if (IsA(arg1, Const) && + IsA(arg2, Const) && + (arg3 == NULL || IsA(arg3, Const))) + { + double start, + finish, + step; + + start = DatumGetInt64(((Const *) arg1)->constvalue); + finish = DatumGetInt64(((Const *) arg2)->constvalue); + step = arg3 ? DatumGetInt64(((Const *) arg3)->constvalue) : 1; + + /* This equation works for either sign of step */ + if (step != 0) + { + req->rows = floor((finish - start + step) / step); + ret = (Node *) req; + } + } + } + } + + PG_RETURN_POINTER(ret); +} diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 74fafc64f3e..1ef6faecd1e 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -1577,17 +1577,6 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid) selec = var_eq_const(&vardata, BooleanEqualOperator, BoolGetDatum(true), false, true, false); } - else if (is_funcclause(arg)) - { - /* - * If we have no stats and it's a function call, estimate 0.3333333. - * This seems a pretty unprincipled choice, but Postgres has been - * using that estimate for function calls since 1992. The hoariness - * of this behavior suggests that we should not be in too much hurry - * to use another value. - */ - selec = 0.3333333; - } else { /* Otherwise, the default estimate is 0.5 */ @@ -3502,7 +3491,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows, * pointless to worry too much about this without much better * estimates for SRF output rowcounts than we have today.) */ - this_srf_multiplier = expression_returns_set_rows(groupexpr); + this_srf_multiplier = expression_returns_set_rows(root, groupexpr); if (srf_multiplier < this_srf_multiplier) srf_multiplier = this_srf_multiplier; diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index fba0ee8b847..e88c45d268a 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -1605,41 +1605,28 @@ get_func_leakproof(Oid funcid) } /* - * get_func_cost - * Given procedure id, return the function's procost field. - */ -float4 -get_func_cost(Oid funcid) -{ - HeapTuple tp; - float4 result; - - tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for function %u", funcid); - - result = ((Form_pg_proc) GETSTRUCT(tp))->procost; - ReleaseSysCache(tp); - return result; -} - -/* - * get_func_rows - * Given procedure id, return the function's prorows field. + * get_func_support + * + * Returns the support function OID associated with a given function, + * or InvalidOid if there is none. */ -float4 -get_func_rows(Oid funcid) +RegProcedure +get_func_support(Oid funcid) { HeapTuple tp; - float4 result; tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); - if (!HeapTupleIsValid(tp)) - elog(ERROR, "cache lookup failed for function %u", funcid); + if (HeapTupleIsValid(tp)) + { + Form_pg_proc functup = (Form_pg_proc) GETSTRUCT(tp); + RegProcedure result; - result = ((Form_pg_proc) GETSTRUCT(tp))->prorows; - ReleaseSysCache(tp); - return result; + result = functup->prosupport; + ReleaseSysCache(tp); + return result; + } + else + return (RegProcedure) InvalidOid; } /* ---------- RELATION CACHE ---------- */ |