aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDean Rasheed <dean.a.rasheed@gmail.com>2024-12-02 11:37:57 +0000
committerDean Rasheed <dean.a.rasheed@gmail.com>2024-12-02 11:37:57 +0000
commit97173536ed4b1c29dce0dc4119db136e142f60a2 (patch)
tree95a2b589f59b8a008b5ddc0a9fe3f6c29611d5f4
parent33152358453cc46f986483e9f82418fb8ba8eaef (diff)
downloadpostgresql-97173536ed4b1c29dce0dc4119db136e142f60a2.tar.gz
postgresql-97173536ed4b1c29dce0dc4119db136e142f60a2.zip
Add a planner support function for numeric generate_series().
This allows the planner to estimate the number of rows returned by generate_series(numeric, numeric[, numeric]), when the input values can be estimated at plan time. Song Jinzhou, reviewed by Dean Rasheed and David Rowley. Discussion: https://postgr.es/m/tencent_F43E7F4DD50EF5986D1051DE8DE547910206%40qq.com Discussion: https://postgr.es/m/tencent_1F6D5B9A1545E02FD7D0EE508DFD056DE50A%40qq.com
-rw-r--r--src/backend/utils/adt/numeric.c121
-rw-r--r--src/include/catalog/catversion.h2
-rw-r--r--src/include/catalog/pg_proc.dat9
-rw-r--r--src/test/regress/expected/misc_functions.out65
-rw-r--r--src/test/regress/sql/misc_functions.sql38
5 files changed, 232 insertions, 3 deletions
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 344d7137f97..5c33cd3ab0c 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -34,6 +34,7 @@
#include "miscadmin.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/float.h"
@@ -1827,6 +1828,126 @@ generate_series_step_numeric(PG_FUNCTION_ARGS)
SRF_RETURN_DONE(funcctx);
}
+/*
+ * Planner support function for generate_series(numeric, numeric [, numeric])
+ */
+Datum
+generate_series_numeric_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestRows))
+ {
+ /* Try to estimate the number of rows returned */
+ SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+ if (is_funcclause(req->node)) /* be paranoid */
+ {
+ List *args = ((FuncExpr *) req->node)->args;
+ Node *arg1,
+ *arg2,
+ *arg3;
+
+ /* We can use estimated argument values here */
+ arg1 = estimate_expression_value(req->root, linitial(args));
+ arg2 = estimate_expression_value(req->root, lsecond(args));
+ if (list_length(args) >= 3)
+ arg3 = estimate_expression_value(req->root, lthird(args));
+ else
+ arg3 = NULL;
+
+ /*
+ * If any argument is constant NULL, we can safely assume that
+ * zero rows are returned. Otherwise, if they're all non-NULL
+ * constants, we can calculate the number of rows that will be
+ * returned.
+ */
+ if ((IsA(arg1, Const) &&
+ ((Const *) arg1)->constisnull) ||
+ (IsA(arg2, Const) &&
+ ((Const *) arg2)->constisnull) ||
+ (arg3 != NULL && IsA(arg3, Const) &&
+ ((Const *) arg3)->constisnull))
+ {
+ req->rows = 0;
+ ret = (Node *) req;
+ }
+ else if (IsA(arg1, Const) &&
+ IsA(arg2, Const) &&
+ (arg3 == NULL || IsA(arg3, Const)))
+ {
+ Numeric start_num;
+ Numeric stop_num;
+ NumericVar step = const_one;
+
+ /*
+ * If any argument is NaN or infinity, generate_series() will
+ * error out, so we needn't produce an estimate.
+ */
+ start_num = DatumGetNumeric(((Const *) arg1)->constvalue);
+ stop_num = DatumGetNumeric(((Const *) arg2)->constvalue);
+
+ if (NUMERIC_IS_SPECIAL(start_num) ||
+ NUMERIC_IS_SPECIAL(stop_num))
+ PG_RETURN_POINTER(NULL);
+
+ if (arg3)
+ {
+ Numeric step_num;
+
+ step_num = DatumGetNumeric(((Const *) arg3)->constvalue);
+
+ if (NUMERIC_IS_SPECIAL(step_num))
+ PG_RETURN_POINTER(NULL);
+
+ init_var_from_num(step_num, &step);
+ }
+
+ /*
+ * The number of rows that will be returned is given by
+ * floor((stop - start) / step) + 1, if the sign of step
+ * matches the sign of stop - start. Otherwise, no rows will
+ * be returned.
+ */
+ if (cmp_var(&step, &const_zero) != 0)
+ {
+ NumericVar start;
+ NumericVar stop;
+ NumericVar res;
+
+ init_var_from_num(start_num, &start);
+ init_var_from_num(stop_num, &stop);
+
+ init_var(&res);
+ sub_var(&stop, &start, &res);
+
+ if (step.sign != res.sign)
+ {
+ /* no rows will be returned */
+ req->rows = 0;
+ ret = (Node *) req;
+ }
+ else
+ {
+ if (arg3)
+ div_var(&res, &step, &res, 0, false, false);
+ else
+ trunc_var(&res, 0); /* step = 1 */
+
+ req->rows = numericvar_to_double_no_overflow(&res) + 1;
+ ret = (Node *) req;
+ }
+
+ free_var(&res);
+ }
+ }
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
/*
* Implements the numeric version of the width_bucket() function
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 5dd91e190ae..3ca5dbf9e83 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202411111
+#define CATALOG_VERSION_NO 202412021
#endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index cbbe8acd382..9575524007f 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -8464,13 +8464,18 @@
proname => 'generate_series_int8_support', prorettype => 'internal',
proargtypes => 'internal', prosrc => 'generate_series_int8_support' },
{ oid => '3259', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_numeric_support', proretset => 't',
prorettype => 'numeric', proargtypes => 'numeric numeric numeric',
prosrc => 'generate_series_step_numeric' },
{ oid => '3260', descr => 'non-persistent series generator',
- proname => 'generate_series', prorows => '1000', proretset => 't',
+ proname => 'generate_series', prorows => '1000',
+ prosupport => 'generate_series_numeric_support', proretset => 't',
prorettype => 'numeric', proargtypes => 'numeric numeric',
prosrc => 'generate_series_numeric' },
+{ oid => '8405', descr => 'planner support for generate_series',
+ proname => 'generate_series_numeric_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'generate_series_numeric_support' },
{ oid => '938', descr => 'non-persistent series generator',
proname => 'generate_series', prorows => '1000',
prosupport => 'generate_series_timestamp_support', proretset => 't',
diff --git a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out
index 36b1201f9f5..8c73d2b621e 100644
--- a/src/test/regress/expected/misc_functions.out
+++ b/src/test/regress/expected/misc_functions.out
@@ -712,6 +712,71 @@ false, true, false, true);
-- the support function.
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
ERROR: step size cannot equal zero
+--
+-- Test the SupportRequestRows support function for generate_series_numeric()
+--
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0) g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25 loops=1)
+(1 row)
+
+-- As above but with non-default step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=13 width=N) (actual rows=13 loops=1)
+(1 row)
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+------------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=25 width=N) (actual rows=25 loops=1)
+(1 row)
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$,
+true, true, false, true);
+ explain_mask_costs
+----------------------------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0 loops=1)
+(1 row)
+
+-- Ensure we get the default row estimate for error cases (infinity/NaN values
+-- and zero step size)
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$,
+false, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1000 width=N)
+(1 row)
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$,
+false, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1000 width=N)
+(1 row)
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
+false, true, false, true);
+ explain_mask_costs
+-------------------------------------------------------------------
+ Function Scan on generate_series g (cost=N..N rows=1000 width=N)
+(1 row)
+
-- Test functions for control data
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
ok
diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql
index b7495d70eb0..0a0c6e5a6bb 100644
--- a/src/test/regress/sql/misc_functions.sql
+++ b/src/test/regress/sql/misc_functions.sql
@@ -311,6 +311,44 @@ false, true, false, true);
-- the support function.
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
+--
+-- Test the SupportRequestRows support function for generate_series_numeric()
+--
+
+-- Ensure the row estimate matches the actual rows
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0) g(s);$$,
+true, true, false, true);
+
+-- As above but with non-default step
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 2.0) g(s);$$,
+true, true, false, true);
+
+-- Ensure the estimates match when step is decreasing
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, -1.0) g(s);$$,
+true, true, false, true);
+
+-- Ensure an empty range estimates 1 row
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 1.0, 1.0) g(s);$$,
+true, true, false, true);
+
+-- Ensure we get the default row estimate for error cases (infinity/NaN values
+-- and zero step size)
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series('-infinity'::NUMERIC, 'infinity'::NUMERIC, 1.0) g(s);$$,
+false, true, false, true);
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(1.0, 25.0, 'NaN'::NUMERIC) g(s);$$,
+false, true, false, true);
+
+SELECT explain_mask_costs($$
+SELECT * FROM generate_series(25.0, 2.0, 0.0) g(s);$$,
+false, true, false, true);
+
-- Test functions for control data
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
SELECT count(*) > 0 AS ok FROM pg_control_init();