aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/windowfuncs.c
diff options
context:
space:
mode:
authorDavid Rowley <drowley@postgresql.org>2022-12-23 12:43:52 +1300
committerDavid Rowley <drowley@postgresql.org>2022-12-23 12:43:52 +1300
commited1a88ddaccfe883e4cf74d30319accfeae6cfe5 (patch)
treeb3c2e52d5d70bc20b6fb9a1737f8647b4815934d /src/backend/utils/adt/windowfuncs.c
parentcc150596341e2a7913519769a88a1537c2e94720 (diff)
downloadpostgresql-ed1a88ddaccfe883e4cf74d30319accfeae6cfe5.tar.gz
postgresql-ed1a88ddaccfe883e4cf74d30319accfeae6cfe5.zip
Allow window functions to adjust their frameOptions
WindowFuncs such as row_number() don't care if it's called with ROWS UNBOUNDED PRECEDING AND CURRENT ROW or with RANGE UNBOUNDED PRECEDING AND CURRENT ROW. The latter is less efficient as the RANGE option requires that the executor check for peer rows, so using the ROW option instead would cause less overhead. Because RANGE is part of the default frame options for WindowClauses, it means WindowAgg is, by default, working much harder than it needs to for window functions where the ROWS / RANGE option has no effect on the window function's result. On a test query from the discussion thread, a performance improvement of 344% was seen by using ROWS instead of RANGE. Here we add a new support function node type to allow support functions to be called for window functions so that the most optimal version of the frame options can be set. The planner has been adjusted so that the frame options are changed only if all window functions sharing the same window clause agree on what the optimized frame options are. Here we give the ability for row_number(), rank(), dense_rank(), percent_rank(), cume_dist() and ntile() to alter their WindowClause's frameOptions. Reviewed-by: Vik Fearing, Erwin Brandstetter, Zhihong Yu Discussion: https://postgr.es/m/CAGHENJ7LBBszxS+SkWWFVnBmOT2oVsBhDMB1DFrgerCeYa_DyA@mail.gmail.com Discussion: https://postgr.es/m/CAApHDvohAKEtTXxq7Pc-ic2dKT8oZfbRKeEJP64M0B6+S88z+A@mail.gmail.com
Diffstat (limited to 'src/backend/utils/adt/windowfuncs.c')
-rw-r--r--src/backend/utils/adt/windowfuncs.c148
1 files changed, 148 insertions, 0 deletions
diff --git a/src/backend/utils/adt/windowfuncs.c b/src/backend/utils/adt/windowfuncs.c
index 596564fa15c..db852361887 100644
--- a/src/backend/utils/adt/windowfuncs.c
+++ b/src/backend/utils/adt/windowfuncs.c
@@ -107,6 +107,24 @@ window_row_number_support(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(req);
}
+ if (IsA(rawreq, SupportRequestOptimizeWindowClause))
+ {
+ SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
+
+ /*
+ * The frame options can always become "ROWS BETWEEN UNBOUNDED
+ * PRECEDING AND CURRENT ROW". row_number() always just increments by
+ * 1 with each row in the partition. Using ROWS instead of RANGE
+ * saves effort checking peer rows during execution.
+ */
+ req->frameOptions = (FRAMEOPTION_NONDEFAULT |
+ FRAMEOPTION_ROWS |
+ FRAMEOPTION_START_UNBOUNDED_PRECEDING |
+ FRAMEOPTION_END_CURRENT_ROW);
+
+ PG_RETURN_POINTER(req);
+ }
+
PG_RETURN_POINTER(NULL);
}
@@ -149,6 +167,27 @@ window_rank_support(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(req);
}
+ if (IsA(rawreq, SupportRequestOptimizeWindowClause))
+ {
+ SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
+
+ /*
+ * rank() is coded in such a way that it returns "(COUNT (*) OVER
+ * (<opt> RANGE UNBOUNDED PRECEDING) - COUNT (*) OVER (<opt> RANGE
+ * CURRENT ROW) + 1)" regardless of the frame options. We'll set the
+ * frame options to "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW"
+ * so they agree with what window_row_number_support() optimized the
+ * frame options to be. Using ROWS instead of RANGE saves from doing
+ * peer row checks during execution.
+ */
+ req->frameOptions = (FRAMEOPTION_NONDEFAULT |
+ FRAMEOPTION_ROWS |
+ FRAMEOPTION_START_UNBOUNDED_PRECEDING |
+ FRAMEOPTION_END_CURRENT_ROW);
+
+ PG_RETURN_POINTER(req);
+ }
+
PG_RETURN_POINTER(NULL);
}
@@ -190,6 +229,24 @@ window_dense_rank_support(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(req);
}
+ if (IsA(rawreq, SupportRequestOptimizeWindowClause))
+ {
+ SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
+
+ /*
+ * dense_rank() is unaffected by the frame options. Here we set the
+ * frame options to match what's done in row_number's support
+ * function. Using ROWS instead of RANGE (the default) saves the
+ * executor from having to check for peer rows.
+ */
+ req->frameOptions = (FRAMEOPTION_NONDEFAULT |
+ FRAMEOPTION_ROWS |
+ FRAMEOPTION_START_UNBOUNDED_PRECEDING |
+ FRAMEOPTION_END_CURRENT_ROW);
+
+ PG_RETURN_POINTER(req);
+ }
+
PG_RETURN_POINTER(NULL);
}
@@ -223,6 +280,37 @@ window_percent_rank(PG_FUNCTION_ARGS)
}
/*
+ * window_percent_rank_support
+ * prosupport function for window_percent_rank()
+ */
+Datum
+window_percent_rank_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ if (IsA(rawreq, SupportRequestOptimizeWindowClause))
+ {
+ SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
+
+ /*
+ * percent_rank() is unaffected by the frame options. Here we set the
+ * frame options to match what's done in row_number's support
+ * function. Using ROWS instead of RANGE (the default) saves the
+ * executor from having to check for peer rows.
+ */
+ req->frameOptions = (FRAMEOPTION_NONDEFAULT |
+ FRAMEOPTION_ROWS |
+ FRAMEOPTION_START_UNBOUNDED_PRECEDING |
+ FRAMEOPTION_END_CURRENT_ROW);
+
+ PG_RETURN_POINTER(req);
+ }
+
+ PG_RETURN_POINTER(NULL);
+}
+
+
+/*
* cume_dist
* return fraction between 0 and 1 inclusive,
* which is described as NP / NR, where NP is the number of rows preceding or
@@ -266,6 +354,36 @@ window_cume_dist(PG_FUNCTION_ARGS)
}
/*
+ * window_cume_dist_support
+ * prosupport function for window_cume_dist()
+ */
+Datum
+window_cume_dist_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ if (IsA(rawreq, SupportRequestOptimizeWindowClause))
+ {
+ SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
+
+ /*
+ * cume_dist() is unaffected by the frame options. Here we set the
+ * frame options to match what's done in row_number's support
+ * function. Using ROWS instead of RANGE (the default) saves the
+ * executor from having to check for peer rows.
+ */
+ req->frameOptions = (FRAMEOPTION_NONDEFAULT |
+ FRAMEOPTION_ROWS |
+ FRAMEOPTION_START_UNBOUNDED_PRECEDING |
+ FRAMEOPTION_END_CURRENT_ROW);
+
+ PG_RETURN_POINTER(req);
+ }
+
+ PG_RETURN_POINTER(NULL);
+}
+
+/*
* ntile
* compute an exact numeric value with scale 0 (zero),
* ranging from 1 (one) to n, per spec.
@@ -339,6 +457,36 @@ window_ntile(PG_FUNCTION_ARGS)
}
/*
+ * window_ntile_support
+ * prosupport function for window_ntile()
+ */
+Datum
+window_ntile_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ if (IsA(rawreq, SupportRequestOptimizeWindowClause))
+ {
+ SupportRequestOptimizeWindowClause *req = (SupportRequestOptimizeWindowClause *) rawreq;
+
+ /*
+ * ntile() is unaffected by the frame options. Here we set the frame
+ * options to match what's done in row_number's support function.
+ * Using ROWS instead of RANGE (the default) saves the executor from
+ * having to check for peer rows.
+ */
+ req->frameOptions = (FRAMEOPTION_NONDEFAULT |
+ FRAMEOPTION_ROWS |
+ FRAMEOPTION_START_UNBOUNDED_PRECEDING |
+ FRAMEOPTION_END_CURRENT_ROW);
+
+ PG_RETURN_POINTER(req);
+ }
+
+ PG_RETURN_POINTER(NULL);
+}
+
+/*
* leadlag_common
* common operation of lead() and lag()
* For lead() forward is true, whereas for lag() it is false.