aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/postgres_fdw/expected/postgres_fdw.out71
-rw-r--r--contrib/postgres_fdw/sql/postgres_fdw.sql14
-rw-r--r--src/backend/nodes/outfuncs.c1
-rw-r--r--src/backend/optimizer/path/allpaths.c78
-rw-r--r--src/backend/optimizer/path/joinrels.c7
-rw-r--r--src/backend/optimizer/plan/setrefs.c58
-rw-r--r--src/backend/optimizer/util/placeholder.c58
-rw-r--r--src/backend/optimizer/util/relnode.c125
-rw-r--r--src/include/nodes/relation.h8
-rw-r--r--src/test/regress/expected/partition_aggregate.out32
-rw-r--r--src/test/regress/expected/partition_join.out57
-rw-r--r--src/test/regress/sql/partition_aggregate.sql5
-rw-r--r--src/test/regress/sql/partition_join.sql8
13 files changed, 266 insertions, 256 deletions
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 4048ef5d24c..48e3481d1f2 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -8218,8 +8218,9 @@ ALTER TABLE fprt2_p1 SET (autovacuum_enabled = 'false');
ALTER TABLE fprt2_p2 SET (autovacuum_enabled = 'false');
INSERT INTO fprt2_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 3) i;
INSERT INTO fprt2_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 3) i;
-CREATE FOREIGN TABLE ftprt2_p1 PARTITION OF fprt2 FOR VALUES FROM (0) TO (250)
+CREATE FOREIGN TABLE ftprt2_p1 (b int, c varchar, a int)
SERVER loopback OPTIONS (table_name 'fprt2_p1', use_remote_estimate 'true');
+ALTER TABLE fprt2 ATTACH PARTITION ftprt2_p1 FOR VALUES FROM (0) TO (250);
CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500)
SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true');
ANALYZE fprt2;
@@ -8270,28 +8271,42 @@ SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10)
8 | |
(5 rows)
--- with whole-row reference
+-- with whole-row reference; partitionwise join does not apply
EXPLAIN (COSTS OFF)
-SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
- QUERY PLAN
----------------------------------------------------------------------------------
+SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2;
+ QUERY PLAN
+--------------------------------------------------------
Sort
Sort Key: ((t1.*)::fprt1), ((t2.*)::fprt2)
- -> Append
- -> Foreign Scan
- Relations: (public.ftprt1_p1 t1) INNER JOIN (public.ftprt2_p1 t2)
- -> Foreign Scan
- Relations: (public.ftprt1_p2 t1) INNER JOIN (public.ftprt2_p2 t2)
-(7 rows)
+ -> Hash Full Join
+ Hash Cond: (t1.a = t2.b)
+ -> Append
+ -> Foreign Scan on ftprt1_p1 t1
+ -> Foreign Scan on ftprt1_p2 t1_1
+ -> Hash
+ -> Append
+ -> Foreign Scan on ftprt2_p1 t2
+ -> Foreign Scan on ftprt2_p2 t2_1
+(11 rows)
-SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
- t1 | t2
+SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2;
+ wr | wr
----------------+----------------
(0,0,0000) | (0,0,0000)
+ (50,50,0001) |
+ (100,100,0002) |
(150,150,0003) | (150,150,0003)
+ (200,200,0004) |
(250,250,0005) | (250,250,0005)
+ (300,300,0006) |
+ (350,350,0007) |
(400,400,0008) | (400,400,0008)
-(4 rows)
+ (450,450,0009) |
+ | (75,75,0001)
+ | (225,225,0004)
+ | (325,325,0006)
+ | (475,475,0009)
+(14 rows)
-- join with lateral reference
EXPLAIN (COSTS OFF)
@@ -8355,6 +8370,34 @@ SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE
| | 475 | t2_phv
(14 rows)
+-- test FOR UPDATE; partitionwise join does not apply
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
+ QUERY PLAN
+--------------------------------------------------------------
+ LockRows
+ -> Sort
+ Sort Key: t1.a
+ -> Hash Join
+ Hash Cond: (t2.b = t1.a)
+ -> Append
+ -> Foreign Scan on ftprt2_p1 t2
+ -> Foreign Scan on ftprt2_p2 t2_1
+ -> Hash
+ -> Append
+ -> Foreign Scan on ftprt1_p1 t1
+ -> Foreign Scan on ftprt1_p2 t1_1
+(12 rows)
+
+SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
+ a | b
+-----+-----
+ 0 | 0
+ 150 | 150
+ 250 | 250
+ 400 | 400
+(4 rows)
+
RESET enable_partitionwise_join;
-- ===================================================================
-- test partitionwise aggregates
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index ad08c4ad554..88b9f5c42d6 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -2216,8 +2216,9 @@ ALTER TABLE fprt2_p1 SET (autovacuum_enabled = 'false');
ALTER TABLE fprt2_p2 SET (autovacuum_enabled = 'false');
INSERT INTO fprt2_p1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 249, 3) i;
INSERT INTO fprt2_p2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(250, 499, 3) i;
-CREATE FOREIGN TABLE ftprt2_p1 PARTITION OF fprt2 FOR VALUES FROM (0) TO (250)
+CREATE FOREIGN TABLE ftprt2_p1 (b int, c varchar, a int)
SERVER loopback OPTIONS (table_name 'fprt2_p1', use_remote_estimate 'true');
+ALTER TABLE fprt2 ATTACH PARTITION ftprt2_p1 FOR VALUES FROM (0) TO (250);
CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500)
SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true');
ANALYZE fprt2;
@@ -2234,10 +2235,10 @@ EXPLAIN (COSTS OFF)
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
SELECT t1.a,t2.b,t2.c FROM fprt1 t1 LEFT JOIN (SELECT * FROM fprt2 WHERE a < 10) t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a < 10 ORDER BY 1,2,3;
--- with whole-row reference
+-- with whole-row reference; partitionwise join does not apply
EXPLAIN (COSTS OFF)
-SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
-SELECT t1,t2 FROM fprt1 t1 JOIN fprt2 t2 ON (t1.a = t2.b and t1.b = t2.a) WHERE t1.a % 25 =0 ORDER BY 1,2;
+SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2;
+SELECT t1.wr, t2.wr FROM (SELECT t1 wr, a FROM fprt1 t1 WHERE t1.a % 25 = 0) t1 FULL JOIN (SELECT t2 wr, b FROM fprt2 t2 WHERE t2.b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY 1,2;
-- join with lateral reference
EXPLAIN (COSTS OFF)
@@ -2249,6 +2250,11 @@ EXPLAIN (COSTS OFF)
SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE a % 25 = 0) t1 FULL JOIN (SELECT 't2_phv' phv, * FROM fprt2 WHERE b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY t1.a, t2.b;
SELECT t1.a, t1.phv, t2.b, t2.phv FROM (SELECT 't1_phv' phv, * FROM fprt1 WHERE a % 25 = 0) t1 FULL JOIN (SELECT 't2_phv' phv, * FROM fprt2 WHERE b % 25 = 0) t2 ON (t1.a = t2.b) ORDER BY t1.a, t2.b;
+-- test FOR UPDATE; partitionwise join does not apply
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
+SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
+
RESET enable_partitionwise_join;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 1d78b53754b..7dfb82394cf 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2366,6 +2366,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_UINT_FIELD(baserestrict_min_security);
WRITE_NODE_FIELD(joininfo);
WRITE_BOOL_FIELD(has_eclass_joins);
+ WRITE_BOOL_FIELD(consider_partitionwise_join);
WRITE_BITMAPSET_FIELD(top_parent_relids);
WRITE_NODE_FIELD(partitioned_child_rels);
}
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 0e80aeb65cd..5db1688bf04 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -910,6 +910,17 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
}
/*
+ * If this is a partitioned baserel, set the consider_partitionwise_join
+ * flag; currently, we only consider partitionwise joins with the baserel
+ * if its targetlist doesn't contain a whole-row Var.
+ */
+ if (enable_partitionwise_join &&
+ rel->reloptkind == RELOPT_BASEREL &&
+ rte->relkind == RELKIND_PARTITIONED_TABLE &&
+ rel->attr_needed[InvalidAttrNumber - rel->min_attr] == NULL)
+ rel->consider_partitionwise_join = true;
+
+ /*
* Initialize to compute size estimates for whole append relation.
*
* We handle width estimates by weighting the widths of different child
@@ -956,54 +967,6 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
childrel = find_base_rel(root, childRTindex);
Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);
- if (rel->part_scheme)
- {
- AttrNumber attno;
-
- /*
- * We need attr_needed data for building targetlist of a join
- * relation representing join between matching partitions for
- * partitionwise join. A given attribute of a child will be needed
- * in the same highest joinrel where the corresponding attribute
- * of parent is needed. Hence it suffices to use the same Relids
- * set for parent and child.
- */
- for (attno = rel->min_attr; attno <= rel->max_attr; attno++)
- {
- int index = attno - rel->min_attr;
- Relids attr_needed = rel->attr_needed[index];
-
- /* System attributes do not need translation. */
- if (attno <= 0)
- {
- Assert(rel->min_attr == childrel->min_attr);
- childrel->attr_needed[index] = attr_needed;
- }
- else
- {
- Var *var = list_nth_node(Var,
- appinfo->translated_vars,
- attno - 1);
- int child_index;
-
- /*
- * Ignore any column dropped from the parent.
- * Corresponding Var won't have any translation. It won't
- * have attr_needed information, since it can not be
- * referenced in the query.
- */
- if (var == NULL)
- {
- Assert(attr_needed == NULL);
- continue;
- }
-
- child_index = var->varattno - childrel->min_attr;
- childrel->attr_needed[child_index] = attr_needed;
- }
- }
- }
-
/*
* Copy/Modify targetlist. Even if this child is deemed empty, we need
* its targetlist in case it falls on nullable side in a child-join
@@ -1181,6 +1144,22 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
1, &appinfo);
/*
+ * Note: we could compute appropriate attr_needed data for the child's
+ * variables, by transforming the parent's attr_needed through the
+ * translated_vars mapping. However, currently there's no need
+ * because attr_needed is only examined for base relations not
+ * otherrels. So we just leave the child's attr_needed empty.
+ */
+
+ /*
+ * If we consider partitionwise joins with the parent rel, do the same
+ * for partitioned child rels.
+ */
+ if (rel->consider_partitionwise_join &&
+ childRTE->relkind == RELKIND_PARTITIONED_TABLE)
+ childrel->consider_partitionwise_join = true;
+
+ /*
* If parallelism is allowable for this query in general, see whether
* it's allowable for this childrel in particular. But if we've
* already decided the appendrel is not parallel-safe as a whole,
@@ -3538,6 +3517,9 @@ generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
if (!IS_PARTITIONED_REL(rel))
return;
+ /* The relation should have consider_partitionwise_join set. */
+ Assert(rel->consider_partitionwise_join);
+
/* Guard against stack overflow due to overly deep partition hierarchy. */
check_stack_depth();
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 7008e1318e4..d3d21fed5d4 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -1322,6 +1322,9 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
if (!IS_PARTITIONED_REL(joinrel))
return;
+ /* The join relation should have consider_partitionwise_join set. */
+ Assert(joinrel->consider_partitionwise_join);
+
/*
* Since this join relation is partitioned, all the base relations
* participating in this join must be partitioned and so are all the
@@ -1330,6 +1333,10 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
Assert(IS_PARTITIONED_REL(rel1) && IS_PARTITIONED_REL(rel2));
Assert(REL_HAS_ALL_PART_PROPS(rel1) && REL_HAS_ALL_PART_PROPS(rel2));
+ /* The joining relations should have consider_partitionwise_join set. */
+ Assert(rel1->consider_partitionwise_join &&
+ rel2->consider_partitionwise_join);
+
/*
* The partition scheme of the join relation should match that of the
* joining relations.
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 69dd327f0c9..f66f39d8c68 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -41,9 +41,6 @@ typedef struct
int num_vars; /* number of plain Var tlist entries */
bool has_ph_vars; /* are there PlaceHolderVar entries? */
bool has_non_vars; /* are there other entries? */
- bool has_conv_whole_rows; /* are there ConvertRowtypeExpr
- * entries encapsulating a whole-row
- * Var? */
tlist_vinfo vars[FLEXIBLE_ARRAY_MEMBER]; /* has num_vars entries */
} indexed_tlist;
@@ -143,7 +140,6 @@ static List *set_returning_clause_references(PlannerInfo *root,
int rtoffset);
static bool extract_query_dependencies_walker(Node *node,
PlannerInfo *context);
-static bool is_converted_whole_row_reference(Node *node);
/*****************************************************************************
*
@@ -1997,7 +1993,6 @@ build_tlist_index(List *tlist)
itlist->tlist = tlist;
itlist->has_ph_vars = false;
itlist->has_non_vars = false;
- itlist->has_conv_whole_rows = false;
/* Find the Vars and fill in the index array */
vinfo = itlist->vars;
@@ -2016,8 +2011,6 @@ build_tlist_index(List *tlist)
}
else if (tle->expr && IsA(tle->expr, PlaceHolderVar))
itlist->has_ph_vars = true;
- else if (is_converted_whole_row_reference((Node *) tle->expr))
- itlist->has_conv_whole_rows = true;
else
itlist->has_non_vars = true;
}
@@ -2033,10 +2026,7 @@ build_tlist_index(List *tlist)
* This is like build_tlist_index, but we only index tlist entries that
* are Vars belonging to some rel other than the one specified. We will set
* has_ph_vars (allowing PlaceHolderVars to be matched), but not has_non_vars
- * (so nothing other than Vars and PlaceHolderVars can be matched). In case of
- * DML, where this function will be used, returning lists from child relations
- * will be appended similar to a simple append relation. That does not require
- * fixing ConvertRowtypeExpr references. So, those are not considered here.
+ * (so nothing other than Vars and PlaceHolderVars can be matched).
*/
static indexed_tlist *
build_tlist_index_other_vars(List *tlist, Index ignore_rel)
@@ -2053,7 +2043,6 @@ build_tlist_index_other_vars(List *tlist, Index ignore_rel)
itlist->tlist = tlist;
itlist->has_ph_vars = false;
itlist->has_non_vars = false;
- itlist->has_conv_whole_rows = false;
/* Find the desired Vars and fill in the index array */
vinfo = itlist->vars;
@@ -2256,7 +2245,6 @@ static Node *
fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
{
Var *newvar;
- bool converted_whole_row;
if (node == NULL)
return NULL;
@@ -2326,12 +2314,8 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
}
if (IsA(node, Param))
return fix_param_node(context->root, (Param *) node);
-
/* Try matching more complex expressions too, if tlists have any */
- converted_whole_row = is_converted_whole_row_reference(node);
- if (context->outer_itlist &&
- (context->outer_itlist->has_non_vars ||
- (context->outer_itlist->has_conv_whole_rows && converted_whole_row)))
+ if (context->outer_itlist && context->outer_itlist->has_non_vars)
{
newvar = search_indexed_tlist_for_non_var((Expr *) node,
context->outer_itlist,
@@ -2339,9 +2323,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context)
if (newvar)
return (Node *) newvar;
}
- if (context->inner_itlist &&
- (context->inner_itlist->has_non_vars ||
- (context->inner_itlist->has_conv_whole_rows && converted_whole_row)))
+ if (context->inner_itlist && context->inner_itlist->has_non_vars)
{
newvar = search_indexed_tlist_for_non_var((Expr *) node,
context->inner_itlist,
@@ -2461,9 +2443,7 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context)
/* If no match, just fall through to process it normally */
}
/* Try matching more complex expressions too, if tlist has any */
- if (context->subplan_itlist->has_non_vars ||
- (context->subplan_itlist->has_conv_whole_rows &&
- is_converted_whole_row_reference(node)))
+ if (context->subplan_itlist->has_non_vars)
{
newvar = search_indexed_tlist_for_non_var((Expr *) node,
context->subplan_itlist,
@@ -2670,33 +2650,3 @@ extract_query_dependencies_walker(Node *node, PlannerInfo *context)
return expression_tree_walker(node, extract_query_dependencies_walker,
(void *) context);
}
-
-/*
- * is_converted_whole_row_reference
- * If the given node is a ConvertRowtypeExpr encapsulating a whole-row
- * reference as implicit cast, return true. Otherwise return false.
- */
-static bool
-is_converted_whole_row_reference(Node *node)
-{
- ConvertRowtypeExpr *convexpr;
-
- if (!node || !IsA(node, ConvertRowtypeExpr))
- return false;
-
- /* Traverse nested ConvertRowtypeExpr's. */
- convexpr = castNode(ConvertRowtypeExpr, node);
- while (convexpr->convertformat == COERCE_IMPLICIT_CAST &&
- IsA(convexpr->arg, ConvertRowtypeExpr))
- convexpr = castNode(ConvertRowtypeExpr, convexpr->arg);
-
- if (IsA(convexpr->arg, Var))
- {
- Var *var = castNode(Var, convexpr->arg);
-
- if (var->varattno == 0)
- return true;
- }
-
- return false;
-}
diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c
index c79d0f25d47..4dc632dcb52 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -20,7 +20,6 @@
#include "optimizer/pathnode.h"
#include "optimizer/placeholder.h"
#include "optimizer/planmain.h"
-#include "optimizer/prep.h"
#include "optimizer/var.h"
#include "utils/lsyscache.h"
@@ -415,10 +414,6 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
Relids relids = joinrel->relids;
ListCell *lc;
- /* This function is called only on the parent relations. */
- Assert(!IS_OTHER_REL(joinrel) && !IS_OTHER_REL(outer_rel) &&
- !IS_OTHER_REL(inner_rel));
-
foreach(lc, root->placeholder_list)
{
PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
@@ -464,56 +459,3 @@ add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel,
}
}
}
-
-/*
- * add_placeholders_to_child_joinrel
- * Translate the PHVs in parent's targetlist and add them to the child's
- * targetlist. Also adjust the cost
- */
-void
-add_placeholders_to_child_joinrel(PlannerInfo *root, RelOptInfo *childrel,
- RelOptInfo *parentrel)
-{
- ListCell *lc;
- AppendRelInfo **appinfos;
- int nappinfos;
-
- Assert(IS_JOIN_REL(childrel) && IS_JOIN_REL(parentrel));
- Assert(IS_OTHER_REL(childrel));
-
- /* Nothing to do if no PHVs. */
- if (root->placeholder_list == NIL)
- return;
-
- appinfos = find_appinfos_by_relids(root, childrel->relids, &nappinfos);
- foreach(lc, parentrel->reltarget->exprs)
- {
- PlaceHolderVar *phv = lfirst(lc);
-
- if (IsA(phv, PlaceHolderVar))
- {
- /*
- * In case the placeholder Var refers to any of the parent
- * relations, translate it to refer to the corresponding child.
- */
- if (bms_overlap(phv->phrels, parentrel->relids) &&
- childrel->reloptkind == RELOPT_OTHER_JOINREL)
- {
- phv = (PlaceHolderVar *) adjust_appendrel_attrs(root,
- (Node *) phv,
- nappinfos,
- appinfos);
- }
-
- childrel->reltarget->exprs = lappend(childrel->reltarget->exprs,
- phv);
- }
- }
-
- /* Adjust the cost and width of child targetlist. */
- childrel->reltarget->cost.startup = parentrel->reltarget->cost.startup;
- childrel->reltarget->cost.per_tuple = parentrel->reltarget->cost.per_tuple;
- childrel->reltarget->width = parentrel->reltarget->width;
-
- pfree(appinfos);
-}
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index c69740eda6c..39f5729b915 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -57,6 +57,11 @@ static void add_join_rel(PlannerInfo *root, RelOptInfo *joinrel);
static void build_joinrel_partition_info(RelOptInfo *joinrel,
RelOptInfo *outer_rel, RelOptInfo *inner_rel,
List *restrictlist, JoinType jointype);
+static void build_child_join_reltarget(PlannerInfo *root,
+ RelOptInfo *parentrel,
+ RelOptInfo *childrel,
+ int nappinfos,
+ AppendRelInfo **appinfos);
/*
@@ -188,6 +193,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->baserestrict_min_security = UINT_MAX;
rel->joininfo = NIL;
rel->has_eclass_joins = false;
+ rel->consider_partitionwise_join = false; /* might get changed later */
rel->part_scheme = NULL;
rel->nparts = 0;
rel->boundinfo = NULL;
@@ -602,6 +608,7 @@ build_join_rel(PlannerInfo *root,
joinrel->baserestrict_min_security = UINT_MAX;
joinrel->joininfo = NIL;
joinrel->has_eclass_joins = false;
+ joinrel->consider_partitionwise_join = false; /* might get changed later */
joinrel->top_parent_relids = NULL;
joinrel->part_scheme = NULL;
joinrel->nparts = 0;
@@ -732,6 +739,9 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
/* Only joins between "other" relations land here. */
Assert(IS_OTHER_REL(outer_rel) && IS_OTHER_REL(inner_rel));
+ /* The parent joinrel should have consider_partitionwise_join set. */
+ Assert(parent_joinrel->consider_partitionwise_join);
+
joinrel->reloptkind = RELOPT_OTHER_JOINREL;
joinrel->relids = bms_union(outer_rel->relids, inner_rel->relids);
joinrel->rows = 0;
@@ -773,6 +783,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->baserestrictcost.per_tuple = 0;
joinrel->joininfo = NIL;
joinrel->has_eclass_joins = false;
+ joinrel->consider_partitionwise_join = false; /* might get changed later */
joinrel->top_parent_relids = NULL;
joinrel->part_scheme = NULL;
joinrel->nparts = 0;
@@ -789,14 +800,13 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
/* Compute information relevant to foreign relations. */
set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
- /* Build targetlist */
- build_joinrel_tlist(root, joinrel, outer_rel);
- build_joinrel_tlist(root, joinrel, inner_rel);
- /* Add placeholder variables. */
- add_placeholders_to_child_joinrel(root, joinrel, parent_joinrel);
+ appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos);
+
+ /* Set up reltarget struct */
+ build_child_join_reltarget(root, parent_joinrel, joinrel,
+ nappinfos, appinfos);
/* Construct joininfo list. */
- appinfos = find_appinfos_by_relids(root, joinrel->relids, &nappinfos);
joinrel->joininfo = (List *) adjust_appendrel_attrs(root,
(Node *) parent_joinrel->joininfo,
nappinfos,
@@ -826,7 +836,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
/* Child joinrel is parallel safe if parent is parallel safe. */
joinrel->consider_parallel = parent_joinrel->consider_parallel;
-
/* Set estimates of the child-joinrel's size. */
set_joinrel_size_estimates(root, joinrel, outer_rel, inner_rel,
sjinfo, restrictlist);
@@ -895,15 +904,9 @@ static void
build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
RelOptInfo *input_rel)
{
- Relids relids;
+ Relids relids = joinrel->relids;
ListCell *vars;
- /* attrs_needed refers to parent relids and not those of a child. */
- if (joinrel->top_parent_relids)
- relids = joinrel->top_parent_relids;
- else
- relids = joinrel->relids;
-
foreach(vars, input_rel->reltarget->exprs)
{
Var *var = (Var *) lfirst(vars);
@@ -919,54 +922,23 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel,
/*
* Otherwise, anything in a baserel or joinrel targetlist ought to be
- * a Var. Children of a partitioned table may have ConvertRowtypeExpr
- * translating whole-row Var of a child to that of the parent.
- * Children of an inherited table or subquery child rels can not
- * directly participate in a join, so other kinds of nodes here.
+ * a Var. (More general cases can only appear in appendrel child
+ * rels, which will never be seen here.)
*/
- if (IsA(var, Var))
- {
- baserel = find_base_rel(root, var->varno);
- ndx = var->varattno - baserel->min_attr;
- }
- else if (IsA(var, ConvertRowtypeExpr))
- {
- ConvertRowtypeExpr *child_expr = (ConvertRowtypeExpr *) var;
- Var *childvar = (Var *) child_expr->arg;
-
- /*
- * Child's whole-row references are converted to look like those
- * of parent using ConvertRowtypeExpr. There can be as many
- * ConvertRowtypeExpr decorations as the depth of partition tree.
- * The argument to the deepest ConvertRowtypeExpr is expected to
- * be a whole-row reference of the child.
- */
- while (IsA(childvar, ConvertRowtypeExpr))
- {
- child_expr = (ConvertRowtypeExpr *) childvar;
- childvar = (Var *) child_expr->arg;
- }
- Assert(IsA(childvar, Var) &&childvar->varattno == 0);
-
- baserel = find_base_rel(root, childvar->varno);
- ndx = 0 - baserel->min_attr;
- }
- else
+ if (!IsA(var, Var))
elog(ERROR, "unexpected node type in rel targetlist: %d",
(int) nodeTag(var));
+ /* Get the Var's original base rel */
+ baserel = find_base_rel(root, var->varno);
- /* Is the target expression still needed above this joinrel? */
+ /* Is it still needed above this joinrel? */
+ ndx = var->varattno - baserel->min_attr;
if (bms_nonempty_difference(baserel->attr_needed[ndx], relids))
{
/* Yup, add it to the output */
joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, var);
-
- /*
- * Vars have cost zero, so no need to adjust reltarget->cost. Even
- * if it's a ConvertRowtypeExpr, it will be computed only for the
- * base relation, costing nothing for a join.
- */
+ /* Vars have cost zero, so no need to adjust reltarget->cost */
joinrel->reltarget->width += baserel->attr_widths[ndx];
}
}
@@ -1626,16 +1598,18 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
/*
* We can only consider this join as an input to further partitionwise
- * joins if (a) the input relations are partitioned, (b) the partition
- * schemes match, and (c) we can identify an equi-join between the
- * partition keys. Note that if it were possible for
- * have_partkey_equi_join to return different answers for the same joinrel
- * depending on which join ordering we try first, this logic would break.
- * That shouldn't happen, though, because of the way the query planner
- * deduces implied equalities and reorders the joins. Please see
- * optimizer/README for details.
+ * joins if (a) the input relations are partitioned and have
+ * consider_partitionwise_join=true, (b) the partition schemes match, and
+ * (c) we can identify an equi-join between the partition keys. Note that
+ * if it were possible for have_partkey_equi_join to return different
+ * answers for the same joinrel depending on which join ordering we try
+ * first, this logic would break. That shouldn't happen, though, because
+ * of the way the query planner deduces implied equalities and reorders
+ * the joins. Please see optimizer/README for details.
*/
if (!IS_PARTITIONED_REL(outer_rel) || !IS_PARTITIONED_REL(inner_rel) ||
+ !outer_rel->consider_partitionwise_join ||
+ !inner_rel->consider_partitionwise_join ||
outer_rel->part_scheme != inner_rel->part_scheme ||
!have_partkey_equi_join(joinrel, outer_rel, inner_rel,
jointype, restrictlist))
@@ -1687,6 +1661,12 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
joinrel->part_rels =
(RelOptInfo **) palloc0(sizeof(RelOptInfo *) * joinrel->nparts);
+ /*
+ * Set the consider_partitionwise_join flag.
+ */
+ Assert(outer_rel->consider_partitionwise_join);
+ Assert(inner_rel->consider_partitionwise_join);
+ joinrel->consider_partitionwise_join = true;
/*
* Construct partition keys for the join.
@@ -1768,3 +1748,26 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
joinrel->nullable_partexprs[cnt] = nullable_partexpr;
}
}
+
+/*
+ * build_child_join_reltarget
+ * Set up a child-join relation's reltarget from a parent-join relation.
+ */
+static void
+build_child_join_reltarget(PlannerInfo *root,
+ RelOptInfo *parentrel,
+ RelOptInfo *childrel,
+ int nappinfos,
+ AppendRelInfo **appinfos)
+{
+ /* Build the targetlist */
+ childrel->reltarget->exprs = (List *)
+ adjust_appendrel_attrs(root,
+ (Node *) parentrel->reltarget->exprs,
+ nappinfos, appinfos);
+
+ /* Set the cost and width fields */
+ childrel->reltarget->cost.startup = parentrel->reltarget->cost.startup;
+ childrel->reltarget->cost.per_tuple = parentrel->reltarget->cost.per_tuple;
+ childrel->reltarget->width = parentrel->reltarget->width;
+}
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 41caf873fb4..adb42650479 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -687,8 +687,12 @@ typedef struct RelOptInfo
* involving this rel */
bool has_eclass_joins; /* T means joininfo is incomplete */
- /* used by "other" relations */
- Relids top_parent_relids; /* Relids of topmost parents */
+ /* used by partitionwise joins: */
+ bool consider_partitionwise_join; /* consider partitionwise
+ * join paths? (if
+ * partitioned rel) */
+ Relids top_parent_relids; /* Relids of topmost parents (if "other"
+ * rel) */
/* used for partitioned relations */
PartitionScheme part_scheme; /* Partitioning scheme. */
diff --git a/src/test/regress/expected/partition_aggregate.out b/src/test/regress/expected/partition_aggregate.out
index d286050c9aa..6bc106831ee 100644
--- a/src/test/regress/expected/partition_aggregate.out
+++ b/src/test/regress/expected/partition_aggregate.out
@@ -449,6 +449,38 @@ SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2
24 | 900 | 100
(5 rows)
+-- Check with whole-row reference; partitionwise aggregation does not apply
+EXPLAIN (COSTS OFF)
+SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3;
+ QUERY PLAN
+-------------------------------------------------------------
+ Sort
+ Sort Key: t1.x, (sum(t1.y)), (count(((t1.*)::pagg_tab1)))
+ -> HashAggregate
+ Group Key: t1.x
+ -> Hash Join
+ Hash Cond: (t1.x = t2.y)
+ -> Append
+ -> Seq Scan on pagg_tab1_p1 t1
+ -> Seq Scan on pagg_tab1_p2 t1_1
+ -> Seq Scan on pagg_tab1_p3 t1_2
+ -> Hash
+ -> Append
+ -> Seq Scan on pagg_tab2_p1 t2
+ -> Seq Scan on pagg_tab2_p2 t2_1
+ -> Seq Scan on pagg_tab2_p3 t2_2
+(15 rows)
+
+SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3;
+ x | sum | count
+----+------+-------
+ 0 | 500 | 100
+ 6 | 1100 | 100
+ 12 | 700 | 100
+ 18 | 1300 | 100
+ 24 | 900 | 100
+(5 rows)
+
-- GROUP BY having other matching key
EXPLAIN (COSTS OFF)
SELECT t2.y, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY 1, 2, 3;
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index ae552eb362c..a1fe29fc3a9 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -62,33 +62,28 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b =
450 | 0450 | 450 | 0450
(4 rows)
--- left outer join, with whole-row reference
+-- left outer join, with whole-row reference; partitionwise join does not apply
EXPLAIN (COSTS OFF)
SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
QUERY PLAN
--------------------------------------------------
Sort
Sort Key: t1.a, t2.b
- -> Append
- -> Hash Right Join
- Hash Cond: (t2.b = t1.a)
+ -> Hash Right Join
+ Hash Cond: (t2.b = t1.a)
+ -> Append
-> Seq Scan on prt2_p1 t2
- -> Hash
+ -> Seq Scan on prt2_p2 t2_1
+ -> Seq Scan on prt2_p3 t2_2
+ -> Hash
+ -> Append
-> Seq Scan on prt1_p1 t1
Filter: (b = 0)
- -> Hash Right Join
- Hash Cond: (t2_1.b = t1_1.a)
- -> Seq Scan on prt2_p2 t2_1
- -> Hash
-> Seq Scan on prt1_p2 t1_1
Filter: (b = 0)
- -> Hash Right Join
- Hash Cond: (t2_2.b = t1_2.a)
- -> Seq Scan on prt2_p3 t2_2
- -> Hash
-> Seq Scan on prt1_p3 t1_2
Filter: (b = 0)
-(21 rows)
+(16 rows)
SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
t1 | t2
@@ -1042,6 +1037,40 @@ SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT *
400 |
(9 rows)
+-- merge join when expression with whole-row reference needs to be sorted;
+-- partitionwise join does not apply
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
+ QUERY PLAN
+-----------------------------------------------------------------------------------------
+ Merge Join
+ Merge Cond: ((t1.a = t2.b) AND (((((t1.*)::prt1))::text) = ((((t2.*)::prt2))::text)))
+ -> Sort
+ Sort Key: t1.a, ((((t1.*)::prt1))::text)
+ -> Result
+ -> Append
+ -> Seq Scan on prt1_p1 t1
+ -> Seq Scan on prt1_p2 t1_1
+ -> Seq Scan on prt1_p3 t1_2
+ -> Sort
+ Sort Key: t2.b, ((((t2.*)::prt2))::text)
+ -> Result
+ -> Append
+ -> Seq Scan on prt2_p1 t2
+ -> Seq Scan on prt2_p2 t2_1
+ -> Seq Scan on prt2_p3 t2_2
+(16 rows)
+
+SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
+ a | b
+----+----
+ 0 | 0
+ 6 | 6
+ 12 | 12
+ 18 | 18
+ 24 | 24
+(5 rows)
+
RESET enable_hashjoin;
RESET enable_nestloop;
--
diff --git a/src/test/regress/sql/partition_aggregate.sql b/src/test/regress/sql/partition_aggregate.sql
index 6d8b73964a1..c387d64db3a 100644
--- a/src/test/regress/sql/partition_aggregate.sql
+++ b/src/test/regress/sql/partition_aggregate.sql
@@ -111,6 +111,11 @@ EXPLAIN (COSTS OFF)
SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3;
SELECT t1.x, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3;
+-- Check with whole-row reference; partitionwise aggregation does not apply
+EXPLAIN (COSTS OFF)
+SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3;
+SELECT t1.x, sum(t1.y), count(t1) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t1.x ORDER BY 1, 2, 3;
+
-- GROUP BY having other matching key
EXPLAIN (COSTS OFF)
SELECT t2.y, sum(t1.y), count(*) FROM pagg_tab1 t1, pagg_tab2 t2 WHERE t1.x = t2.y GROUP BY t2.y ORDER BY 1, 2, 3;
diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql
index 32d4927409e..a74117111b3 100644
--- a/src/test/regress/sql/partition_join.sql
+++ b/src/test/regress/sql/partition_join.sql
@@ -34,7 +34,7 @@ EXPLAIN (COSTS OFF)
SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
SELECT t1.a, t1.c, t2.b, t2.c FROM prt1 t1, prt2 t2 WHERE t1.a = t2.b AND t1.b = 0 ORDER BY t1.a, t2.b;
--- left outer join, with whole-row reference
+-- left outer join, with whole-row reference; partitionwise join does not apply
EXPLAIN (COSTS OFF)
SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
SELECT t1, t2 FROM prt1 t1 LEFT JOIN prt2 t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
@@ -160,6 +160,12 @@ EXPLAIN (COSTS OFF)
SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
SELECT t1.a, t2.b FROM (SELECT * FROM prt1 WHERE a < 450) t1 LEFT JOIN (SELECT * FROM prt2 WHERE b > 250) t2 ON t1.a = t2.b WHERE t1.b = 0 ORDER BY t1.a, t2.b;
+-- merge join when expression with whole-row reference needs to be sorted;
+-- partitionwise join does not apply
+EXPLAIN (COSTS OFF)
+SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
+SELECT t1.a, t2.b FROM prt1 t1, prt2 t2 WHERE t1::text = t2::text AND t1.a = t2.b ORDER BY t1.a;
+
RESET enable_hashjoin;
RESET enable_nestloop;