aboutsummaryrefslogtreecommitdiff
path: root/contrib/postgres_fdw/postgres_fdw.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/postgres_fdw/postgres_fdw.c')
-rw-r--r--contrib/postgres_fdw/postgres_fdw.c388
1 files changed, 359 insertions, 29 deletions
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index d0d36aaa0dc..1b37332cda3 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -247,6 +247,25 @@ typedef struct PgFdwAnalyzeState
} PgFdwAnalyzeState;
/*
+ * This enum describes what's kept in the fdw_private list for a ForeignPath.
+ * We store:
+ *
+ * 1) Boolean flag showing if the remote query has the final sort
+ */
+enum FdwPathPrivateIndex
+{
+ /* has-final-sort flag (as an integer Value node) */
+ FdwPathPrivateHasFinalSort
+};
+
+/* Struct for extra information passed to estimate_path_cost_size() */
+typedef struct
+{
+ PathTarget *target;
+ bool has_final_sort;
+} PgFdwPathExtraData;
+
+/*
* Identify the attribute where data conversion fails.
*/
typedef struct ConversionLocation
@@ -368,6 +387,7 @@ static void estimate_path_cost_size(PlannerInfo *root,
RelOptInfo *foreignrel,
List *param_join_conds,
List *pathkeys,
+ PgFdwPathExtraData *fpextra,
double *p_rows, int *p_width,
Cost *p_startup_cost, Cost *p_total_cost);
static void get_remote_estimate(const char *sql,
@@ -376,6 +396,12 @@ static void get_remote_estimate(const char *sql,
int *width,
Cost *startup_cost,
Cost *total_cost);
+static void adjust_foreign_grouping_path_cost(PlannerInfo *root,
+ List *pathkeys,
+ double retrieved_rows,
+ double width,
+ Cost *p_startup_cost,
+ Cost *p_run_cost);
static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
EquivalenceClass *ec, EquivalenceMember *em,
void *arg);
@@ -452,6 +478,9 @@ static void add_foreign_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
RelOptInfo *grouped_rel,
GroupPathExtraData *extra);
+static void add_foreign_ordered_paths(PlannerInfo *root,
+ RelOptInfo *input_rel,
+ RelOptInfo *ordered_rel);
static void apply_server_options(PgFdwRelationInfo *fpinfo);
static void apply_table_options(PgFdwRelationInfo *fpinfo);
static void merge_fdw_options(PgFdwRelationInfo *fpinfo,
@@ -637,7 +666,7 @@ postgresGetForeignRelSize(PlannerInfo *root,
* values in fpinfo so we don't need to do it again to generate the
* basic foreign path.
*/
- estimate_path_cost_size(root, baserel, NIL, NIL,
+ estimate_path_cost_size(root, baserel, NIL, NIL, NULL,
&fpinfo->rows, &fpinfo->width,
&fpinfo->startup_cost, &fpinfo->total_cost);
@@ -668,7 +697,7 @@ postgresGetForeignRelSize(PlannerInfo *root,
set_baserel_size_estimates(root, baserel);
/* Fill in basically-bogus cost estimates for use later. */
- estimate_path_cost_size(root, baserel, NIL, NIL,
+ estimate_path_cost_size(root, baserel, NIL, NIL, NULL,
&fpinfo->rows, &fpinfo->width,
&fpinfo->startup_cost, &fpinfo->total_cost);
}
@@ -827,6 +856,7 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
* Pushing the query_pathkeys to the remote server is always worth
* considering, because it might let us avoid a local sort.
*/
+ fpinfo->qp_is_pushdown_safe = false;
if (root->query_pathkeys)
{
bool query_pathkeys_ok = true;
@@ -857,7 +887,10 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
}
if (query_pathkeys_ok)
+ {
useful_pathkeys_list = list_make1(list_copy(root->query_pathkeys));
+ fpinfo->qp_is_pushdown_safe = true;
+ }
}
/*
@@ -1102,7 +1135,7 @@ postgresGetForeignPaths(PlannerInfo *root,
/* Get a cost estimate from the remote */
estimate_path_cost_size(root, baserel,
- param_info->ppi_clauses, NIL,
+ param_info->ppi_clauses, NIL, NULL,
&rows, &width,
&startup_cost, &total_cost);
@@ -1149,8 +1182,16 @@ postgresGetForeignPlan(PlannerInfo *root,
List *fdw_recheck_quals = NIL;
List *retrieved_attrs;
StringInfoData sql;
+ bool has_final_sort = false;
ListCell *lc;
+ /*
+ * Get FDW private data created by postgresGetForeignUpperPaths(), if any.
+ */
+ if (best_path->fdw_private)
+ has_final_sort = intVal(list_nth(best_path->fdw_private,
+ FdwPathPrivateHasFinalSort));
+
if (IS_SIMPLE_REL(foreignrel))
{
/*
@@ -1299,7 +1340,8 @@ postgresGetForeignPlan(PlannerInfo *root,
initStringInfo(&sql);
deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist,
remote_exprs, best_path->path.pathkeys,
- false, &retrieved_attrs, &params_list);
+ has_final_sort, false,
+ &retrieved_attrs, &params_list);
/* Remember remote_exprs for possible use by postgresPlanDirectModify */
fpinfo->final_remote_exprs = remote_exprs;
@@ -2483,6 +2525,8 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
*
* param_join_conds are the parameterization clauses with outer relations.
* pathkeys specify the expected sort order if any for given path being costed.
+ * fpextra specifies additional post-scan/join-processing steps such as the
+ * final sort.
*
* The function returns the cost and size estimates in p_row, p_width,
* p_startup_cost and p_total_cost variables.
@@ -2492,6 +2536,7 @@ estimate_path_cost_size(PlannerInfo *root,
RelOptInfo *foreignrel,
List *param_join_conds,
List *pathkeys,
+ PgFdwPathExtraData *fpextra,
double *p_rows, int *p_width,
Cost *p_startup_cost, Cost *p_total_cost)
{
@@ -2556,8 +2601,9 @@ estimate_path_cost_size(PlannerInfo *root,
initStringInfo(&sql);
appendStringInfoString(&sql, "EXPLAIN ");
deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist,
- remote_conds, pathkeys, false,
- &retrieved_attrs, NULL);
+ remote_conds, pathkeys,
+ fpextra ? fpextra->has_final_sort : false,
+ false, &retrieved_attrs, NULL);
/* Get the remote estimate */
conn = GetConnection(fpinfo->user, false);
@@ -2625,9 +2671,9 @@ estimate_path_cost_size(PlannerInfo *root,
/*
* We will come here again and again with different set of pathkeys
- * that caller wants to cost. We don't need to calculate the cost of
- * bare scan each time. Instead, use the costs if we have cached them
- * already.
+ * that caller wants to cost. We don't need to calculate the costs of
+ * the underlying scan, join, or grouping each time. Instead, use the
+ * costs if we have cached them already.
*/
if (fpinfo->rel_startup_cost >= 0 && fpinfo->rel_total_cost >= 0)
{
@@ -2845,23 +2891,52 @@ estimate_path_cost_size(PlannerInfo *root,
*/
if (pathkeys != NIL)
{
- startup_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
- run_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
+ if (IS_UPPER_REL(foreignrel))
+ {
+ Assert(foreignrel->reloptkind == RELOPT_UPPER_REL &&
+ fpinfo->stage == UPPERREL_GROUP_AGG);
+ adjust_foreign_grouping_path_cost(root, pathkeys,
+ retrieved_rows, width,
+ &startup_cost, &run_cost);
+ }
+ else
+ {
+ startup_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
+ run_cost *= DEFAULT_FDW_SORT_MULTIPLIER;
+ }
}
total_cost = startup_cost + run_cost;
}
/*
- * Cache the costs for scans without any pathkeys or parameterization
- * before adding the costs for transferring data from the foreign server.
- * These costs are useful for costing the join between this relation and
- * another foreign relation or to calculate the costs of paths with
- * pathkeys for this relation, when the costs can not be obtained from the
- * foreign server. This function will be called at least once for every
- * foreign relation without pathkeys and parameterization.
+ * If this includes the final sort step, the given target, which will be
+ * applied to the resulting path, might have different expressions from
+ * the foreignrel's reltarget (see make_sort_input_target()); adjust tlist
+ * eval costs.
*/
- if (pathkeys == NIL && param_join_conds == NIL)
+ if (fpextra && fpextra->target != foreignrel->reltarget)
+ {
+ QualCost oldcost = foreignrel->reltarget->cost;
+ QualCost newcost = fpextra->target->cost;
+
+ startup_cost += newcost.startup - oldcost.startup;
+ total_cost += newcost.startup - oldcost.startup;
+ total_cost += (newcost.per_tuple - oldcost.per_tuple) * rows;
+ }
+
+ /*
+ * Cache the costs for scans, joins, or groupings without any
+ * parameterization, pathkeys, or additional post-scan/join-processing
+ * steps, before adding the costs for transferring data from the foreign
+ * server. These costs are useful for costing remote joins involving this
+ * relation or costing other remote operations for this relation such as
+ * remote sorts, when the costs can not be obtained from the foreign
+ * server. This function will be called at least once for every foreign
+ * relation without any parameterization, pathkeys, or additional
+ * post-scan/join-processing steps.
+ */
+ if (pathkeys == NIL && param_join_conds == NIL && fpextra == NULL)
{
fpinfo->rel_startup_cost = startup_cost;
fpinfo->rel_total_cost = total_cost;
@@ -2937,6 +3012,58 @@ get_remote_estimate(const char *sql, PGconn *conn,
}
/*
+ * Adjust the cost estimates of a foreign grouping path to include the cost of
+ * generating properly-sorted output.
+ */
+static void
+adjust_foreign_grouping_path_cost(PlannerInfo *root,
+ List *pathkeys,
+ double retrieved_rows,
+ double width,
+ Cost *p_startup_cost,
+ Cost *p_run_cost)
+{
+ /*
+ * If the GROUP BY clause isn't sort-able, the plan chosen by the remote
+ * side is unlikely to generate properly-sorted output, so it would need
+ * an explicit sort; adjust the given costs with cost_sort(). Likewise,
+ * if the GROUP BY clause is sort-able but isn't a superset of the given
+ * pathkeys, adjust the costs with that function. Otherwise, adjust the
+ * costs by applying the same heuristic as for the scan or join case.
+ */
+ if (!grouping_is_sortable(root->parse->groupClause) ||
+ !pathkeys_contained_in(pathkeys, root->group_pathkeys))
+ {
+ Path sort_path; /* dummy for result of cost_sort */
+
+ cost_sort(&sort_path,
+ root,
+ pathkeys,
+ *p_startup_cost + *p_run_cost,
+ retrieved_rows,
+ width,
+ 0.0,
+ work_mem,
+ -1.0);
+
+ *p_startup_cost = sort_path.startup_cost;
+ *p_run_cost = sort_path.total_cost - sort_path.startup_cost;
+ }
+ else
+ {
+ /*
+ * The default extra cost seems too large for foreign-grouping cases;
+ * add 1/4th of that default.
+ */
+ double sort_multiplier = 1.0 + (DEFAULT_FDW_SORT_MULTIPLIER
+ - 1.0) * 0.25;
+
+ *p_startup_cost *= sort_multiplier;
+ *p_run_cost *= sort_multiplier;
+ }
+}
+
+/*
* Detect whether we want to process an EquivalenceClass member.
*
* This is a callback for use by generate_implied_equalities_for_column.
@@ -4935,7 +5062,7 @@ add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel,
List *useful_pathkeys = lfirst(lc);
Path *sorted_epq_path;
- estimate_path_cost_size(root, rel, NIL, useful_pathkeys,
+ estimate_path_cost_size(root, rel, NIL, useful_pathkeys, NULL,
&rows, &width, &startup_cost, &total_cost);
/*
@@ -5186,8 +5313,8 @@ postgresGetForeignJoinPaths(PlannerInfo *root,
extra->sjinfo);
/* Estimate costs for bare join relation */
- estimate_path_cost_size(root, joinrel, NIL, NIL, &rows,
- &width, &startup_cost, &total_cost);
+ estimate_path_cost_size(root, joinrel, NIL, NIL, NULL,
+ &rows, &width, &startup_cost, &total_cost);
/* Now update this information in the joinrel */
joinrel->rows = rows;
joinrel->reltarget->width = width;
@@ -5437,8 +5564,6 @@ foreign_grouping_ok(PlannerInfo *root, RelOptInfo *grouped_rel,
* postgresGetForeignUpperPaths
* Add paths for post-join operations like aggregation, grouping etc. if
* corresponding operations are safe to push down.
- *
- * Right now, we only support aggregate, grouping and having clause pushdown.
*/
static void
postgresGetForeignUpperPaths(PlannerInfo *root, UpperRelationKind stage,
@@ -5456,15 +5581,29 @@ postgresGetForeignUpperPaths(PlannerInfo *root, UpperRelationKind stage,
return;
/* Ignore stages we don't support; and skip any duplicate calls. */
- if (stage != UPPERREL_GROUP_AGG || output_rel->fdw_private)
+ if ((stage != UPPERREL_GROUP_AGG &&
+ stage != UPPERREL_ORDERED) ||
+ output_rel->fdw_private)
return;
fpinfo = (PgFdwRelationInfo *) palloc0(sizeof(PgFdwRelationInfo));
fpinfo->pushdown_safe = false;
+ fpinfo->stage = stage;
output_rel->fdw_private = fpinfo;
- add_foreign_grouping_paths(root, input_rel, output_rel,
- (GroupPathExtraData *) extra);
+ switch (stage)
+ {
+ case UPPERREL_GROUP_AGG:
+ add_foreign_grouping_paths(root, input_rel, output_rel,
+ (GroupPathExtraData *) extra);
+ break;
+ case UPPERREL_ORDERED:
+ add_foreign_ordered_paths(root, input_rel, output_rel);
+ break;
+ default:
+ elog(ERROR, "unexpected upper relation: %d", (int) stage);
+ break;
+ }
}
/*
@@ -5534,8 +5673,8 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);
/* Estimate the cost of push down */
- estimate_path_cost_size(root, grouped_rel, NIL, NIL, &rows,
- &width, &startup_cost, &total_cost);
+ estimate_path_cost_size(root, grouped_rel, NIL, NIL, NULL,
+ &rows, &width, &startup_cost, &total_cost);
/* Now update this information in the fpinfo */
fpinfo->rows = rows;
@@ -5543,6 +5682,8 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
fpinfo->startup_cost = startup_cost;
fpinfo->total_cost = total_cost;
+ grouped_rel->rows = fpinfo->rows;
+
/* Create and add foreign path to the grouping relation. */
grouppath = create_foreign_upper_path(root,
grouped_rel,
@@ -5559,6 +5700,133 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
}
/*
+ * add_foreign_ordered_paths
+ * Add foreign paths for performing the final sort remotely.
+ *
+ * Given input_rel contains the source-data Paths. The paths are added to the
+ * given ordered_rel.
+ */
+static void
+add_foreign_ordered_paths(PlannerInfo *root, RelOptInfo *input_rel,
+ RelOptInfo *ordered_rel)
+{
+ Query *parse = root->parse;
+ PgFdwRelationInfo *ifpinfo = input_rel->fdw_private;
+ PgFdwRelationInfo *fpinfo = ordered_rel->fdw_private;
+ PgFdwPathExtraData *fpextra;
+ double rows;
+ int width;
+ Cost startup_cost;
+ Cost total_cost;
+ List *fdw_private;
+ ForeignPath *ordered_path;
+ ListCell *lc;
+
+ /* Shouldn't get here unless the query has ORDER BY */
+ Assert(parse->sortClause);
+
+ /* We don't support cases where there are any SRFs in the targetlist */
+ if (parse->hasTargetSRFs)
+ return;
+
+ /* Save the input_rel as outerrel in fpinfo */
+ fpinfo->outerrel = input_rel;
+
+ /*
+ * Copy foreign table, foreign server, user mapping, FDW options etc.
+ * details from the input relation's fpinfo.
+ */
+ fpinfo->table = ifpinfo->table;
+ fpinfo->server = ifpinfo->server;
+ fpinfo->user = ifpinfo->user;
+ merge_fdw_options(fpinfo, ifpinfo, NULL);
+
+ /*
+ * If the input_rel is a base or join relation, we would already have
+ * considered pushing down the final sort to the remote server when
+ * creating pre-sorted foreign paths for that relation, because the
+ * query_pathkeys is set to the root->sort_pathkeys in that case (see
+ * standard_qp_callback()).
+ */
+ if (input_rel->reloptkind == RELOPT_BASEREL ||
+ input_rel->reloptkind == RELOPT_JOINREL)
+ {
+ Assert(root->query_pathkeys == root->sort_pathkeys);
+
+ /* Safe to push down if the query_pathkeys is safe to push down */
+ fpinfo->pushdown_safe = ifpinfo->qp_is_pushdown_safe;
+
+ return;
+ }
+
+ /* The input_rel should be a grouping relation */
+ Assert(input_rel->reloptkind == RELOPT_UPPER_REL &&
+ ifpinfo->stage == UPPERREL_GROUP_AGG);
+
+ /*
+ * We try to create a path below by extending a simple foreign path for
+ * the underlying grouping relation to perform the final sort remotely,
+ * which is stored into the fdw_private list of the resulting path.
+ */
+
+ /* Assess if it is safe to push down the final sort */
+ foreach(lc, root->sort_pathkeys)
+ {
+ PathKey *pathkey = (PathKey *) lfirst(lc);
+ EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
+ Expr *sort_expr;
+
+ /*
+ * is_foreign_expr would detect volatile expressions as well, but
+ * checking ec_has_volatile here saves some cycles.
+ */
+ if (pathkey_ec->ec_has_volatile)
+ return;
+
+ /* Get the sort expression for the pathkey_ec */
+ sort_expr = find_em_expr_for_input_target(root,
+ pathkey_ec,
+ input_rel->reltarget);
+
+ /* If it's unsafe to remote, we cannot push down the final sort */
+ if (!is_foreign_expr(root, input_rel, sort_expr))
+ return;
+ }
+
+ /* Safe to push down */
+ fpinfo->pushdown_safe = true;
+
+ /* Construct PgFdwPathExtraData */
+ fpextra = (PgFdwPathExtraData *) palloc0(sizeof(PgFdwPathExtraData));
+ fpextra->target = root->upper_targets[UPPERREL_ORDERED];
+ fpextra->has_final_sort = true;
+
+ /* Estimate the costs of performing the final sort remotely */
+ estimate_path_cost_size(root, input_rel, NIL, root->sort_pathkeys, fpextra,
+ &rows, &width, &startup_cost, &total_cost);
+
+ /*
+ * Build the fdw_private list that will be used by postgresGetForeignPlan.
+ * Items in the list must match order in enum FdwPathPrivateIndex.
+ */
+ fdw_private = list_make1(makeInteger(true));
+
+ /* Create foreign ordering path */
+ ordered_path = create_foreign_upper_path(root,
+ input_rel,
+ root->upper_targets[UPPERREL_ORDERED],
+ rows,
+ startup_cost,
+ total_cost,
+ root->sort_pathkeys,
+ NULL, /* no extra plan */
+ fdw_private);
+
+ /* and add it to the ordered_rel */
+ add_path(ordered_rel, (Path *) ordered_path);
+}
+
+/*
* Create a tuple from the specified row of the PGresult.
*
* rel is the local representation of the foreign table, attinmeta is
@@ -5808,3 +6076,65 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
/* We didn't find any suitable equivalence class expression */
return NULL;
}
+
+/*
+ * Find an equivalence class member expression to be computed as a sort column
+ * in the given target.
+ */
+Expr *
+find_em_expr_for_input_target(PlannerInfo *root,
+ EquivalenceClass *ec,
+ PathTarget *target)
+{
+ ListCell *lc1;
+ int i;
+
+ i = 0;
+ foreach(lc1, target->exprs)
+ {
+ Expr *expr = (Expr *) lfirst(lc1);
+ Index sgref = get_pathtarget_sortgroupref(target, i);
+ ListCell *lc2;
+
+ /* Ignore non-sort expressions */
+ if (sgref == 0 ||
+ get_sortgroupref_clause_noerr(sgref,
+ root->parse->sortClause) == NULL)
+ {
+ i++;
+ continue;
+ }
+
+ /* We ignore binary-compatible relabeling on both ends */
+ while (expr && IsA(expr, RelabelType))
+ expr = ((RelabelType *) expr)->arg;
+
+ /* Locate an EquivalenceClass member matching this expr, if any */
+ foreach(lc2, ec->ec_members)
+ {
+ EquivalenceMember *em = (EquivalenceMember *) lfirst(lc2);
+ Expr *em_expr;
+
+ /* Don't match constants */
+ if (em->em_is_const)
+ continue;
+
+ /* Ignore child members */
+ if (em->em_is_child)
+ continue;
+
+ /* Match if same expression (after stripping relabel) */
+ em_expr = em->em_expr;
+ while (em_expr && IsA(em_expr, RelabelType))
+ em_expr = ((RelabelType *) em_expr)->arg;
+
+ if (equal(em_expr, expr))
+ return em->em_expr;
+ }
+
+ i++;
+ }
+
+ elog(ERROR, "could not find pathkey item to sort");
+ return NULL; /* keep compiler quiet */
+}