diff options
Diffstat (limited to 'src/backend/optimizer/util/pathnode.c')
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 320 |
1 files changed, 129 insertions, 191 deletions
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 691afbf0ed6..8677ff20fd5 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -37,7 +37,6 @@ typedef enum COSTS_DIFFERENT /* neither path dominates the other on cost */ } PathCostComparison; -static void add_parameterized_path(RelOptInfo *parent_rel, Path *new_path); static List *translate_sub_tlist(List *tlist, int relid); static bool query_is_distinct_for(Query *query, List *colnos, List *opids); static Oid distinct_col_search(int colno, List *colnos, List *opids); @@ -139,6 +138,12 @@ compare_fractional_path_costs(Path *path1, Path *path2, * is fuzzily better than the other on startup cost and fuzzily worse on * total cost, we just say that their costs are "different", since neither * dominates the other across the whole performance spectrum. + * + * This function also includes special hacks to support a policy enforced + * by its sole caller, add_path(): paths that have any parameterization + * cannot win comparisons on the grounds of having cheaper startup cost, + * since we deem only total cost to be of interest for a parameterized path. + * (Unparameterized paths are more common, so we check for this case last.) */ static PathCostComparison compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) @@ -150,7 +155,8 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) if (path1->total_cost > path2->total_cost * fuzz_factor) { /* path1 fuzzily worse on total cost */ - if (path2->startup_cost > path1->startup_cost * fuzz_factor) + if (path2->startup_cost > path1->startup_cost * fuzz_factor && + path1->param_info == NULL) { /* ... but path2 fuzzily worse on startup, so DIFFERENT */ return COSTS_DIFFERENT; @@ -161,7 +167,8 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) if (path2->total_cost > path1->total_cost * fuzz_factor) { /* path2 fuzzily worse on total cost */ - if (path1->startup_cost > path2->startup_cost * fuzz_factor) + if (path1->startup_cost > path2->startup_cost * fuzz_factor && + path2->param_info == NULL) { /* ... but path1 fuzzily worse on startup, so DIFFERENT */ return COSTS_DIFFERENT; @@ -170,12 +177,14 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) return COSTS_BETTER1; } /* fuzzily the same on total cost */ - if (path1->startup_cost > path2->startup_cost * fuzz_factor) + if (path1->startup_cost > path2->startup_cost * fuzz_factor && + path2->param_info == NULL) { /* ... but path1 fuzzily worse on startup, so path2 wins */ return COSTS_BETTER2; } - if (path2->startup_cost > path1->startup_cost * fuzz_factor) + if (path2->startup_cost > path1->startup_cost * fuzz_factor && + path1->param_info == NULL) { /* ... but path2 fuzzily worse on startup, so path1 wins */ return COSTS_BETTER1; @@ -189,11 +198,19 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) * Find the minimum-cost paths from among a relation's paths, * and save them in the rel's cheapest-path fields. * - * Only unparameterized paths are considered candidates for cheapest_startup - * and cheapest_total. The cheapest_parameterized_paths list collects paths - * that are cheapest-total for their parameterization (i.e., there is no - * cheaper path with the same or weaker parameterization). This list always - * includes the unparameterized cheapest-total path, too, if there is one. + * cheapest_total_path is normally the cheapest-total-cost unparameterized + * path; but if there are no unparameterized paths, we assign it to be the + * best (cheapest least-parameterized) parameterized path. However, only + * unparameterized paths are considered candidates for cheapest_startup_path, + * so that will be NULL if there are no unparameterized paths. + * + * The cheapest_parameterized_paths list collects all parameterized paths + * that have survived the add_path() tournament for this relation. (Since + * add_path ignores pathkeys and startup cost for a parameterized path, + * these will be paths that have best total cost or best row count for their + * parameterization.) cheapest_parameterized_paths always includes the + * cheapest-total unparameterized path, too, if there is one; the users of + * that list find it more convenient if that's included. * * This is normally called only after we've finished constructing the path * list for the rel node. @@ -203,77 +220,118 @@ set_cheapest(RelOptInfo *parent_rel) { Path *cheapest_startup_path; Path *cheapest_total_path; - bool have_parameterized_paths; + Path *best_param_path; + List *parameterized_paths; ListCell *p; Assert(IsA(parent_rel, RelOptInfo)); - cheapest_startup_path = cheapest_total_path = NULL; - have_parameterized_paths = false; + if (parent_rel->pathlist == NIL) + elog(ERROR, "could not devise a query plan for the given query"); + + cheapest_startup_path = cheapest_total_path = best_param_path = NULL; + parameterized_paths = NIL; foreach(p, parent_rel->pathlist) { Path *path = (Path *) lfirst(p); int cmp; - /* We only consider unparameterized paths in this step */ if (path->param_info) { - have_parameterized_paths = true; - continue; - } + /* Parameterized path, so add it to parameterized_paths */ + parameterized_paths = lappend(parameterized_paths, path); - if (cheapest_total_path == NULL) - { - cheapest_startup_path = cheapest_total_path = path; - continue; + /* + * If we have an unparameterized cheapest-total, we no longer care + * about finding the best parameterized path, so move on. + */ + if (cheapest_total_path) + continue; + + /* + * Otherwise, track the best parameterized path, which is the one + * with least total cost among those of the minimum + * parameterization. + */ + if (best_param_path == NULL) + best_param_path = path; + else + { + switch (bms_subset_compare(PATH_REQ_OUTER(path), + PATH_REQ_OUTER(best_param_path))) + { + case BMS_EQUAL: + /* keep the cheaper one */ + if (compare_path_costs(path, best_param_path, + TOTAL_COST) < 0) + best_param_path = path; + break; + case BMS_SUBSET1: + /* new path is less-parameterized */ + best_param_path = path; + break; + case BMS_SUBSET2: + /* old path is less-parameterized, keep it */ + break; + case BMS_DIFFERENT: + /* + * This means that neither path has the least possible + * parameterization for the rel. We'll sit on the old + * path until something better comes along. + */ + break; + } + } } + else + { + /* Unparameterized path, so consider it for cheapest slots */ + if (cheapest_total_path == NULL) + { + cheapest_startup_path = cheapest_total_path = path; + continue; + } - /* - * If we find two paths of identical costs, try to keep the - * better-sorted one. The paths might have unrelated sort orderings, - * in which case we can only guess which might be better to keep, but - * if one is superior then we definitely should keep that one. - */ - cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST); - if (cmp > 0 || - (cmp == 0 && - compare_pathkeys(cheapest_startup_path->pathkeys, - path->pathkeys) == PATHKEYS_BETTER2)) - cheapest_startup_path = path; - - cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST); - if (cmp > 0 || - (cmp == 0 && - compare_pathkeys(cheapest_total_path->pathkeys, - path->pathkeys) == PATHKEYS_BETTER2)) - cheapest_total_path = path; + /* + * If we find two paths of identical costs, try to keep the + * better-sorted one. The paths might have unrelated sort + * orderings, in which case we can only guess which might be + * better to keep, but if one is superior then we definitely + * should keep that one. + */ + cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST); + if (cmp > 0 || + (cmp == 0 && + compare_pathkeys(cheapest_startup_path->pathkeys, + path->pathkeys) == PATHKEYS_BETTER2)) + cheapest_startup_path = path; + + cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST); + if (cmp > 0 || + (cmp == 0 && + compare_pathkeys(cheapest_total_path->pathkeys, + path->pathkeys) == PATHKEYS_BETTER2)) + cheapest_total_path = path; + } } - if (cheapest_total_path == NULL && !have_parameterized_paths) - elog(ERROR, "could not devise a query plan for the given query"); + /* Add cheapest unparameterized path, if any, to parameterized_paths */ + if (cheapest_total_path) + parameterized_paths = lcons(cheapest_total_path, parameterized_paths); + + /* + * If there is no unparameterized path, use the best parameterized path + * as cheapest_total_path (but not as cheapest_startup_path). + */ + if (cheapest_total_path == NULL) + cheapest_total_path = best_param_path; + Assert(cheapest_total_path != NULL); parent_rel->cheapest_startup_path = cheapest_startup_path; parent_rel->cheapest_total_path = cheapest_total_path; parent_rel->cheapest_unique_path = NULL; /* computed only if needed */ - - /* Seed the parameterized-paths list with the cheapest total, if any */ - if (cheapest_total_path) - parent_rel->cheapest_parameterized_paths = list_make1(cheapest_total_path); - else - parent_rel->cheapest_parameterized_paths = NIL; - - /* And, if there are any parameterized paths, add them in one at a time */ - if (have_parameterized_paths) - { - foreach(p, parent_rel->pathlist) - { - Path *path = (Path *) lfirst(p); - - if (path->param_info) - add_parameterized_path(parent_rel, path); - } - } + parent_rel->cheapest_parameterized_paths = parameterized_paths; } /* @@ -295,11 +353,12 @@ set_cheapest(RelOptInfo *parent_rel) * one parameterization can seldom dominate a path of another. But such * cases do arise, so we make the full set of checks anyway. * - * There is one policy decision embedded in this function, along with its - * sibling add_path_precheck: we treat all parameterized paths as having - * NIL pathkeys, so that they compete only on cost. This is to reduce - * the number of parameterized paths that are kept. See discussion in - * src/backend/optimizer/README. + * There are two policy decisions embedded in this function, along with + * its sibling add_path_precheck: we treat all parameterized paths as + * having NIL pathkeys, and we ignore their startup costs, so that they + * compete only on parameterization, total cost and rowcount. This is to + * reduce the number of parameterized paths that are kept. See discussion + * in src/backend/optimizer/README. * * The pathlist is kept sorted by total_cost, with cheaper paths * at the front. Within this routine, that's simply a speed hack: @@ -552,7 +611,7 @@ add_path_precheck(RelOptInfo *parent_rel, List *new_path_pathkeys; ListCell *p1; - /* Pretend parameterized paths have no pathkeys, per add_path comment */ + /* Pretend parameterized paths have no pathkeys, per add_path policy */ new_path_pathkeys = required_outer ? NIL : pathkeys; foreach(p1, parent_rel->pathlist) @@ -572,8 +631,10 @@ add_path_precheck(RelOptInfo *parent_rel, */ if (total_cost >= old_path->total_cost) { - if (startup_cost >= old_path->startup_cost) + /* can win on startup cost only if unparameterized */ + if (startup_cost >= old_path->startup_cost || required_outer) { + /* new path does not win on cost, so check pathkeys... */ List *old_path_pathkeys; old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys; @@ -582,6 +643,7 @@ add_path_precheck(RelOptInfo *parent_rel, if (keyscmp == PATHKEYS_EQUAL || keyscmp == PATHKEYS_BETTER2) { + /* new path does not win on pathkeys... */ if (bms_equal(required_outer, PATH_REQ_OUTER(old_path))) { /* Found an old path that dominates the new one */ @@ -604,123 +666,6 @@ add_path_precheck(RelOptInfo *parent_rel, return true; } -/* - * add_parameterized_path - * Consider a parameterized implementation path for the specified rel, - * and add it to the rel's cheapest_parameterized_paths list if it - * belongs there, removing any old entries that it dominates. - * - * This is essentially a cut-down form of add_path(): we do not care - * about startup cost or sort ordering, only total cost, rowcount, and - * parameterization. Also, we must not recycle rejected paths, since - * they will still be present in the rel's pathlist. - * - * 'parent_rel' is the relation entry to which the path corresponds. - * 'new_path' is a parameterized path for parent_rel. - * - * Returns nothing, but modifies parent_rel->cheapest_parameterized_paths. - */ -static void -add_parameterized_path(RelOptInfo *parent_rel, Path *new_path) -{ - bool accept_new = true; /* unless we find a superior old path */ - ListCell *insert_after = NULL; /* where to insert new item */ - ListCell *p1; - ListCell *p1_prev; - ListCell *p1_next; - - /* - * Loop to check proposed new path against old paths. Note it is possible - * for more than one old path to be tossed out because new_path dominates - * it. - * - * We can't use foreach here because the loop body may delete the current - * list cell. - */ - p1_prev = NULL; - for (p1 = list_head(parent_rel->cheapest_parameterized_paths); - p1 != NULL; p1 = p1_next) - { - Path *old_path = (Path *) lfirst(p1); - bool remove_old = false; /* unless new proves superior */ - int costcmp; - BMS_Comparison outercmp; - - p1_next = lnext(p1); - - costcmp = compare_path_costs(new_path, old_path, TOTAL_COST); - outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), - PATH_REQ_OUTER(old_path)); - if (outercmp != BMS_DIFFERENT) - { - if (costcmp < 0) - { - if (outercmp != BMS_SUBSET2 && - new_path->rows <= old_path->rows) - remove_old = true; /* new dominates old */ - } - else if (costcmp > 0) - { - if (outercmp != BMS_SUBSET1 && - new_path->rows >= old_path->rows) - accept_new = false; /* old dominates new */ - } - else if (outercmp == BMS_SUBSET1 && - new_path->rows <= old_path->rows) - remove_old = true; /* new dominates old */ - else if (outercmp == BMS_SUBSET2 && - new_path->rows >= old_path->rows) - accept_new = false; /* old dominates new */ - else if (new_path->rows < old_path->rows) - remove_old = true; /* new dominates old */ - else - { - /* Same cost, rows, and param rels; arbitrarily keep old */ - accept_new = false; /* old equals or dominates new */ - } - } - - /* - * Remove current element from cheapest_parameterized_paths if - * dominated by new. - */ - if (remove_old) - { - parent_rel->cheapest_parameterized_paths = - list_delete_cell(parent_rel->cheapest_parameterized_paths, - p1, p1_prev); - /* p1_prev does not advance */ - } - else - { - /* new belongs after this old path if it has cost >= old's */ - if (costcmp >= 0) - insert_after = p1; - /* p1_prev advances */ - p1_prev = p1; - } - - /* - * If we found an old path that dominates new_path, we can quit - * scanning the list; we will not add new_path, and we assume new_path - * cannot dominate any other elements of the list. - */ - if (!accept_new) - break; - } - - if (accept_new) - { - /* Accept the new path: insert it at proper place in list */ - if (insert_after) - lappend_cell(parent_rel->cheapest_parameterized_paths, - insert_after, new_path); - else - parent_rel->cheapest_parameterized_paths = - lcons(new_path, parent_rel->cheapest_parameterized_paths); - } -} - /***************************************************************************** * PATH NODE CREATION ROUTINES @@ -1137,13 +1082,6 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, int numCols; ListCell *lc; - /* XXX temporary band-aid to not crash on LATERAL queries */ - if (subpath == NULL) - { - Assert(subpath == rel->cheapest_total_path); - return NULL; - } - /* Caller made a mistake if subpath isn't cheapest_total ... */ Assert(subpath == rel->cheapest_total_path); Assert(subpath->parent == rel); |