diff options
author | Tomas Vondra <tomas.vondra@postgresql.org> | 2020-01-13 01:20:57 +0100 |
---|---|---|
committer | Tomas Vondra <tomas.vondra@postgresql.org> | 2020-01-13 01:21:06 +0100 |
commit | aaa6761876ba5b06a5c3fa914b2951ace1e31dee (patch) | |
tree | 9e2cabceec793ae6e8091054d972cf2866f5b8aa /src/backend | |
parent | 652686a334b437f57f9bd0e3baa5dbd245a9e15d (diff) | |
download | postgresql-aaa6761876ba5b06a5c3fa914b2951ace1e31dee.tar.gz postgresql-aaa6761876ba5b06a5c3fa914b2951ace1e31dee.zip |
Apply all available functional dependencies
When considering functional dependencies during selectivity estimation,
it's not necessary to bother with selecting the best extended statistic
object and then use just dependencies from it. We can simply consider
all applicable functional dependencies at once.
This means we need to deserialie all (applicable) dependencies before
applying them to the clauses. This is a bit more expensive than picking
the best statistics and deserializing dependencies for it. To minimize
the additional cost, we ignore statistics that are not applicable.
Author: Tomas Vondra
Reviewed-by: Mark Dilger
Discussion: https://postgr.es/m/20191028152048.jc6pqv5hb7j77ocp@development
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/statistics/dependencies.c | 116 |
1 files changed, 77 insertions, 39 deletions
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index e97f2b4bfec..e2f6c5bb979 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -77,8 +77,8 @@ static bool dependency_implies_attribute(MVDependency *dependency, AttrNumber attnum); static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum); -static MVDependency *find_strongest_dependency(StatisticExtInfo *stats, - MVDependencies *dependencies, +static MVDependency *find_strongest_dependency(MVDependencies **dependencies, + int ndependencies, Bitmapset *attnums); static void @@ -862,10 +862,10 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) * (see the comment in dependencies_clauselist_selectivity). */ static MVDependency * -find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies, +find_strongest_dependency(MVDependencies **dependencies, int ndependencies, Bitmapset *attnums) { - int i; + int i, j; MVDependency *strongest = NULL; /* number of attnums in clauses */ @@ -876,36 +876,39 @@ find_strongest_dependency(StatisticExtInfo *stats, MVDependencies *dependencies, * fully-matched dependencies. We do the cheap checks first, before * matching it against the attnums. */ - for (i = 0; i < dependencies->ndeps; i++) + for (i = 0; i < ndependencies; i++) { - MVDependency *dependency = dependencies->deps[i]; - - /* - * Skip dependencies referencing more attributes than available - * clauses, as those can't be fully matched. - */ - if (dependency->nattributes > nattnums) - continue; - - if (strongest) + for (j = 0; j < dependencies[i]->ndeps; j++) { - /* skip dependencies on fewer attributes than the strongest. */ - if (dependency->nattributes < strongest->nattributes) - continue; + MVDependency *dependency = dependencies[i]->deps[j]; - /* also skip weaker dependencies when attribute count matches */ - if (strongest->nattributes == dependency->nattributes && - strongest->degree > dependency->degree) + /* + * Skip dependencies referencing more attributes than available + * clauses, as those can't be fully matched. + */ + if (dependency->nattributes > nattnums) continue; - } - /* - * this dependency is stronger, but we must still check that it's - * fully matched to these attnums. We perform this check last as it's - * slightly more expensive than the previous checks. - */ - if (dependency_is_fully_matched(dependency, attnums)) - strongest = dependency; /* save new best match */ + if (strongest) + { + /* skip dependencies on fewer attributes than the strongest. */ + if (dependency->nattributes < strongest->nattributes) + continue; + + /* also skip weaker dependencies when attribute count matches */ + if (strongest->nattributes == dependency->nattributes && + strongest->degree > dependency->degree) + continue; + } + + /* + * this dependency is stronger, but we must still check that it's + * fully matched to these attnums. We perform this check last as it's + * slightly more expensive than the previous checks. + */ + if (dependency_is_fully_matched(dependency, attnums)) + strongest = dependency; /* save new best match */ + } } return strongest; @@ -949,10 +952,11 @@ dependencies_clauselist_selectivity(PlannerInfo *root, Selectivity s1 = 1.0; ListCell *l; Bitmapset *clauses_attnums = NULL; - StatisticExtInfo *stat; - MVDependencies *dependencies; Bitmapset **list_attnums; int listidx; + MVDependencies **dependencies = NULL; + int ndependencies = 0; + int i; /* check if there's any stats that might be useful for us. */ if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES)) @@ -1001,20 +1005,50 @@ dependencies_clauselist_selectivity(PlannerInfo *root, return 1.0; } - /* find the best suited statistics object for these attnums */ - stat = choose_best_statistics(rel->statlist, STATS_EXT_DEPENDENCIES, - list_attnums, list_length(clauses)); + /* + * Load all functional dependencies matching at least two parameters. We + * can simply consider all dependencies at once, without having to search + * for the best statistics object. + * + * To not waste cycles and memory, we deserialize dependencies only for + * statistics that match at least two attributes. The array is allocated + * with the assumption that all objects match - we could grow the array + * to make it just the right size, but it's likely wasteful anyway thanks + * to moving the freed chunks to freelists etc. + */ + ndependencies = 0; + dependencies = (MVDependencies **) palloc(sizeof(MVDependencies *) * + list_length(rel->statlist)); + + foreach(l,rel->statlist) + { + StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(l); + Bitmapset *matched; + int num_matched; + + /* skip statistics that are not of the correct type */ + if (stat->kind != STATS_EXT_DEPENDENCIES) + continue; + + matched = bms_intersect(clauses_attnums, stat->keys); + num_matched = bms_num_members(matched); + bms_free(matched); + + /* skip objects matching fewer than two attributes from clauses */ + if (num_matched < 2) + continue; + + dependencies[ndependencies++] + = statext_dependencies_load(stat->statOid); + } /* if no matching stats could be found then we've nothing to do */ - if (!stat) + if (!ndependencies) { pfree(list_attnums); return 1.0; } - /* load the dependency items stored in the statistics object */ - dependencies = statext_dependencies_load(stat->statOid); - /* * Apply the dependencies recursively, starting with the widest/strongest * ones, and proceeding to the smaller/weaker ones. At the end of each @@ -1027,7 +1061,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root, MVDependency *dependency; /* the widest/strongest dependency, fully matched by clauses */ - dependency = find_strongest_dependency(stat, dependencies, + dependency = find_strongest_dependency(dependencies, ndependencies, clauses_attnums); /* if no suitable dependency was found, we're done */ @@ -1097,6 +1131,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root, s1 *= (dependency->degree + (1 - dependency->degree) * s2); } + /* free deserialized functional dependencies (and then the array) */ + for (i = 0; i < ndependencies; i++) + pfree(dependencies[i]); + pfree(dependencies); pfree(list_attnums); |