aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils
diff options
context:
space:
mode:
authorAlexander Korotkov <akorotkov@postgresql.org>2020-01-18 01:11:39 +0300
committerAlexander Korotkov <akorotkov@postgresql.org>2020-01-18 01:11:39 +0300
commit4b754d6c16e16cc1a1adf12ab0f48603069a0efd (patch)
treec976cb8180007dd33bcf9f48d8a24b490c03605e /src/backend/utils
parent41c6f9db25b5e3a8bb8afbb7d6715cff541fd41e (diff)
downloadpostgresql-4b754d6c16e16cc1a1adf12ab0f48603069a0efd.tar.gz
postgresql-4b754d6c16e16cc1a1adf12ab0f48603069a0efd.zip
Avoid full scan of GIN indexes when possible
The strategy of GIN index scan is driven by opclass-specific extract_query method. This method that needed search mode is GIN_SEARCH_MODE_ALL. This mode means that matching tuple may contain none of extracted entries. Simple example is '!term' tsquery, which doesn't need any term to exist in matching tsvector. In order to handle such scan key GIN calculates virtual entry, which contains all TIDs of all entries of attribute. In fact this is full scan of index attribute. And typically this is very slow, but allows to handle some queries correctly in GIN. However, current algorithm calculate such virtual entry for each GIN_SEARCH_MODE_ALL scan key even if they are multiple for the same attribute. This is clearly not optimal. This commit improves the situation by introduction of "exclude only" scan keys. Such scan keys are not capable to return set of matching TIDs. Instead, they are capable only to filter TIDs produced by normal scan keys. Therefore, each attribute should contain at least one normal scan key, while rest of them may be "exclude only" if search mode is GIN_SEARCH_MODE_ALL. The same optimization might be applied to the whole scan, not per-attribute. But that leads to NULL values elimination problem. There is trade-off between multiple possible ways to do this. We probably want to do this later using some cost-based decision algorithm. Discussion: https://postgr.es/m/CAOBaU_YGP5-BEt5Cc0%3DzMve92vocPzD%2BXiZgiZs1kjY0cj%3DXBg%40mail.gmail.com Author: Nikita Glukhov, Alexander Korotkov, Tom Lane, Julien Rouhaud Reviewed-by: Julien Rouhaud, Tomas Vondra, Tom Lane
Diffstat (limited to 'src/backend/utils')
-rw-r--r--src/backend/utils/adt/selfuncs.c37
1 files changed, 31 insertions, 6 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 18d77ac0b77..7c6f0574b37 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -6356,7 +6356,8 @@ spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
typedef struct
{
- bool haveFullScan;
+ bool attHasFullScan[INDEX_MAX_KEYS];
+ bool attHasNormalScan[INDEX_MAX_KEYS];
double partialEntries;
double exactEntries;
double searchEntries;
@@ -6452,16 +6453,21 @@ gincost_pattern(IndexOptInfo *index, int indexcol,
counts->searchEntries++;
}
- if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
+ if (searchMode == GIN_SEARCH_MODE_DEFAULT)
+ {
+ counts->attHasNormalScan[indexcol] = true;
+ }
+ else if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
{
/* Treat "include empty" like an exact-match item */
+ counts->attHasNormalScan[indexcol] = true;
counts->exactEntries++;
counts->searchEntries++;
}
- else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
+ else
{
/* It's GIN_SEARCH_MODE_ALL */
- counts->haveFullScan = true;
+ counts->attHasFullScan[indexcol] = true;
}
return true;
@@ -6597,7 +6603,8 @@ gincost_scalararrayopexpr(PlannerInfo *root,
/* We ignore array elements that are unsatisfiable patterns */
numPossible++;
- if (elemcounts.haveFullScan)
+ if (elemcounts.attHasFullScan[indexcol] &&
+ !elemcounts.attHasNormalScan[indexcol])
{
/*
* Full index scan will be required. We treat this as if
@@ -6654,6 +6661,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
numEntries;
GinQualCounts counts;
bool matchPossible;
+ bool fullIndexScan;
double partialScale;
double entryPagesFetched,
dataPagesFetched,
@@ -6665,6 +6673,7 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
Relation indexRel;
GinStatsData ginStats;
ListCell *lc;
+ int i;
/*
* Obtain statistical information from the meta page, if possible. Else
@@ -6821,7 +6830,23 @@ gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
return;
}
- if (counts.haveFullScan || indexQuals == NIL)
+ /*
+ * If attribute has a full scan and at the same time doesn't have normal
+ * scan, then we'll have to scan all non-null entries of that attribute.
+ * Currently, we don't have per-attribute statistics for GIN. Thus, we
+ * must assume the whole GIN index has to be scanned in this case.
+ */
+ fullIndexScan = false;
+ for (i = 0; i < index->nkeycolumns; i++)
+ {
+ if (counts.attHasFullScan[i] && !counts.attHasNormalScan[i])
+ {
+ fullIndexScan = true;
+ break;
+ }
+ }
+
+ if (fullIndexScan || indexQuals == NIL)
{
/*
* Full index scan will be required. We treat this as if every key in