aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access
diff options
context:
space:
mode:
authorAlvaro Herrera <alvherre@alvh.no-ip.org>2015-08-05 16:20:50 -0300
committerAlvaro Herrera <alvherre@alvh.no-ip.org>2015-08-05 16:20:50 -0300
commit94a8b45feb3019d2e6b04806415dd8bc85994706 (patch)
tree76c5adb9baecd4cf99d7b75c3965cb983a78841c /src/backend/access
parent06663971bba43ea967daf00fff0b70ee066c3a13 (diff)
downloadpostgresql-94a8b45feb3019d2e6b04806415dd8bc85994706.tar.gz
postgresql-94a8b45feb3019d2e6b04806415dd8bc85994706.zip
Fix BRIN to use SnapshotAny during summarization
For correctness of summarization results, it is critical that the snapshot used during the summarization scan is able to see all tuples that are live to all transactions -- including tuples inserted or deleted by in-progress transactions. Otherwise, it would be possible for a transaction to insert a tuple, then idle for a long time while a concurrent transaction executes summarization of the range: this would result in the inserted value not being considered in the summary. Previously we were trying to use a MVCC snapshot in conjunction with adding a "placeholder" tuple in the index: the snapshot would see all committed tuples, and the placeholder tuple would catch insertions by any new inserters. The hole is that prior insertions by transactions that are still in progress by the time the MVCC snapshot was taken were ignored. Kevin Grittner reported this as a bogus error message during vacuum with default transaction isolation mode set to repeatable read (because the error report mentioned a function name not being invoked during), but the problem is larger than that. To fix, tweak IndexBuildHeapRangeScan to have a new mode that behaves the way we need using SnapshotAny visibility rules. This change simplifies the BRIN code a bit, mainly by removing large comments that were mistaken. Instead, rely on the SnapshotAny semantics to provide what it needs. (The business about a placeholder tuple needs to remain: that covers the case that a transaction inserts a a tuple in a page that summarization already scanned.) Discussion: https://www.postgresql.org/message-id/20150731175700.GX2441@postgresql.org In passing, remove a couple of unused declarations from brin.h and reword a comment to be proper English. This part submitted by Kevin Grittner. Backpatch to 9.5, where BRIN was introduced.
Diffstat (limited to 'src/backend/access')
-rw-r--r--src/backend/access/brin/brin.c41
1 files changed, 9 insertions, 32 deletions
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 360b26e6fc4..fc9f964bf78 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -696,7 +696,7 @@ brinbuildempty(PG_FUNCTION_ARGS)
*
* XXX we could mark item tuples as "dirty" (when a minimum or maximum heap
* tuple is deleted), meaning the need to re-run summarization on the affected
- * range. Need to an extra flag in brintuples for that.
+ * range. Would need to add an extra flag in brintuples for that.
*/
Datum
brinbulkdelete(PG_FUNCTION_ARGS)
@@ -951,9 +951,13 @@ summarize_range(IndexInfo *indexInfo, BrinBuildState *state, Relation heapRel,
* Execute the partial heap scan covering the heap blocks in the specified
* page range, summarizing the heap tuples in it. This scan stops just
* short of brinbuildCallback creating the new index entry.
+ *
+ * Note that it is critical we use the "any visible" mode of
+ * IndexBuildHeapRangeScan here: otherwise, we would miss tuples inserted
+ * by transactions that are still in progress, among other corner cases.
*/
state->bs_currRangeStart = heapBlk;
- IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false,
+ IndexBuildHeapRangeScan(heapRel, state->bs_irel, indexInfo, false, true,
heapBlk, state->bs_pagesPerRange,
brinbuildCallback, (void *) state);
@@ -1058,36 +1062,6 @@ brinsummarize(Relation index, Relation heapRel, double *numSummarized,
state = initialize_brin_buildstate(index, revmap,
pagesPerRange);
indexInfo = BuildIndexInfo(index);
-
- /*
- * We only have ShareUpdateExclusiveLock on the table, and
- * therefore other sessions may insert tuples into the range
- * we're going to scan. This is okay, because we take
- * additional precautions to avoid losing the additional
- * tuples; see comments in summarize_range. Set the
- * concurrent flag, which causes IndexBuildHeapRangeScan to
- * use a snapshot other than SnapshotAny, and silences
- * warnings emitted there.
- */
- indexInfo->ii_Concurrent = true;
-
- /*
- * If using transaction-snapshot mode, it would be possible
- * for another transaction to insert a tuple that's not
- * visible to our snapshot if we have already acquired one,
- * when in snapshot-isolation mode; therefore, disallow this
- * from running in such a transaction unless a snapshot hasn't
- * been acquired yet.
- *
- * This code is called by VACUUM and
- * brin_summarize_new_values. Have the error message mention
- * the latter because VACUUM cannot run in a transaction and
- * thus cannot cause this issue.
- */
- if (IsolationUsesXactSnapshot() && FirstSnapshotSet)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_TRANSACTION_STATE),
- errmsg("brin_summarize_new_values() cannot run in a transaction that has already obtained a snapshot")));
}
summarize_range(indexInfo, state, heapRel, heapBlk);
@@ -1111,7 +1085,10 @@ brinsummarize(Relation index, Relation heapRel, double *numSummarized,
/* free resources */
brinRevmapTerminate(revmap);
if (state)
+ {
terminate_brin_buildstate(state);
+ pfree(indexInfo);
+ }
}
/*