diff options
author | Peter Geoghegan <pg@bowt.ie> | 2018-08-10 13:01:34 -0700 |
---|---|---|
committer | Peter Geoghegan <pg@bowt.ie> | 2018-08-10 13:01:34 -0700 |
commit | 4974d7f87e62a58e80c6524e49677cb25cc10e12 (patch) | |
tree | dff7c5c0c15c4e407aa0187047cf0dc922460656 /src/backend/utils/cache | |
parent | d4a900458e505092a8013eb77c9631d58c3c2a0a (diff) | |
download | postgresql-4974d7f87e62a58e80c6524e49677cb25cc10e12.tar.gz postgresql-4974d7f87e62a58e80c6524e49677cb25cc10e12.zip |
Handle parallel index builds on mapped relations.
Commit 9da0cc35284, which introduced parallel CREATE INDEX, failed to
propagate relmapper.c backend local cache state to parallel worker
processes. This could result in parallel index builds against mapped
catalog relations where the leader process (participating as a worker)
scans the new, pristine relfilenode, while worker processes scan the
obsolescent relfilenode. When this happened, the final index structure
was typically not consistent with the owning table's structure. The
final index structure could contain entries formed from both heap
relfilenodes. Only rebuilds on mapped catalog relations that occur as
part of a VACUUM FULL or CLUSTER could become corrupt in practice, since
their mapped relation relfilenode swap is what allows the inconsistency
to arise.
On master, fix the problem by propagating the required relmapper.c
backend state as part of standard parallel initialization (Cf. commit
29d58fd3). On v11, simply disallow builds against mapped catalog
relations by deeming them parallel unsafe.
Author: Peter Geoghegan
Reported-By: "death lock"
Reviewed-By: Tom Lane, Amit Kapila
Bug: #15309
Discussion: https://postgr.es/m/153329671686.1405.18298309097348420351@wrigleys.postgresql.org
Backpatch: 11-, where parallel CREATE INDEX was introduced.
Diffstat (limited to 'src/backend/utils/cache')
-rw-r--r-- | src/backend/utils/cache/relmapper.c | 84 |
1 files changed, 78 insertions, 6 deletions
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index c7f0e6f6d4a..905867dc767 100644 --- a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -92,6 +92,16 @@ typedef struct RelMapFile } RelMapFile; /* + * State for serializing local and shared relmappings for parallel workers + * (active states only). See notes on active_* and pending_* updates state. + */ +typedef struct SerializedActiveRelMaps +{ + RelMapFile active_shared_updates; + RelMapFile active_local_updates; +} SerializedActiveRelMaps; + +/* * The currently known contents of the shared map file and our database's * local map file are stored here. These can be reloaded from disk * immediately whenever we receive an update sinval message. @@ -111,6 +121,9 @@ static RelMapFile local_map; * they will become active at the next CommandCounterIncrement. This setup * lets map updates act similarly to updates of pg_class rows, ie, they * become visible only at the next CommandCounterIncrement boundary. + * + * Active shared and active local updates are serialized by the parallel + * infrastructure, and deserialized within parallel workers. */ static RelMapFile active_shared_updates; static RelMapFile active_local_updates; @@ -263,13 +276,16 @@ RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared, else { /* - * We don't currently support map changes within subtransactions. This - * could be done with more bookkeeping infrastructure, but it doesn't - * presently seem worth it. + * We don't currently support map changes within subtransactions, or + * when in parallel mode. This could be done with more bookkeeping + * infrastructure, but it doesn't presently seem worth it. */ if (GetCurrentTransactionNestLevel() > 1) elog(ERROR, "cannot change relation mapping within subtransaction"); + if (IsInParallelMode()) + elog(ERROR, "cannot change relation mapping in parallel mode"); + if (immediate) { /* Make it active, but only locally */ @@ -452,11 +468,14 @@ AtCCI_RelationMap(void) * * During abort, we just have to throw away any pending map changes. * Normal post-abort cleanup will take care of fixing relcache entries. + * Parallel worker commit/abort is handled by resetting active mappings + * that may have been received from the leader process. (There should be + * no pending updates in parallel workers.) */ void -AtEOXact_RelationMap(bool isCommit) +AtEOXact_RelationMap(bool isCommit, bool isParallelWorker) { - if (isCommit) + if (isCommit && !isParallelWorker) { /* * We should not get here with any "pending" updates. (We could @@ -482,7 +501,10 @@ AtEOXact_RelationMap(bool isCommit) } else { - /* Abort --- drop all local and pending updates */ + /* Abort or parallel worker --- drop all local and pending updates */ + Assert(!isParallelWorker || pending_shared_updates.num_mappings == 0); + Assert(!isParallelWorker || pending_local_updates.num_mappings == 0); + active_shared_updates.num_mappings = 0; active_local_updates.num_mappings = 0; pending_shared_updates.num_mappings = 0; @@ -615,6 +637,56 @@ RelationMapInitializePhase3(void) } /* + * EstimateRelationMapSpace + * + * Estimate space needed to pass active shared and local relmaps to parallel + * workers. + */ +Size +EstimateRelationMapSpace(void) +{ + return sizeof(SerializedActiveRelMaps); +} + +/* + * SerializeRelationMap + * + * Serialize active shared and local relmap state for parallel workers. + */ +void +SerializeRelationMap(Size maxSize, char *startAddress) +{ + SerializedActiveRelMaps *relmaps; + + Assert(maxSize >= EstimateRelationMapSpace()); + + relmaps = (SerializedActiveRelMaps *) startAddress; + relmaps->active_shared_updates = active_shared_updates; + relmaps->active_local_updates = active_local_updates; +} + +/* + * RestoreRelationMap + * + * Restore active shared and local relmap state within a parallel worker. + */ +void +RestoreRelationMap(char *startAddress) +{ + SerializedActiveRelMaps *relmaps; + + if (active_shared_updates.num_mappings != 0 || + active_local_updates.num_mappings != 0 || + pending_shared_updates.num_mappings != 0 || + pending_local_updates.num_mappings != 0) + elog(ERROR, "parallel worker has existing mappings"); + + relmaps = (SerializedActiveRelMaps *) startAddress; + active_shared_updates = relmaps->active_shared_updates; + active_local_updates = relmaps->active_local_updates; +} + +/* * load_relmap_file -- load data from the shared or local map file * * Because the map file is essential for access to core system catalogs, |