diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2013-01-18 18:06:20 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2013-01-18 18:06:20 -0500 |
commit | c2a14bc7c994065edcc48183913a1fae2af27990 (patch) | |
tree | 861568bdf9f37ca8f2cc011ec3b8ea522f696124 /src/backend/commands/dbcommands.c | |
parent | 530bbfac57c8c5df9d38754759d95f1588c427f7 (diff) | |
download | postgresql-c2a14bc7c994065edcc48183913a1fae2af27990.tar.gz postgresql-c2a14bc7c994065edcc48183913a1fae2af27990.zip |
Protect against SnapshotNow race conditions in pg_tablespace scans.
Use of SnapshotNow is known to expose us to race conditions if the tuple(s)
being sought could be updated by concurrently-committing transactions.
CREATE DATABASE and DROP DATABASE are particularly exposed because they do
heavyweight filesystem operations during their scans of pg_tablespace,
so that the scans run for a very long time compared to most. Furthermore,
the potential consequences of a missed or twice-visited row are nastier
than average:
* createdb() could fail with a bogus "file already exists" error, or
silently fail to copy one or more tablespace's worth of files into the
new database.
* remove_dbtablespaces() could miss one or more tablespaces, thus failing
to free filesystem space for the dropped database.
* check_db_file_conflict() could likewise miss a tablespace, leading to an
OID conflict that could result in data loss either immediately or in
future operations. (This seems of very low probability, though, since a
duplicate database OID would be unlikely to start with.)
Hence, it seems worth fixing these three places to use MVCC snapshots, even
though this will someday be superseded by a generic solution to SnapshotNow
race conditions.
Back-patch to all active branches.
Stephen Frost and Tom Lane
Diffstat (limited to 'src/backend/commands/dbcommands.c')
-rw-r--r-- | src/backend/commands/dbcommands.c | 57 |
1 files changed, 54 insertions, 3 deletions
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 1f6e02d3c9e..4ad4b997585 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -131,6 +131,7 @@ createdb(const CreatedbStmt *stmt) int notherbackends; int npreparedxacts; createdb_failure_params fparms; + Snapshot snapshot; /* Extract options from the statement node tree */ foreach(option, stmt->options) @@ -535,6 +536,29 @@ createdb(const CreatedbStmt *stmt) RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT); /* + * Take an MVCC snapshot to use while scanning through pg_tablespace. For + * safety, register the snapshot (this prevents it from changing if + * something else were to request a snapshot during the loop). + * + * Traversing pg_tablespace with an MVCC snapshot is necessary to provide + * us with a consistent view of the tablespaces that exist. Using + * SnapshotNow here would risk seeing the same tablespace multiple times, + * or worse not seeing a tablespace at all, if its tuple is moved around + * by a concurrent update (eg an ACL change). + * + * Inconsistency of this sort is inherent to all SnapshotNow scans, unless + * some lock is held to prevent concurrent updates of the rows being + * sought. There should be a generic fix for that, but in the meantime + * it's worth fixing this case in particular because we are doing very + * heavyweight operations within the scan, so that the elapsed time for + * the scan is vastly longer than for most other catalog scans. That + * means there's a much wider window for concurrent updates to cause + * trouble here than anywhere else. XXX this code should be changed + * whenever a generic fix is implemented. + */ + snapshot = RegisterSnapshot(GetLatestSnapshot()); + + /* * Once we start copying subdirectories, we need to be able to clean 'em * up if we fail. Use an ENSURE block to make sure this happens. (This * is not a 100% solution, because of the possibility of failure during @@ -551,7 +575,7 @@ createdb(const CreatedbStmt *stmt) * each one to the new database. */ rel = heap_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan(rel, SnapshotNow, 0, NULL); + scan = heap_beginscan(rel, snapshot, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Oid srctablespace = HeapTupleGetOid(tuple); @@ -656,6 +680,9 @@ createdb(const CreatedbStmt *stmt) PG_END_ENSURE_ERROR_CLEANUP(createdb_failure_callback, PointerGetDatum(&fparms)); + /* Free our snapshot */ + UnregisterSnapshot(snapshot); + return dboid; } @@ -1715,9 +1742,20 @@ remove_dbtablespaces(Oid db_id) Relation rel; HeapScanDesc scan; HeapTuple tuple; + Snapshot snapshot; + + /* + * As in createdb(), we'd better use an MVCC snapshot here, since this + * scan can run for a long time. Duplicate visits to tablespaces would be + * harmless, but missing a tablespace could result in permanently leaked + * files. + * + * XXX change this when a generic fix for SnapshotNow races is implemented + */ + snapshot = RegisterSnapshot(GetLatestSnapshot()); rel = heap_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan(rel, SnapshotNow, 0, NULL); + scan = heap_beginscan(rel, snapshot, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Oid dsttablespace = HeapTupleGetOid(tuple); @@ -1763,6 +1801,7 @@ remove_dbtablespaces(Oid db_id) heap_endscan(scan); heap_close(rel, AccessShareLock); + UnregisterSnapshot(snapshot); } /* @@ -1784,9 +1823,19 @@ check_db_file_conflict(Oid db_id) Relation rel; HeapScanDesc scan; HeapTuple tuple; + Snapshot snapshot; + + /* + * As in createdb(), we'd better use an MVCC snapshot here; missing a + * tablespace could result in falsely reporting the OID is unique, with + * disastrous future consequences per the comment above. + * + * XXX change this when a generic fix for SnapshotNow races is implemented + */ + snapshot = RegisterSnapshot(GetLatestSnapshot()); rel = heap_open(TableSpaceRelationId, AccessShareLock); - scan = heap_beginscan(rel, SnapshotNow, 0, NULL); + scan = heap_beginscan(rel, snapshot, 0, NULL); while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Oid dsttablespace = HeapTupleGetOid(tuple); @@ -1812,6 +1861,8 @@ check_db_file_conflict(Oid db_id) heap_endscan(scan); heap_close(rel, AccessShareLock); + UnregisterSnapshot(snapshot); + return result; } |