aboutsummaryrefslogtreecommitdiff
path: root/src/backend/commands
diff options
context:
space:
mode:
authorNoah Misch <noah@leadboat.com>2020-03-22 09:24:09 -0700
committerNoah Misch <noah@leadboat.com>2020-03-22 09:24:13 -0700
commit63631ee64f84bae7feeb06b8417b628e82319ce2 (patch)
tree35d8fe6f43ea3c9d7489062474c26ba599973c6b /src/backend/commands
parente4b0a02ef8c8c69f2a49f4ca86de12ef34e97ac3 (diff)
downloadpostgresql-63631ee64f84bae7feeb06b8417b628e82319ce2.tar.gz
postgresql-63631ee64f84bae7feeb06b8417b628e82319ce2.zip
Revert "Skip WAL for new relfilenodes, under wal_level=minimal."
This reverts commit cb2fd7eac285b1b0a24eeb2b8ed4456b66c5a09f. Per numerous buildfarm members, it was incompatible with parallel query, and a test case assumed LP64. Back-patch to 9.5 (all supported versions). Discussion: https://postgr.es/m/20200321224920.GB1763544@rfd.leadboat.com
Diffstat (limited to 'src/backend/commands')
-rw-r--r--src/backend/commands/cluster.c19
-rw-r--r--src/backend/commands/copy.c58
-rw-r--r--src/backend/commands/createas.c11
-rw-r--r--src/backend/commands/indexcmds.c2
-rw-r--r--src/backend/commands/matview.c12
-rw-r--r--src/backend/commands/tablecmds.c26
6 files changed, 78 insertions, 50 deletions
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index b53f89ee71b..c7fda21634d 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -1112,25 +1112,6 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
}
/*
- * Recognize that rel1's relfilenode (swapped from rel2) is new in this
- * subtransaction. The rel2 storage (swapped from rel1) may or may not be
- * new.
- */
- {
- Relation rel1,
- rel2;
-
- rel1 = relation_open(r1, NoLock);
- rel2 = relation_open(r2, NoLock);
- rel2->rd_createSubid = rel1->rd_createSubid;
- rel2->rd_newRelfilenodeSubid = rel1->rd_newRelfilenodeSubid;
- rel2->rd_firstRelfilenodeSubid = rel1->rd_firstRelfilenodeSubid;
- RelationAssumeNewRelfilenode(rel1);
- relation_close(rel1, NoLock);
- relation_close(rel2, NoLock);
- }
-
- /*
* In the case of a shared catalog, these next few steps will only affect
* our own database's pg_class row; but that's okay, because they are all
* noncritical updates. That's also an important fact for the case of a
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 57c35bd6aa0..4f04d122c30 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2715,15 +2715,63 @@ CopyFrom(CopyState cstate)
RelationGetRelationName(cstate->rel))));
}
- /*
- * If the target file is new-in-transaction, we assume that checking FSM
- * for free space is a waste of time. This could possibly be wrong, but
- * it's unlikely.
+ /*----------
+ * Check to see if we can avoid writing WAL
+ *
+ * If archive logging/streaming is not enabled *and* either
+ * - table was created in same transaction as this COPY
+ * - data is being written to relfilenode created in this transaction
+ * then we can skip writing WAL. It's safe because if the transaction
+ * doesn't commit, we'll discard the table (or the new relfilenode file).
+ * If it does commit, we'll have done the table_finish_bulk_insert() at
+ * the bottom of this routine first.
+ *
+ * As mentioned in comments in utils/rel.h, the in-same-transaction test
+ * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
+ * can be cleared before the end of the transaction. The exact case is
+ * when a relation sets a new relfilenode twice in same transaction, yet
+ * the second one fails in an aborted subtransaction, e.g.
+ *
+ * BEGIN;
+ * TRUNCATE t;
+ * SAVEPOINT save;
+ * TRUNCATE t;
+ * ROLLBACK TO save;
+ * COPY ...
+ *
+ * Also, if the target file is new-in-transaction, we assume that checking
+ * FSM for free space is a waste of time, even if we must use WAL because
+ * of archiving. This could possibly be wrong, but it's unlikely.
+ *
+ * The comments for table_tuple_insert and RelationGetBufferForTuple
+ * specify that skipping WAL logging is only safe if we ensure that our
+ * tuples do not go into pages containing tuples from any other
+ * transactions --- but this must be the case if we have a new table or
+ * new relfilenode, so we need no additional work to enforce that.
+ *
+ * We currently don't support this optimization if the COPY target is a
+ * partitioned table as we currently only lazily initialize partition
+ * information when routing the first tuple to the partition. We cannot
+ * know at this stage if we can perform this optimization. It should be
+ * possible to improve on this, but it does mean maintaining heap insert
+ * option flags per partition and setting them when we first open the
+ * partition.
+ *
+ * This optimization is not supported for relation types which do not
+ * have any physical storage, with foreign tables and views using
+ * INSTEAD OF triggers entering in this category. Partitioned tables
+ * are not supported as per the description above.
+ *----------
*/
+ /* createSubid is creation check, newRelfilenodeSubid is truncation check */
if (RELKIND_HAS_STORAGE(cstate->rel->rd_rel->relkind) &&
(cstate->rel->rd_createSubid != InvalidSubTransactionId ||
- cstate->rel->rd_firstRelfilenodeSubid != InvalidSubTransactionId))
+ cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId))
+ {
ti_options |= TABLE_INSERT_SKIP_FSM;
+ if (!XLogIsNeeded())
+ ti_options |= TABLE_INSERT_SKIP_WAL;
+ }
/*
* Optimize if new relfilenode was created in this subxact or one of its
diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c
index 568cfba08c0..4c1d909d380 100644
--- a/src/backend/commands/createas.c
+++ b/src/backend/commands/createas.c
@@ -553,13 +553,16 @@ intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
myState->rel = intoRelationDesc;
myState->reladdr = intoRelationAddr;
myState->output_cid = GetCurrentCommandId(true);
- myState->ti_options = TABLE_INSERT_SKIP_FSM;
- myState->bistate = GetBulkInsertState();
/*
- * Valid smgr_targblock implies something already wrote to the relation.
- * This may be harmless, but this function hasn't planned for it.
+ * We can skip WAL-logging the insertions, unless PITR or streaming
+ * replication is in use. We can skip the FSM in any case.
*/
+ myState->ti_options = TABLE_INSERT_SKIP_FSM |
+ (XLogIsNeeded() ? 0 : TABLE_INSERT_SKIP_WAL);
+ myState->bistate = GetBulkInsertState();
+
+ /* Not using WAL requires smgr_targblock be initially invalid */
Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
}
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 41c67be15bd..ea5e4f67e9e 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -1195,8 +1195,6 @@ DefineIndex(Oid relationId,
childStmt->relation = NULL;
childStmt->indexOid = InvalidOid;
childStmt->oldNode = InvalidOid;
- childStmt->oldCreateSubid = InvalidSubTransactionId;
- childStmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
/*
* Adjust any Vars (both in expressions and in the index's
diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c
index ae809c98012..537d0e8ceff 100644
--- a/src/backend/commands/matview.c
+++ b/src/backend/commands/matview.c
@@ -457,13 +457,17 @@ transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
*/
myState->transientrel = transientrel;
myState->output_cid = GetCurrentCommandId(true);
- myState->ti_options = TABLE_INSERT_SKIP_FSM | TABLE_INSERT_FROZEN;
- myState->bistate = GetBulkInsertState();
/*
- * Valid smgr_targblock implies something already wrote to the relation.
- * This may be harmless, but this function hasn't planned for it.
+ * We can skip WAL-logging the insertions, unless PITR or streaming
+ * replication is in use. We can skip the FSM in any case.
*/
+ myState->ti_options = TABLE_INSERT_SKIP_FSM | TABLE_INSERT_FROZEN;
+ if (!XLogIsNeeded())
+ myState->ti_options |= TABLE_INSERT_SKIP_WAL;
+ myState->bistate = GetBulkInsertState();
+
+ /* Not using WAL requires smgr_targblock be initially invalid */
Assert(RelationGetTargetBlock(transientrel) == InvalidBlockNumber);
}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index b125d51e2fc..74eb7ac8d76 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -4785,14 +4785,19 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
newrel = NULL;
/*
- * Prepare a BulkInsertState and options for table_tuple_insert. The FSM
- * is empty, so don't bother using it.
+ * Prepare a BulkInsertState and options for table_tuple_insert. Because
+ * we're building a new heap, we can skip WAL-logging and fsync it to disk
+ * at the end instead (unless WAL-logging is required for archiving or
+ * streaming replication). The FSM is empty too, so don't bother using it.
*/
if (newrel)
{
mycid = GetCurrentCommandId(true);
bistate = GetBulkInsertState();
+
ti_options = TABLE_INSERT_SKIP_FSM;
+ if (!XLogIsNeeded())
+ ti_options |= TABLE_INSERT_SKIP_WAL;
}
else
{
@@ -7300,19 +7305,14 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
/*
* If TryReuseIndex() stashed a relfilenode for us, we used it for the new
- * index instead of building from scratch. Restore associated fields.
- * This may store InvalidSubTransactionId in both fields, in which case
- * relcache.c will assume it can rebuild the relcache entry. Hence, do
- * this after the CCI that made catalog rows visible to any rebuild. The
- * DROP of the old edition of this index will have scheduled the storage
- * for deletion at commit, so cancel that pending deletion.
+ * index instead of building from scratch. The DROP of the old edition of
+ * this index will have scheduled the storage for deletion at commit, so
+ * cancel that pending deletion.
*/
if (OidIsValid(stmt->oldNode))
{
Relation irel = index_open(address.objectId, NoLock);
- irel->rd_createSubid = stmt->oldCreateSubid;
- irel->rd_firstRelfilenodeSubid = stmt->oldFirstRelfilenodeSubid;
RelationPreserveStorage(irel->rd_node, true);
index_close(irel, NoLock);
}
@@ -11619,11 +11619,7 @@ TryReuseIndex(Oid oldId, IndexStmt *stmt)
/* If it's a partitioned index, there is no storage to share. */
if (irel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
- {
stmt->oldNode = irel->rd_node.relNode;
- stmt->oldCreateSubid = irel->rd_createSubid;
- stmt->oldFirstRelfilenodeSubid = irel->rd_firstRelfilenodeSubid;
- }
index_close(irel, NoLock);
}
}
@@ -12557,8 +12553,6 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode)
table_close(pg_class, RowExclusiveLock);
- RelationAssumeNewRelfilenode(rel);
-
relation_close(rel, NoLock);
/* Make sure the reltablespace change is visible */