aboutsummaryrefslogtreecommitdiff
path: root/src/backend/commands/copy.c
diff options
context:
space:
mode:
author Noah Misch <noah@leadboat.com> 2020-03-22 09:24:09 -0700
committer Noah Misch <noah@leadboat.com> 2020-03-22 09:24:15 -0700
commit b31e96ba420f049625435591ba7ae3ded89a781d (patch)
tree bf07eafdd96ba96bbcdb42b21e3e9047e6214317 /src/backend/commands/copy.c
parent 9db4b9da2801ed94c8f209c807e654c139dc1d7e (diff)
download: postgresql-b31e96ba420f049625435591ba7ae3ded89a781d.tar.gz
postgresql-b31e96ba420f049625435591ba7ae3ded89a781d.zip
Revert "Skip WAL for new relfilenodes, under wal_level=minimal."
This reverts commit cb2fd7eac285b1b0a24eeb2b8ed4456b66c5a09f. Per numerous buildfarm members, it was incompatible with parallel query, and a test case assumed LP64. Back-patch to 9.5 (all supported versions). Discussion: https://postgr.es/m/20200321224920.GB1763544@rfd.leadboat.com
Diffstat (limited to 'src/backend/commands/copy.c')
-rw-r--r-- src/backend/commands/copy.c 52
1 files changed, 47 insertions, 5 deletions
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index f053f70c6e2..9276f775c41 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2274,14 +2274,49 @@ CopyFrom(CopyState cstate)
tupDesc = RelationGetDescr(cstate->rel);
- /*
- * If the target file is new-in-transaction, we assume that checking FSM
- * for free space is a waste of time. This could possibly be wrong, but
- * it's unlikely.
+ /*----------
+ * Check to see if we can avoid writing WAL
+ *
+ * If archive logging/streaming is not enabled *and* either
+ * - table was created in same transaction as this COPY
+ * - data is being written to relfilenode created in this transaction
+ * then we can skip writing WAL. It's safe because if the transaction
+ * doesn't commit, we'll discard the table (or the new relfilenode file).
+ * If it does commit, we'll have done the heap_sync at the bottom of this
+ * routine first.
+ *
+ * As mentioned in comments in utils/rel.h, the in-same-transaction test
+ * is not always set correctly, since in rare cases rd_newRelfilenodeSubid
+ * can be cleared before the end of the transaction. The exact case is
+ * when a relation sets a new relfilenode twice in same transaction, yet
+ * the second one fails in an aborted subtransaction, e.g.
+ *
+ * BEGIN;
+ * TRUNCATE t;
+ * SAVEPOINT save;
+ * TRUNCATE t;
+ * ROLLBACK TO save;
+ * COPY ...
+ *
+ * Also, if the target file is new-in-transaction, we assume that checking
+ * FSM for free space is a waste of time, even if we must use WAL because
+ * of archiving. This could possibly be wrong, but it's unlikely.
+ *
+ * The comments for heap_insert and RelationGetBufferForTuple specify that
+ * skipping WAL logging is only safe if we ensure that our tuples do not
+ * go into pages containing tuples from any other transactions --- but this
+ * must be the case if we have a new table or new relfilenode, so we need
+ * no additional work to enforce that.
+ *----------
*/
+ /* createSubid is creation check, newRelfilenodeSubid is truncation check */
if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
- cstate->rel->rd_firstRelfilenodeSubid != InvalidSubTransactionId)
+ cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
+ {
hi_options |= HEAP_INSERT_SKIP_FSM;
+ if (!XLogIsNeeded())
+ hi_options |= HEAP_INSERT_SKIP_WAL;
+ }
/*
* Optimize if new relfilenode was created in this subxact or one of its
@@ -2540,6 +2575,13 @@ CopyFrom(CopyState cstate)
FreeExecutorState(estate);
+ /*
+ * If we skipped writing WAL, then we need to sync the heap (but not
+ * indexes since those use WAL anyway)
+ */
+ if (hi_options & HEAP_INSERT_SKIP_WAL)
+ heap_sync(cstate->rel);
+
return processed;
}