aboutsummaryrefslogtreecommitdiff
path: root/src/include/utils/rel.h
diff options
context:
space:
mode:
author Noah Misch <noah@leadboat.com> 2020-03-21 09:38:26 -0700
committer Noah Misch <noah@leadboat.com> 2020-03-21 09:38:36 -0700
commit 9db4b9da2801ed94c8f209c807e654c139dc1d7e (patch)
tree 69fe368d6fdc02a316399cdeeaf64c11ea75477e /src/include/utils/rel.h
parent e0dd086414f782d9200ad525a1643a9f57a2b497 (diff)
download postgresql-9db4b9da2801ed94c8f209c807e654c139dc1d7e.tar.gz
postgresql-9db4b9da2801ed94c8f209c807e654c139dc1d7e.zip
Skip WAL for new relfilenodes, under wal_level=minimal.
Until now, only selected bulk operations (e.g. COPY) did this. If a given relfilenode received both a WAL-skipping COPY and a WAL-logged operation (e.g. INSERT), recovery could lose tuples from the COPY. See src/backend/access/transam/README section "Skipping WAL for New RelFileNode" for the new coding rules. Maintainers of table access methods should examine that section. To maintain data durability, just before commit, we choose between an fsync of the relfilenode and copying its contents to WAL. A new GUC, wal_skip_threshold, guides that choice. If this change slows a workload that creates small, permanent relfilenodes under wal_level=minimal, try adjusting wal_skip_threshold. Users setting a timeout on COMMIT may need to adjust that timeout, and log_min_duration_statement analysis will reflect time consumption moving to COMMIT from commands like COPY. Internally, this requires a reliable determination of whether RollbackAndReleaseCurrentSubTransaction() would unlink a relation's current relfilenode. Introduce rd_firstRelfilenodeSubid. Amend the specification of rd_createSubid such that the field is zero when a new rel has an old rd_node. Make relcache.c retain entries for certain dropped relations until end of transaction. Back-patch to 9.5 (all supported versions). This introduces a new WAL record type, XLOG_GIST_ASSIGN_LSN, without bumping XLOG_PAGE_MAGIC. As always, update standby systems before master systems. This changes sizeof(RelationData) and sizeof(IndexStmt), breaking binary compatibility for affected extensions. (The most recent commit to affect the same class of extensions was 089e4d405d0f3b94c74a2c6a54357a84a681754b.) Kyotaro Horiguchi, reviewed (in earlier, similar versions) by Robert Haas. Heikki Linnakangas and Michael Paquier implemented earlier designs that materially clarified the problem. Reviewed, in earlier designs, by Andrew Dunstan, Andres Freund, Alvaro Herrera, Tom Lane, Fujii Masao, and Simon Riggs. 
Reported by Martijn van Oosterhout. Discussion: https://postgr.es/m/20150702220524.GA9392@svana.org
Diffstat (limited to 'src/include/utils/rel.h')
-rw-r--r-- src/include/utils/rel.h 58
1 files changed, 44 insertions, 14 deletions
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index ca6c0a5adfa..99e6351c037 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -15,6 +15,7 @@
#define REL_H
#include "access/tupdesc.h"
+#include "access/xlog.h"
#include "catalog/pg_am.h"
#include "catalog/pg_class.h"
#include "catalog/pg_index.h"
@@ -82,25 +83,43 @@ typedef struct RelationData
/*----------
* rd_createSubid is the ID of the highest subtransaction the rel has
- * survived into; or zero if the rel was not created in the current top
- * transaction. This can be now be relied on, whereas previously it could
- * be "forgotten" in earlier releases. Likewise, rd_newRelfilenodeSubid is
- * the ID of the highest subtransaction the relfilenode change has
- * survived into, or zero if not changed in the current transaction (or we
- * have forgotten changing it). rd_newRelfilenodeSubid can be forgotten
- * when a relation has multiple new relfilenodes within a single
- * transaction, with one of them occurring in a subsequently aborted
- * subtransaction, e.g.
+ * survived into or zero if the rel or its rd_node was created before the
+ * current top transaction. (IndexStmt.oldNode leads to the case of a new
+ * rel with an old rd_node.) rd_firstRelfilenodeSubid is the ID of the
+ * highest subtransaction an rd_node change has survived into or zero if
+ * rd_node matches the value it had at the start of the current top
+ * transaction. (Rolling back the subtransaction that
+ * rd_firstRelfilenodeSubid denotes would restore rd_node to the value it
+ * had at the start of the current top transaction. Rolling back any
+ * lower subtransaction would not.) Their accuracy is critical to
+ * RelationNeedsWAL().
+ *
+ * rd_newRelfilenodeSubid is the ID of the highest subtransaction the
+ * most-recent relfilenode change has survived into or zero if not changed
+ * in the current transaction (or we have forgotten changing it). This
+ * field is accurate when non-zero, but it can be zero when a relation has
+ * multiple new relfilenodes within a single transaction, with one of them
+ * occurring in a subsequently aborted subtransaction, e.g.
* BEGIN;
* TRUNCATE t;
* SAVEPOINT save;
* TRUNCATE t;
* ROLLBACK TO save;
* -- rd_newRelfilenodeSubid is now forgotten
+ *
+ * If every rd_*Subid field is zero, they are read-only outside
+ * relcache.c. Files that trigger rd_node changes by updating
+ * pg_class.reltablespace and/or pg_class.relfilenode call
+ * RelationAssumeNewRelfilenode() to update rd_*Subid.
+ *
+ * rd_droppedSubid is the ID of the highest subtransaction that a drop of
+ * the rel has survived into. In entries visible outside relcache.c, this
+ * is always zero.
*/
SubTransactionId rd_createSubid; /* rel was created in current xact */
- SubTransactionId rd_newRelfilenodeSubid; /* new relfilenode assigned in
- * current xact */
+ SubTransactionId rd_newRelfilenodeSubid; /* highest subxact changing
+ * rd_node to current value */
+ /* see end for rd_firstRelfilenodeSubid and rd_droppedSubid */
Form_pg_class rd_rel; /* RELATION tuple */
TupleDesc rd_att; /* tuple descriptor */
@@ -189,6 +208,10 @@ typedef struct RelationData
/* use "struct" here to avoid needing to include pgstat.h: */
struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
+
+ SubTransactionId rd_firstRelfilenodeSubid; /* highest subxact changing
+ * rd_node to any value */
+ SubTransactionId rd_droppedSubid; /* dropped with another Subid set */
} RelationData;
/*
@@ -437,9 +460,16 @@ typedef struct ViewOptions
/*
* RelationNeedsWAL
* True if relation needs WAL.
- */
-#define RelationNeedsWAL(relation) \
- ((relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT)
+ *
+ * Returns false if wal_level = minimal and this relation is created or
+ * truncated in the current transaction. See "Skipping WAL for New
+ * RelFileNode" in src/backend/access/transam/README.
+ */
+#define RelationNeedsWAL(relation) \
+ ((relation)->rd_rel->relpersistence == RELPERSISTENCE_PERMANENT && \
+ (XLogIsNeeded() || /* WAL can be skipped only under wal_level=minimal */ \
+ ((relation)->rd_createSubid == InvalidSubTransactionId && \
+ (relation)->rd_firstRelfilenodeSubid == InvalidSubTransactionId)))
/*
* RelationUsesLocalBuffers