aboutsummaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2016-03-29 21:16:12 -0400
committerRobert Haas <rhaas@postgresql.org>2016-03-29 21:29:49 -0400
commit314cbfc5da988eff8998655158f84c9815ecfbcd (patch)
tree94415b7fed00b4a70f337e403c1d5fe7e811ec70 /src/include
parenta898b409f66f956e99694710f537829db02652c0 (diff)
downloadpostgresql-314cbfc5da988eff8998655158f84c9815ecfbcd.tar.gz
postgresql-314cbfc5da988eff8998655158f84c9815ecfbcd.zip
Add new replication mode synchronous_commit = 'remote_apply'.
In this mode, the master waits for the transaction to be applied on the remote side, not just written to disk. That means that you can count on a transaction started on the standby to see all commits previously acknowledged by the master. To make this work, the standby sends a reply after replaying each commit record generated with synchronous_commit >= 'remote_apply'. This introduces a small inefficiency: the extra replies will be sent even by standbys that aren't the current synchronous standby. But previously-existing synchronous_commit levels make no attempt at all to optimize which replies are sent based on what the primary cares about, so this is no worse, and at least avoids any extra replies for people not using the feature at all. Thomas Munro, reviewed by Michael Paquier and by me. Some additional tweaks by me.
Diffstat (limited to 'src/include')
-rw-r--r--src/include/access/xact.h7
-rw-r--r--src/include/access/xlog.h2
-rw-r--r--src/include/access/xlog_internal.h2
-rw-r--r--src/include/replication/syncrep.h5
-rw-r--r--src/include/replication/walreceiver.h12
5 files changed, 22 insertions, 6 deletions
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 3ba23f5e87b..503ae1b82d7 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -60,7 +60,9 @@ typedef enum
SYNCHRONOUS_COMMIT_LOCAL_FLUSH, /* wait for local flush only */
SYNCHRONOUS_COMMIT_REMOTE_WRITE, /* wait for local flush and remote
* write */
- SYNCHRONOUS_COMMIT_REMOTE_FLUSH /* wait for local and remote flush */
+ SYNCHRONOUS_COMMIT_REMOTE_FLUSH, /* wait for local and remote flush */
+ SYNCHRONOUS_COMMIT_REMOTE_APPLY /* wait for local flush and remote
+ * apply */
} SyncCommitLevel;
/* Define the default setting for synchonous_commit */
@@ -144,10 +146,13 @@ typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid,
* EOXact... routines which run at the end of the original transaction
* completion.
*/
+#define XACT_COMPLETION_APPLY_FEEDBACK (1U << 29)
#define XACT_COMPLETION_UPDATE_RELCACHE_FILE (1U << 30)
#define XACT_COMPLETION_FORCE_SYNC_COMMIT (1U << 31)
/* Access macros for above flags */
+#define XactCompletionApplyFeedback(xinfo) \
+ ((xinfo & XACT_COMPLETION_APPLY_FEEDBACK) != 0)
#define XactCompletionRelcacheInitFileInval(xinfo) \
((xinfo & XACT_COMPLETION_UPDATE_RELCACHE_FILE) != 0)
#define XactCompletionForceSyncCommit(xinfo) \
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 74a139496e6..a7dcdae67f8 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -267,6 +267,8 @@ extern bool CheckPromoteSignal(void);
extern void WakeupRecovery(void);
extern void SetWalWriterSleeping(bool sleeping);
+extern void XLogRequestWalReceiverReply(void);
+
extern void assign_max_wal_size(int newval, void *extra);
extern void assign_checkpoint_completion_target(double newval, void *extra);
diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h
index e8b64dd4262..aa2f074201a 100644
--- a/src/include/access/xlog_internal.h
+++ b/src/include/access/xlog_internal.h
@@ -31,7 +31,7 @@
/*
* Each page of XLOG file has a header like this:
*/
-#define XLOG_PAGE_MAGIC 0xD088 /* can be used as WAL version indicator */
+#define XLOG_PAGE_MAGIC 0xD089 /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData
{
diff --git a/src/include/replication/syncrep.h b/src/include/replication/syncrep.h
index 96e059bc3fa..c005a425836 100644
--- a/src/include/replication/syncrep.h
+++ b/src/include/replication/syncrep.h
@@ -23,8 +23,9 @@
#define SYNC_REP_NO_WAIT -1
#define SYNC_REP_WAIT_WRITE 0
#define SYNC_REP_WAIT_FLUSH 1
+#define SYNC_REP_WAIT_APPLY 2
-#define NUM_SYNC_REP_WAIT_MODE 2
+#define NUM_SYNC_REP_WAIT_MODE 3
/* syncRepState */
#define SYNC_REP_NOT_WAITING 0
@@ -35,7 +36,7 @@
extern char *SyncRepStandbyNames;
/* called by user backend */
-extern void SyncRepWaitForLSN(XLogRecPtr XactCommitLSN);
+extern void SyncRepWaitForLSN(XLogRecPtr lsn, bool commit);
/* called at backend exit */
extern void SyncRepCleanupAtProcExit(void);
diff --git a/src/include/replication/walreceiver.h b/src/include/replication/walreceiver.h
index 6eacb095d1b..36bcb471720 100644
--- a/src/include/replication/walreceiver.h
+++ b/src/include/replication/walreceiver.h
@@ -113,9 +113,16 @@ typedef struct
slock_t mutex; /* locks shared variables shown above */
/*
+ * force walreceiver reply? This doesn't need to be locked; memory
+ * barriers for ordering are sufficient.
+ */
+ bool force_reply;
+
+ /*
* Latch used by startup process to wake up walreceiver after telling it
* where to start streaming (after setting receiveStart and
- * receiveStartTLI).
+ * receiveStartTLI), and also to tell it to send apply feedback to the
+ * primary whenever specially marked commit records are applied.
*/
Latch latch;
} WalRcvData;
@@ -138,7 +145,7 @@ extern PGDLLIMPORT walrcv_startstreaming_type walrcv_startstreaming;
typedef void (*walrcv_endstreaming_type) (TimeLineID *next_tli);
extern PGDLLIMPORT walrcv_endstreaming_type walrcv_endstreaming;
-typedef int (*walrcv_receive_type) (int timeout, char **buffer);
+typedef int (*walrcv_receive_type) (char **buffer, int *wait_fd);
extern PGDLLIMPORT walrcv_receive_type walrcv_receive;
typedef void (*walrcv_send_type) (const char *buffer, int nbytes);
@@ -162,5 +169,6 @@ extern void RequestXLogStreaming(TimeLineID tli, XLogRecPtr recptr,
extern XLogRecPtr GetWalRcvWriteRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI);
extern int GetReplicationApplyDelay(void);
extern int GetReplicationTransferLatency(void);
+extern void WalRcvForceReply(void);
#endif /* _WALRECEIVER_H */