aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Masahiko Sawada <msawada@postgresql.org> 2025-02-21 10:12:08 -0800
committer: Masahiko Sawada <msawada@postgresql.org> 2025-02-21 10:12:08 -0800
commit: 44fe30fdab6746a287163e7cc093fd36cda8eb92 (patch)
tree: 983324d294a1b25ca1d362f1b349337b59a451c2 /src
parent: 901a1cf8b4a291a3a22021bf7acb516c1d278e53 (diff)
download: postgresql-44fe30fdab6746a287163e7cc093fd36cda8eb92.tar.gz
download: postgresql-44fe30fdab6746a287163e7cc093fd36cda8eb92.zip
Add default_char_signedness field to ControlFileData.
The signedness of the 'char' type in C is implementation-dependent. For instance, 'signed char' is used by default on x86 CPUs, while 'unsigned char' is used on aarch CPUs. Previously, we accidentally let C implementation signedness affect persistent data. This led to inconsistent results when comparing char data across different platforms.

This commit introduces a new 'default_char_signedness' field in ControlFileData to store the signedness of the 'char' type. While this change does not encourage the use of 'char' without explicitly specifying its signedness, this field can be used as a hint to ensure consistent behavior for pre-v18 data files that store data sorted by the 'char' type on disk (e.g., GIN and GiST indexes), especially in cross-platform replication scenarios.

Newly created database clusters unconditionally set the default char signedness to true. pg_upgrade (with an upcoming commit) changes this flag for clusters if the source database cluster has signedness=false. As a result, signedness=false setting will become rare over time. If we had known about the problem during the last development cycle that forced initdb (v8.3), we would have made all clusters signed or all clusters unsigned. Making pg_upgrade the only source of signedness=false will cause the population of database clusters to converge toward that retrospective ideal.

Bump catalog version (for the catalog changes) and PG_CONTROL_VERSION (for the additions in ControlFileData).

Reviewed-by: Noah Misch <noah@leadboat.com>
Discussion: https://postgr.es/m/CB11ADBC-0C3F-4FE0-A678-666EE80CBB07%40amazon.com
Diffstat (limited to 'src')
-rw-r--r-- src/backend/access/transam/xlog.c | 40
-rw-r--r-- src/backend/utils/misc/pg_controldata.c | 7
-rw-r--r-- src/bin/pg_controldata/pg_controldata.c | 2
-rw-r--r-- src/include/access/xlog.h | 1
-rw-r--r-- src/include/catalog/catversion.h | 2
-rw-r--r-- src/include/catalog/pg_control.h | 8
-rw-r--r-- src/include/catalog/pg_proc.dat | 6
7 files changed, 59 insertions, 7 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index d10704360a6..ea1f2d2993c 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -4284,6 +4284,33 @@ WriteControlFile(void)
ControlFile->float8ByVal = FLOAT8PASSBYVAL;
+ /*
+ * Initialize the default 'char' signedness.
+ *
+ * The signedness of the char type is implementation-defined. For instance
+ * on x86 architecture CPUs, the char data type is typically treated as
+ * signed by default, whereas on aarch architecture CPUs, it is typically
+ * treated as unsigned by default. In v17 or earlier, we accidentally let
+ * C implementation signedness affect persistent data. This led to
+ * inconsistent results when comparing char data across different
+ * platforms.
+ *
+ * This flag can be used as a hint to ensure consistent behavior for
+ * pre-v18 data files that store data sorted by the 'char' type on disk,
+ * especially in cross-platform replication scenarios.
+ *
+ * Newly created database clusters unconditionally set the default char
+ * signedness to true. pg_upgrade changes this flag for clusters that were
+ * initialized on signedness=false platforms. As a result,
+ * signedness=false setting will become rare over time. If we had known
+ * about this problem during the last development cycle that forced initdb
+ * (v8.3), we would have made all clusters signed or all clusters
+ * unsigned. Making pg_upgrade the only source of signedness=false will
+ * cause the population of database clusters to converge toward that
+ * retrospective ideal.
+ */
+ ControlFile->default_char_signedness = true;
+
/* Contents are protected with a CRC */
INIT_CRC32C(ControlFile->crc);
COMP_CRC32C(ControlFile->crc,
@@ -4613,6 +4640,19 @@ DataChecksumsEnabled(void)
}
/*
+ * Return true if the cluster was initialized on a platform where the
+ * default signedness of char is "signed". This function exists for code
+ * that deals with pre-v18 data files that store data sorted by the 'char'
+ * type on disk (e.g., GIN and GiST indexes). See the comments in
+ * WriteControlFile() for details.
+ */
+bool
+GetDefaultCharSignedness(void)
+{
+ return ControlFile->default_char_signedness;
+}
+
+/*
* Returns a fake LSN for unlogged relations.
*
* Each call generates an LSN that is greater than any previous value
diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c
index 9dfba499c13..6d036e3bf32 100644
--- a/src/backend/utils/misc/pg_controldata.c
+++ b/src/backend/utils/misc/pg_controldata.c
@@ -203,8 +203,8 @@ pg_control_recovery(PG_FUNCTION_ARGS)
Datum
pg_control_init(PG_FUNCTION_ARGS)
{
- Datum values[11];
- bool nulls[11];
+ Datum values[12];
+ bool nulls[12];
TupleDesc tupdesc;
HeapTuple htup;
ControlFileData *ControlFile;
@@ -254,6 +254,9 @@ pg_control_init(PG_FUNCTION_ARGS)
values[10] = Int32GetDatum(ControlFile->data_checksum_version);
nulls[10] = false;
+ values[11] = BoolGetDatum(ControlFile->default_char_signedness);
+ nulls[11] = false;
+
htup = heap_form_tuple(tupdesc, values, nulls);
PG_RETURN_DATUM(HeapTupleGetDatum(htup));
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index cf11ab3f2ee..bea779eef94 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -336,6 +336,8 @@ main(int argc, char *argv[])
(ControlFile->float8ByVal ? _("by value") : _("by reference")));
printf(_("Data page checksum version: %u\n"),
ControlFile->data_checksum_version);
+ printf(_("Default char data signedness: %s\n"),
+ (ControlFile->default_char_signedness ? _("signed") : _("unsigned")));
printf(_("Mock authentication nonce: %s\n"),
mock_auth_nonce_str);
return 0;
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 4411c1468ac..d313099c027 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -231,6 +231,7 @@ extern XLogRecPtr GetXLogWriteRecPtr(void);
extern uint64 GetSystemIdentifier(void);
extern char *GetMockAuthenticationNonce(void);
extern bool DataChecksumsEnabled(void);
+extern bool GetDefaultCharSignedness(void);
extern XLogRecPtr GetFakeLSNForUnloggedRel(void);
extern Size XLOGShmemSize(void);
extern void XLOGShmemInit(void);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 1d609f1af47..d179b512be9 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202502211
+#define CATALOG_VERSION_NO 202502212
#endif
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index 3797f25b306..63e834a6ce4 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -22,7 +22,7 @@
/* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION 1700
+#define PG_CONTROL_VERSION 1800
/* Nonce key length, see below */
#define MOCK_AUTH_NONCE_LEN 32
@@ -222,6 +222,12 @@ typedef struct ControlFileData
uint32 data_checksum_version;
/*
+ * True if the default signedness of char is "signed" on a platform where
+ * the cluster is initialized.
+ */
+ bool default_char_signedness;
+
+ /*
* Random nonce, used in authentication requests that need to proceed
* based on values that are cluster-unique, like a SASL exchange that
* failed at an early stage.
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 9e803d610d7..e2d5c0d0886 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -12206,9 +12206,9 @@
descr => 'pg_controldata init state information as a function',
proname => 'pg_control_init', provolatile => 'v', prorettype => 'record',
proargtypes => '',
- proallargtypes => '{int4,int4,int4,int4,int4,int4,int4,int4,int4,bool,int4}',
- proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}',
- proargnames => '{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float8_pass_by_value,data_page_checksum_version}',
+ proallargtypes => '{int4,int4,int4,int4,int4,int4,int4,int4,int4,bool,int4,bool}',
+ proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}',
+ proargnames => '{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float8_pass_by_value,data_page_checksum_version,default_char_signedness}',
prosrc => 'pg_control_init' },
# subscripting support for built-in types