aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jeff Davis <jdavis@postgresql.org> 2024-03-13 23:33:44 -0700
committer: Jeff Davis <jdavis@postgresql.org> 2024-03-13 23:33:44 -0700
commit: 2d819a08a1cbc11364e36f816b02e33e8dcc030b (patch)
tree: 1a8d3b459866d7df936faffa0e64f5e339e6a6c2
parent: 6ab2e8385d55e0b73bb8bbc41d9c286f5f7f357f (diff)
downloadpostgresql-2d819a08a1cbc11364e36f816b02e33e8dcc030b.tar.gz
postgresql-2d819a08a1cbc11364e36f816b02e33e8dcc030b.zip
Introduce "builtin" collation provider.
New provider for collations, like "libc" or "icu", but without any external dependency. Initially, the only locale supported by the builtin provider is "C", which is identical to the libc provider's "C" locale. The libc provider's "C" locale has always been treated as a special case that uses an internal implementation, without using libc at all -- so the new builtin provider uses the same implementation. The builtin provider's locale is independent of the server environment variables LC_COLLATE and LC_CTYPE. Using the builtin provider, the database collation locale can be "C" while LC_COLLATE and LC_CTYPE are set to "en_US", which is impossible with the libc provider. By offering a new builtin provider, it clarifies that the semantics of a collation using this provider will never depend on libc, and makes it easier to document the behavior. Discussion: https://postgr.es/m/ab925f69-5f9d-f85e-b87c-bd2a44798659@joeconway.com Discussion: https://postgr.es/m/dd9261f4-7a98-4565-93ec-336c1c110d90@manitou-mail.org Discussion: https://postgr.es/m/ff4c2f2f9c8fc7ca27c1c24ae37ecaeaeaff6b53.camel%40j-davis.com Reviewed-by: Daniel Vérité, Peter Eisentraut, Jeremy Schneider
-rw-r--r-- doc/src/sgml/charset.sgml 90
-rw-r--r-- doc/src/sgml/ref/create_collation.sgml 11
-rw-r--r-- doc/src/sgml/ref/create_database.sgml 7
-rw-r--r-- doc/src/sgml/ref/createdb.sgml 2
-rw-r--r-- doc/src/sgml/ref/initdb.sgml 17
-rw-r--r-- src/backend/catalog/pg_collation.c 5
-rw-r--r-- src/backend/commands/collationcmds.c 74
-rw-r--r-- src/backend/commands/dbcommands.c 129
-rw-r--r-- src/backend/utils/adt/formatting.c 6
-rw-r--r-- src/backend/utils/adt/pg_locale.c 123
-rw-r--r-- src/backend/utils/init/postinit.c 20
-rw-r--r-- src/bin/initdb/initdb.c 53
-rw-r--r-- src/bin/initdb/t/001_initdb.pl 40
-rw-r--r-- src/bin/pg_dump/pg_dump.c 23
-rw-r--r-- src/bin/pg_upgrade/t/002_pg_upgrade.pl 81
-rw-r--r-- src/bin/psql/describe.c 4
-rw-r--r-- src/bin/scripts/createdb.c 19
-rw-r--r-- src/bin/scripts/t/020_createdb.pl 60
-rw-r--r-- src/include/catalog/catversion.h 2
-rw-r--r-- src/include/catalog/pg_collation.dat 6
-rw-r--r-- src/include/catalog/pg_collation.h 3
-rw-r--r-- src/include/utils/pg_locale.h 5
-rw-r--r-- src/test/icu/t/010_database.pl 22
-rw-r--r-- src/test/regress/expected/collate.out 19
-rw-r--r-- src/test/regress/sql/collate.sql 8
25 files changed, 671 insertions, 158 deletions
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index 4fc143025ef..7114eb7b522 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -342,22 +342,14 @@ initdb --locale=sv_SE
<title>Locale Providers</title>
<para>
- <productname>PostgreSQL</productname> supports multiple <firstterm>locale
- providers</firstterm>. This specifies which library supplies the locale
- data. One standard provider name is <literal>libc</literal>, which uses
- the locales provided by the operating system C library. These are the
- locales used by most tools provided by the operating system. Another
- provider is <literal>icu</literal>, which uses the external
- ICU<indexterm><primary>ICU</primary></indexterm> library. ICU locales can
- only be used if support for ICU was configured when PostgreSQL was built.
+ A locale provider specifies which library defines the locale behavior for
+ collations and character classifications.
</para>
<para>
The commands and tools that select the locale settings, as described
- above, each have an option to select the locale provider. The examples
- shown earlier all use the <literal>libc</literal> provider, which is the
- default. Here is an example to initialize a database cluster using the
- ICU provider:
+ above, each have an option to select the locale provider. Here is an
+ example to initialize a database cluster using the ICU provider:
<programlisting>
initdb --locale-provider=icu --icu-locale=en
</programlisting>
@@ -370,12 +362,76 @@ initdb --locale-provider=icu --icu-locale=en
</para>
<para>
- Which locale provider to use depends on individual requirements. For most
- basic uses, either provider will give adequate results. For the libc
- provider, it depends on what the operating system offers; some operating
- systems are better than others. For advanced uses, ICU offers more locale
- variants and customization options.
+ Regardless of the locale provider, the operating system is still used to
+ provide some locale-aware behavior, such as messages (see <xref
+ linkend="guc-lc-messages"/>).
</para>
+
+ <para>
+ The available locale providers are listed below:
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term><literal>builtin</literal></term>
+ <listitem>
+ <para>
+ The <literal>builtin</literal> provider uses built-in operations. Only
+ the <literal>C</literal> locale is supported for this provider.
+ </para>
+ <para>
+ The <literal>C</literal> locale behavior is identical to the
+ <literal>C</literal> locale in the libc provider. When using this
+ locale, the behavior may depend on the database encoding.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>icu</literal></term>
+ <listitem>
+ <para>
+ The <literal>icu</literal> provider uses the external
+ ICU<indexterm><primary>ICU</primary></indexterm>
+ library. <productname>PostgreSQL</productname> must have been
+ configured with ICU support.
+ </para>
+ <para>
+ ICU provides collation and character classification behavior that is
+ independent of the operating system and database encoding, which is
+ preferable if you expect to transition to other platforms without any
+ change in results. <literal>LC_COLLATE</literal> and
+ <literal>LC_CTYPE</literal> can be set independently of the ICU
+ locale.
+ </para>
+ <note>
+ <para>
+ For the ICU provider, results may depend on the version of the ICU
+ library used, as it is updated to reflect changes in natural language
+ over time.
+ </para>
+ </note>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>libc</literal></term>
+ <listitem>
+ <para>
+ The <literal>libc</literal> provider uses the operating system's C
+ library. The collation and character classification behavior is
+ controlled by the settings <literal>LC_COLLATE</literal> and
+ <literal>LC_CTYPE</literal>, so they cannot be set independently.
+ </para>
+ <note>
+ <para>
+ The same locale name may have different behavior on different
+ platforms when using the libc provider.
+ </para>
+ </note>
+ </listitem>
+ </varlistentry>
+ </variablelist>
</sect2>
<sect2 id="icu-locales">
diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml
index 5cf9777764b..98cd7d56be9 100644
--- a/doc/src/sgml/ref/create_collation.sgml
+++ b/doc/src/sgml/ref/create_collation.sgml
@@ -96,6 +96,11 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replace
<replaceable>locale</replaceable>, you cannot specify either of those
parameters.
</para>
+ <para>
+ If <replaceable>provider</replaceable> is <literal>builtin</literal>,
+ then <replaceable>locale</replaceable> must be specified and set to
+ <literal>C</literal>.
+ </para>
</listitem>
</varlistentry>
@@ -129,9 +134,9 @@ CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replace
<listitem>
<para>
Specifies the provider to use for locale services associated with this
- collation. Possible values are
- <literal>icu</literal><indexterm><primary>ICU</primary></indexterm>
- (if the server was built with ICU support) or <literal>libc</literal>.
+ collation. Possible values are <literal>builtin</literal>,
+ <literal>icu</literal><indexterm><primary>ICU</primary></indexterm> (if
+ the server was built with ICU support) or <literal>libc</literal>.
<literal>libc</literal> is the default. See <xref
linkend="locale-providers"/> for details.
</para>
diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml
index 72927960ebb..6c1fd95602d 100644
--- a/doc/src/sgml/ref/create_database.sgml
+++ b/doc/src/sgml/ref/create_database.sgml
@@ -162,6 +162,11 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
linkend="create-database-lc-ctype"/>, or <xref
linkend="create-database-icu-locale"/> individually.
</para>
+ <para>
+ If <xref linkend="create-database-locale-provider"/> is
+ <literal>builtin</literal>, then <replaceable>locale</replaceable> or
+ <replaceable>builtin_locale</replaceable> must be specified and set to
+ <literal>C</literal>.
+ </para>
<tip>
<para>
The other locale settings <xref linkend="guc-lc-messages"/>, <xref
@@ -243,7 +248,7 @@ CREATE DATABASE <replaceable class="parameter">name</replaceable>
<listitem>
<para>
Specifies the provider to use for the default collation in this
- database. Possible values are
+ database. Possible values are <literal>builtin</literal>,
<literal>icu</literal><indexterm><primary>ICU</primary></indexterm>
(if the server was built with ICU support) or <literal>libc</literal>.
By default, the provider is the same as that of the <xref
diff --git a/doc/src/sgml/ref/createdb.sgml b/doc/src/sgml/ref/createdb.sgml
index e4647d5ce71..d3e815f659c 100644
--- a/doc/src/sgml/ref/createdb.sgml
+++ b/doc/src/sgml/ref/createdb.sgml
@@ -171,7 +171,7 @@ PostgreSQL documentation
</varlistentry>
<varlistentry>
- <term><option>--locale-provider={<literal>libc</literal>|<literal>icu</literal>}</option></term>
+ <term><option>--locale-provider={<literal>builtin</literal>|<literal>libc</literal>|<literal>icu</literal>}</option></term>
<listitem>
<para>
Specifies the locale provider for the database's default collation.
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index cd75cae10e2..4760570f6ab 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -286,6 +286,11 @@ PostgreSQL documentation
environment that <command>initdb</command> runs in. Locale
support is described in <xref linkend="locale"/>.
</para>
+ <para>
+ If <option>--locale-provider</option> is <literal>builtin</literal>,
+ <option>--locale</option> or <option>--builtin-locale</option> must be
+ specified and set to <literal>C</literal>.
+ </para>
</listitem>
</varlistentry>
@@ -314,8 +319,18 @@ PostgreSQL documentation
</listitem>
</varlistentry>
+ <varlistentry id="app-initdb-builtin-locale">
+ <term><option>--builtin-locale=<replaceable>locale</replaceable></option></term>
+ <listitem>
+ <para>
+ Specifies the locale name when the builtin provider is used. Locale support
+ is described in <xref linkend="locale"/>.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="app-initdb-option-locale-provider">
- <term><option>--locale-provider={<literal>libc</literal>|<literal>icu</literal>}</option></term>
+ <term><option>--locale-provider={<literal>builtin</literal>|<literal>libc</literal>|<literal>icu</literal>}</option></term>
<listitem>
<para>
This option sets the locale provider for databases created in the new
diff --git a/src/backend/catalog/pg_collation.c b/src/backend/catalog/pg_collation.c
index e42f2afccbc..7f2f7012299 100644
--- a/src/backend/catalog/pg_collation.c
+++ b/src/backend/catalog/pg_collation.c
@@ -64,7 +64,10 @@ CollationCreate(const char *collname, Oid collnamespace,
Assert(collname);
Assert(collnamespace);
Assert(collowner);
- Assert((collcollate && collctype) || colllocale);
+ Assert((collprovider == COLLPROVIDER_LIBC &&
+ collcollate && collctype && !colllocale) ||
+ (collprovider != COLLPROVIDER_LIBC &&
+ !collcollate && !collctype && colllocale));
/*
* Make sure there is no existing collation of same name & encoding.
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 59d7e17804b..9059f8b3efd 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -66,7 +66,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
DefElem *versionEl = NULL;
char *collcollate;
char *collctype;
- char *colllocale;
+ const char *colllocale;
char *collicurules;
bool collisdeterministic;
int collencoding;
@@ -213,7 +213,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
if (collproviderstr)
{
- if (pg_strcasecmp(collproviderstr, "icu") == 0)
+ if (pg_strcasecmp(collproviderstr, "builtin") == 0)
+ collprovider = COLLPROVIDER_BUILTIN;
+ else if (pg_strcasecmp(collproviderstr, "icu") == 0)
collprovider = COLLPROVIDER_ICU;
else if (pg_strcasecmp(collproviderstr, "libc") == 0)
collprovider = COLLPROVIDER_LIBC;
@@ -243,7 +245,18 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
if (lcctypeEl)
collctype = defGetString(lcctypeEl);
- if (collprovider == COLLPROVIDER_LIBC)
+ if (collprovider == COLLPROVIDER_BUILTIN)
+ {
+ if (!colllocale)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("parameter \"%s\" must be specified",
+ "locale")));
+
+ colllocale = builtin_validate_locale(GetDatabaseEncoding(),
+ colllocale);
+ }
+ else if (collprovider == COLLPROVIDER_LIBC)
{
if (!collcollate)
ereport(ERROR,
@@ -303,7 +316,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("ICU rules cannot be specified unless locale provider is ICU")));
- if (collprovider == COLLPROVIDER_ICU)
+ if (collprovider == COLLPROVIDER_BUILTIN)
+ {
+ collencoding = GetDatabaseEncoding();
+ }
+ else if (collprovider == COLLPROVIDER_ICU)
{
#ifdef USE_ICU
/*
@@ -332,7 +349,16 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
}
if (!collversion)
- collversion = get_collation_actual_version(collprovider, collprovider == COLLPROVIDER_ICU ? colllocale : collcollate);
+ {
+ const char *locale;
+
+ if (collprovider == COLLPROVIDER_LIBC)
+ locale = collcollate;
+ else
+ locale = colllocale;
+
+ collversion = get_collation_actual_version(collprovider, locale);
+ }
newoid = CollationCreate(collName,
collNamespace,
@@ -433,8 +459,13 @@ AlterCollation(AlterCollationStmt *stmt)
datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull);
oldversion = isnull ? NULL : TextDatumGetCString(datum);
- datum = SysCacheGetAttrNotNull(COLLOID, tup, collForm->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colllocale : Anum_pg_collation_collcollate);
- newversion = get_collation_actual_version(collForm->collprovider, TextDatumGetCString(datum));
+ if (collForm->collprovider == COLLPROVIDER_LIBC)
+ datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_collcollate);
+ else
+ datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_colllocale);
+
+ newversion = get_collation_actual_version(collForm->collprovider,
+ TextDatumGetCString(datum));
/* cannot change from NULL to non-NULL or vice versa */
if ((!oldversion && newversion) || (oldversion && !newversion))
@@ -498,11 +529,16 @@ pg_collation_actual_version(PG_FUNCTION_ARGS)
provider = ((Form_pg_database) GETSTRUCT(dbtup))->datlocprovider;
- datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup,
- provider == COLLPROVIDER_ICU ?
- Anum_pg_database_datlocale : Anum_pg_database_datcollate);
-
- locale = TextDatumGetCString(datum);
+ if (provider == COLLPROVIDER_LIBC)
+ {
+ datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datcollate);
+ locale = TextDatumGetCString(datum);
+ }
+ else
+ {
+ datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datlocale);
+ locale = TextDatumGetCString(datum);
+ }
ReleaseSysCache(dbtup);
}
@@ -519,11 +555,17 @@ pg_collation_actual_version(PG_FUNCTION_ARGS)
provider = ((Form_pg_collation) GETSTRUCT(colltp))->collprovider;
Assert(provider != COLLPROVIDER_DEFAULT);
- datum = SysCacheGetAttrNotNull(COLLOID, colltp,
- provider == COLLPROVIDER_ICU ?
- Anum_pg_collation_colllocale : Anum_pg_collation_collcollate);
- locale = TextDatumGetCString(datum);
+ if (provider == COLLPROVIDER_LIBC)
+ {
+ datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_collcollate);
+ locale = TextDatumGetCString(datum);
+ }
+ else
+ {
+ datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_colllocale);
+ locale = TextDatumGetCString(datum);
+ }
ReleaseSysCache(colltp);
}
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 0f27d7b14cf..65464fac8e5 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -697,6 +697,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
DefElem *dtemplate = NULL;
DefElem *dencoding = NULL;
DefElem *dlocale = NULL;
+ DefElem *dbuiltinlocale = NULL;
DefElem *dcollate = NULL;
DefElem *dctype = NULL;
DefElem *diculocale = NULL;
@@ -712,7 +713,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
const char *dbtemplate = NULL;
char *dbcollate = NULL;
char *dbctype = NULL;
- char *dblocale = NULL;
+ const char *dblocale = NULL;
char *dbicurules = NULL;
char dblocprovider = '\0';
char *canonname;
@@ -761,6 +762,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
errorConflictingDefElem(defel, pstate);
dlocale = defel;
}
+ else if (strcmp(defel->defname, "builtin_locale") == 0)
+ {
+ if (dbuiltinlocale)
+ errorConflictingDefElem(defel, pstate);
+ dbuiltinlocale = defel;
+ }
else if (strcmp(defel->defname, "lc_collate") == 0)
{
if (dcollate)
@@ -896,7 +903,10 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
{
dbcollate = defGetString(dlocale);
dbctype = defGetString(dlocale);
+ dblocale = defGetString(dlocale);
}
+ if (dbuiltinlocale && dbuiltinlocale->arg)
+ dblocale = defGetString(dbuiltinlocale);
if (dcollate && dcollate->arg)
dbcollate = defGetString(dcollate);
if (dctype && dctype->arg)
@@ -909,7 +919,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
{
char *locproviderstr = defGetString(dlocprovider);
- if (pg_strcasecmp(locproviderstr, "icu") == 0)
+ if (pg_strcasecmp(locproviderstr, "builtin") == 0)
+ dblocprovider = COLLPROVIDER_BUILTIN;
+ else if (pg_strcasecmp(locproviderstr, "icu") == 0)
dblocprovider = COLLPROVIDER_ICU;
else if (pg_strcasecmp(locproviderstr, "libc") == 0)
dblocprovider = COLLPROVIDER_LIBC;
@@ -1026,14 +1038,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
dbctype = src_ctype;
if (dblocprovider == '\0')
dblocprovider = src_locprovider;
- if (dblocale == NULL && dblocprovider == COLLPROVIDER_ICU)
- {
- if (dlocale && dlocale->arg)
- dblocale = defGetString(dlocale);
- else
- dblocale = src_locale;
- }
- if (dbicurules == NULL && dblocprovider == COLLPROVIDER_ICU)
+ if (dblocale == NULL)
+ dblocale = src_locale;
+ if (dbicurules == NULL)
dbicurules = src_icurules;
/* Some encodings are client only */
@@ -1058,7 +1065,42 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
check_encoding_locale_matches(encoding, dbcollate, dbctype);
- if (dblocprovider == COLLPROVIDER_ICU)
+ /*
+ * Validate provider-specific parameters. The two checks are independent:
+ * BUILTIN_LOCALE is rejected unless the provider is builtin, and the ICU
+ * options are rejected unless the provider is ICU. They must not be
+ * chained with "else", or the ICU check would be skipped for libc.
+ */
+ if (dblocprovider != COLLPROVIDER_BUILTIN)
+ {
+ if (dbuiltinlocale)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("BUILTIN_LOCALE cannot be specified unless locale provider is builtin")));
+ }
+
+ if (dblocprovider != COLLPROVIDER_ICU)
+ {
+ if (diculocale)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("ICU locale cannot be specified unless locale provider is ICU")));
+
+ if (dbicurules)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("ICU rules cannot be specified unless locale provider is ICU")));
+ }
+
+ /* validate and canonicalize locale for the provider */
+ if (dblocprovider == COLLPROVIDER_BUILTIN)
+ {
+ /*
+ * This would happen if template0 uses the libc provider but the new
+ * database uses builtin.
+ */
+ if (!dblocale)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("LOCALE or BUILTIN_LOCALE must be specified")));
+
+ dblocale = builtin_validate_locale(encoding, dblocale);
+ }
+ else if (dblocprovider == COLLPROVIDER_ICU)
{
if (!(is_encoding_supported_by_icu(encoding)))
ereport(ERROR,
@@ -1097,18 +1139,10 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
icu_validate_locale(dblocale);
}
- else
- {
- if (dblocale)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
- errmsg("ICU locale cannot be specified unless locale provider is ICU")));
- if (dbicurules)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
- errmsg("ICU rules cannot be specified unless locale provider is ICU")));
- }
+ /* for libc, locale comes from datcollate and datctype */
+ if (dblocprovider == COLLPROVIDER_LIBC)
+ dblocale = NULL;
/*
* Check that the new encoding and locale settings match the source
@@ -1195,8 +1229,14 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
if (src_collversion && !dcollversion)
{
char *actual_versionstr;
+ const char *locale;
+
+ if (dblocprovider == COLLPROVIDER_LIBC)
+ locale = dbcollate;
+ else
+ locale = dblocale;
- actual_versionstr = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dblocale : dbcollate);
+ actual_versionstr = get_collation_actual_version(dblocprovider, locale);
if (!actual_versionstr)
ereport(ERROR,
(errmsg("template database \"%s\" has a collation version, but no actual collation version could be determined",
@@ -1224,7 +1264,16 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
* collation version, which is normally only the case for template0.
*/
if (dbcollversion == NULL)
- dbcollversion = get_collation_actual_version(dblocprovider, dblocprovider == COLLPROVIDER_ICU ? dblocale : dbcollate);
+ {
+ const char *locale;
+
+ if (dblocprovider == COLLPROVIDER_LIBC)
+ locale = dbcollate;
+ else
+ locale = dblocale;
+
+ dbcollversion = get_collation_actual_version(dblocprovider, locale);
+ }
/* Resolve default tablespace for new database */
if (dtablespacename && dtablespacename->arg)
@@ -1363,8 +1412,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
* block on the unique index, and fail after we commit).
*/
- Assert((dblocprovider == COLLPROVIDER_ICU && dblocale) ||
- (dblocprovider != COLLPROVIDER_ICU && !dblocale));
+ Assert((dblocprovider != COLLPROVIDER_LIBC && dblocale) ||
+ (dblocprovider == COLLPROVIDER_LIBC && !dblocale));
/* Form tuple */
new_record[Anum_pg_database_oid - 1] = ObjectIdGetDatum(dboid);
@@ -2471,10 +2520,21 @@ AlterDatabaseRefreshColl(AlterDatabaseRefreshCollStmt *stmt)
datum = heap_getattr(tuple, Anum_pg_database_datcollversion, RelationGetDescr(rel), &isnull);
oldversion = isnull ? NULL : TextDatumGetCString(datum);
- datum = heap_getattr(tuple, datForm->datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_datlocale : Anum_pg_database_datcollate, RelationGetDescr(rel), &isnull);
- if (isnull)
- elog(ERROR, "unexpected null in pg_database");
- newversion = get_collation_actual_version(datForm->datlocprovider, TextDatumGetCString(datum));
+ if (datForm->datlocprovider == COLLPROVIDER_LIBC)
+ {
+ datum = heap_getattr(tuple, Anum_pg_database_datcollate, RelationGetDescr(rel), &isnull);
+ if (isnull)
+ elog(ERROR, "unexpected null in pg_database");
+ }
+ else
+ {
+ datum = heap_getattr(tuple, Anum_pg_database_datlocale, RelationGetDescr(rel), &isnull);
+ if (isnull)
+ elog(ERROR, "unexpected null in pg_database");
+ }
+
+ newversion = get_collation_actual_version(datForm->datlocprovider,
+ TextDatumGetCString(datum));
/* cannot change from NULL to non-NULL or vice versa */
if ((!oldversion && newversion) || (oldversion && !newversion))
@@ -2669,8 +2729,13 @@ pg_database_collation_actual_version(PG_FUNCTION_ARGS)
datlocprovider = ((Form_pg_database) GETSTRUCT(tp))->datlocprovider;
- datum = SysCacheGetAttrNotNull(DATABASEOID, tp, datlocprovider == COLLPROVIDER_ICU ? Anum_pg_database_datlocale : Anum_pg_database_datcollate);
- version = get_collation_actual_version(datlocprovider, TextDatumGetCString(datum));
+ if (datlocprovider == COLLPROVIDER_LIBC)
+ datum = SysCacheGetAttrNotNull(DATABASEOID, tp, Anum_pg_database_datcollate);
+ else
+ datum = SysCacheGetAttrNotNull(DATABASEOID, tp, Anum_pg_database_datlocale);
+
+ version = get_collation_actual_version(datlocprovider,
+ TextDatumGetCString(datum));
ReleaseSysCache(tp);
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 036a463491c..5f483b8dbc2 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1680,6 +1680,8 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
else
#endif
{
+ Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+
if (pg_database_encoding_max_length() > 1)
{
wchar_t *workspace;
@@ -1798,6 +1800,8 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
else
#endif
{
+ Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+
if (pg_database_encoding_max_length() > 1)
{
wchar_t *workspace;
@@ -1917,6 +1921,8 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
else
#endif
{
+ Assert(!mylocale || mylocale->provider == COLLPROVIDER_LIBC);
+
if (pg_database_encoding_max_length() > 1)
{
wchar_t *workspace;
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index 77d5752dc8e..39390fbe4eb 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -1268,7 +1268,18 @@ lookup_collation_cache(Oid collation, bool set_flags)
elog(ERROR, "cache lookup failed for collation %u", collation);
collform = (Form_pg_collation) GETSTRUCT(tp);
- if (collform->collprovider == COLLPROVIDER_LIBC)
+ if (collform->collprovider == COLLPROVIDER_BUILTIN)
+ {
+ Datum datum;
+ const char *colllocale;
+
+ datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
+ colllocale = TextDatumGetCString(datum);
+
+ cache_entry->collate_is_c = true;
+ cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0);
+ }
+ else if (collform->collprovider == COLLPROVIDER_LIBC)
{
Datum datum;
const char *collcollate;
@@ -1319,16 +1330,30 @@ lc_collate_is_c(Oid collation)
if (collation == DEFAULT_COLLATION_OID)
{
static int result = -1;
- char *localeptr;
-
- if (default_locale.provider == COLLPROVIDER_ICU)
- return false;
+ const char *localeptr;
if (result >= 0)
return (bool) result;
- localeptr = setlocale(LC_COLLATE, NULL);
- if (!localeptr)
- elog(ERROR, "invalid LC_COLLATE setting");
+
+ /*
+ * Decide per provider: builtin always reports "C" collation, ICU never
+ * does, and libc falls through to the string comparison below using the
+ * current LC_COLLATE setting.
+ */
+ if (default_locale.provider == COLLPROVIDER_BUILTIN)
+ {
+ result = true;
+ return (bool) result;
+ }
+ else if (default_locale.provider == COLLPROVIDER_ICU)
+ {
+ result = false;
+ return (bool) result;
+ }
+ else if (default_locale.provider == COLLPROVIDER_LIBC)
+ {
+ /* this is the collation check, so query LC_COLLATE, not LC_CTYPE */
+ localeptr = setlocale(LC_COLLATE, NULL);
+ if (!localeptr)
+ elog(ERROR, "invalid LC_COLLATE setting");
+ }
+ else
+ elog(ERROR, "unexpected collation provider '%c'",
+ default_locale.provider);
if (strcmp(localeptr, "C") == 0)
result = true;
@@ -1372,16 +1397,29 @@ lc_ctype_is_c(Oid collation)
if (collation == DEFAULT_COLLATION_OID)
{
static int result = -1;
- char *localeptr;
-
- if (default_locale.provider == COLLPROVIDER_ICU)
- return false;
+ const char *localeptr;
if (result >= 0)
return (bool) result;
- localeptr = setlocale(LC_CTYPE, NULL);
- if (!localeptr)
- elog(ERROR, "invalid LC_CTYPE setting");
+
+ if (default_locale.provider == COLLPROVIDER_BUILTIN)
+ {
+ localeptr = default_locale.info.builtin.locale;
+ }
+ else if (default_locale.provider == COLLPROVIDER_ICU)
+ {
+ result = false;
+ return (bool) result;
+ }
+ else if (default_locale.provider == COLLPROVIDER_LIBC)
+ {
+ localeptr = setlocale(LC_CTYPE, NULL);
+ if (!localeptr)
+ elog(ERROR, "invalid LC_CTYPE setting");
+ }
+ else
+ elog(ERROR, "unexpected collation provider '%c'",
+ default_locale.provider);
if (strcmp(localeptr, "C") == 0)
result = true;
@@ -1519,10 +1557,10 @@ pg_newlocale_from_collation(Oid collid)
if (collid == DEFAULT_COLLATION_OID)
{
- if (default_locale.provider == COLLPROVIDER_ICU)
- return &default_locale;
- else
+ if (default_locale.provider == COLLPROVIDER_LIBC)
return (pg_locale_t) 0;
+ else
+ return &default_locale;
}
cache_entry = lookup_collation_cache(collid, false);
@@ -1547,7 +1585,19 @@ pg_newlocale_from_collation(Oid collid)
result.provider = collform->collprovider;
result.deterministic = collform->collisdeterministic;
- if (collform->collprovider == COLLPROVIDER_LIBC)
+ if (collform->collprovider == COLLPROVIDER_BUILTIN)
+ {
+ const char *locstr;
+
+ datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
+ locstr = TextDatumGetCString(datum);
+
+ builtin_validate_locale(GetDatabaseEncoding(), locstr);
+
+ result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext,
+ locstr);
+ }
+ else if (collform->collprovider == COLLPROVIDER_LIBC)
{
const char *collcollate;
const char *collctype pg_attribute_unused();
@@ -1626,7 +1676,11 @@ pg_newlocale_from_collation(Oid collid)
collversionstr = TextDatumGetCString(datum);
- datum = SysCacheGetAttrNotNull(COLLOID, tp, collform->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colllocale : Anum_pg_collation_collcollate);
+ Assert(collform->collprovider != COLLPROVIDER_BUILTIN);
+ if (collform->collprovider == COLLPROVIDER_LIBC)
+ datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
+ else
+ datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
actual_versionstr = get_collation_actual_version(collform->collprovider,
TextDatumGetCString(datum));
@@ -1677,6 +1731,10 @@ get_collation_actual_version(char collprovider, const char *collcollate)
{
char *collversion = NULL;
+ /* the builtin collation provider is not versioned */
+ if (collprovider == COLLPROVIDER_BUILTIN)
+ return NULL;
+
#ifdef USE_ICU
if (collprovider == COLLPROVIDER_ICU)
{
@@ -2443,6 +2501,31 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
return result;
}
+/*
+ * Validate a locale name for the builtin provider.
+ *
+ * Returns the canonical name for the locale. "C" is the only name accepted
+ * here; any other name raises an error. The returned pointer is a string
+ * literal, not a copy of the caller's argument.
+ *
+ * "encoding" is compared against the encoding the locale requires, if any.
+ * In this version required_encoding is never changed from -1, so that
+ * comparison cannot fail.
+ */
+const char *
+builtin_validate_locale(int encoding, const char *locale)
+{
+ const char *canonical_name = NULL;
+ int required_encoding = -1;
+
+ /* map the user-supplied name to its canonical spelling */
+ if (strcmp(locale, "C") == 0)
+ canonical_name = "C";
+
+ /* unrecognized name: not a locale the builtin provider knows */
+ if (!canonical_name)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid locale name \"%s\" for builtin provider",
+ locale)));
+
+ /* only enforced when the locale demands a specific encoding (>= 0) */
+ if (required_encoding >= 0 && encoding != required_encoding)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("encoding \"%s\" does not match locale \"%s\"",
+ pg_encoding_to_char(encoding), locale)));
+
+ return canonical_name;
+}
+
+
#ifdef USE_ICU
/*
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 2875bc97d3b..0805398e24d 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -423,7 +423,17 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
strcmp(ctype, "POSIX") == 0)
database_ctype_is_c = true;
- if (dbform->datlocprovider == COLLPROVIDER_ICU)
+ if (dbform->datlocprovider == COLLPROVIDER_BUILTIN)
+ {
+ datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
+ datlocale = TextDatumGetCString(datum);
+
+ builtin_validate_locale(dbform->encoding, datlocale);
+
+ default_locale.info.builtin.locale = MemoryContextStrdup(
+ TopMemoryContext, datlocale);
+ }
+ else if (dbform->datlocprovider == COLLPROVIDER_ICU)
{
char *icurules;
@@ -461,10 +471,16 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
{
char *actual_versionstr;
char *collversionstr;
+ char *locale;
collversionstr = TextDatumGetCString(datum);
- actual_versionstr = get_collation_actual_version(dbform->datlocprovider, dbform->datlocprovider == COLLPROVIDER_ICU ? datlocale : collate);
+ if (dbform->datlocprovider == COLLPROVIDER_LIBC)
+ locale = collate;
+ else
+ locale = datlocale;
+
+ actual_versionstr = get_collation_actual_version(dbform->datlocprovider, locale);
if (!actual_versionstr)
/* should not happen */
elog(WARNING,
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index de58002a5d4..8d53ef4a1fc 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -145,7 +145,9 @@ static char *lc_numeric = NULL;
static char *lc_time = NULL;
static char *lc_messages = NULL;
static char locale_provider = COLLPROVIDER_LIBC;
+static bool builtin_locale_specified = false;
static char *datlocale = NULL;
+static bool icu_locale_specified = false;
static char *icu_rules = NULL;
static const char *default_text_search_config = NULL;
static char *username = NULL;
@@ -2368,7 +2370,7 @@ setlocales(void)
lc_monetary = locale;
if (!lc_messages)
lc_messages = locale;
- if (!datlocale && locale_provider == COLLPROVIDER_ICU)
+ if (!datlocale && locale_provider != COLLPROVIDER_LIBC)
datlocale = locale;
}
@@ -2395,14 +2397,20 @@ setlocales(void)
lc_messages = canonname;
#endif
- if (locale_provider == COLLPROVIDER_ICU)
+ if (locale_provider != COLLPROVIDER_LIBC && datlocale == NULL)
+ pg_fatal("locale must be specified if provider is %s",
+ collprovider_name(locale_provider));
+
+ if (locale_provider == COLLPROVIDER_BUILTIN)
+ {
+ if (strcmp(datlocale, "C") != 0)
+ pg_fatal("invalid locale name \"%s\" for builtin provider",
+ datlocale);
+ }
+ else if (locale_provider == COLLPROVIDER_ICU)
{
char *langtag;
- /* acquire default locale from the environment, if not specified */
- if (datlocale == NULL)
- pg_fatal("ICU locale must be specified");
-
/* canonicalize to a language tag */
langtag = icu_language_tag(datlocale);
printf(_("Using language tag \"%s\" for ICU locale \"%s\".\n"),
@@ -2447,7 +2455,8 @@ usage(const char *progname)
" set default locale in the respective category for\n"
" new databases (default taken from environment)\n"));
printf(_(" --no-locale equivalent to --locale=C\n"));
- printf(_(" --locale-provider={libc|icu}\n"
+ printf(_(" --builtin-locale=LOCALE set builtin locale name for new databases\n"));
+ printf(_(" --locale-provider={builtin|libc|icu}\n"
" set default locale provider for new databases\n"));
printf(_(" --pwfile=FILE read password for the new superuser from file\n"));
printf(_(" -T, --text-search-config=CFG\n"
@@ -2609,9 +2618,9 @@ setup_locale_encoding(void)
else
{
printf(_("The database cluster will be initialized with this locale configuration:\n"));
- printf(_(" provider: %s\n"), collprovider_name(locale_provider));
- if (datlocale)
- printf(_(" ICU locale: %s\n"), datlocale);
+ printf(_(" default collation provider: %s\n"), collprovider_name(locale_provider));
+ if (locale_provider != COLLPROVIDER_LIBC)
+ printf(_(" default collation locale: %s\n"), datlocale);
printf(_(" LC_COLLATE: %s\n"
" LC_CTYPE: %s\n"
" LC_MESSAGES: %s\n"
@@ -3104,9 +3113,10 @@ main(int argc, char *argv[])
{"allow-group-access", no_argument, NULL, 'g'},
{"discard-caches", no_argument, NULL, 14},
{"locale-provider", required_argument, NULL, 15},
- {"icu-locale", required_argument, NULL, 16},
- {"icu-rules", required_argument, NULL, 17},
- {"sync-method", required_argument, NULL, 18},
+ {"builtin-locale", required_argument, NULL, 16},
+ {"icu-locale", required_argument, NULL, 17},
+ {"icu-rules", required_argument, NULL, 18},
+ {"sync-method", required_argument, NULL, 19},
{NULL, 0, NULL, 0}
};
@@ -3274,7 +3284,9 @@ main(int argc, char *argv[])
"-c debug_discard_caches=1");
break;
case 15:
- if (strcmp(optarg, "icu") == 0)
+ if (strcmp(optarg, "builtin") == 0)
+ locale_provider = COLLPROVIDER_BUILTIN;
+ else if (strcmp(optarg, "icu") == 0)
locale_provider = COLLPROVIDER_ICU;
else if (strcmp(optarg, "libc") == 0)
locale_provider = COLLPROVIDER_LIBC;
@@ -3283,11 +3295,16 @@ main(int argc, char *argv[])
break;
case 16:
datlocale = pg_strdup(optarg);
+ builtin_locale_specified = true;
break;
case 17:
- icu_rules = pg_strdup(optarg);
+ datlocale = pg_strdup(optarg);
+ icu_locale_specified = true;
break;
case 18:
+ icu_rules = pg_strdup(optarg);
+ break;
+ case 19:
if (!parse_sync_method(optarg, &sync_method))
exit(1);
break;
@@ -3317,7 +3334,11 @@ main(int argc, char *argv[])
exit(1);
}
- if (datlocale && locale_provider != COLLPROVIDER_ICU)
+ if (builtin_locale_specified && locale_provider != COLLPROVIDER_BUILTIN)
+ pg_fatal("%s cannot be specified unless locale provider \"%s\" is chosen",
+ "--builtin-locale", "builtin");
+
+ if (icu_locale_specified && locale_provider != COLLPROVIDER_ICU)
pg_fatal("%s cannot be specified unless locale provider \"%s\" is chosen",
"--icu-locale", "icu");
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index 594b20cc743..e719f70dae2 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -117,7 +117,7 @@ if ($ENV{with_icu} eq 'yes')
{
command_fails_like(
[ 'initdb', '--no-sync', '--locale-provider=icu', "$tempdir/data2" ],
- qr/initdb: error: ICU locale must be specified/,
+ qr/initdb: error: locale must be specified if provider is icu/,
'locale provider ICU requires --icu-locale');
command_ok(
@@ -138,7 +138,7 @@ if ($ENV{with_icu} eq 'yes')
'--lc-monetary=C', '--lc-time=C',
"$tempdir/data4"
],
- qr/^\s+ICU locale:\s+und\n/ms,
+ qr/^\s+default collation locale:\s+und\n/ms,
'options --locale-provider=icu --locale=und --lc-*=C');
command_fails_like(
@@ -185,6 +185,42 @@ else
}
command_fails(
+ [ 'initdb', '--no-sync', '--locale-provider=builtin', "$tempdir/data6" ],
+ 'locale provider builtin fails without --locale');
+
+command_ok(
+ [
+ 'initdb', '--no-sync',
+ '--locale-provider=builtin', '--locale=C',
+ "$tempdir/data7"
+ ],
+ 'locale provider builtin with --locale');
+
+command_ok(
+ [
+ 'initdb', '--no-sync',
+ '--locale-provider=builtin', '--lc-ctype=C',
+ '--locale=C', "$tempdir/data10"
+ ],
+ 'locale provider builtin with --lc-ctype');
+
+command_fails(
+ [
+ 'initdb', '--no-sync',
+ '--locale-provider=builtin', '--icu-locale=en',
+ "$tempdir/dataX"
+ ],
+ 'fails for locale provider builtin with ICU locale');
+
+command_fails(
+ [
+ 'initdb', '--no-sync',
+ '--locale-provider=builtin', '--icu-rules=""',
+ "$tempdir/dataX"
+ ],
+ 'fails for locale provider builtin with ICU rules');
+
+command_fails(
[ 'initdb', '--no-sync', '--locale-provider=xyz', "$tempdir/dataX" ],
'fails for invalid locale provider');
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 23e6217b73f..171e5916965 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -3114,7 +3114,9 @@ dumpDatabase(Archive *fout)
}
appendPQExpBufferStr(creaQry, " LOCALE_PROVIDER = ");
- if (datlocprovider[0] == 'c')
+ if (datlocprovider[0] == 'b')
+ appendPQExpBufferStr(creaQry, "builtin");
+ else if (datlocprovider[0] == 'c')
appendPQExpBufferStr(creaQry, "libc");
else if (datlocprovider[0] == 'i')
appendPQExpBufferStr(creaQry, "icu");
@@ -3142,7 +3144,11 @@ dumpDatabase(Archive *fout)
}
if (locale)
{
- appendPQExpBufferStr(creaQry, " ICU_LOCALE = ");
+ if (datlocprovider[0] == 'b')
+ appendPQExpBufferStr(creaQry, " BUILTIN_LOCALE = ");
+ else
+ appendPQExpBufferStr(creaQry, " ICU_LOCALE = ");
+
appendStringLiteralAH(creaQry, locale, fout);
}
@@ -13870,7 +13876,9 @@ dumpCollation(Archive *fout, const CollInfo *collinfo)
fmtQualifiedDumpable(collinfo));
appendPQExpBufferStr(q, "provider = ");
- if (collprovider[0] == 'c')
+ if (collprovider[0] == 'b')
+ appendPQExpBufferStr(q, "builtin");
+ else if (collprovider[0] == 'c')
appendPQExpBufferStr(q, "libc");
else if (collprovider[0] == 'i')
appendPQExpBufferStr(q, "icu");
@@ -13891,6 +13899,15 @@ dumpCollation(Archive *fout, const CollInfo *collinfo)
/* no locale -- the default collation cannot be reloaded anyway */
}
+ else if (collprovider[0] == 'b')
+ {
+ if (collcollate || collctype || !colllocale || collicurules)
+ pg_log_warning("invalid collation \"%s\"", qcollname);
+
+ appendPQExpBufferStr(q, ", locale = ");
+ appendStringLiteralAH(q, colllocale ? colllocale : "",
+ fout);
+ }
else if (collprovider[0] == 'i')
{
if (fout->remoteVersion >= 150000)
diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
index 34a459496e1..ed79c0930b0 100644
--- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl
+++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl
@@ -104,19 +104,13 @@ if ($oldnode->pg_version >= 11)
push @custom_opts, '--allow-group-access';
}
-# Set up the locale settings for the original cluster, so that we
-# can test that pg_upgrade copies the locale settings of template0
-# from the old to the new cluster.
+my $old_provider_field;
+my $old_datlocale_field;
-my $original_encoding = "6"; # UTF-8
-my $original_provider = "c";
-my $original_locale = "C";
-my $original_datlocale = "";
-my $provider_field = "'c' AS datlocprovider";
-my $old_datlocale_field = "NULL AS datlocale";
-if ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes')
+# account for field additions and changes
+if ($oldnode->pg_version >= 15)
{
- $provider_field = "datlocprovider";
+ $old_provider_field = "datlocprovider";
if ($oldnode->pg_version >= '17devel')
{
$old_datlocale_field = "datlocale";
@@ -125,18 +119,65 @@ if ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes')
{
$old_datlocale_field = "daticulocale AS datlocale";
}
+}
+else
+{
+ $old_provider_field = "'c' AS datlocprovider";
+ $old_datlocale_field = "NULL AS datlocale";
+}
+
+# Set up the locale settings for the original cluster, so that we
+# can test that pg_upgrade copies the locale settings of template0
+# from the old to the new cluster.
+
+my $original_enc_name;
+my $original_provider;
+my $original_datcollate = "C";
+my $original_datctype = "C";
+my $original_datlocale;
+
+if ($oldnode->pg_version >= '17devel')
+{
+ $original_enc_name = "UTF-8";
+ $original_provider = "b";
+ $original_datlocale = "C";
+}
+elsif ($oldnode->pg_version >= 15 && $ENV{with_icu} eq 'yes')
+{
+ $original_enc_name = "UTF-8";
$original_provider = "i";
$original_datlocale = "fr-CA";
}
+else
+{
+ $original_enc_name = "SQL_ASCII";
+ $original_provider = "c";
+ $original_datlocale = "";
+}
+
+my %encodings = ('UTF-8' => 6, 'SQL_ASCII' => 0);
+my $original_encoding = $encodings{$original_enc_name};
my @initdb_params = @custom_opts;
-push @initdb_params, ('--encoding', 'UTF-8');
-push @initdb_params, ('--locale', $original_locale);
-if ($original_provider eq "i")
+push @initdb_params, ('--encoding', $original_enc_name);
+push @initdb_params, ('--lc-collate', $original_datcollate);
+push @initdb_params, ('--lc-ctype', $original_datctype);
+
+# add --locale-provider, if supported
+my %provider_name = ('b' => 'builtin', 'i' => 'icu', 'c' => 'libc');
+if ($oldnode->pg_version >= 15)
{
- push @initdb_params, ('--locale-provider', 'icu');
- push @initdb_params, ('--icu-locale', 'fr-CA');
+ push @initdb_params,
+ ('--locale-provider', $provider_name{$original_provider});
+ if ($original_provider eq 'b')
+ {
+ push @initdb_params, ('--builtin-locale', $original_datlocale);
+ }
+ elsif ($original_provider eq 'i')
+ {
+ push @initdb_params, ('--icu-locale', $original_datlocale);
+ }
}
$node_params{extra} = \@initdb_params;
@@ -146,10 +187,10 @@ $oldnode->start;
my $result;
$result = $oldnode->safe_psql(
'postgres',
- "SELECT encoding, $provider_field, datcollate, datctype, $old_datlocale_field
+ "SELECT encoding, $old_provider_field, datcollate, datctype, $old_datlocale_field
FROM pg_database WHERE datname='template0'");
is( $result,
- "$original_encoding|$original_provider|$original_locale|$original_locale|$original_datlocale",
+ "$original_encoding|$original_provider|$original_datcollate|$original_datctype|$original_datlocale",
"check locales in original cluster");
# The default location of the source code is the root of this directory.
@@ -433,10 +474,10 @@ if (-d $log_path)
# Test that upgraded cluster has original locale settings.
$result = $newnode->safe_psql(
'postgres',
- "SELECT encoding, $provider_field, datcollate, datctype, datlocale
+ "SELECT encoding, datlocprovider, datcollate, datctype, datlocale
FROM pg_database WHERE datname='template0'");
is( $result,
- "$original_encoding|$original_provider|$original_locale|$original_locale|$original_datlocale",
+ "$original_encoding|$original_provider|$original_datcollate|$original_datctype|$original_datlocale",
"check that locales in new cluster match original cluster");
# Second dump from the upgraded instance.
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index 68b2ea8872a..1ab80eb7cac 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -926,7 +926,7 @@ listAllDbs(const char *pattern, bool verbose)
gettext_noop("Encoding"));
if (pset.sversion >= 150000)
appendPQExpBuffer(&buf,
- " CASE d.datlocprovider WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n",
+ " CASE d.datlocprovider WHEN 'b' THEN 'builtin' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n",
gettext_noop("Locale Provider"));
else
appendPQExpBuffer(&buf,
@@ -4974,7 +4974,7 @@ listCollations(const char *pattern, bool verbose, bool showSystem)
if (pset.sversion >= 100000)
appendPQExpBuffer(&buf,
- " CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n",
+ " CASE c.collprovider WHEN 'd' THEN 'default' WHEN 'b' THEN 'builtin' WHEN 'c' THEN 'libc' WHEN 'i' THEN 'icu' END AS \"%s\",\n",
gettext_noop("Provider"));
else
appendPQExpBuffer(&buf,
diff --git a/src/bin/scripts/createdb.c b/src/bin/scripts/createdb.c
index 14970a6a5f8..007061e756f 100644
--- a/src/bin/scripts/createdb.c
+++ b/src/bin/scripts/createdb.c
@@ -40,8 +40,9 @@ main(int argc, char *argv[])
{"locale", required_argument, NULL, 'l'},
{"maintenance-db", required_argument, NULL, 3},
{"locale-provider", required_argument, NULL, 4},
- {"icu-locale", required_argument, NULL, 5},
- {"icu-rules", required_argument, NULL, 6},
+ {"builtin-locale", required_argument, NULL, 5},
+ {"icu-locale", required_argument, NULL, 6},
+ {"icu-rules", required_argument, NULL, 7},
{NULL, 0, NULL, 0}
};
@@ -67,6 +68,7 @@ main(int argc, char *argv[])
char *lc_ctype = NULL;
char *locale = NULL;
char *locale_provider = NULL;
+ char *builtin_locale = NULL;
char *icu_locale = NULL;
char *icu_rules = NULL;
@@ -134,9 +136,12 @@ main(int argc, char *argv[])
locale_provider = pg_strdup(optarg);
break;
case 5:
- icu_locale = pg_strdup(optarg);
+ builtin_locale = pg_strdup(optarg);
break;
case 6:
+ icu_locale = pg_strdup(optarg);
+ break;
+ case 7:
icu_rules = pg_strdup(optarg);
break;
default:
@@ -216,6 +221,11 @@ main(int argc, char *argv[])
appendPQExpBufferStr(&sql, " LOCALE ");
appendStringLiteralConn(&sql, locale, conn);
}
+ if (builtin_locale)
+ {
+ appendPQExpBufferStr(&sql, " BUILTIN_LOCALE ");
+ appendStringLiteralConn(&sql, builtin_locale, conn);
+ }
if (lc_collate)
{
appendPQExpBufferStr(&sql, " LC_COLLATE ");
@@ -294,9 +304,10 @@ help(const char *progname)
printf(_(" -l, --locale=LOCALE locale settings for the database\n"));
printf(_(" --lc-collate=LOCALE LC_COLLATE setting for the database\n"));
printf(_(" --lc-ctype=LOCALE LC_CTYPE setting for the database\n"));
+ printf(_(" --builtin-locale=LOCALE builtin locale setting for the database\n"));
printf(_(" --icu-locale=LOCALE ICU locale setting for the database\n"));
printf(_(" --icu-rules=RULES ICU rules setting for the database\n"));
- printf(_(" --locale-provider={libc|icu}\n"
+ printf(_(" --locale-provider={builtin|libc|icu}\n"
" locale provider for the database's default collation\n"));
printf(_(" -O, --owner=OWNER database user to own the new database\n"));
printf(_(" -S, --strategy=STRATEGY database creation strategy wal_log or file_copy\n"));
diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl
index 37e47b00782..dfd635bfab2 100644
--- a/src/bin/scripts/t/020_createdb.pl
+++ b/src/bin/scripts/t/020_createdb.pl
@@ -105,6 +105,66 @@ else
'create database with ICU fails since no ICU support');
}
+$node->command_fails(
+ [
+ 'createdb', '-T',
+ 'template0', '--locale-provider=builtin',
+ 'tbuiltin1'
+ ],
+ 'create database with provider "builtin" fails without --locale');
+
+$node->command_ok(
+ [
+ 'createdb', '-T',
+ 'template0', '--locale-provider=builtin',
+ '--locale=C', 'tbuiltin2'
+ ],
+ 'create database with provider "builtin" and locale "C"');
+
+$node->command_ok(
+ [
+ 'createdb', '-T',
+ 'template0', '--locale-provider=builtin',
+ '--locale=C', '--lc-collate=C',
+ 'tbuiltin3'
+ ],
+ 'create database with provider "builtin" and LC_COLLATE=C');
+
+$node->command_ok(
+ [
+ 'createdb', '-T',
+ 'template0', '--locale-provider=builtin',
+ '--locale=C', '--lc-ctype=C',
+ 'tbuiltin4'
+ ],
+ 'create database with provider "builtin" and LC_CTYPE=C');
+
+$node->command_fails(
+ [
+ 'createdb', '-T',
+ 'template0', '--locale-provider=builtin',
+ '--locale=C', '--icu-locale=en',
+ 'tbuiltin7'
+ ],
+ 'create database with provider "builtin" and ICU_LOCALE="en"');
+
+$node->command_fails(
+ [
+ 'createdb', '-T',
+ 'template0', '--locale-provider=builtin',
+ '--locale=C', '--icu-rules=""',
+ 'tbuiltin8'
+ ],
+ 'create database with provider "builtin" and ICU_RULES=""');
+
+$node->command_fails(
+ [
+ 'createdb', '-T',
+ 'template1', '--locale-provider=builtin',
+ '--locale=C', 'tbuiltin9'
+ ],
+ 'create database with provider "builtin" not matching template');
+
$node->command_fails([ 'createdb', 'foobar1' ],
'fails if database already exists');
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 07793117162..429989efd91 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202403131
+#define CATALOG_VERSION_NO 202403132
#endif
diff --git a/src/include/catalog/pg_collation.dat b/src/include/catalog/pg_collation.dat
index 7396ff10c45..938432e8a4b 100644
--- a/src/include/catalog/pg_collation.dat
+++ b/src/include/catalog/pg_collation.dat
@@ -23,9 +23,9 @@
descr => 'standard POSIX collation',
collname => 'POSIX', collprovider => 'c', collencoding => '-1',
collcollate => 'POSIX', collctype => 'POSIX' },
-{ oid => '962', descr => 'sorts by Unicode code point',
- collname => 'ucs_basic', collprovider => 'c', collencoding => '6',
- collcollate => 'C', collctype => 'C' },
+{ oid => '962', descr => 'sorts by Unicode code point, C character semantics',
+ collname => 'ucs_basic', collprovider => 'b', collencoding => '6',
+ colllocale => 'C' },
{ oid => '963',
descr => 'sorts using the Unicode Collation Algorithm with default settings',
collname => 'unicode', collprovider => 'i', collencoding => '-1',
diff --git a/src/include/catalog/pg_collation.h b/src/include/catalog/pg_collation.h
index a3e196fb53d..5ce289d74bd 100644
--- a/src/include/catalog/pg_collation.h
+++ b/src/include/catalog/pg_collation.h
@@ -68,6 +68,7 @@ MAKE_SYSCACHE(COLLOID, pg_collation_oid_index, 8);
#ifdef EXPOSE_TO_CLIENT_CODE
#define COLLPROVIDER_DEFAULT 'd'
+#define COLLPROVIDER_BUILTIN 'b'
#define COLLPROVIDER_ICU 'i'
#define COLLPROVIDER_LIBC 'c'
@@ -76,6 +77,8 @@ collprovider_name(char c)
{
switch (c)
{
+ case COLLPROVIDER_BUILTIN:
+ return "builtin";
case COLLPROVIDER_ICU:
return "icu";
case COLLPROVIDER_LIBC:
diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h
index 28c925b5af1..3d949d51123 100644
--- a/src/include/utils/pg_locale.h
+++ b/src/include/utils/pg_locale.h
@@ -76,6 +76,10 @@ struct pg_locale_struct
bool deterministic;
union
{
+ struct
+ {
+ const char *locale;
+ } builtin;
locale_t lt;
#ifdef USE_ICU
struct
@@ -113,6 +117,7 @@ extern size_t pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
extern size_t pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
size_t srclen, pg_locale_t locale);
+extern const char *builtin_validate_locale(int encoding, const char *loc_str);
extern void icu_validate_locale(const char *loc_str);
extern char *icu_language_tag(const char *loc_str, int elevel);
diff --git a/src/test/icu/t/010_database.pl b/src/test/icu/t/010_database.pl
index 8a1fc12ec63..5f8ef168034 100644
--- a/src/test/icu/t/010_database.pl
+++ b/src/test/icu/t/010_database.pl
@@ -27,9 +27,8 @@ CREATE TABLE icu (def text, en text COLLATE "en-x-icu", upfirst text COLLATE upp
INSERT INTO icu VALUES ('a', 'a', 'a'), ('b', 'b', 'b'), ('A', 'A', 'A'), ('B', 'B', 'B');
});
-is( $node1->safe_psql('dbicu', q{SELECT icu_unicode_version() IS NOT NULL}),
- qq(t),
- 'ICU unicode version defined');
+is($node1->safe_psql('dbicu', q{SELECT icu_unicode_version() IS NOT NULL}),
+ qq(t), 'ICU unicode version defined');
is( $node1->safe_psql('dbicu', q{SELECT def FROM icu ORDER BY def}),
qq(A
@@ -63,14 +62,13 @@ is( $node1->psql(
0,
"C locale works for ICU");
-# Test that LOCALE works for ICU locales if LC_COLLATE and LC_CTYPE
-# are specified
-is( $node1->psql(
- 'postgres',
- q{CREATE DATABASE dbicu2 LOCALE_PROVIDER icu LOCALE '@colStrength=primary'
- LC_COLLATE='C' LC_CTYPE='C' TEMPLATE template0 ENCODING UTF8}
- ),
- 0,
- "LOCALE works for ICU locales if LC_COLLATE and LC_CTYPE are specified");
+my ($ret, $stdout, $stderr) = $node1->psql('postgres',
+ q{CREATE DATABASE dbicu LOCALE_PROVIDER builtin LOCALE 'C' TEMPLATE dbicu}
+);
+isnt($ret, 0, "locale provider must match template: exit code not 0");
+like(
+ $stderr,
+ qr/ERROR: new locale provider \(builtin\) does not match locale provider of the template database \(icu\)/,
+ "locale provider must match template: error message");
done_testing();
diff --git a/src/test/regress/expected/collate.out b/src/test/regress/expected/collate.out
index 06495644852..593a6226376 100644
--- a/src/test/regress/expected/collate.out
+++ b/src/test/regress/expected/collate.out
@@ -650,6 +650,22 @@ EXPLAIN (COSTS OFF)
(3 rows)
-- CREATE/DROP COLLATION
+CREATE COLLATION builtin_c ( PROVIDER = builtin, LOCALE = "C" );
+SELECT b FROM collate_test1 ORDER BY b COLLATE builtin_c;
+ b
+-----
+ ABD
+ Abc
+ abc
+ bbc
+(4 rows)
+
+CREATE COLLATION builtin2 ( PROVIDER = builtin ); -- fails
+ERROR: parameter "locale" must be specified
+CREATE COLLATION builtin2 ( PROVIDER = builtin, LOCALE = "en_US" ); -- fails
+ERROR: invalid locale name "en_US" for builtin provider
+CREATE COLLATION builtin2 ( PROVIDER = builtin, LC_CTYPE = "C", LC_COLLATE = "C" ); -- fails
+ERROR: parameter "locale" must be specified
CREATE COLLATION mycoll1 FROM "C";
CREATE COLLATION mycoll2 ( LC_COLLATE = "POSIX", LC_CTYPE = "POSIX" );
CREATE COLLATION mycoll3 FROM "default"; -- intentionally unsupported
@@ -754,7 +770,7 @@ DETAIL: FROM cannot be specified together with any other options.
-- must get rid of them.
--
DROP SCHEMA collate_tests CASCADE;
-NOTICE: drop cascades to 19 other objects
+NOTICE: drop cascades to 20 other objects
DETAIL: drop cascades to table collate_test1
drop cascades to table collate_test_like
drop cascades to table collate_test2
@@ -771,6 +787,7 @@ drop cascades to function dup(anyelement)
drop cascades to table collate_test20
drop cascades to table collate_test21
drop cascades to table collate_test22
+drop cascades to collation builtin_c
drop cascades to collation mycoll2
drop cascades to table collate_test23
drop cascades to view collate_on_int
diff --git a/src/test/regress/sql/collate.sql b/src/test/regress/sql/collate.sql
index c3d40fc1959..4b0e4472c3f 100644
--- a/src/test/regress/sql/collate.sql
+++ b/src/test/regress/sql/collate.sql
@@ -244,6 +244,14 @@ EXPLAIN (COSTS OFF)
-- CREATE/DROP COLLATION
+CREATE COLLATION builtin_c ( PROVIDER = builtin, LOCALE = "C" );
+
+SELECT b FROM collate_test1 ORDER BY b COLLATE builtin_c;
+
+CREATE COLLATION builtin2 ( PROVIDER = builtin ); -- fails
+CREATE COLLATION builtin2 ( PROVIDER = builtin, LOCALE = "en_US" ); -- fails
+CREATE COLLATION builtin2 ( PROVIDER = builtin, LC_CTYPE = "C", LC_COLLATE = "C" ); -- fails
+
CREATE COLLATION mycoll1 FROM "C";
CREATE COLLATION mycoll2 ( LC_COLLATE = "POSIX", LC_CTYPE = "POSIX" );
CREATE COLLATION mycoll3 FROM "default"; -- intentionally unsupported