aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Paquier <michael@paquier.xyz> 2023-06-12 09:14:13 +0900
committer Michael Paquier <michael@paquier.xyz> 2023-06-12 09:14:13 +0900
commit 3a5222a432beeb980208a2d9abeb517412f4d469 (patch)
tree 804bdc65435cd990cff70e2812c3df7fe453e2d9
parent e25e5f7fc6b74c9d4ce82627e9145ef5537412e2 (diff)
download postgresql-3a5222a432beeb980208a2d9abeb517412f4d469.tar.gz
postgresql-3a5222a432beeb980208a2d9abeb517412f4d469.zip
hstore: Tighten key/value parsing check for whitespaces
isspace() can be locale-sensitive depending on the platform, causing hstore to treat as whitespace some characters that it should not. For example, U+0105, which is encoded as 0xC4 0x85 in UTF-8, would be discarded from the given input.

This problem is similar to 9ae2661, though it was missed at the time that hstore can also manipulate non-ASCII inputs, so replace the existing isspace() calls with scanner_isspace().

This problem has existed for a long time, so backpatch all the way down.

Author: Evan Jones
Discussion: https://postgr.es/m/CA+HWA9awUW0+RV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig@mail.gmail.com
Backpatch-through: 11
-rw-r--r-- contrib/hstore/Makefile 2
-rw-r--r-- contrib/hstore/expected/hstore_utf8.out 36
-rw-r--r-- contrib/hstore/expected/hstore_utf8_1.out 8
-rw-r--r-- contrib/hstore/hstore_io.c 9
-rw-r--r-- contrib/hstore/sql/hstore_utf8.sql 19
5 files changed, 69 insertions, 5 deletions
diff --git a/contrib/hstore/Makefile b/contrib/hstore/Makefile
index c4e339b57c1..48ee98f0d5c 100644
--- a/contrib/hstore/Makefile
+++ b/contrib/hstore/Makefile
@@ -22,7 +22,7 @@ PGFILEDESC = "hstore - key/value pair data type"
HEADERS = hstore.h
-REGRESS = hstore
+REGRESS = hstore hstore_utf8
ifdef USE_PGXS
PG_CONFIG = pg_config
diff --git a/contrib/hstore/expected/hstore_utf8.out b/contrib/hstore/expected/hstore_utf8.out
new file mode 100644
index 00000000000..44058244132
--- /dev/null
+++ b/contrib/hstore/expected/hstore_utf8.out
@@ -0,0 +1,36 @@
+/*
+ * This test must be run in a database with UTF-8 encoding,
+ * because other encodings don't support all the characters used.
+ */
+SELECT getdatabaseencoding() <> 'UTF8'
+ AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+SET client_encoding = utf8;
+-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
+-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
+SELECT E'key\u0105=>value\u0105'::hstore;
+ hstore
+------------------
+ "keyą"=>"valueą"
+(1 row)
+
+SELECT 'keyą=>valueą'::hstore;
+ hstore
+------------------
+ "keyą"=>"valueą"
+(1 row)
+
+SELECT 'ą=>ą'::hstore;
+ hstore
+----------
+ "ą"=>"ą"
+(1 row)
+
+SELECT 'keyąfoo=>valueą'::hstore;
+ hstore
+---------------------
+ "keyąfoo"=>"valueą"
+(1 row)
+
diff --git a/contrib/hstore/expected/hstore_utf8_1.out b/contrib/hstore/expected/hstore_utf8_1.out
new file mode 100644
index 00000000000..37aead89c0c
--- /dev/null
+++ b/contrib/hstore/expected/hstore_utf8_1.out
@@ -0,0 +1,8 @@
+/*
+ * This test must be run in a database with UTF-8 encoding,
+ * because other encodings don't support all the characters used.
+ */
+SELECT getdatabaseencoding() <> 'UTF8'
+ AS skip_test \gset
+\if :skip_test
+\quit
diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c
index b3304ff8445..03057f085d1 100644
--- a/contrib/hstore/hstore_io.c
+++ b/contrib/hstore/hstore_io.c
@@ -12,6 +12,7 @@
#include "hstore.h"
#include "lib/stringinfo.h"
#include "libpq/pqformat.h"
+#include "parser/scansup.h"
#include "utils/builtins.h"
#include "utils/json.h"
#include "utils/jsonb.h"
@@ -88,7 +89,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
{
st = GV_WAITESCIN;
}
- else if (!isspace((unsigned char) *(state->ptr)))
+ else if (!scanner_isspace((unsigned char) *(state->ptr)))
{
*(state->cur) = *(state->ptr);
state->cur++;
@@ -111,7 +112,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
state->ptr--;
return true;
}
- else if (isspace((unsigned char) *(state->ptr)))
+ else if (scanner_isspace((unsigned char) *(state->ptr)))
{
return true;
}
@@ -219,7 +220,7 @@ parse_hstore(HSParser *state)
{
elog(ERROR, "Unexpected end of string");
}
- else if (!isspace((unsigned char) *(state->ptr)))
+ else if (!scanner_isspace((unsigned char) *(state->ptr)))
{
elog(ERROR, "Syntax error near \"%.*s\" at position %d",
pg_mblen(state->ptr), state->ptr,
@@ -271,7 +272,7 @@ parse_hstore(HSParser *state)
{
return;
}
- else if (!isspace((unsigned char) *(state->ptr)))
+ else if (!scanner_isspace((unsigned char) *(state->ptr)))
{
elog(ERROR, "Syntax error near \"%.*s\" at position %d",
pg_mblen(state->ptr), state->ptr,
diff --git a/contrib/hstore/sql/hstore_utf8.sql b/contrib/hstore/sql/hstore_utf8.sql
new file mode 100644
index 00000000000..face878324c
--- /dev/null
+++ b/contrib/hstore/sql/hstore_utf8.sql
@@ -0,0 +1,19 @@
+/*
+ * This test must be run in a database with UTF-8 encoding,
+ * because other encodings don't support all the characters used.
+ */
+
+SELECT getdatabaseencoding() <> 'UTF8'
+ AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
+SET client_encoding = utf8;
+
+-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
+-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
+SELECT E'key\u0105=>value\u0105'::hstore;
+SELECT 'keyą=>valueą'::hstore;
+SELECT 'ą=>ą'::hstore;
+SELECT 'keyąfoo=>valueą'::hstore;