aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2016-12-21 17:39:32 -0500
committer Tom Lane <tgl@sss.pgh.pa.us> 2016-12-21 17:39:32 -0500
commit 88e1e91da58422c5cf0e6e2d32f2aa15d75b8de9 (patch)
tree 3495592ba415fb5b3e968e011ae78132c4702fc4
parent 4e2477b7b8b6c025d273a316852f2dbf62fff5bc (diff)
download postgresql-88e1e91da58422c5cf0e6e2d32f2aa15d75b8de9.tar.gz
postgresql-88e1e91da58422c5cf0e6e2d32f2aa15d75b8de9.zip
Fix detection of unfinished Unicode surrogate pair at end of string.
The U&'...' and U&"..." syntaxes silently discarded a surrogate pair start (that is, a code between U+D800 and U+DBFF) if it occurred at the very end of the string. This seems like an obvious oversight, since we throw an error for every other invalid combination of surrogate characters, including the very same situation in E'...' syntax.

This has been wrong since the pair processing was added (in 9.0), so back-patch to all supported branches.

Discussion: https://postgr.es/m/19113.1482337898@sss.pgh.pa.us
-rw-r--r-- src/backend/parser/scan.l | 7
1 files changed, 7 insertions, 0 deletions
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 998349d7421..acd92698057 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -1435,6 +1435,13 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
}
}
+ /* unfinished surrogate pair? */
+ if (pair_first)
+ {
+ ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */
+ yyerror("invalid Unicode surrogate pair");
+ }
+
*out = '\0';
/*