aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2009-09-27 03:27:24 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2009-09-27 03:27:24 +0000
commit 799ac992014374c23a1fc437f4fd9aa413be4920 (patch)
tree b5ee578a4e981a67217bafedf04c62105cad5dbf /src
parent 3686bcb9c9891d7b6404711ed8d91a3e01ddb8aa (diff)
download postgresql-799ac992014374c23a1fc437f4fd9aa413be4920.tar.gz
postgresql-799ac992014374c23a1fc437f4fd9aa413be4920.zip
Sync psql's scanner with recent changes in backend scanner's flex rules.
Marko Kreen, Tom Lane
Diffstat (limited to 'src')
-rw-r--r-- src/backend/parser/scan.l 10
-rw-r--r-- src/bin/psql/psqlscan.l 44
2 files changed, 47 insertions, 7 deletions
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index fdc95135509..150202e77ce 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -24,7 +24,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.161 2009/09/25 21:13:06 petere Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.162 2009/09/27 03:27:23 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -571,18 +571,16 @@ other .
BEGIN(xe);
}
-<xeu>. |
-<xeu>\n |
+<xeu>. { yyerror("invalid Unicode surrogate pair"); }
+<xeu>\n { yyerror("invalid Unicode surrogate pair"); }
<xeu><<EOF>> { yyerror("invalid Unicode surrogate pair"); }
-
<xe,xeu>{xeunicodefail} {
ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("invalid Unicode escape"),
errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
lexer_errposition()));
- }
-
+ }
<xe>{xeescape} {
if (yytext[1] == '\'')
{
diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l
index 235fe9599c6..894800aaf58 100644
--- a/src/bin/psql/psqlscan.l
+++ b/src/bin/psql/psqlscan.l
@@ -33,7 +33,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.28 2009/01/01 17:23:55 momjian Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.29 2009/09/27 03:27:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -117,6 +117,7 @@ static void push_new_buffer(const char *newstr);
static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
char **txtcopy);
static void emit(const char *txt, int len);
+static bool is_utf16_surrogate_first(uint32 c);
#define ECHO emit(yytext, yyleng)
@@ -158,6 +159,7 @@ static void emit(const char *txt, int len);
* <xdolq> $foo$ quoted strings
* <xui> quoted identifier with Unicode escapes
* <xus> quoted string with Unicode escapes
+ * <xeu> Unicode surrogate pair in extended quoted string
*/
%x xb
@@ -169,6 +171,7 @@ static void emit(const char *txt, int len);
%x xdolq
%x xui
%x xus
+%x xeu
/* Additional exclusive states for psql only: lex backslash commands */
%x xslashcmd
%x xslasharg
@@ -192,6 +195,9 @@ static void emit(const char *txt, int len);
* did not end with a newline.
*
* XXX perhaps \f (formfeed) should be treated as a newline as well?
+ *
+ * XXX if you change the set of whitespace characters, fix scanner_isspace()
+ * to agree, and see also the plpgsql lexer.
*/
space [ \t\n\r\f]
@@ -253,6 +259,8 @@ xeinside [^\\']+
xeescape [\\][^0-7]
xeoctesc [\\][0-7]{1,3}
xehexesc [\\]x[0-9A-Fa-f]{1,2}
+xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
+xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
/* Extended quote
* xqdouble implements embedded quote, ''''
@@ -334,6 +342,10 @@ identifier {ident_start}{ident_cont}*
typecast "::"
+/* these two token types are used by PL/pgsql, though not in core SQL */
+dot_dot \.\.
+colon_equals ":="
+
/*
* "self" is the set of chars that should be returned as single-character
* tokens. "op_chars" is the set of chars that can make up "Op" tokens,
@@ -511,6 +523,22 @@ other .
<xe>{xeinside} {
ECHO;
}
+<xe>{xeunicode} {
+ uint32 c = strtoul(yytext+2, NULL, 16); /* parse the hex digits that follow the backslash and u/U */
+
+ if (is_utf16_surrogate_first(c))
+ BEGIN(xeu); /* first half of a surrogate pair seen: switch to <xeu> for the next token */
+ ECHO;
+ }
+<xeu>{xeunicode} {
+ BEGIN(xe); /* a Unicode escape followed: leave <xeu> and resume <xe> */
+ ECHO;
+ }
+<xeu>. { ECHO; /* any other character in <xeu> is just echoed; this rule reports no error */ }
+<xeu>\n { ECHO; /* same for a newline */ }
+<xe,xeu>{xeunicodefail} {
+ ECHO; /* truncated \u or \U escape is echoed unchanged; this rule reports no error */
+ }
<xe>{xeescape} {
ECHO;
}
@@ -605,6 +633,14 @@ other .
ECHO;
}
+{dot_dot} {
+ ECHO;
+ }
+
+{colon_equals} {
+ ECHO;
+ }
+
/*
* These rules are specific to psql --- they implement parenthesis
* counting and detection of command-ending semicolon. These must
@@ -1690,3 +1726,9 @@ emit(const char *txt, int len)
}
}
}
+
+static bool
+is_utf16_surrogate_first(uint32 c) /* c: value parsed from a \uXXXX or \UXXXXXXXX escape */
+{
+ return (c >= 0xD800 && c <= 0xDBFF); /* true iff c lies in 0xD800..0xDBFF, inclusive */
+}