1 files changed, 55 insertions, 3 deletions
diff --git a/src/interfaces/ecpg/preproc/parser.c b/src/interfaces/ecpg/preproc/parser.c
index 9daeee33034..ca0dead26d0 100644
--- a/src/interfaces/ecpg/preproc/parser.c
+++ b/src/interfaces/ecpg/preproc/parser.c
@@ -31,6 +31,7 @@ static YYSTYPE lookahead_yylval;	/* yylval for lookahead token */
 static YYLTYPE lookahead_yylloc;	/* yylloc for lookahead token */
 static char *lookahead_yytext;	/* start current token */
 
+static int	base_yylex_location(void);
 static bool check_uescapechar(unsigned char escape);
 static bool ecpg_isspace(char ch);
 
@@ -71,7 +72,7 @@ filtered_base_yylex(void)
 		have_lookahead = false;
 	}
 	else
-		cur_token = base_yylex();
+		cur_token = base_yylex_location();
 
 	/*
 	 * If this token isn't one that requires lookahead, just return it.
@@ -96,7 +97,7 @@ filtered_base_yylex(void)
 	cur_yytext = base_yytext;
 
 	/* Get next token, saving outputs into lookahead variables */
-	next_token = base_yylex();
+	next_token = base_yylex_location();
 
 	lookahead_token = next_token;
 	lookahead_yylval = base_yylval;
@@ -184,7 +185,7 @@ filtered_base_yylex(void)
 				cur_yytext = base_yytext;
 
 				/* Get third token */
-				next_token = base_yylex();
+				next_token = base_yylex_location();
 
 				if (next_token != SCONST)
 					mmerror(PARSE_ERROR, ET_ERROR, "UESCAPE must be followed by a simple string literal");
@@ -203,6 +204,7 @@ filtered_base_yylex(void)
 
 				/* Combine 3 tokens into 1 */
 				base_yylval.str = psprintf("%s UESCAPE %s", base_yylval.str, escstr);
+				base_yylloc = mm_strdup(base_yylval.str);
 
 				/* Clear have_lookahead, thereby consuming all three tokens */
 				have_lookahead = false;
@@ -219,6 +221,56 @@ filtered_base_yylex(void)
 }
 
 /*
+ * Call base_yylex() and fill in base_yylloc.
+ *
+ * pgc.l does not worry about setting yylloc, and given what we want for
+ * that, trying to set it there would be pretty inconvenient.  What we
+ * want is: if the returned token has type <str>, then duplicate its
+ * string value as yylloc; otherwise, make a downcased copy of yytext.
+ * The downcasing is ASCII-only because all that we care about there
+ * is producing uniformly-cased output of keywords.  (That's mostly
+ * cosmetic, but there are places in ecpglib that expect to receive
+ * downcased keywords, plus it keeps us regression-test-compatible
+ * with the pre-v18 implementation of ecpg.)
+ */
+static int
+base_yylex_location(void)
+{
+	int			token = base_yylex();
+
+	switch (token)
+	{
+			/* List a token here if pgc.l assigns to base_yylval.str for it */
+		case Op:
+		case CSTRING:
+		case CPP_LINE:
+		case CVARIABLE:
+		case BCONST:
+		case SCONST:
+		case USCONST:
+		case XCONST:
+		case FCONST:
+		case IDENT:
+		case UIDENT:
+		case IP:
+			/* Duplicate the <str> value */
+			base_yylloc = mm_strdup(base_yylval.str);
+			break;
+		default:
+			/* Else just use the input, i.e., yytext */
+			base_yylloc = mm_strdup(base_yytext);
+			/* Apply an ASCII-only downcasing */
+			for (unsigned char *ptr = (unsigned char *) base_yylloc; *ptr; ptr++)
+			{
+				if (*ptr >= 'A' && *ptr <= 'Z')
+					*ptr += 'a' - 'A';
+			}
+			break;
+	}
+	return token;
+}
+
+/*
  * check_uescapechar() and ecpg_isspace() should match their equivalents
  * in pgc.l.
  */