diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2001-02-21 18:53:47 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2001-02-21 18:53:47 +0000 |
commit | be92ad49e0e94565e0ca10caeba186db80a24224 (patch) | |
tree | d49392c2997a8c4c69ecd768b270187578d0e7aa /src/backend | |
parent | 496373e2e4dc37a3789fa56d615b6665aa376c5b (diff) | |
download | postgresql-be92ad49e0e94565e0ca10caeba186db80a24224.tar.gz postgresql-be92ad49e0e94565e0ca10caeba186db80a24224.zip |
Change case-folding of keywords to conform to SQL99 and fix misbehavior
in the Turkish locale. Keywords are now checked under pure ASCII case-folding
rules ('A'-'Z' -> 'a'-'z' and nothing else). However, once a word is
determined not to be a keyword, it will be case-folded under the current
locale, same as before. See the pgsql-hackers discussion of 20-Feb-01.
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/parser/keywords.c | 63 | ||||
-rw-r--r-- | src/backend/parser/scan.l | 36 | ||||
-rw-r--r-- | src/backend/utils/adt/ruleutils.c | 6 |
3 files changed, 78 insertions, 27 deletions
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c index 7936f3a580f..c8f5f2c0e92 100644 --- a/src/backend/parser/keywords.c +++ b/src/backend/parser/keywords.c @@ -1,23 +1,22 @@ /*------------------------------------------------------------------------- * * keywords.c - * lexical token lookup for reserved words in postgres SQL + * lexical token lookup for reserved words in PostgreSQL * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.88 2001/01/24 19:43:01 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.89 2001/02/21 18:53:46 tgl Exp $ * *------------------------------------------------------------------------- */ -#include <ctype.h> - #include "postgres.h" +#include <ctype.h> + #include "nodes/parsenodes.h" -#include "nodes/pg_list.h" #include "parser/keywords.h" #include "parser/parse.h" @@ -286,18 +285,62 @@ static ScanKeyword ScanKeywords[] = { {"zone", ZONE}, }; +/* + * ScanKeywordLookup - see if a given word is a keyword + * + * Returns a pointer to the ScanKeyword table entry, or NULL if no match. + * + * The match is done case-insensitively. Note that we deliberately use a + * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z', + * even if we are in a locale where tolower() would produce more or different + * translations. This is to conform to the SQL99 spec, which says that + * keywords are to be matched in this way even though non-keyword identifiers + * receive a different case-normalization mapping. 
+ */ ScanKeyword * ScanKeywordLookup(char *text) { - ScanKeyword *low = &ScanKeywords[0]; - ScanKeyword *high = endof(ScanKeywords) - 1; - ScanKeyword *middle; - int difference; + int len, + i; + char word[NAMEDATALEN]; + ScanKeyword *low; + ScanKeyword *high; + + len = strlen(text); + /* We assume all keywords are shorter than NAMEDATALEN. */ + if (len >= NAMEDATALEN) + return NULL; + + /* + * Apply an ASCII-only downcasing. We must not use tolower() since + * it may produce the wrong translation in some locales (eg, Turkish), + * and we don't trust isupper() very much either. In an ASCII-based + * encoding the tests against A and Z are sufficient, but we also check + * isupper() so that we will work correctly under EBCDIC. The actual + * case conversion step should work for either ASCII or EBCDIC. + */ + for (i = 0; i < len; i++) + { + char ch = text[i]; + if (ch >= 'A' && ch <= 'Z' && isupper((unsigned char) ch)) + ch += 'a' - 'A'; + word[i] = ch; + } + word[len] = '\0'; + + /* + * Now do a binary search using plain strcmp() comparison. 
+ */ + low = &ScanKeywords[0]; + high = endof(ScanKeywords) - 1; while (low <= high) { + ScanKeyword *middle; + int difference; + middle = low + (high - low) / 2; - difference = strcmp(middle->name, text); + difference = strcmp(middle->name, word); if (difference == 0) return middle; else if (difference < 0) diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index f0f4626b953..f913584c1a7 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -2,14 +2,14 @@ /*------------------------------------------------------------------------- * * scan.l - * lexical scanner for POSTGRES + * lexical scanner for PostgreSQL * * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.86 2001/02/03 20:13:05 petere Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.87 2001/02/21 18:53:47 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -477,12 +477,27 @@ other . {identifier} { - int i; - ScanKeyword *keyword; + ScanKeyword *keyword; + int i; - for(i = 0; yytext[i]; i++) + /* Is it a keyword? */ + keyword = ScanKeywordLookup((char*) yytext); + if (keyword != NULL) + return keyword->value; + + /* + * No. Convert the identifier to lower case, and truncate + * if necessary. + * + * Note: here we use a locale-dependent case conversion, + * which seems appropriate under SQL99 rules, whereas + * the keyword comparison was NOT locale-dependent. + */ + for (i = 0; yytext[i]; i++) + { if (isupper((unsigned char) yytext[i])) yytext[i] = tolower((unsigned char) yytext[i]); + } if (i >= NAMEDATALEN) { #ifdef MULTIBYTE @@ -497,15 +512,8 @@ other . 
yytext[NAMEDATALEN-1] = '\0'; #endif } - keyword = ScanKeywordLookup((char*)yytext); - if (keyword != NULL) { - return keyword->value; - } - else - { - yylval.str = pstrdup((char*)yytext); - return IDENT; - } + yylval.str = pstrdup((char*) yytext); + return IDENT; } {other} { return yytext[0]; } diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 872b607e87c..2dd460a442b 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -3,7 +3,7 @@ * back to source text * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.72 2001/02/14 21:35:05 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/ruleutils.c,v 1.73 2001/02/21 18:53:47 tgl Exp $ * * This software is copyrighted by Jan Wieck - Hamburg. * @@ -2563,8 +2563,8 @@ quote_identifier(char *ident) * but the parser doesn't provide any easy way to test for whether * an identifier is safe or not... so be safe not sorry. * - * Note: ScanKeywordLookup() expects an all-lower-case input, but - * we've already checked we have that. + * Note: ScanKeywordLookup() does case-insensitive comparison, + * but that's fine, since we already know we have all-lower-case. */ if (ScanKeywordLookup(ident) != NULL) safe = false; |