aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2004-02-21 00:35:13 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2004-02-21 00:35:13 +0000
commit 8c99671a3b2e5b90d263cfd883c9cdeba63d6cc4 (patch)
tree 2b529b95daa1fd6ee92c72f91a68b1b4e426a514
parent fe92ed8b78ce527739547fcd233c63debb2f3538 (diff)
download postgresql-8c99671a3b2e5b90d263cfd883c9cdeba63d6cc4.tar.gz
postgresql-8c99671a3b2e5b90d263cfd883c9cdeba63d6cc4.zip
Implement a solution to the 'Turkish locale downcases I incorrectly' problem, per previous discussion. Make some additional changes to centralize the knowledge of just how identifier downcasing is done, in hopes of simplifying any future tweaking in this area.
-rw-r--r-- src/backend/commands/define.c 19
-rw-r--r-- src/backend/commands/functioncmds.c 6
-rw-r--r-- src/backend/commands/proclang.c 24
-rw-r--r-- src/backend/parser/keywords.c 10
-rw-r--r-- src/backend/parser/scan.l 44
-rw-r--r-- src/backend/parser/scansup.c 78
-rw-r--r-- src/backend/utils/adt/varlena.c 44
-rw-r--r-- src/include/commands/defrem.h 4
-rw-r--r-- src/include/parser/scansup.h 9
-rw-r--r-- src/pl/plpgsql/src/pl_funcs.c 45
10 files changed, 158 insertions, 125 deletions
diff --git a/src/backend/commands/define.c b/src/backend/commands/define.c
index 4ac687259da..68c3248e541 100644
--- a/src/backend/commands/define.c
+++ b/src/backend/commands/define.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/define.c,v 1.84 2003/08/04 02:39:58 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/define.c,v 1.84.4.1 2004/02/21 00:35:13 tgl Exp $
*
* DESCRIPTION
* The "DefineFoo" routines take the parse tree and pick out the
@@ -38,24 +38,19 @@
#include "catalog/namespace.h"
#include "commands/defrem.h"
#include "parser/parse_type.h"
+#include "parser/scansup.h"
#include "utils/int8.h"
/*
- * Translate the input language name to lower case.
+ * Translate the input language name to lower case, and truncate if needed.
*
- * Output buffer must be NAMEDATALEN long.
+ * Returns a palloc'd string
*/
-void
-case_translate_language_name(const char *input, char *output)
+char *
+case_translate_language_name(const char *input)
{
- int i;
-
- MemSet(output, 0, NAMEDATALEN); /* ensure result Name is
- * zero-filled */
-
- for (i = 0; i < NAMEDATALEN - 1 && input[i]; ++i)
- output[i] = tolower((unsigned char) input[i]);
+ return downcase_truncate_identifier(input, strlen(input), false);
}
diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c
index 35ab80c09a7..ce5b2cc2bf6 100644
--- a/src/backend/commands/functioncmds.c
+++ b/src/backend/commands/functioncmds.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/functioncmds.c,v 1.38 2003/10/02 06:34:03 petere Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/functioncmds.c,v 1.38.2.1 2004/02/21 00:35:13 tgl Exp $
*
* DESCRIPTION
* These routines take the parse tree and pick out the
@@ -393,7 +393,7 @@ CreateFunction(CreateFunctionStmt *stmt)
Oid prorettype;
bool returnsSet;
char *language;
- char languageName[NAMEDATALEN];
+ char *languageName;
Oid languageOid;
Oid languageValidator;
char *funcname;
@@ -428,7 +428,7 @@ CreateFunction(CreateFunctionStmt *stmt)
&as_clause, &language, &volatility, &isStrict, &security);
/* Convert language name to canonical case */
- case_translate_language_name(language, languageName);
+ languageName = case_translate_language_name(language);
/* Look up the language and validate permissions */
languageTuple = SearchSysCache(LANGNAME,
diff --git a/src/backend/commands/proclang.c b/src/backend/commands/proclang.c
index 09325d647ca..97d7c38a2e9 100644
--- a/src/backend/commands/proclang.c
+++ b/src/backend/commands/proclang.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/proclang.c,v 1.51 2003/10/02 06:34:03 petere Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/proclang.c,v 1.51.2.1 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -40,11 +40,12 @@
void
CreateProceduralLanguage(CreatePLangStmt *stmt)
{
- char languageName[NAMEDATALEN];
+ char *languageName;
Oid procOid,
valProcOid;
Oid funcrettype;
Oid typev[FUNC_MAX_ARGS];
+ NameData langname;
char nulls[Natts_pg_language];
Datum values[Natts_pg_language];
Relation rel;
@@ -66,7 +67,7 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
* Translate the language name and check that this language doesn't
* already exist
*/
- case_translate_language_name(stmt->plname, languageName);
+ languageName = case_translate_language_name(stmt->plname);
if (SearchSysCacheExists(LANGNAME,
PointerGetDatum(languageName),
@@ -124,12 +125,13 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
}
i = 0;
- values[i++] = PointerGetDatum(languageName);
- values[i++] = BoolGetDatum(true); /* lanispl */
- values[i++] = BoolGetDatum(stmt->pltrusted);
- values[i++] = ObjectIdGetDatum(procOid);
- values[i++] = ObjectIdGetDatum(valProcOid);
- nulls[i] = 'n'; /* lanacl */
+ namestrcpy(&langname, languageName);
+ values[i++] = NameGetDatum(&langname); /* lanname */
+ values[i++] = BoolGetDatum(true); /* lanispl */
+ values[i++] = BoolGetDatum(stmt->pltrusted); /* lanpltrusted */
+ values[i++] = ObjectIdGetDatum(procOid); /* lanplcallfoid */
+ values[i++] = ObjectIdGetDatum(valProcOid); /* lanvalidator */
+ nulls[i] = 'n'; /* lanacl */
rel = heap_openr(LanguageRelationName, RowExclusiveLock);
@@ -173,7 +175,7 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
void
DropProceduralLanguage(DropPLangStmt *stmt)
{
- char languageName[NAMEDATALEN];
+ char *languageName;
HeapTuple langTup;
ObjectAddress object;
@@ -189,7 +191,7 @@ DropProceduralLanguage(DropPLangStmt *stmt)
* Translate the language name, check that this language exist and is
* a PL
*/
- case_translate_language_name(stmt->plname, languageName);
+ languageName = case_translate_language_name(stmt->plname);
langTup = SearchSysCache(LANGNAME,
CStringGetDatum(languageName),
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c
index c4048b4c1d8..f4f454715c6 100644
--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.141 2003/08/04 02:40:01 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.141.4.1 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -365,17 +365,13 @@ ScanKeywordLookup(const char *text)
/*
* Apply an ASCII-only downcasing. We must not use tolower() since it
- * may produce the wrong translation in some locales (eg, Turkish),
- * and we don't trust isupper() very much either. In an ASCII-based
- * encoding the tests against A and Z are sufficient, but we also
- * check isupper() so that we will work correctly under EBCDIC. The
- * actual case conversion step should work for either ASCII or EBCDIC.
+ * may produce the wrong translation in some locales (eg, Turkish).
*/
for (i = 0; i < len; i++)
{
char ch = text[i];
- if (ch >= 'A' && ch <= 'Z' && isupper((unsigned char) ch))
+ if (ch >= 'A' && ch <= 'Z')
ch += 'a' - 'A';
word[i] = ch;
}
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index b10d4531851..c3a423a7f4a 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.111 2003/10/09 19:13:23 petere Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.111.2.1 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -26,6 +26,7 @@
#include "parser/keywords.h"
/* Not needed now that this file is compiled as part of gram.y */
/* #include "parser/parse.h" */
+#include "parser/scansup.h"
#include "utils/builtins.h"
#include "mb/pg_wchar.h"
@@ -394,23 +395,15 @@ other .
startlit();
}
<xd>{xdstop} {
+ char *ident;
+
BEGIN(INITIAL);
if (literallen == 0)
yyerror("zero-length delimited identifier");
+ ident = litbufdup();
if (literallen >= NAMEDATALEN)
- {
- int len;
-
- len = pg_mbcliplen(literalbuf, literallen,
- NAMEDATALEN-1);
- ereport(NOTICE,
- (errcode(ERRCODE_NAME_TOO_LONG),
- errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
- literalbuf, len, literalbuf)));
- literalbuf[len] = '\0';
- literallen = len;
- }
- yylval.str = litbufdup();
+ truncate_identifier(ident, literallen, true);
+ yylval.str = ident;
return IDENT;
}
<xd>{xddouble} {
@@ -532,7 +525,6 @@ other .
{identifier} {
const ScanKeyword *keyword;
char *ident;
- int i;
/* Is it a keyword? */
keyword = ScanKeywordLookup(yytext);
@@ -545,28 +537,8 @@ other .
/*
* No. Convert the identifier to lower case, and truncate
* if necessary.
- *
- * Note: here we use a locale-dependent case conversion,
- * which seems appropriate under standard SQL rules, whereas
- * the keyword comparison was NOT locale-dependent.
*/
- ident = pstrdup(yytext);
- for (i = 0; ident[i]; i++)
- {
- if (isupper((unsigned char) ident[i]))
- ident[i] = tolower((unsigned char) ident[i]);
- }
- if (i >= NAMEDATALEN)
- {
- int len;
-
- len = pg_mbcliplen(ident, i, NAMEDATALEN-1);
- ereport(NOTICE,
- (errcode(ERRCODE_NAME_TOO_LONG),
- errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
- ident, len, ident)));
- ident[len] = '\0';
- }
+ ident = downcase_truncate_identifier(yytext, yyleng, true);
yylval.str = ident;
return IDENT;
}
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
index e00d284edcc..14467dcdc77 100644
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/scansup.c,v 1.24 2003/08/04 02:40:02 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/scansup.c,v 1.24.4.1 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -19,6 +19,8 @@
#include "miscadmin.h"
#include "parser/scansup.h"
+#include "mb/pg_wchar.h"
+
/* ----------------
* scanstr
@@ -32,7 +34,7 @@
*/
char *
-scanstr(char *s)
+scanstr(const char *s)
{
char *newStr;
int len,
@@ -109,3 +111,75 @@ scanstr(char *s)
newStr[j] = '\0';
return newStr;
}
+
+
+/*
+ * downcase_truncate_identifier() --- do appropriate downcasing and
+ * truncation of an unquoted identifier. Optionally warn of truncation.
+ *
+ * Returns a palloc'd string containing the adjusted identifier.
+ *
+ * Note: in some usages the passed string is not null-terminated.
+ *
+ * Note: the API of this function is designed to allow for downcasing
+ * transformations that increase the string length, but we don't yet
+ * support that. If you want to implement it, you'll need to fix
+ * SplitIdentifierString() in utils/adt/varlena.c.
+ */
+char *
+downcase_truncate_identifier(const char *ident, int len, bool warn)
+{
+ char *result;
+ int i;
+
+ result = palloc(len + 1);
+ /*
+ * SQL99 specifies Unicode-aware case normalization, which we don't yet
+ * have the infrastructure for. Instead we use tolower() to provide a
+ * locale-aware translation. However, there are some locales where this
+ * is not right either (eg, Turkish may do strange things with 'i' and
+ * 'I'). Our current compromise is to use tolower() for characters with
+ * the high bit set, and use an ASCII-only downcasing for 7-bit
+ * characters.
+ */
+ for (i = 0; i < len; i++)
+ {
+ unsigned char ch = (unsigned char) ident[i];
+
+ if (ch >= 'A' && ch <= 'Z')
+ ch += 'a' - 'A';
+ else if (ch >= 0x80 && isupper(ch))
+ ch = tolower(ch);
+ result[i] = (char) ch;
+ }
+ result[i] = '\0';
+
+ if (i >= NAMEDATALEN)
+ truncate_identifier(result, i, warn);
+
+ return result;
+}
+
+/*
+ * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
+ *
+ * The given string is modified in-place, if necessary. A warning is
+ * issued if requested.
+ *
+ * We require the caller to pass in the string length since this saves a
+ * strlen() call in some common usages.
+ */
+void
+truncate_identifier(char *ident, int len, bool warn)
+{
+ if (len >= NAMEDATALEN)
+ {
+ len = pg_mbcliplen(ident, len, NAMEDATALEN-1);
+ if (warn)
+ ereport(NOTICE,
+ (errcode(ERRCODE_NAME_TOO_LONG),
+ errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
+ ident, len, ident)));
+ ident[len] = '\0';
+ }
+}
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index caf0250e886..7b17b50aec4 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.106.2.3 2004/01/31 00:45:34 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.106.2.4 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,17 +16,18 @@
#include <ctype.h>
-#include "mb/pg_wchar.h"
-#include "miscadmin.h"
#include "access/tuptoaster.h"
#include "catalog/pg_type.h"
#include "lib/stringinfo.h"
#include "libpq/crypt.h"
#include "libpq/pqformat.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "parser/scansup.h"
#include "utils/array.h"
#include "utils/builtins.h"
-#include "utils/pg_locale.h"
#include "utils/lsyscache.h"
+#include "utils/pg_locale.h"
typedef struct varlena unknown;
@@ -1681,7 +1682,6 @@ SplitIdentifierString(char *rawstring, char separator,
{
char *curname;
char *endp;
- int curlen;
if (*nextp == '\"')
{
@@ -1704,21 +1704,30 @@ SplitIdentifierString(char *rawstring, char separator,
else
{
/* Unquoted name --- extends to separator or whitespace */
+ char *downname;
+ int len;
+
curname = nextp;
while (*nextp && *nextp != separator &&
!isspace((unsigned char) *nextp))
- {
- /*
- * It's important that this match the identifier
- * downcasing code used by backend/parser/scan.l.
- */
- if (isupper((unsigned char) *nextp))
- *nextp = tolower((unsigned char) *nextp);
nextp++;
- }
endp = nextp;
if (curname == nextp)
return false; /* empty unquoted name not allowed */
+ /*
+ * Downcase the identifier, using same code as main lexer does.
+ *
+ * XXX because we want to overwrite the input in-place, we cannot
+ * support a downcasing transformation that increases the
+ * string length. This is not a problem given the current
+ * implementation of downcase_truncate_identifier, but we'll
+ * probably have to do something about this someday.
+ */
+ len = endp - curname;
+ downname = downcase_truncate_identifier(curname, len, false);
+ Assert(strlen(downname) <= len);
+ strncpy(curname, downname, len);
+ pfree(downname);
}
while (isspace((unsigned char) *nextp))
@@ -1739,13 +1748,8 @@ SplitIdentifierString(char *rawstring, char separator,
/* Now safe to overwrite separator with a null */
*endp = '\0';
- /* Truncate name if it's overlength; again, should match scan.l */
- curlen = strlen(curname);
- if (curlen >= NAMEDATALEN)
- {
- curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
- curname[curlen] = '\0';
- }
+ /* Truncate name if it's overlength */
+ truncate_identifier(curname, strlen(curname), false);
/*
* Finished isolating current name --- add it to list
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index e192c868fa0..f6a5da4531d 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: defrem.h,v 1.52 2003/08/04 02:40:13 momjian Exp $
+ * $Id: defrem.h,v 1.52.4.1 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -61,7 +61,7 @@ extern void RenameOpClass(List *name, const char *access_method, const char *new
/* support routines in commands/define.c */
-extern void case_translate_language_name(const char *input, char *output);
+extern char *case_translate_language_name(const char *input);
extern char *defGetString(DefElem *def);
extern double defGetNumeric(DefElem *def);
diff --git a/src/include/parser/scansup.h b/src/include/parser/scansup.h
index 12b8794d28d..ef4e1179a99 100644
--- a/src/include/parser/scansup.h
+++ b/src/include/parser/scansup.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: scansup.h,v 1.13 2003/08/04 02:40:14 momjian Exp $
+ * $Id: scansup.h,v 1.13.4.1 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -15,6 +15,11 @@
#ifndef SCANSUP_H
#define SCANSUP_H
-extern char *scanstr(char *s);
+extern char *scanstr(const char *s);
+
+extern char *downcase_truncate_identifier(const char *ident, int len,
+ bool warn);
+
+extern void truncate_identifier(char *ident, int len, bool warn);
#endif /* SCANSUP_H */
diff --git a/src/pl/plpgsql/src/pl_funcs.c b/src/pl/plpgsql/src/pl_funcs.c
index c47da263099..1f1e0f38910 100644
--- a/src/pl/plpgsql/src/pl_funcs.c
+++ b/src/pl/plpgsql/src/pl_funcs.c
@@ -3,7 +3,7 @@
* procedural language
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.30 2003/09/25 23:02:12 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.30.2.1 2004/02/21 00:35:13 tgl Exp $
*
* This software is copyrighted by Jan Wieck - Hamburg.
*
@@ -40,7 +40,7 @@
#include <ctype.h>
-#include "mb/pg_wchar.h"
+#include "parser/scansup.h"
/* ----------
@@ -348,15 +348,15 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
{
char *curident;
char *cp;
- int i;
/* Process current identifier */
- curident = palloc(strlen(s) + 1); /* surely enough room */
- cp = curident;
if (*s == '"')
{
/* Quoted identifier: copy, collapsing out doubled quotes */
+
+ curident = palloc(strlen(s) + 1); /* surely enough room */
+ cp = curident;
s++;
while (*s)
{
@@ -373,35 +373,20 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unterminated \" in name: %s", sstart)));
s++;
+ *cp = '\0';
+ /* Truncate to NAMEDATALEN */
+ truncate_identifier(curident, cp-curident, false);
}
else
{
- /*
- * Normal identifier: downcase, stop at dot or whitespace.
- *
- * Note that downcasing is locale-sensitive, following SQL99
- * rules for identifiers. We have already decided that the
- * item is not a PLPGSQL keyword.
- */
- while (*s && *s != '.' && !isspace((unsigned char) *s))
- {
- if (isupper((unsigned char) *s))
- *cp++ = tolower((unsigned char) *s++);
- else
- *cp++ = *s++;
- }
- }
-
- /* Truncate to NAMEDATALEN */
- *cp = '\0';
- i = cp - curident;
-
- if (i >= NAMEDATALEN)
- {
- int len;
+ /* Normal identifier: extends till dot or whitespace */
+ const char *thisstart = s;
- len = pg_mbcliplen(curident, i, NAMEDATALEN - 1);
- curident[len] = '\0';
+ while (*s && *s != '.' && !isspace((unsigned char) *s))
+ s++;
+ /* Downcase and truncate to NAMEDATALEN */
+ curident = downcase_truncate_identifier(thisstart, s-thisstart,
+ false);
}
/* Pass ident to caller */