about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2004-02-21 00:35:13 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2004-02-21 00:35:13 +0000
commit: 8c99671a3b2e5b90d263cfd883c9cdeba63d6cc4 (patch)
tree: 2b529b95daa1fd6ee92c72f91a68b1b4e426a514 /src
parent: fe92ed8b78ce527739547fcd233c63debb2f3538 (diff)
download: postgresql-8c99671a3b2e5b90d263cfd883c9cdeba63d6cc4.tar.gz
download: postgresql-8c99671a3b2e5b90d263cfd883c9cdeba63d6cc4.zip
Implement a solution to the 'Turkish locale downcases I incorrectly'
problem, per previous discussion. Make some additional changes to
centralize the knowledge of just how identifier downcasing is done,
in hopes of simplifying any future tweaking in this area.
Diffstat (limited to 'src')
-rw-r--r-- src/backend/commands/define.c       | 19
-rw-r--r-- src/backend/commands/functioncmds.c |  6
-rw-r--r-- src/backend/commands/proclang.c     | 24
-rw-r--r-- src/backend/parser/keywords.c       | 10
-rw-r--r-- src/backend/parser/scan.l           | 44
-rw-r--r-- src/backend/parser/scansup.c        | 78
-rw-r--r-- src/backend/utils/adt/varlena.c     | 44
-rw-r--r-- src/include/commands/defrem.h       |  4
-rw-r--r-- src/include/parser/scansup.h        |  9
-rw-r--r-- src/pl/plpgsql/src/pl_funcs.c       | 45
10 files changed, 158 insertions, 125 deletions
diff --git a/src/backend/commands/define.c b/src/backend/commands/define.c
index 4ac687259da..68c3248e541 100644
--- a/src/backend/commands/define.c
+++ b/src/backend/commands/define.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/define.c,v 1.84 2003/08/04 02:39:58 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/define.c,v 1.84.4.1 2004/02/21 00:35:13 tgl Exp $
*
* DESCRIPTION
* The "DefineFoo" routines take the parse tree and pick out the
@@ -38,24 +38,19 @@
#include "catalog/namespace.h"
#include "commands/defrem.h"
#include "parser/parse_type.h"
+#include "parser/scansup.h"
#include "utils/int8.h"
/*
- * Translate the input language name to lower case.
+ * Translate the input language name to lower case, and truncate if needed.
*
- * Output buffer must be NAMEDATALEN long.
+ * Returns a palloc'd string
*/
-void
-case_translate_language_name(const char *input, char *output)
+char *
+case_translate_language_name(const char *input)
{
- int i;
-
- MemSet(output, 0, NAMEDATALEN); /* ensure result Name is
- * zero-filled */
-
- for (i = 0; i < NAMEDATALEN - 1 && input[i]; ++i)
- output[i] = tolower((unsigned char) input[i]);
+ return downcase_truncate_identifier(input, strlen(input), false);
}
diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c
index 35ab80c09a7..ce5b2cc2bf6 100644
--- a/src/backend/commands/functioncmds.c
+++ b/src/backend/commands/functioncmds.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/functioncmds.c,v 1.38 2003/10/02 06:34:03 petere Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/functioncmds.c,v 1.38.2.1 2004/02/21 00:35:13 tgl Exp $
*
* DESCRIPTION
* These routines take the parse tree and pick out the
@@ -393,7 +393,7 @@ CreateFunction(CreateFunctionStmt *stmt)
Oid prorettype;
bool returnsSet;
char *language;
- char languageName[NAMEDATALEN];
+ char *languageName;
Oid languageOid;
Oid languageValidator;
char *funcname;
@@ -428,7 +428,7 @@ CreateFunction(CreateFunctionStmt *stmt)
&as_clause, &language, &volatility, &isStrict, &security);
/* Convert language name to canonical case */
- case_translate_language_name(language, languageName);
+ languageName = case_translate_language_name(language);
/* Look up the language and validate permissions */
languageTuple = SearchSysCache(LANGNAME,
diff --git a/src/backend/commands/proclang.c b/src/backend/commands/proclang.c
index 09325d647ca..97d7c38a2e9 100644
--- a/src/backend/commands/proclang.c
+++ b/src/backend/commands/proclang.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/proclang.c,v 1.51 2003/10/02 06:34:03 petere Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/proclang.c,v 1.51.2.1 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -40,11 +40,12 @@
void
CreateProceduralLanguage(CreatePLangStmt *stmt)
{
- char languageName[NAMEDATALEN];
+ char *languageName;
Oid procOid,
valProcOid;
Oid funcrettype;
Oid typev[FUNC_MAX_ARGS];
+ NameData langname;
char nulls[Natts_pg_language];
Datum values[Natts_pg_language];
Relation rel;
@@ -66,7 +67,7 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
* Translate the language name and check that this language doesn't
* already exist
*/
- case_translate_language_name(stmt->plname, languageName);
+ languageName = case_translate_language_name(stmt->plname);
if (SearchSysCacheExists(LANGNAME,
PointerGetDatum(languageName),
@@ -124,12 +125,13 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
}
i = 0;
- values[i++] = PointerGetDatum(languageName);
- values[i++] = BoolGetDatum(true); /* lanispl */
- values[i++] = BoolGetDatum(stmt->pltrusted);
- values[i++] = ObjectIdGetDatum(procOid);
- values[i++] = ObjectIdGetDatum(valProcOid);
- nulls[i] = 'n'; /* lanacl */
+ namestrcpy(&langname, languageName);
+ values[i++] = NameGetDatum(&langname); /* lanname */
+ values[i++] = BoolGetDatum(true); /* lanispl */
+ values[i++] = BoolGetDatum(stmt->pltrusted); /* lanpltrusted */
+ values[i++] = ObjectIdGetDatum(procOid); /* lanplcallfoid */
+ values[i++] = ObjectIdGetDatum(valProcOid); /* lanvalidator */
+ nulls[i] = 'n'; /* lanacl */
rel = heap_openr(LanguageRelationName, RowExclusiveLock);
@@ -173,7 +175,7 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
void
DropProceduralLanguage(DropPLangStmt *stmt)
{
- char languageName[NAMEDATALEN];
+ char *languageName;
HeapTuple langTup;
ObjectAddress object;
@@ -189,7 +191,7 @@ DropProceduralLanguage(DropPLangStmt *stmt)
* Translate the language name, check that this language exist and is
* a PL
*/
- case_translate_language_name(stmt->plname, languageName);
+ languageName = case_translate_language_name(stmt->plname);
langTup = SearchSysCache(LANGNAME,
CStringGetDatum(languageName),
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c
index c4048b4c1d8..f4f454715c6 100644
--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.141 2003/08/04 02:40:01 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.141.4.1 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -365,17 +365,13 @@ ScanKeywordLookup(const char *text)
/*
* Apply an ASCII-only downcasing. We must not use tolower() since it
- * may produce the wrong translation in some locales (eg, Turkish),
- * and we don't trust isupper() very much either. In an ASCII-based
- * encoding the tests against A and Z are sufficient, but we also
- * check isupper() so that we will work correctly under EBCDIC. The
- * actual case conversion step should work for either ASCII or EBCDIC.
+ * may produce the wrong translation in some locales (eg, Turkish).
*/
for (i = 0; i < len; i++)
{
char ch = text[i];
- if (ch >= 'A' && ch <= 'Z' && isupper((unsigned char) ch))
+ if (ch >= 'A' && ch <= 'Z')
ch += 'a' - 'A';
word[i] = ch;
}
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index b10d4531851..c3a423a7f4a 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.111 2003/10/09 19:13:23 petere Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.111.2.1 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -26,6 +26,7 @@
#include "parser/keywords.h"
/* Not needed now that this file is compiled as part of gram.y */
/* #include "parser/parse.h" */
+#include "parser/scansup.h"
#include "utils/builtins.h"
#include "mb/pg_wchar.h"
@@ -394,23 +395,15 @@ other .
startlit();
}
<xd>{xdstop} {
+ char *ident;
+
BEGIN(INITIAL);
if (literallen == 0)
yyerror("zero-length delimited identifier");
+ ident = litbufdup();
if (literallen >= NAMEDATALEN)
- {
- int len;
-
- len = pg_mbcliplen(literalbuf, literallen,
- NAMEDATALEN-1);
- ereport(NOTICE,
- (errcode(ERRCODE_NAME_TOO_LONG),
- errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
- literalbuf, len, literalbuf)));
- literalbuf[len] = '\0';
- literallen = len;
- }
- yylval.str = litbufdup();
+ truncate_identifier(ident, literallen, true);
+ yylval.str = ident;
return IDENT;
}
<xd>{xddouble} {
@@ -532,7 +525,6 @@ other .
{identifier} {
const ScanKeyword *keyword;
char *ident;
- int i;
/* Is it a keyword? */
keyword = ScanKeywordLookup(yytext);
@@ -545,28 +537,8 @@ other .
/*
* No. Convert the identifier to lower case, and truncate
* if necessary.
- *
- * Note: here we use a locale-dependent case conversion,
- * which seems appropriate under standard SQL rules, whereas
- * the keyword comparison was NOT locale-dependent.
*/
- ident = pstrdup(yytext);
- for (i = 0; ident[i]; i++)
- {
- if (isupper((unsigned char) ident[i]))
- ident[i] = tolower((unsigned char) ident[i]);
- }
- if (i >= NAMEDATALEN)
- {
- int len;
-
- len = pg_mbcliplen(ident, i, NAMEDATALEN-1);
- ereport(NOTICE,
- (errcode(ERRCODE_NAME_TOO_LONG),
- errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
- ident, len, ident)));
- ident[len] = '\0';
- }
+ ident = downcase_truncate_identifier(yytext, yyleng, true);
yylval.str = ident;
return IDENT;
}
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
index e00d284edcc..14467dcdc77 100644
--- a/src/backend/parser/scansup.c
+++ b/src/backend/parser/scansup.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/scansup.c,v 1.24 2003/08/04 02:40:02 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/scansup.c,v 1.24.4.1 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -19,6 +19,8 @@
#include "miscadmin.h"
#include "parser/scansup.h"
+#include "mb/pg_wchar.h"
+
/* ----------------
* scanstr
@@ -32,7 +34,7 @@
*/
char *
-scanstr(char *s)
+scanstr(const char *s)
{
char *newStr;
int len,
@@ -109,3 +111,75 @@ scanstr(char *s)
newStr[j] = '\0';
return newStr;
}
+
+
+/*
+ * downcase_truncate_identifier() --- do appropriate downcasing and
+ * truncation of an unquoted identifier. Optionally warn of truncation.
+ *
+ * Returns a palloc'd string containing the adjusted identifier.
+ *
+ * Note: in some usages the passed string is not null-terminated.
+ *
+ * Note: the API of this function is designed to allow for downcasing
+ * transformations that increase the string length, but we don't yet
+ * support that. If you want to implement it, you'll need to fix
+ * SplitIdentifierString() in utils/adt/varlena.c.
+ */
+char *
+downcase_truncate_identifier(const char *ident, int len, bool warn)
+{
+ char *result;
+ int i;
+
+ result = palloc(len + 1);
+ /*
+ * SQL99 specifies Unicode-aware case normalization, which we don't yet
+ * have the infrastructure for. Instead we use tolower() to provide a
+ * locale-aware translation. However, there are some locales where this
+ * is not right either (eg, Turkish may do strange things with 'i' and
+ * 'I'). Our current compromise is to use tolower() for characters with
+ * the high bit set, and use an ASCII-only downcasing for 7-bit
+ * characters.
+ */
+ for (i = 0; i < len; i++)
+ {
+ unsigned char ch = (unsigned char) ident[i];
+
+ if (ch >= 'A' && ch <= 'Z')
+ ch += 'a' - 'A';
+ else if (ch >= 0x80 && isupper(ch))
+ ch = tolower(ch);
+ result[i] = (char) ch;
+ }
+ result[i] = '\0';
+
+ if (i >= NAMEDATALEN)
+ truncate_identifier(result, i, warn);
+
+ return result;
+}
+
+/*
+ * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
+ *
+ * The given string is modified in-place, if necessary. A warning is
+ * issued if requested.
+ *
+ * We require the caller to pass in the string length since this saves a
+ * strlen() call in some common usages.
+ */
+void
+truncate_identifier(char *ident, int len, bool warn)
+{
+ if (len >= NAMEDATALEN)
+ {
+ len = pg_mbcliplen(ident, len, NAMEDATALEN-1);
+ if (warn)
+ ereport(NOTICE,
+ (errcode(ERRCODE_NAME_TOO_LONG),
+ errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
+ ident, len, ident)));
+ ident[len] = '\0';
+ }
+}
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index caf0250e886..7b17b50aec4 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.106.2.3 2004/01/31 00:45:34 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.106.2.4 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,17 +16,18 @@
#include <ctype.h>
-#include "mb/pg_wchar.h"
-#include "miscadmin.h"
#include "access/tuptoaster.h"
#include "catalog/pg_type.h"
#include "lib/stringinfo.h"
#include "libpq/crypt.h"
#include "libpq/pqformat.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "parser/scansup.h"
#include "utils/array.h"
#include "utils/builtins.h"
-#include "utils/pg_locale.h"
#include "utils/lsyscache.h"
+#include "utils/pg_locale.h"
typedef struct varlena unknown;
@@ -1681,7 +1682,6 @@ SplitIdentifierString(char *rawstring, char separator,
{
char *curname;
char *endp;
- int curlen;
if (*nextp == '\"')
{
@@ -1704,21 +1704,30 @@ SplitIdentifierString(char *rawstring, char separator,
else
{
/* Unquoted name --- extends to separator or whitespace */
+ char *downname;
+ int len;
+
curname = nextp;
while (*nextp && *nextp != separator &&
!isspace((unsigned char) *nextp))
- {
- /*
- * It's important that this match the identifier
- * downcasing code used by backend/parser/scan.l.
- */
- if (isupper((unsigned char) *nextp))
- *nextp = tolower((unsigned char) *nextp);
nextp++;
- }
endp = nextp;
if (curname == nextp)
return false; /* empty unquoted name not allowed */
+ /*
+ * Downcase the identifier, using same code as main lexer does.
+ *
+ * XXX because we want to overwrite the input in-place, we cannot
+ * support a downcasing transformation that increases the
+ * string length. This is not a problem given the current
+ * implementation of downcase_truncate_identifier, but we'll
+ * probably have to do something about this someday.
+ */
+ len = endp - curname;
+ downname = downcase_truncate_identifier(curname, len, false);
+ Assert(strlen(downname) <= len);
+ strncpy(curname, downname, len);
+ pfree(downname);
}
while (isspace((unsigned char) *nextp))
@@ -1739,13 +1748,8 @@ SplitIdentifierString(char *rawstring, char separator,
/* Now safe to overwrite separator with a null */
*endp = '\0';
- /* Truncate name if it's overlength; again, should match scan.l */
- curlen = strlen(curname);
- if (curlen >= NAMEDATALEN)
- {
- curlen = pg_mbcliplen(curname, curlen, NAMEDATALEN - 1);
- curname[curlen] = '\0';
- }
+ /* Truncate name if it's overlength */
+ truncate_identifier(curname, strlen(curname), false);
/*
* Finished isolating current name --- add it to list
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index e192c868fa0..f6a5da4531d 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: defrem.h,v 1.52 2003/08/04 02:40:13 momjian Exp $
+ * $Id: defrem.h,v 1.52.4.1 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -61,7 +61,7 @@ extern void RenameOpClass(List *name, const char *access_method, const char *new
/* support routines in commands/define.c */
-extern void case_translate_language_name(const char *input, char *output);
+extern char *case_translate_language_name(const char *input);
extern char *defGetString(DefElem *def);
extern double defGetNumeric(DefElem *def);
diff --git a/src/include/parser/scansup.h b/src/include/parser/scansup.h
index 12b8794d28d..ef4e1179a99 100644
--- a/src/include/parser/scansup.h
+++ b/src/include/parser/scansup.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: scansup.h,v 1.13 2003/08/04 02:40:14 momjian Exp $
+ * $Id: scansup.h,v 1.13.4.1 2004/02/21 00:35:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -15,6 +15,11 @@
#ifndef SCANSUP_H
#define SCANSUP_H
-extern char *scanstr(char *s);
+extern char *scanstr(const char *s);
+
+extern char *downcase_truncate_identifier(const char *ident, int len,
+ bool warn);
+
+extern void truncate_identifier(char *ident, int len, bool warn);
#endif /* SCANSUP_H */
diff --git a/src/pl/plpgsql/src/pl_funcs.c b/src/pl/plpgsql/src/pl_funcs.c
index c47da263099..1f1e0f38910 100644
--- a/src/pl/plpgsql/src/pl_funcs.c
+++ b/src/pl/plpgsql/src/pl_funcs.c
@@ -3,7 +3,7 @@
* procedural language
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.30 2003/09/25 23:02:12 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.30.2.1 2004/02/21 00:35:13 tgl Exp $
*
* This software is copyrighted by Jan Wieck - Hamburg.
*
@@ -40,7 +40,7 @@
#include <ctype.h>
-#include "mb/pg_wchar.h"
+#include "parser/scansup.h"
/* ----------
@@ -348,15 +348,15 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
{
char *curident;
char *cp;
- int i;
/* Process current identifier */
- curident = palloc(strlen(s) + 1); /* surely enough room */
- cp = curident;
if (*s == '"')
{
/* Quoted identifier: copy, collapsing out doubled quotes */
+
+ curident = palloc(strlen(s) + 1); /* surely enough room */
+ cp = curident;
s++;
while (*s)
{
@@ -373,35 +373,20 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unterminated \" in name: %s", sstart)));
s++;
+ *cp = '\0';
+ /* Truncate to NAMEDATALEN */
+ truncate_identifier(curident, cp-curident, false);
}
else
{
- /*
- * Normal identifier: downcase, stop at dot or whitespace.
- *
- * Note that downcasing is locale-sensitive, following SQL99
- * rules for identifiers. We have already decided that the
- * item is not a PLPGSQL keyword.
- */
- while (*s && *s != '.' && !isspace((unsigned char) *s))
- {
- if (isupper((unsigned char) *s))
- *cp++ = tolower((unsigned char) *s++);
- else
- *cp++ = *s++;
- }
- }
-
- /* Truncate to NAMEDATALEN */
- *cp = '\0';
- i = cp - curident;
-
- if (i >= NAMEDATALEN)
- {
- int len;
+ /* Normal identifier: extends till dot or whitespace */
+ const char *thisstart = s;
- len = pg_mbcliplen(curident, i, NAMEDATALEN - 1);
- curident[len] = '\0';
+ while (*s && *s != '.' && !isspace((unsigned char) *s))
+ s++;
+ /* Downcase and truncate to NAMEDATALEN */
+ curident = downcase_truncate_identifier(thisstart, s-thisstart,
+ false);
}
/* Pass ident to caller */