aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTeodor Sigaev <teodor@sigaev.ru>2009-03-02 15:11:25 +0000
committerTeodor Sigaev <teodor@sigaev.ru>2009-03-02 15:11:25 +0000
commit327c5e37d7c87aded85d348a3236d71a6e2d34e7 (patch)
tree2fab4ab201012b8e3c3fa78ba798bd41c2bc859b /src
parent69daf2defeb48e26a96bd4eeeecad0f9404fd97c (diff)
downloadpostgresql-327c5e37d7c87aded85d348a3236d71a6e2d34e7.tar.gz
postgresql-327c5e37d7c87aded85d348a3236d71a6e2d34e7.zip
Fix usage of char2wchar/wchar2char. Changes:
- pg_wchar and wchar_t could have different sizes, so char2wchar no longer calls pg_mb2wchar_with_len, to prevent an out-of-bounds memory bug
- make char2wchar/wchar2char symmetric; they should now not be called with C-locale, because mbstowcs/wcstombs often don't work correctly with C-locale
- Text parser uses pg_mb2wchar_with_len directly in case of C-locale and multibyte encoding
Per bug report by Hiroshi Inoue <inoue@tpf.co.jp> and following discussion. Backpatch up to 8.2, when multibyte support was implemented in tsearch.
Diffstat (limited to 'src')
-rw-r--r--src/backend/tsearch/ts_locale.c15
-rw-r--r--src/backend/tsearch/wparser_def.c51
2 files changed, 37 insertions, 29 deletions
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index 73d8ed51de7..71f99febdbc 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.7.2.1 2008/06/18 20:55:49 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.7.2.2 2009/03/02 15:11:25 teodor Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,6 +53,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen)
}
#endif /* WIN32 */
+ Assert( !lc_ctype_is_c() );
return wcstombs(to, from, tolen);
}
@@ -99,17 +100,8 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
return r;
}
-#endif /* WIN32 */
-
- if (lc_ctype_is_c())
- {
- /*
- * pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
- * allocated with sufficient space
- */
- return pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
- }
else
+#endif /* WIN32 */
{
/*
* mbstowcs requires ending '\0'
@@ -117,6 +109,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
char *str = pnstrdup(from, fromlen);
size_t result;
+ Assert( !lc_ctype_is_c() );
result = mbstowcs(to, str, tolen);
pfree(str);
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 0ab06ad0f15..61e367b650c 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.14.2.3 2009/01/15 17:06:03 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.14.2.4 2009/03/02 15:11:25 teodor Exp $
*
*-------------------------------------------------------------------------
*/
@@ -240,12 +240,12 @@ typedef struct TParser
int lenstr; /* length of mbstring */
#ifdef TS_USE_WIDE
wchar_t *wstr; /* wide character string */
- int lenwstr; /* length of wsting */
+ pg_wchar *pgwstr; /* wide character string for C-locale */
+ bool usewide;
#endif
/* State of parse */
int charmaxlen;
- bool usewide;
TParserPosition *state;
bool ignore;
bool wanthost;
@@ -299,13 +299,24 @@ TParserInit(char *str, int len)
if (prs->charmaxlen > 1)
{
prs->usewide = true;
- prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1));
- prs->lenwstr = char2wchar(prs->wstr, prs->lenstr + 1,
- prs->str, prs->lenstr);
+ if ( lc_ctype_is_c() )
+ {
+ /*
+ * char2wchar doesn't work for C-locale and
+ * sizeof(pg_wchar) could be not equal to sizeof(wchar_t)
+ */
+ prs->pgwstr = (pg_wchar*) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));
+ pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);
+ }
+ else
+ {
+ prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1));
+ char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr);
+ }
}
else
-#endif
prs->usewide = false;
+#endif
prs->state = newTParserPosition(NULL);
prs->state->state = TPS_Base;
@@ -331,6 +342,8 @@ TParserClose(TParser *prs)
#ifdef TS_USE_WIDE
if (prs->wstr)
pfree(prs->wstr);
+ if (prs->pgwstr)
+ pfree(prs->pgwstr);
#endif
pfree(prs);
@@ -338,10 +351,12 @@ TParserClose(TParser *prs)
/*
* Character-type support functions, equivalent to is* macros, but
- * working with any possible encodings and locales. Note,
- * that with multibyte encoding and C-locale isw* function may fail
- * or give wrong result. Note 2: multibyte encoding and C-locale
- * often are used for Asian languages
+ * working with any possible encodings and locales. Notes:
+ * - with multibyte encoding and C-locale isw* function may fail
+ * or give wrong result.
+ * - multibyte encoding and C-locale often are used for
+ * Asian languages.
+ * - if locale is C then we use pgwstr instead of wstr
*/
#ifdef TS_USE_WIDE
@@ -352,14 +367,14 @@ p_is##type(TParser *prs) { \
Assert( prs->state ); \
if ( prs->usewide ) \
{ \
- if ( lc_ctype_is_c() ) \
- return is##type( 0xff & *( prs->wstr + prs->state->poschar) ); \
+ if ( prs->pgwstr ) \
+ return is##type( 0xff & *( prs->pgwstr + prs->state->poschar) );\
\
return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \
} \
\
return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
-} \
+} \
\
static int \
p_isnot##type(TParser *prs) { \
@@ -373,9 +388,9 @@ p_isalnum(TParser *prs)
if (prs->usewide)
{
- if (lc_ctype_is_c())
+ if (prs->pgwstr)
{
- unsigned int c = *(prs->wstr + prs->state->poschar);
+ unsigned int c = *(prs->pgwstr + prs->state->poschar);
/*
* any non-ascii symbol with multibyte encoding with C-locale is
@@ -405,9 +420,9 @@ p_isalpha(TParser *prs)
if (prs->usewide)
{
- if (lc_ctype_is_c())
+ if (prs->pgwstr)
{
- unsigned int c = *(prs->wstr + prs->state->poschar);
+ unsigned int c = *(prs->pgwstr + prs->state->poschar);
/*
* any non-ascii symbol with multibyte encoding with C-locale is