aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2014-02-01 18:27:04 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2014-02-01 18:27:48 -0500
commit: 399d23e19c9cf3cf94561e0137f59a44530d0efb (patch)
tree: ef14039bd75db81e769482da6431be2bc34a4039
parent: 141119c2a78efbb6dae5b0f37f9c07c2b9f9c8cb (diff)
download: postgresql-399d23e19c9cf3cf94561e0137f59a44530d0efb.tar.gz
postgresql-399d23e19c9cf3cf94561e0137f59a44530d0efb.zip
Fix some wide-character bugs in the text-search parser.

In p_isdigit and other character class test functions generated by the p_iswhat macro, the code path for non-C locales with multibyte encodings contained a bogus pointer cast that would accidentally fail to malfunction if types wchar_t and wint_t have the same width. Apparently that is true on most platforms, but not on recent Cygwin releases. Remove the cast, as it seems completely unnecessary (I think it arose from a false analogy to the need to cast to unsigned char when dealing with the <ctype.h> functions). Per bug #8970 from Marco Atzeri.

In the same functions, the code path for C locale with a multibyte encoding simply ANDed each wide character with 0xFF before passing it to the corresponding <ctype.h> function. This could result in false positive answers for some non-ASCII characters, so use a range test instead. Noted by me while investigating Marco's complaint.

Also, remove some useless though not actually buggy maskings and casts in the hand-coded p_isalnum and p_isalpha functions, which evidently got tested a bit more carefully than the macro-generated functions.

-rw-r--r-- src/backend/tsearch/wparser_def.c 20
1 file changed, 12 insertions, 8 deletions
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 7ba33145889..f66851426b4 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -432,7 +432,7 @@ TParserCopyClose(TParser *prs)
* or give wrong result.
* - multibyte encoding and C-locale often are used for
* Asian languages.
- * - if locale is C the we use pgwstr instead of wstr
+ * - if locale is C then we use pgwstr instead of wstr.
*/
#ifdef USE_WIDE_UPPER_LOWER
@@ -444,9 +444,13 @@ p_is##type(TParser *prs) { \
if ( prs->usewide ) \
{ \
if ( prs->pgwstr ) \
- return is##type( 0xff & *( prs->pgwstr + prs->state->poschar) );\
- \
- return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \
+ { \
+ unsigned int c = *(prs->pgwstr + prs->state->poschar); \
+ if ( c > 0x7f ) \
+ return 0; \
+ return is##type( c ); \
+ } \
+ return isw##type( *( prs->wstr + prs->state->poschar ) ); \
} \
\
return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \
@@ -475,10 +479,10 @@ p_isalnum(TParser *prs)
if (c > 0x7f)
return 1;
- return isalnum(0xff & c);
+ return isalnum(c);
}
- return iswalnum((wint_t) *(prs->wstr + prs->state->poschar));
+ return iswalnum(*(prs->wstr + prs->state->poschar));
}
return isalnum(*(unsigned char *) (prs->str + prs->state->posbyte));
@@ -507,10 +511,10 @@ p_isalpha(TParser *prs)
if (c > 0x7f)
return 1;
- return isalpha(0xff & c);
+ return isalpha(c);
}
- return iswalpha((wint_t) *(prs->wstr + prs->state->poschar));
+ return iswalpha(*(prs->wstr + prs->state->poschar));
}
return isalpha(*(unsigned char *) (prs->str + prs->state->posbyte));