aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils
diff options
context:
space:
mode:
author Tatsuo Ishii <ishii@postgresql.org> 2005-12-24 10:40:55 +0000
committer Tatsuo Ishii <ishii@postgresql.org> 2005-12-24 10:40:55 +0000
commit b2af5a28cc364d45c8ec41f32ece9e13d0598406 (patch)
tree 3d0cb01cacf8c79b840c382324846133f98306cb /src/backend/utils
parent 0862aeaeec46153337df89c9c0d4d25e6d5c7713 (diff)
download postgresql-b2af5a28cc364d45c8ec41f32ece9e13d0598406.tar.gz
postgresql-b2af5a28cc364d45c8ec41f32ece9e13d0598406.zip
Fix long-standing Asian multibyte charsets bug.
For more details, see the following pgsql-hackers message: Subject: "[HACKERS] bugs with certain Asian multibyte charsets"; From: Tatsuo Ishii <ishii@sraoss.co.jp>; To: pgsql-hackers@postgresql.org; Date: Sat, 24 Dec 2005 18:25:33 +0900 (JST).
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- src/backend/utils/mb/wchar.c 45
1 files changed, 23 insertions, 22 deletions
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
index 0e19a6075f6..5dbc01032ac 100644
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
@@ -1,7 +1,7 @@
/*
* conversion functions between pg_wchar and multibyte streams.
* Tatsuo Ishii
- * $Id: wchar.c,v 1.34 2003/09/25 06:58:05 petere Exp $
+ * $Id: wchar.c,v 1.34.2.1 2005/12/24 10:40:55 ishii Exp $
*
* WIN1250 client encoding updated by Pavel Behal
*
@@ -52,7 +52,6 @@ pg_ascii_mblen(const unsigned char *s)
/*
* EUC
*/
-
static int pg_euc2wchar_with_len
(const unsigned char *from, pg_wchar *to, int len)
{
@@ -60,26 +59,26 @@ static int pg_euc2wchar_with_len
while (len > 0 && *from)
{
- if (*from == SS2 && len >= 2)
+ if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte KANA") */
{
from++;
- *to = 0xff & *from++;
+ *to = (SS2 << 8) | *from++;
len -= 2;
}
- else if (*from == SS3 && len >= 3)
+ else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */
{
from++;
- *to = *from++ << 8;
- *to |= 0x3f & *from++;
+ *to = (SS3 << 16) | (*from++ << 8);
+ *to |= *from++;
len -= 3;
}
- else if ((*from & 0x80) && len >= 2)
+ else if ((*from & 0x80) && len >= 2) /* JIS X 0208 KANJI */
{
*to = *from++ << 8;
*to |= *from++;
len -= 2;
}
- else
+ else /* must be ASCII */
{
*to = *from++;
len--;
@@ -139,6 +138,7 @@ pg_euckr_mblen(const unsigned char *s)
/*
* EUC_CN
+ *
*/
static int pg_euccn2wchar_with_len
(const unsigned char *from, pg_wchar *to, int len)
@@ -147,21 +147,21 @@ static int pg_euccn2wchar_with_len
while (len > 0 && *from)
{
- if (*from == SS2 && len >= 3)
+ if (*from == SS2 && len >= 3) /* code set 2 (unused?) */
{
from++;
- *to = 0x3f00 & (*from++ << 8);
- *to = *from++;
+ *to = (SS2 << 16) | (*from++ << 8);
+ *to |= *from++;
len -= 3;
}
- else if (*from == SS3 && len >= 3)
+ else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
{
from++;
- *to = *from++ << 8;
- *to |= 0x3f & *from++;
+ *to = (SS3 << 16) | (*from++ << 8);
+ *to |= *from++;
len -= 3;
}
- else if ((*from & 0x80) && len >= 2)
+ else if ((*from & 0x80) && len >= 2) /* code set 1 */
{
*to = *from++ << 8;
*to |= *from++;
@@ -193,6 +193,7 @@ pg_euccn_mblen(const unsigned char *s)
/*
* EUC_TW
+ *
*/
static int pg_euctw2wchar_with_len
(const unsigned char *from, pg_wchar *to, int len)
@@ -201,22 +202,22 @@ static int pg_euctw2wchar_with_len
while (len > 0 && *from)
{
- if (*from == SS2 && len >= 4)
+ if (*from == SS2 && len >= 4) /* code set 2 */
{
from++;
- *to = *from++ << 16;
+ *to = (SS2 << 24) | (*from++ << 16) ;
*to |= *from++ << 8;
*to |= *from++;
len -= 4;
}
- else if (*from == SS3 && len >= 3)
+ else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */
{
from++;
- *to = *from++ << 8;
- *to |= 0x3f & *from++;
+ *to = (SS3 << 16) | (*from++ << 8);
+ *to |= *from++;
len -= 3;
}
- else if ((*from & 0x80) && len >= 2)
+ else if ((*from & 0x80) && len >= 2) /* code set 1 */
{
*to = *from++ << 8;
*to |= *from++;