diff options
author | Tatsuo Ishii <ishii@postgresql.org> | 2005-12-24 09:42:30 +0000 |
---|---|---|
committer | Tatsuo Ishii <ishii@postgresql.org> | 2005-12-24 09:42:30 +0000 |
commit | ab2d8c41ffbee182e6f84cf77049b00f2d3155c5 (patch) | |
tree | 53623b2627c8e1a968e20ec168bec1c3a98356e0 | |
parent | 5343bcadd6d4c6151b68f80f0b0c4b4c01450d67 (diff) | |
download | postgresql-ab2d8c41ffbee182e6f84cf77049b00f2d3155c5.tar.gz postgresql-ab2d8c41ffbee182e6f84cf77049b00f2d3155c5.zip |
Fix long-standing Asian multibyte charsets bug.
See:
Subject: [HACKERS] bugs with certain Asian multibyte charsets
From: Tatsuo Ishii <ishii@sraoss.co.jp>
To: pgsql-hackers@postgresql.org
Date: Sat, 24 Dec 2005 18:25:33 +0900 (JST)
for more details.
-rw-r--r-- | src/backend/utils/mb/wchar.c | 45 |
1 files changed, 23 insertions, 22 deletions
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c index 17f74ed6adf..293cab42c37 100644 --- a/src/backend/utils/mb/wchar.c +++ b/src/backend/utils/mb/wchar.c @@ -1,7 +1,7 @@ /* * conversion functions between pg_wchar and multibyte streams. * Tatsuo Ishii - * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.47 2005/10/29 00:31:52 petere Exp $ + * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.47.2.1 2005/12/24 09:42:30 ishii Exp $ * * WIN1250 client encoding updated by Pavel Behal * @@ -58,7 +58,6 @@ pg_ascii_dsplen(const unsigned char *s) /* * EUC */ - static int pg_euc2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) { @@ -66,26 +65,26 @@ static int pg_euc2wchar_with_len while (len > 0 && *from) { - if (*from == SS2 && len >= 2) + if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte KANA") */ { from++; - *to = 0xff & *from++; + *to = (SS2 << 8) | *from++; len -= 2; } - else if (*from == SS3 && len >= 3) + else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */ { from++; - *to = *from++ << 8; - *to |= 0x3f & *from++; + *to = (SS3 << 16) | (*from++ << 8); + *to |= *from++; len -= 3; } - else if ((*from & 0x80) && len >= 2) + else if ((*from & 0x80) && len >= 2) /* JIS X 0208 KANJI */ { *to = *from++ << 8; *to |= *from++; len -= 2; } - else + else /* must be ASCII */ { *to = *from++; len--; @@ -183,6 +182,7 @@ pg_euckr_dsplen(const unsigned char *s) /* * EUC_CN + * */ static int pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) @@ -191,21 +191,21 @@ static int pg_euccn2wchar_with_len while (len > 0 && *from) { - if (*from == SS2 && len >= 3) + if (*from == SS2 && len >= 3) /* code set 2 (unused?) */ { from++; - *to = 0x3f00 & (*from++ << 8); - *to = *from++; + *to = (SS2 << 16) | (*from++ << 8); + *to |= *from++; len -= 3; } - else if (*from == SS3 && len >= 3) + else if (*from == SS3 && len >= 3) /* code set 3 (unsed ?) 
*/ { from++; - *to = *from++ << 8; - *to |= 0x3f & *from++; + *to = (SS3 << 16) | (*from++ << 8); + *to |= *from++; len -= 3; } - else if ((*from & 0x80) && len >= 2) + else if ((*from & 0x80) && len >= 2) /* code set 1 */ { *to = *from++ << 8; *to |= *from++; @@ -249,6 +249,7 @@ pg_euccn_dsplen(const unsigned char *s) /* * EUC_TW + * */ static int pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) @@ -257,22 +258,22 @@ static int pg_euctw2wchar_with_len while (len > 0 && *from) { - if (*from == SS2 && len >= 4) + if (*from == SS2 && len >= 4) /* code set 2 */ { from++; - *to = *from++ << 16; + *to = (SS2 << 24) | (*from++ << 16) ; *to |= *from++ << 8; *to |= *from++; len -= 4; } - else if (*from == SS3 && len >= 3) + else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */ { from++; - *to = *from++ << 8; - *to |= 0x3f & *from++; + *to = (SS3 << 16) | (*from++ << 8); + *to |= *from++; len -= 3; } - else if ((*from & 0x80) && len >= 2) + else if ((*from & 0x80) && len >= 2) /* code set 2 */ { *to = *from++ << 8; *to |= *from++; |