diff options
author | Bruce Momjian <bruce@momjian.us> | 2002-03-05 05:52:50 +0000 |
---|---|---|
committer | Bruce Momjian <bruce@momjian.us> | 2002-03-05 05:52:50 +0000 |
commit | a8bd7e1c6e026678019b2f25cffc0a94ce62b24b (patch) | |
tree | 0334b3c7648b888f1c416579e8ca29fcdadb4a6e /src/interfaces | |
parent | 03194432de712f7afb4ddc2ade2bc44f0536dae1 (diff) | |
download | postgresql-a8bd7e1c6e026678019b2f25cffc0a94ce62b24b.tar.gz postgresql-a8bd7e1c6e026678019b2f25cffc0a94ce62b24b.zip |
> Tatsuo Ishii wrote:
> > > > It was made to cope with encoding such as an Asian bloc in 7.2Beta2.
> > > >
> > > > Added ServerEncoding
> > > > Korean (JOHAB), Thai (WIN874),
> > > > Vietnamese (TCVN), Arabic (WIN1256)
> > > >
> > > > Added ClientEncoding
> > > > Simplified Chinese (GBK), Korean (UHC)
> > > >
> > > >
> > > >
> http://www.sankyo-unyu.co.jp/Pool/postgresql-7.2b2.newencoding.diff.tar.gz
> > > > (608K)
> > >
> > > Looks good. I need some people to review this for me.
> >
> > For me they look good too. The only missing part is a
> > documentation. I will ask him to write it up. If he couldn't, I will
> > do it for him.
> > > The diff is 3mb
> > > but appears to address only additions to multibyte. I have attached a
> > > list of files it modifies. Also, look at the sizes of the mb/
> > > directory. It is getting large:
> > >
> > > 4 ./CVS
> > > 6 ./Unicode/CVS
> > > 3433 ./Unicode
> > > 6197 .
> >
> > Yes. We definitely need the on-the-fly encoding addition capability:
> > i.e. CREATE CHARACTER SET in the future...
> > --
> > Tatsuo Ishii
> >
> >
Address change.
http://www.sankyo-unyu.co.jp/Pool/postgresql-7.2.newencoding.diff.gz
Add PsqlODBC and document ...etc patch.
Eiji Tokuya
Diffstat (limited to 'src/interfaces')
-rw-r--r-- | src/interfaces/odbc/connection.c | 27 | ||||
-rw-r--r-- | src/interfaces/odbc/multibyte.c | 378 | ||||
-rw-r--r-- | src/interfaces/odbc/multibyte.h | 100 |
3 files changed, 383 insertions, 122 deletions
diff --git a/src/interfaces/odbc/connection.c b/src/interfaces/odbc/connection.c index 66ef5e9ae4f..389da5698ae 100644 --- a/src/interfaces/odbc/connection.c +++ b/src/interfaces/odbc/connection.c @@ -944,6 +944,9 @@ another_version_retry: CC_send_settings(self); CC_lookup_lo(self); /* a hack to get the oid of our large * object oid type */ +#ifdef MULTIBYTE
+ CC_lookup_characterset(self);
+#endif
CC_lookup_pg_version(self); /* Get PostgreSQL version for SQLGetInfo * use */ @@ -1247,9 +1250,27 @@ CC_send_query(ConnectionClass *self, char *query, QueryInfo *qi) if (QR_command_successful(res)) QR_set_status(res, PGRES_NONFATAL_ERROR); QR_set_notice(res, cmdbuffer); /* will dup this string */ - - mylog("~~~ NOTICE: '%s'\n", cmdbuffer); - qlog("NOTICE from backend during send_query: '%s'\n", cmdbuffer); +#ifdef MULTIBYTE
+ if (strstr(cmdbuffer,"encoding is"))
+ {
+ if (strstr(cmdbuffer,"Current client encoding is"))
+ strcpy(PG_CCSS, cmdbuffer + 36);
+ if (strstr(cmdbuffer,"Current server encoding is"))
+ strcpy(PG_SCSS, cmdbuffer + 36);
+ mylog("~~~ NOTICE: '%s'\n", cmdbuffer);
+ qlog("NOTICE from backend during send_query: '%s'\n ClientEncoding = %s\n ServerEncoding = %s\n", cmdbuffer, PG_CCSS, PG_SCSS);
+
+ }
+ else
+ {
+
+ mylog("~~~ NOTICE: '%s'\n", cmdbuffer);
+ qlog("NOTICE from backend during send_query: '%s'\n", cmdbuffer);
+ }
+#else
+ mylog("~~~ NOTICE: '%s'\n", cmdbuffer);
+ qlog("NOTICE from backend during send_query: '%s'\n", cmdbuffer);
+#endif while (msg_truncated) msg_truncated = SOCK_get_string(sock, cmdbuffer, ERROR_MSG_LENGTH); diff --git a/src/interfaces/odbc/multibyte.c b/src/interfaces/odbc/multibyte.c index d10c5754455..464bf244326 100644 --- a/src/interfaces/odbc/multibyte.c +++ b/src/interfaces/odbc/multibyte.c @@ -1,138 +1,330 @@ /*-------- * Module : multibyte.c * - * Description: Mlutibyte related additional function. + * Description: New Mlutibyte related additional function. * * Create 2001-03-03 Eiji Tokuya + * New Create 2001-09-16 Eiji Tokuya *-------- */ #include "multibyte.h" +#include "connection.h" +#include "pgapifunc.h" #include <string.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> -int multibyte_client_encoding; /* Multibyte Client Encoding. */ -int multibyte_status; /* Multibyte Odds and ends character. */ +int PG_CCST; /* Client Charcter Status */ +int PG_SCSC; /* Server Charcter Set (code) */ +int PG_CCSC; /* Client Charcter Set (code) */ +unsigned char *PG_SCSS; /* Server Charcter Set (string) */ +unsigned char *PG_CCSS; /* Client Charcter Set (string) */ -unsigned char * -multibyte_strchr(const unsigned char *s, unsigned char c) +pg_CS CS_Table[] = { - int mb_st = 0, - i = 0; + { "SQL_ASCII", SQL_ASCII }, + { "EUC_JP", EUC_JP }, + { "EUC_CN", EUC_CN }, + { "EUC_KR", EUC_KR }, + { "EUC_TW", EUC_TW }, + { "JOHAB", JOHAB }, + { "UNICODE", UTF8 }, + { "MULE_INTERNAL",MULE_INTERNAL }, + { "LATIN1", LATIN1 }, + { "LATIN2", LATIN2 }, + { "LATIN3", LATIN3 }, + { "LATIN4", LATIN4 }, + { "LATIN5", LATIN5 }, + { "LATIN6", LATIN6 }, + { "LATIN7", LATIN7 }, + { "LATIN8", LATIN8 }, + { "LATIN9", LATIN9 }, + { "LATIN10", LATIN10 }, + { "WIN1256", WIN1256 }, + { "TCVN", TCVN }, + { "WIN874", WIN874 }, + { "KOI8", KOI8R }, + { "WIN", WIN1251 }, + { "ALT", ALT }, + { "ISO_8859_5", ISO_8859_5 }, + { "ISO_8859_6", ISO_8859_6 }, + { "ISO_8859_7", ISO_8859_7 }, + { "ISO_8859_8", ISO_8859_8 }, - while (!(mb_st == 0 && (s[i] == c || s[i] == 0))) - { - if (s[i] == 0) - 
return (0); - switch (multibyte_client_encoding) - { - case SJIS: - { - if (mb_st < 2 && s[i] > 0x80 && !(s[i] > 0x9f && s[i] < 0xe0)) - mb_st = 2; - else if (mb_st == 2) - mb_st = 1; - else - mb_st = 0; - } - break; -/* Chinese Big5 Support. */ - case BIG5: - { - if (mb_st < 2 && s[i] > 0xA0) - mb_st = 2; - else if (mb_st == 2) - mb_st = 1; - else - mb_st = 0; - } - break; - default: - mb_st = 0; - } + { "SJIS", SJIS }, + { "BIG5", BIG5 }, + { "GBK", GBK }, + { "UHC", UHC }, + { "WIN1250", WIN1250 }, + { "OTHER", OTHER } +}; + +int +pg_ismb(int characterset_code) +{ + int i=0,MB_CHARACTERSET[]={EUC_JP,EUC_CN,EUC_KR,EUC_TW,UTF8,MULE_INTERNAL,SJIS,BIG5,GBK,UHC,JOHAB}; + + while (MB_CHARACTERSET[i] != characterset_code || OTHER != MB_CHARACTERSET[i] ) + { i++; } -#ifdef _DEBUG - qlog("i = %d\n", i); -#endif - return (char *) (s + i); + return (MB_CHARACTERSET[i]); } - -void -multibyte_init(void) +int +pg_CS_code(const unsigned char *characterset_string) { - multibyte_status = 0; + int i = 0, c; + for(i = 0; CS_Table[i].code != OTHER; i++) + { + if (strstr(characterset_string,CS_Table[i].name)) + c = CS_Table[i].code; + } + return (c); } - unsigned char * -check_client_encoding(unsigned char *str) +pg_CS_name(const int characterset_code) { - if (strstr(str, "%27SJIS%27") || - strstr(str, "%27Shift_JIS%27") || - strstr(str, "'SJIS'") || - strstr(str, "'sjis'") || - strstr(str, "'Shift_JIS'")) - { - multibyte_client_encoding = SJIS; - return ("SJIS"); - } - if (strstr(str, "%27BIG5%27") || - strstr(str, "%27Big5%27") || - strstr(str, "'BIG5'") || - strstr(str, "'big5'") || - strstr(str, "'Big5'")) + int i = 0; + for (i = 0; CS_Table[i].code != OTHER; i++) { - multibyte_client_encoding = BIG5; - return ("BIG5"); + if (CS_Table[i].code == characterset_code) + return CS_Table[i].name; } return ("OTHER"); } - -/*-------- - * Multibyte Status Function. - * Input char - * Output 0 : 1 Byte Character. - * 1 : MultibyteCharacter Last Byte. 
- * N : MultibyteCharacter Fast or Middle Byte. - *-------- - */ int -multibyte_char_check(unsigned char s) +pg_CS_stat(int stat,unsigned int character,int characterset_code) { - switch (multibyte_client_encoding) + if (character == 0) + stat = 0; + switch (characterset_code) { - /* Japanese Shift-JIS(CP932) Support. */ - case SJIS: + case UTF8: { - if (multibyte_status < 2 && s > 0x80 && !(s > 0x9f && s < 0xE0)) - multibyte_status = 2; - else if (multibyte_status == 2) - multibyte_status = 1; + if (stat < 2 && + character >= 0x80) + { + if (character >= 0xfc) + stat = 6; + else if (character >= 0xf8) + stat = 5; + else if (character >= 0xf0) + stat = 4; + else if (character >= 0xe0) + stat = 3; + else if (character >= 0xc0) + stat = 2; + } + else if (stat > 2 && + character > 0x7f) + stat--; else - multibyte_status = 0; + stat=0; } break; - - /* Chinese Big5(CP950) Support. */ +/* Shift-JIS Support. */ + case SJIS: + { + if (stat < 2 && + character > 0x80 && + !(character > 0x9f && + character < 0xe0)) + stat = 2; + else if (stat == 2) + stat = 1; + else + stat = 0; + } + break; +/* Chinese Big5 Support. */ case BIG5: { - if (multibyte_status < 2 && s > 0xA0) - multibyte_status = 2; - else if (multibyte_status == 2) - multibyte_status = 1; + if (stat < 2 && + character > 0xA0) + stat = 2; + else if (stat == 2) + stat = 1; + else + stat = 0; + } + break; +/* Chinese GBK Support. */ + case GBK: + { + if (stat < 2 && + character > 0x7F) + stat = 2; + else if (stat == 2) + stat = 1; + else + stat = 0; + } + break; + +/* Korian UHC Support. 
*/ + case UHC: + { + if (stat < 2 && + character > 0x7F) + stat = 2; + else if (stat == 2) + stat = 1; + else + stat = 0; + } + break; + +/* EUC_JP Support */ + case EUC_JP: + { + if (stat < 3 && + character == 0x8f) /* JIS X 0212 */ + stat = 3; else - multibyte_status = 0; + if (stat != 2 && + (character == 0x8e || + character > 0xa0)) /* Half Katakana HighByte & Kanji HighByte */ + stat = 2; + else if (stat == 2) + stat = 1; + else + stat = 0; + } + break; + +/* EUC_CN, EUC_KR, JOHAB Support */ + case EUC_CN: + case EUC_KR: + case JOHAB: + { + if (stat < 2 && + character > 0xa0) + stat = 2; + else if (stat == 2) + stat = 1; + else + stat = 0; + } + break; + case EUC_TW: + { + if (stat < 4 && + character == 0x8e) + stat = 4; + else if (stat == 4 && + character > 0xa0) + stat = 3; + else if (stat == 3 || + stat < 2 && + character > 0xa0) + stat = 2; + else if (stat == 2) + stat = 1; + else + stat = 0; } break; default: - multibyte_status = 0; + { + stat = 0; + } + break; + } + return stat; +} + + +unsigned char * +pg_mbschr(const unsigned char *string, unsigned int character) +{ + int mb_st = 0; + unsigned char *s; + s = (unsigned char *) string; + + for(;;) + { + mb_st = pg_CS_stat(mb_st, (unsigned char) *s,PG_CCSC); + if (mb_st == 0 && (*s == character || *s == 0)) + break; + else + s++; + } + return (s); +} + +int +pg_mbslen(const unsigned char *string) +{ + unsigned char *s; + int len, cs_stat; + for (len = 0, cs_stat = 0, s = (unsigned char *) string; *s != 0; s++) + { + cs_stat = pg_CS_stat(cs_stat,(unsigned int) *s, PG_CCSC); + if (cs_stat < 2) + len++; + } + return len; +} + +unsigned char * +pg_mbsinc(const unsigned char *current ) +{ + int mb_stat = 0; + if (*current != 0) + { + mb_stat = (int) pg_CS_stat(mb_stat, *current, PG_CCSC); + if (mb_stat == 0) + mb_stat = 1; + return ((unsigned char *) current + mb_stat); + } + else + return NULL; +} + +void +CC_lookup_characterset(ConnectionClass *self) +{ + HSTMT hstmt; + StatementClass *stmt; + RETCODE 
result; + static char *func = "CC_lookup_characterset"; + + mylog("%s: entering...\n", func); + PG_SCSS = malloc(MAX_CHARACTERSET_NAME); + PG_CCSS = malloc(MAX_CHARACTERSET_NAME); + + result = PGAPI_AllocStmt(self, &hstmt); + if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO)) + return; + stmt = (StatementClass *) hstmt; + + result = PGAPI_ExecDirect(hstmt, "Show Client_Encoding", SQL_NTS); + if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO)) + { + PGAPI_FreeStmt(hstmt, SQL_DROP); + return; + } + result = PGAPI_AllocStmt(self, &hstmt); + if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO)) + return; + stmt = (StatementClass *) hstmt; + + result = PGAPI_ExecDirect(hstmt, "Show Server_Encoding", SQL_NTS); + if ((result != SQL_SUCCESS) && (result != SQL_SUCCESS_WITH_INFO)) + { + PGAPI_FreeStmt(hstmt, SQL_DROP); + return; } -#ifdef _DEBUG - qlog("multibyte_client_encoding = %d s = 0x%02X multibyte_stat = %d\n", multibyte_client_encoding, s, multibyte_status); -#endif - return (multibyte_status); + + strcpy(PG_SCSS , pg_CS_name(PG_SCSC = pg_CS_code(PG_SCSS))); + strcpy(PG_CCSS , pg_CS_name(PG_CCSC = pg_CS_code(PG_CCSS))); + + qlog(" [ Server encoding = '%s' (code = %d), Client encoding = '%s' (code = %d) ]\n", PG_SCSS, PG_SCSC, PG_CCSS, PG_CCSC); } diff --git a/src/interfaces/odbc/multibyte.h b/src/interfaces/odbc/multibyte.h index c171c68fbd3..b6d487b634d 100644 --- a/src/interfaces/odbc/multibyte.h +++ b/src/interfaces/odbc/multibyte.h @@ -10,30 +10,78 @@ #define EUC_JP 1 /* EUC for Japanese */ #define EUC_CN 2 /* EUC for Chinese */ #define EUC_KR 3 /* EUC for Korean */ -#define EUC_TW 4 /* EUC for Taiwan */ -#define UNICODE 5 /* Unicode UTF-8 */ -#define MULE_INTERNAL 6 /* Mule internal code */ -#define LATIN1 7 /* ISO-8859 Latin 1 */ -#define LATIN2 8 /* ISO-8859 Latin 2 */ -#define LATIN3 9 /* ISO-8859 Latin 3 */ -#define LATIN4 10 /* ISO-8859 Latin 4 */ -#define LATIN5 11 /* ISO-8859 Latin 5 */ -#define LATIN6 12 /* 
ISO-8859 Latin 6 */ -#define LATIN7 13 /* ISO-8859 Latin 7 */ -#define LATIN8 14 /* ISO-8859 Latin 8 */ -#define LATIN9 15 /* ISO-8859 Latin 9 */ -#define KOI8 16 /* KOI8-R/U */ -#define WIN 17 /* windows-1251 */ -#define ALT 18 /* Alternativny Variant (MS-DOS CP866) */ -#define SJIS 32 /* Shift JIS */ -#define BIG5 33 /* Big5 */ -#define WIN1250 34 /* windows-1250 */ +#define EUC_TW 4 /* EUC for Taiwan */
+#define JOHAB 5 +#define UTF8 6 /* Unicode UTF-8 */ +#define MULE_INTERNAL 7 /* Mule internal code */ +#define LATIN1 8 /* ISO-8859 Latin 1 */ +#define LATIN2 9 /* ISO-8859 Latin 2 */ +#define LATIN3 10 /* ISO-8859 Latin 3 */ +#define LATIN4 11 /* ISO-8859 Latin 4 */ +#define LATIN5 12 /* ISO-8859 Latin 5 */ +#define LATIN6 13 /* ISO-8859 Latin 6 */ +#define LATIN7 14 /* ISO-8859 Latin 7 */ +#define LATIN8 15 /* ISO-8859 Latin 8 */ +#define LATIN9 16 /* ISO-8859 Latin 9 */
+#define LATIN10 17 /* ISO-8859 Latin 10 */
+#define WIN1256 18 /* Arabic Windows */
+#define TCVN 19 /* Vietnamese Windows */
+#define WIN874 20 /* Thai Windows */
+#define KOI8R 21 /* KOI8-R/U */
+#define WIN1251 22 /* windows-1251 */ +#define ALT 23 /* Alternativny Variant (MS-DOS CP866) */ +#define ISO_8859_5 24 /* ISO-8859-5 */
+#define ISO_8859_6 25 /* ISO-8859-6 */
+#define ISO_8859_7 26 /* ISO-8859-7 */
+#define ISO_8859_8 27 /* ISO-8859-8 */
+
+#define SJIS 28 /* Shift JIS */ +#define BIG5 29 /* Big5 */
+#define GBK 30 /* GBK */
+#define UHC 31 /* UHC */
+#define WIN1250 32 /* windows-1250 */
+#define OTHER -1
+
+#define MAX_CHARACTERSET_NAME 24
+#define MAX_CHARACTER_LEN 6
- -extern int multibyte_client_encoding; /* Multibyte client encoding. */ -extern int multibyte_status; /* Multibyte charcter status. */ - -void multibyte_init(void); -unsigned char *check_client_encoding(unsigned char *str); -int multibyte_char_check(unsigned char s); -unsigned char *multibyte_strchr(const unsigned char *s, unsigned char c); +/* OLD Type */
+// extern int multibyte_client_encoding; /* Multibyte client encoding. */ +// extern int multibyte_status; /* Multibyte charcter status. */ +// +// void multibyte_init(void); +// unsigned char *check_client_encoding(unsigned char *sql_string); +// int multibyte_char_check(unsigned char s); +// unsigned char *multibyte_strchr(const unsigned char *string, unsigned int c);
+
+/* New Type */
+
+extern int PG_CCST; /* Client Character StaTus */
+
+extern int PG_SCSC; /* Server Character Set (Code) */
+extern int PG_CCSC; /* Client Character Set (Code) */
+extern unsigned char *PG_SCSS; /* Server Character Set (String) */
+extern unsigned char *PG_CCSS; /* Client Character Set (String) */
+
+extern void CC_lookup_characterset(ConnectionClass *self);
+
+extern int pg_CS_stat(int stat,unsigned int charcter,int characterset_code);
+extern int pg_CS_code(const unsigned char *stat_string);
+extern unsigned char *pg_CS_name(const int code);
+
+typedef struct pg_CS
+{
+ unsigned char *name;
+ int code;
+}pg_CS;
+extern pg_CS CS_Table[];
+
+extern int pg_mbslen(const unsigned char *string);
+extern unsigned char *pg_mbschr(const unsigned char *string, unsigned int character);
+extern unsigned char *pg_mbsinc( const unsigned char *current );
+
+/* Old Type Compatible */
+#define multibyte_init() (PG_CCST = 0)
+#define multibyte_char_check(X) pg_CS_stat(PG_CCST, (unsigned int) X, PG_CCSC)
+#define multibyte_strchr(X,Y) pg_mbschr(X,Y)
+#define check_client_encoding(X) pg_CS_name(PG_CCSC = pg_CS_code(X))
|