aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/mb/conv.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/mb/conv.c')
-rw-r--r--src/backend/utils/mb/conv.c1246
1 files changed, 31 insertions, 1215 deletions
diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c
index cdfa54ac65c..095ca562cc0 100644
--- a/src/backend/utils/mb/conv.c
+++ b/src/backend/utils/mb/conv.c
@@ -1,104 +1,24 @@
-/*
- * conversion between client encoding and server internal encoding
- * (currently mule internal code (mic) is used)
- * Tatsuo Ishii
+/*-------------------------------------------------------------------------
*
- * WIN1250 client encoding support contributed by Pavel Behal
- * SJIS UDC (NEC selection IBM kanji) support contributed by Eiji Tokuya
+ * Utility functions for conversion procs.
*
- * $Id: conv.c,v 1.38 2002/06/13 08:28:54 ishii Exp $
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
*
+ * IDENTIFICATION
+ * $Header: /cvsroot/pgsql/src/backend/utils/mb/conv.c,v 1.39 2002/07/18 02:02:30 ishii Exp $
*
+ *-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "mb/pg_wchar.h"
-#ifdef UNICODE_CONVERSION
-
-/*
- * for Unicode (UTF-8) support
- */
-#include "Unicode/utf8_to_iso8859_2.map"
-#include "Unicode/utf8_to_iso8859_3.map"
-#include "Unicode/utf8_to_iso8859_4.map"
-#include "Unicode/utf8_to_iso8859_5.map"
-#include "Unicode/utf8_to_iso8859_6.map"
-#include "Unicode/utf8_to_iso8859_7.map"
-#include "Unicode/utf8_to_iso8859_8.map"
-#include "Unicode/utf8_to_iso8859_9.map"
-#include "Unicode/utf8_to_iso8859_10.map"
-#include "Unicode/utf8_to_iso8859_13.map"
-#include "Unicode/utf8_to_iso8859_14.map"
-#include "Unicode/utf8_to_iso8859_15.map"
-#include "Unicode/utf8_to_iso8859_16.map"
-#include "Unicode/iso8859_2_to_utf8.map"
-#include "Unicode/iso8859_3_to_utf8.map"
-#include "Unicode/iso8859_4_to_utf8.map"
-#include "Unicode/iso8859_5_to_utf8.map"
-#include "Unicode/iso8859_6_to_utf8.map"
-#include "Unicode/iso8859_7_to_utf8.map"
-#include "Unicode/iso8859_8_to_utf8.map"
-#include "Unicode/iso8859_9_to_utf8.map"
-#include "Unicode/iso8859_10_to_utf8.map"
-#include "Unicode/iso8859_13_to_utf8.map"
-#include "Unicode/iso8859_14_to_utf8.map"
-#include "Unicode/iso8859_15_to_utf8.map"
-#include "Unicode/iso8859_16_to_utf8.map"
-#include "Unicode/utf8_to_euc_jp.map"
-#include "Unicode/euc_jp_to_utf8.map"
-#include "Unicode/utf8_to_euc_cn.map"
-#include "Unicode/euc_cn_to_utf8.map"
-#include "Unicode/utf8_to_gb18030.map"
-#include "Unicode/gb18030_to_utf8.map"
-#include "Unicode/utf8_to_euc_kr.map"
-#include "Unicode/euc_kr_to_utf8.map"
-#include "Unicode/utf8_to_euc_tw.map"
-#include "Unicode/euc_tw_to_utf8.map"
-#include "Unicode/utf8_to_sjis.map"
-#include "Unicode/sjis_to_utf8.map"
-#include "Unicode/utf8_to_big5.map"
-#include "Unicode/big5_to_utf8.map"
-#include "Unicode/utf8_to_gbk.map"
-#include "Unicode/gbk_to_utf8.map"
-#include "Unicode/utf8_to_uhc.map"
-#include "Unicode/uhc_to_utf8.map"
-#include "Unicode/utf8_to_johab.map"
-#include "Unicode/johab_to_utf8.map"
-#include "Unicode/utf8_to_tcvn.map"
-#include "Unicode/tcvn_to_utf8.map"
-#include "Unicode/utf8_to_win1250.map"
-#include "Unicode/win1250_to_utf8.map"
-#include "Unicode/utf8_to_win1256.map"
-#include "Unicode/win1256_to_utf8.map"
-#include "Unicode/utf8_to_win874.map"
-#include "Unicode/win874_to_utf8.map"
-/* Cyrillic charset conversion */
-#include "Unicode/alt_to_utf8.map"
-#include "Unicode/koi8r_to_utf8.map"
-#include "Unicode/win1251_to_utf8.map"
-#include "Unicode/utf8_to_alt.map"
-#include "Unicode/utf8_to_koi8r.map"
-#include "Unicode/utf8_to_win1251.map"
-#endif /* UNICODE_CONVERSION */
-
-/*
- * SJIS alternative code.
- * this code is used if a mapping EUC -> SJIS is not defined.
- */
-#define PGSJISALTCODE 0x81ac
-#define PGEUCALTCODE 0xa2ae
-
-/*
- * conversion table between SJIS UDC (IBM kanji) and EUC_JP
- */
-#include "sjis.map"
-
/*
* convert bogus chars that cannot be represented in the current
* encoding system.
*/
-static void
-printBogusChar(unsigned char **mic, unsigned char **p)
+void
+pg_print_bogus_char(unsigned char **mic, unsigned char **p)
{
char strbuf[16];
int l = pg_mic_mblen(*mic);
@@ -114,298 +34,6 @@ printBogusChar(unsigned char **mic, unsigned char **p)
}
/*
- * SJIS ---> MIC
- */
-static void
-sjis2mic(unsigned char *sjis, unsigned char *p, int len)
-{
- int c1,
- c2,
-/* Eiji Tokuya patched begin */
- i,
- k,
- k2;
-
-/* Eiji Tokuya patched end */
- while (len > 0 && (c1 = *sjis++))
- {
- if (c1 >= 0xa1 && c1 <= 0xdf)
- {
- /* JIS X0201 (1 byte kana) */
- len--;
- *p++ = LC_JISX0201K;
- *p++ = c1;
- }
- else if (c1 > 0x7f)
- {
- /*
- * JIS X0208, X0212, user defined extended characters
- */
- c2 = *sjis++;
- k = (c1 << 8) + c2;
-/* Eiji Tokuya patched begin */
- if (k >= 0xed40 && k < 0xf040)
- {
- /* NEC selection IBM kanji */
- for (i = 0;; i++)
- {
- k2 = ibmkanji[i].nec;
- if (k2 == 0xffff)
- break;
- if (k2 == k)
- {
- k = ibmkanji[i].sjis;
- c1 = (k >> 8) & 0xff;
- c2 = k & 0xff;
- }
- }
- }
-
- if (k < 0xeb3f)
-/* Eiji Tokuya patched end */
- {
- /* JIS X0208 */
- len -= 2;
- *p++ = LC_JISX0208;
- *p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
- *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
- }
-/* Eiji Tokuya patched begin */
- else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
- {
- /* NEC selection IBM kanji - Other undecided justice */
-/* Eiji Tokuya patched end */
- *p++ = LC_JISX0208;
- *p++ = PGEUCALTCODE >> 8;
- *p++ = PGEUCALTCODE & 0xff;
- }
- else if (k >= 0xf040 && k < 0xf540)
- {
- /*
- * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
- * 0x7e7e EUC 0xf5a1 - 0xfefe
- */
- len -= 2;
- *p++ = LC_JISX0208;
- c1 -= 0x6f;
- *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
- *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
- }
- else if (k >= 0xf540 && k < 0xfa40)
- {
- /*
- * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
- * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
- */
- len -= 2;
- *p++ = LC_JISX0212;
- c1 -= 0x74;
- *p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
- *p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
- }
- else if (k >= 0xfa40)
- {
- /*
- * mapping IBM kanji to X0208 and X0212
- *
- */
- len -= 2;
- for (i = 0;; i++)
- {
- k2 = ibmkanji[i].sjis;
- if (k2 == 0xffff)
- break;
- if (k2 == k)
- {
- k = ibmkanji[i].euc;
- if (k >= 0x8f0000)
- {
- *p++ = LC_JISX0212;
- *p++ = 0x80 | ((k & 0xff00) >> 8);
- *p++ = 0x80 | (k & 0xff);
- }
- else
- {
- *p++ = LC_JISX0208;
- *p++ = 0x80 | (k >> 8);
- *p++ = 0x80 | (k & 0xff);
- }
- }
- }
- }
- }
- else
- { /* should be ASCII */
- len--;
- *p++ = c1;
- }
- }
- *p = '\0';
-}
-
-/*
- * MIC ---> SJIS
- */
-static void
-mic2sjis(unsigned char *mic, unsigned char *p, int len)
-{
- int c1,
- c2,
- k;
-
- while (len > 0 && (c1 = *mic))
- {
- len -= pg_mic_mblen(mic++);
-
- if (c1 == LC_JISX0201K)
- *p++ = *mic++;
- else if (c1 == LC_JISX0208)
- {
- c1 = *mic++;
- c2 = *mic++;
- k = (c1 << 8) | (c2 & 0xff);
- if (k >= 0xf5a1)
- {
- /* UDC1 */
- c1 -= 0x54;
- *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
- }
- else
- *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
- *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
- }
- else if (c1 == LC_JISX0212)
- {
- int i,
- k2;
-
- c1 = *mic++;
- c2 = *mic++;
- k = c1 << 8 | c2;
- if (k >= 0xf5a1)
- {
- /* UDC2 */
- c1 -= 0x54;
- *p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
- *p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
- }
- else
- {
- /* IBM kanji */
- for (i = 0;; i++)
- {
- k2 = ibmkanji[i].euc & 0xffff;
- if (k2 == 0xffff)
- {
- *p++ = PGSJISALTCODE >> 8;
- *p++ = PGSJISALTCODE & 0xff;
- break;
- }
- if (k2 == k)
- {
- k = ibmkanji[i].sjis;
- *p++ = k >> 8;
- *p++ = k & 0xff;
- break;
- }
- }
- }
- }
- else if (c1 > 0x7f)
- {
- /* cannot convert to SJIS! */
- *p++ = PGSJISALTCODE >> 8;
- *p++ = PGSJISALTCODE & 0xff;
- }
- else
- { /* should be ASCII */
- *p++ = c1;
- }
- }
- *p = '\0';
-}
-
-/*
- * EUC_JP ---> MIC
- */
-static void
-euc_jp2mic(unsigned char *euc, unsigned char *p, int len)
-{
- int c1;
-
- while (len > 0 && (c1 = *euc++))
- {
- if (c1 == SS2)
- { /* 1 byte kana? */
- len -= 2;
- *p++ = LC_JISX0201K;
- *p++ = *euc++;
- }
- else if (c1 == SS3)
- { /* JIS X0212 kanji? */
- len -= 3;
- *p++ = LC_JISX0212;
- *p++ = *euc++;
- *p++ = *euc++;
- }
- else if (c1 & 0x80)
- { /* kanji? */
- len -= 2;
- *p++ = LC_JISX0208;
- *p++ = c1;
- *p++ = *euc++;
- }
- else
- { /* should be ASCII */
- len--;
- *p++ = c1;
- }
- }
- *p = '\0';
-}
-
-/*
- * MIC ---> EUC_JP
- */
-static void
-mic2euc_jp(unsigned char *mic, unsigned char *p, int len)
-{
- int c1;
-
- while (len > 0 && (c1 = *mic))
- {
- len -= pg_mic_mblen(mic++);
-
- if (c1 == LC_JISX0201K)
- {
- *p++ = SS2;
- *p++ = *mic++;
- }
- else if (c1 == LC_JISX0212)
- {
- *p++ = SS3;
- *p++ = *mic++;
- *p++ = *mic++;
- }
- else if (c1 == LC_JISX0208)
- {
- *p++ = *mic++;
- *p++ = *mic++;
- }
- else if (c1 > 0x7f)
- { /* cannot convert to EUC_JP! */
- mic--;
- printBogusChar(&mic, &p);
- }
- else
- { /* should be ASCII */
- *p++ = c1;
- }
- }
- *p = '\0';
-}
-
-/*
* EUC_KR ---> MIC
*/
static void
@@ -451,7 +79,7 @@ mic2euc_kr(unsigned char *mic, unsigned char *p, int len)
else if (c1 > 0x7f)
{ /* cannot convert to EUC_KR! */
mic--;
- printBogusChar(&mic, &p);
+ pg_print_bogus_char(&mic, &p);
}
else
{ /* should be ASCII */
@@ -507,7 +135,7 @@ mic2euc_cn(unsigned char *mic, unsigned char *p, int len)
else if (c1 > 0x7f)
{ /* cannot convert to EUC_CN! */
mic--;
- printBogusChar(&mic, &p);
+ pg_print_bogus_char(&mic, &p);
}
else
{ /* should be ASCII */
@@ -517,6 +145,7 @@ mic2euc_cn(unsigned char *mic, unsigned char *p, int len)
*p = '\0';
}
+#ifdef NOT_USED
/*
* GB18030 ---> MIC
* Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
@@ -594,206 +223,19 @@ mic2gb18030(unsigned char *mic, unsigned char *p, int len)
}
else{
mic--;
- printBogusChar(&mic, &p);
+ pg_print_bogus_char(&mic, &p);
mic--;
- printBogusChar(&mic, &p);
+ pg_print_bogus_char(&mic, &p);
}
}
else{
mic--;
- printBogusChar(&mic, &p);
- }
- }
- *p = '\0';
-}
-
-/*
- * EUC_TW ---> MIC
- */
-static void
-euc_tw2mic(unsigned char *euc, unsigned char *p, int len)
-{
- int c1;
-
- while (len > 0 && (c1 = *euc++))
- {
- if (c1 == SS2)
- {
- len -= 4;
- c1 = *euc++; /* plane No. */
- if (c1 == 0xa1)
- *p++ = LC_CNS11643_1;
- else if (c1 == 0xa2)
- *p++ = LC_CNS11643_2;
- else
- {
- *p++ = 0x9d; /* LCPRV2 */
- *p++ = 0xa3 - c1 + LC_CNS11643_3;
- }
- *p++ = *euc++;
- *p++ = *euc++;
- }
- else if (c1 & 0x80)
- { /* CNS11643-1 */
- len -= 2;
- *p++ = LC_CNS11643_1;
- *p++ = c1;
- *p++ = *euc++;
- }
- else
- { /* should be ASCII */
- len--;
- *p++ = c1;
- }
- }
- *p = '\0';
-}
-
-/*
- * MIC ---> EUC_TW
- */
-static void
-mic2euc_tw(unsigned char *mic, unsigned char *p, int len)
-{
- int c1;
-
- while (len > 0 && (c1 = *mic))
- {
- len -= pg_mic_mblen(mic++);
-
- if (c1 == LC_CNS11643_1)
- {
- *p++ = *mic++;
- *p++ = *mic++;
- }
- else if (c1 == LC_CNS11643_2)
- {
- *p++ = SS2;
- *p++ = 0xa2;
- *p++ = *mic++;
- *p++ = *mic++;
- }
- else if (c1 == 0x9d)
- { /* LCPRV2? */
- *p++ = SS2;
- *p++ = *mic++ - LC_CNS11643_3 + 0xa3;
- *p++ = *mic++;
- *p++ = *mic++;
- }
- else if (c1 > 0x7f)
- { /* cannot convert to EUC_TW! */
- mic--;
- printBogusChar(&mic, &p);
- }
- else
- { /* should be ASCII */
- *p++ = c1;
- }
- }
- *p = '\0';
-}
-
-/*
- * Big5 ---> MIC
- */
-static void
-big52mic(unsigned char *big5, unsigned char *p, int len)
-{
- unsigned short c1;
- unsigned short big5buf,
- cnsBuf;
- unsigned char lc;
- char bogusBuf[3];
- int i;
-
- while (len > 0 && (c1 = *big5++))
- {
- if (c1 <= 0x7fU)
- { /* ASCII */
- len--;
- *p++ = c1;
- }
- else
- {
- len -= 2;
- big5buf = c1 << 8;
- c1 = *big5++;
- big5buf |= c1;
- cnsBuf = BIG5toCNS(big5buf, &lc);
- if (lc != 0)
- {
- if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
- {
- *p++ = 0x9d; /* LCPRV2 */
- }
- *p++ = lc; /* Plane No. */
- *p++ = (cnsBuf >> 8) & 0x00ff;
- *p++ = cnsBuf & 0x00ff;
- }
- else
- { /* cannot convert */
- big5 -= 2;
- *p++ = '(';
- for (i = 0; i < 2; i++)
- {
- sprintf(bogusBuf, "%02x", *big5++);
- *p++ = bogusBuf[0];
- *p++ = bogusBuf[1];
- }
- *p++ = ')';
- }
- }
- }
- *p = '\0';
-}
-
-/*
- * MIC ---> Big5
- */
-static void
-mic2big5(unsigned char *mic, unsigned char *p, int len)
-{
- int l;
- unsigned short c1;
- unsigned short big5buf,
- cnsBuf;
-
- while (len > 0 && (c1 = *mic))
- {
- l = pg_mic_mblen(mic++);
- len -= l;
-
- /* 0x9d means LCPRV2 */
- if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == 0x9d)
- {
- if (c1 == 0x9d)
- {
- c1 = *mic++; /* get plane no. */
- }
- cnsBuf = (*mic++) << 8;
- cnsBuf |= (*mic++) & 0x00ff;
- big5buf = CNStoBIG5(cnsBuf, c1);
- if (big5buf == 0)
- { /* cannot convert to Big5! */
- mic -= l;
- printBogusChar(&mic, &p);
- }
- else
- {
- *p++ = (big5buf >> 8) & 0x00ff;
- *p++ = big5buf & 0x00ff;
- }
- }
- else if (c1 <= 0x7f) /* ASCII */
- *p++ = c1;
- else
- { /* cannot convert to Big5! */
- mic--;
- printBogusChar(&mic, &p);
+ pg_print_bogus_char(&mic, &p);
}
}
*p = '\0';
}
+#endif
/*
* LATINn ---> MIC
@@ -806,7 +248,7 @@ latin2mic(unsigned char *l, unsigned char *p, int len, int lc)
while (len-- > 0 && (c1 = *l++))
{
if (c1 > 0x7f)
- { /* Latin1? */
+ { /* Latin? */
*p++ = lc;
}
*p++ = c1;
@@ -831,7 +273,7 @@ mic2latin(unsigned char *mic, unsigned char *p, int len, int lc)
else if (c1 > 0x7f)
{
mic--;
- printBogusChar(&mic, &p);
+ pg_print_bogus_char(&mic, &p);
}
else
{ /* should be ASCII */
@@ -882,24 +324,11 @@ mic2latin4(unsigned char *mic, unsigned char *p, int len)
mic2latin(mic, p, len, LC_ISO8859_4);
}
-#ifdef NOT_USED
-static void
-latin52mic(unsigned char *l, unsigned char *p, int len)
-{
- latin2mic(l, p, len, LC_ISO8859_5);
-}
-static void
-mic2latin5(unsigned char *mic, unsigned char *p, int len)
-{
- mic2latin(mic, p, len, LC_ISO8859_5);
-}
-#endif
-
/*
* ASCII ---> MIC
*/
-static void
-ascii2mic(unsigned char *l, unsigned char *p, int len)
+void
+pg_ascii2mic(unsigned char *l, unsigned char *p, int len)
{
int c1;
@@ -911,15 +340,15 @@ ascii2mic(unsigned char *l, unsigned char *p, int len)
/*
* MIC ---> ASCII
*/
-static void
-mic2ascii(unsigned char *mic, unsigned char *p, int len)
+void
+pg_mic2ascii(unsigned char *mic, unsigned char *p, int len)
{
int c1;
while (len-- > 0 && (c1 = *mic))
{
if (c1 > 0x7f)
- printBogusChar(&mic, &p);
+ pg_print_bogus_char(&mic, &p);
else
{ /* should be ASCII */
*p++ = c1;
@@ -1257,82 +686,6 @@ mic2win1250(unsigned char *mic, unsigned char *p, int len)
mic2latin_with_table(mic, p, len, LC_ISO8859_2, iso88592_2_win1250);
}
-#ifdef UNICODE_CONVERSION
-
-/*
- * UNICODE(UTF-8) support
- */
-
-/*
- * ASCII ---> UTF-8
- */
-static void
-ascii2utf(unsigned char *ascii, unsigned char *utf, int len)
-{
- ascii2mic(ascii, utf, len);
-}
-
-/*
- * UTF-8 ---> ASCII
- */
-static void
-utf2ascii(unsigned char *utf, unsigned char *ascii, int len)
-{
- mic2ascii(utf, ascii, len);
-}
-
-/*
- * ISO8859-1 ---> UTF-8
- */
-static void
-iso8859_1_to_utf(unsigned char *iso, unsigned char *utf, int len)
-{
- unsigned short c;
-
- while (len-- > 0 && (c = *iso++))
- {
- if (c < 0x80)
- *utf++ = c;
- else
- {
- *utf++ = (c >> 6) | 0xc0;
- *utf++ = (c & 0x003f) | 0x80;
- }
- }
- *utf = '\0';
-}
-
-/*
- * UTF-8 ---> ISO8859-1
- */
-static void
-utf_to_iso8859_1(unsigned char *utf, unsigned char *iso, int len)
-{
- unsigned short c,
- c1,
- c2;
-
- while (len > 0 && (c = *utf++))
- {
- if ((c & 0xe0) == 0xc0)
- {
- c1 = c & 0x1f;
- c2 = *utf++ & 0x3f;
- *iso = c1 << 6;
- *iso++ |= c2;
- len -= 2;
- }
- else if ((c & 0xe0) == 0xe0)
- elog(ERROR, "Could not convert UTF-8 to ISO8859-1");
- else
- {
- *iso++ = c;
- len--;
- }
- }
- *iso = '\0';
-}
-
/*
* comparison routine for bsearch()
* this routine is intended for UTF-8 -> local code
@@ -1372,9 +725,9 @@ compare2(const void *p1, const void *p2)
* map: the conversion map.
* size: the size of the conversion map.
*/
-static void
-utf_to_local(unsigned char *utf, unsigned char *iso,
- pg_utf_to_local *map, int size, int len)
+void
+UtfToLocal(unsigned char *utf, unsigned char *iso,
+ pg_utf_to_local *map, int size, int len)
{
unsigned int iutf;
int l;
@@ -1420,43 +773,6 @@ utf_to_local(unsigned char *utf, unsigned char *iso,
#ifdef NOT_USED
/*
- * UTF-8 ---> ISO8859-2
- */
-static void
-utf_to_latin2(unsigned char *utf, unsigned char *iso, int len)
-{
- utf_to_local(utf, iso, ULmapISO8859_2, sizeof(ULmapISO8859_2) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * UTF-8 ---> ISO8859-3
- */
-static void
-utf_to_latin3(unsigned char *utf, unsigned char *iso, int len)
-{
- utf_to_local(utf, iso, ULmapISO8859_3, sizeof(ULmapISO8859_3) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * UTF-8 ---> ISO8859-4
- */
-static void
-utf_to_latin4(unsigned char *utf, unsigned char *iso, int len)
-{
- utf_to_local(utf, iso, ULmapISO8859_4, sizeof(ULmapISO8859_4) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * UTF-8 ---> ISO8859-5
- */
-static void
-utf_to_latin5(unsigned char *utf, unsigned char *iso, int len)
-{
- utf_to_local(utf, iso, ULmapISO8859_5, sizeof(ULmapISO8859_5) / sizeof(pg_utf_to_local), len);
-}
-#endif /* NOT_USED */
-
-/*
* Cyrillic charsets
*/
@@ -1490,11 +806,13 @@ utf_to_ALT(unsigned char *utf, unsigned char *iso, int len)
utf_to_local(utf, iso, ULmap_ALT, sizeof(ULmap_ALT) / sizeof(pg_utf_to_local), len);
}
+#endif
+
/*
* local code ---> UTF-8
*/
-static void
-local_to_utf(unsigned char *iso, unsigned char *utf,
+void
+LocalToUtf(unsigned char *iso, unsigned char *utf,
pg_local_to_utf *map, int size, int encoding, int len)
{
unsigned int iiso;
@@ -1557,69 +875,6 @@ local_to_utf(unsigned char *iso, unsigned char *utf,
#ifdef NOT_USED
/*
- * ISO-8859-2 ---> UTF-8
- */
-static void
-latin2_to_utf(unsigned char *iso, unsigned char *utf, int len)
-{
- local_to_utf(iso, utf, LUmapISO8859_2, sizeof(LUmapISO8859_2) / sizeof(pg_local_to_utf), PG_LATIN2, len);
-}
-
-/*
- * ISO-8859-3 ---> UTF-8
- */
-static void
-latin3_to_utf(unsigned char *iso, unsigned char *utf, int len)
-{
- local_to_utf(iso, utf, LUmapISO8859_3, sizeof(LUmapISO8859_3) / sizeof(pg_local_to_utf), PG_LATIN3, len);
-}
-
-/*
- * ISO-8859-4 ---> UTF-8
- */
-static void
-latin4_to_utf(unsigned char *iso, unsigned char *utf, int len)
-{
- local_to_utf(iso, utf, LUmapISO8859_4, sizeof(LUmapISO8859_4) / sizeof(pg_local_to_utf), PG_LATIN4, len);
-}
-
-/*
- * ISO-8859-5 ---> UTF-8
- */
-static void
-latin5_to_utf(unsigned char *iso, unsigned char *utf, int len)
-{
- local_to_utf(iso, utf, LUmapISO8859_5, sizeof(LUmapISO8859_5) / sizeof(pg_local_to_utf), PG_LATIN5, len);
-}
-#endif /* NOT_USED */
-
-#define UTF_ISO8859(_id_) \
-static void \
-utf_to_iso8859_##_id_(unsigned char *utf, unsigned char *iso, int len) \
-{ \
- utf_to_local(utf, iso, ULmapISO8859_##_id_, sizeof(ULmapISO8859_##_id_) / sizeof(pg_utf_to_local), len); \
-} \
-static void \
-iso8859_##_id_##_to_utf(unsigned char *iso, unsigned char *utf, int len) \
-{ \
- local_to_utf(iso, utf, LUmapISO8859_##_id_, sizeof(LUmapISO8859_##_id_) / sizeof(pg_local_to_utf), PG_LATIN1, len); \
-}
-
-UTF_ISO8859(2);
-UTF_ISO8859(3);
-UTF_ISO8859(4);
-UTF_ISO8859(5);
-UTF_ISO8859(6);
-UTF_ISO8859(7);
-UTF_ISO8859(8);
-UTF_ISO8859(9);
-UTF_ISO8859(10);
-UTF_ISO8859(13);
-UTF_ISO8859(14);
-UTF_ISO8859(15);
-UTF_ISO8859(16);
-
-/*
* KOI8-R ---> UTF-8
*/
static void
@@ -1647,215 +902,6 @@ ALT_to_utf(unsigned char *iso, unsigned char *utf, int len)
}
/*
- * UTF-8 ---> EUC_JP
- */
-static void
-utf_to_euc_jp(unsigned char *utf, unsigned char *euc, int len)
-
-{
- utf_to_local(utf, euc, ULmapEUC_JP,
- sizeof(ULmapEUC_JP) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * EUC_JP ---> UTF-8
- */
-static void
-euc_jp_to_utf(unsigned char *euc, unsigned char *utf, int len)
-{
- local_to_utf(euc, utf, LUmapEUC_JP,
- sizeof(LUmapEUC_JP) / sizeof(pg_local_to_utf), PG_EUC_JP, len);
-}
-
-/*
- * UTF-8 ---> EUC_CN
- */
-static void
-utf_to_euc_cn(unsigned char *utf, unsigned char *euc, int len)
-
-{
- utf_to_local(utf, euc, ULmapEUC_CN,
- sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * EUC_CN ---> UTF-8
- */
-static void
-euc_cn_to_utf(unsigned char *euc, unsigned char *utf, int len)
-{
- local_to_utf(euc, utf, LUmapEUC_CN,
- sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf), PG_EUC_CN, len);
-}
-
-/*
- * UTF-8 ---> GB18030
- */
-static void
-utf_to_gb18030(unsigned char *utf, unsigned char *euc, int len)
-
-{
- utf_to_local(utf, euc, ULmapGB18030,
- sizeof(ULmapGB18030) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * GB18030 ---> UTF-8
- */
-static void
-gb18030_to_utf(unsigned char *euc, unsigned char *utf, int len)
-{
- local_to_utf(euc, utf, LUmapGB18030,
- sizeof(LUmapGB18030) / sizeof(pg_local_to_utf), PG_GB18030, len);
-}
-/*
- * UTF-8 ---> EUC_KR
- */
-static void
-utf_to_euc_kr(unsigned char *utf, unsigned char *euc, int len)
-
-{
- utf_to_local(utf, euc, ULmapEUC_KR,
- sizeof(ULmapEUC_KR) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * EUC_KR ---> UTF-8
- */
-static void
-euc_kr_to_utf(unsigned char *euc, unsigned char *utf, int len)
-{
- local_to_utf(euc, utf, LUmapEUC_KR,
- sizeof(LUmapEUC_KR) / sizeof(pg_local_to_utf), PG_EUC_KR, len);
-}
-
-/*
- * UTF-8 ---> EUC_TW
- */
-static void
-utf_to_euc_tw(unsigned char *utf, unsigned char *euc, int len)
-
-{
- utf_to_local(utf, euc, ULmapEUC_TW,
- sizeof(ULmapEUC_TW) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * EUC_TW ---> UTF-8
- */
-static void
-euc_tw_to_utf(unsigned char *euc, unsigned char *utf, int len)
-{
- local_to_utf(euc, utf, LUmapEUC_TW,
- sizeof(LUmapEUC_TW) / sizeof(pg_local_to_utf), PG_EUC_TW, len);
-}
-
-/*
- * UTF-8 ---> SJIS
- */
-static void
-utf_to_sjis(unsigned char *utf, unsigned char *euc, int len)
-
-{
- utf_to_local(utf, euc, ULmapSJIS,
- sizeof(ULmapSJIS) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * SJIS ---> UTF-8
- */
-static void
-sjis_to_utf(unsigned char *euc, unsigned char *utf, int len)
-{
- local_to_utf(euc, utf, LUmapSJIS,
- sizeof(LUmapSJIS) / sizeof(pg_local_to_utf), PG_SJIS, len);
-}
-
-/*
- * UTF-8 ---> BIG5
- */
-static void
-utf_to_big5(unsigned char *utf, unsigned char *euc, int len)
-
-{
- utf_to_local(utf, euc, ULmapBIG5,
- sizeof(ULmapBIG5) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * BIG5 ---> UTF-8
- */
-static void
-big5_to_utf(unsigned char *euc, unsigned char *utf, int len)
-{
- local_to_utf(euc, utf, LUmapBIG5,
- sizeof(LUmapBIG5) / sizeof(pg_local_to_utf), PG_BIG5, len);
-}
-
-/*
- * UTF-8 ---> GBK
- */
-static void
-utf_to_gbk(unsigned char *utf, unsigned char *euc, int len)
-
-{
- utf_to_local(utf, euc, ULmapGBK,
- sizeof(ULmapGBK) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * GBK ---> UTF-8
- */
-static void
-gbk_to_utf(unsigned char *euc, unsigned char *utf, int len)
-{
- local_to_utf(euc, utf, LUmapGBK,
- sizeof(LUmapGBK) / sizeof(pg_local_to_utf), PG_GBK, len);
-}
-
-/*
- * UTF-8 ---> UHC
- */
-static void
-utf_to_uhc(unsigned char *utf, unsigned char *euc, int len)
-
-{
- utf_to_local(utf, euc, ULmapUHC,
- sizeof(ULmapUHC) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * UHC ---> UTF-8
- */
-static void
-uhc_to_utf(unsigned char *euc, unsigned char *utf, int len)
-{
- local_to_utf(euc, utf, LUmapUHC,
- sizeof(LUmapUHC) / sizeof(pg_local_to_utf), PG_UHC, len);
-}
-
-/*
- * UTF-8 ---> JOHAB
- */
-static void
-utf_to_johab(unsigned char *utf, unsigned char *euc, int len)
-
-{
- utf_to_local(utf, euc, ULmapJOHAB,
- sizeof(ULmapJOHAB) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * JOHAB ---> UTF-8
- */
-static void
-johab_to_utf(unsigned char *euc, unsigned char *utf, int len)
-{
- local_to_utf(euc, utf, LUmapJOHAB,
- sizeof(LUmapJOHAB) / sizeof(pg_local_to_utf), PG_JOHAB, len);
-}
-
-/*
* UTF-8 ---> WIN1250
*/
static void
@@ -1898,27 +944,6 @@ win1256_to_utf(unsigned char *euc, unsigned char *utf, int len)
}
/*
- * UTF-8 ---> TCVN
- */
-static void
-utf_to_tcvn(unsigned char *utf, unsigned char *euc, int len)
-
-{
- utf_to_local(utf, euc, ULmapTCVN,
- sizeof(ULmapTCVN) / sizeof(pg_utf_to_local), len);
-}
-
-/*
- * TCVN ---> UTF-8
- */
-static void
-tcvn_to_utf(unsigned char *euc, unsigned char *utf, int len)
-{
- local_to_utf(euc, utf, LUmapTCVN,
- sizeof(LUmapTCVN) / sizeof(pg_local_to_utf), PG_TCVN, len);
-}
-
-/*
* UTF-8 ---> WIN874
*/
static void
@@ -1939,213 +964,4 @@ win874_to_utf(unsigned char *euc, unsigned char *utf, int len)
sizeof(LUmapWIN874) / sizeof(pg_local_to_utf), PG_WIN874, len);
}
-/* ----------
- * Encoding conversion routines
- *
- * WARINIG: must by same order as pg_enc in include/mb/pg_wchar.h!
- * ----------
- */
-pg_enconv pg_enconv_tbl[] =
-{
- {
- PG_SQL_ASCII, ascii2mic, mic2ascii, ascii2utf, utf2ascii
- },
- {
- PG_EUC_JP, euc_jp2mic, mic2euc_jp, euc_jp_to_utf, utf_to_euc_jp
- },
- {
- PG_EUC_CN, euc_cn2mic, mic2euc_cn, euc_cn_to_utf, utf_to_euc_cn
- },
- {
- PG_EUC_KR, euc_kr2mic, mic2euc_kr, euc_kr_to_utf, utf_to_euc_kr
- },
- {
- PG_EUC_TW, euc_tw2mic, mic2euc_tw, euc_tw_to_utf, utf_to_euc_tw
- },
- {
- PG_JOHAB, 0, 0, johab_to_utf, utf_to_johab
- },
- {
- PG_UTF8, 0, 0, 0, 0
- },
- {
- PG_MULE_INTERNAL, 0, 0, 0, 0
- },
- {
- PG_LATIN1, latin12mic, mic2latin1, iso8859_1_to_utf, utf_to_iso8859_1
- },
- {
- PG_LATIN2, latin22mic, mic2latin2, iso8859_2_to_utf, utf_to_iso8859_2
- },
- {
- PG_LATIN3, latin32mic, mic2latin3, iso8859_3_to_utf, utf_to_iso8859_3
- },
- {
- PG_LATIN4, latin42mic, mic2latin4, iso8859_4_to_utf, utf_to_iso8859_4
- },
- {
- PG_LATIN5, iso2mic, mic2iso, iso8859_9_to_utf, utf_to_iso8859_9
- },
- {
- PG_LATIN6, 0, 0, iso8859_10_to_utf, utf_to_iso8859_10
- },
- {
- PG_LATIN7, 0, 0, iso8859_13_to_utf, utf_to_iso8859_13
- },
- {
- PG_LATIN8, 0, 0, iso8859_14_to_utf, utf_to_iso8859_14
- },
- {
- PG_LATIN9, 0, 0, iso8859_15_to_utf, utf_to_iso8859_15
- },
- {
- PG_LATIN10, 0, 0, iso8859_16_to_utf, utf_to_iso8859_16
- },
- {
- PG_WIN1256, 0, 0, win1256_to_utf, utf_to_win1256
- },
- {
- PG_TCVN, 0, 0, tcvn_to_utf, utf_to_tcvn
- },
- {
- PG_WIN874, 0, 0, win874_to_utf, utf_to_win874
- },
- {
- PG_KOI8R, koi8r2mic, mic2koi8r, KOI8R_to_utf, utf_to_KOI8R
- },
- {
- PG_WIN1251, win12512mic, mic2win1251, WIN1251_to_utf, utf_to_WIN1251
- },
- {
- PG_ALT, alt2mic, mic2alt, ALT_to_utf, utf_to_ALT
- },
- {
- PG_ISO_8859_5, 0, 0, iso8859_5_to_utf, utf_to_iso8859_5
- },
- {
- PG_ISO_8859_6, 0, 0, iso8859_6_to_utf, utf_to_iso8859_6
- },
- {
- PG_ISO_8859_7, 0, 0, iso8859_7_to_utf, utf_to_iso8859_7
- },
- {
- PG_ISO_8859_8, 0, 0, iso8859_8_to_utf, utf_to_iso8859_8
- },
-
- {
- PG_SJIS, sjis2mic, mic2sjis, sjis_to_utf, utf_to_sjis
- },
- {
- PG_BIG5, big52mic, mic2big5, big5_to_utf, utf_to_big5
- },
- {
- PG_GBK, 0, 0, gbk_to_utf, utf_to_gbk
- },
- {
- PG_UHC, 0, 0, uhc_to_utf, utf_to_uhc
- },
- {
- PG_WIN1250, win12502mic, mic2win1250, win1250_to_utf, utf_to_win1250
- },
- {
- PG_GB18030, gb180302mic, mic2gb18030, gb18030_to_utf, utf_to_gb18030
- },
-};
-
-#else
-
-pg_enconv pg_enconv_tbl[] =
-{
- {
- PG_SQL_ASCII, ascii2mic, mic2ascii, 0, 0
- },
- {
- PG_EUC_JP, euc_jp2mic, mic2euc_jp, 0, 0
- },
- {
- PG_EUC_CN, euc_cn2mic, mic2euc_cn, 0, 0
- },
- {
- PG_EUC_KR, euc_kr2mic, mic2euc_kr, 0, 0
- },
- {
- PG_EUC_TW, euc_tw2mic, mic2euc_tw, 0, 0
- },
- {
- PG_UTF8, 0, 0, 0, 0
- },
- {
- PG_MULE_INTERNAL, 0, 0, 0, 0
- },
- {
- PG_LATIN1, latin12mic, mic2latin1, 0, 0
- },
- {
- PG_LATIN2, latin22mic, mic2latin2, 0, 0
- },
- {
- PG_LATIN3, latin32mic, mic2latin3, 0, 0
- },
- {
- PG_LATIN4, latin42mic, mic2latin4, 0, 0
- },
- {
- PG_LATIN5, iso2mic, mic2iso, 0, 0
- },
- {
- PG_LATIN6, 0, 0, 0, 0
- },
- {
- PG_LATIN7, 0, 0, 0, 0
- },
- {
- PG_LATIN8, 0, 0, 0, 0
- },
- {
- PG_LATIN9, 0, 0, 0, 0
- },
- {
- PG_LATIN10, 0, 0, 0, 0
- },
- {
- PG_KOI8R, koi8r2mic, mic2koi8r, 0, 0
- },
- {
- PG_WIN1251, win12512mic, mic2win1251, 0, 0
- },
- {
- PG_ALT, alt2mic, mic2alt, 0, 0
- },
- {
- PG_ISO_8859_5, 0, 0, 0, 0
- },
- {
- PG_ISO_8859_6, 0, 0, 0, 0
- },
- {
- PG_ISO_8859_7, 0, 0, 0, 0
- },
- {
- PG_ISO_8859_8, 0, 0, 0, 0
- },
- {
- PG_SJIS, sjis2mic, mic2sjis, 0, 0
- },
- {
- PG_BIG5, big52mic, mic2big5, 0, 0
- },
- {
- PG_GBK, 0, 0, 0, 0
- },
- {
- PG_UHC, 0, 0, 0, 0
- },
- {
- PG_WIN1250, win12502mic, mic2win1250, 0, 0
- },
- {
- PG_GB18030, gb180302mic, mic2gb18030, 0, 0
- },
-};
-
-#endif /* UNICODE_CONVERSION */
+#endif