aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2012-07-10 16:52:42 -0400
committer Tom Lane <tgl@sss.pgh.pa.us> 2012-07-10 16:53:27 -0400
commit ed45a5373029f2ff08ce76cf3807499afe3873ee (patch)
tree 81755ade53a1e7691500cb0702c187e08df60074 /src
parent 892a8d05440deaf0f73b91a98f35d32b9415d497 (diff)
download postgresql-ed45a5373029f2ff08ce76cf3807499afe3873ee.tar.gz
postgresql-ed45a5373029f2ff08ce76cf3807499afe3873ee.zip
Back-patch addition of pg_wchar-to-multibyte conversion functionality.
Back-patch of commits 72dd6291f216440f6bb61a8733729a37c7e3b2d2, f6a05fd973a102f7e66c491d3f854864b8d24844, and 60e9c224a197aa37abb1aa3aefa3aad42da61f7f. This is needed to support fixing the regex prefix extraction bug in back branches.
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/mb/mbutils.c 22
-rw-r--r-- src/backend/utils/mb/wchar.c 254
-rw-r--r-- src/include/mb/pg_wchar.h 32
3 files changed, 259 insertions, 49 deletions
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 848c26f41fb..287ff808fc1 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -710,6 +710,28 @@ pg_encoding_mb2wchar_with_len(int encoding,
return (*pg_wchar_table[encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
}
+/*
+ * Convert a pg_wchar string to a multibyte string in the database encoding.
+ *
+ * The input length is taken from pg_wchar_strlen(from), and the work is
+ * delegated to the wchar2mb_with_len entry that pg_wchar_table holds for
+ * DatabaseEncoding->encoding; that function's return value is passed back
+ * unchanged.  No output size is passed along, so the caller must make "to"
+ * large enough.
+ * NOTE(review): some pg_wchar_table rows carry 0 in the wchar2mb_with_len
+ * slot; this presumably relies on the database encoding never being one of
+ * those -- confirm.
+ */
+int
+pg_wchar2mb(const pg_wchar *from, char *to)
+{
+ return (*pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len) (from, (unsigned char *)to, pg_wchar_strlen(from));
+}
+
+/*
+ * Convert a pg_wchar string to a multibyte string in the database encoding,
+ * passing "len" through as the number of pg_wchars to convert.
+ *
+ * Delegates to the wchar2mb_with_len entry of pg_wchar_table for
+ * DatabaseEncoding->encoding and returns its result unchanged.  No output
+ * size is passed along, so the caller must make "to" large enough.
+ */
+int
+pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len)
+{
+ return (*pg_wchar_table[DatabaseEncoding->encoding].wchar2mb_with_len) (from, (unsigned char *)to, len);
+}
+
+/*
+ * Same as pg_wchar2mb_with_len, but for an explicitly given encoding.
+ *
+ * "encoding" is used directly as an index into pg_wchar_table; it is not
+ * range-checked here, and the selected wchar2mb_with_len pointer is called
+ * without a null check.
+ * NOTE(review): the pg_wchar_table rows for PG_SJIS, PG_BIG5, PG_GBK, PG_UHC,
+ * PG_GB18030, PG_JOHAB and PG_SHIFT_JIS_2004 hold 0 in that slot, so callers
+ * presumably must not pass those encodings -- confirm.
+ */
+int
+pg_encoding_wchar2mb_with_len(int encoding,
+ const pg_wchar *from, char *to, int len)
+{
+ return (*pg_wchar_table[encoding].wchar2mb_with_len) (from, (unsigned char *)to, len);
+}
+
/* returns the byte length of a multibyte character */
int
pg_mblen(const char *mbstr)
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
index 5b0cf628fe9..3ebd2dda7a2 100644
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
@@ -99,8 +99,7 @@ pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
*to |= *from++;
len -= 2;
}
- else
- /* must be ASCII */
+ else /* must be ASCII */
{
*to = *from++;
len--;
@@ -340,6 +339,55 @@ pg_euctw_dsplen(const unsigned char *s)
}
/*
+ * Convert pg_wchar to EUC_* encoding.
+ * caller must allocate enough space for "to", including a trailing zero!
+ * len: maximum number of pg_wchars to read from "from".
+ * "from" not necessarily null terminated.
+ *
+ * Each input character is written most-significant byte first with its
+ * leading zero bytes dropped, so it occupies between one and four output
+ * bytes.  Conversion stops after len characters or at the first zero
+ * pg_wchar, whichever comes first; a zero byte is then stored at the end.
+ * Returns the number of bytes stored, not counting that trailing zero.
+ */
+static int
+pg_wchar2euc_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+ int cnt = 0; /* bytes written so far, excluding the terminator */
+
+ while (len > 0 && *from)
+ {
+ unsigned char c;
+
+ if ((c = (*from >> 24))) /* bits 24-31 non-zero: emit all four bytes */
+ {
+ *to++ = c;
+ *to++ = (*from >> 16) & 0xff;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 4;
+ }
+ else if ((c = (*from >> 16))) /* bits 16-23 non-zero: emit three bytes */
+ {
+ *to++ = c;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 3;
+ }
+ else if ((c = (*from >> 8))) /* bits 8-15 non-zero: emit two bytes */
+ {
+ *to++ = c;
+ *to++ = *from & 0xff;
+ cnt += 2;
+ }
+ else /* only the low byte is set: emit it alone */
+ {
+ *to++ = *from;
+ cnt++;
+ }
+ from++;
+ len--;
+ }
+ *to = 0; /* terminate the output; not included in cnt */
+ return cnt;
+}
+
+
+/*
* JOHAB
*/
static int
@@ -453,6 +501,31 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
return utf8string;
}
+/*
+ * Trivial conversion from pg_wchar to UTF-8.
+ * caller should allocate enough space for "to", including the trailing
+ * zero byte that is stored after the converted text.
+ * len: maximum number of pg_wchars to read from "from".
+ * "from" not necessarily null terminated.
+ *
+ * Each pg_wchar is handed to unicode_to_utf8(); the length of the sequence
+ * it produced is then obtained by calling pg_utf_mblen() on the bytes just
+ * written.  Conversion stops after len characters or at the first zero
+ * pg_wchar.  Returns the number of bytes stored, not counting the
+ * trailing zero.
+ */
+static int
+pg_wchar2utf_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+ int cnt = 0; /* bytes written so far, excluding the terminator */
+
+ while (len > 0 && *from)
+ {
+ int char_len;
+
+ unicode_to_utf8(*from, to); /* encode one character in place at "to" */
+ char_len = pg_utf_mblen(to); /* length read back from the bytes just written */
+ cnt += char_len;
+ to += char_len;
+ from++;
+ len--;
+ }
+ *to = 0; /* terminate the output; not included in cnt */
+ return cnt;
+}
/*
* Return the byte length of a UTF8 character pointed to by s
@@ -719,6 +792,77 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
return cnt;
}
+/*
+ * convert pg_wchar to mule internal code
+ * caller should allocate enough space for "to", including the trailing
+ * zero byte that is stored after the converted text.
+ * len: maximum number of pg_wchars to read from "from".
+ * "from" not necessarily null terminated.
+ *
+ * Bits 16-23 of each pg_wchar are treated as the leading byte "lb", which
+ * selects the output form:
+ *   IS_LC1(lb)             lb, low byte                        (2 bytes)
+ *   IS_LC2(lb)             lb, middle byte, low byte           (3 bytes)
+ *   IS_LCPRV1_A_RANGE(lb)  LCPRV1_A, lb, low byte              (3 bytes)
+ *   IS_LCPRV1_B_RANGE(lb)  LCPRV1_B, lb, low byte              (3 bytes)
+ *   IS_LCPRV2_A_RANGE(lb)  LCPRV2_A, lb, middle byte, low byte (4 bytes)
+ *   IS_LCPRV2_B_RANGE(lb)  LCPRV2_B, lb, middle byte, low byte (4 bytes)
+ *   anything else          low byte only                       (1 byte)
+ * where "middle byte" is bits 8-15 and "low byte" is bits 0-7.
+ *
+ * Conversion stops after len characters or at the first zero pg_wchar.
+ * Returns the number of bytes stored, not counting the trailing zero.
+ */
+static int
+pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+ int cnt = 0; /* bytes written so far, excluding the terminator */
+
+ while (len > 0 && *from)
+ {
+ unsigned char lb;
+
+ lb = (*from >> 16) & 0xff; /* leading byte lives in bits 16-23 */
+ if (IS_LC1(lb))
+ {
+ *to++ = lb;
+ *to++ = *from & 0xff;
+ cnt += 2;
+ }
+ else if (IS_LC2(lb))
+ {
+ *to++ = lb;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 3;
+ }
+ else if (IS_LCPRV1_A_RANGE(lb))
+ {
+ *to++ = LCPRV1_A; /* prefix byte is emitted ahead of lb */
+ *to++ = lb;
+ *to++ = *from & 0xff;
+ cnt += 3;
+ }
+ else if (IS_LCPRV1_B_RANGE(lb))
+ {
+ *to++ = LCPRV1_B; /* prefix byte is emitted ahead of lb */
+ *to++ = lb;
+ *to++ = *from & 0xff;
+ cnt += 3;
+ }
+ else if (IS_LCPRV2_A_RANGE(lb))
+ {
+ *to++ = LCPRV2_A; /* prefix byte is emitted ahead of lb */
+ *to++ = lb;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 4;
+ }
+ else if (IS_LCPRV2_B_RANGE(lb))
+ {
+ *to++ = LCPRV2_B; /* prefix byte is emitted ahead of lb */
+ *to++ = lb;
+ *to++ = (*from >> 8) & 0xff;
+ *to++ = *from & 0xff;
+ cnt += 4;
+ }
+ else /* lb matched none of the above: only the low byte is emitted */
+ {
+ *to++ = *from & 0xff;
+ cnt += 1;
+ }
+ from++;
+ len--;
+ }
+ *to = 0; /* terminate the output; not included in cnt */
+ return cnt;
+}
+
int
pg_mule_mblen(const unsigned char *s)
{
@@ -774,6 +918,28 @@ pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
return cnt;
}
+/*
+ * Trivial conversion from pg_wchar to single byte encoding. Just ignores
+ * high bits.
+ * caller should allocate enough space for "to": one byte per converted
+ * character plus the trailing zero byte stored at the end.
+ * len: maximum number of pg_wchars to read from "from".
+ * "from" not necessarily null terminated.
+ *
+ * Conversion stops after len characters or at the first zero pg_wchar.
+ * Returns the number of bytes stored, not counting the trailing zero.
+ */
+static int
+pg_wchar2single_with_len(const pg_wchar *from, unsigned char *to, int len)
+{
+ int cnt = 0; /* bytes written so far, excluding the terminator */
+
+ while (len > 0 && *from)
+ {
+ *to++ = *from++; /* storing into unsigned char keeps only the low 8 bits */
+ len--;
+ cnt++;
+ }
+ *to = 0; /* terminate the output; not included in cnt */
+ return cnt;
+}
+
static int
pg_latin1_mblen(const unsigned char *s)
{
@@ -1341,48 +1507,48 @@ pg_utf8_islegal(const unsigned char *source, int length)
*-------------------------------------------------------------------
*/
pg_wchar_tbl pg_wchar_table[] = { /* indexed by encoding ID; "+" rows are: mb2wchar_with_len, wchar2mb_with_len, mblen, dsplen, mbverify, then an integer (presumably max bytes per character -- confirm against pg_wchar_tbl); 0 means no converter is provided */
- {pg_ascii2wchar_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
- {pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JP */
- {pg_euccn2wchar_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2}, /* PG_EUC_CN */
- {pg_euckr2wchar_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3}, /* PG_EUC_KR */
- {pg_euctw2wchar_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4}, /* PG_EUC_TW */
- {pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JIS_2004 */
- {pg_utf2wchar_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4}, /* PG_UTF8 */
- {pg_mule2wchar_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4}, /* PG_MULE_INTERNAL */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN1 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN2 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN3 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN4 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN5 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN6 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN7 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN8 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN9 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN10 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1256 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1258 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN866 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN874 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8R */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1251 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1252 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-5 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-6 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-7 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-8 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1250 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1253 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */
- {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8U */
- {0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
- {0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
- {0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* PG_GBK */
- {0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */
- {0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* PG_GB18030 */
- {0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* PG_JOHAB */
- {0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* PG_SHIFT_JIS_2004 */
+ {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifier, 1}, /* PG_SQL_ASCII */
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JP */
+ {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifier, 2}, /* PG_EUC_CN */
+ {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifier, 3}, /* PG_EUC_KR */
+ {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifier, 4}, /* PG_EUC_TW */
+ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifier, 3}, /* PG_EUC_JIS_2004 */
+ {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifier, 4}, /* PG_UTF8 */
+ {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifier, 4}, /* PG_MULE_INTERNAL */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN1 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN2 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN3 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN4 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN5 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN6 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN7 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN8 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN9 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_LATIN10 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1256 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1258 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN866 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN874 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8R */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1251 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1252 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-5 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-6 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-7 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* ISO-8859-8 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1250 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1253 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1254 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1255 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_WIN1257 */
+ {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifier, 1}, /* PG_KOI8U */
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2}, /* PG_SJIS */
+ {0, 0, pg_big5_mblen, pg_big5_dsplen, pg_big5_verifier, 2}, /* PG_BIG5 */
+ {0, 0, pg_gbk_mblen, pg_gbk_dsplen, pg_gbk_verifier, 2}, /* PG_GBK */
+ {0, 0, pg_uhc_mblen, pg_uhc_dsplen, pg_uhc_verifier, 2}, /* PG_UHC */
+ {0, 0, pg_gb18030_mblen, pg_gb18030_dsplen, pg_gb18030_verifier, 4}, /* PG_GB18030 */
+ {0, 0, pg_johab_mblen, pg_johab_dsplen, pg_johab_verifier, 3}, /* PG_JOHAB */
+ {0, 0, pg_sjis_mblen, pg_sjis_dsplen, pg_sjis_verifier, 2} /* PG_SHIFT_JIS_2004 */
};
/* returns the byte length of a word for mule internal code */
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index 826c7af53bc..88960c433b3 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -49,7 +49,13 @@ typedef unsigned int pg_wchar;
/*
* Is a prefix byte for "private" single byte encodings?
*
* LCPRV1_A and LCPRV1_B name the two prefix byte values.  The *_RANGE
* macros test a leading byte rather than a prefix: pg_wchar2mule_with_len
* emits LCPRV1_A ahead of a leading byte in the A range and LCPRV1_B ahead
* of one in the B range.
*/
-#define IS_LCPRV1(c) ((unsigned char)(c) == 0x9a || (unsigned char)(c) == 0x9b)
+#define LCPRV1_A 0x9a
+#define LCPRV1_B 0x9b
+#define IS_LCPRV1(c) ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)
+#define IS_LCPRV1_A_RANGE(c) \
+ ((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf) /* leading bytes paired with LCPRV1_A */
+#define IS_LCPRV1_B_RANGE(c) \
+ ((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef) /* leading bytes paired with LCPRV1_B */
/*
* Is a leading byte for "official" multibyte encodings?
*/
@@ -57,7 +63,13 @@ typedef unsigned int pg_wchar;
/*
* Is a prefix byte for "private" multibyte encodings?
*
* LCPRV2_A and LCPRV2_B name the two prefix byte values.  The *_RANGE
* macros test a leading byte rather than a prefix: pg_wchar2mule_with_len
* emits LCPRV2_A ahead of a leading byte in the A range and LCPRV2_B ahead
* of one in the B range.
*/
-#define IS_LCPRV2(c) ((unsigned char)(c) == 0x9c || (unsigned char)(c) == 0x9d)
+#define LCPRV2_A 0x9c
+#define LCPRV2_B 0x9d
+#define IS_LCPRV2(c) ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)
+#define IS_LCPRV2_A_RANGE(c) \
+ ((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4) /* leading bytes paired with LCPRV2_A */
+#define IS_LCPRV2_B_RANGE(c) \
+ ((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe) /* leading bytes paired with LCPRV2_B */
/*----------------------------------------------------
* leading characters
@@ -277,7 +289,11 @@ extern pg_enc2gettext pg_enc2gettext_tbl[];
* pg_wchar stuff
*/
typedef int (*mb2wchar_with_len_converter) (const unsigned char *from,
- pg_wchar *to,
+ pg_wchar *to,
+ int len); /* multibyte input -> pg_wchar output */
+
+typedef int (*wchar2mb_with_len_converter) (const pg_wchar *from,
+ unsigned char *to,
 int len); /* pg_wchar input -> multibyte output; len is passed with "from" */
typedef int (*mblen_converter) (const unsigned char *mbstr);
@@ -288,8 +304,10 @@ typedef int (*mbverifier) (const unsigned char *mbstr, int len);
typedef struct
{
- mb2wchar_with_len_converter mb2wchar_with_len; /* convert a multibyte
- * string to a wchar */
+ mb2wchar_with_len_converter mb2wchar_with_len; /* convert a multibyte
+ * string to a wchar */
+ wchar2mb_with_len_converter wchar2mb_with_len; /* convert a wchar
+ * string to a multibyte */
mblen_converter mblen; /* get byte length of a char */
mbdisplaylen_converter dsplen; /* get display width of a char */
mbverifier mbverify; /* verify multibyte sequence */
@@ -370,6 +388,10 @@ extern int pg_mb2wchar(const char *from, pg_wchar *to);
extern int pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len);
extern int pg_encoding_mb2wchar_with_len(int encoding,
const char *from, pg_wchar *to, int len);
+extern int pg_wchar2mb(const pg_wchar *from, char *to); /* database encoding; length taken from pg_wchar_strlen(from) */
+extern int pg_wchar2mb_with_len(const pg_wchar *from, char *to, int len); /* database encoding; explicit input length */
+extern int pg_encoding_wchar2mb_with_len(int encoding,
+ const pg_wchar *from, char *to, int len); /* same, for the given encoding */
extern int pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2);
extern int pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
extern int pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);