aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/commands/conversioncmds.c32
-rw-r--r--src/backend/utils/error/elog.c2
-rw-r--r--src/backend/utils/mb/conv.c139
-rw-r--r--src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c127
-rw-r--r--src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c94
-rw-r--r--src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c57
-rw-r--r--src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c153
-rw-r--r--src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c57
-rw-r--r--src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c165
-rw-r--r--src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c49
-rw-r--r--src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c43
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c37
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c67
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c37
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c37
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c37
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c37
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c37
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c37
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c37
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c43
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c35
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c37
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c37
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c37
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c37
-rw-r--r--src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c43
-rw-r--r--src/backend/utils/mb/mbutils.c79
-rw-r--r--src/bin/pg_upgrade/check.c95
-rw-r--r--src/include/catalog/catversion.h2
-rw-r--r--src/include/catalog/pg_proc.dat332
-rw-r--r--src/include/mb/pg_wchar.h35
-rw-r--r--src/test/regress/expected/conversion.out519
-rw-r--r--src/test/regress/expected/opr_sanity.out7
-rw-r--r--src/test/regress/input/create_function_1.source4
-rw-r--r--src/test/regress/output/create_function_1.source3
-rw-r--r--src/test/regress/regress.c134
-rw-r--r--src/test/regress/sql/conversion.sql185
-rw-r--r--src/test/regress/sql/opr_sanity.sql7
39 files changed, 2324 insertions, 628 deletions
diff --git a/src/backend/commands/conversioncmds.c b/src/backend/commands/conversioncmds.c
index f7ff321de71..5fed97a2f99 100644
--- a/src/backend/commands/conversioncmds.c
+++ b/src/backend/commands/conversioncmds.c
@@ -45,8 +45,9 @@ CreateConversionCommand(CreateConversionStmt *stmt)
const char *from_encoding_name = stmt->for_encoding_name;
const char *to_encoding_name = stmt->to_encoding_name;
List *func_name = stmt->func_name;
- static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID};
+ static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID, BOOLOID};
char result[1];
+ Datum funcresult;
/* Convert list of names to a name and namespace */
namespaceId = QualifiedNameGetCreationNamespace(stmt->conversion_name,
@@ -92,12 +93,12 @@ CreateConversionCommand(CreateConversionStmt *stmt)
funcoid = LookupFuncName(func_name, sizeof(funcargs) / sizeof(Oid),
funcargs, false);
- /* Check it returns VOID, else it's probably the wrong function */
- if (get_func_rettype(funcoid) != VOIDOID)
+ /* Check it returns int4, else it's probably the wrong function */
+ if (get_func_rettype(funcoid) != INT4OID)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("encoding conversion function %s must return type %s",
- NameListToString(func_name), "void")));
+ NameListToString(func_name), "integer")));
/* Check we have EXECUTE rights for the function */
aclresult = pg_proc_aclcheck(funcoid, GetUserId(), ACL_EXECUTE);
@@ -111,12 +112,23 @@ CreateConversionCommand(CreateConversionStmt *stmt)
* string; the conversion function should throw an error if it can't
* perform the requested conversion.
*/
- OidFunctionCall5(funcoid,
- Int32GetDatum(from_encoding),
- Int32GetDatum(to_encoding),
- CStringGetDatum(""),
- CStringGetDatum(result),
- Int32GetDatum(0));
+ funcresult = OidFunctionCall6(funcoid,
+ Int32GetDatum(from_encoding),
+ Int32GetDatum(to_encoding),
+ CStringGetDatum(""),
+ CStringGetDatum(result),
+ Int32GetDatum(0),
+ BoolGetDatum(false));
+
+ /*
+ * The function should return 0 for empty input. Might as well check that,
+ * too.
+ */
+ if (DatumGetInt32(funcresult) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("encoding conversion function %s returned incorrect result for empty input",
+ NameListToString(func_name))));
/*
* All seem ok, go ahead (possible failure would be a duplicate conversion
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 9e4ea1b345a..423df2f3006 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -2271,6 +2271,8 @@ write_console(const char *line, int len)
* Conversion on non-win32 platforms is not implemented yet. It requires
* non-throw version of pg_do_encoding_conversion(), that converts
* unconvertable characters to '?' without errors.
+ *
+ * XXX: We have a no-throw version now. It doesn't convert to '?' though.
*/
#endif
diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c
index a07b54bd3b8..33e9c9a9e3c 100644
--- a/src/backend/utils/mb/conv.c
+++ b/src/backend/utils/mb/conv.c
@@ -25,15 +25,20 @@
* tab holds conversion entries for the source charset
* starting from 128 (0x80). each entry in the table holds the corresponding
* code point for the target charset, or 0 if there is no equivalent code.
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
local2local(const unsigned char *l,
unsigned char *p,
int len,
int src_encoding,
int dest_encoding,
- const unsigned char *tab)
+ const unsigned char *tab,
+ bool noError)
{
+ const unsigned char *start = l;
unsigned char c1,
c2;
@@ -41,7 +46,11 @@ local2local(const unsigned char *l,
{
c1 = *l;
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(src_encoding, (const char *) l, len);
+ }
if (!IS_HIGHBIT_SET(c1))
*p++ = c1;
else
@@ -50,13 +59,19 @@ local2local(const unsigned char *l,
if (c2)
*p++ = c2;
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(src_encoding, dest_encoding,
(const char *) l, len);
+ }
}
l++;
len--;
}
*p = '\0';
+
+ return l - start;
}
/*
@@ -66,18 +81,26 @@ local2local(const unsigned char *l,
* p is the output area (must be large enough!)
* lc is the mule character set id for the local encoding
* encoding is the PG identifier for the local encoding
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
latin2mic(const unsigned char *l, unsigned char *p, int len,
- int lc, int encoding)
+ int lc, int encoding, bool noError)
{
+ const unsigned char *start = l;
int c1;
while (len > 0)
{
c1 = *l;
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(encoding, (const char *) l, len);
+ }
if (IS_HIGHBIT_SET(c1))
*p++ = lc;
*p++ = c1;
@@ -85,6 +108,8 @@ latin2mic(const unsigned char *l, unsigned char *p, int len,
len--;
}
*p = '\0';
+
+ return l - start;
}
/*
@@ -94,18 +119,26 @@ latin2mic(const unsigned char *l, unsigned char *p, int len,
* p is the output area (must be large enough!)
* lc is the mule character set id for the local encoding
* encoding is the PG identifier for the local encoding
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
mic2latin(const unsigned char *mic, unsigned char *p, int len,
- int lc, int encoding)
+ int lc, int encoding, bool noError)
{
+ const unsigned char *start = mic;
int c1;
while (len > 0)
{
c1 = *mic;
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
+ }
if (!IS_HIGHBIT_SET(c1))
{
/* easy for ASCII */
@@ -118,17 +151,27 @@ mic2latin(const unsigned char *mic, unsigned char *p, int len,
int l = pg_mule_mblen(mic);
if (len < l)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
len);
+ }
if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]))
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, encoding,
(const char *) mic, len);
+ }
*p++ = mic[1];
mic += 2;
len -= 2;
}
}
*p = '\0';
+
+ return mic - start;
}
@@ -143,15 +186,20 @@ mic2latin(const unsigned char *mic, unsigned char *p, int len,
* tab holds conversion entries for the local charset
* starting from 128 (0x80). each entry in the table holds the corresponding
* code point for the mule encoding, or 0 if there is no equivalent code.
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
latin2mic_with_table(const unsigned char *l,
unsigned char *p,
int len,
int lc,
int encoding,
- const unsigned char *tab)
+ const unsigned char *tab,
+ bool noError)
{
+ const unsigned char *start = l;
unsigned char c1,
c2;
@@ -159,7 +207,11 @@ latin2mic_with_table(const unsigned char *l,
{
c1 = *l;
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(encoding, (const char *) l, len);
+ }
if (!IS_HIGHBIT_SET(c1))
*p++ = c1;
else
@@ -171,13 +223,19 @@ latin2mic_with_table(const unsigned char *l,
*p++ = c2;
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(encoding, PG_MULE_INTERNAL,
(const char *) l, len);
+ }
}
l++;
len--;
}
*p = '\0';
+
+ return l - start;
}
/*
@@ -191,15 +249,20 @@ latin2mic_with_table(const unsigned char *l,
* tab holds conversion entries for the mule internal code's second byte,
* starting from 128 (0x80). each entry in the table holds the corresponding
* code point for the local charset, or 0 if there is no equivalent code.
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
mic2latin_with_table(const unsigned char *mic,
unsigned char *p,
int len,
int lc,
int encoding,
- const unsigned char *tab)
+ const unsigned char *tab,
+ bool noError)
{
+ const unsigned char *start = mic;
unsigned char c1,
c2;
@@ -207,7 +270,11 @@ mic2latin_with_table(const unsigned char *mic,
{
c1 = *mic;
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len);
+ }
if (!IS_HIGHBIT_SET(c1))
{
/* easy for ASCII */
@@ -220,11 +287,17 @@ mic2latin_with_table(const unsigned char *mic,
int l = pg_mule_mblen(mic);
if (len < l)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic,
len);
+ }
if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) ||
(c2 = tab[mic[1] - HIGHBIT]) == 0)
{
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, encoding,
(const char *) mic, len);
break; /* keep compiler quiet */
@@ -235,6 +308,8 @@ mic2latin_with_table(const unsigned char *mic,
}
}
*p = '\0';
+
+ return mic - start;
}
/*
@@ -424,18 +499,22 @@ pg_mb_radix_conv(const pg_mb_radix_tree *rt,
* is applied. An error is raised if no match is found.
*
* See pg_wchar.h for more details about the data structures used here.
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
UtfToLocal(const unsigned char *utf, int len,
unsigned char *iso,
const pg_mb_radix_tree *map,
const pg_utf_to_local_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
- int encoding)
+ int encoding, bool noError)
{
uint32 iutf;
int l;
const pg_utf_to_local_combined *cp;
+ const unsigned char *start = utf;
if (!PG_VALID_ENCODING(encoding))
ereport(ERROR,
@@ -505,10 +584,19 @@ UtfToLocal(const unsigned char *utf, int len,
l = pg_utf_mblen(utf);
if (len < l)
+ {
+ /* need more data to decide if this is a combined char */
+ utf -= l_save;
break;
+ }
if (!pg_utf8_islegal(utf, l))
+ {
+ if (!noError)
+ report_invalid_encoding(PG_UTF8, (const char *) utf, len);
+ utf -= l_save;
break;
+ }
/* We assume ASCII character cannot be in combined map */
if (l > 1)
@@ -584,15 +672,20 @@ UtfToLocal(const unsigned char *utf, int len,
}
/* failed to translate this character */
+ utf -= l;
+ if (noError)
+ break;
report_untranslatable_char(PG_UTF8, encoding,
- (const char *) (utf - l), len);
+ (const char *) utf, len);
}
/* if we broke out of loop early, must be invalid input */
- if (len > 0)
+ if (len > 0 && !noError)
report_invalid_encoding(PG_UTF8, (const char *) utf, len);
*iso = '\0';
+
+ return utf - start;
}
/*
@@ -616,18 +709,23 @@ UtfToLocal(const unsigned char *utf, int len,
* (if provided) is applied. An error is raised if no match is found.
*
* See pg_wchar.h for more details about the data structures used here.
+ *
+ * Returns the number of input bytes consumed. If noError is true, this can
+ * be less than 'len'.
*/
-void
+int
LocalToUtf(const unsigned char *iso, int len,
unsigned char *utf,
const pg_mb_radix_tree *map,
const pg_local_to_utf_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
- int encoding)
+ int encoding,
+ bool noError)
{
uint32 iiso;
int l;
const pg_local_to_utf_combined *cp;
+ const unsigned char *start = iso;
if (!PG_VALID_ENCODING(encoding))
ereport(ERROR,
@@ -723,13 +821,18 @@ LocalToUtf(const unsigned char *iso, int len,
}
/* failed to translate this character */
+ iso -= l;
+ if (noError)
+ break;
report_untranslatable_char(encoding, PG_UTF8,
- (const char *) (iso - l), len);
+ (const char *) iso, len);
}
/* if we broke out of loop early, must be invalid input */
- if (len > 0)
+ if (len > 0 && !noError)
report_invalid_encoding(encoding, (const char *) iso, len);
*utf = '\0';
+
+ return iso - start;
}
diff --git a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c
index 4c5b02654de..368c2deb5e4 100644
--- a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c
+++ b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c
@@ -44,8 +44,11 @@ PG_FUNCTION_INFO_V1(win866_to_iso);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
@@ -306,12 +309,14 @@ koi8r_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_MULE_INTERNAL);
- latin2mic(src, dest, len, LC_KOI8_R, PG_KOI8R);
+ converted = latin2mic(src, dest, len, LC_KOI8_R, PG_KOI8R, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -320,12 +325,14 @@ mic_to_koi8r(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_KOI8R);
- mic2latin(src, dest, len, LC_KOI8_R, PG_KOI8R);
+ converted = mic2latin(src, dest, len, LC_KOI8_R, PG_KOI8R, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -334,12 +341,14 @@ iso_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_MULE_INTERNAL);
- latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi);
+ converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -348,12 +357,14 @@ mic_to_iso(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_ISO_8859_5);
- mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso);
+ converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -362,12 +373,14 @@ win1251_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_MULE_INTERNAL);
- latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, win12512koi);
+ converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, win12512koi, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -376,12 +389,14 @@ mic_to_win1251(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1251);
- mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, koi2win1251);
+ converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, koi2win1251, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -390,12 +405,14 @@ win866_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_MULE_INTERNAL);
- latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, win8662koi);
+ converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, win8662koi, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -404,12 +421,14 @@ mic_to_win866(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN866);
- mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, koi2win866);
+ converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, koi2win866, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -418,12 +437,14 @@ koi8r_to_win1251(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_WIN1251);
- local2local(src, dest, len, PG_KOI8R, PG_WIN1251, koi2win1251);
+ converted = local2local(src, dest, len, PG_KOI8R, PG_WIN1251, koi2win1251, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -432,12 +453,14 @@ win1251_to_koi8r(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_KOI8R);
- local2local(src, dest, len, PG_WIN1251, PG_KOI8R, win12512koi);
+ converted = local2local(src, dest, len, PG_WIN1251, PG_KOI8R, win12512koi, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -446,12 +469,14 @@ koi8r_to_win866(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_WIN866);
- local2local(src, dest, len, PG_KOI8R, PG_WIN866, koi2win866);
+ converted = local2local(src, dest, len, PG_KOI8R, PG_WIN866, koi2win866, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -460,12 +485,14 @@ win866_to_koi8r(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_KOI8R);
- local2local(src, dest, len, PG_WIN866, PG_KOI8R, win8662koi);
+ converted = local2local(src, dest, len, PG_WIN866, PG_KOI8R, win8662koi, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -474,12 +501,14 @@ win866_to_win1251(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_WIN1251);
- local2local(src, dest, len, PG_WIN866, PG_WIN1251, win8662win1251);
+ converted = local2local(src, dest, len, PG_WIN866, PG_WIN1251, win8662win1251, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -488,12 +517,14 @@ win1251_to_win866(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_WIN866);
- local2local(src, dest, len, PG_WIN1251, PG_WIN866, win12512win866);
+ converted = local2local(src, dest, len, PG_WIN1251, PG_WIN866, win12512win866, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -502,12 +533,14 @@ iso_to_koi8r(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_KOI8R);
- local2local(src, dest, len, PG_ISO_8859_5, PG_KOI8R, iso2koi);
+ converted = local2local(src, dest, len, PG_ISO_8859_5, PG_KOI8R, iso2koi, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -516,12 +549,14 @@ koi8r_to_iso(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_ISO_8859_5);
- local2local(src, dest, len, PG_KOI8R, PG_ISO_8859_5, koi2iso);
+ converted = local2local(src, dest, len, PG_KOI8R, PG_ISO_8859_5, koi2iso, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -530,12 +565,14 @@ iso_to_win1251(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_WIN1251);
- local2local(src, dest, len, PG_ISO_8859_5, PG_WIN1251, iso2win1251);
+ converted = local2local(src, dest, len, PG_ISO_8859_5, PG_WIN1251, iso2win1251, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -544,12 +581,14 @@ win1251_to_iso(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_ISO_8859_5);
- local2local(src, dest, len, PG_WIN1251, PG_ISO_8859_5, win12512iso);
+ converted = local2local(src, dest, len, PG_WIN1251, PG_ISO_8859_5, win12512iso, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -558,12 +597,14 @@ iso_to_win866(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_WIN866);
- local2local(src, dest, len, PG_ISO_8859_5, PG_WIN866, iso2win866);
+ converted = local2local(src, dest, len, PG_ISO_8859_5, PG_WIN866, iso2win866, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -572,10 +613,12 @@ win866_to_iso(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_ISO_8859_5);
- local2local(src, dest, len, PG_WIN866, PG_ISO_8859_5, win8662iso);
+ converted = local2local(src, dest, len, PG_WIN866, PG_ISO_8859_5, win8662iso, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c b/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
index 4d7fb116cfd..a3fd35bd406 100644
--- a/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
+++ b/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c
@@ -19,8 +19,8 @@ PG_MODULE_MAGIC;
PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004);
PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004);
-static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len);
-static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len);
+static int euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError);
/* ----------
* conv_proc(
@@ -28,8 +28,11 @@ static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
@@ -39,12 +42,14 @@ euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);
- euc_jis_20042shift_jis_2004(src, dest, len);
+ converted = euc_jis_20042shift_jis_2004(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -53,20 +58,23 @@ shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);
- shift_jis_20042euc_jis_2004(src, dest, len);
+ converted = shift_jis_20042euc_jis_2004(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
/*
* EUC_JIS_2004 -> SHIFT_JIS_2004
*/
-static void
-euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
int c1,
ku,
ten;
@@ -79,8 +87,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
@@ -90,8 +102,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */
{
@@ -121,8 +137,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
*p++ = (ku + 0x19b) >> 1;
}
else
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
}
if (ku % 2)
@@ -132,8 +152,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
else if (ten >= 64 && ten <= 94)
*p++ = ten + 0x40;
else
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
}
else
*p++ = ten + 0x9e;
@@ -149,8 +173,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
else if (ku >= 63 && ku <= 94)
*p++ = (ku + 0x181) >> 1;
else
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
if (ku % 2)
{
@@ -159,20 +187,30 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
else if (ten >= 64 && ten <= 94)
*p++ = ten + 0x40;
else
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
}
else
*p++ = ten + 0x9e;
}
else
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JIS_2004,
(const char *) euc, len);
+ }
euc += l;
len -= l;
}
*p = '\0';
+
+ return euc - start;
}
/*
@@ -212,9 +250,10 @@ get_ten(int b, int *ku)
* SHIFT_JIS_2004 ---> EUC_JIS_2004
*/
-static void
-shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len)
+static int
+shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = sjis;
int c1;
int ku,
ten,
@@ -230,8 +269,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len);
+ }
*p++ = c1;
sjis++;
len--;
@@ -241,8 +284,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len);
if (l < 0 || l > len)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len);
+ }
if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
{
@@ -266,8 +313,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
ku = (c1 << 1) - 0x100;
ten = get_ten(c2, &kubun);
if (ten < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len);
+ }
ku -= kubun;
}
else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */
@@ -275,9 +326,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
ku = (c1 << 1) - 0x180;
ten = get_ten(c2, &kubun);
if (ten < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
-
(const char *) sjis, len);
+ }
ku -= kubun;
}
else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2
@@ -286,8 +340,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
plane = 2;
ten = get_ten(c2, &kubun);
if (ten < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len);
+ }
switch (c1)
{
case 0xf0:
@@ -309,16 +367,24 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
plane = 2;
ten = get_ten(c2, &kubun);
if (ten < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len);
+ }
if (c1 == 0xf4 && kubun == 1)
ku = 15;
else
ku = (c1 << 1) - 0x19a - kubun;
}
else
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SHIFT_JIS_2004,
(const char *) sjis, len);
+ }
if (plane == 2)
*p++ = SS3;
@@ -330,4 +396,6 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
len -= l;
}
*p = '\0';
+
+ return sjis - start;
}
diff --git a/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c
index e9bb896935f..09b3c2e75bf 100644
--- a/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c
+++ b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c
@@ -26,13 +26,16 @@ PG_FUNCTION_INFO_V1(mic_to_euc_cn);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
-static void euc_cn2mic(const unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_cn(const unsigned char *mic, unsigned char *p, int len);
+static int euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError);
Datum
euc_cn_to_mic(PG_FUNCTION_ARGS)
@@ -40,12 +43,14 @@ euc_cn_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_MULE_INTERNAL);
- euc_cn2mic(src, dest, len);
+ converted = euc_cn2mic(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -54,20 +59,23 @@ mic_to_euc_cn(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_CN);
- mic2euc_cn(src, dest, len);
+ converted = mic2euc_cn(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
/*
* EUC_CN ---> MIC
*/
-static void
-euc_cn2mic(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
int c1;
while (len > 0)
@@ -76,7 +84,11 @@ euc_cn2mic(const unsigned char *euc, unsigned char *p, int len)
if (IS_HIGHBIT_SET(c1))
{
if (len < 2 || !IS_HIGHBIT_SET(euc[1]))
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
+ }
*p++ = LC_GB2312_80;
*p++ = c1;
*p++ = euc[1];
@@ -86,21 +98,28 @@ euc_cn2mic(const unsigned char *euc, unsigned char *p, int len)
else
{ /* should be ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_CN, (const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
}
}
*p = '\0';
+
+ return euc - start;
}
/*
* MIC ---> EUC_CN
*/
-static void
-mic2euc_cn(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = mic;
int c1;
while (len > 0)
@@ -109,11 +128,19 @@ mic2euc_cn(const unsigned char *mic, unsigned char *p, int len)
if (IS_HIGHBIT_SET(c1))
{
if (c1 != LC_GB2312_80)
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_CN,
(const char *) mic, len);
+ }
if (len < 3 || !IS_HIGHBIT_SET(mic[1]) || !IS_HIGHBIT_SET(mic[2]))
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
mic++;
*p++ = *mic++;
*p++ = *mic++;
@@ -122,12 +149,18 @@ mic2euc_cn(const unsigned char *mic, unsigned char *p, int len)
else
{ /* should be ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
*p++ = c1;
mic++;
len--;
}
}
*p = '\0';
+
+ return mic - start;
}
diff --git a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c
index 5059f917a98..2e68708893d 100644
--- a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c
+++ b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c
@@ -42,17 +42,20 @@ PG_FUNCTION_INFO_V1(mic_to_sjis);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
-static void sjis2mic(const unsigned char *sjis, unsigned char *p, int len);
-static void mic2sjis(const unsigned char *mic, unsigned char *p, int len);
-static void euc_jp2mic(const unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_jp(const unsigned char *mic, unsigned char *p, int len);
-static void euc_jp2sjis(const unsigned char *mic, unsigned char *p, int len);
-static void sjis2euc_jp(const unsigned char *mic, unsigned char *p, int len);
+static int sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError);
+static int mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError);
+static int euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError);
+static int euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len, bool noError);
Datum
euc_jp_to_sjis(PG_FUNCTION_ARGS)
@@ -60,12 +63,14 @@ euc_jp_to_sjis(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_SJIS);
- euc_jp2sjis(src, dest, len);
+ converted = euc_jp2sjis(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -74,12 +79,14 @@ sjis_to_euc_jp(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_EUC_JP);
- sjis2euc_jp(src, dest, len);
+ converted = sjis2euc_jp(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -88,12 +95,14 @@ euc_jp_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_MULE_INTERNAL);
- euc_jp2mic(src, dest, len);
+ converted = euc_jp2mic(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -102,12 +111,14 @@ mic_to_euc_jp(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_JP);
- mic2euc_jp(src, dest, len);
+ converted = mic2euc_jp(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -116,12 +127,14 @@ sjis_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_MULE_INTERNAL);
- sjis2mic(src, dest, len);
+ converted = sjis2mic(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -130,20 +143,23 @@ mic_to_sjis(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_SJIS);
- mic2sjis(src, dest, len);
+ converted = mic2sjis(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
/*
* SJIS ---> MIC
*/
-static void
-sjis2mic(const unsigned char *sjis, unsigned char *p, int len)
+static int
+sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = sjis;
int c1,
c2,
i,
@@ -167,7 +183,11 @@ sjis2mic(const unsigned char *sjis, unsigned char *p, int len)
* JIS X0208, X0212, user defined extended characters
*/
if (len < 2 || !ISSJISHEAD(c1) || !ISSJISTAIL(sjis[1]))
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
+ }
c2 = sjis[1];
k = (c1 << 8) + c2;
if (k >= 0xed40 && k < 0xf040)
@@ -257,21 +277,28 @@ sjis2mic(const unsigned char *sjis, unsigned char *p, int len)
else
{ /* should be ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
+ }
*p++ = c1;
sjis++;
len--;
}
}
*p = '\0';
+
+ return sjis - start;
}
/*
* MIC ---> SJIS
*/
-static void
-mic2sjis(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = mic;
int c1,
c2,
k,
@@ -284,8 +311,12 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len)
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
*p++ = c1;
mic++;
len--;
@@ -293,8 +324,12 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len)
}
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
if (c1 == LC_JISX0201K)
*p++ = mic[1];
else if (c1 == LC_JISX0208)
@@ -350,20 +385,27 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len)
}
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_SJIS,
(const char *) mic, len);
+ }
mic += l;
len -= l;
}
*p = '\0';
+
+ return mic - start;
}
/*
* EUC_JP ---> MIC
*/
-static void
-euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
int c1;
int l;
@@ -374,8 +416,12 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JP,
(const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
@@ -383,8 +429,12 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
}
l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JP,
(const char *) euc, len);
+ }
if (c1 == SS2)
{ /* 1 byte kana? */
*p++ = LC_JISX0201K;
@@ -406,14 +456,17 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
len -= l;
}
*p = '\0';
+
+ return euc - start;
}
/*
* MIC ---> EUC_JP
*/
-static void
-mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = mic;
int c1;
int l;
@@ -424,8 +477,12 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
*p++ = c1;
mic++;
len--;
@@ -433,8 +490,12 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
}
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
if (c1 == LC_JISX0201K)
{
*p++ = SS2;
@@ -452,20 +513,27 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
*p++ = mic[2];
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_JP,
(const char *) mic, len);
+ }
mic += l;
len -= l;
}
*p = '\0';
+
+ return mic - start;
}
/*
* EUC_JP -> SJIS
*/
-static void
-euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
int c1,
c2,
k;
@@ -478,8 +546,12 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JP,
(const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
@@ -487,8 +559,12 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
}
l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_JP,
(const char *) euc, len);
+ }
if (c1 == SS2)
{
/* hankaku kana? */
@@ -551,14 +627,17 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
len -= l;
}
*p = '\0';
+
+ return euc - start;
}
/*
* SJIS ---> EUC_JP
*/
-static void
-sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
+static int
+sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = sjis;
int c1,
c2,
i,
@@ -573,8 +652,12 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SJIS,
(const char *) sjis, len);
+ }
*p++ = c1;
sjis++;
len--;
@@ -582,8 +665,12 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
}
l = pg_encoding_verifymbchar(PG_SJIS, (const char *) sjis, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_SJIS,
(const char *) sjis, len);
+ }
if (c1 >= 0xa1 && c1 <= 0xdf)
{
/* JIS X0201 (1 byte kana) */
@@ -680,4 +767,6 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
len -= l;
}
*p = '\0';
+
+ return sjis - start;
}
diff --git a/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c
index ac823d6c270..3b85f0c1861 100644
--- a/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c
+++ b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c
@@ -26,13 +26,16 @@ PG_FUNCTION_INFO_V1(mic_to_euc_kr);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
-static void euc_kr2mic(const unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_kr(const unsigned char *mic, unsigned char *p, int len);
+static int euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError);
Datum
euc_kr_to_mic(PG_FUNCTION_ARGS)
@@ -40,12 +43,14 @@ euc_kr_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_MULE_INTERNAL);
- euc_kr2mic(src, dest, len);
+ converted = euc_kr2mic(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -54,20 +59,23 @@ mic_to_euc_kr(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_KR);
- mic2euc_kr(src, dest, len);
+ converted = mic2euc_kr(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
/*
* EUC_KR ---> MIC
*/
-static void
-euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
int c1;
int l;
@@ -78,8 +86,12 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
{
l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len);
if (l != 2)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_KR,
(const char *) euc, len);
+ }
*p++ = LC_KS5601;
*p++ = c1;
*p++ = euc[1];
@@ -89,22 +101,29 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
else
{ /* should be ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_KR,
(const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
}
}
*p = '\0';
+
+ return euc - start;
}
/*
* MIC ---> EUC_KR
*/
-static void
-mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = mic;
int c1;
int l;
@@ -115,8 +134,12 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
*p++ = c1;
mic++;
len--;
@@ -124,18 +147,28 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
}
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
if (c1 == LC_KS5601)
{
*p++ = mic[1];
*p++ = mic[2];
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR,
(const char *) mic, len);
+ }
mic += l;
len -= l;
}
*p = '\0';
+
+ return mic - start;
}
diff --git a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
index 66c242d7f36..4bf8acda99f 100644
--- a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
+++ b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c
@@ -32,17 +32,20 @@ PG_FUNCTION_INFO_V1(mic_to_big5);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
-static void euc_tw2big5(const unsigned char *euc, unsigned char *p, int len);
-static void big52euc_tw(const unsigned char *euc, unsigned char *p, int len);
-static void big52mic(const unsigned char *big5, unsigned char *p, int len);
-static void mic2big5(const unsigned char *mic, unsigned char *p, int len);
-static void euc_tw2mic(const unsigned char *euc, unsigned char *p, int len);
-static void mic2euc_tw(const unsigned char *mic, unsigned char *p, int len);
+static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError);
+static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError);
+static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError);
+static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError);
Datum
euc_tw_to_big5(PG_FUNCTION_ARGS)
@@ -50,12 +53,14 @@ euc_tw_to_big5(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_BIG5);
- euc_tw2big5(src, dest, len);
+ converted = euc_tw2big5(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -64,12 +69,14 @@ big5_to_euc_tw(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_EUC_TW);
- big52euc_tw(src, dest, len);
+ converted = big52euc_tw(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -78,12 +85,14 @@ euc_tw_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_MULE_INTERNAL);
- euc_tw2mic(src, dest, len);
+ converted = euc_tw2mic(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -92,12 +101,14 @@ mic_to_euc_tw(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_TW);
- mic2euc_tw(src, dest, len);
+ converted = mic2euc_tw(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -106,12 +117,14 @@ big5_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_MULE_INTERNAL);
- big52mic(src, dest, len);
+ converted = big52mic(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -120,21 +133,24 @@ mic_to_big5(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_BIG5);
- mic2big5(src, dest, len);
+ converted = mic2big5(src, dest, len, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
/*
* EUC_TW ---> Big5
*/
-static void
-euc_tw2big5(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
unsigned char c1;
unsigned short big5buf,
cnsBuf;
@@ -149,8 +165,12 @@ euc_tw2big5(const unsigned char *euc, unsigned char *p, int len)
/* Verify and decode the next EUC_TW input character */
l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_TW,
(const char *) euc, len);
+ }
if (c1 == SS2)
{
c1 = euc[1]; /* plane No. */
@@ -171,8 +191,12 @@ euc_tw2big5(const unsigned char *euc, unsigned char *p, int len)
/* Write it out in Big5 */
big5buf = CNStoBIG5(cnsBuf, lc);
if (big5buf == 0)
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_EUC_TW, PG_BIG5,
(const char *) euc, len);
+ }
*p++ = (big5buf >> 8) & 0x00ff;
*p++ = big5buf & 0x00ff;
@@ -182,22 +206,29 @@ euc_tw2big5(const unsigned char *euc, unsigned char *p, int len)
else
{ /* should be ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_TW,
(const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
}
}
*p = '\0';
+
+ return euc - start;
}
/*
* Big5 ---> EUC_TW
*/
-static void
-big52euc_tw(const unsigned char *big5, unsigned char *p, int len)
+static int
+big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = big5;
unsigned short c1;
unsigned short big5buf,
cnsBuf;
@@ -212,8 +243,12 @@ big52euc_tw(const unsigned char *big5, unsigned char *p, int len)
{
l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_BIG5,
(const char *) big5, len);
+ }
big5buf = (c1 << 8) | big5[1];
cnsBuf = BIG5toCNS(big5buf, &lc);
@@ -237,8 +272,12 @@ big52euc_tw(const unsigned char *big5, unsigned char *p, int len)
*p++ = cnsBuf & 0x00ff;
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_BIG5, PG_EUC_TW,
(const char *) big5, len);
+ }
big5 += l;
len -= l;
@@ -256,14 +295,17 @@ big52euc_tw(const unsigned char *big5, unsigned char *p, int len)
}
}
*p = '\0';
+
+ return big5 - start;
}
/*
* EUC_TW ---> MIC
*/
-static void
-euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
+static int
+euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = euc;
int c1;
int l;
@@ -274,8 +316,12 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
{
l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_TW,
(const char *) euc, len);
+ }
if (c1 == SS2)
{
c1 = euc[1]; /* plane No. */
@@ -304,22 +350,29 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
else
{ /* should be ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_EUC_TW,
(const char *) euc, len);
+ }
*p++ = c1;
euc++;
len--;
}
}
*p = '\0';
+
+ return euc - start;
}
/*
* MIC ---> EUC_TW
*/
-static void
-mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = mic;
int c1;
int l;
@@ -330,8 +383,12 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
*p++ = c1;
mic++;
len--;
@@ -339,8 +396,12 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
}
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
if (c1 == LC_CNS11643_1)
{
*p++ = mic[1];
@@ -362,20 +423,27 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
*p++ = mic[3];
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW,
(const char *) mic, len);
+ }
mic += l;
len -= l;
}
*p = '\0';
+
+ return mic - start;
}
/*
* Big5 ---> MIC
*/
-static void
-big52mic(const unsigned char *big5, unsigned char *p, int len)
+static int
+big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = big5;
unsigned short c1;
unsigned short big5buf,
cnsBuf;
@@ -389,8 +457,12 @@ big52mic(const unsigned char *big5, unsigned char *p, int len)
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_BIG5,
(const char *) big5, len);
+ }
*p++ = c1;
big5++;
len--;
@@ -398,8 +470,12 @@ big52mic(const unsigned char *big5, unsigned char *p, int len)
}
l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_BIG5,
(const char *) big5, len);
+ }
big5buf = (c1 << 8) | big5[1];
cnsBuf = BIG5toCNS(big5buf, &lc);
if (lc != 0)
@@ -412,20 +488,27 @@ big52mic(const unsigned char *big5, unsigned char *p, int len)
*p++ = cnsBuf & 0x00ff;
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
(const char *) big5, len);
+ }
big5 += l;
len -= l;
}
*p = '\0';
+
+ return big5 - start;
}
/*
* MIC ---> Big5
*/
-static void
-mic2big5(const unsigned char *mic, unsigned char *p, int len)
+static int
+mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
+ const unsigned char *start = mic;
unsigned short c1;
unsigned short big5buf,
cnsBuf;
@@ -438,8 +521,12 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len)
{
/* ASCII */
if (c1 == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
*p++ = c1;
mic++;
len--;
@@ -447,8 +534,12 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len)
}
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
+ }
if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B)
{
if (c1 == LCPRV2_B)
@@ -462,16 +553,26 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len)
}
big5buf = CNStoBIG5(cnsBuf, c1);
if (big5buf == 0)
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
(const char *) mic, len);
+ }
*p++ = (big5buf >> 8) & 0x00ff;
*p++ = big5buf & 0x00ff;
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
(const char *) mic, len);
+ }
mic += l;
len -= l;
}
*p = '\0';
+
+ return mic - start;
}
diff --git a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c
index 2e28e6780a5..8610fcb69aa 100644
--- a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c
+++ b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c
@@ -30,8 +30,11 @@ PG_FUNCTION_INFO_V1(win1250_to_latin2);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
@@ -82,12 +85,14 @@ latin2_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_MULE_INTERNAL);
- latin2mic(src, dest, len, LC_ISO8859_2, PG_LATIN2);
+ converted = latin2mic(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -96,12 +101,14 @@ mic_to_latin2(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN2);
- mic2latin(src, dest, len, LC_ISO8859_2, PG_LATIN2);
+ converted = mic2latin(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -110,13 +117,15 @@ win1250_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_MULE_INTERNAL);
- latin2mic_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
- win1250_2_iso88592);
+ converted = latin2mic_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
+ win1250_2_iso88592, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -125,13 +134,15 @@ mic_to_win1250(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1250);
- mic2latin_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
- iso88592_2_win1250);
+ converted = mic2latin_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250,
+ iso88592_2_win1250, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -140,12 +151,15 @@ latin2_to_win1250(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_WIN1250);
- local2local(src, dest, len, PG_LATIN2, PG_WIN1250, iso88592_2_win1250);
+ converted = local2local(src, dest, len, PG_LATIN2, PG_WIN1250,
+ iso88592_2_win1250, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -154,10 +168,13 @@ win1250_to_latin2(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_LATIN2);
- local2local(src, dest, len, PG_WIN1250, PG_LATIN2, win1250_2_iso88592);
+ converted = local2local(src, dest, len, PG_WIN1250, PG_LATIN2,
+ win1250_2_iso88592, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c
index bc651410f21..bff27d1c295 100644
--- a/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c
+++ b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c
@@ -30,8 +30,11 @@ PG_FUNCTION_INFO_V1(mic_to_latin4);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
@@ -42,12 +45,14 @@ latin1_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_MULE_INTERNAL);
- latin2mic(src, dest, len, LC_ISO8859_1, PG_LATIN1);
+ converted = latin2mic(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -56,12 +61,14 @@ mic_to_latin1(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN1);
- mic2latin(src, dest, len, LC_ISO8859_1, PG_LATIN1);
+ converted = mic2latin(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -70,12 +77,14 @@ latin3_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN3, PG_MULE_INTERNAL);
- latin2mic(src, dest, len, LC_ISO8859_3, PG_LATIN3);
+ converted = latin2mic(src, dest, len, LC_ISO8859_3, PG_LATIN3, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -84,12 +93,14 @@ mic_to_latin3(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN3);
- mic2latin(src, dest, len, LC_ISO8859_3, PG_LATIN3);
+ converted = mic2latin(src, dest, len, LC_ISO8859_3, PG_LATIN3, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -98,12 +109,14 @@ latin4_to_mic(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN4, PG_MULE_INTERNAL);
- latin2mic(src, dest, len, LC_ISO8859_4, PG_LATIN4);
+ converted = latin2mic(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -112,10 +125,12 @@ mic_to_latin4(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN4);
- mic2latin(src, dest, len, LC_ISO8859_4, PG_LATIN4);
+ converted = mic2latin(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c
index d6067cdc24e..3838b15cab9 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_big5);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
@@ -38,16 +41,19 @@ big5_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_UTF8);
- LocalToUtf(src, len, dest,
- &big5_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_BIG5);
+ converted = LocalToUtf(src, len, dest,
+ &big5_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_BIG5,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -56,14 +62,17 @@ utf8_to_big5(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_BIG5);
- UtfToLocal(src, len, dest,
- &big5_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_BIG5);
+ converted = UtfToLocal(src, len, dest,
+ &big5_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_BIG5,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
index ed90e8e682e..75719fe5f1b 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c
@@ -33,8 +33,11 @@ PG_FUNCTION_INFO_V1(koi8u_to_utf8);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
@@ -44,16 +47,19 @@ utf8_to_koi8r(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8R);
- UtfToLocal(src, len, dest,
- &koi8r_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_KOI8R);
+ converted = UtfToLocal(src, len, dest,
+ &koi8r_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_KOI8R,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -62,16 +68,19 @@ koi8r_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_UTF8);
- LocalToUtf(src, len, dest,
- &koi8r_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_KOI8R);
+ converted = LocalToUtf(src, len, dest,
+ &koi8r_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_KOI8R,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -80,16 +89,19 @@ utf8_to_koi8u(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U);
- UtfToLocal(src, len, dest,
- &koi8u_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_KOI8U);
+ converted = UtfToLocal(src, len, dest,
+ &koi8u_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_KOI8U,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -98,14 +110,17 @@ koi8u_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8);
- LocalToUtf(src, len, dest,
- &koi8u_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_KOI8U);
+ converted = LocalToUtf(src, len, dest,
+ &koi8u_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_KOI8U,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c
index d699affce47..5391001951a 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_jis_2004);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
@@ -38,16 +41,19 @@ euc_jis_2004_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_UTF8);
- LocalToUtf(src, len, dest,
- &euc_jis_2004_to_unicode_tree,
- LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined),
- NULL,
- PG_EUC_JIS_2004);
+ converted = LocalToUtf(src, len, dest,
+ &euc_jis_2004_to_unicode_tree,
+ LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined),
+ NULL,
+ PG_EUC_JIS_2004,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -56,14 +62,17 @@ utf8_to_euc_jis_2004(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JIS_2004);
- UtfToLocal(src, len, dest,
- &euc_jis_2004_from_unicode_tree,
- ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined),
- NULL,
- PG_EUC_JIS_2004);
+ converted = UtfToLocal(src, len, dest,
+ &euc_jis_2004_from_unicode_tree,
+ ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined),
+ NULL,
+ PG_EUC_JIS_2004,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c
index d7c0ba6a58b..c87d1bf2398 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_cn);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
@@ -38,16 +41,19 @@ euc_cn_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_UTF8);
- LocalToUtf(src, len, dest,
- &euc_cn_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_CN);
+ converted = LocalToUtf(src, len, dest,
+ &euc_cn_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_CN,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -56,14 +62,17 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_CN);
- UtfToLocal(src, len, dest,
- &euc_cn_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_CN);
+ converted = UtfToLocal(src, len, dest,
+ &euc_cn_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_CN,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c
index 13a3a23e77b..6a55134db21 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_jp);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
@@ -38,16 +41,19 @@ euc_jp_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_UTF8);
- LocalToUtf(src, len, dest,
- &euc_jp_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_JP);
+ converted = LocalToUtf(src, len, dest,
+ &euc_jp_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_JP,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -56,14 +62,17 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JP);
- UtfToLocal(src, len, dest,
- &euc_jp_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_JP);
+ converted = UtfToLocal(src, len, dest,
+ &euc_jp_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_JP,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c
index 1bbb8aaef7b..fe1924e2fec 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_kr);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
@@ -38,16 +41,19 @@ euc_kr_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_UTF8);
- LocalToUtf(src, len, dest,
- &euc_kr_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_KR);
+ converted = LocalToUtf(src, len, dest,
+ &euc_kr_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_KR,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -56,14 +62,17 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_KR);
- UtfToLocal(src, len, dest,
- &euc_kr_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_KR);
+ converted = UtfToLocal(src, len, dest,
+ &euc_kr_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_KR,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c
index 9830045dccd..68215659b57 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_tw);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
@@ -38,16 +41,19 @@ euc_tw_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_UTF8);
- LocalToUtf(src, len, dest,
- &euc_tw_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_TW);
+ converted = LocalToUtf(src, len, dest,
+ &euc_tw_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_TW,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -56,14 +62,17 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_TW);
- UtfToLocal(src, len, dest,
- &euc_tw_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_EUC_TW);
+ converted = UtfToLocal(src, len, dest,
+ &euc_tw_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_EUC_TW,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c
index f86ecf27424..e1a59c39a4d 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c
@@ -183,8 +183,11 @@ conv_utf8_to_18030(uint32 code)
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
@@ -193,16 +196,19 @@ gb18030_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_GB18030, PG_UTF8);
- LocalToUtf(src, len, dest,
- &gb18030_to_unicode_tree,
- NULL, 0,
- conv_18030_to_utf8,
- PG_GB18030);
+ converted = LocalToUtf(src, len, dest,
+ &gb18030_to_unicode_tree,
+ NULL, 0,
+ conv_18030_to_utf8,
+ PG_GB18030,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -211,14 +217,17 @@ utf8_to_gb18030(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GB18030);
- UtfToLocal(src, len, dest,
- &gb18030_from_unicode_tree,
- NULL, 0,
- conv_utf8_to_18030,
- PG_GB18030);
+ converted = UtfToLocal(src, len, dest,
+ &gb18030_from_unicode_tree,
+ NULL, 0,
+ conv_utf8_to_18030,
+ PG_GB18030,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c
index 2ab8b16c8a8..881386d5347 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_gbk);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
@@ -38,16 +41,19 @@ gbk_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_GBK, PG_UTF8);
- LocalToUtf(src, len, dest,
- &gbk_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_GBK);
+ converted = LocalToUtf(src, len, dest,
+ &gbk_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_GBK,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -56,14 +62,17 @@ utf8_to_gbk(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GBK);
- UtfToLocal(src, len, dest,
- &gbk_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_GBK);
+ converted = UtfToLocal(src, len, dest,
+ &gbk_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_GBK,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
index 3e49f67ea2f..d93a521badf 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c
@@ -52,8 +52,11 @@ PG_FUNCTION_INFO_V1(utf8_to_iso8859);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
@@ -100,6 +103,7 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
int i;
CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
@@ -108,12 +112,15 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
{
if (encoding == maps[i].encoding)
{
- LocalToUtf(src, len, dest,
- maps[i].map1,
- NULL, 0,
- NULL,
- encoding);
- PG_RETURN_VOID();
+ int converted;
+
+ converted = LocalToUtf(src, len, dest,
+ maps[i].map1,
+ NULL, 0,
+ NULL,
+ encoding,
+ noError);
+ PG_RETURN_INT32(converted);
}
}
@@ -122,7 +129,7 @@ iso8859_to_utf8(PG_FUNCTION_ARGS)
errmsg("unexpected encoding ID %d for ISO 8859 character sets",
encoding)));
- PG_RETURN_VOID();
+ PG_RETURN_INT32(0);
}
Datum
@@ -132,6 +139,7 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
int i;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
@@ -140,12 +148,15 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
{
if (encoding == maps[i].encoding)
{
- UtfToLocal(src, len, dest,
- maps[i].map2,
- NULL, 0,
- NULL,
- encoding);
- PG_RETURN_VOID();
+ int converted;
+
+ converted = UtfToLocal(src, len, dest,
+ maps[i].map2,
+ NULL, 0,
+ NULL,
+ encoding,
+ noError);
+ PG_RETURN_INT32(converted);
}
}
@@ -154,5 +165,5 @@ utf8_to_iso8859(PG_FUNCTION_ARGS)
errmsg("unexpected encoding ID %d for ISO 8859 character sets",
encoding)));
- PG_RETURN_VOID();
+ PG_RETURN_INT32(0);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
index 67e713cca11..d0dc4cca378 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c
@@ -26,8 +26,11 @@ PG_FUNCTION_INFO_V1(utf8_to_iso8859_1);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
@@ -37,6 +40,8 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ unsigned char *start = src;
unsigned short c;
CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_UTF8);
@@ -45,7 +50,11 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS)
{
c = *src;
if (c == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_LATIN1, (const char *) src, len);
+ }
if (!IS_HIGHBIT_SET(c))
*dest++ = c;
else
@@ -58,7 +67,7 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS)
}
*dest = '\0';
- PG_RETURN_VOID();
+ PG_RETURN_INT32(src - start);
}
Datum
@@ -67,6 +76,8 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ unsigned char *start = src;
unsigned short c,
c1;
@@ -76,7 +87,11 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
{
c = *src;
if (c == 0)
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_UTF8, (const char *) src, len);
+ }
/* fast path for ASCII-subset characters */
if (!IS_HIGHBIT_SET(c))
{
@@ -89,10 +104,18 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
int l = pg_utf_mblen(src);
if (l > len || !pg_utf8_islegal(src, l))
+ {
+ if (noError)
+ break;
report_invalid_encoding(PG_UTF8, (const char *) src, len);
+ }
if (l != 2)
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_UTF8, PG_LATIN1,
(const char *) src, len);
+ }
c1 = src[1] & 0x3f;
c = ((c & 0x1f) << 6) | c1;
if (c >= 0x80 && c <= 0xff)
@@ -102,11 +125,15 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS)
len -= 2;
}
else
+ {
+ if (noError)
+ break;
report_untranslatable_char(PG_UTF8, PG_LATIN1,
(const char *) src, len);
+ }
}
}
*dest = '\0';
- PG_RETURN_VOID();
+ PG_RETURN_INT32(src - start);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c
index 578f5df4e7f..317daa2d5ee 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_johab);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
@@ -38,16 +41,19 @@ johab_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_JOHAB, PG_UTF8);
- LocalToUtf(src, len, dest,
- &johab_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_JOHAB);
+ converted = LocalToUtf(src, len, dest,
+ &johab_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_JOHAB,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -56,14 +62,17 @@ utf8_to_johab(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_JOHAB);
- UtfToLocal(src, len, dest,
- &johab_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_JOHAB);
+ converted = UtfToLocal(src, len, dest,
+ &johab_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_JOHAB,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c
index dd9fc2975ad..4c9348aba59 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_sjis);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
@@ -38,16 +41,19 @@ sjis_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8);
- LocalToUtf(src, len, dest,
- &sjis_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_SJIS);
+ converted = LocalToUtf(src, len, dest,
+ &sjis_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_SJIS,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -56,14 +62,17 @@ utf8_to_sjis(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SJIS);
- UtfToLocal(src, len, dest,
- &sjis_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_SJIS);
+ converted = UtfToLocal(src, len, dest,
+ &sjis_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_SJIS,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c
index 4bcc886d674..1fffdc5930c 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_shift_jis_2004);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
@@ -38,16 +41,19 @@ shift_jis_2004_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_UTF8);
- LocalToUtf(src, len, dest,
- &shift_jis_2004_to_unicode_tree,
- LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined),
- NULL,
- PG_SHIFT_JIS_2004);
+ converted = LocalToUtf(src, len, dest,
+ &shift_jis_2004_to_unicode_tree,
+ LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined),
+ NULL,
+ PG_SHIFT_JIS_2004,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -56,14 +62,17 @@ utf8_to_shift_jis_2004(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SHIFT_JIS_2004);
- UtfToLocal(src, len, dest,
- &shift_jis_2004_from_unicode_tree,
- ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined),
- NULL,
- PG_SHIFT_JIS_2004);
+ converted = UtfToLocal(src, len, dest,
+ &shift_jis_2004_from_unicode_tree,
+ ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined),
+ NULL,
+ PG_SHIFT_JIS_2004,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c
index c8e512994a1..d9471dad097 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c
@@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_uhc);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
Datum
@@ -38,16 +41,19 @@ uhc_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UHC, PG_UTF8);
- LocalToUtf(src, len, dest,
- &uhc_to_unicode_tree,
- NULL, 0,
- NULL,
- PG_UHC);
+ converted = LocalToUtf(src, len, dest,
+ &uhc_to_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_UHC,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
Datum
@@ -56,14 +62,17 @@ utf8_to_uhc(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
+ int converted;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_UHC);
- UtfToLocal(src, len, dest,
- &uhc_from_unicode_tree,
- NULL, 0,
- NULL,
- PG_UHC);
+ converted = UtfToLocal(src, len, dest,
+ &uhc_from_unicode_tree,
+ NULL, 0,
+ NULL,
+ PG_UHC,
+ noError);
- PG_RETURN_VOID();
+ PG_RETURN_INT32(converted);
}
diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c
index 0c9493dee56..110ba5677d0 100644
--- a/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c
+++ b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c
@@ -48,8 +48,11 @@ PG_FUNCTION_INFO_V1(utf8_to_win);
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
- * INTEGER -- source string length
- * ) returns VOID;
+ * INTEGER, -- source string length
+ * BOOL -- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
* ----------
*/
@@ -81,6 +84,7 @@ win_to_utf8(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
int i;
CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8);
@@ -89,12 +93,15 @@ win_to_utf8(PG_FUNCTION_ARGS)
{
if (encoding == maps[i].encoding)
{
- LocalToUtf(src, len, dest,
- maps[i].map1,
- NULL, 0,
- NULL,
- encoding);
- PG_RETURN_VOID();
+ int converted;
+
+ converted = LocalToUtf(src, len, dest,
+ maps[i].map1,
+ NULL, 0,
+ NULL,
+ encoding,
+ noError);
+ PG_RETURN_INT32(converted);
}
}
@@ -103,7 +110,7 @@ win_to_utf8(PG_FUNCTION_ARGS)
errmsg("unexpected encoding ID %d for WIN character sets",
encoding)));
- PG_RETURN_VOID();
+ PG_RETURN_INT32(0);
}
Datum
@@ -113,6 +120,7 @@ utf8_to_win(PG_FUNCTION_ARGS)
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
int len = PG_GETARG_INT32(4);
+ bool noError = PG_GETARG_BOOL(5);
int i;
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1);
@@ -121,12 +129,15 @@ utf8_to_win(PG_FUNCTION_ARGS)
{
if (encoding == maps[i].encoding)
{
- UtfToLocal(src, len, dest,
- maps[i].map2,
- NULL, 0,
- NULL,
- encoding);
- PG_RETURN_VOID();
+ int converted;
+
+ converted = UtfToLocal(src, len, dest,
+ maps[i].map2,
+ NULL, 0,
+ NULL,
+ encoding,
+ noError);
+ PG_RETURN_INT32(converted);
}
}
@@ -135,5 +146,5 @@ utf8_to_win(PG_FUNCTION_ARGS)
errmsg("unexpected encoding ID %d for WIN character sets",
encoding)));
- PG_RETURN_VOID();
+ PG_RETURN_INT32(0);
}
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 2578573b0ab..a13c398f4ac 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -406,12 +406,13 @@ pg_do_encoding_conversion(unsigned char *src, int len,
MemoryContextAllocHuge(CurrentMemoryContext,
(Size) len * MAX_CONVERSION_GROWTH + 1);
- OidFunctionCall5(proc,
- Int32GetDatum(src_encoding),
- Int32GetDatum(dest_encoding),
- CStringGetDatum(src),
- CStringGetDatum(result),
- Int32GetDatum(len));
+ (void) OidFunctionCall6(proc,
+ Int32GetDatum(src_encoding),
+ Int32GetDatum(dest_encoding),
+ CStringGetDatum(src),
+ CStringGetDatum(result),
+ Int32GetDatum(len),
+ BoolGetDatum(false));
/*
* If the result is large, it's worth repalloc'ing to release any extra
@@ -436,6 +437,62 @@ pg_do_encoding_conversion(unsigned char *src, int len,
}
/*
+ * Convert src string to another encoding.
+ *
+ * This function has a different API than the other conversion functions.
+ * The caller should've looked up the conversion function using
+ * FindDefaultConversionProc(). Unlike the other functions, the converted
+ * result is not palloc'd. It is written to the caller-supplied buffer
+ * instead.
+ *
+ * proc - OID of the conversion function to call
+ * src_encoding - encoding to convert from
+ * dest_encoding - encoding to convert to
+ * src, srclen - input buffer and its length in bytes
+ * dest, destlen - destination buffer and its size in bytes
+ * noError - passed through unchanged as the conversion function's last
+ * argument; for the built-in conversion functions, true means
+ * "don't throw an error if conversion fails"
+ *
+ * The output is null-terminated.
+ *
+ * Returns the int32 result of the conversion function; the built-in
+ * conversion functions return the number of bytes successfully converted.
+ *
+ * If destlen < srclen * MAX_CONVERSION_GROWTH + 1, the converted output
+ * wouldn't necessarily fit in the output buffer, and the function will not
+ * convert the whole input.
+ *
+ * TODO: The conversion function interface is not great. Firstly, it
+ * would be nice to pass through the destination buffer size to the
+ * conversion function, so that if you pass a shorter destination buffer, it
+ * could still continue to fill up the whole buffer. Currently, we have to
+ * assume worst case expansion and stop the conversion short, even if there
+ * is in fact space left in the destination buffer. Secondly, it would be
+ * nice to return the number of bytes written to the caller, to avoid a call
+ * to strlen().
+ */
+int
+pg_do_encoding_conversion_buf(Oid proc,
+ int src_encoding,
+ int dest_encoding,
+ unsigned char *src, int srclen,
+ unsigned char *dest, int destlen,
+ bool noError)
+{
+ Datum result;
+
+ /*
+ * If the destination buffer is not large enough to hold the result in the
+ * worst case, limit the input size passed to the conversion function.
+ *
+ * The limit is (destlen - 1) / MAX_CONVERSION_GROWTH, presumably keeping
+ * one byte back for the terminating null; it works out to zero whenever
+ * destlen - 1 is smaller than MAX_CONVERSION_GROWTH.
+ *
+ * NOTE(review): destlen is a signed int, so for destlen <= 0 the value
+ * (destlen - 1) is negative before it meets the Size divisor, and the
+ * clamp may then not take effect. Presumably callers always pass
+ * destlen >= 1 -- confirm.
+ */
+ if ((Size) srclen >= ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH))
+ srclen = ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH);
+
+ result = OidFunctionCall6(proc,
+ Int32GetDatum(src_encoding),
+ Int32GetDatum(dest_encoding),
+ CStringGetDatum(src),
+ CStringGetDatum(dest),
+ Int32GetDatum(srclen),
+ BoolGetDatum(noError));
+ return DatumGetInt32(result);
+}
+
+/*
* Convert string to encoding encoding_name. The source
* encoding is the DB encoding.
*
@@ -762,12 +819,13 @@ perform_default_encoding_conversion(const char *src, int len,
MemoryContextAllocHuge(CurrentMemoryContext,
(Size) len * MAX_CONVERSION_GROWTH + 1);
- FunctionCall5(flinfo,
+ FunctionCall6(flinfo,
Int32GetDatum(src_encoding),
Int32GetDatum(dest_encoding),
CStringGetDatum(src),
CStringGetDatum(result),
- Int32GetDatum(len));
+ Int32GetDatum(len),
+ BoolGetDatum(false));
/*
* Release extra space if there might be a lot --- see comments in
@@ -849,12 +907,13 @@ pg_unicode_to_server(pg_wchar c, unsigned char *s)
c_as_utf8[c_as_utf8_len] = '\0';
/* Convert, or throw error if we can't */
- FunctionCall5(Utf8ToServerConvProc,
+ FunctionCall6(Utf8ToServerConvProc,
Int32GetDatum(PG_UTF8),
Int32GetDatum(server_encoding),
CStringGetDatum(c_as_utf8),
CStringGetDatum(s),
- Int32GetDatum(c_as_utf8_len));
+ Int32GetDatum(c_as_utf8_len),
+ BoolGetDatum(false));
}
diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c
index 43fc297eb69..d77183b8d12 100644
--- a/src/bin/pg_upgrade/check.c
+++ b/src/bin/pg_upgrade/check.c
@@ -28,6 +28,7 @@ static void check_for_reg_data_type_usage(ClusterInfo *cluster);
static void check_for_jsonb_9_4_usage(ClusterInfo *cluster);
static void check_for_pg_role_prefix(ClusterInfo *cluster);
static void check_for_new_tablespace_dir(ClusterInfo *new_cluster);
+static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster);
static char *get_canonical_locale_name(int category, const char *locale);
@@ -103,6 +104,15 @@ check_and_dump_old_cluster(bool live_check)
check_for_isn_and_int8_passing_mismatch(&old_cluster);
/*
+ * PG 14 changed the function signature of encoding conversion functions.
+ * Conversions from older versions cannot be upgraded automatically
+ * because the user-defined functions used by the encoding conversions
+ * need to be changed to match the new signature.
+ */
+ if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1300)
+ check_for_user_defined_encoding_conversions(&old_cluster);
+
+ /*
* Pre-PG 14 allowed user defined postfix operators, which are not
* supported anymore. Verify there are none, iff applicable.
*/
@@ -1268,6 +1278,91 @@ check_for_pg_role_prefix(ClusterInfo *cluster)
check_ok();
}
+/*
+ * Verify that no user-defined encoding conversions exist.
+ *
+ * Connects to every database listed in cluster->dbarr and looks for
+ * pg_conversion entries whose OID is 16384 or above. Each match is
+ * written, grouped by database, to the file "encoding_conversions.txt",
+ * which is only created once the first match is seen. If anything was
+ * found the check ends in pg_fatal(); otherwise it reports check_ok().
+ */
+static void
+check_for_user_defined_encoding_conversions(ClusterInfo *cluster)
+{
+ int dbnum;
+ FILE *script = NULL; /* report file; stays NULL until a match is found */
+ bool found = false; /* set once any database has a match */
+ char output_path[MAXPGPATH];
+
+ prep_status("Checking for user-defined encoding conversions");
+
+ snprintf(output_path, sizeof(output_path),
+ "encoding_conversions.txt");
+
+ /* Find any user defined encoding conversions */
+ for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
+ {
+ PGresult *res;
+ bool db_used = false; /* "In database:" header already printed? */
+ int ntups;
+ int rowno;
+ int i_conoid,
+ i_conname,
+ i_nspname;
+ DbInfo *active_db = &cluster->dbarr.dbs[dbnum];
+ PGconn *conn = connectToServer(cluster, active_db->db_name);
+
+ /*
+ * The query below hardcodes FirstNormalObjectId as 16384 rather than
+ * interpolating that C #define into the query because, if that
+ * #define is ever changed, the cutoff we want to use is the value
+ * used by pre-version 14 servers, not that of some future version.
+ */
+ res = executeQueryOrDie(conn,
+ "SELECT c.oid as conoid, c.conname, n.nspname "
+ "FROM pg_catalog.pg_conversion c, "
+ " pg_catalog.pg_namespace n "
+ "WHERE c.connamespace = n.oid AND "
+ " c.oid >= 16384");
+ ntups = PQntuples(res);
+ i_conoid = PQfnumber(res, "conoid");
+ i_conname = PQfnumber(res, "conname");
+ i_nspname = PQfnumber(res, "nspname");
+ for (rowno = 0; rowno < ntups; rowno++)
+ {
+ found = true;
+ if (script == NULL &&
+ (script = fopen_priv(output_path, "w")) == NULL)
+ pg_fatal("could not open file \"%s\": %s\n",
+ output_path, strerror(errno));
+ if (!db_used)
+ {
+ fprintf(script, "In database: %s\n", active_db->db_name);
+ db_used = true;
+ }
+ fprintf(script, " (oid=%s) %s.%s\n",
+ PQgetvalue(res, rowno, i_conoid),
+ PQgetvalue(res, rowno, i_nspname),
+ PQgetvalue(res, rowno, i_conname));
+ }
+
+ PQclear(res);
+
+ PQfinish(conn);
+ }
+
+ if (script)
+ fclose(script);
+
+ if (found)
+ {
+ pg_log(PG_REPORT, "fatal\n");
+ pg_fatal("Your installation contains user-defined encoding conversions.\n"
+ "The conversion function parameters changed in PostgreSQL version 14\n"
+ "so this cluster cannot currently be upgraded. You can remove the\n"
+ "encoding conversions in the old cluster and restart the upgrade.\n"
+ "A list of user-defined encoding conversions is in the file:\n"
+ " %s\n\n", output_path);
+ }
+ else
+ check_ok();
+}
+
/*
* get_canonical_locale_name
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 489f5be427f..6a61c8f64f0 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202103291
+#define CATALOG_VERSION_NO 202104011
#endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index bfb89e0575d..69ffd0c3f4d 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -10914,388 +10914,388 @@
# conversion functions
{ oid => '4302',
descr => 'internal conversion function for KOI8R to MULE_INTERNAL',
- proname => 'koi8r_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8r_to_mic',
+ proname => 'koi8r_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8r_to_mic',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4303',
descr => 'internal conversion function for MULE_INTERNAL to KOI8R',
- proname => 'mic_to_koi8r', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_koi8r',
+ proname => 'mic_to_koi8r', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_koi8r',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4304',
descr => 'internal conversion function for ISO-8859-5 to MULE_INTERNAL',
- proname => 'iso_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso_to_mic',
+ proname => 'iso_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_mic',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4305',
descr => 'internal conversion function for MULE_INTERNAL to ISO-8859-5',
- proname => 'mic_to_iso', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_iso',
+ proname => 'mic_to_iso', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_iso',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4306',
descr => 'internal conversion function for WIN1251 to MULE_INTERNAL',
- proname => 'win1251_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win1251_to_mic',
+ proname => 'win1251_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win1251_to_mic',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4307',
descr => 'internal conversion function for MULE_INTERNAL to WIN1251',
- proname => 'mic_to_win1251', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_win1251',
+ proname => 'mic_to_win1251', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_win1251',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4308',
descr => 'internal conversion function for WIN866 to MULE_INTERNAL',
- proname => 'win866_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win866_to_mic',
+ proname => 'win866_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win866_to_mic',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4309',
descr => 'internal conversion function for MULE_INTERNAL to WIN866',
- proname => 'mic_to_win866', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_win866',
+ proname => 'mic_to_win866', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_win866',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4310', descr => 'internal conversion function for KOI8R to WIN1251',
- proname => 'koi8r_to_win1251', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'koi8r_to_win1251', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'koi8r_to_win1251', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4311', descr => 'internal conversion function for WIN1251 to KOI8R',
- proname => 'win1251_to_koi8r', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'win1251_to_koi8r', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win1251_to_koi8r', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4312', descr => 'internal conversion function for KOI8R to WIN866',
- proname => 'koi8r_to_win866', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8r_to_win866',
+ proname => 'koi8r_to_win866', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8r_to_win866',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4313', descr => 'internal conversion function for WIN866 to KOI8R',
- proname => 'win866_to_koi8r', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win866_to_koi8r',
+ proname => 'win866_to_koi8r', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win866_to_koi8r',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4314',
descr => 'internal conversion function for WIN866 to WIN1251',
- proname => 'win866_to_win1251', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'win866_to_win1251', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win866_to_win1251', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4315',
descr => 'internal conversion function for WIN1251 to WIN866',
- proname => 'win1251_to_win866', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'win1251_to_win866', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win1251_to_win866', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4316',
descr => 'internal conversion function for ISO-8859-5 to KOI8R',
- proname => 'iso_to_koi8r', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso_to_koi8r',
+ proname => 'iso_to_koi8r', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_koi8r',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4317',
descr => 'internal conversion function for KOI8R to ISO-8859-5',
- proname => 'koi8r_to_iso', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8r_to_iso',
+ proname => 'koi8r_to_iso', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8r_to_iso',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4318',
descr => 'internal conversion function for ISO-8859-5 to WIN1251',
- proname => 'iso_to_win1251', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso_to_win1251',
+ proname => 'iso_to_win1251', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_win1251',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4319',
descr => 'internal conversion function for WIN1251 to ISO-8859-5',
- proname => 'win1251_to_iso', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win1251_to_iso',
+ proname => 'win1251_to_iso', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win1251_to_iso',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4320',
descr => 'internal conversion function for ISO-8859-5 to WIN866',
- proname => 'iso_to_win866', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso_to_win866',
+ proname => 'iso_to_win866', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_win866',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4321',
descr => 'internal conversion function for WIN866 to ISO-8859-5',
- proname => 'win866_to_iso', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win866_to_iso',
+ proname => 'win866_to_iso', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win866_to_iso',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4322',
descr => 'internal conversion function for EUC_CN to MULE_INTERNAL',
- proname => 'euc_cn_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_cn_to_mic',
+ proname => 'euc_cn_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_cn_to_mic',
probin => '$libdir/euc_cn_and_mic' },
{ oid => '4323',
descr => 'internal conversion function for MULE_INTERNAL to EUC_CN',
- proname => 'mic_to_euc_cn', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_euc_cn',
+ proname => 'mic_to_euc_cn', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_euc_cn',
probin => '$libdir/euc_cn_and_mic' },
{ oid => '4324', descr => 'internal conversion function for EUC_JP to SJIS',
- proname => 'euc_jp_to_sjis', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_jp_to_sjis',
+ proname => 'euc_jp_to_sjis', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_jp_to_sjis',
probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4325', descr => 'internal conversion function for SJIS to EUC_JP',
- proname => 'sjis_to_euc_jp', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'sjis_to_euc_jp',
+ proname => 'sjis_to_euc_jp', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'sjis_to_euc_jp',
probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4326',
descr => 'internal conversion function for EUC_JP to MULE_INTERNAL',
- proname => 'euc_jp_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_jp_to_mic',
+ proname => 'euc_jp_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_jp_to_mic',
probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4327',
descr => 'internal conversion function for SJIS to MULE_INTERNAL',
- proname => 'sjis_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'sjis_to_mic',
+ proname => 'sjis_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'sjis_to_mic',
probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4328',
descr => 'internal conversion function for MULE_INTERNAL to EUC_JP',
- proname => 'mic_to_euc_jp', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_euc_jp',
+ proname => 'mic_to_euc_jp', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_euc_jp',
probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4329',
descr => 'internal conversion function for MULE_INTERNAL to SJIS',
- proname => 'mic_to_sjis', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_sjis',
+ proname => 'mic_to_sjis', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_sjis',
probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4330',
descr => 'internal conversion function for EUC_KR to MULE_INTERNAL',
- proname => 'euc_kr_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_kr_to_mic',
+ proname => 'euc_kr_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_kr_to_mic',
probin => '$libdir/euc_kr_and_mic' },
{ oid => '4331',
descr => 'internal conversion function for MULE_INTERNAL to EUC_KR',
- proname => 'mic_to_euc_kr', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_euc_kr',
+ proname => 'mic_to_euc_kr', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_euc_kr',
probin => '$libdir/euc_kr_and_mic' },
{ oid => '4332', descr => 'internal conversion function for EUC_TW to BIG5',
- proname => 'euc_tw_to_big5', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_tw_to_big5',
+ proname => 'euc_tw_to_big5', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_tw_to_big5',
probin => '$libdir/euc_tw_and_big5' },
{ oid => '4333', descr => 'internal conversion function for BIG5 to EUC_TW',
- proname => 'big5_to_euc_tw', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'big5_to_euc_tw',
+ proname => 'big5_to_euc_tw', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'big5_to_euc_tw',
probin => '$libdir/euc_tw_and_big5' },
{ oid => '4334',
descr => 'internal conversion function for EUC_TW to MULE_INTERNAL',
- proname => 'euc_tw_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_tw_to_mic',
+ proname => 'euc_tw_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_tw_to_mic',
probin => '$libdir/euc_tw_and_big5' },
{ oid => '4335',
descr => 'internal conversion function for BIG5 to MULE_INTERNAL',
- proname => 'big5_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'big5_to_mic',
+ proname => 'big5_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'big5_to_mic',
probin => '$libdir/euc_tw_and_big5' },
{ oid => '4336',
descr => 'internal conversion function for MULE_INTERNAL to EUC_TW',
- proname => 'mic_to_euc_tw', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_euc_tw',
+ proname => 'mic_to_euc_tw', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_euc_tw',
probin => '$libdir/euc_tw_and_big5' },
{ oid => '4337',
descr => 'internal conversion function for MULE_INTERNAL to BIG5',
- proname => 'mic_to_big5', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_big5',
+ proname => 'mic_to_big5', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_big5',
probin => '$libdir/euc_tw_and_big5' },
{ oid => '4338',
descr => 'internal conversion function for LATIN2 to MULE_INTERNAL',
- proname => 'latin2_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'latin2_to_mic',
+ proname => 'latin2_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'latin2_to_mic',
probin => '$libdir/latin2_and_win1250' },
{ oid => '4339',
descr => 'internal conversion function for MULE_INTERNAL to LATIN2',
- proname => 'mic_to_latin2', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_latin2',
+ proname => 'mic_to_latin2', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_latin2',
probin => '$libdir/latin2_and_win1250' },
{ oid => '4340',
descr => 'internal conversion function for WIN1250 to MULE_INTERNAL',
- proname => 'win1250_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win1250_to_mic',
+ proname => 'win1250_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win1250_to_mic',
probin => '$libdir/latin2_and_win1250' },
{ oid => '4341',
descr => 'internal conversion function for MULE_INTERNAL to WIN1250',
- proname => 'mic_to_win1250', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_win1250',
+ proname => 'mic_to_win1250', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_win1250',
probin => '$libdir/latin2_and_win1250' },
{ oid => '4342',
descr => 'internal conversion function for LATIN2 to WIN1250',
- proname => 'latin2_to_win1250', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'latin2_to_win1250', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'latin2_to_win1250', probin => '$libdir/latin2_and_win1250' },
{ oid => '4343',
descr => 'internal conversion function for WIN1250 to LATIN2',
- proname => 'win1250_to_latin2', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'win1250_to_latin2', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win1250_to_latin2', probin => '$libdir/latin2_and_win1250' },
{ oid => '4344',
descr => 'internal conversion function for LATIN1 to MULE_INTERNAL',
- proname => 'latin1_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'latin1_to_mic',
+ proname => 'latin1_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'latin1_to_mic',
probin => '$libdir/latin_and_mic' },
{ oid => '4345',
descr => 'internal conversion function for MULE_INTERNAL to LATIN1',
- proname => 'mic_to_latin1', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_latin1',
+ proname => 'mic_to_latin1', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_latin1',
probin => '$libdir/latin_and_mic' },
{ oid => '4346',
descr => 'internal conversion function for LATIN3 to MULE_INTERNAL',
- proname => 'latin3_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'latin3_to_mic',
+ proname => 'latin3_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'latin3_to_mic',
probin => '$libdir/latin_and_mic' },
{ oid => '4347',
descr => 'internal conversion function for MULE_INTERNAL to LATIN3',
- proname => 'mic_to_latin3', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_latin3',
+ proname => 'mic_to_latin3', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_latin3',
probin => '$libdir/latin_and_mic' },
{ oid => '4348',
descr => 'internal conversion function for LATIN4 to MULE_INTERNAL',
- proname => 'latin4_to_mic', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'latin4_to_mic',
+ proname => 'latin4_to_mic', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'latin4_to_mic',
probin => '$libdir/latin_and_mic' },
{ oid => '4349',
descr => 'internal conversion function for MULE_INTERNAL to LATIN4',
- proname => 'mic_to_latin4', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'mic_to_latin4',
+ proname => 'mic_to_latin4', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_latin4',
probin => '$libdir/latin_and_mic' },
{ oid => '4352', descr => 'internal conversion function for BIG5 to UTF8',
- proname => 'big5_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'big5_to_utf8',
+ proname => 'big5_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'big5_to_utf8',
probin => '$libdir/utf8_and_big5' },
{ oid => '4353', descr => 'internal conversion function for UTF8 to BIG5',
- proname => 'utf8_to_big5', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_big5',
+ proname => 'utf8_to_big5', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_big5',
probin => '$libdir/utf8_and_big5' },
{ oid => '4354', descr => 'internal conversion function for UTF8 to KOI8R',
- proname => 'utf8_to_koi8r', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_koi8r',
+ proname => 'utf8_to_koi8r', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_koi8r',
probin => '$libdir/utf8_and_cyrillic' },
{ oid => '4355', descr => 'internal conversion function for KOI8R to UTF8',
- proname => 'koi8r_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8r_to_utf8',
+ proname => 'koi8r_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8r_to_utf8',
probin => '$libdir/utf8_and_cyrillic' },
{ oid => '4356', descr => 'internal conversion function for UTF8 to KOI8U',
- proname => 'utf8_to_koi8u', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_koi8u',
+ proname => 'utf8_to_koi8u', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_koi8u',
probin => '$libdir/utf8_and_cyrillic' },
{ oid => '4357', descr => 'internal conversion function for KOI8U to UTF8',
- proname => 'koi8u_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'koi8u_to_utf8',
+ proname => 'koi8u_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'koi8u_to_utf8',
probin => '$libdir/utf8_and_cyrillic' },
{ oid => '4358', descr => 'internal conversion function for UTF8 to WIN',
- proname => 'utf8_to_win', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_win',
+ proname => 'utf8_to_win', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_win',
probin => '$libdir/utf8_and_win' },
{ oid => '4359', descr => 'internal conversion function for WIN to UTF8',
- proname => 'win_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'win_to_utf8',
+ proname => 'win_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'win_to_utf8',
probin => '$libdir/utf8_and_win' },
{ oid => '4360', descr => 'internal conversion function for EUC_CN to UTF8',
- proname => 'euc_cn_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_cn_to_utf8',
+ proname => 'euc_cn_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_cn_to_utf8',
probin => '$libdir/utf8_and_euc_cn' },
{ oid => '4361', descr => 'internal conversion function for UTF8 to EUC_CN',
- proname => 'utf8_to_euc_cn', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_euc_cn',
+ proname => 'utf8_to_euc_cn', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_euc_cn',
probin => '$libdir/utf8_and_euc_cn' },
{ oid => '4362', descr => 'internal conversion function for EUC_JP to UTF8',
- proname => 'euc_jp_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_jp_to_utf8',
+ proname => 'euc_jp_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_jp_to_utf8',
probin => '$libdir/utf8_and_euc_jp' },
{ oid => '4363', descr => 'internal conversion function for UTF8 to EUC_JP',
- proname => 'utf8_to_euc_jp', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_euc_jp',
+ proname => 'utf8_to_euc_jp', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_euc_jp',
probin => '$libdir/utf8_and_euc_jp' },
{ oid => '4364', descr => 'internal conversion function for EUC_KR to UTF8',
- proname => 'euc_kr_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_kr_to_utf8',
+ proname => 'euc_kr_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_kr_to_utf8',
probin => '$libdir/utf8_and_euc_kr' },
{ oid => '4365', descr => 'internal conversion function for UTF8 to EUC_KR',
- proname => 'utf8_to_euc_kr', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_euc_kr',
+ proname => 'utf8_to_euc_kr', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_euc_kr',
probin => '$libdir/utf8_and_euc_kr' },
{ oid => '4366', descr => 'internal conversion function for EUC_TW to UTF8',
- proname => 'euc_tw_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'euc_tw_to_utf8',
+ proname => 'euc_tw_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'euc_tw_to_utf8',
probin => '$libdir/utf8_and_euc_tw' },
{ oid => '4367', descr => 'internal conversion function for UTF8 to EUC_TW',
- proname => 'utf8_to_euc_tw', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_euc_tw',
+ proname => 'utf8_to_euc_tw', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_euc_tw',
probin => '$libdir/utf8_and_euc_tw' },
{ oid => '4368', descr => 'internal conversion function for GB18030 to UTF8',
- proname => 'gb18030_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'gb18030_to_utf8',
+ proname => 'gb18030_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'gb18030_to_utf8',
probin => '$libdir/utf8_and_gb18030' },
{ oid => '4369', descr => 'internal conversion function for UTF8 to GB18030',
- proname => 'utf8_to_gb18030', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_gb18030',
+ proname => 'utf8_to_gb18030', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_gb18030',
probin => '$libdir/utf8_and_gb18030' },
{ oid => '4370', descr => 'internal conversion function for GBK to UTF8',
- proname => 'gbk_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'gbk_to_utf8',
+ proname => 'gbk_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'gbk_to_utf8',
probin => '$libdir/utf8_and_gbk' },
{ oid => '4371', descr => 'internal conversion function for UTF8 to GBK',
- proname => 'utf8_to_gbk', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_gbk',
+ proname => 'utf8_to_gbk', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_gbk',
probin => '$libdir/utf8_and_gbk' },
{ oid => '4372',
descr => 'internal conversion function for UTF8 to ISO-8859 2-16',
- proname => 'utf8_to_iso8859', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_iso8859',
+ proname => 'utf8_to_iso8859', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_iso8859',
probin => '$libdir/utf8_and_iso8859' },
{ oid => '4373',
descr => 'internal conversion function for ISO-8859 2-16 to UTF8',
- proname => 'iso8859_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'iso8859_to_utf8',
+ proname => 'iso8859_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso8859_to_utf8',
probin => '$libdir/utf8_and_iso8859' },
{ oid => '4374', descr => 'internal conversion function for LATIN1 to UTF8',
- proname => 'iso8859_1_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'iso8859_1_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'iso8859_1_to_utf8', probin => '$libdir/utf8_and_iso8859_1' },
{ oid => '4375', descr => 'internal conversion function for UTF8 to LATIN1',
- proname => 'utf8_to_iso8859_1', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'utf8_to_iso8859_1', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'utf8_to_iso8859_1', probin => '$libdir/utf8_and_iso8859_1' },
{ oid => '4376', descr => 'internal conversion function for JOHAB to UTF8',
- proname => 'johab_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'johab_to_utf8',
+ proname => 'johab_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'johab_to_utf8',
probin => '$libdir/utf8_and_johab' },
{ oid => '4377', descr => 'internal conversion function for UTF8 to JOHAB',
- proname => 'utf8_to_johab', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_johab',
+ proname => 'utf8_to_johab', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_johab',
probin => '$libdir/utf8_and_johab' },
{ oid => '4378', descr => 'internal conversion function for SJIS to UTF8',
- proname => 'sjis_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'sjis_to_utf8',
+ proname => 'sjis_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'sjis_to_utf8',
probin => '$libdir/utf8_and_sjis' },
{ oid => '4379', descr => 'internal conversion function for UTF8 to SJIS',
- proname => 'utf8_to_sjis', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_sjis',
+ proname => 'utf8_to_sjis', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_sjis',
probin => '$libdir/utf8_and_sjis' },
{ oid => '4380', descr => 'internal conversion function for UHC to UTF8',
- proname => 'uhc_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'uhc_to_utf8',
+ proname => 'uhc_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'uhc_to_utf8',
probin => '$libdir/utf8_and_uhc' },
{ oid => '4381', descr => 'internal conversion function for UTF8 to UHC',
- proname => 'utf8_to_uhc', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4', prosrc => 'utf8_to_uhc',
+ proname => 'utf8_to_uhc', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'utf8_to_uhc',
probin => '$libdir/utf8_and_uhc' },
{ oid => '4382',
descr => 'internal conversion function for EUC_JIS_2004 to UTF8',
- proname => 'euc_jis_2004_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'euc_jis_2004_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'euc_jis_2004_to_utf8', probin => '$libdir/utf8_and_euc2004' },
{ oid => '4383',
descr => 'internal conversion function for UTF8 to EUC_JIS_2004',
- proname => 'utf8_to_euc_jis_2004', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'utf8_to_euc_jis_2004', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'utf8_to_euc_jis_2004', probin => '$libdir/utf8_and_euc2004' },
{ oid => '4384',
descr => 'internal conversion function for SHIFT_JIS_2004 to UTF8',
- proname => 'shift_jis_2004_to_utf8', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'shift_jis_2004_to_utf8', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'shift_jis_2004_to_utf8', probin => '$libdir/utf8_and_sjis2004' },
{ oid => '4385',
descr => 'internal conversion function for UTF8 to SHIFT_JIS_2004',
- proname => 'utf8_to_shift_jis_2004', prolang => 'c', prorettype => 'void',
- proargtypes => 'int4 int4 cstring internal int4',
+ proname => 'utf8_to_shift_jis_2004', prolang => 'c', prorettype => 'int4',
+ proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'utf8_to_shift_jis_2004', probin => '$libdir/utf8_and_sjis2004' },
{ oid => '4386',
descr => 'internal conversion function for EUC_JIS_2004 to SHIFT_JIS_2004',
proname => 'euc_jis_2004_to_shift_jis_2004', prolang => 'c',
- prorettype => 'void', proargtypes => 'int4 int4 cstring internal int4',
+ prorettype => 'int4', proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'euc_jis_2004_to_shift_jis_2004',
probin => '$libdir/euc2004_sjis2004' },
{ oid => '4387',
descr => 'internal conversion function for SHIFT_JIS_2004 to EUC_JIS_2004',
proname => 'shift_jis_2004_to_euc_jis_2004', prolang => 'c',
- prorettype => 'void', proargtypes => 'int4 int4 cstring internal int4',
+ prorettype => 'int4', proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'shift_jis_2004_to_euc_jis_2004',
probin => '$libdir/euc2004_sjis2004' },
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index 64b22e4b0d4..a9aaff9e6dc 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -616,6 +616,12 @@ extern int pg_bind_textdomain_codeset(const char *domainname);
extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
int src_encoding,
int dest_encoding);
+extern int pg_do_encoding_conversion_buf(Oid proc,
+ int src_encoding,
+ int dest_encoding,
+ unsigned char *src, int srclen,
+ unsigned char *dst, int dstlen,
+ bool noError);
extern char *pg_client_to_server(const char *s, int len);
extern char *pg_server_to_client(const char *s, int len);
@@ -627,18 +633,18 @@ extern void pg_unicode_to_server(pg_wchar c, unsigned char *s);
extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
-extern void UtfToLocal(const unsigned char *utf, int len,
+extern int UtfToLocal(const unsigned char *utf, int len,
unsigned char *iso,
const pg_mb_radix_tree *map,
const pg_utf_to_local_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
- int encoding);
-extern void LocalToUtf(const unsigned char *iso, int len,
+ int encoding, bool noError);
+extern int LocalToUtf(const unsigned char *iso, int len,
unsigned char *utf,
const pg_mb_radix_tree *map,
const pg_local_to_utf_combined *cmap, int cmapsize,
utf_local_conversion_func conv_func,
- int encoding);
+ int encoding, bool noError);
extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,
@@ -656,18 +662,19 @@ extern void report_invalid_encoding(int encoding, const char *mbstr, int len) pg
extern void report_untranslatable_char(int src_encoding, int dest_encoding,
const char *mbstr, int len) pg_attribute_noreturn();
-extern void local2local(const unsigned char *l, unsigned char *p, int len,
- int src_encoding, int dest_encoding, const unsigned char *tab);
-extern void latin2mic(const unsigned char *l, unsigned char *p, int len,
- int lc, int encoding);
-extern void mic2latin(const unsigned char *mic, unsigned char *p, int len,
- int lc, int encoding);
-extern void latin2mic_with_table(const unsigned char *l, unsigned char *p,
+extern int local2local(const unsigned char *l, unsigned char *p, int len,
+ int src_encoding, int dest_encoding,
+ const unsigned char *tab, bool noError);
+extern int latin2mic(const unsigned char *l, unsigned char *p, int len,
+ int lc, int encoding, bool noError);
+extern int mic2latin(const unsigned char *mic, unsigned char *p, int len,
+ int lc, int encoding, bool noError);
+extern int latin2mic_with_table(const unsigned char *l, unsigned char *p,
int len, int lc, int encoding,
- const unsigned char *tab);
-extern void mic2latin_with_table(const unsigned char *mic, unsigned char *p,
+ const unsigned char *tab, bool noError);
+extern int mic2latin_with_table(const unsigned char *mic, unsigned char *p,
int len, int lc, int encoding,
- const unsigned char *tab);
+ const unsigned char *tab, bool noError);
#ifdef WIN32
extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len);
diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out
index 62c10671685..e34ab20974d 100644
--- a/src/test/regress/expected/conversion.out
+++ b/src/test/regress/expected/conversion.out
@@ -37,3 +37,522 @@ DROP CONVERSION mydef;
--
RESET SESSION AUTHORIZATION;
DROP USER regress_conversion_user;
+--
+-- Test built-in conversion functions.
+--
+-- Helper function to test a conversion. Uses the test_enc_conversion function
+-- that was created in the create_function_1 test.
+create or replace function test_conv(
+ input IN bytea,
+ src_encoding IN text,
+ dst_encoding IN text,
+ result OUT bytea,
+ errorat OUT bytea,
+ error OUT text)
+language plpgsql as
+$$
+declare
+ validlen int;
+begin
+ -- First try to perform the conversion with noError = false. If that errors out,
+ -- capture the error message, and try again with noError = true. The second call
+ -- should succeed and return the position of the error, return that too.
+ begin
+ select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, false);
+ errorat = NULL;
+ error := NULL;
+ exception when others then
+ error := sqlerrm;
+ select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, true);
+ errorat = substr(input, validlen + 1);
+ end;
+ return;
+end;
+$$;
+--
+-- UTF-8
+--
+CREATE TABLE utf8_inputs (inbytes bytea, description text);
+insert into utf8_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\xc3a4c3b6', 'valid, extra latin chars'),
+ ('\xd184d0bed0be', 'valid, cyrillic'),
+ ('\x666f6fe8b1a1', 'valid, kanji/Chinese'),
+ ('\xe382abe3829a', 'valid, two chars that combine to one in EUC_JIS_2004'),
+ ('\xe382ab', 'only first half of combined char in EUC_JIS_2004'),
+ ('\xe382abe382', 'incomplete combination when converted EUC_JIS_2004'),
+ ('\xecbd94eb81bceba6ac', 'valid, Hangul, Korean'),
+ ('\x666f6fefa8aa', 'valid, needs mapping function to convert to GB18030'),
+ ('\x66e8b1ff6f6f', 'invalid byte sequence'),
+ ('\x66006f', 'invalid, NUL byte'),
+ ('\x666f6fe8b100', 'invalid, NUL byte'),
+ ('\x666f6fe8b1', 'incomplete character at end');
+-- Test UTF-8 verification
+select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_inputs;
+ description | result | errorat | error
+------------------------------------------------------+----------------------+--------------+-----------------------------------------------------------
+ valid, pure ASCII | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | |
+ valid, cyrillic | \xd184d0bed0be | |
+ valid, kanji/Chinese | \x666f6fe8b1a1 | |
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | |
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | |
+ incomplete combination when converted EUC_JIS_2004 | \xe382ab | \xe382 | invalid byte sequence for encoding "UTF8": 0xe3 0x82
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | |
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | |
+ invalid byte sequence | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+-- Test conversions from UTF-8
+select description, inbytes, (test_conv(inbytes, 'utf8', 'euc_jis_2004')).* from utf8_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------------+----------------------+----------------+----------------------+-------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | \xa9daa9ec | |
+ valid, cyrillic | \xd184d0bed0be | \xa7e6a7e0a7e0 | |
+ valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6fbedd | |
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \xa5f7 | |
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | \xa5ab | |
+ incomplete combination when converted EUC_JIS_2004 | \xe382abe382 | \x | \xe382abe382 | invalid byte sequence for encoding "UTF8": 0xe3 0x82
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "EUC_JIS_2004"
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "EUC_JIS_2004"
+ invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin1')).* from utf8_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------------+----------------------+----------+----------------------+-------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | \xe4f6 | |
+ valid, cyrillic | \xd184d0bed0be | \x | \xd184d0bed0be | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6f | \xe8b1a1 | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \x | \xe382abe3829a | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | \x | \xe382ab | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ incomplete combination when converted EUC_JIS_2004 | \xe382abe382 | \x | \xe382abe382 | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN1"
+ invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin2')).* from utf8_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------------+----------------------+----------+----------------------+-------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | \xe4f6 | |
+ valid, cyrillic | \xd184d0bed0be | \x | \xd184d0bed0be | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6f | \xe8b1a1 | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \x | \xe382abe3829a | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | \x | \xe382ab | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ incomplete combination when converted EUC_JIS_2004 | \xe382abe382 | \x | \xe382abe382 | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN2"
+ invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin5')).* from utf8_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------------+----------------------+----------+----------------------+-------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | \xe4f6 | |
+ valid, cyrillic | \xd184d0bed0be | \x | \xd184d0bed0be | character with byte sequence 0xd1 0x84 in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6f | \xe8b1a1 | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \x | \xe382abe3829a | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | \x | \xe382ab | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ incomplete combination when converted EUC_JIS_2004 | \xe382abe382 | \x | \xe382abe382 | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "LATIN5"
+ invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'koi8r')).* from utf8_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------------+----------------------+----------+----------------------+------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | \x | \xc3a4c3b6 | character with byte sequence 0xc3 0xa4 in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, cyrillic | \xd184d0bed0be | \xc6cfcf | |
+ valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6f | \xe8b1a1 | character with byte sequence 0xe8 0xb1 0xa1 in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \x | \xe382abe3829a | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | \x | \xe382ab | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ incomplete combination when converted EUC_JIS_2004 | \xe382abe382 | \x | \xe382abe382 | character with byte sequence 0xe3 0x82 0xab in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x | \xecbd94eb81bceba6ac | character with byte sequence 0xec 0xbd 0x94 in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f | \xefa8aa | character with byte sequence 0xef 0xa8 0xaa in encoding "UTF8" has no equivalent in encoding "KOI8R"
+ invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+select description, inbytes, (test_conv(inbytes, 'utf8', 'gb18030')).* from utf8_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------------+----------------------+----------------------------+--------------+-----------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid, extra latin chars | \xc3a4c3b6 | \x81308a3181308b32 | |
+ valid, cyrillic | \xd184d0bed0be | \xa7e6a7e0a7e0 | |
+ valid, kanji/Chinese | \x666f6fe8b1a1 | \x666f6fcff3 | |
+ valid, two chars that combine to one in EUC_JIS_2004 | \xe382abe3829a | \xa5ab8139a732 | |
+ only first half of combined char in EUC_JIS_2004 | \xe382ab | \xa5ab | |
+ incomplete combination when converted EUC_JIS_2004 | \xe382abe382 | \xa5ab | \xe382 | invalid byte sequence for encoding "UTF8": 0xe3 0x82
+ valid, Hangul, Korean | \xecbd94eb81bceba6ac | \x8334e5398238c4338330b335 | |
+ valid, needs mapping function to convert to GB18030 | \x666f6fefa8aa | \x666f6f84309c38 | |
+ invalid byte sequence | \x66e8b1ff6f6f | \x66 | \xe8b1ff6f6f | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0xff
+ invalid, NUL byte | \x66006f | \x66 | \x006f | invalid byte sequence for encoding "UTF8": 0x00
+ invalid, NUL byte | \x666f6fe8b100 | \x666f6f | \xe8b100 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1 0x00
+ incomplete character at end | \x666f6fe8b1 | \x666f6f | \xe8b1 | invalid byte sequence for encoding "UTF8": 0xe8 0xb1
+(13 rows)
+
+--
+-- EUC_JIS_2004
+--
+CREATE TABLE euc_jis_2004_inputs (inbytes bytea, description text);
+insert into euc_jis_2004_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6fbedd', 'valid'),
+ ('\xa5f7', 'valid, translates to two UTF-8 chars '),
+ ('\xbeddbe', 'incomplete char '),
+ ('\x666f6f00bedd', 'invalid, NUL byte'),
+ ('\x666f6fbe00dd', 'invalid, NUL byte'),
+ ('\x666f6fbedd00', 'invalid, NUL byte'),
+ ('\xbe04', 'invalid byte sequence');
+-- Test EUC_JIS_2004 verification
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'euc_jis_2004')).* from euc_jis_2004_inputs;
+ description | inbytes | result | errorat | error
+---------------------------------------+----------------+--------------+----------+--------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fbedd | \x666f6fbedd | |
+ valid, translates to two UTF-8 chars | \xa5f7 | \xa5f7 | |
+ incomplete char | \xbeddbe | \xbedd | \xbe | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe
+ invalid, NUL byte | \x666f6f00bedd | \x666f6f | \x00bedd | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid, NUL byte | \x666f6fbe00dd | \x666f6f | \xbe00dd | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x00
+ invalid, NUL byte | \x666f6fbedd00 | \x666f6fbedd | \x00 | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid byte sequence | \xbe04 | \x | \xbe04 | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x04
+(8 rows)
+
+-- Test conversions from EUC_JIS_2004
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'utf8')).* from euc_jis_2004_inputs;
+ description | inbytes | result | errorat | error
+---------------------------------------+----------------+----------------+----------+--------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fbedd | \x666f6fe8b1a1 | |
+ valid, translates to two UTF-8 chars | \xa5f7 | \xe382abe3829a | |
+ incomplete char | \xbeddbe | \xe8b1a1 | \xbe | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe
+ invalid, NUL byte | \x666f6f00bedd | \x666f6f | \x00bedd | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid, NUL byte | \x666f6fbe00dd | \x666f6f | \xbe00dd | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x00
+ invalid, NUL byte | \x666f6fbedd00 | \x666f6fe8b1a1 | \x00 | invalid byte sequence for encoding "EUC_JIS_2004": 0x00
+ invalid byte sequence | \xbe04 | \x | \xbe04 | invalid byte sequence for encoding "EUC_JIS_2004": 0xbe 0x04
+(8 rows)
+
+--
+-- SHIFT-JIS-2004
+--
+CREATE TABLE shiftjis2004_inputs (inbytes bytea, description text);
+insert into shiftjis2004_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6f8fdb', 'valid'),
+ ('\x666f6f81c0', 'valid, no translation to UTF-8'),
+ ('\x666f6f82f5', 'valid, translates to two UTF-8 chars '),
+ ('\x666f6f8fdb8f', 'incomplete char '),
+ ('\x666f6f820a', 'incomplete char, followed by newline '),
+ ('\x666f6f008fdb', 'invalid, NUL byte'),
+ ('\x666f6f8f00db', 'invalid, NUL byte'),
+ ('\x666f6f8fdb00', 'invalid, NUL byte');
+-- Test SHIFT-JIS-2004 verification
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'shiftjis2004')).* from shiftjis2004_inputs;
+ description | inbytes | result | errorat | error
+---------------------------------------+----------------+--------------+----------+----------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6f8fdb | \x666f6f8fdb | |
+ valid, no translation to UTF-8 | \x666f6f81c0 | \x666f6f81c0 | |
+ valid, translates to two UTF-8 chars | \x666f6f82f5 | \x666f6f82f5 | |
+ incomplete char | \x666f6f8fdb8f | \x666f6f8fdb | \x8f | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f
+ incomplete char, followed by newline | \x666f6f820a | \x666f6f | \x820a | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a
+ invalid, NUL byte | \x666f6f008fdb | \x666f6f | \x008fdb | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+ invalid, NUL byte | \x666f6f8f00db | \x666f6f | \x8f00db | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00
+ invalid, NUL byte | \x666f6f8fdb00 | \x666f6f8fdb | \x00 | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+(9 rows)
+
+-- Test conversions from SHIFT-JIS-2004
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'utf8')).* from shiftjis2004_inputs;
+ description | inbytes | result | errorat | error
+---------------------------------------+----------------+----------------------+----------+----------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6f8fdb | \x666f6fe8b1a1 | |
+ valid, no translation to UTF-8 | \x666f6f81c0 | \x666f6fe28a84 | |
+ valid, translates to two UTF-8 chars | \x666f6f82f5 | \x666f6fe3818be3829a | |
+ incomplete char | \x666f6f8fdb8f | \x666f6fe8b1a1 | \x8f | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f
+ incomplete char, followed by newline | \x666f6f820a | \x666f6f | \x820a | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a
+ invalid, NUL byte | \x666f6f008fdb | \x666f6f | \x008fdb | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+ invalid, NUL byte | \x666f6f8f00db | \x666f6f | \x8f00db | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00
+ invalid, NUL byte | \x666f6f8fdb00 | \x666f6fe8b1a1 | \x00 | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+(9 rows)
+
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'euc_jis_2004')).* from shiftjis2004_inputs;
+ description | inbytes | result | errorat | error
+---------------------------------------+----------------+--------------+----------+----------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6f8fdb | \x666f6fbedd | |
+ valid, no translation to UTF-8 | \x666f6f81c0 | \x666f6fa2c2 | |
+ valid, translates to two UTF-8 chars | \x666f6f82f5 | \x666f6fa4f7 | |
+ incomplete char | \x666f6f8fdb8f | \x666f6fbedd | \x8f | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f
+ incomplete char, followed by newline | \x666f6f820a | \x666f6f | \x820a | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x82 0x0a
+ invalid, NUL byte | \x666f6f008fdb | \x666f6f | \x008fdb | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+ invalid, NUL byte | \x666f6f8f00db | \x666f6f | \x8f00db | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x8f 0x00
+ invalid, NUL byte | \x666f6f8fdb00 | \x666f6fbedd | \x00 | invalid byte sequence for encoding "SHIFT_JIS_2004": 0x00
+(9 rows)
+
+--
+-- GB18030
+--
+CREATE TABLE gb18030_inputs (inbytes bytea, description text);
+insert into gb18030_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6fcff3', 'valid'),
+ ('\x666f6f8431a530', 'valid, no translation to UTF-8'),
+ ('\x666f6f84309c38', 'valid, translates to UTF-8 by mapping function'),
+ ('\x666f6f84309c', 'incomplete char '),
+ ('\x666f6f84309c0a', 'incomplete char, followed by newline '),
+ ('\x666f6f84309c3800', 'invalid, NUL byte'),
+ ('\x666f6f84309c0038', 'invalid, NUL byte');
+-- Test GB18030 verification
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'gb18030')).* from gb18030_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------+--------------------+------------------+--------------+-------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fcff3 | \x666f6fcff3 | |
+ valid, no translation to UTF-8 | \x666f6f8431a530 | \x666f6f8431a530 | |
+ valid, translates to UTF-8 by mapping function | \x666f6f84309c38 | \x666f6f84309c38 | |
+ incomplete char | \x666f6f84309c | \x666f6f | \x84309c | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c
+ incomplete char, followed by newline | \x666f6f84309c0a | \x666f6f | \x84309c0a | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a
+ invalid, NUL byte | \x666f6f84309c3800 | \x666f6f84309c38 | \x00 | invalid byte sequence for encoding "GB18030": 0x00
+ invalid, NUL byte | \x666f6f84309c0038 | \x666f6f | \x84309c0038 | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00
+(8 rows)
+
+-- Test conversions from GB18030
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18030_inputs;
+ description | inbytes | result | errorat | error
+------------------------------------------------+--------------------+----------------+--------------+-------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fcff3 | \x666f6fe8b1a1 | |
+ valid, no translation to UTF-8 | \x666f6f8431a530 | \x666f6f | \x8431a530 | character with byte sequence 0x84 0x31 0xa5 0x30 in encoding "GB18030" has no equivalent in encoding "UTF8"
+ valid, translates to UTF-8 by mapping function | \x666f6f84309c38 | \x666f6fefa8aa | |
+ incomplete char | \x666f6f84309c | \x666f6f | \x84309c | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c
+ incomplete char, followed by newline | \x666f6f84309c0a | \x666f6f | \x84309c0a | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x0a
+ invalid, NUL byte | \x666f6f84309c3800 | \x666f6fefa8aa | \x00 | invalid byte sequence for encoding "GB18030": 0x00
+ invalid, NUL byte | \x666f6f84309c0038 | \x666f6f | \x84309c0038 | invalid byte sequence for encoding "GB18030": 0x84 0x30 0x9c 0x00
+(8 rows)
+
+--
+-- ISO-8859-5
+--
+CREATE TABLE iso8859_5_inputs (inbytes bytea, description text);
+insert into iso8859_5_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\xe4dede', 'valid'),
+ ('\x00', 'invalid, NUL byte'),
+ ('\xe400dede', 'invalid, NUL byte'),
+ ('\xe4dede00', 'invalid, NUL byte');
+-- Test ISO-8859-5 verification
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'iso8859-5')).* from iso8859_5_inputs;
+ description | inbytes | result | errorat | error
+-------------------+------------+----------+----------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \xe4dede | \xe4dede | |
+ invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede | \xe4 | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 | \xe4dede | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+-- Test conversions from ISO-8859-5
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'utf8')).* from iso8859_5_inputs;
+ description | inbytes | result | errorat | error
+-------------------+------------+----------------+----------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \xe4dede | \xd184d0bed0be | |
+ invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede | \xd184 | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 | \xd184d0bed0be | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from iso8859_5_inputs;
+ description | inbytes | result | errorat | error
+-------------------+------------+----------+----------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \xe4dede | \xc6cfcf | |
+ invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede | \xc6 | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 | \xc6cfcf | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;
+ description | inbytes | result | errorat | error
+-------------------+------------+----------------+----------+-------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \xe4dede | \x8bc68bcf8bcf | |
+ invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe400dede | \x8bc6 | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
+ invalid, NUL byte | \xe4dede00 | \x8bc68bcf8bcf | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
+(5 rows)
+
+--
+-- Big5
+--
+CREATE TABLE big5_inputs (inbytes bytea, description text);
+insert into big5_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6fb648', 'valid'),
+ ('\x666f6fa27f', 'valid, no translation to UTF-8'),
+ ('\x666f6fb60048', 'invalid, NUL byte'),
+ ('\x666f6fb64800', 'invalid, NUL byte');
+-- Test Big5 verification
+select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from big5_inputs;
+ description | inbytes | result | errorat | error
+--------------------------------+----------------+--------------+----------+------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fb648 | \x666f6fb648 | |
+ valid, no translation to UTF-8 | \x666f6fa27f | \x666f6fa27f | |
+ invalid, NUL byte | \x666f6fb60048 | \x666f6f | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00
+ invalid, NUL byte | \x666f6fb64800 | \x666f6fb648 | \x00 | invalid byte sequence for encoding "BIG5": 0x00
+(5 rows)
+
+-- Test conversions from Big5
+select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs;
+ description | inbytes | result | errorat | error
+--------------------------------+----------------+----------------+----------+------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fb648 | \x666f6fe8b1a1 | |
+ valid, no translation to UTF-8 | \x666f6fa27f | \x666f6f | \xa27f | character with byte sequence 0xa2 0x7f in encoding "BIG5" has no equivalent in encoding "UTF8"
+ invalid, NUL byte | \x666f6fb60048 | \x666f6f | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00
+ invalid, NUL byte | \x666f6fb64800 | \x666f6fe8b1a1 | \x00 | invalid byte sequence for encoding "BIG5": 0x00
+(5 rows)
+
+select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;
+ description | inbytes | result | errorat | error
+--------------------------------+----------------+----------------+----------+------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid | \x666f6fb648 | \x666f6f95e2af | |
+ valid, no translation to UTF-8 | \x666f6fa27f | \x666f6f95a3c1 | |
+ invalid, NUL byte | \x666f6fb60048 | \x666f6f | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00
+ invalid, NUL byte | \x666f6fb64800 | \x666f6f95e2af | \x00 | invalid byte sequence for encoding "BIG5": 0x00
+(5 rows)
+
+--
+-- MULE_INTERNAL
+--
+CREATE TABLE mic_inputs (inbytes bytea, description text);
+insert into mic_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x8bc68bcf8bcf', 'valid (in KOI8R)'),
+ ('\x8bc68bcf8b', 'invalid,incomplete char'),
+ ('\x92bedd', 'valid (in SHIFT_JIS)'),
+ ('\x92be', 'invalid, incomplete char)'),
+ ('\x666f6f95a3c1', 'valid (in Big5)'),
+ ('\x666f6f95a3', 'invalid, incomplete char'),
+ ('\x9200bedd', 'invalid, NUL byte'),
+ ('\x92bedd00', 'invalid, NUL byte'),
+ ('\x8b00c68bcf8bcf', 'invalid, NUL byte');
+-- Test MULE_INTERNAL verification
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'mule_internal')).* from mic_inputs;
+ description | inbytes | result | errorat | error
+---------------------------+------------------+----------------+------------------+--------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid (in KOI8R) | \x8bc68bcf8bcf | \x8bc68bcf8bcf | |
+ invalid,incomplete char | \x8bc68bcf8b | \x8bc68bcf | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS) | \x92bedd | \x92bedd | |
+ invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5) | \x666f6f95a3c1 | \x666f6f95a3c1 | |
+ invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte | \x92bedd00 | \x92bedd | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
+-- Test conversions from MULE_INTERNAL
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
+ description | inbytes | result | errorat | error
+---------------------------+------------------+----------+------------------+---------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid (in KOI8R) | \x8bc68bcf8bcf | \xc6cfcf | |
+ invalid,incomplete char | \x8bc68bcf8b | \xc6cf | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte | \x9200bedd | \x | \x9200bedd | character with byte sequence 0x92 0x00 0xbe in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+ invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | character with byte sequence 0x8b 0x00 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
+ description | inbytes | result | errorat | error
+---------------------------+------------------+----------+------------------+--------------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid (in KOI8R) | \x8bc68bcf8bcf | \xe4dede | |
+ invalid,incomplete char | \x8bc68bcf8b | \xe4de | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
+ valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte | \x9200bedd | \x | \x9200bedd | character with byte sequence 0x92 0x00 0xbe in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+ invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | character with byte sequence 0x8b 0x00 in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
+ description | inbytes | result | errorat | error
+---------------------------+------------------+----------+------------------+--------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
+ invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
+ valid (in SHIFT_JIS) | \x92bedd | \x8fdb | |
+ invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
+ invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte | \x92bedd00 | \x8fdb | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
+ description | inbytes | result | errorat | error
+---------------------------+------------------+--------------+------------------+--------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5) | \x666f6f95a3c1 | \x666f6fa2a1 | |
+ invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
+ invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;
+ description | inbytes | result | errorat | error
+---------------------------+------------------+----------+------------------+----------------------------------------------------------------------------------------------------------------
+ valid, pure ASCII | \x666f6f | \x666f6f | |
+ valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
+ invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
+ valid (in SHIFT_JIS) | \x92bedd | \xbedd | |
+ invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
+ valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
+ invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
+ invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
+ invalid, NUL byte | \x92bedd00 | \xbedd | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
+ invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
+(10 rows)
+
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index ef4b4444b90..fa26bf76104 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -1052,13 +1052,14 @@ WHERE p1.conproc = 0 OR
SELECT p.oid, p.proname, c.oid, c.conname
FROM pg_proc p, pg_conversion c
WHERE p.oid = c.conproc AND
- (p.prorettype != 'void'::regtype OR p.proretset OR
- p.pronargs != 5 OR
+ (p.prorettype != 'int4'::regtype OR p.proretset OR
+ p.pronargs != 6 OR
p.proargtypes[0] != 'int4'::regtype OR
p.proargtypes[1] != 'int4'::regtype OR
p.proargtypes[2] != 'cstring'::regtype OR
p.proargtypes[3] != 'internal'::regtype OR
- p.proargtypes[4] != 'int4'::regtype);
+ p.proargtypes[4] != 'int4'::regtype OR
+ p.proargtypes[5] != 'bool'::regtype);
oid | proname | oid | conname
-----+---------+-----+---------
(0 rows)
diff --git a/src/test/regress/input/create_function_1.source b/src/test/regress/input/create_function_1.source
index 412e339fcf2..6ba37fe63b6 100644
--- a/src/test/regress/input/create_function_1.source
+++ b/src/test/regress/input/create_function_1.source
@@ -78,6 +78,10 @@ CREATE FUNCTION test_opclass_options_func(internal)
AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'
LANGUAGE C;
+CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
+ AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'
+ LANGUAGE C;
+
-- Things that shouldn't work:
CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
diff --git a/src/test/regress/output/create_function_1.source b/src/test/regress/output/create_function_1.source
index 4d78fa12289..cb38a039bf4 100644
--- a/src/test/regress/output/create_function_1.source
+++ b/src/test/regress/output/create_function_1.source
@@ -68,6 +68,9 @@ CREATE FUNCTION test_opclass_options_func(internal)
RETURNS void
AS '@libdir@/regress@DLSUFFIX@', 'test_opclass_options_func'
LANGUAGE C;
+CREATE FUNCTION test_enc_conversion(bytea, name, name, bool, validlen OUT int, result OUT bytea)
+ AS '@libdir@/regress@DLSUFFIX@', 'test_enc_conversion'
+ LANGUAGE C;
-- Things that shouldn't work:
CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
AS 'SELECT ''not an integer'';';
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index 32ab9ed6b53..1990cbb6a13 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -23,12 +23,15 @@
#include "access/htup_details.h"
#include "access/transam.h"
#include "access/xact.h"
+#include "catalog/namespace.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "commands/sequence.h"
#include "commands/trigger.h"
#include "executor/executor.h"
#include "executor/spi.h"
+#include "funcapi.h"
+#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/supportnodes.h"
#include "optimizer/optimizer.h"
@@ -1060,3 +1063,134 @@ test_opclass_options_func(PG_FUNCTION_ARGS)
{
PG_RETURN_NULL();
}
+
+/*
+ * Regression-test helper: call an encoding conversion or verification function.
+ * When src_enc equals dest_enc nothing is converted; the input is only verified.
+ * Arguments:
+ * string bytea -- string to convert
+ * src_enc name -- source encoding
+ * dest_enc name -- destination encoding
+ * noError bool -- if set, don't ereport() on invalid or untranslatable
+ * input
+ *
+ * Result is a tuple with two attributes:
+ * int4 -- number of input bytes successfully converted (or verified)
+ * bytea -- converted string (the verified bytes when only verifying)
+ */
+PG_FUNCTION_INFO_V1(test_enc_conversion);
+Datum
+test_enc_conversion(PG_FUNCTION_ARGS)
+{
+ bytea *string = PG_GETARG_BYTEA_PP(0);
+ char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
+ int src_encoding = pg_char_to_encoding(src_encoding_name); /* checked for < 0 below */
+ char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
+ int dest_encoding = pg_char_to_encoding(dest_encoding_name); /* checked for < 0 below */
+ bool noError = PG_GETARG_BOOL(3);
+ TupleDesc tupdesc;
+ char *src; /* start of the input bytes */
+ char *dst; /* scratch output buffer, conversion path only */
+ bytea *retval; /* second result column */
+ Size srclen;
+ Size dstsize;
+ Oid proc;
+ int convertedbytes; /* first result column */
+ int dstlen;
+ Datum values[2];
+ bool nulls[2];
+ HeapTuple tuple;
+
+ if (src_encoding < 0) /* a negative encoding id is treated as an unknown name */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid source encoding name \"%s\"",
+ src_encoding_name)));
+ if (dest_encoding < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid destination encoding name \"%s\"",
+ dest_encoding_name)));
+
+ /* Build a tuple descriptor for our (validlen, result) composite result type */
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+ tupdesc = BlessTupleDesc(tupdesc);
+
+ srclen = VARSIZE_ANY_EXHDR(string); /* presumably the data length without the varlena header */
+ src = VARDATA_ANY(string);
+
+ if (src_encoding == dest_encoding)
+ {
+ /* same encoding on both sides: just check that the source string is valid */
+ int oklen; /* used below as the length of the valid leading part of src */
+
+ oklen = pg_encoding_verifymbstr(src_encoding, src, srclen);
+
+ if (oklen == srclen)
+ {
+ convertedbytes = oklen;
+ retval = string; /* whole input accepted: hand back the argument itself, no copy */
+ }
+ else if (!noError)
+ {
+ report_invalid_encoding(src_encoding, src + oklen, srclen - oklen); /* NOTE(review): presumably never returns; retval and convertedbytes stay unset on this path */
+ }
+ else
+ {
+ /*
+ * noError: build a new bytea holding only the first oklen bytes.
+ */
+ Assert(oklen < srclen);
+ convertedbytes = oklen;
+ retval = (bytea *) palloc(oklen + VARHDRSZ);
+ SET_VARSIZE(retval, oklen + VARHDRSZ);
+ memcpy(VARDATA(retval), src, oklen);
+ }
+ }
+ else
+ {
+ proc = FindDefaultConversionProc(src_encoding, dest_encoding);
+ if (!OidIsValid(proc))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("default conversion function for encoding \"%s\" to \"%s\" does not exist",
+ pg_encoding_to_char(src_encoding),
+ pg_encoding_to_char(dest_encoding))));
+
+ if (srclen >= (MaxAllocSize / (Size) MAX_CONVERSION_GROWTH)) /* guards the dstsize multiplication below */
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("out of memory"),
+ errdetail("String of %d bytes is too long for encoding conversion.",
+ (int) srclen)));
+
+ dstsize = (Size) srclen * MAX_CONVERSION_GROWTH + 1; /* +1 presumably for a terminating NUL -- confirm */
+ dst = MemoryContextAlloc(CurrentMemoryContext, dstsize);
+
+ /* perform conversion; the return value becomes the first result column */
+ convertedbytes = pg_do_encoding_conversion_buf(proc,
+ src_encoding,
+ dest_encoding,
+ (unsigned char *) src, srclen,
+ (unsigned char *) dst, dstsize,
+ noError);
+ dstlen = strlen(dst); /* NOTE(review): relies on the conversion leaving dst NUL-terminated -- confirm */
+
+ /*
+ * copy the dstlen output bytes into a freshly allocated bytea.
+ */
+ retval = (bytea *) palloc(dstlen + VARHDRSZ);
+ SET_VARSIZE(retval, dstlen + VARHDRSZ);
+ memcpy(VARDATA(retval), dst, dstlen);
+
+ pfree(dst); /* scratch buffer is no longer needed once copied */
+ }
+
+ MemSet(nulls, 0, sizeof(nulls)); /* neither result column is NULL */
+ values[0] = Int32GetDatum(convertedbytes);
+ values[1] = PointerGetDatum(retval);
+ tuple = heap_form_tuple(tupdesc, values, nulls);
+
+ PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
+}
diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql
index 02cf39f1ce9..ea85f20ed83 100644
--- a/src/test/regress/sql/conversion.sql
+++ b/src/test/regress/sql/conversion.sql
@@ -34,3 +34,188 @@ DROP CONVERSION mydef;
--
RESET SESSION AUTHORIZATION;
DROP USER regress_conversion_user;
+
+--
+-- Test built-in conversion functions.
+--
+
+-- Helper function to test a conversion. Uses the test_enc_conversion function
+-- that was created in the create_function_1 test.
+create or replace function test_conv(
+ input IN bytea,
+ src_encoding IN text,
+ dst_encoding IN text,
+
+ result OUT bytea,
+ errorat OUT bytea,
+ error OUT text)
+language plpgsql as
+$$
+declare
+ validlen int;
+begin
+ -- First try to perform the conversion with noError = false. If that errors out,
+ -- capture the error message, and try again with noError = true. The second call
+ -- should succeed and return the position of the error, return that too.
+ begin
+ select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, false);
+ errorat = NULL;
+ error := NULL;
+ exception when others then
+ error := sqlerrm;
+ select * into validlen, result from test_enc_conversion(input, src_encoding, dst_encoding, true);
+ errorat = substr(input, validlen + 1);
+ end;
+ return;
+end;
+$$;
+
+
+--
+-- UTF-8
+--
+CREATE TABLE utf8_inputs (inbytes bytea, description text);
+insert into utf8_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\xc3a4c3b6', 'valid, extra latin chars'),
+ ('\xd184d0bed0be', 'valid, cyrillic'),
+ ('\x666f6fe8b1a1', 'valid, kanji/Chinese'),
+ ('\xe382abe3829a', 'valid, two chars that combine to one in EUC_JIS_2004'),
+ ('\xe382ab', 'only first half of combined char in EUC_JIS_2004'),
+ ('\xe382abe382', 'incomplete combination when converted EUC_JIS_2004'),
+ ('\xecbd94eb81bceba6ac', 'valid, Hangul, Korean'),
+ ('\x666f6fefa8aa', 'valid, needs mapping function to convert to GB18030'),
+ ('\x66e8b1ff6f6f', 'invalid byte sequence'),
+ ('\x66006f', 'invalid, NUL byte'),
+ ('\x666f6fe8b100', 'invalid, NUL byte'),
+ ('\x666f6fe8b1', 'incomplete character at end');
+
+-- Test UTF-8 verification
+select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_inputs;
+-- Test conversions from UTF-8
+select description, inbytes, (test_conv(inbytes, 'utf8', 'euc_jis_2004')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin1')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin2')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'latin5')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'koi8r')).* from utf8_inputs;
+select description, inbytes, (test_conv(inbytes, 'utf8', 'gb18030')).* from utf8_inputs;
+
+--
+-- EUC_JIS_2004
+--
+CREATE TABLE euc_jis_2004_inputs (inbytes bytea, description text);
+insert into euc_jis_2004_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6fbedd', 'valid'),
+ ('\xa5f7', 'valid, translates to two UTF-8 chars '),
+ ('\xbeddbe', 'incomplete char '),
+ ('\x666f6f00bedd', 'invalid, NUL byte'),
+ ('\x666f6fbe00dd', 'invalid, NUL byte'),
+ ('\x666f6fbedd00', 'invalid, NUL byte'),
+ ('\xbe04', 'invalid byte sequence');
+
+-- Test EUC_JIS_2004 verification
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'euc_jis_2004')).* from euc_jis_2004_inputs;
+-- Test conversions from EUC_JIS_2004
+select description, inbytes, (test_conv(inbytes, 'euc_jis_2004', 'utf8')).* from euc_jis_2004_inputs;
+
+--
+-- SHIFT-JIS-2004
+--
+CREATE TABLE shiftjis2004_inputs (inbytes bytea, description text);
+insert into shiftjis2004_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6f8fdb', 'valid'),
+ ('\x666f6f81c0', 'valid, no translation to UTF-8'),
+ ('\x666f6f82f5', 'valid, translates to two UTF-8 chars '),
+ ('\x666f6f8fdb8f', 'incomplete char '),
+ ('\x666f6f820a', 'incomplete char, followed by newline '),
+ ('\x666f6f008fdb', 'invalid, NUL byte'),
+ ('\x666f6f8f00db', 'invalid, NUL byte'),
+ ('\x666f6f8fdb00', 'invalid, NUL byte');
+
+-- Test SHIFT-JIS-2004 verification
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'shiftjis2004')).* from shiftjis2004_inputs;
+-- Test conversions from SHIFT-JIS-2004
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'utf8')).* from shiftjis2004_inputs;
+select description, inbytes, (test_conv(inbytes, 'shiftjis2004', 'euc_jis_2004')).* from shiftjis2004_inputs;
+
+--
+-- GB18030
+--
+CREATE TABLE gb18030_inputs (inbytes bytea, description text);
+insert into gb18030_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6fcff3', 'valid'),
+ ('\x666f6f8431a530', 'valid, no translation to UTF-8'),
+ ('\x666f6f84309c38', 'valid, translates to UTF-8 by mapping function'),
+ ('\x666f6f84309c', 'incomplete char '),
+ ('\x666f6f84309c0a', 'incomplete char, followed by newline '),
+ ('\x666f6f84309c3800', 'invalid, NUL byte'),
+ ('\x666f6f84309c0038', 'invalid, NUL byte');
+
+-- Test GB18030 verification
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'gb18030')).* from gb18030_inputs;
+-- Test conversions from GB18030
+select description, inbytes, (test_conv(inbytes, 'gb18030', 'utf8')).* from gb18030_inputs;
+
+
+--
+-- ISO-8859-5
+--
+CREATE TABLE iso8859_5_inputs (inbytes bytea, description text);
+insert into iso8859_5_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\xe4dede', 'valid'),
+ ('\x00', 'invalid, NUL byte'),
+ ('\xe400dede', 'invalid, NUL byte'),
+ ('\xe4dede00', 'invalid, NUL byte');
+
+-- Test ISO-8859-5 verification
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'iso8859-5')).* from iso8859_5_inputs;
+-- Test conversions from ISO-8859-5
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'utf8')).* from iso8859_5_inputs;
+select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from iso8859_5_inputs;
+select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;
+
+--
+-- Big5
+--
+CREATE TABLE big5_inputs (inbytes bytea, description text);
+insert into big5_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x666f6fb648', 'valid'),
+ ('\x666f6fa27f', 'valid, no translation to UTF-8'),
+ ('\x666f6fb60048', 'invalid, NUL byte'),
+ ('\x666f6fb64800', 'invalid, NUL byte');
+
+-- Test Big5 verification
+select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from big5_inputs;
+-- Test conversions from Big5
+select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs;
+select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;
+
+--
+-- MULE_INTERNAL
+--
+CREATE TABLE mic_inputs (inbytes bytea, description text);
+insert into mic_inputs values
+ ('\x666f6f', 'valid, pure ASCII'),
+ ('\x8bc68bcf8bcf', 'valid (in KOI8R)'),
+ ('\x8bc68bcf8b', 'invalid,incomplete char'),
+ ('\x92bedd', 'valid (in SHIFT_JIS)'),
+ ('\x92be', 'invalid, incomplete char)'),
+ ('\x666f6f95a3c1', 'valid (in Big5)'),
+ ('\x666f6f95a3', 'invalid, incomplete char'),
+ ('\x9200bedd', 'invalid, NUL byte'),
+ ('\x92bedd00', 'invalid, NUL byte'),
+ ('\x8b00c68bcf8bcf', 'invalid, NUL byte');
+
+-- Test MULE_INTERNAL verification
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'mule_internal')).* from mic_inputs;
+-- Test conversions from MULE_INTERNAL
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
+select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql
index bbd3834b634..04691745981 100644
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -556,13 +556,14 @@ WHERE p1.conproc = 0 OR
SELECT p.oid, p.proname, c.oid, c.conname
FROM pg_proc p, pg_conversion c
WHERE p.oid = c.conproc AND
- (p.prorettype != 'void'::regtype OR p.proretset OR
- p.pronargs != 5 OR
+ (p.prorettype != 'int4'::regtype OR p.proretset OR
+ p.pronargs != 6 OR
p.proargtypes[0] != 'int4'::regtype OR
p.proargtypes[1] != 'int4'::regtype OR
p.proargtypes[2] != 'cstring'::regtype OR
p.proargtypes[3] != 'internal'::regtype OR
- p.proargtypes[4] != 'int4'::regtype);
+ p.proargtypes[4] != 'int4'::regtype OR
+ p.proargtypes[5] != 'bool'::regtype);
-- Check for conprocs that don't perform the specific conversion that
-- pg_conversion alleges they do, by trying to invoke each conversion