aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2008-03-20 10:50:35 +0000
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2008-03-20 10:50:35 +0000
commit: b775a3257b33bb22eca6ea77d3f49b2f14e2b732 (patch)
tree: 77d37eb225d7febf4409f284526dbac311e164ed /src
parent: 90f58833638e4a0f5a0f62ff29f7e98d5a6ade83 (diff)
download: postgresql-b775a3257b33bb22eca6ea77d3f49b2f14e2b732.tar.gz
postgresql-b775a3257b33bb22eca6ea77d3f49b2f14e2b732.zip
Add the missing Cyrillic "Yo" characters ('e' and 'E' with two dots) to the
ISO_8859-5 <-> MULE_INTERNAL conversion tables. This was discovered when trying to convert a string containing those characters from ISO_8859-5 to Windows-1251, because we use MULE_INTERNAL/KOI8R as an intermediate encoding between those two. While the missing "Yo" was just an omission in the conversion tables, there are a few other characters like the "Numero" sign ("No" as a single character) that exist in all the other Cyrillic encodings (win1251, ISO_8859-5 and cp866), but not in KOI8R. Added comments about that. Patch by Sergey Burladyan. Back-patch to 7.4.
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c 21
1 files changed, 16 insertions, 5 deletions
diff --git a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c
index 0ccf97cfab8..dc900da17b7 100644
--- a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c
+++ b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c,v 1.9.4.1 2006/05/21 20:06:16 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c,v 1.9.4.2 2008/03/20 10:50:35 heikki Exp $
*
*-------------------------------------------------------------------------
*/
@@ -299,6 +299,12 @@ alt_to_win1251(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_WIN1251);
Assert(len >= 0);
+ /*
+ * Note: There are a few characters like the "Numero" sign that exist in
+ * all the other cyrillic encodings (win1251, ISO_8859-5 and cp866), but
+ * not in KOI8R. As we use MULE_INTERNAL/KOI8R as an intermediary, we
+ * will fail to convert those characters.
+ */
buf = palloc(len * ENCODING_GROWTH_RATE);
alt2mic(src, buf, len);
mic2win1251(buf, dest, strlen(buf));
@@ -319,6 +325,7 @@ win1251_to_alt(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_ALT);
Assert(len >= 0);
+ /* Use mic/KOI8R as intermediary, see comment in alt_to_win1251() */
buf = palloc(len * ENCODING_GROWTH_RATE);
win12512mic(src, buf, len);
mic2alt(buf, dest, strlen(buf));
@@ -379,6 +386,7 @@ iso_to_win1251(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_WIN1251);
Assert(len >= 0);
+ /* Use mic/KOI8R as intermediary, see comment in alt_to_win1251() */
buf = palloc(len * ENCODING_GROWTH_RATE);
iso2mic(src, buf, len);
mic2win1251(buf, dest, strlen(buf));
@@ -399,6 +407,7 @@ win1251_to_iso(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_ISO_8859_5);
Assert(len >= 0);
+ /* Use mic/KOI8R as intermediary, see comment in alt_to_win1251() */
buf = palloc(len * ENCODING_GROWTH_RATE);
win12512mic(src, buf, len);
mic2iso(buf, dest, strlen(buf));
@@ -419,6 +428,7 @@ iso_to_alt(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_ALT);
Assert(len >= 0);
+ /* Use mic/KOI8R as intermediary, see comment in alt_to_win1251() */
buf = palloc(len * ENCODING_GROWTH_RATE);
iso2mic(src, buf, len);
mic2alt(buf, dest, strlen(buf));
@@ -439,6 +449,7 @@ alt_to_iso(PG_FUNCTION_ARGS)
Assert(PG_GETARG_INT32(1) == PG_ISO_8859_5);
Assert(len >= 0);
+ /* Use mic/KOI8R as intermediary, see comment in alt_to_win1251() */
buf = palloc(len * ENCODING_GROWTH_RATE);
alt2mic(src, buf, len);
mic2iso(buf, dest, strlen(buf));
@@ -481,7 +492,7 @@ iso2mic(const unsigned char *l, unsigned char *p, int len)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0xb3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
@@ -491,7 +502,7 @@ iso2mic(const unsigned char *l, unsigned char *p, int len)
0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0xa3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
@@ -507,9 +518,9 @@ mic2iso(const unsigned char *mic, unsigned char *p, int len)
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xf1, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xa1, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xee, 0xd0, 0xd1, 0xe6, 0xd4, 0xd5, 0xe4, 0xd3,
0xe5, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde,