aboutsummaryrefslogtreecommitdiff
path: root/src/backend/regex/utftest.c
diff options
context:
space:
mode:
authorMarc G. Fournier <scrappy@hub.org>1998-03-15 07:39:04 +0000
committerMarc G. Fournier <scrappy@hub.org>1998-03-15 07:39:04 +0000
commit661ecf3c48e16a9add216287eb969d7615e47968 (patch)
tree91b54d5905aa2e22bd0ae9ea8c6b0f3cab75d3f4 /src/backend/regex/utftest.c
parent31a925c4d07675bc098a742ee9ca642ec79a40ee (diff)
downloadpostgresql-661ecf3c48e16a9add216287eb969d7615e47968.tar.gz
postgresql-661ecf3c48e16a9add216287eb969d7615e47968.zip
From: t-ishii@sra.co.jp
Included are patches intended for allowing PostgreSQL to handle multi-byte character sets such as EUC (Extended Unix Code), Unicode and Mule internal code. With the MB patch you can use multi-byte character sets in regexp and LIKE. The encoding system chosen is determined at compile time. To enable the MB extension, you need to define a variable "MB" in Makefile.global or in Makefile.custom. For further information please take a look at README.mb under the doc directory. (Note that unlike the "jp patch" I do not use a modified GNU regexp any more. I changed Henry Spencer's regexp that comes with PostgreSQL.)
Diffstat (limited to 'src/backend/regex/utftest.c')
-rw-r--r--src/backend/regex/utftest.c33
1 files changed, 33 insertions, 0 deletions
diff --git a/src/backend/regex/utftest.c b/src/backend/regex/utftest.c
new file mode 100644
index 00000000000..28baf7255ef
--- /dev/null
+++ b/src/backend/regex/utftest.c
@@ -0,0 +1,33 @@
+/*
+ * testing of utf2wchar()
+ * $Id: utftest.c,v 1.1 1998/03/15 07:38:37 scrappy Exp $
+ */
+#include <stdio.h>
+
+#include <regex/regex.h>
+#include <regex/utils.h>
+#include <regex/regex2.h>
+
+#include <regex/pg_wchar.h>
+
+/*
+ * Test driver for pg_utf2wchar().
+ *
+ * Feeds each of the three sample byte strings from RFC 2044 to
+ * pg_utf2wchar() and prints the converted pg_wchar values as
+ * space-separated hexadecimal numbers, one output line per sample.
+ * The result has to be compared by eye against the UCS values quoted
+ * next to each sample below; nothing is checked automatically.
+ *
+ * Always returns 0.
+ */
+int
+main(void)
+{
+	/* Example 1 from RFC2044 (UCS: 0041 2262 0391 002E) */
+	char		utf1[] = {0x41, 0xe2, 0x89, 0xa2, 0xce, 0x91, 0x2e, 0};
+
+	/* Example 2 from RFC2044 (UCS: 0048 0069 0020 004D 006F 006D 0020 263A 0021) */
+	char		utf2[] = {0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0xe2, 0x98, 0xba, 0x21, 0};
+
+	/* Example 3 from RFC2044 (UCS: 65E5 672C 8A9E) */
+	char		utf3[] = {0xe6, 0x97, 0xa5, 0xe6, 0x9c, 0xac, 0xe8, 0xaa, 0x9e, 0};
+
+	char	   *utf[] = {utf1, utf2, utf3};
+
+	/* Element count as an int so the loop compares like-signed values. */
+	const int	nsamples = (int) (sizeof(utf) / sizeof(utf[0]));
+
+	/* The longest sample is 11 bytes plus terminator, so 128 slots are ample. */
+	pg_wchar	ucs[128];
+	pg_wchar   *p;
+	int			i;
+
+	for (i = 0; i < nsamples; i++)
+	{
+		pg_utf2wchar(utf[i], ucs);
+
+		/*
+		 * Walk the output until a zero element; this relies on
+		 * pg_utf2wchar() zero-terminating what it writes.
+		 */
+		for (p = ucs; *p; p++)
+		{
+			/* Cast so the argument matches %x whatever pg_wchar's type is. */
+			printf("%04x ", (unsigned int) *p);
+		}
+		printf("\n");
+	}
+
+	return 0;
+}