aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/common/unicode/Makefile6
-rw-r--r--src/common/unicode/README45
-rw-r--r--src/common/unicode/category_test.c222
-rw-r--r--src/common/unicode/generate-unicode_category_table.pl390
-rw-r--r--src/common/unicode/meson.build4
-rw-r--r--src/common/unicode_category.c318
-rw-r--r--src/include/common/unicode_category.h27
-rw-r--r--src/include/common/unicode_category_table.h3694
8 files changed, 4604 insertions, 102 deletions
diff --git a/src/common/unicode/Makefile b/src/common/unicode/Makefile
index 04d81dd5cb5..27f0408d8b8 100644
--- a/src/common/unicode/Makefile
+++ b/src/common/unicode/Makefile
@@ -29,13 +29,13 @@ update-unicode: unicode_category_table.h unicode_east_asian_fw_table.h unicode_n
# These files are part of the Unicode Character Database. Download
# them on demand. The dependency on Makefile.global is for
# UNICODE_VERSION.
-CompositionExclusions.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt UnicodeData.txt: $(top_builddir)/src/Makefile.global
+CompositionExclusions.txt DerivedCoreProperties.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt PropList.txt UnicodeData.txt: $(top_builddir)/src/Makefile.global
$(DOWNLOAD) https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$(@F)
unicode_version.h: generate-unicode_version.pl
$(PERL) $< --version $(UNICODE_VERSION)
-unicode_category_table.h: generate-unicode_category_table.pl UnicodeData.txt
+unicode_category_table.h: generate-unicode_category_table.pl DerivedCoreProperties.txt PropList.txt UnicodeData.txt
$(PERL) $<
# Generation of conversion tables used for string normalization with
@@ -82,4 +82,4 @@ clean:
rm -f $(OBJS) category_test category_test.o norm_test norm_test.o
distclean: clean
- rm -f CompositionExclusions.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt UnicodeData.txt norm_test_table.h unicode_category_table.h unicode_norm_table.h
+ rm -f CompositionExclusions.txt DerivedCoreProperties.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt PropList.txt UnicodeData.txt norm_test_table.h unicode_category_table.h unicode_norm_table.h
diff --git a/src/common/unicode/README b/src/common/unicode/README
index 56956f6a65f..110ce5765d5 100644
--- a/src/common/unicode/README
+++ b/src/common/unicode/README
@@ -1,22 +1,35 @@
-This directory contains tools to generate the tables in
-src/include/common/unicode_norm.h, used for Unicode normalization. The
-generated .h file is included in the source tree, so these are normally not
-needed to build PostgreSQL, only if you need to re-generate the .h file
-from the Unicode data files for some reason, e.g. to update to a new version
-of Unicode.
+This directory contains tools to download new Unicode data files and
+generate static tables. These tables are used to normalize or
+determine various properties of Unicode data.
-Generating unicode_norm_table.h
--------------------------------
+The generated header files are copied to src/include/common/, and
+included in the source tree, so these tools are not normally required
+to build PostgreSQL.
-Run
+Update Unicode Version
+----------------------
+
+Edit src/Makefile.global.in and src/common/unicode/meson.build
+to update the UNICODE_VERSION.
+
+Then, generate the new header files with:
make update-unicode
-from the top level of the source tree and commit the result.
+or if using meson:
+
+ ninja update-unicode
+
+from the top level of the source tree. Examine the result to make sure
+the changes look reasonable (that is, that the diff size and scope are
+comparable to the Unicode changes since the last update), and then
+commit it.
Tests
-----
+Normalization tests:
+
The Unicode consortium publishes a comprehensive test suite for the
normalization algorithm, in a file called NormalizationTest.txt. This
directory also contains a perl script and some C code, to run our
@@ -26,3 +39,15 @@ To download NormalizationTest.txt and run the tests:
make normalization-check
This is also run as part of the update-unicode target.
+
+Category & Property tests:
+
+The file category_test.c exhaustively compares the category and
+properties of each code point as determined by the generated tables
+with the category and properties as reported by ICU. For this test to
+be effective, the version of the Unicode data files must be similar to
+the version of Unicode on which ICU is based, so attempt to match the
+versions as closely as possible. With mismatched Unicode versions, the
+test skips codepoints that are assigned in one version and not the
+other, and may falsely report failures. This test is run as a part of
+the update-unicode target.
diff --git a/src/common/unicode/category_test.c b/src/common/unicode/category_test.c
index f1aaac0f613..e823044d63a 100644
--- a/src/common/unicode/category_test.c
+++ b/src/common/unicode/category_test.c
@@ -1,6 +1,6 @@
/*-------------------------------------------------------------------------
* category_test.c
- * Program to test Unicode general category functions.
+ * Program to test Unicode general category and character properties.
*
* Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group
*
@@ -14,17 +14,23 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <wctype.h>
#ifdef USE_ICU
#include <unicode/uchar.h>
#endif
+
#include "common/unicode_category.h"
#include "common/unicode_version.h"
+static int pg_unicode_version = 0;
+#ifdef USE_ICU
+static int icu_unicode_version = 0;
+#endif
+
/*
* Parse version into integer for easy comparison.
*/
-#ifdef USE_ICU
static int
parse_unicode_version(const char *version)
{
@@ -39,57 +45,175 @@ parse_unicode_version(const char *version)
return major * 100 + minor;
}
-#endif
+#ifdef USE_ICU
/*
- * Exhaustively test that the Unicode category for each codepoint matches that
- * returned by ICU.
+ * Test Postgres Unicode tables by comparing with ICU. Test the General
+ * Category; the properties Alphabetic, Lowercase, Uppercase, Cased,
+ * Case_Ignorable, White_Space, Hex_Digit, Join_Control; and character classes.
*/
-int
-main(int argc, char **argv)
+static void
+icu_test()
{
-#ifdef USE_ICU
- int pg_unicode_version = parse_unicode_version(PG_UNICODE_VERSION);
- int icu_unicode_version = parse_unicode_version(U_UNICODE_VERSION);
+ int successful = 0;
int pg_skipped_codepoints = 0;
int icu_skipped_codepoints = 0;
- printf("category_test: Postgres Unicode version:\t%s\n", PG_UNICODE_VERSION);
- printf("category_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION);
-
- for (UChar32 code = 0; code <= 0x10ffff; code++)
+ for (pg_wchar code = 0; code <= 0x10ffff; code++)
{
uint8_t pg_category = unicode_category(code);
uint8_t icu_category = u_charType(code);
+ /* Property tests */
+ bool prop_alphabetic = pg_u_prop_alphabetic(code);
+ bool prop_lowercase = pg_u_prop_lowercase(code);
+ bool prop_uppercase = pg_u_prop_uppercase(code);
+ bool prop_cased = pg_u_prop_cased(code);
+ bool prop_case_ignorable = pg_u_prop_case_ignorable(code);
+ bool prop_white_space = pg_u_prop_white_space(code);
+ bool prop_hex_digit = pg_u_prop_hex_digit(code);
+ bool prop_join_control = pg_u_prop_join_control(code);
+
+ bool icu_prop_alphabetic = u_hasBinaryProperty(
+ code, UCHAR_ALPHABETIC);
+ bool icu_prop_lowercase = u_hasBinaryProperty(
+ code, UCHAR_LOWERCASE);
+ bool icu_prop_uppercase = u_hasBinaryProperty(
+ code, UCHAR_UPPERCASE);
+ bool icu_prop_cased = u_hasBinaryProperty(
+ code, UCHAR_CASED);
+ bool icu_prop_case_ignorable = u_hasBinaryProperty(
+ code, UCHAR_CASE_IGNORABLE);
+ bool icu_prop_white_space = u_hasBinaryProperty(
+ code, UCHAR_WHITE_SPACE);
+ bool icu_prop_hex_digit = u_hasBinaryProperty(
+ code, UCHAR_HEX_DIGIT);
+ bool icu_prop_join_control = u_hasBinaryProperty(
+ code, UCHAR_JOIN_CONTROL);
+
+ /*
+ * Compare with ICU for character classes using:
+ *
+ * https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/uchar_8h.html#details
+ *
+ * which describes how to use ICU to test for membership in regex
+ * character classes.
+ *
+ * NB: the document suggests testing for some properties such as
+ * UCHAR_POSIX_ALNUM, but that doesn't mean that we're testing for the
+ * "POSIX Compatible" character classes.
+ */
+ bool isalpha = pg_u_isalpha(code);
+ bool islower = pg_u_islower(code);
+ bool isupper = pg_u_isupper(code);
+ bool ispunct = pg_u_ispunct(code, false);
+ bool isdigit = pg_u_isdigit(code, false);
+ bool isxdigit = pg_u_isxdigit(code, false);
+ bool isalnum = pg_u_isalnum(code, false);
+ bool isspace = pg_u_isspace(code);
+ bool isblank = pg_u_isblank(code);
+ bool iscntrl = pg_u_iscntrl(code);
+ bool isgraph = pg_u_isgraph(code);
+ bool isprint = pg_u_isprint(code);
+
+ bool icu_isalpha = u_isUAlphabetic(code);
+ bool icu_islower = u_isULowercase(code);
+ bool icu_isupper = u_isUUppercase(code);
+ bool icu_ispunct = u_ispunct(code);
+ bool icu_isdigit = u_isdigit(code);
+ bool icu_isxdigit = u_hasBinaryProperty(code,
+ UCHAR_POSIX_XDIGIT);
+ bool icu_isalnum = u_hasBinaryProperty(code,
+ UCHAR_POSIX_ALNUM);
+ bool icu_isspace = u_isUWhiteSpace(code);
+ bool icu_isblank = u_isblank(code);
+ bool icu_iscntrl = icu_category == PG_U_CONTROL;
+ bool icu_isgraph = u_hasBinaryProperty(code,
+ UCHAR_POSIX_GRAPH);
+ bool icu_isprint = u_hasBinaryProperty(code,
+ UCHAR_POSIX_PRINT);
+
+ /*
+ * A version mismatch means that some assigned codepoints in the newer
+ * version may be unassigned in the older version. That's OK, though
+ * the test will not cover those codepoints marked unassigned in the
+ * older version (that is, it will no longer be an exhaustive test).
+ */
+ if (pg_category == PG_U_UNASSIGNED &&
+ icu_category != PG_U_UNASSIGNED &&
+ pg_unicode_version < icu_unicode_version)
+ {
+ pg_skipped_codepoints++;
+ continue;
+ }
+
+ if (icu_category == PG_U_UNASSIGNED &&
+ pg_category != PG_U_UNASSIGNED &&
+ icu_unicode_version < pg_unicode_version)
+ {
+ icu_skipped_codepoints++;
+ continue;
+ }
+
if (pg_category != icu_category)
{
- /*
- * A version mismatch means that some assigned codepoints in the
- * newer version may be unassigned in the older version. That's
- * OK, though the test will not cover those codepoints marked
- * unassigned in the older version (that is, it will no longer be
- * an exhaustive test).
- */
- if (pg_category == PG_U_UNASSIGNED &&
- pg_unicode_version < icu_unicode_version)
- pg_skipped_codepoints++;
- else if (icu_category == PG_U_UNASSIGNED &&
- icu_unicode_version < pg_unicode_version)
- icu_skipped_codepoints++;
- else
- {
- printf("category_test: FAILURE for codepoint 0x%06x\n", code);
- printf("category_test: Postgres category: %02d %s %s\n", pg_category,
- unicode_category_abbrev(pg_category),
- unicode_category_string(pg_category));
- printf("category_test: ICU category: %02d %s %s\n", icu_category,
- unicode_category_abbrev(icu_category),
- unicode_category_string(icu_category));
- printf("\n");
- exit(1);
- }
+ printf("category_test: FAILURE for codepoint 0x%06x\n", code);
+ printf("category_test: Postgres category: %02d %s %s\n", pg_category,
+ unicode_category_abbrev(pg_category),
+ unicode_category_string(pg_category));
+ printf("category_test: ICU category: %02d %s %s\n", icu_category,
+ unicode_category_abbrev(icu_category),
+ unicode_category_string(icu_category));
+ printf("\n");
+ exit(1);
+ }
+
+ if (prop_alphabetic != icu_prop_alphabetic ||
+ prop_lowercase != icu_prop_lowercase ||
+ prop_uppercase != icu_prop_uppercase ||
+ prop_cased != icu_prop_cased ||
+ prop_case_ignorable != icu_prop_case_ignorable ||
+ prop_white_space != icu_prop_white_space ||
+ prop_hex_digit != icu_prop_hex_digit ||
+ prop_join_control != icu_prop_join_control)
+ {
+ printf("category_test: FAILURE for codepoint 0x%06x\n", code);
+ printf("category_test: Postgres property alphabetic/lowercase/uppercase/cased/case_ignorable/white_space/hex_digit/join_control: %d/%d/%d/%d/%d/%d/%d/%d\n",
+ prop_alphabetic, prop_lowercase, prop_uppercase,
+ prop_cased, prop_case_ignorable,
+ prop_white_space, prop_hex_digit, prop_join_control);
+ printf("category_test: ICU property alphabetic/lowercase/uppercase/cased/case_ignorable/white_space/hex_digit/join_control: %d/%d/%d/%d/%d/%d/%d/%d\n",
+ icu_prop_alphabetic, icu_prop_lowercase, icu_prop_uppercase,
+ icu_prop_cased, icu_prop_case_ignorable,
+ icu_prop_white_space, icu_prop_hex_digit, icu_prop_join_control);
+ printf("\n");
+ exit(1);
}
+
+ if (isalpha != icu_isalpha ||
+ islower != icu_islower ||
+ isupper != icu_isupper ||
+ ispunct != icu_ispunct ||
+ isdigit != icu_isdigit ||
+ isxdigit != icu_isxdigit ||
+ isalnum != icu_isalnum ||
+ isspace != icu_isspace ||
+ isblank != icu_isblank ||
+ iscntrl != icu_iscntrl ||
+ isgraph != icu_isgraph ||
+ isprint != icu_isprint)
+ {
+ printf("category_test: FAILURE for codepoint 0x%06x\n", code);
+ printf("category_test: Postgres class alpha/lower/upper/punct/digit/xdigit/alnum/space/blank/cntrl/graph/print: %d/%d/%d/%d/%d/%d/%d/%d/%d/%d/%d/%d\n",
+ isalpha, islower, isupper, ispunct, isdigit, isxdigit, isalnum, isspace, isblank, iscntrl, isgraph, isprint);
+ printf("category_test: ICU class alpha/lower/upper/punct/digit/xdigit/alnum/space/blank/cntrl/graph/print: %d/%d/%d/%d/%d/%d/%d/%d/%d/%d/%d/%d\n",
+ icu_isalpha, icu_islower, icu_isupper, icu_ispunct, icu_isdigit, icu_isxdigit, icu_isalnum, icu_isspace, icu_isblank, icu_iscntrl, icu_isgraph, icu_isprint);
+ printf("\n");
+ exit(1);
+ }
+
+ if (pg_category != PG_U_UNASSIGNED)
+ successful++;
}
if (pg_skipped_codepoints > 0)
@@ -99,10 +223,22 @@ main(int argc, char **argv)
printf("category_test: skipped %d codepoints unassigned in ICU due to Unicode version mismatch\n",
icu_skipped_codepoints);
- printf("category_test: success\n");
- exit(0);
+ printf("category_test: ICU test: %d codepoints successful\n", successful);
+}
+#endif
+
+int
+main(int argc, char **argv)
+{
+ pg_unicode_version = parse_unicode_version(PG_UNICODE_VERSION);
+ printf("category_test: Postgres Unicode version:\t%s\n", PG_UNICODE_VERSION);
+
+#ifdef USE_ICU
+ icu_unicode_version = parse_unicode_version(U_UNICODE_VERSION);
+ printf("category_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION);
+
+ icu_test();
#else
- printf("category_test: ICU support required for test; skipping\n");
- exit(0);
+ printf("category_test: ICU not available; skipping\n");
#endif
}
diff --git a/src/common/unicode/generate-unicode_category_table.pl b/src/common/unicode/generate-unicode_category_table.pl
index a50c87b7e96..12914c02433 100644
--- a/src/common/unicode/generate-unicode_category_table.pl
+++ b/src/common/unicode/generate-unicode_category_table.pl
@@ -25,6 +25,10 @@ my $output_table_file = "$output_path/unicode_category_table.h";
my $FH;
+# create a table of all codepoints < 0x80 and their associated
+# categories and properties for fast lookups
+my %opt_ascii = ();
+
# Read entries from UnicodeData.txt into a list of codepoint ranges
# and their general category.
my @category_ranges = ();
@@ -48,21 +52,42 @@ while (my $line = <$FH>)
my $category = $elts[2];
die "codepoint out of range" if $code > 0x10FFFF;
- die "unassigned codepoint in UnicodeData.txt" if $category eq $CATEGORY_UNASSIGNED;
+ die "unassigned codepoint in UnicodeData.txt"
+ if $category eq $CATEGORY_UNASSIGNED;
+
+ if ($code < 0x80)
+ {
+ my @properties = ();
+ # No ASCII characters have category Titlecase_Letter,
+ # but include here for completeness.
+ push @properties, "PG_U_PROP_CASED" if ($category eq 'Lt');
+ $opt_ascii{$code} = {
+ Category => $category,
+ Properties => \@properties
+ };
+ }
- if (!defined($range_start)) {
+ if (!defined($range_start))
+ {
my $code_str = sprintf "0x%06x", $code;
- die if defined($range_end) || defined($range_category) || defined($gap_category);
+ die
+ if defined($range_end)
+ || defined($range_category)
+ || defined($gap_category);
die "unexpected first entry <..., Last>" if ($name =~ /Last>/);
- die "expected 0x000000 for first entry, got $code_str" if $code != 0x000000;
+ die "expected 0x000000 for first entry, got $code_str"
+ if $code != 0x000000;
# initialize
$range_start = $code;
$range_end = $code;
$range_category = $category;
- if ($name =~ /<.*, First>$/) {
+ if ($name =~ /<.*, First>$/)
+ {
$gap_category = $category;
- } else {
+ }
+ else
+ {
$gap_category = $CATEGORY_UNASSIGNED;
}
next;
@@ -71,10 +96,17 @@ while (my $line = <$FH>)
# Gap in codepoints detected. If it's a different category than
# the current range, emit the current range and initialize a new
# range representing the gap.
- if ($range_end + 1 != $code && $range_category ne $gap_category) {
- if ($range_category ne $CATEGORY_UNASSIGNED) {
- push(@category_ranges, {start => $range_start, end => $range_end,
- category => $range_category});
+ if ($range_end + 1 != $code && $range_category ne $gap_category)
+ {
+ if ($range_category ne $CATEGORY_UNASSIGNED)
+ {
+ push(
+ @category_ranges,
+ {
+ start => $range_start,
+ end => $range_end,
+ category => $range_category
+ });
}
$range_start = $range_end + 1;
$range_end = $code - 1;
@@ -82,27 +114,39 @@ while (my $line = <$FH>)
}
# different category; new range
- if ($range_category ne $category) {
- if ($range_category ne $CATEGORY_UNASSIGNED) {
- push(@category_ranges, {start => $range_start, end => $range_end,
- category => $range_category});
+ if ($range_category ne $category)
+ {
+ if ($range_category ne $CATEGORY_UNASSIGNED)
+ {
+ push(
+ @category_ranges,
+ {
+ start => $range_start,
+ end => $range_end,
+ category => $range_category
+ });
}
$range_start = $code;
$range_end = $code;
$range_category = $category;
}
- if ($name =~ /<.*, First>$/) {
- die "<..., First> entry unexpectedly follows another <..., First> entry"
+ if ($name =~ /<.*, First>$/)
+ {
+ die
+ "<..., First> entry unexpectedly follows another <..., First> entry"
if $gap_category ne $CATEGORY_UNASSIGNED;
$gap_category = $category;
}
- elsif ($name =~ /<.*, Last>$/) {
- die "<..., First> and <..., Last> entries have mismatching general category"
+ elsif ($name =~ /<.*, Last>$/)
+ {
+ die
+ "<..., First> and <..., Last> entries have mismatching general category"
if $gap_category ne $category;
$gap_category = $CATEGORY_UNASSIGNED;
}
- else {
+ else
+ {
die "unexpected entry found between <..., First> and <..., Last>"
if $gap_category ne $CATEGORY_UNASSIGNED;
}
@@ -115,13 +159,17 @@ die "<..., First> entry with no corresponding <..., Last> entry"
if $gap_category ne $CATEGORY_UNASSIGNED;
# emit final range
-if ($range_category ne $CATEGORY_UNASSIGNED) {
- push(@category_ranges, {start => $range_start, end => $range_end,
- category => $range_category});
+if ($range_category ne $CATEGORY_UNASSIGNED)
+{
+ push(
+ @category_ranges,
+ {
+ start => $range_start,
+ end => $range_end,
+ category => $range_category
+ });
}
-my $num_ranges = scalar @category_ranges;
-
# See: https://www.unicode.org/reports/tr44/#General_Category_Values
my $categories = {
Cn => 'PG_U_UNASSIGNED',
@@ -156,11 +204,146 @@ my $categories = {
Pf => 'PG_U_FINAL_PUNCTUATION'
};
-# Start writing out the output files
+# Find White_Space, Hex_Digit, and Join_Control characters
+my @white_space = ();
+my @hex_digits = ();
+my @join_control = ();
+open($FH, '<', "$output_path/PropList.txt")
+ or die "Could not open $output_path/PropList.txt: $!.";
+while (my $line = <$FH>)
+{
+ my $pattern = qr/([0-9A-F\.]+)\s*;\s*(\w+)\s*#.*/s;
+ next unless $line =~ $pattern;
+
+ my $code = $line =~ s/$pattern/$1/rg;
+ my $property = $line =~ s/$pattern/$2/rg;
+ my $start;
+ my $end;
+
+ if ($code =~ /\.\./)
+ {
+ # code range
+ my @sp = split /\.\./, $code;
+ $start = hex($sp[0]);
+ $end = hex($sp[1]);
+ }
+ else
+ {
+ # single code point
+ $start = hex($code);
+ $end = hex($code);
+ }
+
+ if ($property eq "White_Space")
+ {
+ push @white_space, { start => $start, end => $end };
+ for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+ {
+ push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_WHITE_SPACE";
+ }
+ }
+ elsif ($property eq "Hex_Digit")
+ {
+ push @hex_digits, { start => $start, end => $end };
+ for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+ {
+ push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_HEX_DIGIT";
+ }
+ }
+ elsif ($property eq "Join_Control")
+ {
+ push @join_control, { start => $start, end => $end };
+ for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+ {
+ push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_JOIN_CONTROL";
+ }
+ }
+}
+
+# Find Alphabetic, Lowercase, Uppercase, and Case_Ignorable characters
+my @alphabetic = ();
+my @lowercase = ();
+my @uppercase = ();
+my @case_ignorable = ();
+open($FH, '<', "$output_path/DerivedCoreProperties.txt")
+ or die "Could not open $output_path/DerivedCoreProperties.txt: $!.";
+while (my $line = <$FH>)
+{
+ my $pattern = qr/^([0-9A-F\.]+)\s*;\s*(\w+)\s*#.*$/s;
+ next unless $line =~ $pattern;
+
+ my $code = $line =~ s/$pattern/$1/rg;
+ my $property = $line =~ s/$pattern/$2/rg;
+ my $start;
+ my $end;
+
+ if ($code =~ /\.\./)
+ {
+ # code range
+ my @sp = split /\.\./, $code;
+ die "line: {$line} code: {$code} sp[0] {$sp[0]} sp[1] {$sp[1]}"
+ unless $sp[0] =~ /^[0-9A-F]+$/ && $sp[1] =~ /^[0-9A-F]+$/;
+ $start = hex($sp[0]);
+ $end = hex($sp[1]);
+ }
+ else
+ {
+ die "line: {$line} code: {$code}" unless $code =~ /^[0-9A-F]+$/;
+ # single code point
+ $start = hex($code);
+ $end = hex($code);
+ }
+
+ if ($property eq "Alphabetic")
+ {
+ push @alphabetic, { start => $start, end => $end };
+ for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+ {
+ push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_ALPHABETIC";
+ }
+ }
+ elsif ($property eq "Lowercase")
+ {
+ push @lowercase, { start => $start, end => $end };
+ for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+ {
+ push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_LOWERCASE";
+ push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_CASED";
+ }
+ }
+ elsif ($property eq "Uppercase")
+ {
+ push @uppercase, { start => $start, end => $end };
+ for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+ {
+ push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_UPPERCASE";
+ push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_CASED";
+ }
+ }
+ elsif ($property eq "Case_Ignorable")
+ {
+ push @case_ignorable, { start => $start, end => $end };
+ for (my $i = $start; $i <= $end && $i < 0x80; $i++)
+ {
+ push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_CASE_IGNORABLE";
+ }
+ }
+}
+
+my $num_category_ranges = scalar @category_ranges;
+my $num_alphabetic_ranges = scalar @alphabetic;
+my $num_lowercase_ranges = scalar @lowercase;
+my $num_uppercase_ranges = scalar @uppercase;
+my $num_case_ignorable_ranges = scalar @case_ignorable;
+my $num_white_space_ranges = scalar @white_space;
+my $num_hex_digit_ranges = scalar @hex_digits;
+my $num_join_control_ranges = scalar @join_control;
+
+# Start writing out the output file
open my $OT, '>', $output_table_file
or die "Could not open output file $output_table_file: $!\n";
-print $OT <<HEADER;
+print $OT <<"EOS";
/*-------------------------------------------------------------------------
*
* unicode_category_table.h
@@ -188,18 +371,153 @@ typedef struct
uint8 category; /* General Category */
} pg_category_range;
-/* table of Unicode codepoint ranges and their categories */
-static const pg_category_range unicode_categories[$num_ranges] =
+typedef struct
+{
+ uint32 first; /* Unicode codepoint */
+ uint32 last; /* Unicode codepoint */
+} pg_unicode_range;
+
+typedef struct
+{
+ uint8 category;
+ uint8 properties;
+} pg_unicode_properties;
+
+/*
+ * The properties currently used, in no particular order. Fits in a uint8, but
+ * if more properties are added, a wider integer will be needed.
+ */
+#define PG_U_PROP_ALPHABETIC (1 << 0)
+#define PG_U_PROP_LOWERCASE (1 << 1)
+#define PG_U_PROP_UPPERCASE (1 << 2)
+#define PG_U_PROP_CASED (1 << 3)
+#define PG_U_PROP_CASE_IGNORABLE (1 << 4)
+#define PG_U_PROP_WHITE_SPACE (1 << 5)
+#define PG_U_PROP_JOIN_CONTROL (1 << 6)
+#define PG_U_PROP_HEX_DIGIT (1 << 7)
+
+EOS
+
+print $OT <<"EOS";
+/* table for fast lookup of ASCII codepoints */
+static const pg_unicode_properties unicode_opt_ascii[128] =
+{
+EOS
+
+for (my $i = 0; $i < 128; $i++)
{
-HEADER
+ my $category_str = $categories->{ $opt_ascii{$i}->{Category} };
+ my $props_str = (join ' | ', @{ $opt_ascii{$i}{Properties} }) || "0";
+ printf $OT
+ "\t{\n\t\t/* 0x%06x */\n\t\t.category = %s,\n\t\t.properties = %s\n\t},\n",
+ $i, $category_str, $props_str;
+}
-my $firsttime = 1;
-foreach my $range (@category_ranges) {
- printf $OT ",\n" unless $firsttime;
- $firsttime = 0;
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges and their categories */
+static const pg_category_range unicode_categories[$num_category_ranges] =
+{
+EOS
- my $category = $categories->{$range->{category}};
+foreach my $range (@category_ranges)
+{
+ my $category = $categories->{ $range->{category} };
die "category missing: $range->{category}" unless $category;
- printf $OT "\t{0x%06x, 0x%06x, %s}", $range->{start}, $range->{end}, $category;
+ printf $OT "\t{0x%06x, 0x%06x, %s},\n", $range->{start}, $range->{end},
+ $category;
+}
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of Alphabetic characters */
+static const pg_unicode_range unicode_alphabetic[$num_alphabetic_ranges] =
+{
+EOS
+
+foreach my $range (@alphabetic)
+{
+ printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
+}
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of Lowercase characters */
+static const pg_unicode_range unicode_lowercase[$num_lowercase_ranges] =
+{
+EOS
+
+foreach my $range (@lowercase)
+{
+ printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
+}
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of Uppercase characters */
+static const pg_unicode_range unicode_uppercase[$num_uppercase_ranges] =
+{
+EOS
+
+foreach my $range (@uppercase)
+{
+ printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
}
-print $OT "\n};\n";
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of Case_Ignorable characters */
+static const pg_unicode_range unicode_case_ignorable[$num_case_ignorable_ranges] =
+{
+EOS
+
+foreach my $range (@case_ignorable)
+{
+ printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
+}
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of White_Space characters */
+static const pg_unicode_range unicode_white_space[$num_white_space_ranges] =
+{
+EOS
+
+foreach my $range (@white_space)
+{
+ printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
+}
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of Hex_Digit characters */
+static const pg_unicode_range unicode_hex_digit[$num_hex_digit_ranges] =
+{
+EOS
+
+foreach my $range (@hex_digits)
+{
+ printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
+}
+
+print $OT "};\n\n";
+
+print $OT <<"EOS";
+/* table of Unicode codepoint ranges of Join_Control characters */
+static const pg_unicode_range unicode_join_control[$num_join_control_ranges] =
+{
+EOS
+
+foreach my $range (@join_control)
+{
+ printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end};
+}
+
+print $OT "};\n";
diff --git a/src/common/unicode/meson.build b/src/common/unicode/meson.build
index df4f3a4ed1d..d7190bb8ca9 100644
--- a/src/common/unicode/meson.build
+++ b/src/common/unicode/meson.build
@@ -11,7 +11,7 @@ endif
# These files are part of the Unicode Character Database. Download them on
# demand.
-foreach f : ['CompositionExclusions.txt', 'DerivedNormalizationProps.txt', 'EastAsianWidth.txt', 'NormalizationTest.txt', 'UnicodeData.txt']
+foreach f : ['CompositionExclusions.txt', 'DerivedCoreProperties.txt', 'DerivedNormalizationProps.txt', 'EastAsianWidth.txt', 'NormalizationTest.txt', 'PropList.txt', 'UnicodeData.txt']
url = unicode_baseurl.format(UNICODE_VERSION, f)
target = custom_target(f,
output: f,
@@ -26,7 +26,7 @@ update_unicode_targets = []
update_unicode_targets += \
custom_target('unicode_category_table.h',
- input: [unicode_data['UnicodeData.txt']],
+ input: [unicode_data['UnicodeData.txt'], unicode_data['DerivedCoreProperties.txt'], unicode_data['PropList.txt']],
output: ['unicode_category_table.h'],
command: [
perl, files('generate-unicode_category_table.pl'),
diff --git a/src/common/unicode_category.c b/src/common/unicode_category.c
index 668051b461c..bece7334f5b 100644
--- a/src/common/unicode_category.c
+++ b/src/common/unicode_category.c
@@ -1,6 +1,8 @@
/*-------------------------------------------------------------------------
* unicode_category.c
- * Determine general category of Unicode characters.
+ * Determine general category and character properties of Unicode
+ * characters. Encoding must be UTF8, where we assume that the pg_wchar
+ * representation is a code point.
*
* Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group
*
@@ -19,23 +21,84 @@
#include "common/unicode_category_table.h"
/*
+ * Create bitmasks from pg_unicode_category values for efficient comparison of
+ * multiple categories. For instance, PG_U_MN_MASK is a bitmask representing
+ * the general category Mn; and PG_U_M_MASK represents general categories Mn,
+ * Me, and Mc.
+ *
+ * The number of Unicode General Categories should never grow, so a 32-bit
+ * mask is fine.
+ */
+#define PG_U_CATEGORY_MASK(X) ((uint32)(1 << (X)))
+
+#define PG_U_LU_MASK PG_U_CATEGORY_MASK(PG_U_UPPERCASE_LETTER)
+#define PG_U_LL_MASK PG_U_CATEGORY_MASK(PG_U_LOWERCASE_LETTER)
+#define PG_U_LT_MASK PG_U_CATEGORY_MASK(PG_U_TITLECASE_LETTER)
+#define PG_U_LC_MASK (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK)
+#define PG_U_LM_MASK PG_U_CATEGORY_MASK(PG_U_MODIFIER_LETTER)
+#define PG_U_LO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_LETTER)
+#define PG_U_L_MASK (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK|PG_U_LM_MASK|\
+ PG_U_LO_MASK)
+#define PG_U_MN_MASK PG_U_CATEGORY_MASK(PG_U_NONSPACING_MARK)
+#define PG_U_ME_MASK PG_U_CATEGORY_MASK(PG_U_ENCLOSING_MARK)
+#define PG_U_MC_MASK PG_U_CATEGORY_MASK(PG_U_SPACING_MARK)
+#define PG_U_M_MASK (PG_U_MN_MASK|PG_U_MC_MASK|PG_U_ME_MASK)
+#define PG_U_ND_MASK PG_U_CATEGORY_MASK(PG_U_DECIMAL_NUMBER)
+#define PG_U_NL_MASK PG_U_CATEGORY_MASK(PG_U_LETTER_NUMBER)
+#define PG_U_NO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_NUMBER)
+#define PG_U_N_MASK (PG_U_ND_MASK|PG_U_NL_MASK|PG_U_NO_MASK)
+#define PG_U_PC_MASK PG_U_CATEGORY_MASK(PG_U_CONNECTOR_PUNCTUATION)
+#define PG_U_PD_MASK PG_U_CATEGORY_MASK(PG_U_DASH_PUNCTUATION)
+#define PG_U_PS_MASK PG_U_CATEGORY_MASK(PG_U_OPEN_PUNCTUATION)
+#define PG_U_PE_MASK PG_U_CATEGORY_MASK(PG_U_CLOSE_PUNCTUATION)
+#define PG_U_PI_MASK PG_U_CATEGORY_MASK(PG_U_INITIAL_PUNCTUATION)
+#define PG_U_PF_MASK PG_U_CATEGORY_MASK(PG_U_FINAL_PUNCTUATION)
+#define PG_U_PO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_PUNCTUATION)
+#define PG_U_P_MASK (PG_U_PC_MASK|PG_U_PD_MASK|PG_U_PS_MASK|PG_U_PE_MASK|\
+ PG_U_PI_MASK|PG_U_PF_MASK|PG_U_PO_MASK)
+#define PG_U_SM_MASK PG_U_CATEGORY_MASK(PG_U_MATH_SYMBOL)
+#define PG_U_SC_MASK PG_U_CATEGORY_MASK(PG_U_CURRENCY_SYMBOL)
+#define PG_U_SK_MASK PG_U_CATEGORY_MASK(PG_U_MODIFIER_SYMBOL)
+#define PG_U_SO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_SYMBOL)
+#define PG_U_S_MASK (PG_U_SM_MASK|PG_U_SC_MASK|PG_U_SK_MASK|PG_U_SO_MASK)
+#define PG_U_ZS_MASK PG_U_CATEGORY_MASK(PG_U_SPACE_SEPARATOR)
+#define PG_U_ZL_MASK PG_U_CATEGORY_MASK(PG_U_LINE_SEPARATOR)
+#define PG_U_ZP_MASK PG_U_CATEGORY_MASK(PG_U_PARAGRAPH_SEPARATOR)
+#define PG_U_Z_MASK (PG_U_ZS_MASK|PG_U_ZL_MASK|PG_U_ZP_MASK)
+#define PG_U_CC_MASK PG_U_CATEGORY_MASK(PG_U_CONTROL)
+#define PG_U_CF_MASK PG_U_CATEGORY_MASK(PG_U_FORMAT)
+#define PG_U_CS_MASK PG_U_CATEGORY_MASK(PG_U_SURROGATE)
+#define PG_U_CO_MASK PG_U_CATEGORY_MASK(PG_U_PRIVATE_USE)
+#define PG_U_CN_MASK PG_U_CATEGORY_MASK(PG_U_UNASSIGNED)
+#define PG_U_C_MASK (PG_U_CC_MASK|PG_U_CF_MASK|PG_U_CS_MASK|PG_U_CO_MASK|\
+ PG_U_CN_MASK)
+
+#define PG_U_CHARACTER_TAB 0x09
+
+static bool range_search(const pg_unicode_range * tbl, size_t size,
+ pg_wchar code);
+
+/*
* Unicode general category for the given codepoint.
*/
pg_unicode_category
-unicode_category(pg_wchar ucs)
+unicode_category(pg_wchar code)
{
int min = 0;
int mid;
int max = lengthof(unicode_categories) - 1;
- Assert(ucs <= 0x10ffff);
+ Assert(code <= 0x10ffff);
+
+ if (code < 0x80)
+ return unicode_opt_ascii[code].category;
while (max >= min)
{
mid = (min + max) / 2;
- if (ucs > unicode_categories[mid].last)
+ if (code > unicode_categories[mid].last)
min = mid + 1;
- else if (ucs < unicode_categories[mid].first)
+ else if (code < unicode_categories[mid].first)
max = mid - 1;
else
return unicode_categories[mid].category;
@@ -44,6 +107,224 @@ unicode_category(pg_wchar ucs)
return PG_U_UNASSIGNED;
}
+/*
+ * Does the codepoint have the Unicode Alphabetic property?
+ *
+ * ASCII codepoints are answered from unicode_opt_ascii; all others are
+ * looked up in the unicode_alphabetic range table.
+ */
+bool
+pg_u_prop_alphabetic(pg_wchar code)
+{
+	/* beyond ASCII: search the table of Alphabetic ranges */
+	if (code >= 0x80)
+		return range_search(unicode_alphabetic,
+							lengthof(unicode_alphabetic),
+							code);
+
+	/* ASCII: test the precomputed property bit */
+	return unicode_opt_ascii[code].properties & PG_U_PROP_ALPHABETIC;
+}
+
+/*
+ * Does the codepoint have the Unicode Lowercase property?
+ *
+ * ASCII codepoints are answered from unicode_opt_ascii; all others are
+ * looked up in the unicode_lowercase range table.
+ */
+bool
+pg_u_prop_lowercase(pg_wchar code)
+{
+	/* beyond ASCII: search the table of Lowercase ranges */
+	if (code >= 0x80)
+		return range_search(unicode_lowercase,
+							lengthof(unicode_lowercase),
+							code);
+
+	/* ASCII: test the precomputed property bit */
+	return unicode_opt_ascii[code].properties & PG_U_PROP_LOWERCASE;
+}
+
+/*
+ * Does the codepoint have the Unicode Uppercase property?
+ *
+ * ASCII codepoints are answered from unicode_opt_ascii; all others are
+ * looked up in the unicode_uppercase range table.
+ */
+bool
+pg_u_prop_uppercase(pg_wchar code)
+{
+	/* beyond ASCII: search the table of Uppercase ranges */
+	if (code >= 0x80)
+		return range_search(unicode_uppercase,
+							lengthof(unicode_uppercase),
+							code);
+
+	/* ASCII: test the precomputed property bit */
+	return unicode_opt_ascii[code].properties & PG_U_PROP_UPPERCASE;
+}
+
+/*
+ * Is the codepoint Cased?
+ *
+ * ASCII codepoints are answered from unicode_opt_ascii. Any other codepoint
+ * is Cased if its general category is covered by PG_U_LT_MASK, or if it has
+ * the Lowercase or the Uppercase property.
+ */
+bool
+pg_u_prop_cased(pg_wchar code)
+{
+	if (code >= 0x80)
+	{
+		uint32		lt_bit;
+
+		lt_bit = PG_U_CATEGORY_MASK(unicode_category(code)) & PG_U_LT_MASK;
+
+		return lt_bit ||
+			pg_u_prop_lowercase(code) ||
+			pg_u_prop_uppercase(code);
+	}
+
+	/* ASCII: test the precomputed property bit */
+	return unicode_opt_ascii[code].properties & PG_U_PROP_CASED;
+}
+
+/*
+ * Does the codepoint have the Unicode Case_Ignorable property?
+ *
+ * ASCII codepoints are answered from unicode_opt_ascii; all others are
+ * looked up in the unicode_case_ignorable range table.
+ */
+bool
+pg_u_prop_case_ignorable(pg_wchar code)
+{
+	/* beyond ASCII: search the table of Case_Ignorable ranges */
+	if (code >= 0x80)
+		return range_search(unicode_case_ignorable,
+							lengthof(unicode_case_ignorable),
+							code);
+
+	/* ASCII: test the precomputed property bit */
+	return unicode_opt_ascii[code].properties & PG_U_PROP_CASE_IGNORABLE;
+}
+
+/*
+ * Does the codepoint have the Unicode White_Space property?
+ *
+ * ASCII codepoints are answered from unicode_opt_ascii; all others are
+ * looked up in the unicode_white_space range table.
+ */
+bool
+pg_u_prop_white_space(pg_wchar code)
+{
+	/* beyond ASCII: search the table of White_Space ranges */
+	if (code >= 0x80)
+		return range_search(unicode_white_space,
+							lengthof(unicode_white_space),
+							code);
+
+	/* ASCII: test the precomputed property bit */
+	return unicode_opt_ascii[code].properties & PG_U_PROP_WHITE_SPACE;
+}
+
+/*
+ * Does the codepoint have the Unicode Hex_Digit property?
+ *
+ * ASCII codepoints are answered from unicode_opt_ascii; all others are
+ * looked up in the unicode_hex_digit range table.
+ */
+bool
+pg_u_prop_hex_digit(pg_wchar code)
+{
+	/* beyond ASCII: search the table of Hex_Digit ranges */
+	if (code >= 0x80)
+		return range_search(unicode_hex_digit,
+							lengthof(unicode_hex_digit),
+							code);
+
+	/* ASCII: test the precomputed property bit */
+	return unicode_opt_ascii[code].properties & PG_U_PROP_HEX_DIGIT;
+}
+
+/*
+ * Does the codepoint have the Unicode Join_Control property?
+ *
+ * ASCII codepoints are answered from unicode_opt_ascii; all others are
+ * looked up in the unicode_join_control range table.
+ */
+bool
+pg_u_prop_join_control(pg_wchar code)
+{
+	/* beyond ASCII: search the table of Join_Control ranges */
+	if (code >= 0x80)
+		return range_search(unicode_join_control,
+							lengthof(unicode_join_control),
+							code);
+
+	/* ASCII: test the precomputed property bit */
+	return unicode_opt_ascii[code].properties & PG_U_PROP_JOIN_CONTROL;
+}
+
+/*
+ * The following functions implement the Compatibility Properties described
+ * at: https://www.unicode.org/reports/tr18/#Compatibility_Properties
+ *
+ * For the functions that take a 'posix' argument: if 'posix' is true, the
+ * "POSIX Compatible" variant is implemented, otherwise the "Standard"
+ * variant. Functions without that argument do not distinguish the variants.
+ */
+
+/*
+ * digit: with 'posix', only ASCII '0'..'9'; otherwise any codepoint whose
+ * general category is PG_U_DECIMAL_NUMBER.
+ */
+bool
+pg_u_isdigit(pg_wchar code, bool posix)
+{
+	if (!posix)
+		return unicode_category(code) == PG_U_DECIMAL_NUMBER;
+
+	return (code >= '0' && code <= '9');
+}
+
+/*
+ * alpha: true if the codepoint has the Alphabetic property. There is no
+ * 'posix' argument, so a single definition is used.
+ */
+bool
+pg_u_isalpha(pg_wchar code)
+{
+ return pg_u_prop_alphabetic(code);
+}
+
+/*
+ * alnum: alpha or digit. 'posix' only affects the digit test, to which it is
+ * passed through unchanged.
+ */
+bool
+pg_u_isalnum(pg_wchar code, bool posix)
+{
+	if (pg_u_isalpha(code))
+		return true;
+
+	return pg_u_isdigit(code, posix);
+}
+
+/*
+ * word: true if the general category is covered by PG_U_M_MASK, is
+ * Decimal_Number or is Connector_Punctuation, or if the codepoint is
+ * alphabetic or has the Join_Control property.
+ */
+bool
+pg_u_isword(pg_wchar code)
+{
+	const uint32 word_categories = PG_U_M_MASK | PG_U_ND_MASK | PG_U_PC_MASK;
+	uint32		category_bit = PG_U_CATEGORY_MASK(unicode_category(code));
+
+	/* cheapest test first: the category is already known */
+	if (category_bit & word_categories)
+		return true;
+
+	return pg_u_isalpha(code) || pg_u_prop_join_control(code);
+}
+
+/*
+ * upper: true if the codepoint has the Uppercase property.
+ */
+bool
+pg_u_isupper(pg_wchar code)
+{
+ return pg_u_prop_uppercase(code);
+}
+
+/*
+ * lower: true if the codepoint has the Lowercase property.
+ */
+bool
+pg_u_islower(pg_wchar code)
+{
+ return pg_u_prop_lowercase(code);
+}
+
+/*
+ * blank: CHARACTER TABULATION (U+0009), or any codepoint whose general
+ * category is PG_U_SPACE_SEPARATOR.
+ */
+bool
+pg_u_isblank(pg_wchar code)
+{
+	if (code == PG_U_CHARACTER_TAB)
+		return true;
+
+	return unicode_category(code) == PG_U_SPACE_SEPARATOR;
+}
+
+/*
+ * cntrl: true if the codepoint's general category is PG_U_CONTROL.
+ */
+bool
+pg_u_iscntrl(pg_wchar code)
+{
+	pg_unicode_category category = unicode_category(code);
+
+	return category == PG_U_CONTROL;
+}
+
+/*
+ * graph: everything except codepoints whose general category is Control,
+ * Surrogate or Unassigned, and except codepoints for which pg_u_isspace()
+ * is true.
+ */
+bool
+pg_u_isgraph(pg_wchar code)
+{
+	const uint32 nongraphic = PG_U_CC_MASK | PG_U_CS_MASK | PG_U_CN_MASK;
+	uint32		category_bit = PG_U_CATEGORY_MASK(unicode_category(code));
+
+	return !((category_bit & nongraphic) || pg_u_isspace(code));
+}
+
+/*
+ * print: graph or blank, but never a codepoint whose general category is
+ * PG_U_CONTROL. The explicit exclusion matters because pg_u_isblank()
+ * accepts CHARACTER TABULATION, which is a control character.
+ */
+bool
+pg_u_isprint(pg_wchar code)
+{
+	return unicode_category(code) != PG_U_CONTROL &&
+		(pg_u_isgraph(code) || pg_u_isblank(code));
+}
+
+/*
+ * punct: with 'posix', any codepoint in a Punctuation or Symbol category
+ * that is not alphabetic; otherwise any codepoint in a Punctuation category.
+ */
+bool
+pg_u_ispunct(pg_wchar code, bool posix)
+{
+	uint32		accepted = PG_U_P_MASK;
+	uint32		category_mask;
+
+	if (posix)
+	{
+		/* the POSIX-compatible variant excludes alphabetic codepoints ... */
+		if (pg_u_isalpha(code))
+			return false;
+
+		/* ... and additionally accepts the Symbol categories */
+		accepted |= PG_U_S_MASK;
+	}
+
+	category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
+
+	/*
+	 * Compare against zero explicitly rather than returning the uint32 mask
+	 * as a bool: if bool is not C99 _Bool but a one-byte integer type, the
+	 * implicit conversion would drop the high category bits and could turn
+	 * a match into false.
+	 */
+	return (category_mask & accepted) != 0;
+}
+
+/*
+ * space: true if the codepoint has the White_Space property.
+ */
+bool
+pg_u_isspace(pg_wchar code)
+{
+ return pg_u_prop_white_space(code);
+}
+
+/*
+ * xdigit: with 'posix', only ASCII '0'..'9', 'A'..'F' and 'a'..'f';
+ * otherwise any codepoint whose general category is PG_U_DECIMAL_NUMBER or
+ * that has the Hex_Digit property.
+ */
+bool
+pg_u_isxdigit(pg_wchar code, bool posix)
+{
+	if (!posix)
+		return unicode_category(code) == PG_U_DECIMAL_NUMBER ||
+			pg_u_prop_hex_digit(code);
+
+	if (code >= '0' && code <= '9')
+		return true;
+	if (code >= 'A' && code <= 'F')
+		return true;
+
+	return (code >= 'a' && code <= 'f');
+}
+
/*
* Description of Unicode general category.
*/
@@ -191,3 +472,30 @@ unicode_category_abbrev(pg_unicode_category category)
Assert(false);
return "??"; /* keep compiler quiet */
}
+
+/*
+ * Report whether 'code' lies within any of the 'size' ranges in 'tbl'.
+ *
+ * This is a binary search, so the table must be sorted by codepoint. Both
+ * ends of a range are inclusive.
+ */
+static bool
+range_search(const pg_unicode_range * tbl, size_t size, pg_wchar code)
+{
+	int			lo = 0;
+	int			hi = size - 1;
+
+	Assert(code <= 0x10ffff);
+
+	while (lo <= hi)
+	{
+		int			probe = (lo + hi) / 2;
+		const pg_unicode_range *range = &tbl[probe];
+
+		if (code > range->last)
+			lo = probe + 1;		/* continue in the upper half */
+		else if (code < range->first)
+			hi = probe - 1;		/* continue in the lower half */
+		else
+			return true;		/* first <= code <= last */
+	}
+
+	return false;
+}
diff --git a/src/include/common/unicode_category.h b/src/include/common/unicode_category.h
index 5bad2806150..f185b589009 100644
--- a/src/include/common/unicode_category.h
+++ b/src/include/common/unicode_category.h
@@ -62,7 +62,30 @@ typedef enum pg_unicode_category
} pg_unicode_category;
extern pg_unicode_category unicode_category(pg_wchar ucs);
-const char *unicode_category_string(pg_unicode_category category);
-const char *unicode_category_abbrev(pg_unicode_category category);
+extern const char *unicode_category_string(pg_unicode_category category);
+extern const char *unicode_category_abbrev(pg_unicode_category category);
+
+/*
+ * Unicode character property tests: each returns true if the given
+ * codepoint has the named property.
+ */
+extern bool pg_u_prop_alphabetic(pg_wchar c);
+extern bool pg_u_prop_lowercase(pg_wchar c);
+extern bool pg_u_prop_uppercase(pg_wchar c);
+extern bool pg_u_prop_cased(pg_wchar c);
+extern bool pg_u_prop_case_ignorable(pg_wchar c);
+extern bool pg_u_prop_white_space(pg_wchar c);
+extern bool pg_u_prop_hex_digit(pg_wchar c);
+extern bool pg_u_prop_join_control(pg_wchar c);
+
+/*
+ * Character class tests based on the Compatibility Properties of Unicode
+ * Technical Standard #18. Where a 'posix' argument is present, true selects
+ * the "POSIX Compatible" definition and false the "Standard" one.
+ */
+extern bool pg_u_isdigit(pg_wchar c, bool posix);
+extern bool pg_u_isalpha(pg_wchar c);
+extern bool pg_u_isalnum(pg_wchar c, bool posix);
+extern bool pg_u_isword(pg_wchar c);
+extern bool pg_u_isupper(pg_wchar c);
+extern bool pg_u_islower(pg_wchar c);
+extern bool pg_u_isblank(pg_wchar c);
+extern bool pg_u_iscntrl(pg_wchar c);
+extern bool pg_u_isgraph(pg_wchar c);
+extern bool pg_u_isprint(pg_wchar c);
+extern bool pg_u_ispunct(pg_wchar c, bool posix);
+extern bool pg_u_isspace(pg_wchar c);
+extern bool pg_u_isxdigit(pg_wchar c, bool posix);
#endif /* UNICODE_CATEGORY_H */
diff --git a/src/include/common/unicode_category_table.h b/src/include/common/unicode_category_table.h
index d7ef996189a..ff35ff45e83 100644
--- a/src/include/common/unicode_category_table.h
+++ b/src/include/common/unicode_category_table.h
@@ -25,6 +25,676 @@ typedef struct
uint8 category; /* General Category */
} pg_category_range;
+/* A contiguous range of codepoints; both ends are inclusive. */
+typedef struct
+{
+ uint32 first; /* first Unicode codepoint in the range */
+ uint32 last; /* last Unicode codepoint in the range (inclusive) */
+} pg_unicode_range;
+
+/*
+ * General category and property bits of a single codepoint; this is the
+ * element type of the unicode_opt_ascii table.
+ */
+typedef struct
+{
+ uint8 category; /* General Category, e.g. PG_U_CONTROL */
+ uint8 properties; /* bitwise OR of PG_U_PROP_* flags */
+} pg_unicode_properties;
+
+/*
+ * The properties currently used, in no particular order. Each is a single
+ * bit that can be set in pg_unicode_properties.properties. The eight flags
+ * fit in a uint8, but if more properties are added, a wider integer will be
+ * needed.
+ */
+#define PG_U_PROP_ALPHABETIC (1 << 0)
+#define PG_U_PROP_LOWERCASE (1 << 1)
+#define PG_U_PROP_UPPERCASE (1 << 2)
+#define PG_U_PROP_CASED (1 << 3)
+#define PG_U_PROP_CASE_IGNORABLE (1 << 4)
+#define PG_U_PROP_WHITE_SPACE (1 << 5)
+#define PG_U_PROP_JOIN_CONTROL (1 << 6)
+#define PG_U_PROP_HEX_DIGIT (1 << 7)
+
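+/*
+ * Table for fast lookup of ASCII codepoints: entry N holds the general
+ * category and the PG_U_PROP_* property bits of codepoint N (0x00..0x7f).
+ */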
+static const pg_unicode_properties unicode_opt_ascii[128] =
+{
+ {
+ /* 0x000000 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000001 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000002 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000003 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000004 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000005 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000006 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000007 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000008 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000009 */
+ .category = PG_U_CONTROL,
+ .properties = PG_U_PROP_WHITE_SPACE
+ },
+ {
+ /* 0x00000a */
+ .category = PG_U_CONTROL,
+ .properties = PG_U_PROP_WHITE_SPACE
+ },
+ {
+ /* 0x00000b */
+ .category = PG_U_CONTROL,
+ .properties = PG_U_PROP_WHITE_SPACE
+ },
+ {
+ /* 0x00000c */
+ .category = PG_U_CONTROL,
+ .properties = PG_U_PROP_WHITE_SPACE
+ },
+ {
+ /* 0x00000d */
+ .category = PG_U_CONTROL,
+ .properties = PG_U_PROP_WHITE_SPACE
+ },
+ {
+ /* 0x00000e */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x00000f */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000010 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000011 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000012 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000013 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000014 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000015 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000016 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000017 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000018 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000019 */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x00001a */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x00001b */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x00001c */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x00001d */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x00001e */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x00001f */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+ {
+ /* 0x000020 */
+ .category = PG_U_SPACE_SEPARATOR,
+ .properties = PG_U_PROP_WHITE_SPACE
+ },
+ {
+ /* 0x000021 */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x000022 */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x000023 */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x000024 */
+ .category = PG_U_CURRENCY_SYMBOL,
+ .properties = 0
+ },
+ {
+ /* 0x000025 */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x000026 */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x000027 */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = PG_U_PROP_CASE_IGNORABLE
+ },
+ {
+ /* 0x000028 */
+ .category = PG_U_OPEN_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x000029 */
+ .category = PG_U_CLOSE_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x00002a */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x00002b */
+ .category = PG_U_MATH_SYMBOL,
+ .properties = 0
+ },
+ {
+ /* 0x00002c */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x00002d */
+ .category = PG_U_DASH_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x00002e */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = PG_U_PROP_CASE_IGNORABLE
+ },
+ {
+ /* 0x00002f */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x000030 */
+ .category = PG_U_DECIMAL_NUMBER,
+ .properties = PG_U_PROP_HEX_DIGIT
+ },
+ {
+ /* 0x000031 */
+ .category = PG_U_DECIMAL_NUMBER,
+ .properties = PG_U_PROP_HEX_DIGIT
+ },
+ {
+ /* 0x000032 */
+ .category = PG_U_DECIMAL_NUMBER,
+ .properties = PG_U_PROP_HEX_DIGIT
+ },
+ {
+ /* 0x000033 */
+ .category = PG_U_DECIMAL_NUMBER,
+ .properties = PG_U_PROP_HEX_DIGIT
+ },
+ {
+ /* 0x000034 */
+ .category = PG_U_DECIMAL_NUMBER,
+ .properties = PG_U_PROP_HEX_DIGIT
+ },
+ {
+ /* 0x000035 */
+ .category = PG_U_DECIMAL_NUMBER,
+ .properties = PG_U_PROP_HEX_DIGIT
+ },
+ {
+ /* 0x000036 */
+ .category = PG_U_DECIMAL_NUMBER,
+ .properties = PG_U_PROP_HEX_DIGIT
+ },
+ {
+ /* 0x000037 */
+ .category = PG_U_DECIMAL_NUMBER,
+ .properties = PG_U_PROP_HEX_DIGIT
+ },
+ {
+ /* 0x000038 */
+ .category = PG_U_DECIMAL_NUMBER,
+ .properties = PG_U_PROP_HEX_DIGIT
+ },
+ {
+ /* 0x000039 */
+ .category = PG_U_DECIMAL_NUMBER,
+ .properties = PG_U_PROP_HEX_DIGIT
+ },
+ {
+ /* 0x00003a */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = PG_U_PROP_CASE_IGNORABLE
+ },
+ {
+ /* 0x00003b */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x00003c */
+ .category = PG_U_MATH_SYMBOL,
+ .properties = 0
+ },
+ {
+ /* 0x00003d */
+ .category = PG_U_MATH_SYMBOL,
+ .properties = 0
+ },
+ {
+ /* 0x00003e */
+ .category = PG_U_MATH_SYMBOL,
+ .properties = 0
+ },
+ {
+ /* 0x00003f */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x000040 */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x000041 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000042 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000043 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000044 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000045 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000046 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000047 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000048 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000049 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00004a */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00004b */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00004c */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00004d */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00004e */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00004f */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000050 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000051 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000052 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000053 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000054 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000055 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000056 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000057 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000058 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000059 */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00005a */
+ .category = PG_U_UPPERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00005b */
+ .category = PG_U_OPEN_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x00005c */
+ .category = PG_U_OTHER_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x00005d */
+ .category = PG_U_CLOSE_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x00005e */
+ .category = PG_U_MODIFIER_SYMBOL,
+ .properties = PG_U_PROP_CASE_IGNORABLE
+ },
+ {
+ /* 0x00005f */
+ .category = PG_U_CONNECTOR_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x000060 */
+ .category = PG_U_MODIFIER_SYMBOL,
+ .properties = PG_U_PROP_CASE_IGNORABLE
+ },
+ {
+ /* 0x000061 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000062 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000063 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000064 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000065 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000066 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000067 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000068 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000069 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00006a */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00006b */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00006c */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00006d */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00006e */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00006f */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000070 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000071 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000072 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000073 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000074 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000075 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000076 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000077 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000078 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x000079 */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00007a */
+ .category = PG_U_LOWERCASE_LETTER,
+ .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED
+ },
+ {
+ /* 0x00007b */
+ .category = PG_U_OPEN_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x00007c */
+ .category = PG_U_MATH_SYMBOL,
+ .properties = 0
+ },
+ {
+ /* 0x00007d */
+ .category = PG_U_CLOSE_PUNCTUATION,
+ .properties = 0
+ },
+ {
+ /* 0x00007e */
+ .category = PG_U_MATH_SYMBOL,
+ .properties = 0
+ },
+ {
+ /* 0x00007f */
+ .category = PG_U_CONTROL,
+ .properties = 0
+ },
+};
+
/* table of Unicode codepoint ranges and their categories */
static const pg_category_range unicode_categories[3302] =
{
@@ -3329,5 +3999,3027 @@ static const pg_category_range unicode_categories[3302] =
{0x0e0020, 0x0e007f, PG_U_FORMAT},
{0x0e0100, 0x0e01ef, PG_U_NONSPACING_MARK},
{0x0f0000, 0x0ffffd, PG_U_PRIVATE_USE},
- {0x100000, 0x10fffd, PG_U_PRIVATE_USE}
+ {0x100000, 0x10fffd, PG_U_PRIVATE_USE},
+};
+
+/* table of Unicode codepoint ranges of Alphabetic characters */
+static const pg_unicode_range unicode_alphabetic[1141] =
+{
+ {0x000041, 0x00005a},
+ {0x000061, 0x00007a},
+ {0x0000aa, 0x0000aa},
+ {0x0000b5, 0x0000b5},
+ {0x0000ba, 0x0000ba},
+ {0x0000c0, 0x0000d6},
+ {0x0000d8, 0x0000f6},
+ {0x0000f8, 0x0001ba},
+ {0x0001bb, 0x0001bb},
+ {0x0001bc, 0x0001bf},
+ {0x0001c0, 0x0001c3},
+ {0x0001c4, 0x000293},
+ {0x000294, 0x000294},
+ {0x000295, 0x0002af},
+ {0x0002b0, 0x0002c1},
+ {0x0002c6, 0x0002d1},
+ {0x0002e0, 0x0002e4},
+ {0x0002ec, 0x0002ec},
+ {0x0002ee, 0x0002ee},
+ {0x000345, 0x000345},
+ {0x000370, 0x000373},
+ {0x000374, 0x000374},
+ {0x000376, 0x000377},
+ {0x00037a, 0x00037a},
+ {0x00037b, 0x00037d},
+ {0x00037f, 0x00037f},
+ {0x000386, 0x000386},
+ {0x000388, 0x00038a},
+ {0x00038c, 0x00038c},
+ {0x00038e, 0x0003a1},
+ {0x0003a3, 0x0003f5},
+ {0x0003f7, 0x000481},
+ {0x00048a, 0x00052f},
+ {0x000531, 0x000556},
+ {0x000559, 0x000559},
+ {0x000560, 0x000588},
+ {0x0005b0, 0x0005bd},
+ {0x0005bf, 0x0005bf},
+ {0x0005c1, 0x0005c2},
+ {0x0005c4, 0x0005c5},
+ {0x0005c7, 0x0005c7},
+ {0x0005d0, 0x0005ea},
+ {0x0005ef, 0x0005f2},
+ {0x000610, 0x00061a},
+ {0x000620, 0x00063f},
+ {0x000640, 0x000640},
+ {0x000641, 0x00064a},
+ {0x00064b, 0x000657},
+ {0x000659, 0x00065f},
+ {0x00066e, 0x00066f},
+ {0x000670, 0x000670},
+ {0x000671, 0x0006d3},
+ {0x0006d5, 0x0006d5},
+ {0x0006d6, 0x0006dc},
+ {0x0006e1, 0x0006e4},
+ {0x0006e5, 0x0006e6},
+ {0x0006e7, 0x0006e8},
+ {0x0006ed, 0x0006ed},
+ {0x0006ee, 0x0006ef},
+ {0x0006fa, 0x0006fc},
+ {0x0006ff, 0x0006ff},
+ {0x000710, 0x000710},
+ {0x000711, 0x000711},
+ {0x000712, 0x00072f},
+ {0x000730, 0x00073f},
+ {0x00074d, 0x0007a5},
+ {0x0007a6, 0x0007b0},
+ {0x0007b1, 0x0007b1},
+ {0x0007ca, 0x0007ea},
+ {0x0007f4, 0x0007f5},
+ {0x0007fa, 0x0007fa},
+ {0x000800, 0x000815},
+ {0x000816, 0x000817},
+ {0x00081a, 0x00081a},
+ {0x00081b, 0x000823},
+ {0x000824, 0x000824},
+ {0x000825, 0x000827},
+ {0x000828, 0x000828},
+ {0x000829, 0x00082c},
+ {0x000840, 0x000858},
+ {0x000860, 0x00086a},
+ {0x000870, 0x000887},
+ {0x000889, 0x00088e},
+ {0x0008a0, 0x0008c8},
+ {0x0008c9, 0x0008c9},
+ {0x0008d4, 0x0008df},
+ {0x0008e3, 0x0008e9},
+ {0x0008f0, 0x000902},
+ {0x000903, 0x000903},
+ {0x000904, 0x000939},
+ {0x00093a, 0x00093a},
+ {0x00093b, 0x00093b},
+ {0x00093d, 0x00093d},
+ {0x00093e, 0x000940},
+ {0x000941, 0x000948},
+ {0x000949, 0x00094c},
+ {0x00094e, 0x00094f},
+ {0x000950, 0x000950},
+ {0x000955, 0x000957},
+ {0x000958, 0x000961},
+ {0x000962, 0x000963},
+ {0x000971, 0x000971},
+ {0x000972, 0x000980},
+ {0x000981, 0x000981},
+ {0x000982, 0x000983},
+ {0x000985, 0x00098c},
+ {0x00098f, 0x000990},
+ {0x000993, 0x0009a8},
+ {0x0009aa, 0x0009b0},
+ {0x0009b2, 0x0009b2},
+ {0x0009b6, 0x0009b9},
+ {0x0009bd, 0x0009bd},
+ {0x0009be, 0x0009c0},
+ {0x0009c1, 0x0009c4},
+ {0x0009c7, 0x0009c8},
+ {0x0009cb, 0x0009cc},
+ {0x0009ce, 0x0009ce},
+ {0x0009d7, 0x0009d7},
+ {0x0009dc, 0x0009dd},
+ {0x0009df, 0x0009e1},
+ {0x0009e2, 0x0009e3},
+ {0x0009f0, 0x0009f1},
+ {0x0009fc, 0x0009fc},
+ {0x000a01, 0x000a02},
+ {0x000a03, 0x000a03},
+ {0x000a05, 0x000a0a},
+ {0x000a0f, 0x000a10},
+ {0x000a13, 0x000a28},
+ {0x000a2a, 0x000a30},
+ {0x000a32, 0x000a33},
+ {0x000a35, 0x000a36},
+ {0x000a38, 0x000a39},
+ {0x000a3e, 0x000a40},
+ {0x000a41, 0x000a42},
+ {0x000a47, 0x000a48},
+ {0x000a4b, 0x000a4c},
+ {0x000a51, 0x000a51},
+ {0x000a59, 0x000a5c},
+ {0x000a5e, 0x000a5e},
+ {0x000a70, 0x000a71},
+ {0x000a72, 0x000a74},
+ {0x000a75, 0x000a75},
+ {0x000a81, 0x000a82},
+ {0x000a83, 0x000a83},
+ {0x000a85, 0x000a8d},
+ {0x000a8f, 0x000a91},
+ {0x000a93, 0x000aa8},
+ {0x000aaa, 0x000ab0},
+ {0x000ab2, 0x000ab3},
+ {0x000ab5, 0x000ab9},
+ {0x000abd, 0x000abd},
+ {0x000abe, 0x000ac0},
+ {0x000ac1, 0x000ac5},
+ {0x000ac7, 0x000ac8},
+ {0x000ac9, 0x000ac9},
+ {0x000acb, 0x000acc},
+ {0x000ad0, 0x000ad0},
+ {0x000ae0, 0x000ae1},
+ {0x000ae2, 0x000ae3},
+ {0x000af9, 0x000af9},
+ {0x000afa, 0x000afc},
+ {0x000b01, 0x000b01},
+ {0x000b02, 0x000b03},
+ {0x000b05, 0x000b0c},
+ {0x000b0f, 0x000b10},
+ {0x000b13, 0x000b28},
+ {0x000b2a, 0x000b30},
+ {0x000b32, 0x000b33},
+ {0x000b35, 0x000b39},
+ {0x000b3d, 0x000b3d},
+ {0x000b3e, 0x000b3e},
+ {0x000b3f, 0x000b3f},
+ {0x000b40, 0x000b40},
+ {0x000b41, 0x000b44},
+ {0x000b47, 0x000b48},
+ {0x000b4b, 0x000b4c},
+ {0x000b56, 0x000b56},
+ {0x000b57, 0x000b57},
+ {0x000b5c, 0x000b5d},
+ {0x000b5f, 0x000b61},
+ {0x000b62, 0x000b63},
+ {0x000b71, 0x000b71},
+ {0x000b82, 0x000b82},
+ {0x000b83, 0x000b83},
+ {0x000b85, 0x000b8a},
+ {0x000b8e, 0x000b90},
+ {0x000b92, 0x000b95},
+ {0x000b99, 0x000b9a},
+ {0x000b9c, 0x000b9c},
+ {0x000b9e, 0x000b9f},
+ {0x000ba3, 0x000ba4},
+ {0x000ba8, 0x000baa},
+ {0x000bae, 0x000bb9},
+ {0x000bbe, 0x000bbf},
+ {0x000bc0, 0x000bc0},
+ {0x000bc1, 0x000bc2},
+ {0x000bc6, 0x000bc8},
+ {0x000bca, 0x000bcc},
+ {0x000bd0, 0x000bd0},
+ {0x000bd7, 0x000bd7},
+ {0x000c00, 0x000c00},
+ {0x000c01, 0x000c03},
+ {0x000c04, 0x000c04},
+ {0x000c05, 0x000c0c},
+ {0x000c0e, 0x000c10},
+ {0x000c12, 0x000c28},
+ {0x000c2a, 0x000c39},
+ {0x000c3d, 0x000c3d},
+ {0x000c3e, 0x000c40},
+ {0x000c41, 0x000c44},
+ {0x000c46, 0x000c48},
+ {0x000c4a, 0x000c4c},
+ {0x000c55, 0x000c56},
+ {0x000c58, 0x000c5a},
+ {0x000c5d, 0x000c5d},
+ {0x000c60, 0x000c61},
+ {0x000c62, 0x000c63},
+ {0x000c80, 0x000c80},
+ {0x000c81, 0x000c81},
+ {0x000c82, 0x000c83},
+ {0x000c85, 0x000c8c},
+ {0x000c8e, 0x000c90},
+ {0x000c92, 0x000ca8},
+ {0x000caa, 0x000cb3},
+ {0x000cb5, 0x000cb9},
+ {0x000cbd, 0x000cbd},
+ {0x000cbe, 0x000cbe},
+ {0x000cbf, 0x000cbf},
+ {0x000cc0, 0x000cc4},
+ {0x000cc6, 0x000cc6},
+ {0x000cc7, 0x000cc8},
+ {0x000cca, 0x000ccb},
+ {0x000ccc, 0x000ccc},
+ {0x000cd5, 0x000cd6},
+ {0x000cdd, 0x000cde},
+ {0x000ce0, 0x000ce1},
+ {0x000ce2, 0x000ce3},
+ {0x000cf1, 0x000cf2},
+ {0x000cf3, 0x000cf3},
+ {0x000d00, 0x000d01},
+ {0x000d02, 0x000d03},
+ {0x000d04, 0x000d0c},
+ {0x000d0e, 0x000d10},
+ {0x000d12, 0x000d3a},
+ {0x000d3d, 0x000d3d},
+ {0x000d3e, 0x000d40},
+ {0x000d41, 0x000d44},
+ {0x000d46, 0x000d48},
+ {0x000d4a, 0x000d4c},
+ {0x000d4e, 0x000d4e},
+ {0x000d54, 0x000d56},
+ {0x000d57, 0x000d57},
+ {0x000d5f, 0x000d61},
+ {0x000d62, 0x000d63},
+ {0x000d7a, 0x000d7f},
+ {0x000d81, 0x000d81},
+ {0x000d82, 0x000d83},
+ {0x000d85, 0x000d96},
+ {0x000d9a, 0x000db1},
+ {0x000db3, 0x000dbb},
+ {0x000dbd, 0x000dbd},
+ {0x000dc0, 0x000dc6},
+ {0x000dcf, 0x000dd1},
+ {0x000dd2, 0x000dd4},
+ {0x000dd6, 0x000dd6},
+ {0x000dd8, 0x000ddf},
+ {0x000df2, 0x000df3},
+ {0x000e01, 0x000e30},
+ {0x000e31, 0x000e31},
+ {0x000e32, 0x000e33},
+ {0x000e34, 0x000e3a},
+ {0x000e40, 0x000e45},
+ {0x000e46, 0x000e46},
+ {0x000e4d, 0x000e4d},
+ {0x000e81, 0x000e82},
+ {0x000e84, 0x000e84},
+ {0x000e86, 0x000e8a},
+ {0x000e8c, 0x000ea3},
+ {0x000ea5, 0x000ea5},
+ {0x000ea7, 0x000eb0},
+ {0x000eb1, 0x000eb1},
+ {0x000eb2, 0x000eb3},
+ {0x000eb4, 0x000eb9},
+ {0x000ebb, 0x000ebc},
+ {0x000ebd, 0x000ebd},
+ {0x000ec0, 0x000ec4},
+ {0x000ec6, 0x000ec6},
+ {0x000ecd, 0x000ecd},
+ {0x000edc, 0x000edf},
+ {0x000f00, 0x000f00},
+ {0x000f40, 0x000f47},
+ {0x000f49, 0x000f6c},
+ {0x000f71, 0x000f7e},
+ {0x000f7f, 0x000f7f},
+ {0x000f80, 0x000f83},
+ {0x000f88, 0x000f8c},
+ {0x000f8d, 0x000f97},
+ {0x000f99, 0x000fbc},
+ {0x001000, 0x00102a},
+ {0x00102b, 0x00102c},
+ {0x00102d, 0x001030},
+ {0x001031, 0x001031},
+ {0x001032, 0x001036},
+ {0x001038, 0x001038},
+ {0x00103b, 0x00103c},
+ {0x00103d, 0x00103e},
+ {0x00103f, 0x00103f},
+ {0x001050, 0x001055},
+ {0x001056, 0x001057},
+ {0x001058, 0x001059},
+ {0x00105a, 0x00105d},
+ {0x00105e, 0x001060},
+ {0x001061, 0x001061},
+ {0x001062, 0x001064},
+ {0x001065, 0x001066},
+ {0x001067, 0x00106d},
+ {0x00106e, 0x001070},
+ {0x001071, 0x001074},
+ {0x001075, 0x001081},
+ {0x001082, 0x001082},
+ {0x001083, 0x001084},
+ {0x001085, 0x001086},
+ {0x001087, 0x00108c},
+ {0x00108d, 0x00108d},
+ {0x00108e, 0x00108e},
+ {0x00108f, 0x00108f},
+ {0x00109a, 0x00109c},
+ {0x00109d, 0x00109d},
+ {0x0010a0, 0x0010c5},
+ {0x0010c7, 0x0010c7},
+ {0x0010cd, 0x0010cd},
+ {0x0010d0, 0x0010fa},
+ {0x0010fc, 0x0010fc},
+ {0x0010fd, 0x0010ff},
+ {0x001100, 0x001248},
+ {0x00124a, 0x00124d},
+ {0x001250, 0x001256},
+ {0x001258, 0x001258},
+ {0x00125a, 0x00125d},
+ {0x001260, 0x001288},
+ {0x00128a, 0x00128d},
+ {0x001290, 0x0012b0},
+ {0x0012b2, 0x0012b5},
+ {0x0012b8, 0x0012be},
+ {0x0012c0, 0x0012c0},
+ {0x0012c2, 0x0012c5},
+ {0x0012c8, 0x0012d6},
+ {0x0012d8, 0x001310},
+ {0x001312, 0x001315},
+ {0x001318, 0x00135a},
+ {0x001380, 0x00138f},
+ {0x0013a0, 0x0013f5},
+ {0x0013f8, 0x0013fd},
+ {0x001401, 0x00166c},
+ {0x00166f, 0x00167f},
+ {0x001681, 0x00169a},
+ {0x0016a0, 0x0016ea},
+ {0x0016ee, 0x0016f0},
+ {0x0016f1, 0x0016f8},
+ {0x001700, 0x001711},
+ {0x001712, 0x001713},
+ {0x00171f, 0x001731},
+ {0x001732, 0x001733},
+ {0x001740, 0x001751},
+ {0x001752, 0x001753},
+ {0x001760, 0x00176c},
+ {0x00176e, 0x001770},
+ {0x001772, 0x001773},
+ {0x001780, 0x0017b3},
+ {0x0017b6, 0x0017b6},
+ {0x0017b7, 0x0017bd},
+ {0x0017be, 0x0017c5},
+ {0x0017c6, 0x0017c6},
+ {0x0017c7, 0x0017c8},
+ {0x0017d7, 0x0017d7},
+ {0x0017dc, 0x0017dc},
+ {0x001820, 0x001842},
+ {0x001843, 0x001843},
+ {0x001844, 0x001878},
+ {0x001880, 0x001884},
+ {0x001885, 0x001886},
+ {0x001887, 0x0018a8},
+ {0x0018a9, 0x0018a9},
+ {0x0018aa, 0x0018aa},
+ {0x0018b0, 0x0018f5},
+ {0x001900, 0x00191e},
+ {0x001920, 0x001922},
+ {0x001923, 0x001926},
+ {0x001927, 0x001928},
+ {0x001929, 0x00192b},
+ {0x001930, 0x001931},
+ {0x001932, 0x001932},
+ {0x001933, 0x001938},
+ {0x001950, 0x00196d},
+ {0x001970, 0x001974},
+ {0x001980, 0x0019ab},
+ {0x0019b0, 0x0019c9},
+ {0x001a00, 0x001a16},
+ {0x001a17, 0x001a18},
+ {0x001a19, 0x001a1a},
+ {0x001a1b, 0x001a1b},
+ {0x001a20, 0x001a54},
+ {0x001a55, 0x001a55},
+ {0x001a56, 0x001a56},
+ {0x001a57, 0x001a57},
+ {0x001a58, 0x001a5e},
+ {0x001a61, 0x001a61},
+ {0x001a62, 0x001a62},
+ {0x001a63, 0x001a64},
+ {0x001a65, 0x001a6c},
+ {0x001a6d, 0x001a72},
+ {0x001a73, 0x001a74},
+ {0x001aa7, 0x001aa7},
+ {0x001abf, 0x001ac0},
+ {0x001acc, 0x001ace},
+ {0x001b00, 0x001b03},
+ {0x001b04, 0x001b04},
+ {0x001b05, 0x001b33},
+ {0x001b35, 0x001b35},
+ {0x001b36, 0x001b3a},
+ {0x001b3b, 0x001b3b},
+ {0x001b3c, 0x001b3c},
+ {0x001b3d, 0x001b41},
+ {0x001b42, 0x001b42},
+ {0x001b43, 0x001b43},
+ {0x001b45, 0x001b4c},
+ {0x001b80, 0x001b81},
+ {0x001b82, 0x001b82},
+ {0x001b83, 0x001ba0},
+ {0x001ba1, 0x001ba1},
+ {0x001ba2, 0x001ba5},
+ {0x001ba6, 0x001ba7},
+ {0x001ba8, 0x001ba9},
+ {0x001bac, 0x001bad},
+ {0x001bae, 0x001baf},
+ {0x001bba, 0x001be5},
+ {0x001be7, 0x001be7},
+ {0x001be8, 0x001be9},
+ {0x001bea, 0x001bec},
+ {0x001bed, 0x001bed},
+ {0x001bee, 0x001bee},
+ {0x001bef, 0x001bf1},
+ {0x001c00, 0x001c23},
+ {0x001c24, 0x001c2b},
+ {0x001c2c, 0x001c33},
+ {0x001c34, 0x001c35},
+ {0x001c36, 0x001c36},
+ {0x001c4d, 0x001c4f},
+ {0x001c5a, 0x001c77},
+ {0x001c78, 0x001c7d},
+ {0x001c80, 0x001c88},
+ {0x001c90, 0x001cba},
+ {0x001cbd, 0x001cbf},
+ {0x001ce9, 0x001cec},
+ {0x001cee, 0x001cf3},
+ {0x001cf5, 0x001cf6},
+ {0x001cfa, 0x001cfa},
+ {0x001d00, 0x001d2b},
+ {0x001d2c, 0x001d6a},
+ {0x001d6b, 0x001d77},
+ {0x001d78, 0x001d78},
+ {0x001d79, 0x001d9a},
+ {0x001d9b, 0x001dbf},
+ {0x001de7, 0x001df4},
+ {0x001e00, 0x001f15},
+ {0x001f18, 0x001f1d},
+ {0x001f20, 0x001f45},
+ {0x001f48, 0x001f4d},
+ {0x001f50, 0x001f57},
+ {0x001f59, 0x001f59},
+ {0x001f5b, 0x001f5b},
+ {0x001f5d, 0x001f5d},
+ {0x001f5f, 0x001f7d},
+ {0x001f80, 0x001fb4},
+ {0x001fb6, 0x001fbc},
+ {0x001fbe, 0x001fbe},
+ {0x001fc2, 0x001fc4},
+ {0x001fc6, 0x001fcc},
+ {0x001fd0, 0x001fd3},
+ {0x001fd6, 0x001fdb},
+ {0x001fe0, 0x001fec},
+ {0x001ff2, 0x001ff4},
+ {0x001ff6, 0x001ffc},
+ {0x002071, 0x002071},
+ {0x00207f, 0x00207f},
+ {0x002090, 0x00209c},
+ {0x002102, 0x002102},
+ {0x002107, 0x002107},
+ {0x00210a, 0x002113},
+ {0x002115, 0x002115},
+ {0x002119, 0x00211d},
+ {0x002124, 0x002124},
+ {0x002126, 0x002126},
+ {0x002128, 0x002128},
+ {0x00212a, 0x00212d},
+ {0x00212f, 0x002134},
+ {0x002135, 0x002138},
+ {0x002139, 0x002139},
+ {0x00213c, 0x00213f},
+ {0x002145, 0x002149},
+ {0x00214e, 0x00214e},
+ {0x002160, 0x002182},
+ {0x002183, 0x002184},
+ {0x002185, 0x002188},
+ {0x0024b6, 0x0024e9},
+ {0x002c00, 0x002c7b},
+ {0x002c7c, 0x002c7d},
+ {0x002c7e, 0x002ce4},
+ {0x002ceb, 0x002cee},
+ {0x002cf2, 0x002cf3},
+ {0x002d00, 0x002d25},
+ {0x002d27, 0x002d27},
+ {0x002d2d, 0x002d2d},
+ {0x002d30, 0x002d67},
+ {0x002d6f, 0x002d6f},
+ {0x002d80, 0x002d96},
+ {0x002da0, 0x002da6},
+ {0x002da8, 0x002dae},
+ {0x002db0, 0x002db6},
+ {0x002db8, 0x002dbe},
+ {0x002dc0, 0x002dc6},
+ {0x002dc8, 0x002dce},
+ {0x002dd0, 0x002dd6},
+ {0x002dd8, 0x002dde},
+ {0x002de0, 0x002dff},
+ {0x002e2f, 0x002e2f},
+ {0x003005, 0x003005},
+ {0x003006, 0x003006},
+ {0x003007, 0x003007},
+ {0x003021, 0x003029},
+ {0x003031, 0x003035},
+ {0x003038, 0x00303a},
+ {0x00303b, 0x00303b},
+ {0x00303c, 0x00303c},
+ {0x003041, 0x003096},
+ {0x00309d, 0x00309e},
+ {0x00309f, 0x00309f},
+ {0x0030a1, 0x0030fa},
+ {0x0030fc, 0x0030fe},
+ {0x0030ff, 0x0030ff},
+ {0x003105, 0x00312f},
+ {0x003131, 0x00318e},
+ {0x0031a0, 0x0031bf},
+ {0x0031f0, 0x0031ff},
+ {0x003400, 0x004dbf},
+ {0x004e00, 0x00a014},
+ {0x00a015, 0x00a015},
+ {0x00a016, 0x00a48c},
+ {0x00a4d0, 0x00a4f7},
+ {0x00a4f8, 0x00a4fd},
+ {0x00a500, 0x00a60b},
+ {0x00a60c, 0x00a60c},
+ {0x00a610, 0x00a61f},
+ {0x00a62a, 0x00a62b},
+ {0x00a640, 0x00a66d},
+ {0x00a66e, 0x00a66e},
+ {0x00a674, 0x00a67b},
+ {0x00a67f, 0x00a67f},
+ {0x00a680, 0x00a69b},
+ {0x00a69c, 0x00a69d},
+ {0x00a69e, 0x00a69f},
+ {0x00a6a0, 0x00a6e5},
+ {0x00a6e6, 0x00a6ef},
+ {0x00a717, 0x00a71f},
+ {0x00a722, 0x00a76f},
+ {0x00a770, 0x00a770},
+ {0x00a771, 0x00a787},
+ {0x00a788, 0x00a788},
+ {0x00a78b, 0x00a78e},
+ {0x00a78f, 0x00a78f},
+ {0x00a790, 0x00a7ca},
+ {0x00a7d0, 0x00a7d1},
+ {0x00a7d3, 0x00a7d3},
+ {0x00a7d5, 0x00a7d9},
+ {0x00a7f2, 0x00a7f4},
+ {0x00a7f5, 0x00a7f6},
+ {0x00a7f7, 0x00a7f7},
+ {0x00a7f8, 0x00a7f9},
+ {0x00a7fa, 0x00a7fa},
+ {0x00a7fb, 0x00a801},
+ {0x00a802, 0x00a802},
+ {0x00a803, 0x00a805},
+ {0x00a807, 0x00a80a},
+ {0x00a80b, 0x00a80b},
+ {0x00a80c, 0x00a822},
+ {0x00a823, 0x00a824},
+ {0x00a825, 0x00a826},
+ {0x00a827, 0x00a827},
+ {0x00a840, 0x00a873},
+ {0x00a880, 0x00a881},
+ {0x00a882, 0x00a8b3},
+ {0x00a8b4, 0x00a8c3},
+ {0x00a8c5, 0x00a8c5},
+ {0x00a8f2, 0x00a8f7},
+ {0x00a8fb, 0x00a8fb},
+ {0x00a8fd, 0x00a8fe},
+ {0x00a8ff, 0x00a8ff},
+ {0x00a90a, 0x00a925},
+ {0x00a926, 0x00a92a},
+ {0x00a930, 0x00a946},
+ {0x00a947, 0x00a951},
+ {0x00a952, 0x00a952},
+ {0x00a960, 0x00a97c},
+ {0x00a980, 0x00a982},
+ {0x00a983, 0x00a983},
+ {0x00a984, 0x00a9b2},
+ {0x00a9b4, 0x00a9b5},
+ {0x00a9b6, 0x00a9b9},
+ {0x00a9ba, 0x00a9bb},
+ {0x00a9bc, 0x00a9bd},
+ {0x00a9be, 0x00a9bf},
+ {0x00a9cf, 0x00a9cf},
+ {0x00a9e0, 0x00a9e4},
+ {0x00a9e5, 0x00a9e5},
+ {0x00a9e6, 0x00a9e6},
+ {0x00a9e7, 0x00a9ef},
+ {0x00a9fa, 0x00a9fe},
+ {0x00aa00, 0x00aa28},
+ {0x00aa29, 0x00aa2e},
+ {0x00aa2f, 0x00aa30},
+ {0x00aa31, 0x00aa32},
+ {0x00aa33, 0x00aa34},
+ {0x00aa35, 0x00aa36},
+ {0x00aa40, 0x00aa42},
+ {0x00aa43, 0x00aa43},
+ {0x00aa44, 0x00aa4b},
+ {0x00aa4c, 0x00aa4c},
+ {0x00aa4d, 0x00aa4d},
+ {0x00aa60, 0x00aa6f},
+ {0x00aa70, 0x00aa70},
+ {0x00aa71, 0x00aa76},
+ {0x00aa7a, 0x00aa7a},
+ {0x00aa7b, 0x00aa7b},
+ {0x00aa7c, 0x00aa7c},
+ {0x00aa7d, 0x00aa7d},
+ {0x00aa7e, 0x00aaaf},
+ {0x00aab0, 0x00aab0},
+ {0x00aab1, 0x00aab1},
+ {0x00aab2, 0x00aab4},
+ {0x00aab5, 0x00aab6},
+ {0x00aab7, 0x00aab8},
+ {0x00aab9, 0x00aabd},
+ {0x00aabe, 0x00aabe},
+ {0x00aac0, 0x00aac0},
+ {0x00aac2, 0x00aac2},
+ {0x00aadb, 0x00aadc},
+ {0x00aadd, 0x00aadd},
+ {0x00aae0, 0x00aaea},
+ {0x00aaeb, 0x00aaeb},
+ {0x00aaec, 0x00aaed},
+ {0x00aaee, 0x00aaef},
+ {0x00aaf2, 0x00aaf2},
+ {0x00aaf3, 0x00aaf4},
+ {0x00aaf5, 0x00aaf5},
+ {0x00ab01, 0x00ab06},
+ {0x00ab09, 0x00ab0e},
+ {0x00ab11, 0x00ab16},
+ {0x00ab20, 0x00ab26},
+ {0x00ab28, 0x00ab2e},
+ {0x00ab30, 0x00ab5a},
+ {0x00ab5c, 0x00ab5f},
+ {0x00ab60, 0x00ab68},
+ {0x00ab69, 0x00ab69},
+ {0x00ab70, 0x00abbf},
+ {0x00abc0, 0x00abe2},
+ {0x00abe3, 0x00abe4},
+ {0x00abe5, 0x00abe5},
+ {0x00abe6, 0x00abe7},
+ {0x00abe8, 0x00abe8},
+ {0x00abe9, 0x00abea},
+ {0x00ac00, 0x00d7a3},
+ {0x00d7b0, 0x00d7c6},
+ {0x00d7cb, 0x00d7fb},
+ {0x00f900, 0x00fa6d},
+ {0x00fa70, 0x00fad9},
+ {0x00fb00, 0x00fb06},
+ {0x00fb13, 0x00fb17},
+ {0x00fb1d, 0x00fb1d},
+ {0x00fb1e, 0x00fb1e},
+ {0x00fb1f, 0x00fb28},
+ {0x00fb2a, 0x00fb36},
+ {0x00fb38, 0x00fb3c},
+ {0x00fb3e, 0x00fb3e},
+ {0x00fb40, 0x00fb41},
+ {0x00fb43, 0x00fb44},
+ {0x00fb46, 0x00fbb1},
+ {0x00fbd3, 0x00fd3d},
+ {0x00fd50, 0x00fd8f},
+ {0x00fd92, 0x00fdc7},
+ {0x00fdf0, 0x00fdfb},
+ {0x00fe70, 0x00fe74},
+ {0x00fe76, 0x00fefc},
+ {0x00ff21, 0x00ff3a},
+ {0x00ff41, 0x00ff5a},
+ {0x00ff66, 0x00ff6f},
+ {0x00ff70, 0x00ff70},
+ {0x00ff71, 0x00ff9d},
+ {0x00ff9e, 0x00ff9f},
+ {0x00ffa0, 0x00ffbe},
+ {0x00ffc2, 0x00ffc7},
+ {0x00ffca, 0x00ffcf},
+ {0x00ffd2, 0x00ffd7},
+ {0x00ffda, 0x00ffdc},
+ {0x010000, 0x01000b},
+ {0x01000d, 0x010026},
+ {0x010028, 0x01003a},
+ {0x01003c, 0x01003d},
+ {0x01003f, 0x01004d},
+ {0x010050, 0x01005d},
+ {0x010080, 0x0100fa},
+ {0x010140, 0x010174},
+ {0x010280, 0x01029c},
+ {0x0102a0, 0x0102d0},
+ {0x010300, 0x01031f},
+ {0x01032d, 0x010340},
+ {0x010341, 0x010341},
+ {0x010342, 0x010349},
+ {0x01034a, 0x01034a},
+ {0x010350, 0x010375},
+ {0x010376, 0x01037a},
+ {0x010380, 0x01039d},
+ {0x0103a0, 0x0103c3},
+ {0x0103c8, 0x0103cf},
+ {0x0103d1, 0x0103d5},
+ {0x010400, 0x01044f},
+ {0x010450, 0x01049d},
+ {0x0104b0, 0x0104d3},
+ {0x0104d8, 0x0104fb},
+ {0x010500, 0x010527},
+ {0x010530, 0x010563},
+ {0x010570, 0x01057a},
+ {0x01057c, 0x01058a},
+ {0x01058c, 0x010592},
+ {0x010594, 0x010595},
+ {0x010597, 0x0105a1},
+ {0x0105a3, 0x0105b1},
+ {0x0105b3, 0x0105b9},
+ {0x0105bb, 0x0105bc},
+ {0x010600, 0x010736},
+ {0x010740, 0x010755},
+ {0x010760, 0x010767},
+ {0x010780, 0x010785},
+ {0x010787, 0x0107b0},
+ {0x0107b2, 0x0107ba},
+ {0x010800, 0x010805},
+ {0x010808, 0x010808},
+ {0x01080a, 0x010835},
+ {0x010837, 0x010838},
+ {0x01083c, 0x01083c},
+ {0x01083f, 0x010855},
+ {0x010860, 0x010876},
+ {0x010880, 0x01089e},
+ {0x0108e0, 0x0108f2},
+ {0x0108f4, 0x0108f5},
+ {0x010900, 0x010915},
+ {0x010920, 0x010939},
+ {0x010980, 0x0109b7},
+ {0x0109be, 0x0109bf},
+ {0x010a00, 0x010a00},
+ {0x010a01, 0x010a03},
+ {0x010a05, 0x010a06},
+ {0x010a0c, 0x010a0f},
+ {0x010a10, 0x010a13},
+ {0x010a15, 0x010a17},
+ {0x010a19, 0x010a35},
+ {0x010a60, 0x010a7c},
+ {0x010a80, 0x010a9c},
+ {0x010ac0, 0x010ac7},
+ {0x010ac9, 0x010ae4},
+ {0x010b00, 0x010b35},
+ {0x010b40, 0x010b55},
+ {0x010b60, 0x010b72},
+ {0x010b80, 0x010b91},
+ {0x010c00, 0x010c48},
+ {0x010c80, 0x010cb2},
+ {0x010cc0, 0x010cf2},
+ {0x010d00, 0x010d23},
+ {0x010d24, 0x010d27},
+ {0x010e80, 0x010ea9},
+ {0x010eab, 0x010eac},
+ {0x010eb0, 0x010eb1},
+ {0x010f00, 0x010f1c},
+ {0x010f27, 0x010f27},
+ {0x010f30, 0x010f45},
+ {0x010f70, 0x010f81},
+ {0x010fb0, 0x010fc4},
+ {0x010fe0, 0x010ff6},
+ {0x011000, 0x011000},
+ {0x011001, 0x011001},
+ {0x011002, 0x011002},
+ {0x011003, 0x011037},
+ {0x011038, 0x011045},
+ {0x011071, 0x011072},
+ {0x011073, 0x011074},
+ {0x011075, 0x011075},
+ {0x011080, 0x011081},
+ {0x011082, 0x011082},
+ {0x011083, 0x0110af},
+ {0x0110b0, 0x0110b2},
+ {0x0110b3, 0x0110b6},
+ {0x0110b7, 0x0110b8},
+ {0x0110c2, 0x0110c2},
+ {0x0110d0, 0x0110e8},
+ {0x011100, 0x011102},
+ {0x011103, 0x011126},
+ {0x011127, 0x01112b},
+ {0x01112c, 0x01112c},
+ {0x01112d, 0x011132},
+ {0x011144, 0x011144},
+ {0x011145, 0x011146},
+ {0x011147, 0x011147},
+ {0x011150, 0x011172},
+ {0x011176, 0x011176},
+ {0x011180, 0x011181},
+ {0x011182, 0x011182},
+ {0x011183, 0x0111b2},
+ {0x0111b3, 0x0111b5},
+ {0x0111b6, 0x0111be},
+ {0x0111bf, 0x0111bf},
+ {0x0111c1, 0x0111c4},
+ {0x0111ce, 0x0111ce},
+ {0x0111cf, 0x0111cf},
+ {0x0111da, 0x0111da},
+ {0x0111dc, 0x0111dc},
+ {0x011200, 0x011211},
+ {0x011213, 0x01122b},
+ {0x01122c, 0x01122e},
+ {0x01122f, 0x011231},
+ {0x011232, 0x011233},
+ {0x011234, 0x011234},
+ {0x011237, 0x011237},
+ {0x01123e, 0x01123e},
+ {0x01123f, 0x011240},
+ {0x011241, 0x011241},
+ {0x011280, 0x011286},
+ {0x011288, 0x011288},
+ {0x01128a, 0x01128d},
+ {0x01128f, 0x01129d},
+ {0x01129f, 0x0112a8},
+ {0x0112b0, 0x0112de},
+ {0x0112df, 0x0112df},
+ {0x0112e0, 0x0112e2},
+ {0x0112e3, 0x0112e8},
+ {0x011300, 0x011301},
+ {0x011302, 0x011303},
+ {0x011305, 0x01130c},
+ {0x01130f, 0x011310},
+ {0x011313, 0x011328},
+ {0x01132a, 0x011330},
+ {0x011332, 0x011333},
+ {0x011335, 0x011339},
+ {0x01133d, 0x01133d},
+ {0x01133e, 0x01133f},
+ {0x011340, 0x011340},
+ {0x011341, 0x011344},
+ {0x011347, 0x011348},
+ {0x01134b, 0x01134c},
+ {0x011350, 0x011350},
+ {0x011357, 0x011357},
+ {0x01135d, 0x011361},
+ {0x011362, 0x011363},
+ {0x011400, 0x011434},
+ {0x011435, 0x011437},
+ {0x011438, 0x01143f},
+ {0x011440, 0x011441},
+ {0x011443, 0x011444},
+ {0x011445, 0x011445},
+ {0x011447, 0x01144a},
+ {0x01145f, 0x011461},
+ {0x011480, 0x0114af},
+ {0x0114b0, 0x0114b2},
+ {0x0114b3, 0x0114b8},
+ {0x0114b9, 0x0114b9},
+ {0x0114ba, 0x0114ba},
+ {0x0114bb, 0x0114be},
+ {0x0114bf, 0x0114c0},
+ {0x0114c1, 0x0114c1},
+ {0x0114c4, 0x0114c5},
+ {0x0114c7, 0x0114c7},
+ {0x011580, 0x0115ae},
+ {0x0115af, 0x0115b1},
+ {0x0115b2, 0x0115b5},
+ {0x0115b8, 0x0115bb},
+ {0x0115bc, 0x0115bd},
+ {0x0115be, 0x0115be},
+ {0x0115d8, 0x0115db},
+ {0x0115dc, 0x0115dd},
+ {0x011600, 0x01162f},
+ {0x011630, 0x011632},
+ {0x011633, 0x01163a},
+ {0x01163b, 0x01163c},
+ {0x01163d, 0x01163d},
+ {0x01163e, 0x01163e},
+ {0x011640, 0x011640},
+ {0x011644, 0x011644},
+ {0x011680, 0x0116aa},
+ {0x0116ab, 0x0116ab},
+ {0x0116ac, 0x0116ac},
+ {0x0116ad, 0x0116ad},
+ {0x0116ae, 0x0116af},
+ {0x0116b0, 0x0116b5},
+ {0x0116b8, 0x0116b8},
+ {0x011700, 0x01171a},
+ {0x01171d, 0x01171f},
+ {0x011720, 0x011721},
+ {0x011722, 0x011725},
+ {0x011726, 0x011726},
+ {0x011727, 0x01172a},
+ {0x011740, 0x011746},
+ {0x011800, 0x01182b},
+ {0x01182c, 0x01182e},
+ {0x01182f, 0x011837},
+ {0x011838, 0x011838},
+ {0x0118a0, 0x0118df},
+ {0x0118ff, 0x011906},
+ {0x011909, 0x011909},
+ {0x01190c, 0x011913},
+ {0x011915, 0x011916},
+ {0x011918, 0x01192f},
+ {0x011930, 0x011935},
+ {0x011937, 0x011938},
+ {0x01193b, 0x01193c},
+ {0x01193f, 0x01193f},
+ {0x011940, 0x011940},
+ {0x011941, 0x011941},
+ {0x011942, 0x011942},
+ {0x0119a0, 0x0119a7},
+ {0x0119aa, 0x0119d0},
+ {0x0119d1, 0x0119d3},
+ {0x0119d4, 0x0119d7},
+ {0x0119da, 0x0119db},
+ {0x0119dc, 0x0119df},
+ {0x0119e1, 0x0119e1},
+ {0x0119e3, 0x0119e3},
+ {0x0119e4, 0x0119e4},
+ {0x011a00, 0x011a00},
+ {0x011a01, 0x011a0a},
+ {0x011a0b, 0x011a32},
+ {0x011a35, 0x011a38},
+ {0x011a39, 0x011a39},
+ {0x011a3a, 0x011a3a},
+ {0x011a3b, 0x011a3e},
+ {0x011a50, 0x011a50},
+ {0x011a51, 0x011a56},
+ {0x011a57, 0x011a58},
+ {0x011a59, 0x011a5b},
+ {0x011a5c, 0x011a89},
+ {0x011a8a, 0x011a96},
+ {0x011a97, 0x011a97},
+ {0x011a9d, 0x011a9d},
+ {0x011ab0, 0x011af8},
+ {0x011c00, 0x011c08},
+ {0x011c0a, 0x011c2e},
+ {0x011c2f, 0x011c2f},
+ {0x011c30, 0x011c36},
+ {0x011c38, 0x011c3d},
+ {0x011c3e, 0x011c3e},
+ {0x011c40, 0x011c40},
+ {0x011c72, 0x011c8f},
+ {0x011c92, 0x011ca7},
+ {0x011ca9, 0x011ca9},
+ {0x011caa, 0x011cb0},
+ {0x011cb1, 0x011cb1},
+ {0x011cb2, 0x011cb3},
+ {0x011cb4, 0x011cb4},
+ {0x011cb5, 0x011cb6},
+ {0x011d00, 0x011d06},
+ {0x011d08, 0x011d09},
+ {0x011d0b, 0x011d30},
+ {0x011d31, 0x011d36},
+ {0x011d3a, 0x011d3a},
+ {0x011d3c, 0x011d3d},
+ {0x011d3f, 0x011d41},
+ {0x011d43, 0x011d43},
+ {0x011d46, 0x011d46},
+ {0x011d47, 0x011d47},
+ {0x011d60, 0x011d65},
+ {0x011d67, 0x011d68},
+ {0x011d6a, 0x011d89},
+ {0x011d8a, 0x011d8e},
+ {0x011d90, 0x011d91},
+ {0x011d93, 0x011d94},
+ {0x011d95, 0x011d95},
+ {0x011d96, 0x011d96},
+ {0x011d98, 0x011d98},
+ {0x011ee0, 0x011ef2},
+ {0x011ef3, 0x011ef4},
+ {0x011ef5, 0x011ef6},
+ {0x011f00, 0x011f01},
+ {0x011f02, 0x011f02},
+ {0x011f03, 0x011f03},
+ {0x011f04, 0x011f10},
+ {0x011f12, 0x011f33},
+ {0x011f34, 0x011f35},
+ {0x011f36, 0x011f3a},
+ {0x011f3e, 0x011f3f},
+ {0x011f40, 0x011f40},
+ {0x011fb0, 0x011fb0},
+ {0x012000, 0x012399},
+ {0x012400, 0x01246e},
+ {0x012480, 0x012543},
+ {0x012f90, 0x012ff0},
+ {0x013000, 0x01342f},
+ {0x013441, 0x013446},
+ {0x014400, 0x014646},
+ {0x016800, 0x016a38},
+ {0x016a40, 0x016a5e},
+ {0x016a70, 0x016abe},
+ {0x016ad0, 0x016aed},
+ {0x016b00, 0x016b2f},
+ {0x016b40, 0x016b43},
+ {0x016b63, 0x016b77},
+ {0x016b7d, 0x016b8f},
+ {0x016e40, 0x016e7f},
+ {0x016f00, 0x016f4a},
+ {0x016f4f, 0x016f4f},
+ {0x016f50, 0x016f50},
+ {0x016f51, 0x016f87},
+ {0x016f8f, 0x016f92},
+ {0x016f93, 0x016f9f},
+ {0x016fe0, 0x016fe1},
+ {0x016fe3, 0x016fe3},
+ {0x016ff0, 0x016ff1},
+ {0x017000, 0x0187f7},
+ {0x018800, 0x018cd5},
+ {0x018d00, 0x018d08},
+ {0x01aff0, 0x01aff3},
+ {0x01aff5, 0x01affb},
+ {0x01affd, 0x01affe},
+ {0x01b000, 0x01b122},
+ {0x01b132, 0x01b132},
+ {0x01b150, 0x01b152},
+ {0x01b155, 0x01b155},
+ {0x01b164, 0x01b167},
+ {0x01b170, 0x01b2fb},
+ {0x01bc00, 0x01bc6a},
+ {0x01bc70, 0x01bc7c},
+ {0x01bc80, 0x01bc88},
+ {0x01bc90, 0x01bc99},
+ {0x01bc9e, 0x01bc9e},
+ {0x01d400, 0x01d454},
+ {0x01d456, 0x01d49c},
+ {0x01d49e, 0x01d49f},
+ {0x01d4a2, 0x01d4a2},
+ {0x01d4a5, 0x01d4a6},
+ {0x01d4a9, 0x01d4ac},
+ {0x01d4ae, 0x01d4b9},
+ {0x01d4bb, 0x01d4bb},
+ {0x01d4bd, 0x01d4c3},
+ {0x01d4c5, 0x01d505},
+ {0x01d507, 0x01d50a},
+ {0x01d50d, 0x01d514},
+ {0x01d516, 0x01d51c},
+ {0x01d51e, 0x01d539},
+ {0x01d53b, 0x01d53e},
+ {0x01d540, 0x01d544},
+ {0x01d546, 0x01d546},
+ {0x01d54a, 0x01d550},
+ {0x01d552, 0x01d6a5},
+ {0x01d6a8, 0x01d6c0},
+ {0x01d6c2, 0x01d6da},
+ {0x01d6dc, 0x01d6fa},
+ {0x01d6fc, 0x01d714},
+ {0x01d716, 0x01d734},
+ {0x01d736, 0x01d74e},
+ {0x01d750, 0x01d76e},
+ {0x01d770, 0x01d788},
+ {0x01d78a, 0x01d7a8},
+ {0x01d7aa, 0x01d7c2},
+ {0x01d7c4, 0x01d7cb},
+ {0x01df00, 0x01df09},
+ {0x01df0a, 0x01df0a},
+ {0x01df0b, 0x01df1e},
+ {0x01df25, 0x01df2a},
+ {0x01e000, 0x01e006},
+ {0x01e008, 0x01e018},
+ {0x01e01b, 0x01e021},
+ {0x01e023, 0x01e024},
+ {0x01e026, 0x01e02a},
+ {0x01e030, 0x01e06d},
+ {0x01e08f, 0x01e08f},
+ {0x01e100, 0x01e12c},
+ {0x01e137, 0x01e13d},
+ {0x01e14e, 0x01e14e},
+ {0x01e290, 0x01e2ad},
+ {0x01e2c0, 0x01e2eb},
+ {0x01e4d0, 0x01e4ea},
+ {0x01e4eb, 0x01e4eb},
+ {0x01e7e0, 0x01e7e6},
+ {0x01e7e8, 0x01e7eb},
+ {0x01e7ed, 0x01e7ee},
+ {0x01e7f0, 0x01e7fe},
+ {0x01e800, 0x01e8c4},
+ {0x01e900, 0x01e943},
+ {0x01e947, 0x01e947},
+ {0x01e94b, 0x01e94b},
+ {0x01ee00, 0x01ee03},
+ {0x01ee05, 0x01ee1f},
+ {0x01ee21, 0x01ee22},
+ {0x01ee24, 0x01ee24},
+ {0x01ee27, 0x01ee27},
+ {0x01ee29, 0x01ee32},
+ {0x01ee34, 0x01ee37},
+ {0x01ee39, 0x01ee39},
+ {0x01ee3b, 0x01ee3b},
+ {0x01ee42, 0x01ee42},
+ {0x01ee47, 0x01ee47},
+ {0x01ee49, 0x01ee49},
+ {0x01ee4b, 0x01ee4b},
+ {0x01ee4d, 0x01ee4f},
+ {0x01ee51, 0x01ee52},
+ {0x01ee54, 0x01ee54},
+ {0x01ee57, 0x01ee57},
+ {0x01ee59, 0x01ee59},
+ {0x01ee5b, 0x01ee5b},
+ {0x01ee5d, 0x01ee5d},
+ {0x01ee5f, 0x01ee5f},
+ {0x01ee61, 0x01ee62},
+ {0x01ee64, 0x01ee64},
+ {0x01ee67, 0x01ee6a},
+ {0x01ee6c, 0x01ee72},
+ {0x01ee74, 0x01ee77},
+ {0x01ee79, 0x01ee7c},
+ {0x01ee7e, 0x01ee7e},
+ {0x01ee80, 0x01ee89},
+ {0x01ee8b, 0x01ee9b},
+ {0x01eea1, 0x01eea3},
+ {0x01eea5, 0x01eea9},
+ {0x01eeab, 0x01eebb},
+ {0x01f130, 0x01f149},
+ {0x01f150, 0x01f169},
+ {0x01f170, 0x01f189},
+ {0x020000, 0x02a6df},
+ {0x02a700, 0x02b739},
+ {0x02b740, 0x02b81d},
+ {0x02b820, 0x02cea1},
+ {0x02ceb0, 0x02ebe0},
+ {0x02ebf0, 0x02ee5d},
+ {0x02f800, 0x02fa1d},
+ {0x030000, 0x03134a},
+ {0x031350, 0x0323af},
+};
+
+/* table of Unicode codepoint ranges of Lowercase characters */
+static const pg_unicode_range unicode_lowercase[686] =
+{
+ {0x000061, 0x00007a},
+ {0x0000aa, 0x0000aa},
+ {0x0000b5, 0x0000b5},
+ {0x0000ba, 0x0000ba},
+ {0x0000df, 0x0000f6},
+ {0x0000f8, 0x0000ff},
+ {0x000101, 0x000101},
+ {0x000103, 0x000103},
+ {0x000105, 0x000105},
+ {0x000107, 0x000107},
+ {0x000109, 0x000109},
+ {0x00010b, 0x00010b},
+ {0x00010d, 0x00010d},
+ {0x00010f, 0x00010f},
+ {0x000111, 0x000111},
+ {0x000113, 0x000113},
+ {0x000115, 0x000115},
+ {0x000117, 0x000117},
+ {0x000119, 0x000119},
+ {0x00011b, 0x00011b},
+ {0x00011d, 0x00011d},
+ {0x00011f, 0x00011f},
+ {0x000121, 0x000121},
+ {0x000123, 0x000123},
+ {0x000125, 0x000125},
+ {0x000127, 0x000127},
+ {0x000129, 0x000129},
+ {0x00012b, 0x00012b},
+ {0x00012d, 0x00012d},
+ {0x00012f, 0x00012f},
+ {0x000131, 0x000131},
+ {0x000133, 0x000133},
+ {0x000135, 0x000135},
+ {0x000137, 0x000138},
+ {0x00013a, 0x00013a},
+ {0x00013c, 0x00013c},
+ {0x00013e, 0x00013e},
+ {0x000140, 0x000140},
+ {0x000142, 0x000142},
+ {0x000144, 0x000144},
+ {0x000146, 0x000146},
+ {0x000148, 0x000149},
+ {0x00014b, 0x00014b},
+ {0x00014d, 0x00014d},
+ {0x00014f, 0x00014f},
+ {0x000151, 0x000151},
+ {0x000153, 0x000153},
+ {0x000155, 0x000155},
+ {0x000157, 0x000157},
+ {0x000159, 0x000159},
+ {0x00015b, 0x00015b},
+ {0x00015d, 0x00015d},
+ {0x00015f, 0x00015f},
+ {0x000161, 0x000161},
+ {0x000163, 0x000163},
+ {0x000165, 0x000165},
+ {0x000167, 0x000167},
+ {0x000169, 0x000169},
+ {0x00016b, 0x00016b},
+ {0x00016d, 0x00016d},
+ {0x00016f, 0x00016f},
+ {0x000171, 0x000171},
+ {0x000173, 0x000173},
+ {0x000175, 0x000175},
+ {0x000177, 0x000177},
+ {0x00017a, 0x00017a},
+ {0x00017c, 0x00017c},
+ {0x00017e, 0x000180},
+ {0x000183, 0x000183},
+ {0x000185, 0x000185},
+ {0x000188, 0x000188},
+ {0x00018c, 0x00018d},
+ {0x000192, 0x000192},
+ {0x000195, 0x000195},
+ {0x000199, 0x00019b},
+ {0x00019e, 0x00019e},
+ {0x0001a1, 0x0001a1},
+ {0x0001a3, 0x0001a3},
+ {0x0001a5, 0x0001a5},
+ {0x0001a8, 0x0001a8},
+ {0x0001aa, 0x0001ab},
+ {0x0001ad, 0x0001ad},
+ {0x0001b0, 0x0001b0},
+ {0x0001b4, 0x0001b4},
+ {0x0001b6, 0x0001b6},
+ {0x0001b9, 0x0001ba},
+ {0x0001bd, 0x0001bf},
+ {0x0001c6, 0x0001c6},
+ {0x0001c9, 0x0001c9},
+ {0x0001cc, 0x0001cc},
+ {0x0001ce, 0x0001ce},
+ {0x0001d0, 0x0001d0},
+ {0x0001d2, 0x0001d2},
+ {0x0001d4, 0x0001d4},
+ {0x0001d6, 0x0001d6},
+ {0x0001d8, 0x0001d8},
+ {0x0001da, 0x0001da},
+ {0x0001dc, 0x0001dd},
+ {0x0001df, 0x0001df},
+ {0x0001e1, 0x0001e1},
+ {0x0001e3, 0x0001e3},
+ {0x0001e5, 0x0001e5},
+ {0x0001e7, 0x0001e7},
+ {0x0001e9, 0x0001e9},
+ {0x0001eb, 0x0001eb},
+ {0x0001ed, 0x0001ed},
+ {0x0001ef, 0x0001f0},
+ {0x0001f3, 0x0001f3},
+ {0x0001f5, 0x0001f5},
+ {0x0001f9, 0x0001f9},
+ {0x0001fb, 0x0001fb},
+ {0x0001fd, 0x0001fd},
+ {0x0001ff, 0x0001ff},
+ {0x000201, 0x000201},
+ {0x000203, 0x000203},
+ {0x000205, 0x000205},
+ {0x000207, 0x000207},
+ {0x000209, 0x000209},
+ {0x00020b, 0x00020b},
+ {0x00020d, 0x00020d},
+ {0x00020f, 0x00020f},
+ {0x000211, 0x000211},
+ {0x000213, 0x000213},
+ {0x000215, 0x000215},
+ {0x000217, 0x000217},
+ {0x000219, 0x000219},
+ {0x00021b, 0x00021b},
+ {0x00021d, 0x00021d},
+ {0x00021f, 0x00021f},
+ {0x000221, 0x000221},
+ {0x000223, 0x000223},
+ {0x000225, 0x000225},
+ {0x000227, 0x000227},
+ {0x000229, 0x000229},
+ {0x00022b, 0x00022b},
+ {0x00022d, 0x00022d},
+ {0x00022f, 0x00022f},
+ {0x000231, 0x000231},
+ {0x000233, 0x000239},
+ {0x00023c, 0x00023c},
+ {0x00023f, 0x000240},
+ {0x000242, 0x000242},
+ {0x000247, 0x000247},
+ {0x000249, 0x000249},
+ {0x00024b, 0x00024b},
+ {0x00024d, 0x00024d},
+ {0x00024f, 0x000293},
+ {0x000295, 0x0002af},
+ {0x0002b0, 0x0002b8},
+ {0x0002c0, 0x0002c1},
+ {0x0002e0, 0x0002e4},
+ {0x000345, 0x000345},
+ {0x000371, 0x000371},
+ {0x000373, 0x000373},
+ {0x000377, 0x000377},
+ {0x00037a, 0x00037a},
+ {0x00037b, 0x00037d},
+ {0x000390, 0x000390},
+ {0x0003ac, 0x0003ce},
+ {0x0003d0, 0x0003d1},
+ {0x0003d5, 0x0003d7},
+ {0x0003d9, 0x0003d9},
+ {0x0003db, 0x0003db},
+ {0x0003dd, 0x0003dd},
+ {0x0003df, 0x0003df},
+ {0x0003e1, 0x0003e1},
+ {0x0003e3, 0x0003e3},
+ {0x0003e5, 0x0003e5},
+ {0x0003e7, 0x0003e7},
+ {0x0003e9, 0x0003e9},
+ {0x0003eb, 0x0003eb},
+ {0x0003ed, 0x0003ed},
+ {0x0003ef, 0x0003f3},
+ {0x0003f5, 0x0003f5},
+ {0x0003f8, 0x0003f8},
+ {0x0003fb, 0x0003fc},
+ {0x000430, 0x00045f},
+ {0x000461, 0x000461},
+ {0x000463, 0x000463},
+ {0x000465, 0x000465},
+ {0x000467, 0x000467},
+ {0x000469, 0x000469},
+ {0x00046b, 0x00046b},
+ {0x00046d, 0x00046d},
+ {0x00046f, 0x00046f},
+ {0x000471, 0x000471},
+ {0x000473, 0x000473},
+ {0x000475, 0x000475},
+ {0x000477, 0x000477},
+ {0x000479, 0x000479},
+ {0x00047b, 0x00047b},
+ {0x00047d, 0x00047d},
+ {0x00047f, 0x00047f},
+ {0x000481, 0x000481},
+ {0x00048b, 0x00048b},
+ {0x00048d, 0x00048d},
+ {0x00048f, 0x00048f},
+ {0x000491, 0x000491},
+ {0x000493, 0x000493},
+ {0x000495, 0x000495},
+ {0x000497, 0x000497},
+ {0x000499, 0x000499},
+ {0x00049b, 0x00049b},
+ {0x00049d, 0x00049d},
+ {0x00049f, 0x00049f},
+ {0x0004a1, 0x0004a1},
+ {0x0004a3, 0x0004a3},
+ {0x0004a5, 0x0004a5},
+ {0x0004a7, 0x0004a7},
+ {0x0004a9, 0x0004a9},
+ {0x0004ab, 0x0004ab},
+ {0x0004ad, 0x0004ad},
+ {0x0004af, 0x0004af},
+ {0x0004b1, 0x0004b1},
+ {0x0004b3, 0x0004b3},
+ {0x0004b5, 0x0004b5},
+ {0x0004b7, 0x0004b7},
+ {0x0004b9, 0x0004b9},
+ {0x0004bb, 0x0004bb},
+ {0x0004bd, 0x0004bd},
+ {0x0004bf, 0x0004bf},
+ {0x0004c2, 0x0004c2},
+ {0x0004c4, 0x0004c4},
+ {0x0004c6, 0x0004c6},
+ {0x0004c8, 0x0004c8},
+ {0x0004ca, 0x0004ca},
+ {0x0004cc, 0x0004cc},
+ {0x0004ce, 0x0004cf},
+ {0x0004d1, 0x0004d1},
+ {0x0004d3, 0x0004d3},
+ {0x0004d5, 0x0004d5},
+ {0x0004d7, 0x0004d7},
+ {0x0004d9, 0x0004d9},
+ {0x0004db, 0x0004db},
+ {0x0004dd, 0x0004dd},
+ {0x0004df, 0x0004df},
+ {0x0004e1, 0x0004e1},
+ {0x0004e3, 0x0004e3},
+ {0x0004e5, 0x0004e5},
+ {0x0004e7, 0x0004e7},
+ {0x0004e9, 0x0004e9},
+ {0x0004eb, 0x0004eb},
+ {0x0004ed, 0x0004ed},
+ {0x0004ef, 0x0004ef},
+ {0x0004f1, 0x0004f1},
+ {0x0004f3, 0x0004f3},
+ {0x0004f5, 0x0004f5},
+ {0x0004f7, 0x0004f7},
+ {0x0004f9, 0x0004f9},
+ {0x0004fb, 0x0004fb},
+ {0x0004fd, 0x0004fd},
+ {0x0004ff, 0x0004ff},
+ {0x000501, 0x000501},
+ {0x000503, 0x000503},
+ {0x000505, 0x000505},
+ {0x000507, 0x000507},
+ {0x000509, 0x000509},
+ {0x00050b, 0x00050b},
+ {0x00050d, 0x00050d},
+ {0x00050f, 0x00050f},
+ {0x000511, 0x000511},
+ {0x000513, 0x000513},
+ {0x000515, 0x000515},
+ {0x000517, 0x000517},
+ {0x000519, 0x000519},
+ {0x00051b, 0x00051b},
+ {0x00051d, 0x00051d},
+ {0x00051f, 0x00051f},
+ {0x000521, 0x000521},
+ {0x000523, 0x000523},
+ {0x000525, 0x000525},
+ {0x000527, 0x000527},
+ {0x000529, 0x000529},
+ {0x00052b, 0x00052b},
+ {0x00052d, 0x00052d},
+ {0x00052f, 0x00052f},
+ {0x000560, 0x000588},
+ {0x0010d0, 0x0010fa},
+ {0x0010fc, 0x0010fc},
+ {0x0010fd, 0x0010ff},
+ {0x0013f8, 0x0013fd},
+ {0x001c80, 0x001c88},
+ {0x001d00, 0x001d2b},
+ {0x001d2c, 0x001d6a},
+ {0x001d6b, 0x001d77},
+ {0x001d78, 0x001d78},
+ {0x001d79, 0x001d9a},
+ {0x001d9b, 0x001dbf},
+ {0x001e01, 0x001e01},
+ {0x001e03, 0x001e03},
+ {0x001e05, 0x001e05},
+ {0x001e07, 0x001e07},
+ {0x001e09, 0x001e09},
+ {0x001e0b, 0x001e0b},
+ {0x001e0d, 0x001e0d},
+ {0x001e0f, 0x001e0f},
+ {0x001e11, 0x001e11},
+ {0x001e13, 0x001e13},
+ {0x001e15, 0x001e15},
+ {0x001e17, 0x001e17},
+ {0x001e19, 0x001e19},
+ {0x001e1b, 0x001e1b},
+ {0x001e1d, 0x001e1d},
+ {0x001e1f, 0x001e1f},
+ {0x001e21, 0x001e21},
+ {0x001e23, 0x001e23},
+ {0x001e25, 0x001e25},
+ {0x001e27, 0x001e27},
+ {0x001e29, 0x001e29},
+ {0x001e2b, 0x001e2b},
+ {0x001e2d, 0x001e2d},
+ {0x001e2f, 0x001e2f},
+ {0x001e31, 0x001e31},
+ {0x001e33, 0x001e33},
+ {0x001e35, 0x001e35},
+ {0x001e37, 0x001e37},
+ {0x001e39, 0x001e39},
+ {0x001e3b, 0x001e3b},
+ {0x001e3d, 0x001e3d},
+ {0x001e3f, 0x001e3f},
+ {0x001e41, 0x001e41},
+ {0x001e43, 0x001e43},
+ {0x001e45, 0x001e45},
+ {0x001e47, 0x001e47},
+ {0x001e49, 0x001e49},
+ {0x001e4b, 0x001e4b},
+ {0x001e4d, 0x001e4d},
+ {0x001e4f, 0x001e4f},
+ {0x001e51, 0x001e51},
+ {0x001e53, 0x001e53},
+ {0x001e55, 0x001e55},
+ {0x001e57, 0x001e57},
+ {0x001e59, 0x001e59},
+ {0x001e5b, 0x001e5b},
+ {0x001e5d, 0x001e5d},
+ {0x001e5f, 0x001e5f},
+ {0x001e61, 0x001e61},
+ {0x001e63, 0x001e63},
+ {0x001e65, 0x001e65},
+ {0x001e67, 0x001e67},
+ {0x001e69, 0x001e69},
+ {0x001e6b, 0x001e6b},
+ {0x001e6d, 0x001e6d},
+ {0x001e6f, 0x001e6f},
+ {0x001e71, 0x001e71},
+ {0x001e73, 0x001e73},
+ {0x001e75, 0x001e75},
+ {0x001e77, 0x001e77},
+ {0x001e79, 0x001e79},
+ {0x001e7b, 0x001e7b},
+ {0x001e7d, 0x001e7d},
+ {0x001e7f, 0x001e7f},
+ {0x001e81, 0x001e81},
+ {0x001e83, 0x001e83},
+ {0x001e85, 0x001e85},
+ {0x001e87, 0x001e87},
+ {0x001e89, 0x001e89},
+ {0x001e8b, 0x001e8b},
+ {0x001e8d, 0x001e8d},
+ {0x001e8f, 0x001e8f},
+ {0x001e91, 0x001e91},
+ {0x001e93, 0x001e93},
+ {0x001e95, 0x001e9d},
+ {0x001e9f, 0x001e9f},
+ {0x001ea1, 0x001ea1},
+ {0x001ea3, 0x001ea3},
+ {0x001ea5, 0x001ea5},
+ {0x001ea7, 0x001ea7},
+ {0x001ea9, 0x001ea9},
+ {0x001eab, 0x001eab},
+ {0x001ead, 0x001ead},
+ {0x001eaf, 0x001eaf},
+ {0x001eb1, 0x001eb1},
+ {0x001eb3, 0x001eb3},
+ {0x001eb5, 0x001eb5},
+ {0x001eb7, 0x001eb7},
+ {0x001eb9, 0x001eb9},
+ {0x001ebb, 0x001ebb},
+ {0x001ebd, 0x001ebd},
+ {0x001ebf, 0x001ebf},
+ {0x001ec1, 0x001ec1},
+ {0x001ec3, 0x001ec3},
+ {0x001ec5, 0x001ec5},
+ {0x001ec7, 0x001ec7},
+ {0x001ec9, 0x001ec9},
+ {0x001ecb, 0x001ecb},
+ {0x001ecd, 0x001ecd},
+ {0x001ecf, 0x001ecf},
+ {0x001ed1, 0x001ed1},
+ {0x001ed3, 0x001ed3},
+ {0x001ed5, 0x001ed5},
+ {0x001ed7, 0x001ed7},
+ {0x001ed9, 0x001ed9},
+ {0x001edb, 0x001edb},
+ {0x001edd, 0x001edd},
+ {0x001edf, 0x001edf},
+ {0x001ee1, 0x001ee1},
+ {0x001ee3, 0x001ee3},
+ {0x001ee5, 0x001ee5},
+ {0x001ee7, 0x001ee7},
+ {0x001ee9, 0x001ee9},
+ {0x001eeb, 0x001eeb},
+ {0x001eed, 0x001eed},
+ {0x001eef, 0x001eef},
+ {0x001ef1, 0x001ef1},
+ {0x001ef3, 0x001ef3},
+ {0x001ef5, 0x001ef5},
+ {0x001ef7, 0x001ef7},
+ {0x001ef9, 0x001ef9},
+ {0x001efb, 0x001efb},
+ {0x001efd, 0x001efd},
+ {0x001eff, 0x001f07},
+ {0x001f10, 0x001f15},
+ {0x001f20, 0x001f27},
+ {0x001f30, 0x001f37},
+ {0x001f40, 0x001f45},
+ {0x001f50, 0x001f57},
+ {0x001f60, 0x001f67},
+ {0x001f70, 0x001f7d},
+ {0x001f80, 0x001f87},
+ {0x001f90, 0x001f97},
+ {0x001fa0, 0x001fa7},
+ {0x001fb0, 0x001fb4},
+ {0x001fb6, 0x001fb7},
+ {0x001fbe, 0x001fbe},
+ {0x001fc2, 0x001fc4},
+ {0x001fc6, 0x001fc7},
+ {0x001fd0, 0x001fd3},
+ {0x001fd6, 0x001fd7},
+ {0x001fe0, 0x001fe7},
+ {0x001ff2, 0x001ff4},
+ {0x001ff6, 0x001ff7},
+ {0x002071, 0x002071},
+ {0x00207f, 0x00207f},
+ {0x002090, 0x00209c},
+ {0x00210a, 0x00210a},
+ {0x00210e, 0x00210f},
+ {0x002113, 0x002113},
+ {0x00212f, 0x00212f},
+ {0x002134, 0x002134},
+ {0x002139, 0x002139},
+ {0x00213c, 0x00213d},
+ {0x002146, 0x002149},
+ {0x00214e, 0x00214e},
+ {0x002170, 0x00217f},
+ {0x002184, 0x002184},
+ {0x0024d0, 0x0024e9},
+ {0x002c30, 0x002c5f},
+ {0x002c61, 0x002c61},
+ {0x002c65, 0x002c66},
+ {0x002c68, 0x002c68},
+ {0x002c6a, 0x002c6a},
+ {0x002c6c, 0x002c6c},
+ {0x002c71, 0x002c71},
+ {0x002c73, 0x002c74},
+ {0x002c76, 0x002c7b},
+ {0x002c7c, 0x002c7d},
+ {0x002c81, 0x002c81},
+ {0x002c83, 0x002c83},
+ {0x002c85, 0x002c85},
+ {0x002c87, 0x002c87},
+ {0x002c89, 0x002c89},
+ {0x002c8b, 0x002c8b},
+ {0x002c8d, 0x002c8d},
+ {0x002c8f, 0x002c8f},
+ {0x002c91, 0x002c91},
+ {0x002c93, 0x002c93},
+ {0x002c95, 0x002c95},
+ {0x002c97, 0x002c97},
+ {0x002c99, 0x002c99},
+ {0x002c9b, 0x002c9b},
+ {0x002c9d, 0x002c9d},
+ {0x002c9f, 0x002c9f},
+ {0x002ca1, 0x002ca1},
+ {0x002ca3, 0x002ca3},
+ {0x002ca5, 0x002ca5},
+ {0x002ca7, 0x002ca7},
+ {0x002ca9, 0x002ca9},
+ {0x002cab, 0x002cab},
+ {0x002cad, 0x002cad},
+ {0x002caf, 0x002caf},
+ {0x002cb1, 0x002cb1},
+ {0x002cb3, 0x002cb3},
+ {0x002cb5, 0x002cb5},
+ {0x002cb7, 0x002cb7},
+ {0x002cb9, 0x002cb9},
+ {0x002cbb, 0x002cbb},
+ {0x002cbd, 0x002cbd},
+ {0x002cbf, 0x002cbf},
+ {0x002cc1, 0x002cc1},
+ {0x002cc3, 0x002cc3},
+ {0x002cc5, 0x002cc5},
+ {0x002cc7, 0x002cc7},
+ {0x002cc9, 0x002cc9},
+ {0x002ccb, 0x002ccb},
+ {0x002ccd, 0x002ccd},
+ {0x002ccf, 0x002ccf},
+ {0x002cd1, 0x002cd1},
+ {0x002cd3, 0x002cd3},
+ {0x002cd5, 0x002cd5},
+ {0x002cd7, 0x002cd7},
+ {0x002cd9, 0x002cd9},
+ {0x002cdb, 0x002cdb},
+ {0x002cdd, 0x002cdd},
+ {0x002cdf, 0x002cdf},
+ {0x002ce1, 0x002ce1},
+ {0x002ce3, 0x002ce4},
+ {0x002cec, 0x002cec},
+ {0x002cee, 0x002cee},
+ {0x002cf3, 0x002cf3},
+ {0x002d00, 0x002d25},
+ {0x002d27, 0x002d27},
+ {0x002d2d, 0x002d2d},
+ {0x00a641, 0x00a641},
+ {0x00a643, 0x00a643},
+ {0x00a645, 0x00a645},
+ {0x00a647, 0x00a647},
+ {0x00a649, 0x00a649},
+ {0x00a64b, 0x00a64b},
+ {0x00a64d, 0x00a64d},
+ {0x00a64f, 0x00a64f},
+ {0x00a651, 0x00a651},
+ {0x00a653, 0x00a653},
+ {0x00a655, 0x00a655},
+ {0x00a657, 0x00a657},
+ {0x00a659, 0x00a659},
+ {0x00a65b, 0x00a65b},
+ {0x00a65d, 0x00a65d},
+ {0x00a65f, 0x00a65f},
+ {0x00a661, 0x00a661},
+ {0x00a663, 0x00a663},
+ {0x00a665, 0x00a665},
+ {0x00a667, 0x00a667},
+ {0x00a669, 0x00a669},
+ {0x00a66b, 0x00a66b},
+ {0x00a66d, 0x00a66d},
+ {0x00a681, 0x00a681},
+ {0x00a683, 0x00a683},
+ {0x00a685, 0x00a685},
+ {0x00a687, 0x00a687},
+ {0x00a689, 0x00a689},
+ {0x00a68b, 0x00a68b},
+ {0x00a68d, 0x00a68d},
+ {0x00a68f, 0x00a68f},
+ {0x00a691, 0x00a691},
+ {0x00a693, 0x00a693},
+ {0x00a695, 0x00a695},
+ {0x00a697, 0x00a697},
+ {0x00a699, 0x00a699},
+ {0x00a69b, 0x00a69b},
+ {0x00a69c, 0x00a69d},
+ {0x00a723, 0x00a723},
+ {0x00a725, 0x00a725},
+ {0x00a727, 0x00a727},
+ {0x00a729, 0x00a729},
+ {0x00a72b, 0x00a72b},
+ {0x00a72d, 0x00a72d},
+ {0x00a72f, 0x00a731},
+ {0x00a733, 0x00a733},
+ {0x00a735, 0x00a735},
+ {0x00a737, 0x00a737},
+ {0x00a739, 0x00a739},
+ {0x00a73b, 0x00a73b},
+ {0x00a73d, 0x00a73d},
+ {0x00a73f, 0x00a73f},
+ {0x00a741, 0x00a741},
+ {0x00a743, 0x00a743},
+ {0x00a745, 0x00a745},
+ {0x00a747, 0x00a747},
+ {0x00a749, 0x00a749},
+ {0x00a74b, 0x00a74b},
+ {0x00a74d, 0x00a74d},
+ {0x00a74f, 0x00a74f},
+ {0x00a751, 0x00a751},
+ {0x00a753, 0x00a753},
+ {0x00a755, 0x00a755},
+ {0x00a757, 0x00a757},
+ {0x00a759, 0x00a759},
+ {0x00a75b, 0x00a75b},
+ {0x00a75d, 0x00a75d},
+ {0x00a75f, 0x00a75f},
+ {0x00a761, 0x00a761},
+ {0x00a763, 0x00a763},
+ {0x00a765, 0x00a765},
+ {0x00a767, 0x00a767},
+ {0x00a769, 0x00a769},
+ {0x00a76b, 0x00a76b},
+ {0x00a76d, 0x00a76d},
+ {0x00a76f, 0x00a76f},
+ {0x00a770, 0x00a770},
+ {0x00a771, 0x00a778},
+ {0x00a77a, 0x00a77a},
+ {0x00a77c, 0x00a77c},
+ {0x00a77f, 0x00a77f},
+ {0x00a781, 0x00a781},
+ {0x00a783, 0x00a783},
+ {0x00a785, 0x00a785},
+ {0x00a787, 0x00a787},
+ {0x00a78c, 0x00a78c},
+ {0x00a78e, 0x00a78e},
+ {0x00a791, 0x00a791},
+ {0x00a793, 0x00a795},
+ {0x00a797, 0x00a797},
+ {0x00a799, 0x00a799},
+ {0x00a79b, 0x00a79b},
+ {0x00a79d, 0x00a79d},
+ {0x00a79f, 0x00a79f},
+ {0x00a7a1, 0x00a7a1},
+ {0x00a7a3, 0x00a7a3},
+ {0x00a7a5, 0x00a7a5},
+ {0x00a7a7, 0x00a7a7},
+ {0x00a7a9, 0x00a7a9},
+ {0x00a7af, 0x00a7af},
+ {0x00a7b5, 0x00a7b5},
+ {0x00a7b7, 0x00a7b7},
+ {0x00a7b9, 0x00a7b9},
+ {0x00a7bb, 0x00a7bb},
+ {0x00a7bd, 0x00a7bd},
+ {0x00a7bf, 0x00a7bf},
+ {0x00a7c1, 0x00a7c1},
+ {0x00a7c3, 0x00a7c3},
+ {0x00a7c8, 0x00a7c8},
+ {0x00a7ca, 0x00a7ca},
+ {0x00a7d1, 0x00a7d1},
+ {0x00a7d3, 0x00a7d3},
+ {0x00a7d5, 0x00a7d5},
+ {0x00a7d7, 0x00a7d7},
+ {0x00a7d9, 0x00a7d9},
+ {0x00a7f2, 0x00a7f4},
+ {0x00a7f6, 0x00a7f6},
+ {0x00a7f8, 0x00a7f9},
+ {0x00a7fa, 0x00a7fa},
+ {0x00ab30, 0x00ab5a},
+ {0x00ab5c, 0x00ab5f},
+ {0x00ab60, 0x00ab68},
+ {0x00ab69, 0x00ab69},
+ {0x00ab70, 0x00abbf},
+ {0x00fb00, 0x00fb06},
+ {0x00fb13, 0x00fb17},
+ {0x00ff41, 0x00ff5a},
+ {0x010428, 0x01044f},
+ {0x0104d8, 0x0104fb},
+ {0x010597, 0x0105a1},
+ {0x0105a3, 0x0105b1},
+ {0x0105b3, 0x0105b9},
+ {0x0105bb, 0x0105bc},
+ {0x010780, 0x010780},
+ {0x010783, 0x010785},
+ {0x010787, 0x0107b0},
+ {0x0107b2, 0x0107ba},
+ {0x010cc0, 0x010cf2},
+ {0x0118c0, 0x0118df},
+ {0x016e60, 0x016e7f},
+ {0x01d41a, 0x01d433},
+ {0x01d44e, 0x01d454},
+ {0x01d456, 0x01d467},
+ {0x01d482, 0x01d49b},
+ {0x01d4b6, 0x01d4b9},
+ {0x01d4bb, 0x01d4bb},
+ {0x01d4bd, 0x01d4c3},
+ {0x01d4c5, 0x01d4cf},
+ {0x01d4ea, 0x01d503},
+ {0x01d51e, 0x01d537},
+ {0x01d552, 0x01d56b},
+ {0x01d586, 0x01d59f},
+ {0x01d5ba, 0x01d5d3},
+ {0x01d5ee, 0x01d607},
+ {0x01d622, 0x01d63b},
+ {0x01d656, 0x01d66f},
+ {0x01d68a, 0x01d6a5},
+ {0x01d6c2, 0x01d6da},
+ {0x01d6dc, 0x01d6e1},
+ {0x01d6fc, 0x01d714},
+ {0x01d716, 0x01d71b},
+ {0x01d736, 0x01d74e},
+ {0x01d750, 0x01d755},
+ {0x01d770, 0x01d788},
+ {0x01d78a, 0x01d78f},
+ {0x01d7aa, 0x01d7c2},
+ {0x01d7c4, 0x01d7c9},
+ {0x01d7cb, 0x01d7cb},
+ {0x01df00, 0x01df09},
+ {0x01df0b, 0x01df1e},
+ {0x01df25, 0x01df2a},
+ {0x01e030, 0x01e06d},
+ {0x01e922, 0x01e943},
+};
+
+/* table of Unicode codepoint ranges of Uppercase characters */
+static const pg_unicode_range unicode_uppercase[651] =
+{
+ {0x000041, 0x00005a},
+ {0x0000c0, 0x0000d6},
+ {0x0000d8, 0x0000de},
+ {0x000100, 0x000100},
+ {0x000102, 0x000102},
+ {0x000104, 0x000104},
+ {0x000106, 0x000106},
+ {0x000108, 0x000108},
+ {0x00010a, 0x00010a},
+ {0x00010c, 0x00010c},
+ {0x00010e, 0x00010e},
+ {0x000110, 0x000110},
+ {0x000112, 0x000112},
+ {0x000114, 0x000114},
+ {0x000116, 0x000116},
+ {0x000118, 0x000118},
+ {0x00011a, 0x00011a},
+ {0x00011c, 0x00011c},
+ {0x00011e, 0x00011e},
+ {0x000120, 0x000120},
+ {0x000122, 0x000122},
+ {0x000124, 0x000124},
+ {0x000126, 0x000126},
+ {0x000128, 0x000128},
+ {0x00012a, 0x00012a},
+ {0x00012c, 0x00012c},
+ {0x00012e, 0x00012e},
+ {0x000130, 0x000130},
+ {0x000132, 0x000132},
+ {0x000134, 0x000134},
+ {0x000136, 0x000136},
+ {0x000139, 0x000139},
+ {0x00013b, 0x00013b},
+ {0x00013d, 0x00013d},
+ {0x00013f, 0x00013f},
+ {0x000141, 0x000141},
+ {0x000143, 0x000143},
+ {0x000145, 0x000145},
+ {0x000147, 0x000147},
+ {0x00014a, 0x00014a},
+ {0x00014c, 0x00014c},
+ {0x00014e, 0x00014e},
+ {0x000150, 0x000150},
+ {0x000152, 0x000152},
+ {0x000154, 0x000154},
+ {0x000156, 0x000156},
+ {0x000158, 0x000158},
+ {0x00015a, 0x00015a},
+ {0x00015c, 0x00015c},
+ {0x00015e, 0x00015e},
+ {0x000160, 0x000160},
+ {0x000162, 0x000162},
+ {0x000164, 0x000164},
+ {0x000166, 0x000166},
+ {0x000168, 0x000168},
+ {0x00016a, 0x00016a},
+ {0x00016c, 0x00016c},
+ {0x00016e, 0x00016e},
+ {0x000170, 0x000170},
+ {0x000172, 0x000172},
+ {0x000174, 0x000174},
+ {0x000176, 0x000176},
+ {0x000178, 0x000179},
+ {0x00017b, 0x00017b},
+ {0x00017d, 0x00017d},
+ {0x000181, 0x000182},
+ {0x000184, 0x000184},
+ {0x000186, 0x000187},
+ {0x000189, 0x00018b},
+ {0x00018e, 0x000191},
+ {0x000193, 0x000194},
+ {0x000196, 0x000198},
+ {0x00019c, 0x00019d},
+ {0x00019f, 0x0001a0},
+ {0x0001a2, 0x0001a2},
+ {0x0001a4, 0x0001a4},
+ {0x0001a6, 0x0001a7},
+ {0x0001a9, 0x0001a9},
+ {0x0001ac, 0x0001ac},
+ {0x0001ae, 0x0001af},
+ {0x0001b1, 0x0001b3},
+ {0x0001b5, 0x0001b5},
+ {0x0001b7, 0x0001b8},
+ {0x0001bc, 0x0001bc},
+ {0x0001c4, 0x0001c4},
+ {0x0001c7, 0x0001c7},
+ {0x0001ca, 0x0001ca},
+ {0x0001cd, 0x0001cd},
+ {0x0001cf, 0x0001cf},
+ {0x0001d1, 0x0001d1},
+ {0x0001d3, 0x0001d3},
+ {0x0001d5, 0x0001d5},
+ {0x0001d7, 0x0001d7},
+ {0x0001d9, 0x0001d9},
+ {0x0001db, 0x0001db},
+ {0x0001de, 0x0001de},
+ {0x0001e0, 0x0001e0},
+ {0x0001e2, 0x0001e2},
+ {0x0001e4, 0x0001e4},
+ {0x0001e6, 0x0001e6},
+ {0x0001e8, 0x0001e8},
+ {0x0001ea, 0x0001ea},
+ {0x0001ec, 0x0001ec},
+ {0x0001ee, 0x0001ee},
+ {0x0001f1, 0x0001f1},
+ {0x0001f4, 0x0001f4},
+ {0x0001f6, 0x0001f8},
+ {0x0001fa, 0x0001fa},
+ {0x0001fc, 0x0001fc},
+ {0x0001fe, 0x0001fe},
+ {0x000200, 0x000200},
+ {0x000202, 0x000202},
+ {0x000204, 0x000204},
+ {0x000206, 0x000206},
+ {0x000208, 0x000208},
+ {0x00020a, 0x00020a},
+ {0x00020c, 0x00020c},
+ {0x00020e, 0x00020e},
+ {0x000210, 0x000210},
+ {0x000212, 0x000212},
+ {0x000214, 0x000214},
+ {0x000216, 0x000216},
+ {0x000218, 0x000218},
+ {0x00021a, 0x00021a},
+ {0x00021c, 0x00021c},
+ {0x00021e, 0x00021e},
+ {0x000220, 0x000220},
+ {0x000222, 0x000222},
+ {0x000224, 0x000224},
+ {0x000226, 0x000226},
+ {0x000228, 0x000228},
+ {0x00022a, 0x00022a},
+ {0x00022c, 0x00022c},
+ {0x00022e, 0x00022e},
+ {0x000230, 0x000230},
+ {0x000232, 0x000232},
+ {0x00023a, 0x00023b},
+ {0x00023d, 0x00023e},
+ {0x000241, 0x000241},
+ {0x000243, 0x000246},
+ {0x000248, 0x000248},
+ {0x00024a, 0x00024a},
+ {0x00024c, 0x00024c},
+ {0x00024e, 0x00024e},
+ {0x000370, 0x000370},
+ {0x000372, 0x000372},
+ {0x000376, 0x000376},
+ {0x00037f, 0x00037f},
+ {0x000386, 0x000386},
+ {0x000388, 0x00038a},
+ {0x00038c, 0x00038c},
+ {0x00038e, 0x00038f},
+ {0x000391, 0x0003a1},
+ {0x0003a3, 0x0003ab},
+ {0x0003cf, 0x0003cf},
+ {0x0003d2, 0x0003d4},
+ {0x0003d8, 0x0003d8},
+ {0x0003da, 0x0003da},
+ {0x0003dc, 0x0003dc},
+ {0x0003de, 0x0003de},
+ {0x0003e0, 0x0003e0},
+ {0x0003e2, 0x0003e2},
+ {0x0003e4, 0x0003e4},
+ {0x0003e6, 0x0003e6},
+ {0x0003e8, 0x0003e8},
+ {0x0003ea, 0x0003ea},
+ {0x0003ec, 0x0003ec},
+ {0x0003ee, 0x0003ee},
+ {0x0003f4, 0x0003f4},
+ {0x0003f7, 0x0003f7},
+ {0x0003f9, 0x0003fa},
+ {0x0003fd, 0x00042f},
+ {0x000460, 0x000460},
+ {0x000462, 0x000462},
+ {0x000464, 0x000464},
+ {0x000466, 0x000466},
+ {0x000468, 0x000468},
+ {0x00046a, 0x00046a},
+ {0x00046c, 0x00046c},
+ {0x00046e, 0x00046e},
+ {0x000470, 0x000470},
+ {0x000472, 0x000472},
+ {0x000474, 0x000474},
+ {0x000476, 0x000476},
+ {0x000478, 0x000478},
+ {0x00047a, 0x00047a},
+ {0x00047c, 0x00047c},
+ {0x00047e, 0x00047e},
+ {0x000480, 0x000480},
+ {0x00048a, 0x00048a},
+ {0x00048c, 0x00048c},
+ {0x00048e, 0x00048e},
+ {0x000490, 0x000490},
+ {0x000492, 0x000492},
+ {0x000494, 0x000494},
+ {0x000496, 0x000496},
+ {0x000498, 0x000498},
+ {0x00049a, 0x00049a},
+ {0x00049c, 0x00049c},
+ {0x00049e, 0x00049e},
+ {0x0004a0, 0x0004a0},
+ {0x0004a2, 0x0004a2},
+ {0x0004a4, 0x0004a4},
+ {0x0004a6, 0x0004a6},
+ {0x0004a8, 0x0004a8},
+ {0x0004aa, 0x0004aa},
+ {0x0004ac, 0x0004ac},
+ {0x0004ae, 0x0004ae},
+ {0x0004b0, 0x0004b0},
+ {0x0004b2, 0x0004b2},
+ {0x0004b4, 0x0004b4},
+ {0x0004b6, 0x0004b6},
+ {0x0004b8, 0x0004b8},
+ {0x0004ba, 0x0004ba},
+ {0x0004bc, 0x0004bc},
+ {0x0004be, 0x0004be},
+ {0x0004c0, 0x0004c1},
+ {0x0004c3, 0x0004c3},
+ {0x0004c5, 0x0004c5},
+ {0x0004c7, 0x0004c7},
+ {0x0004c9, 0x0004c9},
+ {0x0004cb, 0x0004cb},
+ {0x0004cd, 0x0004cd},
+ {0x0004d0, 0x0004d0},
+ {0x0004d2, 0x0004d2},
+ {0x0004d4, 0x0004d4},
+ {0x0004d6, 0x0004d6},
+ {0x0004d8, 0x0004d8},
+ {0x0004da, 0x0004da},
+ {0x0004dc, 0x0004dc},
+ {0x0004de, 0x0004de},
+ {0x0004e0, 0x0004e0},
+ {0x0004e2, 0x0004e2},
+ {0x0004e4, 0x0004e4},
+ {0x0004e6, 0x0004e6},
+ {0x0004e8, 0x0004e8},
+ {0x0004ea, 0x0004ea},
+ {0x0004ec, 0x0004ec},
+ {0x0004ee, 0x0004ee},
+ {0x0004f0, 0x0004f0},
+ {0x0004f2, 0x0004f2},
+ {0x0004f4, 0x0004f4},
+ {0x0004f6, 0x0004f6},
+ {0x0004f8, 0x0004f8},
+ {0x0004fa, 0x0004fa},
+ {0x0004fc, 0x0004fc},
+ {0x0004fe, 0x0004fe},
+ {0x000500, 0x000500},
+ {0x000502, 0x000502},
+ {0x000504, 0x000504},
+ {0x000506, 0x000506},
+ {0x000508, 0x000508},
+ {0x00050a, 0x00050a},
+ {0x00050c, 0x00050c},
+ {0x00050e, 0x00050e},
+ {0x000510, 0x000510},
+ {0x000512, 0x000512},
+ {0x000514, 0x000514},
+ {0x000516, 0x000516},
+ {0x000518, 0x000518},
+ {0x00051a, 0x00051a},
+ {0x00051c, 0x00051c},
+ {0x00051e, 0x00051e},
+ {0x000520, 0x000520},
+ {0x000522, 0x000522},
+ {0x000524, 0x000524},
+ {0x000526, 0x000526},
+ {0x000528, 0x000528},
+ {0x00052a, 0x00052a},
+ {0x00052c, 0x00052c},
+ {0x00052e, 0x00052e},
+ {0x000531, 0x000556},
+ {0x0010a0, 0x0010c5},
+ {0x0010c7, 0x0010c7},
+ {0x0010cd, 0x0010cd},
+ {0x0013a0, 0x0013f5},
+ {0x001c90, 0x001cba},
+ {0x001cbd, 0x001cbf},
+ {0x001e00, 0x001e00},
+ {0x001e02, 0x001e02},
+ {0x001e04, 0x001e04},
+ {0x001e06, 0x001e06},
+ {0x001e08, 0x001e08},
+ {0x001e0a, 0x001e0a},
+ {0x001e0c, 0x001e0c},
+ {0x001e0e, 0x001e0e},
+ {0x001e10, 0x001e10},
+ {0x001e12, 0x001e12},
+ {0x001e14, 0x001e14},
+ {0x001e16, 0x001e16},
+ {0x001e18, 0x001e18},
+ {0x001e1a, 0x001e1a},
+ {0x001e1c, 0x001e1c},
+ {0x001e1e, 0x001e1e},
+ {0x001e20, 0x001e20},
+ {0x001e22, 0x001e22},
+ {0x001e24, 0x001e24},
+ {0x001e26, 0x001e26},
+ {0x001e28, 0x001e28},
+ {0x001e2a, 0x001e2a},
+ {0x001e2c, 0x001e2c},
+ {0x001e2e, 0x001e2e},
+ {0x001e30, 0x001e30},
+ {0x001e32, 0x001e32},
+ {0x001e34, 0x001e34},
+ {0x001e36, 0x001e36},
+ {0x001e38, 0x001e38},
+ {0x001e3a, 0x001e3a},
+ {0x001e3c, 0x001e3c},
+ {0x001e3e, 0x001e3e},
+ {0x001e40, 0x001e40},
+ {0x001e42, 0x001e42},
+ {0x001e44, 0x001e44},
+ {0x001e46, 0x001e46},
+ {0x001e48, 0x001e48},
+ {0x001e4a, 0x001e4a},
+ {0x001e4c, 0x001e4c},
+ {0x001e4e, 0x001e4e},
+ {0x001e50, 0x001e50},
+ {0x001e52, 0x001e52},
+ {0x001e54, 0x001e54},
+ {0x001e56, 0x001e56},
+ {0x001e58, 0x001e58},
+ {0x001e5a, 0x001e5a},
+ {0x001e5c, 0x001e5c},
+ {0x001e5e, 0x001e5e},
+ {0x001e60, 0x001e60},
+ {0x001e62, 0x001e62},
+ {0x001e64, 0x001e64},
+ {0x001e66, 0x001e66},
+ {0x001e68, 0x001e68},
+ {0x001e6a, 0x001e6a},
+ {0x001e6c, 0x001e6c},
+ {0x001e6e, 0x001e6e},
+ {0x001e70, 0x001e70},
+ {0x001e72, 0x001e72},
+ {0x001e74, 0x001e74},
+ {0x001e76, 0x001e76},
+ {0x001e78, 0x001e78},
+ {0x001e7a, 0x001e7a},
+ {0x001e7c, 0x001e7c},
+ {0x001e7e, 0x001e7e},
+ {0x001e80, 0x001e80},
+ {0x001e82, 0x001e82},
+ {0x001e84, 0x001e84},
+ {0x001e86, 0x001e86},
+ {0x001e88, 0x001e88},
+ {0x001e8a, 0x001e8a},
+ {0x001e8c, 0x001e8c},
+ {0x001e8e, 0x001e8e},
+ {0x001e90, 0x001e90},
+ {0x001e92, 0x001e92},
+ {0x001e94, 0x001e94},
+ {0x001e9e, 0x001e9e},
+ {0x001ea0, 0x001ea0},
+ {0x001ea2, 0x001ea2},
+ {0x001ea4, 0x001ea4},
+ {0x001ea6, 0x001ea6},
+ {0x001ea8, 0x001ea8},
+ {0x001eaa, 0x001eaa},
+ {0x001eac, 0x001eac},
+ {0x001eae, 0x001eae},
+ {0x001eb0, 0x001eb0},
+ {0x001eb2, 0x001eb2},
+ {0x001eb4, 0x001eb4},
+ {0x001eb6, 0x001eb6},
+ {0x001eb8, 0x001eb8},
+ {0x001eba, 0x001eba},
+ {0x001ebc, 0x001ebc},
+ {0x001ebe, 0x001ebe},
+ {0x001ec0, 0x001ec0},
+ {0x001ec2, 0x001ec2},
+ {0x001ec4, 0x001ec4},
+ {0x001ec6, 0x001ec6},
+ {0x001ec8, 0x001ec8},
+ {0x001eca, 0x001eca},
+ {0x001ecc, 0x001ecc},
+ {0x001ece, 0x001ece},
+ {0x001ed0, 0x001ed0},
+ {0x001ed2, 0x001ed2},
+ {0x001ed4, 0x001ed4},
+ {0x001ed6, 0x001ed6},
+ {0x001ed8, 0x001ed8},
+ {0x001eda, 0x001eda},
+ {0x001edc, 0x001edc},
+ {0x001ede, 0x001ede},
+ {0x001ee0, 0x001ee0},
+ {0x001ee2, 0x001ee2},
+ {0x001ee4, 0x001ee4},
+ {0x001ee6, 0x001ee6},
+ {0x001ee8, 0x001ee8},
+ {0x001eea, 0x001eea},
+ {0x001eec, 0x001eec},
+ {0x001eee, 0x001eee},
+ {0x001ef0, 0x001ef0},
+ {0x001ef2, 0x001ef2},
+ {0x001ef4, 0x001ef4},
+ {0x001ef6, 0x001ef6},
+ {0x001ef8, 0x001ef8},
+ {0x001efa, 0x001efa},
+ {0x001efc, 0x001efc},
+ {0x001efe, 0x001efe},
+ {0x001f08, 0x001f0f},
+ {0x001f18, 0x001f1d},
+ {0x001f28, 0x001f2f},
+ {0x001f38, 0x001f3f},
+ {0x001f48, 0x001f4d},
+ {0x001f59, 0x001f59},
+ {0x001f5b, 0x001f5b},
+ {0x001f5d, 0x001f5d},
+ {0x001f5f, 0x001f5f},
+ {0x001f68, 0x001f6f},
+ {0x001fb8, 0x001fbb},
+ {0x001fc8, 0x001fcb},
+ {0x001fd8, 0x001fdb},
+ {0x001fe8, 0x001fec},
+ {0x001ff8, 0x001ffb},
+ {0x002102, 0x002102},
+ {0x002107, 0x002107},
+ {0x00210b, 0x00210d},
+ {0x002110, 0x002112},
+ {0x002115, 0x002115},
+ {0x002119, 0x00211d},
+ {0x002124, 0x002124},
+ {0x002126, 0x002126},
+ {0x002128, 0x002128},
+ {0x00212a, 0x00212d},
+ {0x002130, 0x002133},
+ {0x00213e, 0x00213f},
+ {0x002145, 0x002145},
+ {0x002160, 0x00216f},
+ {0x002183, 0x002183},
+ {0x0024b6, 0x0024cf},
+ {0x002c00, 0x002c2f},
+ {0x002c60, 0x002c60},
+ {0x002c62, 0x002c64},
+ {0x002c67, 0x002c67},
+ {0x002c69, 0x002c69},
+ {0x002c6b, 0x002c6b},
+ {0x002c6d, 0x002c70},
+ {0x002c72, 0x002c72},
+ {0x002c75, 0x002c75},
+ {0x002c7e, 0x002c80},
+ {0x002c82, 0x002c82},
+ {0x002c84, 0x002c84},
+ {0x002c86, 0x002c86},
+ {0x002c88, 0x002c88},
+ {0x002c8a, 0x002c8a},
+ {0x002c8c, 0x002c8c},
+ {0x002c8e, 0x002c8e},
+ {0x002c90, 0x002c90},
+ {0x002c92, 0x002c92},
+ {0x002c94, 0x002c94},
+ {0x002c96, 0x002c96},
+ {0x002c98, 0x002c98},
+ {0x002c9a, 0x002c9a},
+ {0x002c9c, 0x002c9c},
+ {0x002c9e, 0x002c9e},
+ {0x002ca0, 0x002ca0},
+ {0x002ca2, 0x002ca2},
+ {0x002ca4, 0x002ca4},
+ {0x002ca6, 0x002ca6},
+ {0x002ca8, 0x002ca8},
+ {0x002caa, 0x002caa},
+ {0x002cac, 0x002cac},
+ {0x002cae, 0x002cae},
+ {0x002cb0, 0x002cb0},
+ {0x002cb2, 0x002cb2},
+ {0x002cb4, 0x002cb4},
+ {0x002cb6, 0x002cb6},
+ {0x002cb8, 0x002cb8},
+ {0x002cba, 0x002cba},
+ {0x002cbc, 0x002cbc},
+ {0x002cbe, 0x002cbe},
+ {0x002cc0, 0x002cc0},
+ {0x002cc2, 0x002cc2},
+ {0x002cc4, 0x002cc4},
+ {0x002cc6, 0x002cc6},
+ {0x002cc8, 0x002cc8},
+ {0x002cca, 0x002cca},
+ {0x002ccc, 0x002ccc},
+ {0x002cce, 0x002cce},
+ {0x002cd0, 0x002cd0},
+ {0x002cd2, 0x002cd2},
+ {0x002cd4, 0x002cd4},
+ {0x002cd6, 0x002cd6},
+ {0x002cd8, 0x002cd8},
+ {0x002cda, 0x002cda},
+ {0x002cdc, 0x002cdc},
+ {0x002cde, 0x002cde},
+ {0x002ce0, 0x002ce0},
+ {0x002ce2, 0x002ce2},
+ {0x002ceb, 0x002ceb},
+ {0x002ced, 0x002ced},
+ {0x002cf2, 0x002cf2},
+ {0x00a640, 0x00a640},
+ {0x00a642, 0x00a642},
+ {0x00a644, 0x00a644},
+ {0x00a646, 0x00a646},
+ {0x00a648, 0x00a648},
+ {0x00a64a, 0x00a64a},
+ {0x00a64c, 0x00a64c},
+ {0x00a64e, 0x00a64e},
+ {0x00a650, 0x00a650},
+ {0x00a652, 0x00a652},
+ {0x00a654, 0x00a654},
+ {0x00a656, 0x00a656},
+ {0x00a658, 0x00a658},
+ {0x00a65a, 0x00a65a},
+ {0x00a65c, 0x00a65c},
+ {0x00a65e, 0x00a65e},
+ {0x00a660, 0x00a660},
+ {0x00a662, 0x00a662},
+ {0x00a664, 0x00a664},
+ {0x00a666, 0x00a666},
+ {0x00a668, 0x00a668},
+ {0x00a66a, 0x00a66a},
+ {0x00a66c, 0x00a66c},
+ {0x00a680, 0x00a680},
+ {0x00a682, 0x00a682},
+ {0x00a684, 0x00a684},
+ {0x00a686, 0x00a686},
+ {0x00a688, 0x00a688},
+ {0x00a68a, 0x00a68a},
+ {0x00a68c, 0x00a68c},
+ {0x00a68e, 0x00a68e},
+ {0x00a690, 0x00a690},
+ {0x00a692, 0x00a692},
+ {0x00a694, 0x00a694},
+ {0x00a696, 0x00a696},
+ {0x00a698, 0x00a698},
+ {0x00a69a, 0x00a69a},
+ {0x00a722, 0x00a722},
+ {0x00a724, 0x00a724},
+ {0x00a726, 0x00a726},
+ {0x00a728, 0x00a728},
+ {0x00a72a, 0x00a72a},
+ {0x00a72c, 0x00a72c},
+ {0x00a72e, 0x00a72e},
+ {0x00a732, 0x00a732},
+ {0x00a734, 0x00a734},
+ {0x00a736, 0x00a736},
+ {0x00a738, 0x00a738},
+ {0x00a73a, 0x00a73a},
+ {0x00a73c, 0x00a73c},
+ {0x00a73e, 0x00a73e},
+ {0x00a740, 0x00a740},
+ {0x00a742, 0x00a742},
+ {0x00a744, 0x00a744},
+ {0x00a746, 0x00a746},
+ {0x00a748, 0x00a748},
+ {0x00a74a, 0x00a74a},
+ {0x00a74c, 0x00a74c},
+ {0x00a74e, 0x00a74e},
+ {0x00a750, 0x00a750},
+ {0x00a752, 0x00a752},
+ {0x00a754, 0x00a754},
+ {0x00a756, 0x00a756},
+ {0x00a758, 0x00a758},
+ {0x00a75a, 0x00a75a},
+ {0x00a75c, 0x00a75c},
+ {0x00a75e, 0x00a75e},
+ {0x00a760, 0x00a760},
+ {0x00a762, 0x00a762},
+ {0x00a764, 0x00a764},
+ {0x00a766, 0x00a766},
+ {0x00a768, 0x00a768},
+ {0x00a76a, 0x00a76a},
+ {0x00a76c, 0x00a76c},
+ {0x00a76e, 0x00a76e},
+ {0x00a779, 0x00a779},
+ {0x00a77b, 0x00a77b},
+ {0x00a77d, 0x00a77e},
+ {0x00a780, 0x00a780},
+ {0x00a782, 0x00a782},
+ {0x00a784, 0x00a784},
+ {0x00a786, 0x00a786},
+ {0x00a78b, 0x00a78b},
+ {0x00a78d, 0x00a78d},
+ {0x00a790, 0x00a790},
+ {0x00a792, 0x00a792},
+ {0x00a796, 0x00a796},
+ {0x00a798, 0x00a798},
+ {0x00a79a, 0x00a79a},
+ {0x00a79c, 0x00a79c},
+ {0x00a79e, 0x00a79e},
+ {0x00a7a0, 0x00a7a0},
+ {0x00a7a2, 0x00a7a2},
+ {0x00a7a4, 0x00a7a4},
+ {0x00a7a6, 0x00a7a6},
+ {0x00a7a8, 0x00a7a8},
+ {0x00a7aa, 0x00a7ae},
+ {0x00a7b0, 0x00a7b4},
+ {0x00a7b6, 0x00a7b6},
+ {0x00a7b8, 0x00a7b8},
+ {0x00a7ba, 0x00a7ba},
+ {0x00a7bc, 0x00a7bc},
+ {0x00a7be, 0x00a7be},
+ {0x00a7c0, 0x00a7c0},
+ {0x00a7c2, 0x00a7c2},
+ {0x00a7c4, 0x00a7c7},
+ {0x00a7c9, 0x00a7c9},
+ {0x00a7d0, 0x00a7d0},
+ {0x00a7d6, 0x00a7d6},
+ {0x00a7d8, 0x00a7d8},
+ {0x00a7f5, 0x00a7f5},
+ {0x00ff21, 0x00ff3a},
+ {0x010400, 0x010427},
+ {0x0104b0, 0x0104d3},
+ {0x010570, 0x01057a},
+ {0x01057c, 0x01058a},
+ {0x01058c, 0x010592},
+ {0x010594, 0x010595},
+ {0x010c80, 0x010cb2},
+ {0x0118a0, 0x0118bf},
+ {0x016e40, 0x016e5f},
+ {0x01d400, 0x01d419},
+ {0x01d434, 0x01d44d},
+ {0x01d468, 0x01d481},
+ {0x01d49c, 0x01d49c},
+ {0x01d49e, 0x01d49f},
+ {0x01d4a2, 0x01d4a2},
+ {0x01d4a5, 0x01d4a6},
+ {0x01d4a9, 0x01d4ac},
+ {0x01d4ae, 0x01d4b5},
+ {0x01d4d0, 0x01d4e9},
+ {0x01d504, 0x01d505},
+ {0x01d507, 0x01d50a},
+ {0x01d50d, 0x01d514},
+ {0x01d516, 0x01d51c},
+ {0x01d538, 0x01d539},
+ {0x01d53b, 0x01d53e},
+ {0x01d540, 0x01d544},
+ {0x01d546, 0x01d546},
+ {0x01d54a, 0x01d550},
+ {0x01d56c, 0x01d585},
+ {0x01d5a0, 0x01d5b9},
+ {0x01d5d4, 0x01d5ed},
+ {0x01d608, 0x01d621},
+ {0x01d63c, 0x01d655},
+ {0x01d670, 0x01d689},
+ {0x01d6a8, 0x01d6c0},
+ {0x01d6e2, 0x01d6fa},
+ {0x01d71c, 0x01d734},
+ {0x01d756, 0x01d76e},
+ {0x01d790, 0x01d7a8},
+ {0x01d7ca, 0x01d7ca},
+ {0x01e900, 0x01e921},
+ {0x01f130, 0x01f149},
+ {0x01f150, 0x01f169},
+ {0x01f170, 0x01f189},
+};
+
+/* table of Unicode codepoint ranges of Case_Ignorable characters */
+static const pg_unicode_range unicode_case_ignorable[491] =
+{
+ {0x000027, 0x000027},
+ {0x00002e, 0x00002e},
+ {0x00003a, 0x00003a},
+ {0x00005e, 0x00005e},
+ {0x000060, 0x000060},
+ {0x0000a8, 0x0000a8},
+ {0x0000ad, 0x0000ad},
+ {0x0000af, 0x0000af},
+ {0x0000b4, 0x0000b4},
+ {0x0000b7, 0x0000b7},
+ {0x0000b8, 0x0000b8},
+ {0x0002b0, 0x0002c1},
+ {0x0002c2, 0x0002c5},
+ {0x0002c6, 0x0002d1},
+ {0x0002d2, 0x0002df},
+ {0x0002e0, 0x0002e4},
+ {0x0002e5, 0x0002eb},
+ {0x0002ec, 0x0002ec},
+ {0x0002ed, 0x0002ed},
+ {0x0002ee, 0x0002ee},
+ {0x0002ef, 0x0002ff},
+ {0x000300, 0x00036f},
+ {0x000374, 0x000374},
+ {0x000375, 0x000375},
+ {0x00037a, 0x00037a},
+ {0x000384, 0x000385},
+ {0x000387, 0x000387},
+ {0x000483, 0x000487},
+ {0x000488, 0x000489},
+ {0x000559, 0x000559},
+ {0x00055f, 0x00055f},
+ {0x000591, 0x0005bd},
+ {0x0005bf, 0x0005bf},
+ {0x0005c1, 0x0005c2},
+ {0x0005c4, 0x0005c5},
+ {0x0005c7, 0x0005c7},
+ {0x0005f4, 0x0005f4},
+ {0x000600, 0x000605},
+ {0x000610, 0x00061a},
+ {0x00061c, 0x00061c},
+ {0x000640, 0x000640},
+ {0x00064b, 0x00065f},
+ {0x000670, 0x000670},
+ {0x0006d6, 0x0006dc},
+ {0x0006dd, 0x0006dd},
+ {0x0006df, 0x0006e4},
+ {0x0006e5, 0x0006e6},
+ {0x0006e7, 0x0006e8},
+ {0x0006ea, 0x0006ed},
+ {0x00070f, 0x00070f},
+ {0x000711, 0x000711},
+ {0x000730, 0x00074a},
+ {0x0007a6, 0x0007b0},
+ {0x0007eb, 0x0007f3},
+ {0x0007f4, 0x0007f5},
+ {0x0007fa, 0x0007fa},
+ {0x0007fd, 0x0007fd},
+ {0x000816, 0x000819},
+ {0x00081a, 0x00081a},
+ {0x00081b, 0x000823},
+ {0x000824, 0x000824},
+ {0x000825, 0x000827},
+ {0x000828, 0x000828},
+ {0x000829, 0x00082d},
+ {0x000859, 0x00085b},
+ {0x000888, 0x000888},
+ {0x000890, 0x000891},
+ {0x000898, 0x00089f},
+ {0x0008c9, 0x0008c9},
+ {0x0008ca, 0x0008e1},
+ {0x0008e2, 0x0008e2},
+ {0x0008e3, 0x000902},
+ {0x00093a, 0x00093a},
+ {0x00093c, 0x00093c},
+ {0x000941, 0x000948},
+ {0x00094d, 0x00094d},
+ {0x000951, 0x000957},
+ {0x000962, 0x000963},
+ {0x000971, 0x000971},
+ {0x000981, 0x000981},
+ {0x0009bc, 0x0009bc},
+ {0x0009c1, 0x0009c4},
+ {0x0009cd, 0x0009cd},
+ {0x0009e2, 0x0009e3},
+ {0x0009fe, 0x0009fe},
+ {0x000a01, 0x000a02},
+ {0x000a3c, 0x000a3c},
+ {0x000a41, 0x000a42},
+ {0x000a47, 0x000a48},
+ {0x000a4b, 0x000a4d},
+ {0x000a51, 0x000a51},
+ {0x000a70, 0x000a71},
+ {0x000a75, 0x000a75},
+ {0x000a81, 0x000a82},
+ {0x000abc, 0x000abc},
+ {0x000ac1, 0x000ac5},
+ {0x000ac7, 0x000ac8},
+ {0x000acd, 0x000acd},
+ {0x000ae2, 0x000ae3},
+ {0x000afa, 0x000aff},
+ {0x000b01, 0x000b01},
+ {0x000b3c, 0x000b3c},
+ {0x000b3f, 0x000b3f},
+ {0x000b41, 0x000b44},
+ {0x000b4d, 0x000b4d},
+ {0x000b55, 0x000b56},
+ {0x000b62, 0x000b63},
+ {0x000b82, 0x000b82},
+ {0x000bc0, 0x000bc0},
+ {0x000bcd, 0x000bcd},
+ {0x000c00, 0x000c00},
+ {0x000c04, 0x000c04},
+ {0x000c3c, 0x000c3c},
+ {0x000c3e, 0x000c40},
+ {0x000c46, 0x000c48},
+ {0x000c4a, 0x000c4d},
+ {0x000c55, 0x000c56},
+ {0x000c62, 0x000c63},
+ {0x000c81, 0x000c81},
+ {0x000cbc, 0x000cbc},
+ {0x000cbf, 0x000cbf},
+ {0x000cc6, 0x000cc6},
+ {0x000ccc, 0x000ccd},
+ {0x000ce2, 0x000ce3},
+ {0x000d00, 0x000d01},
+ {0x000d3b, 0x000d3c},
+ {0x000d41, 0x000d44},
+ {0x000d4d, 0x000d4d},
+ {0x000d62, 0x000d63},
+ {0x000d81, 0x000d81},
+ {0x000dca, 0x000dca},
+ {0x000dd2, 0x000dd4},
+ {0x000dd6, 0x000dd6},
+ {0x000e31, 0x000e31},
+ {0x000e34, 0x000e3a},
+ {0x000e46, 0x000e46},
+ {0x000e47, 0x000e4e},
+ {0x000eb1, 0x000eb1},
+ {0x000eb4, 0x000ebc},
+ {0x000ec6, 0x000ec6},
+ {0x000ec8, 0x000ece},
+ {0x000f18, 0x000f19},
+ {0x000f35, 0x000f35},
+ {0x000f37, 0x000f37},
+ {0x000f39, 0x000f39},
+ {0x000f71, 0x000f7e},
+ {0x000f80, 0x000f84},
+ {0x000f86, 0x000f87},
+ {0x000f8d, 0x000f97},
+ {0x000f99, 0x000fbc},
+ {0x000fc6, 0x000fc6},
+ {0x00102d, 0x001030},
+ {0x001032, 0x001037},
+ {0x001039, 0x00103a},
+ {0x00103d, 0x00103e},
+ {0x001058, 0x001059},
+ {0x00105e, 0x001060},
+ {0x001071, 0x001074},
+ {0x001082, 0x001082},
+ {0x001085, 0x001086},
+ {0x00108d, 0x00108d},
+ {0x00109d, 0x00109d},
+ {0x0010fc, 0x0010fc},
+ {0x00135d, 0x00135f},
+ {0x001712, 0x001714},
+ {0x001732, 0x001733},
+ {0x001752, 0x001753},
+ {0x001772, 0x001773},
+ {0x0017b4, 0x0017b5},
+ {0x0017b7, 0x0017bd},
+ {0x0017c6, 0x0017c6},
+ {0x0017c9, 0x0017d3},
+ {0x0017d7, 0x0017d7},
+ {0x0017dd, 0x0017dd},
+ {0x00180b, 0x00180d},
+ {0x00180e, 0x00180e},
+ {0x00180f, 0x00180f},
+ {0x001843, 0x001843},
+ {0x001885, 0x001886},
+ {0x0018a9, 0x0018a9},
+ {0x001920, 0x001922},
+ {0x001927, 0x001928},
+ {0x001932, 0x001932},
+ {0x001939, 0x00193b},
+ {0x001a17, 0x001a18},
+ {0x001a1b, 0x001a1b},
+ {0x001a56, 0x001a56},
+ {0x001a58, 0x001a5e},
+ {0x001a60, 0x001a60},
+ {0x001a62, 0x001a62},
+ {0x001a65, 0x001a6c},
+ {0x001a73, 0x001a7c},
+ {0x001a7f, 0x001a7f},
+ {0x001aa7, 0x001aa7},
+ {0x001ab0, 0x001abd},
+ {0x001abe, 0x001abe},
+ {0x001abf, 0x001ace},
+ {0x001b00, 0x001b03},
+ {0x001b34, 0x001b34},
+ {0x001b36, 0x001b3a},
+ {0x001b3c, 0x001b3c},
+ {0x001b42, 0x001b42},
+ {0x001b6b, 0x001b73},
+ {0x001b80, 0x001b81},
+ {0x001ba2, 0x001ba5},
+ {0x001ba8, 0x001ba9},
+ {0x001bab, 0x001bad},
+ {0x001be6, 0x001be6},
+ {0x001be8, 0x001be9},
+ {0x001bed, 0x001bed},
+ {0x001bef, 0x001bf1},
+ {0x001c2c, 0x001c33},
+ {0x001c36, 0x001c37},
+ {0x001c78, 0x001c7d},
+ {0x001cd0, 0x001cd2},
+ {0x001cd4, 0x001ce0},
+ {0x001ce2, 0x001ce8},
+ {0x001ced, 0x001ced},
+ {0x001cf4, 0x001cf4},
+ {0x001cf8, 0x001cf9},
+ {0x001d2c, 0x001d6a},
+ {0x001d78, 0x001d78},
+ {0x001d9b, 0x001dbf},
+ {0x001dc0, 0x001dff},
+ {0x001fbd, 0x001fbd},
+ {0x001fbf, 0x001fc1},
+ {0x001fcd, 0x001fcf},
+ {0x001fdd, 0x001fdf},
+ {0x001fed, 0x001fef},
+ {0x001ffd, 0x001ffe},
+ {0x00200b, 0x00200f},
+ {0x002018, 0x002018},
+ {0x002019, 0x002019},
+ {0x002024, 0x002024},
+ {0x002027, 0x002027},
+ {0x00202a, 0x00202e},
+ {0x002060, 0x002064},
+ {0x002066, 0x00206f},
+ {0x002071, 0x002071},
+ {0x00207f, 0x00207f},
+ {0x002090, 0x00209c},
+ {0x0020d0, 0x0020dc},
+ {0x0020dd, 0x0020e0},
+ {0x0020e1, 0x0020e1},
+ {0x0020e2, 0x0020e4},
+ {0x0020e5, 0x0020f0},
+ {0x002c7c, 0x002c7d},
+ {0x002cef, 0x002cf1},
+ {0x002d6f, 0x002d6f},
+ {0x002d7f, 0x002d7f},
+ {0x002de0, 0x002dff},
+ {0x002e2f, 0x002e2f},
+ {0x003005, 0x003005},
+ {0x00302a, 0x00302d},
+ {0x003031, 0x003035},
+ {0x00303b, 0x00303b},
+ {0x003099, 0x00309a},
+ {0x00309b, 0x00309c},
+ {0x00309d, 0x00309e},
+ {0x0030fc, 0x0030fe},
+ {0x00a015, 0x00a015},
+ {0x00a4f8, 0x00a4fd},
+ {0x00a60c, 0x00a60c},
+ {0x00a66f, 0x00a66f},
+ {0x00a670, 0x00a672},
+ {0x00a674, 0x00a67d},
+ {0x00a67f, 0x00a67f},
+ {0x00a69c, 0x00a69d},
+ {0x00a69e, 0x00a69f},
+ {0x00a6f0, 0x00a6f1},
+ {0x00a700, 0x00a716},
+ {0x00a717, 0x00a71f},
+ {0x00a720, 0x00a721},
+ {0x00a770, 0x00a770},
+ {0x00a788, 0x00a788},
+ {0x00a789, 0x00a78a},
+ {0x00a7f2, 0x00a7f4},
+ {0x00a7f8, 0x00a7f9},
+ {0x00a802, 0x00a802},
+ {0x00a806, 0x00a806},
+ {0x00a80b, 0x00a80b},
+ {0x00a825, 0x00a826},
+ {0x00a82c, 0x00a82c},
+ {0x00a8c4, 0x00a8c5},
+ {0x00a8e0, 0x00a8f1},
+ {0x00a8ff, 0x00a8ff},
+ {0x00a926, 0x00a92d},
+ {0x00a947, 0x00a951},
+ {0x00a980, 0x00a982},
+ {0x00a9b3, 0x00a9b3},
+ {0x00a9b6, 0x00a9b9},
+ {0x00a9bc, 0x00a9bd},
+ {0x00a9cf, 0x00a9cf},
+ {0x00a9e5, 0x00a9e5},
+ {0x00a9e6, 0x00a9e6},
+ {0x00aa29, 0x00aa2e},
+ {0x00aa31, 0x00aa32},
+ {0x00aa35, 0x00aa36},
+ {0x00aa43, 0x00aa43},
+ {0x00aa4c, 0x00aa4c},
+ {0x00aa70, 0x00aa70},
+ {0x00aa7c, 0x00aa7c},
+ {0x00aab0, 0x00aab0},
+ {0x00aab2, 0x00aab4},
+ {0x00aab7, 0x00aab8},
+ {0x00aabe, 0x00aabf},
+ {0x00aac1, 0x00aac1},
+ {0x00aadd, 0x00aadd},
+ {0x00aaec, 0x00aaed},
+ {0x00aaf3, 0x00aaf4},
+ {0x00aaf6, 0x00aaf6},
+ {0x00ab5b, 0x00ab5b},
+ {0x00ab5c, 0x00ab5f},
+ {0x00ab69, 0x00ab69},
+ {0x00ab6a, 0x00ab6b},
+ {0x00abe5, 0x00abe5},
+ {0x00abe8, 0x00abe8},
+ {0x00abed, 0x00abed},
+ {0x00fb1e, 0x00fb1e},
+ {0x00fbb2, 0x00fbc2},
+ {0x00fe00, 0x00fe0f},
+ {0x00fe13, 0x00fe13},
+ {0x00fe20, 0x00fe2f},
+ {0x00fe52, 0x00fe52},
+ {0x00fe55, 0x00fe55},
+ {0x00feff, 0x00feff},
+ {0x00ff07, 0x00ff07},
+ {0x00ff0e, 0x00ff0e},
+ {0x00ff1a, 0x00ff1a},
+ {0x00ff3e, 0x00ff3e},
+ {0x00ff40, 0x00ff40},
+ {0x00ff70, 0x00ff70},
+ {0x00ff9e, 0x00ff9f},
+ {0x00ffe3, 0x00ffe3},
+ {0x00fff9, 0x00fffb},
+ {0x0101fd, 0x0101fd},
+ {0x0102e0, 0x0102e0},
+ {0x010376, 0x01037a},
+ {0x010780, 0x010785},
+ {0x010787, 0x0107b0},
+ {0x0107b2, 0x0107ba},
+ {0x010a01, 0x010a03},
+ {0x010a05, 0x010a06},
+ {0x010a0c, 0x010a0f},
+ {0x010a38, 0x010a3a},
+ {0x010a3f, 0x010a3f},
+ {0x010ae5, 0x010ae6},
+ {0x010d24, 0x010d27},
+ {0x010eab, 0x010eac},
+ {0x010efd, 0x010eff},
+ {0x010f46, 0x010f50},
+ {0x010f82, 0x010f85},
+ {0x011001, 0x011001},
+ {0x011038, 0x011046},
+ {0x011070, 0x011070},
+ {0x011073, 0x011074},
+ {0x01107f, 0x011081},
+ {0x0110b3, 0x0110b6},
+ {0x0110b9, 0x0110ba},
+ {0x0110bd, 0x0110bd},
+ {0x0110c2, 0x0110c2},
+ {0x0110cd, 0x0110cd},
+ {0x011100, 0x011102},
+ {0x011127, 0x01112b},
+ {0x01112d, 0x011134},
+ {0x011173, 0x011173},
+ {0x011180, 0x011181},
+ {0x0111b6, 0x0111be},
+ {0x0111c9, 0x0111cc},
+ {0x0111cf, 0x0111cf},
+ {0x01122f, 0x011231},
+ {0x011234, 0x011234},
+ {0x011236, 0x011237},
+ {0x01123e, 0x01123e},
+ {0x011241, 0x011241},
+ {0x0112df, 0x0112df},
+ {0x0112e3, 0x0112ea},
+ {0x011300, 0x011301},
+ {0x01133b, 0x01133c},
+ {0x011340, 0x011340},
+ {0x011366, 0x01136c},
+ {0x011370, 0x011374},
+ {0x011438, 0x01143f},
+ {0x011442, 0x011444},
+ {0x011446, 0x011446},
+ {0x01145e, 0x01145e},
+ {0x0114b3, 0x0114b8},
+ {0x0114ba, 0x0114ba},
+ {0x0114bf, 0x0114c0},
+ {0x0114c2, 0x0114c3},
+ {0x0115b2, 0x0115b5},
+ {0x0115bc, 0x0115bd},
+ {0x0115bf, 0x0115c0},
+ {0x0115dc, 0x0115dd},
+ {0x011633, 0x01163a},
+ {0x01163d, 0x01163d},
+ {0x01163f, 0x011640},
+ {0x0116ab, 0x0116ab},
+ {0x0116ad, 0x0116ad},
+ {0x0116b0, 0x0116b5},
+ {0x0116b7, 0x0116b7},
+ {0x01171d, 0x01171f},
+ {0x011722, 0x011725},
+ {0x011727, 0x01172b},
+ {0x01182f, 0x011837},
+ {0x011839, 0x01183a},
+ {0x01193b, 0x01193c},
+ {0x01193e, 0x01193e},
+ {0x011943, 0x011943},
+ {0x0119d4, 0x0119d7},
+ {0x0119da, 0x0119db},
+ {0x0119e0, 0x0119e0},
+ {0x011a01, 0x011a0a},
+ {0x011a33, 0x011a38},
+ {0x011a3b, 0x011a3e},
+ {0x011a47, 0x011a47},
+ {0x011a51, 0x011a56},
+ {0x011a59, 0x011a5b},
+ {0x011a8a, 0x011a96},
+ {0x011a98, 0x011a99},
+ {0x011c30, 0x011c36},
+ {0x011c38, 0x011c3d},
+ {0x011c3f, 0x011c3f},
+ {0x011c92, 0x011ca7},
+ {0x011caa, 0x011cb0},
+ {0x011cb2, 0x011cb3},
+ {0x011cb5, 0x011cb6},
+ {0x011d31, 0x011d36},
+ {0x011d3a, 0x011d3a},
+ {0x011d3c, 0x011d3d},
+ {0x011d3f, 0x011d45},
+ {0x011d47, 0x011d47},
+ {0x011d90, 0x011d91},
+ {0x011d95, 0x011d95},
+ {0x011d97, 0x011d97},
+ {0x011ef3, 0x011ef4},
+ {0x011f00, 0x011f01},
+ {0x011f36, 0x011f3a},
+ {0x011f40, 0x011f40},
+ {0x011f42, 0x011f42},
+ {0x013430, 0x01343f},
+ {0x013440, 0x013440},
+ {0x013447, 0x013455},
+ {0x016af0, 0x016af4},
+ {0x016b30, 0x016b36},
+ {0x016b40, 0x016b43},
+ {0x016f4f, 0x016f4f},
+ {0x016f8f, 0x016f92},
+ {0x016f93, 0x016f9f},
+ {0x016fe0, 0x016fe1},
+ {0x016fe3, 0x016fe3},
+ {0x016fe4, 0x016fe4},
+ {0x01aff0, 0x01aff3},
+ {0x01aff5, 0x01affb},
+ {0x01affd, 0x01affe},
+ {0x01bc9d, 0x01bc9e},
+ {0x01bca0, 0x01bca3},
+ {0x01cf00, 0x01cf2d},
+ {0x01cf30, 0x01cf46},
+ {0x01d167, 0x01d169},
+ {0x01d173, 0x01d17a},
+ {0x01d17b, 0x01d182},
+ {0x01d185, 0x01d18b},
+ {0x01d1aa, 0x01d1ad},
+ {0x01d242, 0x01d244},
+ {0x01da00, 0x01da36},
+ {0x01da3b, 0x01da6c},
+ {0x01da75, 0x01da75},
+ {0x01da84, 0x01da84},
+ {0x01da9b, 0x01da9f},
+ {0x01daa1, 0x01daaf},
+ {0x01e000, 0x01e006},
+ {0x01e008, 0x01e018},
+ {0x01e01b, 0x01e021},
+ {0x01e023, 0x01e024},
+ {0x01e026, 0x01e02a},
+ {0x01e030, 0x01e06d},
+ {0x01e08f, 0x01e08f},
+ {0x01e130, 0x01e136},
+ {0x01e137, 0x01e13d},
+ {0x01e2ae, 0x01e2ae},
+ {0x01e2ec, 0x01e2ef},
+ {0x01e4eb, 0x01e4eb},
+ {0x01e4ec, 0x01e4ef},
+ {0x01e8d0, 0x01e8d6},
+ {0x01e944, 0x01e94a},
+ {0x01e94b, 0x01e94b},
+ {0x01f3fb, 0x01f3ff},
+ {0x0e0001, 0x0e0001},
+ {0x0e0020, 0x0e007f},
+ {0x0e0100, 0x0e01ef},
+};
+
+/* table of Unicode codepoint ranges of White_Space characters */
+static const pg_unicode_range unicode_white_space[11] =
+{
+ {0x000009, 0x00000d},
+ {0x000020, 0x000020},
+ {0x000085, 0x000085},
+ {0x0000a0, 0x0000a0},
+ {0x001680, 0x001680},
+ {0x002000, 0x00200a},
+ {0x002028, 0x002028},
+ {0x002029, 0x002029},
+ {0x00202f, 0x00202f},
+ {0x00205f, 0x00205f},
+ {0x003000, 0x003000},
+};
+
+/* table of Unicode codepoint ranges of Hex_Digit characters */
+static const pg_unicode_range unicode_hex_digit[6] =
+{
+ {0x000030, 0x000039},
+ {0x000041, 0x000046},
+ {0x000061, 0x000066},
+ {0x00ff10, 0x00ff19},
+ {0x00ff21, 0x00ff26},
+ {0x00ff41, 0x00ff46},
+};
+
+/* table of Unicode codepoint ranges of Join_Control characters */
+static const pg_unicode_range unicode_join_control[1] =
+{
+ {0x00200c, 0x00200d},
};