diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/common/unicode/Makefile | 6 | ||||
-rw-r--r-- | src/common/unicode/README | 45 | ||||
-rw-r--r-- | src/common/unicode/category_test.c | 222 | ||||
-rw-r--r-- | src/common/unicode/generate-unicode_category_table.pl | 390 | ||||
-rw-r--r-- | src/common/unicode/meson.build | 4 | ||||
-rw-r--r-- | src/common/unicode_category.c | 318 | ||||
-rw-r--r-- | src/include/common/unicode_category.h | 27 | ||||
-rw-r--r-- | src/include/common/unicode_category_table.h | 3694 |
8 files changed, 4604 insertions, 102 deletions
diff --git a/src/common/unicode/Makefile b/src/common/unicode/Makefile index 04d81dd5cb5..27f0408d8b8 100644 --- a/src/common/unicode/Makefile +++ b/src/common/unicode/Makefile @@ -29,13 +29,13 @@ update-unicode: unicode_category_table.h unicode_east_asian_fw_table.h unicode_n # These files are part of the Unicode Character Database. Download # them on demand. The dependency on Makefile.global is for # UNICODE_VERSION. -CompositionExclusions.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt UnicodeData.txt: $(top_builddir)/src/Makefile.global +CompositionExclusions.txt DerivedCoreProperties.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt PropList.txt UnicodeData.txt: $(top_builddir)/src/Makefile.global $(DOWNLOAD) https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$(@F) unicode_version.h: generate-unicode_version.pl $(PERL) $< --version $(UNICODE_VERSION) -unicode_category_table.h: generate-unicode_category_table.pl UnicodeData.txt +unicode_category_table.h: generate-unicode_category_table.pl DerivedCoreProperties.txt PropList.txt UnicodeData.txt $(PERL) $< # Generation of conversion tables used for string normalization with @@ -82,4 +82,4 @@ clean: rm -f $(OBJS) category_test category_test.o norm_test norm_test.o distclean: clean - rm -f CompositionExclusions.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt UnicodeData.txt norm_test_table.h unicode_category_table.h unicode_norm_table.h + rm -f CompositionExclusions.txt DerivedCoreProperties.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt PropList.txt UnicodeData.txt norm_test_table.h unicode_category_table.h unicode_norm_table.h diff --git a/src/common/unicode/README b/src/common/unicode/README index 56956f6a65f..110ce5765d5 100644 --- a/src/common/unicode/README +++ b/src/common/unicode/README @@ -1,22 +1,35 @@ -This directory contains tools to generate the tables in -src/include/common/unicode_norm.h, used 
for Unicode normalization. The -generated .h file is included in the source tree, so these are normally not -needed to build PostgreSQL, only if you need to re-generate the .h file -from the Unicode data files for some reason, e.g. to update to a new version -of Unicode. +This directory contains tools to download new Unicode data files and +generate static tables. These tables are used to normalize or +determine various properties of Unicode data. -Generating unicode_norm_table.h -------------------------------- +The generated header files are copied to src/include/common/, and +included in the source tree, so these tools are not normally required +to build PostgreSQL. -Run +Update Unicode Version +---------------------- + +Edit src/Makefile.global.in and src/common/unicode/meson.build +to update the UNICODE_VERSION. + +Then, generate the new header files with: make update-unicode -from the top level of the source tree and commit the result. +or if using meson: + + ninja update-unicode + +from the top level of the source tree. Examine the result to make sure +the changes look reasonable (that is, that the diff size and scope is +comparable to the Unicode changes since the last update), and then +commit it. Tests ----- +Normalization tests: + The Unicode consortium publishes a comprehensive test suite for the normalization algorithm, in a file called NormalizationTest.txt. This directory also contains a perl script and some C code, to run our @@ -26,3 +39,15 @@ To download NormalizationTest.txt and run the tests: make normalization-check This is also run as part of the update-unicode target. + +Category & Property tests: + +The file category_test.c exhaustively compares the category and +properties of each code point as determined by the generated tables +with the category and properties as reported by ICU. 
For this test to +be effective, the version of the Unicode data files must be similar to +the version of Unicode on which ICU is based, so attempt to match the +versions as closely as possible. If the Unicode versions are mismatched, the test skips over +codepoints that are assigned in one version and not the other, and may +falsely report failures. This test is run as a part of the +update-unicode target. diff --git a/src/common/unicode/category_test.c b/src/common/unicode/category_test.c index f1aaac0f613..e823044d63a 100644 --- a/src/common/unicode/category_test.c +++ b/src/common/unicode/category_test.c @@ -1,6 +1,6 @@ /*------------------------------------------------------------------------- * category_test.c - * Program to test Unicode general category functions. + * Program to test Unicode general category and character properties. * * Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group * @@ -14,17 +14,23 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <wctype.h> #ifdef USE_ICU #include <unicode/uchar.h> #endif + #include "common/unicode_category.h" #include "common/unicode_version.h" +static int pg_unicode_version = 0; +#ifdef USE_ICU +static int icu_unicode_version = 0; +#endif + /* * Parse version into integer for easy comparison. */ -#ifdef USE_ICU static int parse_unicode_version(const char *version) { @@ -39,57 +45,175 @@ parse_unicode_version(const char *version) return major * 100 + minor; } -#endif +#ifdef USE_ICU /* - * Exhaustively test that the Unicode category for each codepoint matches that - * returned by ICU. + * Test Postgres Unicode tables by comparing with ICU. Test the General + * Category, as well as the properties Alphabetic, Lowercase, Uppercase, + * White_Space, and Hex_Digit. 
*/ -int -main(int argc, char **argv) +static void +icu_test() { -#ifdef USE_ICU - int pg_unicode_version = parse_unicode_version(PG_UNICODE_VERSION); - int icu_unicode_version = parse_unicode_version(U_UNICODE_VERSION); + int successful = 0; int pg_skipped_codepoints = 0; int icu_skipped_codepoints = 0; - printf("category_test: Postgres Unicode version:\t%s\n", PG_UNICODE_VERSION); - printf("category_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION); - - for (UChar32 code = 0; code <= 0x10ffff; code++) + for (pg_wchar code = 0; code <= 0x10ffff; code++) { uint8_t pg_category = unicode_category(code); uint8_t icu_category = u_charType(code); + /* Property tests */ + bool prop_alphabetic = pg_u_prop_alphabetic(code); + bool prop_lowercase = pg_u_prop_lowercase(code); + bool prop_uppercase = pg_u_prop_uppercase(code); + bool prop_cased = pg_u_prop_cased(code); + bool prop_case_ignorable = pg_u_prop_case_ignorable(code); + bool prop_white_space = pg_u_prop_white_space(code); + bool prop_hex_digit = pg_u_prop_hex_digit(code); + bool prop_join_control = pg_u_prop_join_control(code); + + bool icu_prop_alphabetic = u_hasBinaryProperty( + code, UCHAR_ALPHABETIC); + bool icu_prop_lowercase = u_hasBinaryProperty( + code, UCHAR_LOWERCASE); + bool icu_prop_uppercase = u_hasBinaryProperty( + code, UCHAR_UPPERCASE); + bool icu_prop_cased = u_hasBinaryProperty( + code, UCHAR_CASED); + bool icu_prop_case_ignorable = u_hasBinaryProperty( + code, UCHAR_CASE_IGNORABLE); + bool icu_prop_white_space = u_hasBinaryProperty( + code, UCHAR_WHITE_SPACE); + bool icu_prop_hex_digit = u_hasBinaryProperty( + code, UCHAR_HEX_DIGIT); + bool icu_prop_join_control = u_hasBinaryProperty( + code, UCHAR_JOIN_CONTROL); + + /* + * Compare with ICU for character classes using: + * + * https://unicode-org.github.io/icu-docs/apidoc/dev/icu4c/uchar_8h.html#details + * + * which describes how to use ICU to test for membership in regex + * character classes. 
+ * + * NB: the document suggests testing for some properties such as + * UCHAR_POSIX_ALNUM, but that doesn't mean that we're testing for the + * "POSIX Compatible" character classes. + */ + bool isalpha = pg_u_isalpha(code); + bool islower = pg_u_islower(code); + bool isupper = pg_u_isupper(code); + bool ispunct = pg_u_ispunct(code, false); + bool isdigit = pg_u_isdigit(code, false); + bool isxdigit = pg_u_isxdigit(code, false); + bool isalnum = pg_u_isalnum(code, false); + bool isspace = pg_u_isspace(code); + bool isblank = pg_u_isblank(code); + bool iscntrl = pg_u_iscntrl(code); + bool isgraph = pg_u_isgraph(code); + bool isprint = pg_u_isprint(code); + + bool icu_isalpha = u_isUAlphabetic(code); + bool icu_islower = u_isULowercase(code); + bool icu_isupper = u_isUUppercase(code); + bool icu_ispunct = u_ispunct(code); + bool icu_isdigit = u_isdigit(code); + bool icu_isxdigit = u_hasBinaryProperty(code, + UCHAR_POSIX_XDIGIT); + bool icu_isalnum = u_hasBinaryProperty(code, + UCHAR_POSIX_ALNUM); + bool icu_isspace = u_isUWhiteSpace(code); + bool icu_isblank = u_isblank(code); + bool icu_iscntrl = icu_category == PG_U_CONTROL; + bool icu_isgraph = u_hasBinaryProperty(code, + UCHAR_POSIX_GRAPH); + bool icu_isprint = u_hasBinaryProperty(code, + UCHAR_POSIX_PRINT); + + /* + * A version mismatch means that some assigned codepoints in the newer + * version may be unassigned in the older version. That's OK, though + * the test will not cover those codepoints marked unassigned in the + * older version (that is, it will no longer be an exhaustive test). 
+ */ + if (pg_category == PG_U_UNASSIGNED && + icu_category != PG_U_UNASSIGNED && + pg_unicode_version < icu_unicode_version) + { + pg_skipped_codepoints++; + continue; + } + + if (icu_category == PG_U_UNASSIGNED && + pg_category != PG_U_UNASSIGNED && + icu_unicode_version < pg_unicode_version) + { + icu_skipped_codepoints++; + continue; + } + if (pg_category != icu_category) { - /* - * A version mismatch means that some assigned codepoints in the - * newer version may be unassigned in the older version. That's - * OK, though the test will not cover those codepoints marked - * unassigned in the older version (that is, it will no longer be - * an exhaustive test). - */ - if (pg_category == PG_U_UNASSIGNED && - pg_unicode_version < icu_unicode_version) - pg_skipped_codepoints++; - else if (icu_category == PG_U_UNASSIGNED && - icu_unicode_version < pg_unicode_version) - icu_skipped_codepoints++; - else - { - printf("category_test: FAILURE for codepoint 0x%06x\n", code); - printf("category_test: Postgres category: %02d %s %s\n", pg_category, - unicode_category_abbrev(pg_category), - unicode_category_string(pg_category)); - printf("category_test: ICU category: %02d %s %s\n", icu_category, - unicode_category_abbrev(icu_category), - unicode_category_string(icu_category)); - printf("\n"); - exit(1); - } + printf("category_test: FAILURE for codepoint 0x%06x\n", code); + printf("category_test: Postgres category: %02d %s %s\n", pg_category, + unicode_category_abbrev(pg_category), + unicode_category_string(pg_category)); + printf("category_test: ICU category: %02d %s %s\n", icu_category, + unicode_category_abbrev(icu_category), + unicode_category_string(icu_category)); + printf("\n"); + exit(1); + } + + if (prop_alphabetic != icu_prop_alphabetic || + prop_lowercase != icu_prop_lowercase || + prop_uppercase != icu_prop_uppercase || + prop_cased != icu_prop_cased || + prop_case_ignorable != icu_prop_case_ignorable || + prop_white_space != icu_prop_white_space || + prop_hex_digit 
!= icu_prop_hex_digit || + prop_join_control != icu_prop_join_control) + { + printf("category_test: FAILURE for codepoint 0x%06x\n", code); + printf("category_test: Postgres property alphabetic/lowercase/uppercase/cased/case_ignorable/white_space/hex_digit/join_control: %d/%d/%d/%d/%d/%d/%d/%d\n", + prop_alphabetic, prop_lowercase, prop_uppercase, + prop_cased, prop_case_ignorable, + prop_white_space, prop_hex_digit, prop_join_control); + printf("category_test: ICU property alphabetic/lowercase/uppercase/cased/case_ignorable/white_space/hex_digit/join_control: %d/%d/%d/%d/%d/%d/%d/%d\n", + icu_prop_alphabetic, icu_prop_lowercase, icu_prop_uppercase, + icu_prop_cased, icu_prop_case_ignorable, + icu_prop_white_space, icu_prop_hex_digit, icu_prop_join_control); + printf("\n"); + exit(1); } + + if (isalpha != icu_isalpha || + islower != icu_islower || + isupper != icu_isupper || + ispunct != icu_ispunct || + isdigit != icu_isdigit || + isxdigit != icu_isxdigit || + isalnum != icu_isalnum || + isspace != icu_isspace || + isblank != icu_isblank || + iscntrl != icu_iscntrl || + isgraph != icu_isgraph || + isprint != icu_isprint) + { + printf("category_test: FAILURE for codepoint 0x%06x\n", code); + printf("category_test: Postgres class alpha/lower/upper/punct/digit/xdigit/alnum/space/blank/cntrl/graph/print: %d/%d/%d/%d/%d/%d/%d/%d/%d/%d/%d/%d\n", + isalpha, islower, isupper, ispunct, isdigit, isxdigit, isalnum, isspace, isblank, iscntrl, isgraph, isprint); + printf("category_test: ICU class alpha/lower/upper/punct/digit/xdigit/alnum/space/blank/cntrl/graph/print: %d/%d/%d/%d/%d/%d/%d/%d/%d/%d/%d/%d\n", + icu_isalpha, icu_islower, icu_isupper, icu_ispunct, icu_isdigit, icu_isxdigit, icu_isalnum, icu_isspace, icu_isblank, icu_iscntrl, icu_isgraph, icu_isprint); + printf("\n"); + exit(1); + } + + if (pg_category != PG_U_UNASSIGNED) + successful++; } if (pg_skipped_codepoints > 0) @@ -99,10 +223,22 @@ main(int argc, char **argv) printf("category_test: skipped %d codepoints 
unassigned in ICU due to Unicode version mismatch\n", icu_skipped_codepoints); - printf("category_test: success\n"); - exit(0); + printf("category_test: ICU test: %d codepoints successful\n", successful); +} +#endif + +int +main(int argc, char **argv) +{ + pg_unicode_version = parse_unicode_version(PG_UNICODE_VERSION); + printf("category_test: Postgres Unicode version:\t%s\n", PG_UNICODE_VERSION); + +#ifdef USE_ICU + icu_unicode_version = parse_unicode_version(U_UNICODE_VERSION); + printf("category_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION); + + icu_test(); #else - printf("category_test: ICU support required for test; skipping\n"); - exit(0); + printf("category_test: ICU not available; skipping\n"); #endif } diff --git a/src/common/unicode/generate-unicode_category_table.pl b/src/common/unicode/generate-unicode_category_table.pl index a50c87b7e96..12914c02433 100644 --- a/src/common/unicode/generate-unicode_category_table.pl +++ b/src/common/unicode/generate-unicode_category_table.pl @@ -25,6 +25,10 @@ my $output_table_file = "$output_path/unicode_category_table.h"; my $FH; +# create a table of all codepoints < 0x80 and their associated +# categories and properties for fast lookups +my %opt_ascii = (); + # Read entries from UnicodeData.txt into a list of codepoint ranges # and their general category. my @category_ranges = (); @@ -48,21 +52,42 @@ while (my $line = <$FH>) my $category = $elts[2]; die "codepoint out of range" if $code > 0x10FFFF; - die "unassigned codepoint in UnicodeData.txt" if $category eq $CATEGORY_UNASSIGNED; + die "unassigned codepoint in UnicodeData.txt" + if $category eq $CATEGORY_UNASSIGNED; + + if ($code < 0x80) + { + my @properties = (); + # No ASCII characters have category Titlecase_Letter, + # but include here for completeness. 
+ push @properties, "PG_U_PROP_CASED" if ($category eq 'Lt'); + $opt_ascii{$code} = { + Category => $category, + Properties => \@properties + }; + } - if (!defined($range_start)) { + if (!defined($range_start)) + { my $code_str = sprintf "0x%06x", $code; - die if defined($range_end) || defined($range_category) || defined($gap_category); + die + if defined($range_end) + || defined($range_category) + || defined($gap_category); die "unexpected first entry <..., Last>" if ($name =~ /Last>/); - die "expected 0x000000 for first entry, got $code_str" if $code != 0x000000; + die "expected 0x000000 for first entry, got $code_str" + if $code != 0x000000; # initialize $range_start = $code; $range_end = $code; $range_category = $category; - if ($name =~ /<.*, First>$/) { + if ($name =~ /<.*, First>$/) + { $gap_category = $category; - } else { + } + else + { $gap_category = $CATEGORY_UNASSIGNED; } next; @@ -71,10 +96,17 @@ while (my $line = <$FH>) # Gap in codepoints detected. If it's a different category than # the current range, emit the current range and initialize a new # range representing the gap. 
- if ($range_end + 1 != $code && $range_category ne $gap_category) { - if ($range_category ne $CATEGORY_UNASSIGNED) { - push(@category_ranges, {start => $range_start, end => $range_end, - category => $range_category}); + if ($range_end + 1 != $code && $range_category ne $gap_category) + { + if ($range_category ne $CATEGORY_UNASSIGNED) + { + push( + @category_ranges, + { + start => $range_start, + end => $range_end, + category => $range_category + }); } $range_start = $range_end + 1; $range_end = $code - 1; @@ -82,27 +114,39 @@ while (my $line = <$FH>) } # different category; new range - if ($range_category ne $category) { - if ($range_category ne $CATEGORY_UNASSIGNED) { - push(@category_ranges, {start => $range_start, end => $range_end, - category => $range_category}); + if ($range_category ne $category) + { + if ($range_category ne $CATEGORY_UNASSIGNED) + { + push( + @category_ranges, + { + start => $range_start, + end => $range_end, + category => $range_category + }); } $range_start = $code; $range_end = $code; $range_category = $category; } - if ($name =~ /<.*, First>$/) { - die "<..., First> entry unexpectedly follows another <..., First> entry" + if ($name =~ /<.*, First>$/) + { + die + "<..., First> entry unexpectedly follows another <..., First> entry" if $gap_category ne $CATEGORY_UNASSIGNED; $gap_category = $category; } - elsif ($name =~ /<.*, Last>$/) { - die "<..., First> and <..., Last> entries have mismatching general category" + elsif ($name =~ /<.*, Last>$/) + { + die + "<..., First> and <..., Last> entries have mismatching general category" if $gap_category ne $category; $gap_category = $CATEGORY_UNASSIGNED; } - else { + else + { die "unexpected entry found between <..., First> and <..., Last>" if $gap_category ne $CATEGORY_UNASSIGNED; } @@ -115,13 +159,17 @@ die "<..., First> entry with no corresponding <..., Last> entry" if $gap_category ne $CATEGORY_UNASSIGNED; # emit final range -if ($range_category ne $CATEGORY_UNASSIGNED) { - 
push(@category_ranges, {start => $range_start, end => $range_end, - category => $range_category}); +if ($range_category ne $CATEGORY_UNASSIGNED) +{ + push( + @category_ranges, + { + start => $range_start, + end => $range_end, + category => $range_category + }); } -my $num_ranges = scalar @category_ranges; - # See: https://www.unicode.org/reports/tr44/#General_Category_Values my $categories = { Cn => 'PG_U_UNASSIGNED', @@ -156,11 +204,146 @@ my $categories = { Pf => 'PG_U_FINAL_PUNCTUATION' }; -# Start writing out the output files +# Find White_Space and Hex_Digit characters +my @white_space = (); +my @hex_digits = (); +my @join_control = (); +open($FH, '<', "$output_path/PropList.txt") + or die "Could not open $output_path/PropList.txt: $!."; +while (my $line = <$FH>) +{ + my $pattern = qr/([0-9A-F\.]+)\s*;\s*(\w+)\s*#.*/s; + next unless $line =~ $pattern; + + my $code = $line =~ s/$pattern/$1/rg; + my $property = $line =~ s/$pattern/$2/rg; + my $start; + my $end; + + if ($code =~ /\.\./) + { + # code range + my @sp = split /\.\./, $code; + $start = hex($sp[0]); + $end = hex($sp[1]); + } + else + { + # single code point + $start = hex($code); + $end = hex($code); + } + + if ($property eq "White_Space") + { + push @white_space, { start => $start, end => $end }; + for (my $i = $start; $i <= $end && $i < 0x80; $i++) + { + push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_WHITE_SPACE"; + } + } + elsif ($property eq "Hex_Digit") + { + push @hex_digits, { start => $start, end => $end }; + for (my $i = $start; $i <= $end && $i < 0x80; $i++) + { + push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_HEX_DIGIT"; + } + } + elsif ($property eq "Join_Control") + { + push @join_control, { start => $start, end => $end }; + for (my $i = $start; $i <= $end && $i < 0x80; $i++) + { + push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_JOIN_CONTROL"; + } + } +} + +# Find Alphabetic, Lowercase, and Uppercase characters +my @alphabetic = (); +my @lowercase = (); +my @uppercase = (); +my 
@case_ignorable = (); +open($FH, '<', "$output_path/DerivedCoreProperties.txt") + or die "Could not open $output_path/DerivedCoreProperties.txt: $!."; +while (my $line = <$FH>) +{ + my $pattern = qr/^([0-9A-F\.]+)\s*;\s*(\w+)\s*#.*$/s; + next unless $line =~ $pattern; + + my $code = $line =~ s/$pattern/$1/rg; + my $property = $line =~ s/$pattern/$2/rg; + my $start; + my $end; + + if ($code =~ /\.\./) + { + # code range + my @sp = split /\.\./, $code; + die "line: {$line} code: {$code} sp[0] {$sp[0]} sp[1] {$sp[1]}" + unless $sp[0] =~ /^[0-9A-F]+$/ && $sp[1] =~ /^[0-9A-F]+$/; + $start = hex($sp[0]); + $end = hex($sp[1]); + } + else + { + die "line: {$line} code: {$code}" unless $code =~ /^[0-9A-F]+$/; + # single code point + $start = hex($code); + $end = hex($code); + } + + if ($property eq "Alphabetic") + { + push @alphabetic, { start => $start, end => $end }; + for (my $i = $start; $i <= $end && $i < 0x80; $i++) + { + push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_ALPHABETIC"; + } + } + elsif ($property eq "Lowercase") + { + push @lowercase, { start => $start, end => $end }; + for (my $i = $start; $i <= $end && $i < 0x80; $i++) + { + push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_LOWERCASE"; + push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_CASED"; + } + } + elsif ($property eq "Uppercase") + { + push @uppercase, { start => $start, end => $end }; + for (my $i = $start; $i <= $end && $i < 0x80; $i++) + { + push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_UPPERCASE"; + push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_CASED"; + } + } + elsif ($property eq "Case_Ignorable") + { + push @case_ignorable, { start => $start, end => $end }; + for (my $i = $start; $i <= $end && $i < 0x80; $i++) + { + push @{ $opt_ascii{$i}{Properties} }, "PG_U_PROP_CASE_IGNORABLE"; + } + } +} + +my $num_category_ranges = scalar @category_ranges; +my $num_alphabetic_ranges = scalar @alphabetic; +my $num_lowercase_ranges = scalar @lowercase; +my $num_uppercase_ranges = scalar @uppercase; 
+my $num_case_ignorable_ranges = scalar @case_ignorable; +my $num_white_space_ranges = scalar @white_space; +my $num_hex_digit_ranges = scalar @hex_digits; +my $num_join_control_ranges = scalar @join_control; + +# Start writing out the output file open my $OT, '>', $output_table_file or die "Could not open output file $output_table_file: $!\n"; -print $OT <<HEADER; +print $OT <<"EOS"; /*------------------------------------------------------------------------- * * unicode_category_table.h @@ -188,18 +371,153 @@ typedef struct uint8 category; /* General Category */ } pg_category_range; -/* table of Unicode codepoint ranges and their categories */ -static const pg_category_range unicode_categories[$num_ranges] = +typedef struct +{ + uint32 first; /* Unicode codepoint */ + uint32 last; /* Unicode codepoint */ +} pg_unicode_range; + +typedef struct +{ + uint8 category; + uint8 properties; +} pg_unicode_properties; + +/* + * The properties currently used, in no particular order. Fits in a uint8, but + * if more properties are added, a wider integer will be needed. 
+ */ +#define PG_U_PROP_ALPHABETIC (1 << 0) +#define PG_U_PROP_LOWERCASE (1 << 1) +#define PG_U_PROP_UPPERCASE (1 << 2) +#define PG_U_PROP_CASED (1 << 3) +#define PG_U_PROP_CASE_IGNORABLE (1 << 4) +#define PG_U_PROP_WHITE_SPACE (1 << 5) +#define PG_U_PROP_JOIN_CONTROL (1 << 6) +#define PG_U_PROP_HEX_DIGIT (1 << 7) + +EOS + +print $OT <<"EOS"; +/* table for fast lookup of ASCII codepoints */ +static const pg_unicode_properties unicode_opt_ascii[128] = +{ +EOS + +for (my $i = 0; $i < 128; $i++) { -HEADER + my $category_str = $categories->{ $opt_ascii{$i}->{Category} }; + my $props_str = (join ' | ', @{ $opt_ascii{$i}{Properties} }) || "0"; + printf $OT + "\t{\n\t\t/* 0x%06x */\n\t\t.category = %s,\n\t\t.properties = %s\n\t},\n", + $i, $category_str, $props_str; +} -my $firsttime = 1; -foreach my $range (@category_ranges) { - printf $OT ",\n" unless $firsttime; - $firsttime = 0; +print $OT "};\n\n"; + +print $OT <<"EOS"; +/* table of Unicode codepoint ranges and their categories */ +static const pg_category_range unicode_categories[$num_category_ranges] = +{ +EOS - my $category = $categories->{$range->{category}}; +foreach my $range (@category_ranges) +{ + my $category = $categories->{ $range->{category} }; die "category missing: $range->{category}" unless $category; - printf $OT "\t{0x%06x, 0x%06x, %s}", $range->{start}, $range->{end}, $category; + printf $OT "\t{0x%06x, 0x%06x, %s},\n", $range->{start}, $range->{end}, + $category; +} + +print $OT "};\n\n"; + +print $OT <<"EOS"; +/* table of Unicode codepoint ranges of Alphabetic characters */ +static const pg_unicode_range unicode_alphabetic[$num_alphabetic_ranges] = +{ +EOS + +foreach my $range (@alphabetic) +{ + printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end}; +} + +print $OT "};\n\n"; + +print $OT <<"EOS"; +/* table of Unicode codepoint ranges of Lowercase characters */ +static const pg_unicode_range unicode_lowercase[$num_lowercase_ranges] = +{ +EOS + +foreach my $range (@lowercase) +{ + 
printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end}; +} + +print $OT "};\n\n"; + +print $OT <<"EOS"; +/* table of Unicode codepoint ranges of Uppercase characters */ +static const pg_unicode_range unicode_uppercase[$num_uppercase_ranges] = +{ +EOS + +foreach my $range (@uppercase) +{ + printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end}; } -print $OT "\n};\n"; + +print $OT "};\n\n"; + +print $OT <<"EOS"; +/* table of Unicode codepoint ranges of Case_Ignorable characters */ +static const pg_unicode_range unicode_case_ignorable[$num_case_ignorable_ranges] = +{ +EOS + +foreach my $range (@case_ignorable) +{ + printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end}; +} + +print $OT "};\n\n"; + +print $OT <<"EOS"; +/* table of Unicode codepoint ranges of White_Space characters */ +static const pg_unicode_range unicode_white_space[$num_white_space_ranges] = +{ +EOS + +foreach my $range (@white_space) +{ + printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end}; +} + +print $OT "};\n\n"; + +print $OT <<"EOS"; +/* table of Unicode codepoint ranges of Hex_Digit characters */ +static const pg_unicode_range unicode_hex_digit[$num_hex_digit_ranges] = +{ +EOS + +foreach my $range (@hex_digits) +{ + printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end}; +} + +print $OT "};\n\n"; + +print $OT <<"EOS"; +/* table of Unicode codepoint ranges of Join_Control characters */ +static const pg_unicode_range unicode_join_control[$num_join_control_ranges] = +{ +EOS + +foreach my $range (@join_control) +{ + printf $OT "\t{0x%06x, 0x%06x},\n", $range->{start}, $range->{end}; +} + +print $OT "};\n"; diff --git a/src/common/unicode/meson.build b/src/common/unicode/meson.build index df4f3a4ed1d..d7190bb8ca9 100644 --- a/src/common/unicode/meson.build +++ b/src/common/unicode/meson.build @@ -11,7 +11,7 @@ endif # These files are part of the Unicode Character Database. Download them on # demand. 
-foreach f : ['CompositionExclusions.txt', 'DerivedNormalizationProps.txt', 'EastAsianWidth.txt', 'NormalizationTest.txt', 'UnicodeData.txt'] +foreach f : ['CompositionExclusions.txt', 'DerivedCoreProperties.txt', 'DerivedNormalizationProps.txt', 'EastAsianWidth.txt', 'NormalizationTest.txt', 'PropList.txt', 'UnicodeData.txt'] url = unicode_baseurl.format(UNICODE_VERSION, f) target = custom_target(f, output: f, @@ -26,7 +26,7 @@ update_unicode_targets = [] update_unicode_targets += \ custom_target('unicode_category_table.h', - input: [unicode_data['UnicodeData.txt']], + input: [unicode_data['UnicodeData.txt'], unicode_data['DerivedCoreProperties.txt'], unicode_data['PropList.txt']], output: ['unicode_category_table.h'], command: [ perl, files('generate-unicode_category_table.pl'), diff --git a/src/common/unicode_category.c b/src/common/unicode_category.c index 668051b461c..bece7334f5b 100644 --- a/src/common/unicode_category.c +++ b/src/common/unicode_category.c @@ -1,6 +1,8 @@ /*------------------------------------------------------------------------- * unicode_category.c - * Determine general category of Unicode characters. + * Determine general category and character properties of Unicode + * characters. Encoding must be UTF8, where we assume that the pg_wchar + * representation is a code point. * * Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group * @@ -19,23 +21,84 @@ #include "common/unicode_category_table.h" /* + * Create bitmasks from pg_unicode_category values for efficient comparison of + * multiple categories. For instance, PG_U_MN_MASK is a bitmask representing + * the general category Mn; and PG_U_M_MASK represents general categories Mn, + * Me, and Mc. + * + * The number of Unicode General Categories should never grow, so a 32-bit + * mask is fine. 
+ */ +#define PG_U_CATEGORY_MASK(X) ((uint32)(1 << (X))) + +#define PG_U_LU_MASK PG_U_CATEGORY_MASK(PG_U_UPPERCASE_LETTER) +#define PG_U_LL_MASK PG_U_CATEGORY_MASK(PG_U_LOWERCASE_LETTER) +#define PG_U_LT_MASK PG_U_CATEGORY_MASK(PG_U_TITLECASE_LETTER) +#define PG_U_LC_MASK (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK) +#define PG_U_LM_MASK PG_U_CATEGORY_MASK(PG_U_MODIFIER_LETTER) +#define PG_U_LO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_LETTER) +#define PG_U_L_MASK (PG_U_LU_MASK|PG_U_LL_MASK|PG_U_LT_MASK|PG_U_LM_MASK|\ + PG_U_LO_MASK) +#define PG_U_MN_MASK PG_U_CATEGORY_MASK(PG_U_NONSPACING_MARK) +#define PG_U_ME_MASK PG_U_CATEGORY_MASK(PG_U_ENCLOSING_MARK) +#define PG_U_MC_MASK PG_U_CATEGORY_MASK(PG_U_SPACING_MARK) +#define PG_U_M_MASK (PG_U_MN_MASK|PG_U_MC_MASK|PG_U_ME_MASK) +#define PG_U_ND_MASK PG_U_CATEGORY_MASK(PG_U_DECIMAL_NUMBER) +#define PG_U_NL_MASK PG_U_CATEGORY_MASK(PG_U_LETTER_NUMBER) +#define PG_U_NO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_NUMBER) +#define PG_U_N_MASK (PG_U_ND_MASK|PG_U_NL_MASK|PG_U_NO_MASK) +#define PG_U_PC_MASK PG_U_CATEGORY_MASK(PG_U_CONNECTOR_PUNCTUATION) +#define PG_U_PD_MASK PG_U_CATEGORY_MASK(PG_U_DASH_PUNCTUATION) +#define PG_U_PS_MASK PG_U_CATEGORY_MASK(PG_U_OPEN_PUNCTUATION) +#define PG_U_PE_MASK PG_U_CATEGORY_MASK(PG_U_CLOSE_PUNCTUATION) +#define PG_U_PI_MASK PG_U_CATEGORY_MASK(PG_U_INITIAL_PUNCTUATION) +#define PG_U_PF_MASK PG_U_CATEGORY_MASK(PG_U_FINAL_PUNCTUATION) +#define PG_U_PO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_PUNCTUATION) +#define PG_U_P_MASK (PG_U_PC_MASK|PG_U_PD_MASK|PG_U_PS_MASK|PG_U_PE_MASK|\ + PG_U_PI_MASK|PG_U_PF_MASK|PG_U_PO_MASK) +#define PG_U_SM_MASK PG_U_CATEGORY_MASK(PG_U_MATH_SYMBOL) +#define PG_U_SC_MASK PG_U_CATEGORY_MASK(PG_U_CURRENCY_SYMBOL) +#define PG_U_SK_MASK PG_U_CATEGORY_MASK(PG_U_MODIFIER_SYMBOL) +#define PG_U_SO_MASK PG_U_CATEGORY_MASK(PG_U_OTHER_SYMBOL) +#define PG_U_S_MASK (PG_U_SM_MASK|PG_U_SC_MASK|PG_U_SK_MASK|PG_U_SO_MASK) +#define PG_U_ZS_MASK PG_U_CATEGORY_MASK(PG_U_SPACE_SEPARATOR) +#define 
PG_U_ZL_MASK PG_U_CATEGORY_MASK(PG_U_LINE_SEPARATOR) +#define PG_U_ZP_MASK PG_U_CATEGORY_MASK(PG_U_PARAGRAPH_SEPARATOR) +#define PG_U_Z_MASK (PG_U_ZS_MASK|PG_U_ZL_MASK|PG_U_ZP_MASK) +#define PG_U_CC_MASK PG_U_CATEGORY_MASK(PG_U_CONTROL) +#define PG_U_CF_MASK PG_U_CATEGORY_MASK(PG_U_FORMAT) +#define PG_U_CS_MASK PG_U_CATEGORY_MASK(PG_U_SURROGATE) +#define PG_U_CO_MASK PG_U_CATEGORY_MASK(PG_U_PRIVATE_USE) +#define PG_U_CN_MASK PG_U_CATEGORY_MASK(PG_U_UNASSIGNED) +#define PG_U_C_MASK (PG_U_CC_MASK|PG_U_CF_MASK|PG_U_CS_MASK|PG_U_CO_MASK|\ + PG_U_CN_MASK) + +#define PG_U_CHARACTER_TAB 0x09 + +static bool range_search(const pg_unicode_range * tbl, size_t size, + pg_wchar code); + +/* * Unicode general category for the given codepoint. */ pg_unicode_category -unicode_category(pg_wchar ucs) +unicode_category(pg_wchar code) { int min = 0; int mid; int max = lengthof(unicode_categories) - 1; - Assert(ucs <= 0x10ffff); + Assert(code <= 0x10ffff); + + if (code < 0x80) + return unicode_opt_ascii[code].category; while (max >= min) { mid = (min + max) / 2; - if (ucs > unicode_categories[mid].last) + if (code > unicode_categories[mid].last) min = mid + 1; - else if (ucs < unicode_categories[mid].first) + else if (code < unicode_categories[mid].first) max = mid - 1; else return unicode_categories[mid].category; @@ -44,6 +107,224 @@ unicode_category(pg_wchar ucs) return PG_U_UNASSIGNED; } +bool +pg_u_prop_alphabetic(pg_wchar code) +{ + if (code < 0x80) + return unicode_opt_ascii[code].properties & PG_U_PROP_ALPHABETIC; + + return range_search(unicode_alphabetic, + lengthof(unicode_alphabetic), + code); +} + +bool +pg_u_prop_lowercase(pg_wchar code) +{ + if (code < 0x80) + return unicode_opt_ascii[code].properties & PG_U_PROP_LOWERCASE; + + return range_search(unicode_lowercase, + lengthof(unicode_lowercase), + code); +} + +bool +pg_u_prop_uppercase(pg_wchar code) +{ + if (code < 0x80) + return unicode_opt_ascii[code].properties & PG_U_PROP_UPPERCASE; + + return 
range_search(unicode_uppercase, + lengthof(unicode_uppercase), + code); +} + +bool +pg_u_prop_cased(pg_wchar code) +{ + uint32 category_mask; + + if (code < 0x80) + return unicode_opt_ascii[code].properties & PG_U_PROP_CASED; + + category_mask = PG_U_CATEGORY_MASK(unicode_category(code)); + + return category_mask & PG_U_LT_MASK || + pg_u_prop_lowercase(code) || + pg_u_prop_uppercase(code); +} + +bool +pg_u_prop_case_ignorable(pg_wchar code) +{ + if (code < 0x80) + return unicode_opt_ascii[code].properties & PG_U_PROP_CASE_IGNORABLE; + + return range_search(unicode_case_ignorable, + lengthof(unicode_case_ignorable), + code); +} + +bool +pg_u_prop_white_space(pg_wchar code) +{ + if (code < 0x80) + return unicode_opt_ascii[code].properties & PG_U_PROP_WHITE_SPACE; + + return range_search(unicode_white_space, + lengthof(unicode_white_space), + code); +} + +bool +pg_u_prop_hex_digit(pg_wchar code) +{ + if (code < 0x80) + return unicode_opt_ascii[code].properties & PG_U_PROP_HEX_DIGIT; + + return range_search(unicode_hex_digit, + lengthof(unicode_hex_digit), + code); +} + +bool +pg_u_prop_join_control(pg_wchar code) +{ + if (code < 0x80) + return unicode_opt_ascii[code].properties & PG_U_PROP_JOIN_CONTROL; + + return range_search(unicode_join_control, + lengthof(unicode_join_control), + code); +} + +/* + * The following functions implement the Compatibility Properties described + * at: http://www.unicode.org/reports/tr18/#Compatibility_Properties + * + * If 'posix' is true, implements the "POSIX Compatible" variant, otherwise + * the "Standard" variant. 
+ */ + +bool +pg_u_isdigit(pg_wchar code, bool posix) +{ + if (posix) + return ('0' <= code && code <= '9'); + else + return unicode_category(code) == PG_U_DECIMAL_NUMBER; +} + +bool +pg_u_isalpha(pg_wchar code) +{ + return pg_u_prop_alphabetic(code); +} + +bool +pg_u_isalnum(pg_wchar code, bool posix) +{ + return pg_u_isalpha(code) || pg_u_isdigit(code, posix); +} + +bool +pg_u_isword(pg_wchar code) +{ + uint32 category_mask = PG_U_CATEGORY_MASK(unicode_category(code)); + + return + category_mask & (PG_U_M_MASK | PG_U_ND_MASK | PG_U_PC_MASK) || + pg_u_isalpha(code) || + pg_u_prop_join_control(code); +} + +bool +pg_u_isupper(pg_wchar code) +{ + return pg_u_prop_uppercase(code); +} + +bool +pg_u_islower(pg_wchar code) +{ + return pg_u_prop_lowercase(code); +} + +bool +pg_u_isblank(pg_wchar code) +{ + return code == PG_U_CHARACTER_TAB || + unicode_category(code) == PG_U_SPACE_SEPARATOR; +} + +bool +pg_u_iscntrl(pg_wchar code) +{ + return unicode_category(code) == PG_U_CONTROL; +} + +bool +pg_u_isgraph(pg_wchar code) +{ + uint32 category_mask = PG_U_CATEGORY_MASK(unicode_category(code)); + + if (category_mask & (PG_U_CC_MASK | PG_U_CS_MASK | PG_U_CN_MASK) || + pg_u_isspace(code)) + return false; + return true; +} + +bool +pg_u_isprint(pg_wchar code) +{ + pg_unicode_category category = unicode_category(code); + + if (category == PG_U_CONTROL) + return false; + + return pg_u_isgraph(code) || pg_u_isblank(code); +} + +bool +pg_u_ispunct(pg_wchar code, bool posix) +{ + uint32 category_mask; + + if (posix) + { + if (pg_u_isalpha(code)) + return false; + + category_mask = PG_U_CATEGORY_MASK(unicode_category(code)); + return category_mask & (PG_U_P_MASK | PG_U_S_MASK); + } + else + { + category_mask = PG_U_CATEGORY_MASK(unicode_category(code)); + + return category_mask & PG_U_P_MASK; + } +} + +bool +pg_u_isspace(pg_wchar code) +{ + return pg_u_prop_white_space(code); +} + +bool +pg_u_isxdigit(pg_wchar code, bool posix) +{ + if (posix) + return (('0' <= code && code <= '9') 
|| + ('A' <= code && code <= 'F') || + ('a' <= code && code <= 'f')); + else + return unicode_category(code) == PG_U_DECIMAL_NUMBER || + pg_u_prop_hex_digit(code); +} + /* * Description of Unicode general category. */ @@ -191,3 +472,30 @@ unicode_category_abbrev(pg_unicode_category category) Assert(false); return "??"; /* keep compiler quiet */ } + +/* + * Binary search to test if given codepoint exists in one of the ranges in the + * given table. + */ +static bool +range_search(const pg_unicode_range * tbl, size_t size, pg_wchar code) +{ + int min = 0; + int mid; + int max = size - 1; + + Assert(code <= 0x10ffff); + + while (max >= min) + { + mid = (min + max) / 2; + if (code > tbl[mid].last) + min = mid + 1; + else if (code < tbl[mid].first) + max = mid - 1; + else + return true; + } + + return false; +} diff --git a/src/include/common/unicode_category.h b/src/include/common/unicode_category.h index 5bad2806150..f185b589009 100644 --- a/src/include/common/unicode_category.h +++ b/src/include/common/unicode_category.h @@ -62,7 +62,30 @@ typedef enum pg_unicode_category } pg_unicode_category; extern pg_unicode_category unicode_category(pg_wchar ucs); -const char *unicode_category_string(pg_unicode_category category); -const char *unicode_category_abbrev(pg_unicode_category category); +extern const char *unicode_category_string(pg_unicode_category category); +extern const char *unicode_category_abbrev(pg_unicode_category category); + +extern bool pg_u_prop_alphabetic(pg_wchar c); +extern bool pg_u_prop_lowercase(pg_wchar c); +extern bool pg_u_prop_uppercase(pg_wchar c); +extern bool pg_u_prop_cased(pg_wchar c); +extern bool pg_u_prop_case_ignorable(pg_wchar c); +extern bool pg_u_prop_white_space(pg_wchar c); +extern bool pg_u_prop_hex_digit(pg_wchar c); +extern bool pg_u_prop_join_control(pg_wchar c); + +extern bool pg_u_isdigit(pg_wchar c, bool posix); +extern bool pg_u_isalpha(pg_wchar c); +extern bool pg_u_isalnum(pg_wchar c, bool posix); +extern bool 
pg_u_isword(pg_wchar c); +extern bool pg_u_isupper(pg_wchar c); +extern bool pg_u_islower(pg_wchar c); +extern bool pg_u_isblank(pg_wchar c); +extern bool pg_u_iscntrl(pg_wchar c); +extern bool pg_u_isgraph(pg_wchar c); +extern bool pg_u_isprint(pg_wchar c); +extern bool pg_u_ispunct(pg_wchar c, bool posix); +extern bool pg_u_isspace(pg_wchar c); +extern bool pg_u_isxdigit(pg_wchar c, bool posix); #endif /* UNICODE_CATEGORY_H */ diff --git a/src/include/common/unicode_category_table.h b/src/include/common/unicode_category_table.h index d7ef996189a..ff35ff45e83 100644 --- a/src/include/common/unicode_category_table.h +++ b/src/include/common/unicode_category_table.h @@ -25,6 +25,676 @@ typedef struct uint8 category; /* General Category */ } pg_category_range; +typedef struct +{ + uint32 first; /* Unicode codepoint */ + uint32 last; /* Unicode codepoint */ +} pg_unicode_range; + +typedef struct +{ + uint8 category; + uint8 properties; +} pg_unicode_properties; + +/* + * The properties currently used, in no particular order. Fits in a uint8, but + * if more properties are added, a wider integer will be needed. 
+ */ +#define PG_U_PROP_ALPHABETIC (1 << 0) +#define PG_U_PROP_LOWERCASE (1 << 1) +#define PG_U_PROP_UPPERCASE (1 << 2) +#define PG_U_PROP_CASED (1 << 3) +#define PG_U_PROP_CASE_IGNORABLE (1 << 4) +#define PG_U_PROP_WHITE_SPACE (1 << 5) +#define PG_U_PROP_JOIN_CONTROL (1 << 6) +#define PG_U_PROP_HEX_DIGIT (1 << 7) + +/* table for fast lookup of ASCII codepoints */ +static const pg_unicode_properties unicode_opt_ascii[128] = +{ + { + /* 0x000000 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000001 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000002 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000003 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000004 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000005 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000006 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000007 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000008 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000009 */ + .category = PG_U_CONTROL, + .properties = PG_U_PROP_WHITE_SPACE + }, + { + /* 0x00000a */ + .category = PG_U_CONTROL, + .properties = PG_U_PROP_WHITE_SPACE + }, + { + /* 0x00000b */ + .category = PG_U_CONTROL, + .properties = PG_U_PROP_WHITE_SPACE + }, + { + /* 0x00000c */ + .category = PG_U_CONTROL, + .properties = PG_U_PROP_WHITE_SPACE + }, + { + /* 0x00000d */ + .category = PG_U_CONTROL, + .properties = PG_U_PROP_WHITE_SPACE + }, + { + /* 0x00000e */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x00000f */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000010 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000011 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000012 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000013 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000014 */ + 
.category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000015 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000016 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000017 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000018 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000019 */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x00001a */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x00001b */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x00001c */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x00001d */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x00001e */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x00001f */ + .category = PG_U_CONTROL, + .properties = 0 + }, + { + /* 0x000020 */ + .category = PG_U_SPACE_SEPARATOR, + .properties = PG_U_PROP_WHITE_SPACE + }, + { + /* 0x000021 */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = 0 + }, + { + /* 0x000022 */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = 0 + }, + { + /* 0x000023 */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = 0 + }, + { + /* 0x000024 */ + .category = PG_U_CURRENCY_SYMBOL, + .properties = 0 + }, + { + /* 0x000025 */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = 0 + }, + { + /* 0x000026 */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = 0 + }, + { + /* 0x000027 */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = PG_U_PROP_CASE_IGNORABLE + }, + { + /* 0x000028 */ + .category = PG_U_OPEN_PUNCTUATION, + .properties = 0 + }, + { + /* 0x000029 */ + .category = PG_U_CLOSE_PUNCTUATION, + .properties = 0 + }, + { + /* 0x00002a */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = 0 + }, + { + /* 0x00002b */ + .category = PG_U_MATH_SYMBOL, + .properties = 0 + }, + { + /* 0x00002c */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = 0 + }, + { + /* 0x00002d */ + .category = PG_U_DASH_PUNCTUATION, + 
.properties = 0 + }, + { + /* 0x00002e */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = PG_U_PROP_CASE_IGNORABLE + }, + { + /* 0x00002f */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = 0 + }, + { + /* 0x000030 */ + .category = PG_U_DECIMAL_NUMBER, + .properties = PG_U_PROP_HEX_DIGIT + }, + { + /* 0x000031 */ + .category = PG_U_DECIMAL_NUMBER, + .properties = PG_U_PROP_HEX_DIGIT + }, + { + /* 0x000032 */ + .category = PG_U_DECIMAL_NUMBER, + .properties = PG_U_PROP_HEX_DIGIT + }, + { + /* 0x000033 */ + .category = PG_U_DECIMAL_NUMBER, + .properties = PG_U_PROP_HEX_DIGIT + }, + { + /* 0x000034 */ + .category = PG_U_DECIMAL_NUMBER, + .properties = PG_U_PROP_HEX_DIGIT + }, + { + /* 0x000035 */ + .category = PG_U_DECIMAL_NUMBER, + .properties = PG_U_PROP_HEX_DIGIT + }, + { + /* 0x000036 */ + .category = PG_U_DECIMAL_NUMBER, + .properties = PG_U_PROP_HEX_DIGIT + }, + { + /* 0x000037 */ + .category = PG_U_DECIMAL_NUMBER, + .properties = PG_U_PROP_HEX_DIGIT + }, + { + /* 0x000038 */ + .category = PG_U_DECIMAL_NUMBER, + .properties = PG_U_PROP_HEX_DIGIT + }, + { + /* 0x000039 */ + .category = PG_U_DECIMAL_NUMBER, + .properties = PG_U_PROP_HEX_DIGIT + }, + { + /* 0x00003a */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = PG_U_PROP_CASE_IGNORABLE + }, + { + /* 0x00003b */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = 0 + }, + { + /* 0x00003c */ + .category = PG_U_MATH_SYMBOL, + .properties = 0 + }, + { + /* 0x00003d */ + .category = PG_U_MATH_SYMBOL, + .properties = 0 + }, + { + /* 0x00003e */ + .category = PG_U_MATH_SYMBOL, + .properties = 0 + }, + { + /* 0x00003f */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = 0 + }, + { + /* 0x000040 */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = 0 + }, + { + /* 0x000041 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000042 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = 
PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000043 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000044 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000045 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000046 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000047 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000048 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000049 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x00004a */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x00004b */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x00004c */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x00004d */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x00004e */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x00004f */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { 
+ /* 0x000050 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000051 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000052 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000053 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000054 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000055 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000056 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000057 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000058 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x000059 */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x00005a */ + .category = PG_U_UPPERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_UPPERCASE | PG_U_PROP_CASED + }, + { + /* 0x00005b */ + .category = PG_U_OPEN_PUNCTUATION, + .properties = 0 + }, + { + /* 0x00005c */ + .category = PG_U_OTHER_PUNCTUATION, + .properties = 0 + }, + { + /* 0x00005d */ + .category = PG_U_CLOSE_PUNCTUATION, + .properties = 0 + }, + { + /* 0x00005e */ + .category = PG_U_MODIFIER_SYMBOL, + .properties = PG_U_PROP_CASE_IGNORABLE + }, + { + /* 0x00005f */ + .category = PG_U_CONNECTOR_PUNCTUATION, + .properties = 0 + }, + { + /* 0x000060 */ + 
.category = PG_U_MODIFIER_SYMBOL, + .properties = PG_U_PROP_CASE_IGNORABLE + }, + { + /* 0x000061 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000062 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000063 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000064 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000065 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000066 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_HEX_DIGIT | PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000067 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000068 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000069 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x00006a */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x00006b */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x00006c */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x00006d */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | 
PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x00006e */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x00006f */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000070 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000071 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000072 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000073 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000074 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000075 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000076 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000077 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000078 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x000079 */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x00007a */ + .category = PG_U_LOWERCASE_LETTER, + .properties = PG_U_PROP_ALPHABETIC | PG_U_PROP_LOWERCASE | PG_U_PROP_CASED + }, + { + /* 0x00007b */ + .category = PG_U_OPEN_PUNCTUATION, + .properties = 0 + }, + { + /* 0x00007c */ + .category = PG_U_MATH_SYMBOL, + 
.properties = 0 + }, + { + /* 0x00007d */ + .category = PG_U_CLOSE_PUNCTUATION, + .properties = 0 + }, + { + /* 0x00007e */ + .category = PG_U_MATH_SYMBOL, + .properties = 0 + }, + { + /* 0x00007f */ + .category = PG_U_CONTROL, + .properties = 0 + }, +}; + /* table of Unicode codepoint ranges and their categories */ static const pg_category_range unicode_categories[3302] = { @@ -3329,5 +3999,3027 @@ static const pg_category_range unicode_categories[3302] = {0x0e0020, 0x0e007f, PG_U_FORMAT}, {0x0e0100, 0x0e01ef, PG_U_NONSPACING_MARK}, {0x0f0000, 0x0ffffd, PG_U_PRIVATE_USE}, - {0x100000, 0x10fffd, PG_U_PRIVATE_USE} + {0x100000, 0x10fffd, PG_U_PRIVATE_USE}, +}; + +/* table of Unicode codepoint ranges of Alphabetic characters */ +static const pg_unicode_range unicode_alphabetic[1141] = +{ + {0x000041, 0x00005a}, + {0x000061, 0x00007a}, + {0x0000aa, 0x0000aa}, + {0x0000b5, 0x0000b5}, + {0x0000ba, 0x0000ba}, + {0x0000c0, 0x0000d6}, + {0x0000d8, 0x0000f6}, + {0x0000f8, 0x0001ba}, + {0x0001bb, 0x0001bb}, + {0x0001bc, 0x0001bf}, + {0x0001c0, 0x0001c3}, + {0x0001c4, 0x000293}, + {0x000294, 0x000294}, + {0x000295, 0x0002af}, + {0x0002b0, 0x0002c1}, + {0x0002c6, 0x0002d1}, + {0x0002e0, 0x0002e4}, + {0x0002ec, 0x0002ec}, + {0x0002ee, 0x0002ee}, + {0x000345, 0x000345}, + {0x000370, 0x000373}, + {0x000374, 0x000374}, + {0x000376, 0x000377}, + {0x00037a, 0x00037a}, + {0x00037b, 0x00037d}, + {0x00037f, 0x00037f}, + {0x000386, 0x000386}, + {0x000388, 0x00038a}, + {0x00038c, 0x00038c}, + {0x00038e, 0x0003a1}, + {0x0003a3, 0x0003f5}, + {0x0003f7, 0x000481}, + {0x00048a, 0x00052f}, + {0x000531, 0x000556}, + {0x000559, 0x000559}, + {0x000560, 0x000588}, + {0x0005b0, 0x0005bd}, + {0x0005bf, 0x0005bf}, + {0x0005c1, 0x0005c2}, + {0x0005c4, 0x0005c5}, + {0x0005c7, 0x0005c7}, + {0x0005d0, 0x0005ea}, + {0x0005ef, 0x0005f2}, + {0x000610, 0x00061a}, + {0x000620, 0x00063f}, + {0x000640, 0x000640}, + {0x000641, 0x00064a}, + {0x00064b, 0x000657}, + {0x000659, 0x00065f}, + {0x00066e, 0x00066f}, + 
{0x000670, 0x000670}, + {0x000671, 0x0006d3}, + {0x0006d5, 0x0006d5}, + {0x0006d6, 0x0006dc}, + {0x0006e1, 0x0006e4}, + {0x0006e5, 0x0006e6}, + {0x0006e7, 0x0006e8}, + {0x0006ed, 0x0006ed}, + {0x0006ee, 0x0006ef}, + {0x0006fa, 0x0006fc}, + {0x0006ff, 0x0006ff}, + {0x000710, 0x000710}, + {0x000711, 0x000711}, + {0x000712, 0x00072f}, + {0x000730, 0x00073f}, + {0x00074d, 0x0007a5}, + {0x0007a6, 0x0007b0}, + {0x0007b1, 0x0007b1}, + {0x0007ca, 0x0007ea}, + {0x0007f4, 0x0007f5}, + {0x0007fa, 0x0007fa}, + {0x000800, 0x000815}, + {0x000816, 0x000817}, + {0x00081a, 0x00081a}, + {0x00081b, 0x000823}, + {0x000824, 0x000824}, + {0x000825, 0x000827}, + {0x000828, 0x000828}, + {0x000829, 0x00082c}, + {0x000840, 0x000858}, + {0x000860, 0x00086a}, + {0x000870, 0x000887}, + {0x000889, 0x00088e}, + {0x0008a0, 0x0008c8}, + {0x0008c9, 0x0008c9}, + {0x0008d4, 0x0008df}, + {0x0008e3, 0x0008e9}, + {0x0008f0, 0x000902}, + {0x000903, 0x000903}, + {0x000904, 0x000939}, + {0x00093a, 0x00093a}, + {0x00093b, 0x00093b}, + {0x00093d, 0x00093d}, + {0x00093e, 0x000940}, + {0x000941, 0x000948}, + {0x000949, 0x00094c}, + {0x00094e, 0x00094f}, + {0x000950, 0x000950}, + {0x000955, 0x000957}, + {0x000958, 0x000961}, + {0x000962, 0x000963}, + {0x000971, 0x000971}, + {0x000972, 0x000980}, + {0x000981, 0x000981}, + {0x000982, 0x000983}, + {0x000985, 0x00098c}, + {0x00098f, 0x000990}, + {0x000993, 0x0009a8}, + {0x0009aa, 0x0009b0}, + {0x0009b2, 0x0009b2}, + {0x0009b6, 0x0009b9}, + {0x0009bd, 0x0009bd}, + {0x0009be, 0x0009c0}, + {0x0009c1, 0x0009c4}, + {0x0009c7, 0x0009c8}, + {0x0009cb, 0x0009cc}, + {0x0009ce, 0x0009ce}, + {0x0009d7, 0x0009d7}, + {0x0009dc, 0x0009dd}, + {0x0009df, 0x0009e1}, + {0x0009e2, 0x0009e3}, + {0x0009f0, 0x0009f1}, + {0x0009fc, 0x0009fc}, + {0x000a01, 0x000a02}, + {0x000a03, 0x000a03}, + {0x000a05, 0x000a0a}, + {0x000a0f, 0x000a10}, + {0x000a13, 0x000a28}, + {0x000a2a, 0x000a30}, + {0x000a32, 0x000a33}, + {0x000a35, 0x000a36}, + {0x000a38, 0x000a39}, + {0x000a3e, 0x000a40}, + 
{0x000a41, 0x000a42}, + {0x000a47, 0x000a48}, + {0x000a4b, 0x000a4c}, + {0x000a51, 0x000a51}, + {0x000a59, 0x000a5c}, + {0x000a5e, 0x000a5e}, + {0x000a70, 0x000a71}, + {0x000a72, 0x000a74}, + {0x000a75, 0x000a75}, + {0x000a81, 0x000a82}, + {0x000a83, 0x000a83}, + {0x000a85, 0x000a8d}, + {0x000a8f, 0x000a91}, + {0x000a93, 0x000aa8}, + {0x000aaa, 0x000ab0}, + {0x000ab2, 0x000ab3}, + {0x000ab5, 0x000ab9}, + {0x000abd, 0x000abd}, + {0x000abe, 0x000ac0}, + {0x000ac1, 0x000ac5}, + {0x000ac7, 0x000ac8}, + {0x000ac9, 0x000ac9}, + {0x000acb, 0x000acc}, + {0x000ad0, 0x000ad0}, + {0x000ae0, 0x000ae1}, + {0x000ae2, 0x000ae3}, + {0x000af9, 0x000af9}, + {0x000afa, 0x000afc}, + {0x000b01, 0x000b01}, + {0x000b02, 0x000b03}, + {0x000b05, 0x000b0c}, + {0x000b0f, 0x000b10}, + {0x000b13, 0x000b28}, + {0x000b2a, 0x000b30}, + {0x000b32, 0x000b33}, + {0x000b35, 0x000b39}, + {0x000b3d, 0x000b3d}, + {0x000b3e, 0x000b3e}, + {0x000b3f, 0x000b3f}, + {0x000b40, 0x000b40}, + {0x000b41, 0x000b44}, + {0x000b47, 0x000b48}, + {0x000b4b, 0x000b4c}, + {0x000b56, 0x000b56}, + {0x000b57, 0x000b57}, + {0x000b5c, 0x000b5d}, + {0x000b5f, 0x000b61}, + {0x000b62, 0x000b63}, + {0x000b71, 0x000b71}, + {0x000b82, 0x000b82}, + {0x000b83, 0x000b83}, + {0x000b85, 0x000b8a}, + {0x000b8e, 0x000b90}, + {0x000b92, 0x000b95}, + {0x000b99, 0x000b9a}, + {0x000b9c, 0x000b9c}, + {0x000b9e, 0x000b9f}, + {0x000ba3, 0x000ba4}, + {0x000ba8, 0x000baa}, + {0x000bae, 0x000bb9}, + {0x000bbe, 0x000bbf}, + {0x000bc0, 0x000bc0}, + {0x000bc1, 0x000bc2}, + {0x000bc6, 0x000bc8}, + {0x000bca, 0x000bcc}, + {0x000bd0, 0x000bd0}, + {0x000bd7, 0x000bd7}, + {0x000c00, 0x000c00}, + {0x000c01, 0x000c03}, + {0x000c04, 0x000c04}, + {0x000c05, 0x000c0c}, + {0x000c0e, 0x000c10}, + {0x000c12, 0x000c28}, + {0x000c2a, 0x000c39}, + {0x000c3d, 0x000c3d}, + {0x000c3e, 0x000c40}, + {0x000c41, 0x000c44}, + {0x000c46, 0x000c48}, + {0x000c4a, 0x000c4c}, + {0x000c55, 0x000c56}, + {0x000c58, 0x000c5a}, + {0x000c5d, 0x000c5d}, + {0x000c60, 0x000c61}, + 
{0x000c62, 0x000c63}, + {0x000c80, 0x000c80}, + {0x000c81, 0x000c81}, + {0x000c82, 0x000c83}, + {0x000c85, 0x000c8c}, + {0x000c8e, 0x000c90}, + {0x000c92, 0x000ca8}, + {0x000caa, 0x000cb3}, + {0x000cb5, 0x000cb9}, + {0x000cbd, 0x000cbd}, + {0x000cbe, 0x000cbe}, + {0x000cbf, 0x000cbf}, + {0x000cc0, 0x000cc4}, + {0x000cc6, 0x000cc6}, + {0x000cc7, 0x000cc8}, + {0x000cca, 0x000ccb}, + {0x000ccc, 0x000ccc}, + {0x000cd5, 0x000cd6}, + {0x000cdd, 0x000cde}, + {0x000ce0, 0x000ce1}, + {0x000ce2, 0x000ce3}, + {0x000cf1, 0x000cf2}, + {0x000cf3, 0x000cf3}, + {0x000d00, 0x000d01}, + {0x000d02, 0x000d03}, + {0x000d04, 0x000d0c}, + {0x000d0e, 0x000d10}, + {0x000d12, 0x000d3a}, + {0x000d3d, 0x000d3d}, + {0x000d3e, 0x000d40}, + {0x000d41, 0x000d44}, + {0x000d46, 0x000d48}, + {0x000d4a, 0x000d4c}, + {0x000d4e, 0x000d4e}, + {0x000d54, 0x000d56}, + {0x000d57, 0x000d57}, + {0x000d5f, 0x000d61}, + {0x000d62, 0x000d63}, + {0x000d7a, 0x000d7f}, + {0x000d81, 0x000d81}, + {0x000d82, 0x000d83}, + {0x000d85, 0x000d96}, + {0x000d9a, 0x000db1}, + {0x000db3, 0x000dbb}, + {0x000dbd, 0x000dbd}, + {0x000dc0, 0x000dc6}, + {0x000dcf, 0x000dd1}, + {0x000dd2, 0x000dd4}, + {0x000dd6, 0x000dd6}, + {0x000dd8, 0x000ddf}, + {0x000df2, 0x000df3}, + {0x000e01, 0x000e30}, + {0x000e31, 0x000e31}, + {0x000e32, 0x000e33}, + {0x000e34, 0x000e3a}, + {0x000e40, 0x000e45}, + {0x000e46, 0x000e46}, + {0x000e4d, 0x000e4d}, + {0x000e81, 0x000e82}, + {0x000e84, 0x000e84}, + {0x000e86, 0x000e8a}, + {0x000e8c, 0x000ea3}, + {0x000ea5, 0x000ea5}, + {0x000ea7, 0x000eb0}, + {0x000eb1, 0x000eb1}, + {0x000eb2, 0x000eb3}, + {0x000eb4, 0x000eb9}, + {0x000ebb, 0x000ebc}, + {0x000ebd, 0x000ebd}, + {0x000ec0, 0x000ec4}, + {0x000ec6, 0x000ec6}, + {0x000ecd, 0x000ecd}, + {0x000edc, 0x000edf}, + {0x000f00, 0x000f00}, + {0x000f40, 0x000f47}, + {0x000f49, 0x000f6c}, + {0x000f71, 0x000f7e}, + {0x000f7f, 0x000f7f}, + {0x000f80, 0x000f83}, + {0x000f88, 0x000f8c}, + {0x000f8d, 0x000f97}, + {0x000f99, 0x000fbc}, + {0x001000, 0x00102a}, + 
{0x00102b, 0x00102c}, + {0x00102d, 0x001030}, + {0x001031, 0x001031}, + {0x001032, 0x001036}, + {0x001038, 0x001038}, + {0x00103b, 0x00103c}, + {0x00103d, 0x00103e}, + {0x00103f, 0x00103f}, + {0x001050, 0x001055}, + {0x001056, 0x001057}, + {0x001058, 0x001059}, + {0x00105a, 0x00105d}, + {0x00105e, 0x001060}, + {0x001061, 0x001061}, + {0x001062, 0x001064}, + {0x001065, 0x001066}, + {0x001067, 0x00106d}, + {0x00106e, 0x001070}, + {0x001071, 0x001074}, + {0x001075, 0x001081}, + {0x001082, 0x001082}, + {0x001083, 0x001084}, + {0x001085, 0x001086}, + {0x001087, 0x00108c}, + {0x00108d, 0x00108d}, + {0x00108e, 0x00108e}, + {0x00108f, 0x00108f}, + {0x00109a, 0x00109c}, + {0x00109d, 0x00109d}, + {0x0010a0, 0x0010c5}, + {0x0010c7, 0x0010c7}, + {0x0010cd, 0x0010cd}, + {0x0010d0, 0x0010fa}, + {0x0010fc, 0x0010fc}, + {0x0010fd, 0x0010ff}, + {0x001100, 0x001248}, + {0x00124a, 0x00124d}, + {0x001250, 0x001256}, + {0x001258, 0x001258}, + {0x00125a, 0x00125d}, + {0x001260, 0x001288}, + {0x00128a, 0x00128d}, + {0x001290, 0x0012b0}, + {0x0012b2, 0x0012b5}, + {0x0012b8, 0x0012be}, + {0x0012c0, 0x0012c0}, + {0x0012c2, 0x0012c5}, + {0x0012c8, 0x0012d6}, + {0x0012d8, 0x001310}, + {0x001312, 0x001315}, + {0x001318, 0x00135a}, + {0x001380, 0x00138f}, + {0x0013a0, 0x0013f5}, + {0x0013f8, 0x0013fd}, + {0x001401, 0x00166c}, + {0x00166f, 0x00167f}, + {0x001681, 0x00169a}, + {0x0016a0, 0x0016ea}, + {0x0016ee, 0x0016f0}, + {0x0016f1, 0x0016f8}, + {0x001700, 0x001711}, + {0x001712, 0x001713}, + {0x00171f, 0x001731}, + {0x001732, 0x001733}, + {0x001740, 0x001751}, + {0x001752, 0x001753}, + {0x001760, 0x00176c}, + {0x00176e, 0x001770}, + {0x001772, 0x001773}, + {0x001780, 0x0017b3}, + {0x0017b6, 0x0017b6}, + {0x0017b7, 0x0017bd}, + {0x0017be, 0x0017c5}, + {0x0017c6, 0x0017c6}, + {0x0017c7, 0x0017c8}, + {0x0017d7, 0x0017d7}, + {0x0017dc, 0x0017dc}, + {0x001820, 0x001842}, + {0x001843, 0x001843}, + {0x001844, 0x001878}, + {0x001880, 0x001884}, + {0x001885, 0x001886}, + {0x001887, 0x0018a8}, + 
{0x0018a9, 0x0018a9}, + {0x0018aa, 0x0018aa}, + {0x0018b0, 0x0018f5}, + {0x001900, 0x00191e}, + {0x001920, 0x001922}, + {0x001923, 0x001926}, + {0x001927, 0x001928}, + {0x001929, 0x00192b}, + {0x001930, 0x001931}, + {0x001932, 0x001932}, + {0x001933, 0x001938}, + {0x001950, 0x00196d}, + {0x001970, 0x001974}, + {0x001980, 0x0019ab}, + {0x0019b0, 0x0019c9}, + {0x001a00, 0x001a16}, + {0x001a17, 0x001a18}, + {0x001a19, 0x001a1a}, + {0x001a1b, 0x001a1b}, + {0x001a20, 0x001a54}, + {0x001a55, 0x001a55}, + {0x001a56, 0x001a56}, + {0x001a57, 0x001a57}, + {0x001a58, 0x001a5e}, + {0x001a61, 0x001a61}, + {0x001a62, 0x001a62}, + {0x001a63, 0x001a64}, + {0x001a65, 0x001a6c}, + {0x001a6d, 0x001a72}, + {0x001a73, 0x001a74}, + {0x001aa7, 0x001aa7}, + {0x001abf, 0x001ac0}, + {0x001acc, 0x001ace}, + {0x001b00, 0x001b03}, + {0x001b04, 0x001b04}, + {0x001b05, 0x001b33}, + {0x001b35, 0x001b35}, + {0x001b36, 0x001b3a}, + {0x001b3b, 0x001b3b}, + {0x001b3c, 0x001b3c}, + {0x001b3d, 0x001b41}, + {0x001b42, 0x001b42}, + {0x001b43, 0x001b43}, + {0x001b45, 0x001b4c}, + {0x001b80, 0x001b81}, + {0x001b82, 0x001b82}, + {0x001b83, 0x001ba0}, + {0x001ba1, 0x001ba1}, + {0x001ba2, 0x001ba5}, + {0x001ba6, 0x001ba7}, + {0x001ba8, 0x001ba9}, + {0x001bac, 0x001bad}, + {0x001bae, 0x001baf}, + {0x001bba, 0x001be5}, + {0x001be7, 0x001be7}, + {0x001be8, 0x001be9}, + {0x001bea, 0x001bec}, + {0x001bed, 0x001bed}, + {0x001bee, 0x001bee}, + {0x001bef, 0x001bf1}, + {0x001c00, 0x001c23}, + {0x001c24, 0x001c2b}, + {0x001c2c, 0x001c33}, + {0x001c34, 0x001c35}, + {0x001c36, 0x001c36}, + {0x001c4d, 0x001c4f}, + {0x001c5a, 0x001c77}, + {0x001c78, 0x001c7d}, + {0x001c80, 0x001c88}, + {0x001c90, 0x001cba}, + {0x001cbd, 0x001cbf}, + {0x001ce9, 0x001cec}, + {0x001cee, 0x001cf3}, + {0x001cf5, 0x001cf6}, + {0x001cfa, 0x001cfa}, + {0x001d00, 0x001d2b}, + {0x001d2c, 0x001d6a}, + {0x001d6b, 0x001d77}, + {0x001d78, 0x001d78}, + {0x001d79, 0x001d9a}, + {0x001d9b, 0x001dbf}, + {0x001de7, 0x001df4}, + {0x001e00, 0x001f15}, + 
{0x001f18, 0x001f1d}, + {0x001f20, 0x001f45}, + {0x001f48, 0x001f4d}, + {0x001f50, 0x001f57}, + {0x001f59, 0x001f59}, + {0x001f5b, 0x001f5b}, + {0x001f5d, 0x001f5d}, + {0x001f5f, 0x001f7d}, + {0x001f80, 0x001fb4}, + {0x001fb6, 0x001fbc}, + {0x001fbe, 0x001fbe}, + {0x001fc2, 0x001fc4}, + {0x001fc6, 0x001fcc}, + {0x001fd0, 0x001fd3}, + {0x001fd6, 0x001fdb}, + {0x001fe0, 0x001fec}, + {0x001ff2, 0x001ff4}, + {0x001ff6, 0x001ffc}, + {0x002071, 0x002071}, + {0x00207f, 0x00207f}, + {0x002090, 0x00209c}, + {0x002102, 0x002102}, + {0x002107, 0x002107}, + {0x00210a, 0x002113}, + {0x002115, 0x002115}, + {0x002119, 0x00211d}, + {0x002124, 0x002124}, + {0x002126, 0x002126}, + {0x002128, 0x002128}, + {0x00212a, 0x00212d}, + {0x00212f, 0x002134}, + {0x002135, 0x002138}, + {0x002139, 0x002139}, + {0x00213c, 0x00213f}, + {0x002145, 0x002149}, + {0x00214e, 0x00214e}, + {0x002160, 0x002182}, + {0x002183, 0x002184}, + {0x002185, 0x002188}, + {0x0024b6, 0x0024e9}, + {0x002c00, 0x002c7b}, + {0x002c7c, 0x002c7d}, + {0x002c7e, 0x002ce4}, + {0x002ceb, 0x002cee}, + {0x002cf2, 0x002cf3}, + {0x002d00, 0x002d25}, + {0x002d27, 0x002d27}, + {0x002d2d, 0x002d2d}, + {0x002d30, 0x002d67}, + {0x002d6f, 0x002d6f}, + {0x002d80, 0x002d96}, + {0x002da0, 0x002da6}, + {0x002da8, 0x002dae}, + {0x002db0, 0x002db6}, + {0x002db8, 0x002dbe}, + {0x002dc0, 0x002dc6}, + {0x002dc8, 0x002dce}, + {0x002dd0, 0x002dd6}, + {0x002dd8, 0x002dde}, + {0x002de0, 0x002dff}, + {0x002e2f, 0x002e2f}, + {0x003005, 0x003005}, + {0x003006, 0x003006}, + {0x003007, 0x003007}, + {0x003021, 0x003029}, + {0x003031, 0x003035}, + {0x003038, 0x00303a}, + {0x00303b, 0x00303b}, + {0x00303c, 0x00303c}, + {0x003041, 0x003096}, + {0x00309d, 0x00309e}, + {0x00309f, 0x00309f}, + {0x0030a1, 0x0030fa}, + {0x0030fc, 0x0030fe}, + {0x0030ff, 0x0030ff}, + {0x003105, 0x00312f}, + {0x003131, 0x00318e}, + {0x0031a0, 0x0031bf}, + {0x0031f0, 0x0031ff}, + {0x003400, 0x004dbf}, + {0x004e00, 0x00a014}, + {0x00a015, 0x00a015}, + {0x00a016, 0x00a48c}, + 
{0x00a4d0, 0x00a4f7}, + {0x00a4f8, 0x00a4fd}, + {0x00a500, 0x00a60b}, + {0x00a60c, 0x00a60c}, + {0x00a610, 0x00a61f}, + {0x00a62a, 0x00a62b}, + {0x00a640, 0x00a66d}, + {0x00a66e, 0x00a66e}, + {0x00a674, 0x00a67b}, + {0x00a67f, 0x00a67f}, + {0x00a680, 0x00a69b}, + {0x00a69c, 0x00a69d}, + {0x00a69e, 0x00a69f}, + {0x00a6a0, 0x00a6e5}, + {0x00a6e6, 0x00a6ef}, + {0x00a717, 0x00a71f}, + {0x00a722, 0x00a76f}, + {0x00a770, 0x00a770}, + {0x00a771, 0x00a787}, + {0x00a788, 0x00a788}, + {0x00a78b, 0x00a78e}, + {0x00a78f, 0x00a78f}, + {0x00a790, 0x00a7ca}, + {0x00a7d0, 0x00a7d1}, + {0x00a7d3, 0x00a7d3}, + {0x00a7d5, 0x00a7d9}, + {0x00a7f2, 0x00a7f4}, + {0x00a7f5, 0x00a7f6}, + {0x00a7f7, 0x00a7f7}, + {0x00a7f8, 0x00a7f9}, + {0x00a7fa, 0x00a7fa}, + {0x00a7fb, 0x00a801}, + {0x00a802, 0x00a802}, + {0x00a803, 0x00a805}, + {0x00a807, 0x00a80a}, + {0x00a80b, 0x00a80b}, + {0x00a80c, 0x00a822}, + {0x00a823, 0x00a824}, + {0x00a825, 0x00a826}, + {0x00a827, 0x00a827}, + {0x00a840, 0x00a873}, + {0x00a880, 0x00a881}, + {0x00a882, 0x00a8b3}, + {0x00a8b4, 0x00a8c3}, + {0x00a8c5, 0x00a8c5}, + {0x00a8f2, 0x00a8f7}, + {0x00a8fb, 0x00a8fb}, + {0x00a8fd, 0x00a8fe}, + {0x00a8ff, 0x00a8ff}, + {0x00a90a, 0x00a925}, + {0x00a926, 0x00a92a}, + {0x00a930, 0x00a946}, + {0x00a947, 0x00a951}, + {0x00a952, 0x00a952}, + {0x00a960, 0x00a97c}, + {0x00a980, 0x00a982}, + {0x00a983, 0x00a983}, + {0x00a984, 0x00a9b2}, + {0x00a9b4, 0x00a9b5}, + {0x00a9b6, 0x00a9b9}, + {0x00a9ba, 0x00a9bb}, + {0x00a9bc, 0x00a9bd}, + {0x00a9be, 0x00a9bf}, + {0x00a9cf, 0x00a9cf}, + {0x00a9e0, 0x00a9e4}, + {0x00a9e5, 0x00a9e5}, + {0x00a9e6, 0x00a9e6}, + {0x00a9e7, 0x00a9ef}, + {0x00a9fa, 0x00a9fe}, + {0x00aa00, 0x00aa28}, + {0x00aa29, 0x00aa2e}, + {0x00aa2f, 0x00aa30}, + {0x00aa31, 0x00aa32}, + {0x00aa33, 0x00aa34}, + {0x00aa35, 0x00aa36}, + {0x00aa40, 0x00aa42}, + {0x00aa43, 0x00aa43}, + {0x00aa44, 0x00aa4b}, + {0x00aa4c, 0x00aa4c}, + {0x00aa4d, 0x00aa4d}, + {0x00aa60, 0x00aa6f}, + {0x00aa70, 0x00aa70}, + {0x00aa71, 0x00aa76}, + 
{0x00aa7a, 0x00aa7a}, + {0x00aa7b, 0x00aa7b}, + {0x00aa7c, 0x00aa7c}, + {0x00aa7d, 0x00aa7d}, + {0x00aa7e, 0x00aaaf}, + {0x00aab0, 0x00aab0}, + {0x00aab1, 0x00aab1}, + {0x00aab2, 0x00aab4}, + {0x00aab5, 0x00aab6}, + {0x00aab7, 0x00aab8}, + {0x00aab9, 0x00aabd}, + {0x00aabe, 0x00aabe}, + {0x00aac0, 0x00aac0}, + {0x00aac2, 0x00aac2}, + {0x00aadb, 0x00aadc}, + {0x00aadd, 0x00aadd}, + {0x00aae0, 0x00aaea}, + {0x00aaeb, 0x00aaeb}, + {0x00aaec, 0x00aaed}, + {0x00aaee, 0x00aaef}, + {0x00aaf2, 0x00aaf2}, + {0x00aaf3, 0x00aaf4}, + {0x00aaf5, 0x00aaf5}, + {0x00ab01, 0x00ab06}, + {0x00ab09, 0x00ab0e}, + {0x00ab11, 0x00ab16}, + {0x00ab20, 0x00ab26}, + {0x00ab28, 0x00ab2e}, + {0x00ab30, 0x00ab5a}, + {0x00ab5c, 0x00ab5f}, + {0x00ab60, 0x00ab68}, + {0x00ab69, 0x00ab69}, + {0x00ab70, 0x00abbf}, + {0x00abc0, 0x00abe2}, + {0x00abe3, 0x00abe4}, + {0x00abe5, 0x00abe5}, + {0x00abe6, 0x00abe7}, + {0x00abe8, 0x00abe8}, + {0x00abe9, 0x00abea}, + {0x00ac00, 0x00d7a3}, + {0x00d7b0, 0x00d7c6}, + {0x00d7cb, 0x00d7fb}, + {0x00f900, 0x00fa6d}, + {0x00fa70, 0x00fad9}, + {0x00fb00, 0x00fb06}, + {0x00fb13, 0x00fb17}, + {0x00fb1d, 0x00fb1d}, + {0x00fb1e, 0x00fb1e}, + {0x00fb1f, 0x00fb28}, + {0x00fb2a, 0x00fb36}, + {0x00fb38, 0x00fb3c}, + {0x00fb3e, 0x00fb3e}, + {0x00fb40, 0x00fb41}, + {0x00fb43, 0x00fb44}, + {0x00fb46, 0x00fbb1}, + {0x00fbd3, 0x00fd3d}, + {0x00fd50, 0x00fd8f}, + {0x00fd92, 0x00fdc7}, + {0x00fdf0, 0x00fdfb}, + {0x00fe70, 0x00fe74}, + {0x00fe76, 0x00fefc}, + {0x00ff21, 0x00ff3a}, + {0x00ff41, 0x00ff5a}, + {0x00ff66, 0x00ff6f}, + {0x00ff70, 0x00ff70}, + {0x00ff71, 0x00ff9d}, + {0x00ff9e, 0x00ff9f}, + {0x00ffa0, 0x00ffbe}, + {0x00ffc2, 0x00ffc7}, + {0x00ffca, 0x00ffcf}, + {0x00ffd2, 0x00ffd7}, + {0x00ffda, 0x00ffdc}, + {0x010000, 0x01000b}, + {0x01000d, 0x010026}, + {0x010028, 0x01003a}, + {0x01003c, 0x01003d}, + {0x01003f, 0x01004d}, + {0x010050, 0x01005d}, + {0x010080, 0x0100fa}, + {0x010140, 0x010174}, + {0x010280, 0x01029c}, + {0x0102a0, 0x0102d0}, + {0x010300, 0x01031f}, + 
{0x01032d, 0x010340}, + {0x010341, 0x010341}, + {0x010342, 0x010349}, + {0x01034a, 0x01034a}, + {0x010350, 0x010375}, + {0x010376, 0x01037a}, + {0x010380, 0x01039d}, + {0x0103a0, 0x0103c3}, + {0x0103c8, 0x0103cf}, + {0x0103d1, 0x0103d5}, + {0x010400, 0x01044f}, + {0x010450, 0x01049d}, + {0x0104b0, 0x0104d3}, + {0x0104d8, 0x0104fb}, + {0x010500, 0x010527}, + {0x010530, 0x010563}, + {0x010570, 0x01057a}, + {0x01057c, 0x01058a}, + {0x01058c, 0x010592}, + {0x010594, 0x010595}, + {0x010597, 0x0105a1}, + {0x0105a3, 0x0105b1}, + {0x0105b3, 0x0105b9}, + {0x0105bb, 0x0105bc}, + {0x010600, 0x010736}, + {0x010740, 0x010755}, + {0x010760, 0x010767}, + {0x010780, 0x010785}, + {0x010787, 0x0107b0}, + {0x0107b2, 0x0107ba}, + {0x010800, 0x010805}, + {0x010808, 0x010808}, + {0x01080a, 0x010835}, + {0x010837, 0x010838}, + {0x01083c, 0x01083c}, + {0x01083f, 0x010855}, + {0x010860, 0x010876}, + {0x010880, 0x01089e}, + {0x0108e0, 0x0108f2}, + {0x0108f4, 0x0108f5}, + {0x010900, 0x010915}, + {0x010920, 0x010939}, + {0x010980, 0x0109b7}, + {0x0109be, 0x0109bf}, + {0x010a00, 0x010a00}, + {0x010a01, 0x010a03}, + {0x010a05, 0x010a06}, + {0x010a0c, 0x010a0f}, + {0x010a10, 0x010a13}, + {0x010a15, 0x010a17}, + {0x010a19, 0x010a35}, + {0x010a60, 0x010a7c}, + {0x010a80, 0x010a9c}, + {0x010ac0, 0x010ac7}, + {0x010ac9, 0x010ae4}, + {0x010b00, 0x010b35}, + {0x010b40, 0x010b55}, + {0x010b60, 0x010b72}, + {0x010b80, 0x010b91}, + {0x010c00, 0x010c48}, + {0x010c80, 0x010cb2}, + {0x010cc0, 0x010cf2}, + {0x010d00, 0x010d23}, + {0x010d24, 0x010d27}, + {0x010e80, 0x010ea9}, + {0x010eab, 0x010eac}, + {0x010eb0, 0x010eb1}, + {0x010f00, 0x010f1c}, + {0x010f27, 0x010f27}, + {0x010f30, 0x010f45}, + {0x010f70, 0x010f81}, + {0x010fb0, 0x010fc4}, + {0x010fe0, 0x010ff6}, + {0x011000, 0x011000}, + {0x011001, 0x011001}, + {0x011002, 0x011002}, + {0x011003, 0x011037}, + {0x011038, 0x011045}, + {0x011071, 0x011072}, + {0x011073, 0x011074}, + {0x011075, 0x011075}, + {0x011080, 0x011081}, + {0x011082, 0x011082}, + 
{0x011083, 0x0110af}, + {0x0110b0, 0x0110b2}, + {0x0110b3, 0x0110b6}, + {0x0110b7, 0x0110b8}, + {0x0110c2, 0x0110c2}, + {0x0110d0, 0x0110e8}, + {0x011100, 0x011102}, + {0x011103, 0x011126}, + {0x011127, 0x01112b}, + {0x01112c, 0x01112c}, + {0x01112d, 0x011132}, + {0x011144, 0x011144}, + {0x011145, 0x011146}, + {0x011147, 0x011147}, + {0x011150, 0x011172}, + {0x011176, 0x011176}, + {0x011180, 0x011181}, + {0x011182, 0x011182}, + {0x011183, 0x0111b2}, + {0x0111b3, 0x0111b5}, + {0x0111b6, 0x0111be}, + {0x0111bf, 0x0111bf}, + {0x0111c1, 0x0111c4}, + {0x0111ce, 0x0111ce}, + {0x0111cf, 0x0111cf}, + {0x0111da, 0x0111da}, + {0x0111dc, 0x0111dc}, + {0x011200, 0x011211}, + {0x011213, 0x01122b}, + {0x01122c, 0x01122e}, + {0x01122f, 0x011231}, + {0x011232, 0x011233}, + {0x011234, 0x011234}, + {0x011237, 0x011237}, + {0x01123e, 0x01123e}, + {0x01123f, 0x011240}, + {0x011241, 0x011241}, + {0x011280, 0x011286}, + {0x011288, 0x011288}, + {0x01128a, 0x01128d}, + {0x01128f, 0x01129d}, + {0x01129f, 0x0112a8}, + {0x0112b0, 0x0112de}, + {0x0112df, 0x0112df}, + {0x0112e0, 0x0112e2}, + {0x0112e3, 0x0112e8}, + {0x011300, 0x011301}, + {0x011302, 0x011303}, + {0x011305, 0x01130c}, + {0x01130f, 0x011310}, + {0x011313, 0x011328}, + {0x01132a, 0x011330}, + {0x011332, 0x011333}, + {0x011335, 0x011339}, + {0x01133d, 0x01133d}, + {0x01133e, 0x01133f}, + {0x011340, 0x011340}, + {0x011341, 0x011344}, + {0x011347, 0x011348}, + {0x01134b, 0x01134c}, + {0x011350, 0x011350}, + {0x011357, 0x011357}, + {0x01135d, 0x011361}, + {0x011362, 0x011363}, + {0x011400, 0x011434}, + {0x011435, 0x011437}, + {0x011438, 0x01143f}, + {0x011440, 0x011441}, + {0x011443, 0x011444}, + {0x011445, 0x011445}, + {0x011447, 0x01144a}, + {0x01145f, 0x011461}, + {0x011480, 0x0114af}, + {0x0114b0, 0x0114b2}, + {0x0114b3, 0x0114b8}, + {0x0114b9, 0x0114b9}, + {0x0114ba, 0x0114ba}, + {0x0114bb, 0x0114be}, + {0x0114bf, 0x0114c0}, + {0x0114c1, 0x0114c1}, + {0x0114c4, 0x0114c5}, + {0x0114c7, 0x0114c7}, + {0x011580, 0x0115ae}, + 
{0x0115af, 0x0115b1}, + {0x0115b2, 0x0115b5}, + {0x0115b8, 0x0115bb}, + {0x0115bc, 0x0115bd}, + {0x0115be, 0x0115be}, + {0x0115d8, 0x0115db}, + {0x0115dc, 0x0115dd}, + {0x011600, 0x01162f}, + {0x011630, 0x011632}, + {0x011633, 0x01163a}, + {0x01163b, 0x01163c}, + {0x01163d, 0x01163d}, + {0x01163e, 0x01163e}, + {0x011640, 0x011640}, + {0x011644, 0x011644}, + {0x011680, 0x0116aa}, + {0x0116ab, 0x0116ab}, + {0x0116ac, 0x0116ac}, + {0x0116ad, 0x0116ad}, + {0x0116ae, 0x0116af}, + {0x0116b0, 0x0116b5}, + {0x0116b8, 0x0116b8}, + {0x011700, 0x01171a}, + {0x01171d, 0x01171f}, + {0x011720, 0x011721}, + {0x011722, 0x011725}, + {0x011726, 0x011726}, + {0x011727, 0x01172a}, + {0x011740, 0x011746}, + {0x011800, 0x01182b}, + {0x01182c, 0x01182e}, + {0x01182f, 0x011837}, + {0x011838, 0x011838}, + {0x0118a0, 0x0118df}, + {0x0118ff, 0x011906}, + {0x011909, 0x011909}, + {0x01190c, 0x011913}, + {0x011915, 0x011916}, + {0x011918, 0x01192f}, + {0x011930, 0x011935}, + {0x011937, 0x011938}, + {0x01193b, 0x01193c}, + {0x01193f, 0x01193f}, + {0x011940, 0x011940}, + {0x011941, 0x011941}, + {0x011942, 0x011942}, + {0x0119a0, 0x0119a7}, + {0x0119aa, 0x0119d0}, + {0x0119d1, 0x0119d3}, + {0x0119d4, 0x0119d7}, + {0x0119da, 0x0119db}, + {0x0119dc, 0x0119df}, + {0x0119e1, 0x0119e1}, + {0x0119e3, 0x0119e3}, + {0x0119e4, 0x0119e4}, + {0x011a00, 0x011a00}, + {0x011a01, 0x011a0a}, + {0x011a0b, 0x011a32}, + {0x011a35, 0x011a38}, + {0x011a39, 0x011a39}, + {0x011a3a, 0x011a3a}, + {0x011a3b, 0x011a3e}, + {0x011a50, 0x011a50}, + {0x011a51, 0x011a56}, + {0x011a57, 0x011a58}, + {0x011a59, 0x011a5b}, + {0x011a5c, 0x011a89}, + {0x011a8a, 0x011a96}, + {0x011a97, 0x011a97}, + {0x011a9d, 0x011a9d}, + {0x011ab0, 0x011af8}, + {0x011c00, 0x011c08}, + {0x011c0a, 0x011c2e}, + {0x011c2f, 0x011c2f}, + {0x011c30, 0x011c36}, + {0x011c38, 0x011c3d}, + {0x011c3e, 0x011c3e}, + {0x011c40, 0x011c40}, + {0x011c72, 0x011c8f}, + {0x011c92, 0x011ca7}, + {0x011ca9, 0x011ca9}, + {0x011caa, 0x011cb0}, + {0x011cb1, 0x011cb1}, + 
{0x011cb2, 0x011cb3}, + {0x011cb4, 0x011cb4}, + {0x011cb5, 0x011cb6}, + {0x011d00, 0x011d06}, + {0x011d08, 0x011d09}, + {0x011d0b, 0x011d30}, + {0x011d31, 0x011d36}, + {0x011d3a, 0x011d3a}, + {0x011d3c, 0x011d3d}, + {0x011d3f, 0x011d41}, + {0x011d43, 0x011d43}, + {0x011d46, 0x011d46}, + {0x011d47, 0x011d47}, + {0x011d60, 0x011d65}, + {0x011d67, 0x011d68}, + {0x011d6a, 0x011d89}, + {0x011d8a, 0x011d8e}, + {0x011d90, 0x011d91}, + {0x011d93, 0x011d94}, + {0x011d95, 0x011d95}, + {0x011d96, 0x011d96}, + {0x011d98, 0x011d98}, + {0x011ee0, 0x011ef2}, + {0x011ef3, 0x011ef4}, + {0x011ef5, 0x011ef6}, + {0x011f00, 0x011f01}, + {0x011f02, 0x011f02}, + {0x011f03, 0x011f03}, + {0x011f04, 0x011f10}, + {0x011f12, 0x011f33}, + {0x011f34, 0x011f35}, + {0x011f36, 0x011f3a}, + {0x011f3e, 0x011f3f}, + {0x011f40, 0x011f40}, + {0x011fb0, 0x011fb0}, + {0x012000, 0x012399}, + {0x012400, 0x01246e}, + {0x012480, 0x012543}, + {0x012f90, 0x012ff0}, + {0x013000, 0x01342f}, + {0x013441, 0x013446}, + {0x014400, 0x014646}, + {0x016800, 0x016a38}, + {0x016a40, 0x016a5e}, + {0x016a70, 0x016abe}, + {0x016ad0, 0x016aed}, + {0x016b00, 0x016b2f}, + {0x016b40, 0x016b43}, + {0x016b63, 0x016b77}, + {0x016b7d, 0x016b8f}, + {0x016e40, 0x016e7f}, + {0x016f00, 0x016f4a}, + {0x016f4f, 0x016f4f}, + {0x016f50, 0x016f50}, + {0x016f51, 0x016f87}, + {0x016f8f, 0x016f92}, + {0x016f93, 0x016f9f}, + {0x016fe0, 0x016fe1}, + {0x016fe3, 0x016fe3}, + {0x016ff0, 0x016ff1}, + {0x017000, 0x0187f7}, + {0x018800, 0x018cd5}, + {0x018d00, 0x018d08}, + {0x01aff0, 0x01aff3}, + {0x01aff5, 0x01affb}, + {0x01affd, 0x01affe}, + {0x01b000, 0x01b122}, + {0x01b132, 0x01b132}, + {0x01b150, 0x01b152}, + {0x01b155, 0x01b155}, + {0x01b164, 0x01b167}, + {0x01b170, 0x01b2fb}, + {0x01bc00, 0x01bc6a}, + {0x01bc70, 0x01bc7c}, + {0x01bc80, 0x01bc88}, + {0x01bc90, 0x01bc99}, + {0x01bc9e, 0x01bc9e}, + {0x01d400, 0x01d454}, + {0x01d456, 0x01d49c}, + {0x01d49e, 0x01d49f}, + {0x01d4a2, 0x01d4a2}, + {0x01d4a5, 0x01d4a6}, + {0x01d4a9, 0x01d4ac}, + 
{0x01d4ae, 0x01d4b9}, + {0x01d4bb, 0x01d4bb}, + {0x01d4bd, 0x01d4c3}, + {0x01d4c5, 0x01d505}, + {0x01d507, 0x01d50a}, + {0x01d50d, 0x01d514}, + {0x01d516, 0x01d51c}, + {0x01d51e, 0x01d539}, + {0x01d53b, 0x01d53e}, + {0x01d540, 0x01d544}, + {0x01d546, 0x01d546}, + {0x01d54a, 0x01d550}, + {0x01d552, 0x01d6a5}, + {0x01d6a8, 0x01d6c0}, + {0x01d6c2, 0x01d6da}, + {0x01d6dc, 0x01d6fa}, + {0x01d6fc, 0x01d714}, + {0x01d716, 0x01d734}, + {0x01d736, 0x01d74e}, + {0x01d750, 0x01d76e}, + {0x01d770, 0x01d788}, + {0x01d78a, 0x01d7a8}, + {0x01d7aa, 0x01d7c2}, + {0x01d7c4, 0x01d7cb}, + {0x01df00, 0x01df09}, + {0x01df0a, 0x01df0a}, + {0x01df0b, 0x01df1e}, + {0x01df25, 0x01df2a}, + {0x01e000, 0x01e006}, + {0x01e008, 0x01e018}, + {0x01e01b, 0x01e021}, + {0x01e023, 0x01e024}, + {0x01e026, 0x01e02a}, + {0x01e030, 0x01e06d}, + {0x01e08f, 0x01e08f}, + {0x01e100, 0x01e12c}, + {0x01e137, 0x01e13d}, + {0x01e14e, 0x01e14e}, + {0x01e290, 0x01e2ad}, + {0x01e2c0, 0x01e2eb}, + {0x01e4d0, 0x01e4ea}, + {0x01e4eb, 0x01e4eb}, + {0x01e7e0, 0x01e7e6}, + {0x01e7e8, 0x01e7eb}, + {0x01e7ed, 0x01e7ee}, + {0x01e7f0, 0x01e7fe}, + {0x01e800, 0x01e8c4}, + {0x01e900, 0x01e943}, + {0x01e947, 0x01e947}, + {0x01e94b, 0x01e94b}, + {0x01ee00, 0x01ee03}, + {0x01ee05, 0x01ee1f}, + {0x01ee21, 0x01ee22}, + {0x01ee24, 0x01ee24}, + {0x01ee27, 0x01ee27}, + {0x01ee29, 0x01ee32}, + {0x01ee34, 0x01ee37}, + {0x01ee39, 0x01ee39}, + {0x01ee3b, 0x01ee3b}, + {0x01ee42, 0x01ee42}, + {0x01ee47, 0x01ee47}, + {0x01ee49, 0x01ee49}, + {0x01ee4b, 0x01ee4b}, + {0x01ee4d, 0x01ee4f}, + {0x01ee51, 0x01ee52}, + {0x01ee54, 0x01ee54}, + {0x01ee57, 0x01ee57}, + {0x01ee59, 0x01ee59}, + {0x01ee5b, 0x01ee5b}, + {0x01ee5d, 0x01ee5d}, + {0x01ee5f, 0x01ee5f}, + {0x01ee61, 0x01ee62}, + {0x01ee64, 0x01ee64}, + {0x01ee67, 0x01ee6a}, + {0x01ee6c, 0x01ee72}, + {0x01ee74, 0x01ee77}, + {0x01ee79, 0x01ee7c}, + {0x01ee7e, 0x01ee7e}, + {0x01ee80, 0x01ee89}, + {0x01ee8b, 0x01ee9b}, + {0x01eea1, 0x01eea3}, + {0x01eea5, 0x01eea9}, + {0x01eeab, 0x01eebb}, + 
{0x01f130, 0x01f149}, + {0x01f150, 0x01f169}, + {0x01f170, 0x01f189}, + {0x020000, 0x02a6df}, + {0x02a700, 0x02b739}, + {0x02b740, 0x02b81d}, + {0x02b820, 0x02cea1}, + {0x02ceb0, 0x02ebe0}, + {0x02ebf0, 0x02ee5d}, + {0x02f800, 0x02fa1d}, + {0x030000, 0x03134a}, + {0x031350, 0x0323af}, +}; + +/* table of Unicode codepoint ranges of Lowercase characters */ +static const pg_unicode_range unicode_lowercase[686] = +{ + {0x000061, 0x00007a}, + {0x0000aa, 0x0000aa}, + {0x0000b5, 0x0000b5}, + {0x0000ba, 0x0000ba}, + {0x0000df, 0x0000f6}, + {0x0000f8, 0x0000ff}, + {0x000101, 0x000101}, + {0x000103, 0x000103}, + {0x000105, 0x000105}, + {0x000107, 0x000107}, + {0x000109, 0x000109}, + {0x00010b, 0x00010b}, + {0x00010d, 0x00010d}, + {0x00010f, 0x00010f}, + {0x000111, 0x000111}, + {0x000113, 0x000113}, + {0x000115, 0x000115}, + {0x000117, 0x000117}, + {0x000119, 0x000119}, + {0x00011b, 0x00011b}, + {0x00011d, 0x00011d}, + {0x00011f, 0x00011f}, + {0x000121, 0x000121}, + {0x000123, 0x000123}, + {0x000125, 0x000125}, + {0x000127, 0x000127}, + {0x000129, 0x000129}, + {0x00012b, 0x00012b}, + {0x00012d, 0x00012d}, + {0x00012f, 0x00012f}, + {0x000131, 0x000131}, + {0x000133, 0x000133}, + {0x000135, 0x000135}, + {0x000137, 0x000138}, + {0x00013a, 0x00013a}, + {0x00013c, 0x00013c}, + {0x00013e, 0x00013e}, + {0x000140, 0x000140}, + {0x000142, 0x000142}, + {0x000144, 0x000144}, + {0x000146, 0x000146}, + {0x000148, 0x000149}, + {0x00014b, 0x00014b}, + {0x00014d, 0x00014d}, + {0x00014f, 0x00014f}, + {0x000151, 0x000151}, + {0x000153, 0x000153}, + {0x000155, 0x000155}, + {0x000157, 0x000157}, + {0x000159, 0x000159}, + {0x00015b, 0x00015b}, + {0x00015d, 0x00015d}, + {0x00015f, 0x00015f}, + {0x000161, 0x000161}, + {0x000163, 0x000163}, + {0x000165, 0x000165}, + {0x000167, 0x000167}, + {0x000169, 0x000169}, + {0x00016b, 0x00016b}, + {0x00016d, 0x00016d}, + {0x00016f, 0x00016f}, + {0x000171, 0x000171}, + {0x000173, 0x000173}, + {0x000175, 0x000175}, + {0x000177, 0x000177}, + {0x00017a, 0x00017a}, 
+ {0x00017c, 0x00017c}, + {0x00017e, 0x000180}, + {0x000183, 0x000183}, + {0x000185, 0x000185}, + {0x000188, 0x000188}, + {0x00018c, 0x00018d}, + {0x000192, 0x000192}, + {0x000195, 0x000195}, + {0x000199, 0x00019b}, + {0x00019e, 0x00019e}, + {0x0001a1, 0x0001a1}, + {0x0001a3, 0x0001a3}, + {0x0001a5, 0x0001a5}, + {0x0001a8, 0x0001a8}, + {0x0001aa, 0x0001ab}, + {0x0001ad, 0x0001ad}, + {0x0001b0, 0x0001b0}, + {0x0001b4, 0x0001b4}, + {0x0001b6, 0x0001b6}, + {0x0001b9, 0x0001ba}, + {0x0001bd, 0x0001bf}, + {0x0001c6, 0x0001c6}, + {0x0001c9, 0x0001c9}, + {0x0001cc, 0x0001cc}, + {0x0001ce, 0x0001ce}, + {0x0001d0, 0x0001d0}, + {0x0001d2, 0x0001d2}, + {0x0001d4, 0x0001d4}, + {0x0001d6, 0x0001d6}, + {0x0001d8, 0x0001d8}, + {0x0001da, 0x0001da}, + {0x0001dc, 0x0001dd}, + {0x0001df, 0x0001df}, + {0x0001e1, 0x0001e1}, + {0x0001e3, 0x0001e3}, + {0x0001e5, 0x0001e5}, + {0x0001e7, 0x0001e7}, + {0x0001e9, 0x0001e9}, + {0x0001eb, 0x0001eb}, + {0x0001ed, 0x0001ed}, + {0x0001ef, 0x0001f0}, + {0x0001f3, 0x0001f3}, + {0x0001f5, 0x0001f5}, + {0x0001f9, 0x0001f9}, + {0x0001fb, 0x0001fb}, + {0x0001fd, 0x0001fd}, + {0x0001ff, 0x0001ff}, + {0x000201, 0x000201}, + {0x000203, 0x000203}, + {0x000205, 0x000205}, + {0x000207, 0x000207}, + {0x000209, 0x000209}, + {0x00020b, 0x00020b}, + {0x00020d, 0x00020d}, + {0x00020f, 0x00020f}, + {0x000211, 0x000211}, + {0x000213, 0x000213}, + {0x000215, 0x000215}, + {0x000217, 0x000217}, + {0x000219, 0x000219}, + {0x00021b, 0x00021b}, + {0x00021d, 0x00021d}, + {0x00021f, 0x00021f}, + {0x000221, 0x000221}, + {0x000223, 0x000223}, + {0x000225, 0x000225}, + {0x000227, 0x000227}, + {0x000229, 0x000229}, + {0x00022b, 0x00022b}, + {0x00022d, 0x00022d}, + {0x00022f, 0x00022f}, + {0x000231, 0x000231}, + {0x000233, 0x000239}, + {0x00023c, 0x00023c}, + {0x00023f, 0x000240}, + {0x000242, 0x000242}, + {0x000247, 0x000247}, + {0x000249, 0x000249}, + {0x00024b, 0x00024b}, + {0x00024d, 0x00024d}, + {0x00024f, 0x000293}, + {0x000295, 0x0002af}, + {0x0002b0, 0x0002b8}, + 
{0x0002c0, 0x0002c1}, + {0x0002e0, 0x0002e4}, + {0x000345, 0x000345}, + {0x000371, 0x000371}, + {0x000373, 0x000373}, + {0x000377, 0x000377}, + {0x00037a, 0x00037a}, + {0x00037b, 0x00037d}, + {0x000390, 0x000390}, + {0x0003ac, 0x0003ce}, + {0x0003d0, 0x0003d1}, + {0x0003d5, 0x0003d7}, + {0x0003d9, 0x0003d9}, + {0x0003db, 0x0003db}, + {0x0003dd, 0x0003dd}, + {0x0003df, 0x0003df}, + {0x0003e1, 0x0003e1}, + {0x0003e3, 0x0003e3}, + {0x0003e5, 0x0003e5}, + {0x0003e7, 0x0003e7}, + {0x0003e9, 0x0003e9}, + {0x0003eb, 0x0003eb}, + {0x0003ed, 0x0003ed}, + {0x0003ef, 0x0003f3}, + {0x0003f5, 0x0003f5}, + {0x0003f8, 0x0003f8}, + {0x0003fb, 0x0003fc}, + {0x000430, 0x00045f}, + {0x000461, 0x000461}, + {0x000463, 0x000463}, + {0x000465, 0x000465}, + {0x000467, 0x000467}, + {0x000469, 0x000469}, + {0x00046b, 0x00046b}, + {0x00046d, 0x00046d}, + {0x00046f, 0x00046f}, + {0x000471, 0x000471}, + {0x000473, 0x000473}, + {0x000475, 0x000475}, + {0x000477, 0x000477}, + {0x000479, 0x000479}, + {0x00047b, 0x00047b}, + {0x00047d, 0x00047d}, + {0x00047f, 0x00047f}, + {0x000481, 0x000481}, + {0x00048b, 0x00048b}, + {0x00048d, 0x00048d}, + {0x00048f, 0x00048f}, + {0x000491, 0x000491}, + {0x000493, 0x000493}, + {0x000495, 0x000495}, + {0x000497, 0x000497}, + {0x000499, 0x000499}, + {0x00049b, 0x00049b}, + {0x00049d, 0x00049d}, + {0x00049f, 0x00049f}, + {0x0004a1, 0x0004a1}, + {0x0004a3, 0x0004a3}, + {0x0004a5, 0x0004a5}, + {0x0004a7, 0x0004a7}, + {0x0004a9, 0x0004a9}, + {0x0004ab, 0x0004ab}, + {0x0004ad, 0x0004ad}, + {0x0004af, 0x0004af}, + {0x0004b1, 0x0004b1}, + {0x0004b3, 0x0004b3}, + {0x0004b5, 0x0004b5}, + {0x0004b7, 0x0004b7}, + {0x0004b9, 0x0004b9}, + {0x0004bb, 0x0004bb}, + {0x0004bd, 0x0004bd}, + {0x0004bf, 0x0004bf}, + {0x0004c2, 0x0004c2}, + {0x0004c4, 0x0004c4}, + {0x0004c6, 0x0004c6}, + {0x0004c8, 0x0004c8}, + {0x0004ca, 0x0004ca}, + {0x0004cc, 0x0004cc}, + {0x0004ce, 0x0004cf}, + {0x0004d1, 0x0004d1}, + {0x0004d3, 0x0004d3}, + {0x0004d5, 0x0004d5}, + {0x0004d7, 0x0004d7}, + 
{0x0004d9, 0x0004d9}, + {0x0004db, 0x0004db}, + {0x0004dd, 0x0004dd}, + {0x0004df, 0x0004df}, + {0x0004e1, 0x0004e1}, + {0x0004e3, 0x0004e3}, + {0x0004e5, 0x0004e5}, + {0x0004e7, 0x0004e7}, + {0x0004e9, 0x0004e9}, + {0x0004eb, 0x0004eb}, + {0x0004ed, 0x0004ed}, + {0x0004ef, 0x0004ef}, + {0x0004f1, 0x0004f1}, + {0x0004f3, 0x0004f3}, + {0x0004f5, 0x0004f5}, + {0x0004f7, 0x0004f7}, + {0x0004f9, 0x0004f9}, + {0x0004fb, 0x0004fb}, + {0x0004fd, 0x0004fd}, + {0x0004ff, 0x0004ff}, + {0x000501, 0x000501}, + {0x000503, 0x000503}, + {0x000505, 0x000505}, + {0x000507, 0x000507}, + {0x000509, 0x000509}, + {0x00050b, 0x00050b}, + {0x00050d, 0x00050d}, + {0x00050f, 0x00050f}, + {0x000511, 0x000511}, + {0x000513, 0x000513}, + {0x000515, 0x000515}, + {0x000517, 0x000517}, + {0x000519, 0x000519}, + {0x00051b, 0x00051b}, + {0x00051d, 0x00051d}, + {0x00051f, 0x00051f}, + {0x000521, 0x000521}, + {0x000523, 0x000523}, + {0x000525, 0x000525}, + {0x000527, 0x000527}, + {0x000529, 0x000529}, + {0x00052b, 0x00052b}, + {0x00052d, 0x00052d}, + {0x00052f, 0x00052f}, + {0x000560, 0x000588}, + {0x0010d0, 0x0010fa}, + {0x0010fc, 0x0010fc}, + {0x0010fd, 0x0010ff}, + {0x0013f8, 0x0013fd}, + {0x001c80, 0x001c88}, + {0x001d00, 0x001d2b}, + {0x001d2c, 0x001d6a}, + {0x001d6b, 0x001d77}, + {0x001d78, 0x001d78}, + {0x001d79, 0x001d9a}, + {0x001d9b, 0x001dbf}, + {0x001e01, 0x001e01}, + {0x001e03, 0x001e03}, + {0x001e05, 0x001e05}, + {0x001e07, 0x001e07}, + {0x001e09, 0x001e09}, + {0x001e0b, 0x001e0b}, + {0x001e0d, 0x001e0d}, + {0x001e0f, 0x001e0f}, + {0x001e11, 0x001e11}, + {0x001e13, 0x001e13}, + {0x001e15, 0x001e15}, + {0x001e17, 0x001e17}, + {0x001e19, 0x001e19}, + {0x001e1b, 0x001e1b}, + {0x001e1d, 0x001e1d}, + {0x001e1f, 0x001e1f}, + {0x001e21, 0x001e21}, + {0x001e23, 0x001e23}, + {0x001e25, 0x001e25}, + {0x001e27, 0x001e27}, + {0x001e29, 0x001e29}, + {0x001e2b, 0x001e2b}, + {0x001e2d, 0x001e2d}, + {0x001e2f, 0x001e2f}, + {0x001e31, 0x001e31}, + {0x001e33, 0x001e33}, + {0x001e35, 0x001e35}, + 
{0x001e37, 0x001e37}, + {0x001e39, 0x001e39}, + {0x001e3b, 0x001e3b}, + {0x001e3d, 0x001e3d}, + {0x001e3f, 0x001e3f}, + {0x001e41, 0x001e41}, + {0x001e43, 0x001e43}, + {0x001e45, 0x001e45}, + {0x001e47, 0x001e47}, + {0x001e49, 0x001e49}, + {0x001e4b, 0x001e4b}, + {0x001e4d, 0x001e4d}, + {0x001e4f, 0x001e4f}, + {0x001e51, 0x001e51}, + {0x001e53, 0x001e53}, + {0x001e55, 0x001e55}, + {0x001e57, 0x001e57}, + {0x001e59, 0x001e59}, + {0x001e5b, 0x001e5b}, + {0x001e5d, 0x001e5d}, + {0x001e5f, 0x001e5f}, + {0x001e61, 0x001e61}, + {0x001e63, 0x001e63}, + {0x001e65, 0x001e65}, + {0x001e67, 0x001e67}, + {0x001e69, 0x001e69}, + {0x001e6b, 0x001e6b}, + {0x001e6d, 0x001e6d}, + {0x001e6f, 0x001e6f}, + {0x001e71, 0x001e71}, + {0x001e73, 0x001e73}, + {0x001e75, 0x001e75}, + {0x001e77, 0x001e77}, + {0x001e79, 0x001e79}, + {0x001e7b, 0x001e7b}, + {0x001e7d, 0x001e7d}, + {0x001e7f, 0x001e7f}, + {0x001e81, 0x001e81}, + {0x001e83, 0x001e83}, + {0x001e85, 0x001e85}, + {0x001e87, 0x001e87}, + {0x001e89, 0x001e89}, + {0x001e8b, 0x001e8b}, + {0x001e8d, 0x001e8d}, + {0x001e8f, 0x001e8f}, + {0x001e91, 0x001e91}, + {0x001e93, 0x001e93}, + {0x001e95, 0x001e9d}, + {0x001e9f, 0x001e9f}, + {0x001ea1, 0x001ea1}, + {0x001ea3, 0x001ea3}, + {0x001ea5, 0x001ea5}, + {0x001ea7, 0x001ea7}, + {0x001ea9, 0x001ea9}, + {0x001eab, 0x001eab}, + {0x001ead, 0x001ead}, + {0x001eaf, 0x001eaf}, + {0x001eb1, 0x001eb1}, + {0x001eb3, 0x001eb3}, + {0x001eb5, 0x001eb5}, + {0x001eb7, 0x001eb7}, + {0x001eb9, 0x001eb9}, + {0x001ebb, 0x001ebb}, + {0x001ebd, 0x001ebd}, + {0x001ebf, 0x001ebf}, + {0x001ec1, 0x001ec1}, + {0x001ec3, 0x001ec3}, + {0x001ec5, 0x001ec5}, + {0x001ec7, 0x001ec7}, + {0x001ec9, 0x001ec9}, + {0x001ecb, 0x001ecb}, + {0x001ecd, 0x001ecd}, + {0x001ecf, 0x001ecf}, + {0x001ed1, 0x001ed1}, + {0x001ed3, 0x001ed3}, + {0x001ed5, 0x001ed5}, + {0x001ed7, 0x001ed7}, + {0x001ed9, 0x001ed9}, + {0x001edb, 0x001edb}, + {0x001edd, 0x001edd}, + {0x001edf, 0x001edf}, + {0x001ee1, 0x001ee1}, + {0x001ee3, 0x001ee3}, + 
{0x001ee5, 0x001ee5}, + {0x001ee7, 0x001ee7}, + {0x001ee9, 0x001ee9}, + {0x001eeb, 0x001eeb}, + {0x001eed, 0x001eed}, + {0x001eef, 0x001eef}, + {0x001ef1, 0x001ef1}, + {0x001ef3, 0x001ef3}, + {0x001ef5, 0x001ef5}, + {0x001ef7, 0x001ef7}, + {0x001ef9, 0x001ef9}, + {0x001efb, 0x001efb}, + {0x001efd, 0x001efd}, + {0x001eff, 0x001f07}, + {0x001f10, 0x001f15}, + {0x001f20, 0x001f27}, + {0x001f30, 0x001f37}, + {0x001f40, 0x001f45}, + {0x001f50, 0x001f57}, + {0x001f60, 0x001f67}, + {0x001f70, 0x001f7d}, + {0x001f80, 0x001f87}, + {0x001f90, 0x001f97}, + {0x001fa0, 0x001fa7}, + {0x001fb0, 0x001fb4}, + {0x001fb6, 0x001fb7}, + {0x001fbe, 0x001fbe}, + {0x001fc2, 0x001fc4}, + {0x001fc6, 0x001fc7}, + {0x001fd0, 0x001fd3}, + {0x001fd6, 0x001fd7}, + {0x001fe0, 0x001fe7}, + {0x001ff2, 0x001ff4}, + {0x001ff6, 0x001ff7}, + {0x002071, 0x002071}, + {0x00207f, 0x00207f}, + {0x002090, 0x00209c}, + {0x00210a, 0x00210a}, + {0x00210e, 0x00210f}, + {0x002113, 0x002113}, + {0x00212f, 0x00212f}, + {0x002134, 0x002134}, + {0x002139, 0x002139}, + {0x00213c, 0x00213d}, + {0x002146, 0x002149}, + {0x00214e, 0x00214e}, + {0x002170, 0x00217f}, + {0x002184, 0x002184}, + {0x0024d0, 0x0024e9}, + {0x002c30, 0x002c5f}, + {0x002c61, 0x002c61}, + {0x002c65, 0x002c66}, + {0x002c68, 0x002c68}, + {0x002c6a, 0x002c6a}, + {0x002c6c, 0x002c6c}, + {0x002c71, 0x002c71}, + {0x002c73, 0x002c74}, + {0x002c76, 0x002c7b}, + {0x002c7c, 0x002c7d}, + {0x002c81, 0x002c81}, + {0x002c83, 0x002c83}, + {0x002c85, 0x002c85}, + {0x002c87, 0x002c87}, + {0x002c89, 0x002c89}, + {0x002c8b, 0x002c8b}, + {0x002c8d, 0x002c8d}, + {0x002c8f, 0x002c8f}, + {0x002c91, 0x002c91}, + {0x002c93, 0x002c93}, + {0x002c95, 0x002c95}, + {0x002c97, 0x002c97}, + {0x002c99, 0x002c99}, + {0x002c9b, 0x002c9b}, + {0x002c9d, 0x002c9d}, + {0x002c9f, 0x002c9f}, + {0x002ca1, 0x002ca1}, + {0x002ca3, 0x002ca3}, + {0x002ca5, 0x002ca5}, + {0x002ca7, 0x002ca7}, + {0x002ca9, 0x002ca9}, + {0x002cab, 0x002cab}, + {0x002cad, 0x002cad}, + {0x002caf, 0x002caf}, + 
{0x002cb1, 0x002cb1}, + {0x002cb3, 0x002cb3}, + {0x002cb5, 0x002cb5}, + {0x002cb7, 0x002cb7}, + {0x002cb9, 0x002cb9}, + {0x002cbb, 0x002cbb}, + {0x002cbd, 0x002cbd}, + {0x002cbf, 0x002cbf}, + {0x002cc1, 0x002cc1}, + {0x002cc3, 0x002cc3}, + {0x002cc5, 0x002cc5}, + {0x002cc7, 0x002cc7}, + {0x002cc9, 0x002cc9}, + {0x002ccb, 0x002ccb}, + {0x002ccd, 0x002ccd}, + {0x002ccf, 0x002ccf}, + {0x002cd1, 0x002cd1}, + {0x002cd3, 0x002cd3}, + {0x002cd5, 0x002cd5}, + {0x002cd7, 0x002cd7}, + {0x002cd9, 0x002cd9}, + {0x002cdb, 0x002cdb}, + {0x002cdd, 0x002cdd}, + {0x002cdf, 0x002cdf}, + {0x002ce1, 0x002ce1}, + {0x002ce3, 0x002ce4}, + {0x002cec, 0x002cec}, + {0x002cee, 0x002cee}, + {0x002cf3, 0x002cf3}, + {0x002d00, 0x002d25}, + {0x002d27, 0x002d27}, + {0x002d2d, 0x002d2d}, + {0x00a641, 0x00a641}, + {0x00a643, 0x00a643}, + {0x00a645, 0x00a645}, + {0x00a647, 0x00a647}, + {0x00a649, 0x00a649}, + {0x00a64b, 0x00a64b}, + {0x00a64d, 0x00a64d}, + {0x00a64f, 0x00a64f}, + {0x00a651, 0x00a651}, + {0x00a653, 0x00a653}, + {0x00a655, 0x00a655}, + {0x00a657, 0x00a657}, + {0x00a659, 0x00a659}, + {0x00a65b, 0x00a65b}, + {0x00a65d, 0x00a65d}, + {0x00a65f, 0x00a65f}, + {0x00a661, 0x00a661}, + {0x00a663, 0x00a663}, + {0x00a665, 0x00a665}, + {0x00a667, 0x00a667}, + {0x00a669, 0x00a669}, + {0x00a66b, 0x00a66b}, + {0x00a66d, 0x00a66d}, + {0x00a681, 0x00a681}, + {0x00a683, 0x00a683}, + {0x00a685, 0x00a685}, + {0x00a687, 0x00a687}, + {0x00a689, 0x00a689}, + {0x00a68b, 0x00a68b}, + {0x00a68d, 0x00a68d}, + {0x00a68f, 0x00a68f}, + {0x00a691, 0x00a691}, + {0x00a693, 0x00a693}, + {0x00a695, 0x00a695}, + {0x00a697, 0x00a697}, + {0x00a699, 0x00a699}, + {0x00a69b, 0x00a69b}, + {0x00a69c, 0x00a69d}, + {0x00a723, 0x00a723}, + {0x00a725, 0x00a725}, + {0x00a727, 0x00a727}, + {0x00a729, 0x00a729}, + {0x00a72b, 0x00a72b}, + {0x00a72d, 0x00a72d}, + {0x00a72f, 0x00a731}, + {0x00a733, 0x00a733}, + {0x00a735, 0x00a735}, + {0x00a737, 0x00a737}, + {0x00a739, 0x00a739}, + {0x00a73b, 0x00a73b}, + {0x00a73d, 0x00a73d}, + 
{0x00a73f, 0x00a73f}, + {0x00a741, 0x00a741}, + {0x00a743, 0x00a743}, + {0x00a745, 0x00a745}, + {0x00a747, 0x00a747}, + {0x00a749, 0x00a749}, + {0x00a74b, 0x00a74b}, + {0x00a74d, 0x00a74d}, + {0x00a74f, 0x00a74f}, + {0x00a751, 0x00a751}, + {0x00a753, 0x00a753}, + {0x00a755, 0x00a755}, + {0x00a757, 0x00a757}, + {0x00a759, 0x00a759}, + {0x00a75b, 0x00a75b}, + {0x00a75d, 0x00a75d}, + {0x00a75f, 0x00a75f}, + {0x00a761, 0x00a761}, + {0x00a763, 0x00a763}, + {0x00a765, 0x00a765}, + {0x00a767, 0x00a767}, + {0x00a769, 0x00a769}, + {0x00a76b, 0x00a76b}, + {0x00a76d, 0x00a76d}, + {0x00a76f, 0x00a76f}, + {0x00a770, 0x00a770}, + {0x00a771, 0x00a778}, + {0x00a77a, 0x00a77a}, + {0x00a77c, 0x00a77c}, + {0x00a77f, 0x00a77f}, + {0x00a781, 0x00a781}, + {0x00a783, 0x00a783}, + {0x00a785, 0x00a785}, + {0x00a787, 0x00a787}, + {0x00a78c, 0x00a78c}, + {0x00a78e, 0x00a78e}, + {0x00a791, 0x00a791}, + {0x00a793, 0x00a795}, + {0x00a797, 0x00a797}, + {0x00a799, 0x00a799}, + {0x00a79b, 0x00a79b}, + {0x00a79d, 0x00a79d}, + {0x00a79f, 0x00a79f}, + {0x00a7a1, 0x00a7a1}, + {0x00a7a3, 0x00a7a3}, + {0x00a7a5, 0x00a7a5}, + {0x00a7a7, 0x00a7a7}, + {0x00a7a9, 0x00a7a9}, + {0x00a7af, 0x00a7af}, + {0x00a7b5, 0x00a7b5}, + {0x00a7b7, 0x00a7b7}, + {0x00a7b9, 0x00a7b9}, + {0x00a7bb, 0x00a7bb}, + {0x00a7bd, 0x00a7bd}, + {0x00a7bf, 0x00a7bf}, + {0x00a7c1, 0x00a7c1}, + {0x00a7c3, 0x00a7c3}, + {0x00a7c8, 0x00a7c8}, + {0x00a7ca, 0x00a7ca}, + {0x00a7d1, 0x00a7d1}, + {0x00a7d3, 0x00a7d3}, + {0x00a7d5, 0x00a7d5}, + {0x00a7d7, 0x00a7d7}, + {0x00a7d9, 0x00a7d9}, + {0x00a7f2, 0x00a7f4}, + {0x00a7f6, 0x00a7f6}, + {0x00a7f8, 0x00a7f9}, + {0x00a7fa, 0x00a7fa}, + {0x00ab30, 0x00ab5a}, + {0x00ab5c, 0x00ab5f}, + {0x00ab60, 0x00ab68}, + {0x00ab69, 0x00ab69}, + {0x00ab70, 0x00abbf}, + {0x00fb00, 0x00fb06}, + {0x00fb13, 0x00fb17}, + {0x00ff41, 0x00ff5a}, + {0x010428, 0x01044f}, + {0x0104d8, 0x0104fb}, + {0x010597, 0x0105a1}, + {0x0105a3, 0x0105b1}, + {0x0105b3, 0x0105b9}, + {0x0105bb, 0x0105bc}, + {0x010780, 0x010780}, + 
{0x010783, 0x010785}, + {0x010787, 0x0107b0}, + {0x0107b2, 0x0107ba}, + {0x010cc0, 0x010cf2}, + {0x0118c0, 0x0118df}, + {0x016e60, 0x016e7f}, + {0x01d41a, 0x01d433}, + {0x01d44e, 0x01d454}, + {0x01d456, 0x01d467}, + {0x01d482, 0x01d49b}, + {0x01d4b6, 0x01d4b9}, + {0x01d4bb, 0x01d4bb}, + {0x01d4bd, 0x01d4c3}, + {0x01d4c5, 0x01d4cf}, + {0x01d4ea, 0x01d503}, + {0x01d51e, 0x01d537}, + {0x01d552, 0x01d56b}, + {0x01d586, 0x01d59f}, + {0x01d5ba, 0x01d5d3}, + {0x01d5ee, 0x01d607}, + {0x01d622, 0x01d63b}, + {0x01d656, 0x01d66f}, + {0x01d68a, 0x01d6a5}, + {0x01d6c2, 0x01d6da}, + {0x01d6dc, 0x01d6e1}, + {0x01d6fc, 0x01d714}, + {0x01d716, 0x01d71b}, + {0x01d736, 0x01d74e}, + {0x01d750, 0x01d755}, + {0x01d770, 0x01d788}, + {0x01d78a, 0x01d78f}, + {0x01d7aa, 0x01d7c2}, + {0x01d7c4, 0x01d7c9}, + {0x01d7cb, 0x01d7cb}, + {0x01df00, 0x01df09}, + {0x01df0b, 0x01df1e}, + {0x01df25, 0x01df2a}, + {0x01e030, 0x01e06d}, + {0x01e922, 0x01e943}, +}; + +/* table of Unicode codepoint ranges of Uppercase characters */ +static const pg_unicode_range unicode_uppercase[651] = +{ + {0x000041, 0x00005a}, + {0x0000c0, 0x0000d6}, + {0x0000d8, 0x0000de}, + {0x000100, 0x000100}, + {0x000102, 0x000102}, + {0x000104, 0x000104}, + {0x000106, 0x000106}, + {0x000108, 0x000108}, + {0x00010a, 0x00010a}, + {0x00010c, 0x00010c}, + {0x00010e, 0x00010e}, + {0x000110, 0x000110}, + {0x000112, 0x000112}, + {0x000114, 0x000114}, + {0x000116, 0x000116}, + {0x000118, 0x000118}, + {0x00011a, 0x00011a}, + {0x00011c, 0x00011c}, + {0x00011e, 0x00011e}, + {0x000120, 0x000120}, + {0x000122, 0x000122}, + {0x000124, 0x000124}, + {0x000126, 0x000126}, + {0x000128, 0x000128}, + {0x00012a, 0x00012a}, + {0x00012c, 0x00012c}, + {0x00012e, 0x00012e}, + {0x000130, 0x000130}, + {0x000132, 0x000132}, + {0x000134, 0x000134}, + {0x000136, 0x000136}, + {0x000139, 0x000139}, + {0x00013b, 0x00013b}, + {0x00013d, 0x00013d}, + {0x00013f, 0x00013f}, + {0x000141, 0x000141}, + {0x000143, 0x000143}, + {0x000145, 0x000145}, + {0x000147, 0x000147}, 
+ {0x00014a, 0x00014a}, + {0x00014c, 0x00014c}, + {0x00014e, 0x00014e}, + {0x000150, 0x000150}, + {0x000152, 0x000152}, + {0x000154, 0x000154}, + {0x000156, 0x000156}, + {0x000158, 0x000158}, + {0x00015a, 0x00015a}, + {0x00015c, 0x00015c}, + {0x00015e, 0x00015e}, + {0x000160, 0x000160}, + {0x000162, 0x000162}, + {0x000164, 0x000164}, + {0x000166, 0x000166}, + {0x000168, 0x000168}, + {0x00016a, 0x00016a}, + {0x00016c, 0x00016c}, + {0x00016e, 0x00016e}, + {0x000170, 0x000170}, + {0x000172, 0x000172}, + {0x000174, 0x000174}, + {0x000176, 0x000176}, + {0x000178, 0x000179}, + {0x00017b, 0x00017b}, + {0x00017d, 0x00017d}, + {0x000181, 0x000182}, + {0x000184, 0x000184}, + {0x000186, 0x000187}, + {0x000189, 0x00018b}, + {0x00018e, 0x000191}, + {0x000193, 0x000194}, + {0x000196, 0x000198}, + {0x00019c, 0x00019d}, + {0x00019f, 0x0001a0}, + {0x0001a2, 0x0001a2}, + {0x0001a4, 0x0001a4}, + {0x0001a6, 0x0001a7}, + {0x0001a9, 0x0001a9}, + {0x0001ac, 0x0001ac}, + {0x0001ae, 0x0001af}, + {0x0001b1, 0x0001b3}, + {0x0001b5, 0x0001b5}, + {0x0001b7, 0x0001b8}, + {0x0001bc, 0x0001bc}, + {0x0001c4, 0x0001c4}, + {0x0001c7, 0x0001c7}, + {0x0001ca, 0x0001ca}, + {0x0001cd, 0x0001cd}, + {0x0001cf, 0x0001cf}, + {0x0001d1, 0x0001d1}, + {0x0001d3, 0x0001d3}, + {0x0001d5, 0x0001d5}, + {0x0001d7, 0x0001d7}, + {0x0001d9, 0x0001d9}, + {0x0001db, 0x0001db}, + {0x0001de, 0x0001de}, + {0x0001e0, 0x0001e0}, + {0x0001e2, 0x0001e2}, + {0x0001e4, 0x0001e4}, + {0x0001e6, 0x0001e6}, + {0x0001e8, 0x0001e8}, + {0x0001ea, 0x0001ea}, + {0x0001ec, 0x0001ec}, + {0x0001ee, 0x0001ee}, + {0x0001f1, 0x0001f1}, + {0x0001f4, 0x0001f4}, + {0x0001f6, 0x0001f8}, + {0x0001fa, 0x0001fa}, + {0x0001fc, 0x0001fc}, + {0x0001fe, 0x0001fe}, + {0x000200, 0x000200}, + {0x000202, 0x000202}, + {0x000204, 0x000204}, + {0x000206, 0x000206}, + {0x000208, 0x000208}, + {0x00020a, 0x00020a}, + {0x00020c, 0x00020c}, + {0x00020e, 0x00020e}, + {0x000210, 0x000210}, + {0x000212, 0x000212}, + {0x000214, 0x000214}, + {0x000216, 0x000216}, + 
{0x000218, 0x000218}, + {0x00021a, 0x00021a}, + {0x00021c, 0x00021c}, + {0x00021e, 0x00021e}, + {0x000220, 0x000220}, + {0x000222, 0x000222}, + {0x000224, 0x000224}, + {0x000226, 0x000226}, + {0x000228, 0x000228}, + {0x00022a, 0x00022a}, + {0x00022c, 0x00022c}, + {0x00022e, 0x00022e}, + {0x000230, 0x000230}, + {0x000232, 0x000232}, + {0x00023a, 0x00023b}, + {0x00023d, 0x00023e}, + {0x000241, 0x000241}, + {0x000243, 0x000246}, + {0x000248, 0x000248}, + {0x00024a, 0x00024a}, + {0x00024c, 0x00024c}, + {0x00024e, 0x00024e}, + {0x000370, 0x000370}, + {0x000372, 0x000372}, + {0x000376, 0x000376}, + {0x00037f, 0x00037f}, + {0x000386, 0x000386}, + {0x000388, 0x00038a}, + {0x00038c, 0x00038c}, + {0x00038e, 0x00038f}, + {0x000391, 0x0003a1}, + {0x0003a3, 0x0003ab}, + {0x0003cf, 0x0003cf}, + {0x0003d2, 0x0003d4}, + {0x0003d8, 0x0003d8}, + {0x0003da, 0x0003da}, + {0x0003dc, 0x0003dc}, + {0x0003de, 0x0003de}, + {0x0003e0, 0x0003e0}, + {0x0003e2, 0x0003e2}, + {0x0003e4, 0x0003e4}, + {0x0003e6, 0x0003e6}, + {0x0003e8, 0x0003e8}, + {0x0003ea, 0x0003ea}, + {0x0003ec, 0x0003ec}, + {0x0003ee, 0x0003ee}, + {0x0003f4, 0x0003f4}, + {0x0003f7, 0x0003f7}, + {0x0003f9, 0x0003fa}, + {0x0003fd, 0x00042f}, + {0x000460, 0x000460}, + {0x000462, 0x000462}, + {0x000464, 0x000464}, + {0x000466, 0x000466}, + {0x000468, 0x000468}, + {0x00046a, 0x00046a}, + {0x00046c, 0x00046c}, + {0x00046e, 0x00046e}, + {0x000470, 0x000470}, + {0x000472, 0x000472}, + {0x000474, 0x000474}, + {0x000476, 0x000476}, + {0x000478, 0x000478}, + {0x00047a, 0x00047a}, + {0x00047c, 0x00047c}, + {0x00047e, 0x00047e}, + {0x000480, 0x000480}, + {0x00048a, 0x00048a}, + {0x00048c, 0x00048c}, + {0x00048e, 0x00048e}, + {0x000490, 0x000490}, + {0x000492, 0x000492}, + {0x000494, 0x000494}, + {0x000496, 0x000496}, + {0x000498, 0x000498}, + {0x00049a, 0x00049a}, + {0x00049c, 0x00049c}, + {0x00049e, 0x00049e}, + {0x0004a0, 0x0004a0}, + {0x0004a2, 0x0004a2}, + {0x0004a4, 0x0004a4}, + {0x0004a6, 0x0004a6}, + {0x0004a8, 0x0004a8}, + 
{0x0004aa, 0x0004aa}, + {0x0004ac, 0x0004ac}, + {0x0004ae, 0x0004ae}, + {0x0004b0, 0x0004b0}, + {0x0004b2, 0x0004b2}, + {0x0004b4, 0x0004b4}, + {0x0004b6, 0x0004b6}, + {0x0004b8, 0x0004b8}, + {0x0004ba, 0x0004ba}, + {0x0004bc, 0x0004bc}, + {0x0004be, 0x0004be}, + {0x0004c0, 0x0004c1}, + {0x0004c3, 0x0004c3}, + {0x0004c5, 0x0004c5}, + {0x0004c7, 0x0004c7}, + {0x0004c9, 0x0004c9}, + {0x0004cb, 0x0004cb}, + {0x0004cd, 0x0004cd}, + {0x0004d0, 0x0004d0}, + {0x0004d2, 0x0004d2}, + {0x0004d4, 0x0004d4}, + {0x0004d6, 0x0004d6}, + {0x0004d8, 0x0004d8}, + {0x0004da, 0x0004da}, + {0x0004dc, 0x0004dc}, + {0x0004de, 0x0004de}, + {0x0004e0, 0x0004e0}, + {0x0004e2, 0x0004e2}, + {0x0004e4, 0x0004e4}, + {0x0004e6, 0x0004e6}, + {0x0004e8, 0x0004e8}, + {0x0004ea, 0x0004ea}, + {0x0004ec, 0x0004ec}, + {0x0004ee, 0x0004ee}, + {0x0004f0, 0x0004f0}, + {0x0004f2, 0x0004f2}, + {0x0004f4, 0x0004f4}, + {0x0004f6, 0x0004f6}, + {0x0004f8, 0x0004f8}, + {0x0004fa, 0x0004fa}, + {0x0004fc, 0x0004fc}, + {0x0004fe, 0x0004fe}, + {0x000500, 0x000500}, + {0x000502, 0x000502}, + {0x000504, 0x000504}, + {0x000506, 0x000506}, + {0x000508, 0x000508}, + {0x00050a, 0x00050a}, + {0x00050c, 0x00050c}, + {0x00050e, 0x00050e}, + {0x000510, 0x000510}, + {0x000512, 0x000512}, + {0x000514, 0x000514}, + {0x000516, 0x000516}, + {0x000518, 0x000518}, + {0x00051a, 0x00051a}, + {0x00051c, 0x00051c}, + {0x00051e, 0x00051e}, + {0x000520, 0x000520}, + {0x000522, 0x000522}, + {0x000524, 0x000524}, + {0x000526, 0x000526}, + {0x000528, 0x000528}, + {0x00052a, 0x00052a}, + {0x00052c, 0x00052c}, + {0x00052e, 0x00052e}, + {0x000531, 0x000556}, + {0x0010a0, 0x0010c5}, + {0x0010c7, 0x0010c7}, + {0x0010cd, 0x0010cd}, + {0x0013a0, 0x0013f5}, + {0x001c90, 0x001cba}, + {0x001cbd, 0x001cbf}, + {0x001e00, 0x001e00}, + {0x001e02, 0x001e02}, + {0x001e04, 0x001e04}, + {0x001e06, 0x001e06}, + {0x001e08, 0x001e08}, + {0x001e0a, 0x001e0a}, + {0x001e0c, 0x001e0c}, + {0x001e0e, 0x001e0e}, + {0x001e10, 0x001e10}, + {0x001e12, 0x001e12}, + 
{0x001e14, 0x001e14}, + {0x001e16, 0x001e16}, + {0x001e18, 0x001e18}, + {0x001e1a, 0x001e1a}, + {0x001e1c, 0x001e1c}, + {0x001e1e, 0x001e1e}, + {0x001e20, 0x001e20}, + {0x001e22, 0x001e22}, + {0x001e24, 0x001e24}, + {0x001e26, 0x001e26}, + {0x001e28, 0x001e28}, + {0x001e2a, 0x001e2a}, + {0x001e2c, 0x001e2c}, + {0x001e2e, 0x001e2e}, + {0x001e30, 0x001e30}, + {0x001e32, 0x001e32}, + {0x001e34, 0x001e34}, + {0x001e36, 0x001e36}, + {0x001e38, 0x001e38}, + {0x001e3a, 0x001e3a}, + {0x001e3c, 0x001e3c}, + {0x001e3e, 0x001e3e}, + {0x001e40, 0x001e40}, + {0x001e42, 0x001e42}, + {0x001e44, 0x001e44}, + {0x001e46, 0x001e46}, + {0x001e48, 0x001e48}, + {0x001e4a, 0x001e4a}, + {0x001e4c, 0x001e4c}, + {0x001e4e, 0x001e4e}, + {0x001e50, 0x001e50}, + {0x001e52, 0x001e52}, + {0x001e54, 0x001e54}, + {0x001e56, 0x001e56}, + {0x001e58, 0x001e58}, + {0x001e5a, 0x001e5a}, + {0x001e5c, 0x001e5c}, + {0x001e5e, 0x001e5e}, + {0x001e60, 0x001e60}, + {0x001e62, 0x001e62}, + {0x001e64, 0x001e64}, + {0x001e66, 0x001e66}, + {0x001e68, 0x001e68}, + {0x001e6a, 0x001e6a}, + {0x001e6c, 0x001e6c}, + {0x001e6e, 0x001e6e}, + {0x001e70, 0x001e70}, + {0x001e72, 0x001e72}, + {0x001e74, 0x001e74}, + {0x001e76, 0x001e76}, + {0x001e78, 0x001e78}, + {0x001e7a, 0x001e7a}, + {0x001e7c, 0x001e7c}, + {0x001e7e, 0x001e7e}, + {0x001e80, 0x001e80}, + {0x001e82, 0x001e82}, + {0x001e84, 0x001e84}, + {0x001e86, 0x001e86}, + {0x001e88, 0x001e88}, + {0x001e8a, 0x001e8a}, + {0x001e8c, 0x001e8c}, + {0x001e8e, 0x001e8e}, + {0x001e90, 0x001e90}, + {0x001e92, 0x001e92}, + {0x001e94, 0x001e94}, + {0x001e9e, 0x001e9e}, + {0x001ea0, 0x001ea0}, + {0x001ea2, 0x001ea2}, + {0x001ea4, 0x001ea4}, + {0x001ea6, 0x001ea6}, + {0x001ea8, 0x001ea8}, + {0x001eaa, 0x001eaa}, + {0x001eac, 0x001eac}, + {0x001eae, 0x001eae}, + {0x001eb0, 0x001eb0}, + {0x001eb2, 0x001eb2}, + {0x001eb4, 0x001eb4}, + {0x001eb6, 0x001eb6}, + {0x001eb8, 0x001eb8}, + {0x001eba, 0x001eba}, + {0x001ebc, 0x001ebc}, + {0x001ebe, 0x001ebe}, + {0x001ec0, 0x001ec0}, + 
{0x001ec2, 0x001ec2}, + {0x001ec4, 0x001ec4}, + {0x001ec6, 0x001ec6}, + {0x001ec8, 0x001ec8}, + {0x001eca, 0x001eca}, + {0x001ecc, 0x001ecc}, + {0x001ece, 0x001ece}, + {0x001ed0, 0x001ed0}, + {0x001ed2, 0x001ed2}, + {0x001ed4, 0x001ed4}, + {0x001ed6, 0x001ed6}, + {0x001ed8, 0x001ed8}, + {0x001eda, 0x001eda}, + {0x001edc, 0x001edc}, + {0x001ede, 0x001ede}, + {0x001ee0, 0x001ee0}, + {0x001ee2, 0x001ee2}, + {0x001ee4, 0x001ee4}, + {0x001ee6, 0x001ee6}, + {0x001ee8, 0x001ee8}, + {0x001eea, 0x001eea}, + {0x001eec, 0x001eec}, + {0x001eee, 0x001eee}, + {0x001ef0, 0x001ef0}, + {0x001ef2, 0x001ef2}, + {0x001ef4, 0x001ef4}, + {0x001ef6, 0x001ef6}, + {0x001ef8, 0x001ef8}, + {0x001efa, 0x001efa}, + {0x001efc, 0x001efc}, + {0x001efe, 0x001efe}, + {0x001f08, 0x001f0f}, + {0x001f18, 0x001f1d}, + {0x001f28, 0x001f2f}, + {0x001f38, 0x001f3f}, + {0x001f48, 0x001f4d}, + {0x001f59, 0x001f59}, + {0x001f5b, 0x001f5b}, + {0x001f5d, 0x001f5d}, + {0x001f5f, 0x001f5f}, + {0x001f68, 0x001f6f}, + {0x001fb8, 0x001fbb}, + {0x001fc8, 0x001fcb}, + {0x001fd8, 0x001fdb}, + {0x001fe8, 0x001fec}, + {0x001ff8, 0x001ffb}, + {0x002102, 0x002102}, + {0x002107, 0x002107}, + {0x00210b, 0x00210d}, + {0x002110, 0x002112}, + {0x002115, 0x002115}, + {0x002119, 0x00211d}, + {0x002124, 0x002124}, + {0x002126, 0x002126}, + {0x002128, 0x002128}, + {0x00212a, 0x00212d}, + {0x002130, 0x002133}, + {0x00213e, 0x00213f}, + {0x002145, 0x002145}, + {0x002160, 0x00216f}, + {0x002183, 0x002183}, + {0x0024b6, 0x0024cf}, + {0x002c00, 0x002c2f}, + {0x002c60, 0x002c60}, + {0x002c62, 0x002c64}, + {0x002c67, 0x002c67}, + {0x002c69, 0x002c69}, + {0x002c6b, 0x002c6b}, + {0x002c6d, 0x002c70}, + {0x002c72, 0x002c72}, + {0x002c75, 0x002c75}, + {0x002c7e, 0x002c80}, + {0x002c82, 0x002c82}, + {0x002c84, 0x002c84}, + {0x002c86, 0x002c86}, + {0x002c88, 0x002c88}, + {0x002c8a, 0x002c8a}, + {0x002c8c, 0x002c8c}, + {0x002c8e, 0x002c8e}, + {0x002c90, 0x002c90}, + {0x002c92, 0x002c92}, + {0x002c94, 0x002c94}, + {0x002c96, 0x002c96}, + 
{0x002c98, 0x002c98}, + {0x002c9a, 0x002c9a}, + {0x002c9c, 0x002c9c}, + {0x002c9e, 0x002c9e}, + {0x002ca0, 0x002ca0}, + {0x002ca2, 0x002ca2}, + {0x002ca4, 0x002ca4}, + {0x002ca6, 0x002ca6}, + {0x002ca8, 0x002ca8}, + {0x002caa, 0x002caa}, + {0x002cac, 0x002cac}, + {0x002cae, 0x002cae}, + {0x002cb0, 0x002cb0}, + {0x002cb2, 0x002cb2}, + {0x002cb4, 0x002cb4}, + {0x002cb6, 0x002cb6}, + {0x002cb8, 0x002cb8}, + {0x002cba, 0x002cba}, + {0x002cbc, 0x002cbc}, + {0x002cbe, 0x002cbe}, + {0x002cc0, 0x002cc0}, + {0x002cc2, 0x002cc2}, + {0x002cc4, 0x002cc4}, + {0x002cc6, 0x002cc6}, + {0x002cc8, 0x002cc8}, + {0x002cca, 0x002cca}, + {0x002ccc, 0x002ccc}, + {0x002cce, 0x002cce}, + {0x002cd0, 0x002cd0}, + {0x002cd2, 0x002cd2}, + {0x002cd4, 0x002cd4}, + {0x002cd6, 0x002cd6}, + {0x002cd8, 0x002cd8}, + {0x002cda, 0x002cda}, + {0x002cdc, 0x002cdc}, + {0x002cde, 0x002cde}, + {0x002ce0, 0x002ce0}, + {0x002ce2, 0x002ce2}, + {0x002ceb, 0x002ceb}, + {0x002ced, 0x002ced}, + {0x002cf2, 0x002cf2}, + {0x00a640, 0x00a640}, + {0x00a642, 0x00a642}, + {0x00a644, 0x00a644}, + {0x00a646, 0x00a646}, + {0x00a648, 0x00a648}, + {0x00a64a, 0x00a64a}, + {0x00a64c, 0x00a64c}, + {0x00a64e, 0x00a64e}, + {0x00a650, 0x00a650}, + {0x00a652, 0x00a652}, + {0x00a654, 0x00a654}, + {0x00a656, 0x00a656}, + {0x00a658, 0x00a658}, + {0x00a65a, 0x00a65a}, + {0x00a65c, 0x00a65c}, + {0x00a65e, 0x00a65e}, + {0x00a660, 0x00a660}, + {0x00a662, 0x00a662}, + {0x00a664, 0x00a664}, + {0x00a666, 0x00a666}, + {0x00a668, 0x00a668}, + {0x00a66a, 0x00a66a}, + {0x00a66c, 0x00a66c}, + {0x00a680, 0x00a680}, + {0x00a682, 0x00a682}, + {0x00a684, 0x00a684}, + {0x00a686, 0x00a686}, + {0x00a688, 0x00a688}, + {0x00a68a, 0x00a68a}, + {0x00a68c, 0x00a68c}, + {0x00a68e, 0x00a68e}, + {0x00a690, 0x00a690}, + {0x00a692, 0x00a692}, + {0x00a694, 0x00a694}, + {0x00a696, 0x00a696}, + {0x00a698, 0x00a698}, + {0x00a69a, 0x00a69a}, + {0x00a722, 0x00a722}, + {0x00a724, 0x00a724}, + {0x00a726, 0x00a726}, + {0x00a728, 0x00a728}, + {0x00a72a, 0x00a72a}, + 
{0x00a72c, 0x00a72c}, + {0x00a72e, 0x00a72e}, + {0x00a732, 0x00a732}, + {0x00a734, 0x00a734}, + {0x00a736, 0x00a736}, + {0x00a738, 0x00a738}, + {0x00a73a, 0x00a73a}, + {0x00a73c, 0x00a73c}, + {0x00a73e, 0x00a73e}, + {0x00a740, 0x00a740}, + {0x00a742, 0x00a742}, + {0x00a744, 0x00a744}, + {0x00a746, 0x00a746}, + {0x00a748, 0x00a748}, + {0x00a74a, 0x00a74a}, + {0x00a74c, 0x00a74c}, + {0x00a74e, 0x00a74e}, + {0x00a750, 0x00a750}, + {0x00a752, 0x00a752}, + {0x00a754, 0x00a754}, + {0x00a756, 0x00a756}, + {0x00a758, 0x00a758}, + {0x00a75a, 0x00a75a}, + {0x00a75c, 0x00a75c}, + {0x00a75e, 0x00a75e}, + {0x00a760, 0x00a760}, + {0x00a762, 0x00a762}, + {0x00a764, 0x00a764}, + {0x00a766, 0x00a766}, + {0x00a768, 0x00a768}, + {0x00a76a, 0x00a76a}, + {0x00a76c, 0x00a76c}, + {0x00a76e, 0x00a76e}, + {0x00a779, 0x00a779}, + {0x00a77b, 0x00a77b}, + {0x00a77d, 0x00a77e}, + {0x00a780, 0x00a780}, + {0x00a782, 0x00a782}, + {0x00a784, 0x00a784}, + {0x00a786, 0x00a786}, + {0x00a78b, 0x00a78b}, + {0x00a78d, 0x00a78d}, + {0x00a790, 0x00a790}, + {0x00a792, 0x00a792}, + {0x00a796, 0x00a796}, + {0x00a798, 0x00a798}, + {0x00a79a, 0x00a79a}, + {0x00a79c, 0x00a79c}, + {0x00a79e, 0x00a79e}, + {0x00a7a0, 0x00a7a0}, + {0x00a7a2, 0x00a7a2}, + {0x00a7a4, 0x00a7a4}, + {0x00a7a6, 0x00a7a6}, + {0x00a7a8, 0x00a7a8}, + {0x00a7aa, 0x00a7ae}, + {0x00a7b0, 0x00a7b4}, + {0x00a7b6, 0x00a7b6}, + {0x00a7b8, 0x00a7b8}, + {0x00a7ba, 0x00a7ba}, + {0x00a7bc, 0x00a7bc}, + {0x00a7be, 0x00a7be}, + {0x00a7c0, 0x00a7c0}, + {0x00a7c2, 0x00a7c2}, + {0x00a7c4, 0x00a7c7}, + {0x00a7c9, 0x00a7c9}, + {0x00a7d0, 0x00a7d0}, + {0x00a7d6, 0x00a7d6}, + {0x00a7d8, 0x00a7d8}, + {0x00a7f5, 0x00a7f5}, + {0x00ff21, 0x00ff3a}, + {0x010400, 0x010427}, + {0x0104b0, 0x0104d3}, + {0x010570, 0x01057a}, + {0x01057c, 0x01058a}, + {0x01058c, 0x010592}, + {0x010594, 0x010595}, + {0x010c80, 0x010cb2}, + {0x0118a0, 0x0118bf}, + {0x016e40, 0x016e5f}, + {0x01d400, 0x01d419}, + {0x01d434, 0x01d44d}, + {0x01d468, 0x01d481}, + {0x01d49c, 0x01d49c}, + 
{0x01d49e, 0x01d49f}, + {0x01d4a2, 0x01d4a2}, + {0x01d4a5, 0x01d4a6}, + {0x01d4a9, 0x01d4ac}, + {0x01d4ae, 0x01d4b5}, + {0x01d4d0, 0x01d4e9}, + {0x01d504, 0x01d505}, + {0x01d507, 0x01d50a}, + {0x01d50d, 0x01d514}, + {0x01d516, 0x01d51c}, + {0x01d538, 0x01d539}, + {0x01d53b, 0x01d53e}, + {0x01d540, 0x01d544}, + {0x01d546, 0x01d546}, + {0x01d54a, 0x01d550}, + {0x01d56c, 0x01d585}, + {0x01d5a0, 0x01d5b9}, + {0x01d5d4, 0x01d5ed}, + {0x01d608, 0x01d621}, + {0x01d63c, 0x01d655}, + {0x01d670, 0x01d689}, + {0x01d6a8, 0x01d6c0}, + {0x01d6e2, 0x01d6fa}, + {0x01d71c, 0x01d734}, + {0x01d756, 0x01d76e}, + {0x01d790, 0x01d7a8}, + {0x01d7ca, 0x01d7ca}, + {0x01e900, 0x01e921}, + {0x01f130, 0x01f149}, + {0x01f150, 0x01f169}, + {0x01f170, 0x01f189}, +}; + +/* table of Unicode codepoint ranges of Case_Ignorable characters */ +static const pg_unicode_range unicode_case_ignorable[491] = +{ + {0x000027, 0x000027}, + {0x00002e, 0x00002e}, + {0x00003a, 0x00003a}, + {0x00005e, 0x00005e}, + {0x000060, 0x000060}, + {0x0000a8, 0x0000a8}, + {0x0000ad, 0x0000ad}, + {0x0000af, 0x0000af}, + {0x0000b4, 0x0000b4}, + {0x0000b7, 0x0000b7}, + {0x0000b8, 0x0000b8}, + {0x0002b0, 0x0002c1}, + {0x0002c2, 0x0002c5}, + {0x0002c6, 0x0002d1}, + {0x0002d2, 0x0002df}, + {0x0002e0, 0x0002e4}, + {0x0002e5, 0x0002eb}, + {0x0002ec, 0x0002ec}, + {0x0002ed, 0x0002ed}, + {0x0002ee, 0x0002ee}, + {0x0002ef, 0x0002ff}, + {0x000300, 0x00036f}, + {0x000374, 0x000374}, + {0x000375, 0x000375}, + {0x00037a, 0x00037a}, + {0x000384, 0x000385}, + {0x000387, 0x000387}, + {0x000483, 0x000487}, + {0x000488, 0x000489}, + {0x000559, 0x000559}, + {0x00055f, 0x00055f}, + {0x000591, 0x0005bd}, + {0x0005bf, 0x0005bf}, + {0x0005c1, 0x0005c2}, + {0x0005c4, 0x0005c5}, + {0x0005c7, 0x0005c7}, + {0x0005f4, 0x0005f4}, + {0x000600, 0x000605}, + {0x000610, 0x00061a}, + {0x00061c, 0x00061c}, + {0x000640, 0x000640}, + {0x00064b, 0x00065f}, + {0x000670, 0x000670}, + {0x0006d6, 0x0006dc}, + {0x0006dd, 0x0006dd}, + {0x0006df, 0x0006e4}, + {0x0006e5, 
0x0006e6}, + {0x0006e7, 0x0006e8}, + {0x0006ea, 0x0006ed}, + {0x00070f, 0x00070f}, + {0x000711, 0x000711}, + {0x000730, 0x00074a}, + {0x0007a6, 0x0007b0}, + {0x0007eb, 0x0007f3}, + {0x0007f4, 0x0007f5}, + {0x0007fa, 0x0007fa}, + {0x0007fd, 0x0007fd}, + {0x000816, 0x000819}, + {0x00081a, 0x00081a}, + {0x00081b, 0x000823}, + {0x000824, 0x000824}, + {0x000825, 0x000827}, + {0x000828, 0x000828}, + {0x000829, 0x00082d}, + {0x000859, 0x00085b}, + {0x000888, 0x000888}, + {0x000890, 0x000891}, + {0x000898, 0x00089f}, + {0x0008c9, 0x0008c9}, + {0x0008ca, 0x0008e1}, + {0x0008e2, 0x0008e2}, + {0x0008e3, 0x000902}, + {0x00093a, 0x00093a}, + {0x00093c, 0x00093c}, + {0x000941, 0x000948}, + {0x00094d, 0x00094d}, + {0x000951, 0x000957}, + {0x000962, 0x000963}, + {0x000971, 0x000971}, + {0x000981, 0x000981}, + {0x0009bc, 0x0009bc}, + {0x0009c1, 0x0009c4}, + {0x0009cd, 0x0009cd}, + {0x0009e2, 0x0009e3}, + {0x0009fe, 0x0009fe}, + {0x000a01, 0x000a02}, + {0x000a3c, 0x000a3c}, + {0x000a41, 0x000a42}, + {0x000a47, 0x000a48}, + {0x000a4b, 0x000a4d}, + {0x000a51, 0x000a51}, + {0x000a70, 0x000a71}, + {0x000a75, 0x000a75}, + {0x000a81, 0x000a82}, + {0x000abc, 0x000abc}, + {0x000ac1, 0x000ac5}, + {0x000ac7, 0x000ac8}, + {0x000acd, 0x000acd}, + {0x000ae2, 0x000ae3}, + {0x000afa, 0x000aff}, + {0x000b01, 0x000b01}, + {0x000b3c, 0x000b3c}, + {0x000b3f, 0x000b3f}, + {0x000b41, 0x000b44}, + {0x000b4d, 0x000b4d}, + {0x000b55, 0x000b56}, + {0x000b62, 0x000b63}, + {0x000b82, 0x000b82}, + {0x000bc0, 0x000bc0}, + {0x000bcd, 0x000bcd}, + {0x000c00, 0x000c00}, + {0x000c04, 0x000c04}, + {0x000c3c, 0x000c3c}, + {0x000c3e, 0x000c40}, + {0x000c46, 0x000c48}, + {0x000c4a, 0x000c4d}, + {0x000c55, 0x000c56}, + {0x000c62, 0x000c63}, + {0x000c81, 0x000c81}, + {0x000cbc, 0x000cbc}, + {0x000cbf, 0x000cbf}, + {0x000cc6, 0x000cc6}, + {0x000ccc, 0x000ccd}, + {0x000ce2, 0x000ce3}, + {0x000d00, 0x000d01}, + {0x000d3b, 0x000d3c}, + {0x000d41, 0x000d44}, + {0x000d4d, 0x000d4d}, + {0x000d62, 0x000d63}, + {0x000d81, 
0x000d81}, + {0x000dca, 0x000dca}, + {0x000dd2, 0x000dd4}, + {0x000dd6, 0x000dd6}, + {0x000e31, 0x000e31}, + {0x000e34, 0x000e3a}, + {0x000e46, 0x000e46}, + {0x000e47, 0x000e4e}, + {0x000eb1, 0x000eb1}, + {0x000eb4, 0x000ebc}, + {0x000ec6, 0x000ec6}, + {0x000ec8, 0x000ece}, + {0x000f18, 0x000f19}, + {0x000f35, 0x000f35}, + {0x000f37, 0x000f37}, + {0x000f39, 0x000f39}, + {0x000f71, 0x000f7e}, + {0x000f80, 0x000f84}, + {0x000f86, 0x000f87}, + {0x000f8d, 0x000f97}, + {0x000f99, 0x000fbc}, + {0x000fc6, 0x000fc6}, + {0x00102d, 0x001030}, + {0x001032, 0x001037}, + {0x001039, 0x00103a}, + {0x00103d, 0x00103e}, + {0x001058, 0x001059}, + {0x00105e, 0x001060}, + {0x001071, 0x001074}, + {0x001082, 0x001082}, + {0x001085, 0x001086}, + {0x00108d, 0x00108d}, + {0x00109d, 0x00109d}, + {0x0010fc, 0x0010fc}, + {0x00135d, 0x00135f}, + {0x001712, 0x001714}, + {0x001732, 0x001733}, + {0x001752, 0x001753}, + {0x001772, 0x001773}, + {0x0017b4, 0x0017b5}, + {0x0017b7, 0x0017bd}, + {0x0017c6, 0x0017c6}, + {0x0017c9, 0x0017d3}, + {0x0017d7, 0x0017d7}, + {0x0017dd, 0x0017dd}, + {0x00180b, 0x00180d}, + {0x00180e, 0x00180e}, + {0x00180f, 0x00180f}, + {0x001843, 0x001843}, + {0x001885, 0x001886}, + {0x0018a9, 0x0018a9}, + {0x001920, 0x001922}, + {0x001927, 0x001928}, + {0x001932, 0x001932}, + {0x001939, 0x00193b}, + {0x001a17, 0x001a18}, + {0x001a1b, 0x001a1b}, + {0x001a56, 0x001a56}, + {0x001a58, 0x001a5e}, + {0x001a60, 0x001a60}, + {0x001a62, 0x001a62}, + {0x001a65, 0x001a6c}, + {0x001a73, 0x001a7c}, + {0x001a7f, 0x001a7f}, + {0x001aa7, 0x001aa7}, + {0x001ab0, 0x001abd}, + {0x001abe, 0x001abe}, + {0x001abf, 0x001ace}, + {0x001b00, 0x001b03}, + {0x001b34, 0x001b34}, + {0x001b36, 0x001b3a}, + {0x001b3c, 0x001b3c}, + {0x001b42, 0x001b42}, + {0x001b6b, 0x001b73}, + {0x001b80, 0x001b81}, + {0x001ba2, 0x001ba5}, + {0x001ba8, 0x001ba9}, + {0x001bab, 0x001bad}, + {0x001be6, 0x001be6}, + {0x001be8, 0x001be9}, + {0x001bed, 0x001bed}, + {0x001bef, 0x001bf1}, + {0x001c2c, 0x001c33}, + {0x001c36, 
0x001c37}, + {0x001c78, 0x001c7d}, + {0x001cd0, 0x001cd2}, + {0x001cd4, 0x001ce0}, + {0x001ce2, 0x001ce8}, + {0x001ced, 0x001ced}, + {0x001cf4, 0x001cf4}, + {0x001cf8, 0x001cf9}, + {0x001d2c, 0x001d6a}, + {0x001d78, 0x001d78}, + {0x001d9b, 0x001dbf}, + {0x001dc0, 0x001dff}, + {0x001fbd, 0x001fbd}, + {0x001fbf, 0x001fc1}, + {0x001fcd, 0x001fcf}, + {0x001fdd, 0x001fdf}, + {0x001fed, 0x001fef}, + {0x001ffd, 0x001ffe}, + {0x00200b, 0x00200f}, + {0x002018, 0x002018}, + {0x002019, 0x002019}, + {0x002024, 0x002024}, + {0x002027, 0x002027}, + {0x00202a, 0x00202e}, + {0x002060, 0x002064}, + {0x002066, 0x00206f}, + {0x002071, 0x002071}, + {0x00207f, 0x00207f}, + {0x002090, 0x00209c}, + {0x0020d0, 0x0020dc}, + {0x0020dd, 0x0020e0}, + {0x0020e1, 0x0020e1}, + {0x0020e2, 0x0020e4}, + {0x0020e5, 0x0020f0}, + {0x002c7c, 0x002c7d}, + {0x002cef, 0x002cf1}, + {0x002d6f, 0x002d6f}, + {0x002d7f, 0x002d7f}, + {0x002de0, 0x002dff}, + {0x002e2f, 0x002e2f}, + {0x003005, 0x003005}, + {0x00302a, 0x00302d}, + {0x003031, 0x003035}, + {0x00303b, 0x00303b}, + {0x003099, 0x00309a}, + {0x00309b, 0x00309c}, + {0x00309d, 0x00309e}, + {0x0030fc, 0x0030fe}, + {0x00a015, 0x00a015}, + {0x00a4f8, 0x00a4fd}, + {0x00a60c, 0x00a60c}, + {0x00a66f, 0x00a66f}, + {0x00a670, 0x00a672}, + {0x00a674, 0x00a67d}, + {0x00a67f, 0x00a67f}, + {0x00a69c, 0x00a69d}, + {0x00a69e, 0x00a69f}, + {0x00a6f0, 0x00a6f1}, + {0x00a700, 0x00a716}, + {0x00a717, 0x00a71f}, + {0x00a720, 0x00a721}, + {0x00a770, 0x00a770}, + {0x00a788, 0x00a788}, + {0x00a789, 0x00a78a}, + {0x00a7f2, 0x00a7f4}, + {0x00a7f8, 0x00a7f9}, + {0x00a802, 0x00a802}, + {0x00a806, 0x00a806}, + {0x00a80b, 0x00a80b}, + {0x00a825, 0x00a826}, + {0x00a82c, 0x00a82c}, + {0x00a8c4, 0x00a8c5}, + {0x00a8e0, 0x00a8f1}, + {0x00a8ff, 0x00a8ff}, + {0x00a926, 0x00a92d}, + {0x00a947, 0x00a951}, + {0x00a980, 0x00a982}, + {0x00a9b3, 0x00a9b3}, + {0x00a9b6, 0x00a9b9}, + {0x00a9bc, 0x00a9bd}, + {0x00a9cf, 0x00a9cf}, + {0x00a9e5, 0x00a9e5}, + {0x00a9e6, 0x00a9e6}, + {0x00aa29, 
0x00aa2e}, + {0x00aa31, 0x00aa32}, + {0x00aa35, 0x00aa36}, + {0x00aa43, 0x00aa43}, + {0x00aa4c, 0x00aa4c}, + {0x00aa70, 0x00aa70}, + {0x00aa7c, 0x00aa7c}, + {0x00aab0, 0x00aab0}, + {0x00aab2, 0x00aab4}, + {0x00aab7, 0x00aab8}, + {0x00aabe, 0x00aabf}, + {0x00aac1, 0x00aac1}, + {0x00aadd, 0x00aadd}, + {0x00aaec, 0x00aaed}, + {0x00aaf3, 0x00aaf4}, + {0x00aaf6, 0x00aaf6}, + {0x00ab5b, 0x00ab5b}, + {0x00ab5c, 0x00ab5f}, + {0x00ab69, 0x00ab69}, + {0x00ab6a, 0x00ab6b}, + {0x00abe5, 0x00abe5}, + {0x00abe8, 0x00abe8}, + {0x00abed, 0x00abed}, + {0x00fb1e, 0x00fb1e}, + {0x00fbb2, 0x00fbc2}, + {0x00fe00, 0x00fe0f}, + {0x00fe13, 0x00fe13}, + {0x00fe20, 0x00fe2f}, + {0x00fe52, 0x00fe52}, + {0x00fe55, 0x00fe55}, + {0x00feff, 0x00feff}, + {0x00ff07, 0x00ff07}, + {0x00ff0e, 0x00ff0e}, + {0x00ff1a, 0x00ff1a}, + {0x00ff3e, 0x00ff3e}, + {0x00ff40, 0x00ff40}, + {0x00ff70, 0x00ff70}, + {0x00ff9e, 0x00ff9f}, + {0x00ffe3, 0x00ffe3}, + {0x00fff9, 0x00fffb}, + {0x0101fd, 0x0101fd}, + {0x0102e0, 0x0102e0}, + {0x010376, 0x01037a}, + {0x010780, 0x010785}, + {0x010787, 0x0107b0}, + {0x0107b2, 0x0107ba}, + {0x010a01, 0x010a03}, + {0x010a05, 0x010a06}, + {0x010a0c, 0x010a0f}, + {0x010a38, 0x010a3a}, + {0x010a3f, 0x010a3f}, + {0x010ae5, 0x010ae6}, + {0x010d24, 0x010d27}, + {0x010eab, 0x010eac}, + {0x010efd, 0x010eff}, + {0x010f46, 0x010f50}, + {0x010f82, 0x010f85}, + {0x011001, 0x011001}, + {0x011038, 0x011046}, + {0x011070, 0x011070}, + {0x011073, 0x011074}, + {0x01107f, 0x011081}, + {0x0110b3, 0x0110b6}, + {0x0110b9, 0x0110ba}, + {0x0110bd, 0x0110bd}, + {0x0110c2, 0x0110c2}, + {0x0110cd, 0x0110cd}, + {0x011100, 0x011102}, + {0x011127, 0x01112b}, + {0x01112d, 0x011134}, + {0x011173, 0x011173}, + {0x011180, 0x011181}, + {0x0111b6, 0x0111be}, + {0x0111c9, 0x0111cc}, + {0x0111cf, 0x0111cf}, + {0x01122f, 0x011231}, + {0x011234, 0x011234}, + {0x011236, 0x011237}, + {0x01123e, 0x01123e}, + {0x011241, 0x011241}, + {0x0112df, 0x0112df}, + {0x0112e3, 0x0112ea}, + {0x011300, 0x011301}, + {0x01133b, 
0x01133c}, + {0x011340, 0x011340}, + {0x011366, 0x01136c}, + {0x011370, 0x011374}, + {0x011438, 0x01143f}, + {0x011442, 0x011444}, + {0x011446, 0x011446}, + {0x01145e, 0x01145e}, + {0x0114b3, 0x0114b8}, + {0x0114ba, 0x0114ba}, + {0x0114bf, 0x0114c0}, + {0x0114c2, 0x0114c3}, + {0x0115b2, 0x0115b5}, + {0x0115bc, 0x0115bd}, + {0x0115bf, 0x0115c0}, + {0x0115dc, 0x0115dd}, + {0x011633, 0x01163a}, + {0x01163d, 0x01163d}, + {0x01163f, 0x011640}, + {0x0116ab, 0x0116ab}, + {0x0116ad, 0x0116ad}, + {0x0116b0, 0x0116b5}, + {0x0116b7, 0x0116b7}, + {0x01171d, 0x01171f}, + {0x011722, 0x011725}, + {0x011727, 0x01172b}, + {0x01182f, 0x011837}, + {0x011839, 0x01183a}, + {0x01193b, 0x01193c}, + {0x01193e, 0x01193e}, + {0x011943, 0x011943}, + {0x0119d4, 0x0119d7}, + {0x0119da, 0x0119db}, + {0x0119e0, 0x0119e0}, + {0x011a01, 0x011a0a}, + {0x011a33, 0x011a38}, + {0x011a3b, 0x011a3e}, + {0x011a47, 0x011a47}, + {0x011a51, 0x011a56}, + {0x011a59, 0x011a5b}, + {0x011a8a, 0x011a96}, + {0x011a98, 0x011a99}, + {0x011c30, 0x011c36}, + {0x011c38, 0x011c3d}, + {0x011c3f, 0x011c3f}, + {0x011c92, 0x011ca7}, + {0x011caa, 0x011cb0}, + {0x011cb2, 0x011cb3}, + {0x011cb5, 0x011cb6}, + {0x011d31, 0x011d36}, + {0x011d3a, 0x011d3a}, + {0x011d3c, 0x011d3d}, + {0x011d3f, 0x011d45}, + {0x011d47, 0x011d47}, + {0x011d90, 0x011d91}, + {0x011d95, 0x011d95}, + {0x011d97, 0x011d97}, + {0x011ef3, 0x011ef4}, + {0x011f00, 0x011f01}, + {0x011f36, 0x011f3a}, + {0x011f40, 0x011f40}, + {0x011f42, 0x011f42}, + {0x013430, 0x01343f}, + {0x013440, 0x013440}, + {0x013447, 0x013455}, + {0x016af0, 0x016af4}, + {0x016b30, 0x016b36}, + {0x016b40, 0x016b43}, + {0x016f4f, 0x016f4f}, + {0x016f8f, 0x016f92}, + {0x016f93, 0x016f9f}, + {0x016fe0, 0x016fe1}, + {0x016fe3, 0x016fe3}, + {0x016fe4, 0x016fe4}, + {0x01aff0, 0x01aff3}, + {0x01aff5, 0x01affb}, + {0x01affd, 0x01affe}, + {0x01bc9d, 0x01bc9e}, + {0x01bca0, 0x01bca3}, + {0x01cf00, 0x01cf2d}, + {0x01cf30, 0x01cf46}, + {0x01d167, 0x01d169}, + {0x01d173, 0x01d17a}, + {0x01d17b, 
0x01d182}, + {0x01d185, 0x01d18b}, + {0x01d1aa, 0x01d1ad}, + {0x01d242, 0x01d244}, + {0x01da00, 0x01da36}, + {0x01da3b, 0x01da6c}, + {0x01da75, 0x01da75}, + {0x01da84, 0x01da84}, + {0x01da9b, 0x01da9f}, + {0x01daa1, 0x01daaf}, + {0x01e000, 0x01e006}, + {0x01e008, 0x01e018}, + {0x01e01b, 0x01e021}, + {0x01e023, 0x01e024}, + {0x01e026, 0x01e02a}, + {0x01e030, 0x01e06d}, + {0x01e08f, 0x01e08f}, + {0x01e130, 0x01e136}, + {0x01e137, 0x01e13d}, + {0x01e2ae, 0x01e2ae}, + {0x01e2ec, 0x01e2ef}, + {0x01e4eb, 0x01e4eb}, + {0x01e4ec, 0x01e4ef}, + {0x01e8d0, 0x01e8d6}, + {0x01e944, 0x01e94a}, + {0x01e94b, 0x01e94b}, + {0x01f3fb, 0x01f3ff}, + {0x0e0001, 0x0e0001}, + {0x0e0020, 0x0e007f}, + {0x0e0100, 0x0e01ef}, +}; + +/* table of Unicode codepoint ranges of White_Space characters */ +static const pg_unicode_range unicode_white_space[11] = +{ + {0x000009, 0x00000d}, + {0x000020, 0x000020}, + {0x000085, 0x000085}, + {0x0000a0, 0x0000a0}, + {0x001680, 0x001680}, + {0x002000, 0x00200a}, + {0x002028, 0x002028}, + {0x002029, 0x002029}, + {0x00202f, 0x00202f}, + {0x00205f, 0x00205f}, + {0x003000, 0x003000}, +}; + +/* table of Unicode codepoint ranges of Hex_Digit characters */ +static const pg_unicode_range unicode_hex_digit[6] = +{ + {0x000030, 0x000039}, + {0x000041, 0x000046}, + {0x000061, 0x000066}, + {0x00ff10, 0x00ff19}, + {0x00ff21, 0x00ff26}, + {0x00ff41, 0x00ff46}, +}; + +/* table of Unicode codepoint ranges of Join_Control characters */ +static const pg_unicode_range unicode_join_control[1] = +{ + {0x00200c, 0x00200d}, }; |