aboutsummaryrefslogtreecommitdiff
path: root/src/common/unicode/generate-unicode_east_asian_fw_table.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/common/unicode/generate-unicode_east_asian_fw_table.pl')
-rw-r--r--src/common/unicode/generate-unicode_east_asian_fw_table.pl76
1 files changed, 76 insertions, 0 deletions
diff --git a/src/common/unicode/generate-unicode_east_asian_fw_table.pl b/src/common/unicode/generate-unicode_east_asian_fw_table.pl
new file mode 100644
index 00000000000..45f7a4b7fe7
--- /dev/null
+++ b/src/common/unicode/generate-unicode_east_asian_fw_table.pl
@@ -0,0 +1,76 @@
+#!/usr/bin/perl
+#
+# Generate a sorted list of non-overlapping intervals of East Asian Wide (W)
+# and East Asian Fullwidth (F) characters, using Unicode data files as input.
+# Pass EastAsianWidth.txt as argument.  The output is on stdout.
+#
+# Each data line is expected to look like "XXXX;W" or "XXXX..YYYY;W",
+# optionally followed by a "#" comment.  Whitespace around the fields is
+# ignored.  Lines are processed in file order, and F/W entries whose code
+# points are adjacent are merged into a single interval.
+#
+# Copyright (c) 2019-2021, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+# first code point of the F/W interval being accumulated, or undef if none
+my $range_start = undef;
+
+# code point bounds of the data line currently being processed
+my ($first, $last);
+
+# upper bound of the most recent data line seen before the current one
+my $prev_last;
+
+# Print one table entry covering the code points $lo through $hi.
+sub emit_interval
+{
+	my ($lo, $hi) = @_;
+
+	printf "\t{0x%04X, 0x%04X},\n", $lo, $hi;
+	return;
+}
+
+print
+  "/* generated by src/common/unicode/generate-unicode_east_asian_fw_table.pl, do not edit */\n\n";
+
+print "static const struct mbinterval east_asian_fw[] = {\n";
+
+# Read the input one line at a time instead of building a list of all lines.
+while (my $line = <ARGV>)
+{
+	chomp $line;
+
+	# Drop any comment, then surrounding whitespace (which also takes care
+	# of a trailing carriage return).
+	$line =~ s/#.*$//;
+	$line =~ s/^\s+|\s+$//g;
+	next if $line eq '';
+
+	# Allow optional whitespace on either side of the field separator.
+	my ($codepoint, $width) = split /\s*;\s*/, $line;
+
+	# Refuse to guess at malformed input; a wrong table is worse than none.
+	die sprintf("%s:%d: malformed data line \"%s\"\n", $ARGV, $., $line)
+	  unless defined $width
+	  && $codepoint =~ /^[0-9A-Fa-f]+(?:\.\.[0-9A-Fa-f]+)?$/;
+
+	if ($codepoint =~ /\.\./)
+	{
+		($first, $last) = split /\.\./, $codepoint;
+	}
+	else
+	{
+		# a single code point is a range of length one
+		$first = $last = $codepoint;
+	}
+
+	($first, $last) = map(hex, ($first, $last));
+
+	if ($width eq 'F' || $width eq 'W')
+	{
+		# fullwidth/wide characters
+		if (!defined($range_start))
+		{
+			# save for start of range if one hasn't been started yet
+			$range_start = $first;
+		}
+		elsif ($first != $prev_last + 1)
+		{
+			# ranges aren't contiguous; emit the last and start a new one
+			emit_interval($range_start, $prev_last);
+			$range_start = $first;
+		}
+
+		# otherwise this line simply extends the range in progress
+	}
+	else
+	{
+		# not wide characters, print out previous range if any
+		if (defined($range_start))
+		{
+			emit_interval($range_start, $prev_last);
+			$range_start = undef;
+		}
+	}
+}
+continue
+{
+	# Also runs after "next"; skipped lines leave $last untouched, so this
+	# keeps tracking the upper bound of the latest data line.
+	$prev_last = $last;
+}
+
+# don't forget any ranges at the very end of the database (though there are none
+# as of Unicode 13.0)
+if (defined($range_start))
+{
+	emit_interval($range_start, $prev_last);
+}
+
+print "};\n";