blob: 19da4b410f4228abe3768a3c9c6e87f8147389ca (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
#! /usr/bin/perl
#
# Copyright (c) 2001-2025, PostgreSQL Global Development Group
#
# src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
#
# Generate UTF-8 <--> EUC_TW code conversion tables from
# map files provided by the Unicode organization.
# Unfortunately the organization prohibits redistribution
# of the map files, so if you want to use this script,
# you have to obtain CNS11643.TXT from
# the organization's ftp site.
#
# CNS11643.TXT format:
# CNS11643 code in hex (3 bytes)
# (I guess the first byte means the plane No.)
# UCS-2 code in hex
# # and Unicode name (not used in this script)
use strict;
use warnings FATAL => 'all';
use convutils;
# Path of this script; handed to print_conversion_tables() together with
# the encoding name.
my $this_script = 'src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl';

# Load the mapping entries from CNS11643.TXT.  $mapping is an array ref of
# hash refs; this script rewrites each entry's {code} field in place and
# copies {ucs}, {rest}, {f} and {l} when it creates extra entries.
my $mapping = read_source("CNS11643.TXT");

# Extra one-directional entries, appended to the mapping after the loop so
# that the list is not modified while it is being iterated.
my @extras;

foreach my $entry (@$mapping)
{
	# Code as read from the source file, before conversion.
	my $origcode = $entry->{code};

	# Bits 16..20 of the source code hold the plane number.
	my $plane = ($origcode & 0x1f0000) >> 16;
	if ($plane > 16)
	{
		# NOTE(review): only the conversion is skipped here; the entry itself
		# stays in @$mapping with its original code.  Confirm whether it
		# should be dropped from the output instead.
		print STDERR "Warning: invalid plane No.$plane. ignored\n";
		next;
	}

	# Keep the two low bytes and set the high bit of each of them.
	my $lowbytes = ($origcode & 0xffff) | 0x8080;

	my $code;
	if ($plane == 1)
	{
		# Plane 1 is represented by the two low bytes alone.
		$code = $lowbytes;
	}
	else
	{
		# Other planes get a four-byte code: 0x8e, then 0xa0 + plane,
		# followed by the two low bytes.
		$code = (0x8ea00000 + ($plane << 16)) | $lowbytes;
	}
	$entry->{code} = $code;

	# Some codes are mapped twice in the EUC_TW to UTF-8 table: besides the
	# converted code stored above, add a second entry with 0x8ea10000 added
	# to it.  The extra entry is marked TO_UNICODE only.
	if ($origcode >= 0x12121 && $origcode <= 0x20000)
	{
		push @extras,
		  {
			ucs => $entry->{ucs},
			code => ($code + 0x8ea10000),
			rest => $entry->{rest},
			direction => TO_UNICODE,
			f => $entry->{f},
			l => $entry->{l}
		  };
	}
}

push @$mapping, @extras;

# Emit the conversion tables for EUC_TW from the completed mapping.
print_conversion_tables($this_script, "EUC_TW", $mapping);
|