1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
|
#!/usr/bin/perl
# Copyright (c) 2024-2025, PostgreSQL Global Development Group
use strict;
use warnings FATAL => 'all';
use Getopt::Long;
# Settings taken from the command line.  The directory settings start out
# as empty strings; $depfile stays undefined unless --depfile is given.
my ($outdir_path, $makefile_path, $input_path) = ('', '', '');
# NOTE(review): $makefile_path is not read anywhere in the visible code.
my $depfile;

# Languages to generate a snowball dictionary and configuration for.  The
# generated SQL follows the order of this list.
our @languages = qw(
    arabic armenian basque catalan danish dutch english estonian
    finnish french german greek hindi hungarian indonesian irish
    italian lithuanian nepali norwegian portuguese romanian russian
    serbian spanish swedish tamil turkish yiddish
);

# Dictionary to use for all-ASCII words, keyed by language.  A language
# with no entry here uses its own dictionary for ASCII words as well.
#
# Ordering constraint: the dictionary named as the ASCII fallback has to
# exist before the language that refers to it is created, so the fallback
# language must appear earlier in @languages than the language using it.
our %ascii_languages = (
    hindi   => 'english',
    russian => 'english',
);
# Parse the command line.  GetOptions returns false on an unrecognized or
# malformed option, in which case usage() prints the synopsis and dies.
GetOptions(
    'depfile' => \$depfile,
    'outdir:s' => \$outdir_path,
    'input:s' => \$input_path) || usage();

# The usage text documents '.' as the default output directory.  Without
# this, an empty value would make the output path "/snowball_create.sql",
# i.e. a file in the filesystem root.
$outdir_path = '.' if $outdir_path eq '';

# Make sure input_path ends in a slash if needed.  (This must modify
# $input_path itself; appending to $outdir_path here would corrupt the
# output directory.)
if ($input_path ne '' && substr($input_path, -1) ne '/')
{
    $input_path .= '/';
}

# Make sure outdir_path ends in a slash if needed.
if (substr($outdir_path, -1) ne '/')
{
    $outdir_path .= '/';
}

GenerateTsearchFiles();
# usage
#
# Abort the script with the command-line synopsis.  Takes no arguments and
# never returns: the text is raised with die.  Because the text ends in a
# newline, Perl does not append an "at ... line N" suffix to it.
sub usage
{
    die <<EOM;
Usage: snowball_create.pl --input/-i <path> --outdir/-o <path>
--depfile Write dependency file
--outdir Output directory (default '.')
--input Input directory
snowball_create.pl creates snowball_create.sql from snowball.sql.in and snowball_func.sql.in
EOM
}
# GenerateTsearchFiles
#
# Write snowball_create.sql into the output directory and, when $depfile
# is set, a snowball_create.dep file next to it.
#
# The SQL output is a one-line header comment, followed by the contents of
# snowball_func.sql.in, followed by one copy of the snowball.sql.in
# template per entry of @languages with these placeholders filled in:
#
#   _LANGNAME_, _CFGNAME_         the language name
#   _DICTNAME_, _NONASCDICTNAME_  "<language>_stem"
#   _ASCDICTNAME_                 "<ascii language>_stem", where the ASCII
#                                 language comes from %ascii_languages and
#                                 defaults to the language itself
#   _STOPWORDS_                   ", StopWords=<language>" when a non-empty
#                                 stopwords/<language>.stop file exists in
#                                 the input directory, otherwise empty
#
# The dependency file lists, as prerequisites of the SQL file, both
# templates and every stopword file that was found.
#
# Uses the file-level $input_path, $outdir_path and $depfile.  Takes no
# arguments and returns nothing.  Dies if an input file cannot be read, if
# an output file cannot be opened, or if closing an output file reports an
# error.
sub GenerateTsearchFiles
{
    my $outdir_file = "$outdir_path/snowball_create.sql";
    my $tmpl = read_file("$input_path/snowball.sql.in");

    # $D is only opened, written and closed when a depfile was requested.
    my $D;
    if ($depfile)
    {
        my $dep_path = "$outdir_path/snowball_create.dep";
        open($D, '>', $dep_path)
          || die "Could not write $dep_path: $!";
        print $D "$outdir_file: $input_path/snowball.sql.in\n";
        print $D "$outdir_file: $input_path/snowball_func.sql.in\n";
    }

    open(my $F, '>', $outdir_file)
      || die "Could not write $outdir_file: $!";

    print $F "-- Language-specific snowball dictionaries\n";
    print $F read_file("$input_path/snowball_func.sql.in");

    foreach my $lang (@languages)
    {
        my $asclang = $ascii_languages{$lang} || $lang;
        my $stop = '';
        my $stopword_path = "$input_path/stopwords/$lang.stop";

        # -s is true only for an existing file of non-zero size.
        if (-s $stopword_path)
        {
            $stop = ", StopWords=$lang";
            print $D "$outdir_file: $stopword_path\n" if $depfile;
        }

        # Applied strictly in this order, one placeholder at a time.
        my @substitutions = (
            [ '_LANGNAME_', $lang ],
            [ '_DICTNAME_', "${lang}_stem" ],
            [ '_CFGNAME_', $lang ],
            [ '_ASCDICTNAME_', "${asclang}_stem" ],
            [ '_NONASCDICTNAME_', "${lang}_stem" ],
            [ '_STOPWORDS_', $stop ],
        );

        my $txt = $tmpl;
        foreach my $pair (@substitutions)
        {
            my ($placeholder, $value) = @{$pair};
            $txt =~ s/\Q$placeholder\E/$value/g;
        }
        print $F $txt;
    }

    # Buffered write errors (e.g. a full disk) are only reported at close,
    # so a failing close must not be ignored for output files.
    close($F) || die "Could not close $outdir_file: $!";
    if ($depfile)
    {
        close($D) || die "Could not close snowball_create.dep: $!";
    }
    return;
}
# read_file
#
# Return the whole contents of a file as one string.
#
#   $filename - path of the file to read
#
# Dies with a message naming the file and the system error text if the
# file cannot be opened.
sub read_file
{
    my $filename = shift;

    open(my $fh, '<', $filename)
      || die "Could not open file $filename: $!\n";

    # Undefine the input record separator for this one read only, so the
    # readline returns the entire file instead of a single line.
    my $txt = do { local $/ = undef; <$fh> };

    close($fh);
    return $txt;
}
|