aboutsummaryrefslogtreecommitdiff
path: root/src/backend/snowball/snowball_create.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/snowball/snowball_create.pl')
-rw-r--r--src/backend/snowball/snowball_create.pl148
1 files changed, 148 insertions, 0 deletions
diff --git a/src/backend/snowball/snowball_create.pl b/src/backend/snowball/snowball_create.pl
new file mode 100644
index 00000000000..f4b58ada1cb
--- /dev/null
+++ b/src/backend/snowball/snowball_create.pl
@@ -0,0 +1,148 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+my $outdir_path = '';
+my $makefile_path = '';
+my $input_path = '';
+my $depfile;
+
+our @languages = qw(
+ arabic
+ armenian
+ basque
+ catalan
+ danish
+ dutch
+ english
+ finnish
+ french
+ german
+ greek
+ hindi
+ hungarian
+ indonesian
+ irish
+ italian
+ lithuanian
+ nepali
+ norwegian
+ portuguese
+ romanian
+ russian
+ serbian
+ spanish
+ swedish
+ tamil
+ turkish
+ yiddish
+);
+
+# Names of alternative dictionaries for all-ASCII words. If not
+# listed, the language itself is used. Note order dependency: Use of
+# some other language as ASCII dictionary must come after creation of
+# that language, so the "backup" language must be listed earlier in
+# @languages.
+
+our %ascii_languages = (
+ 'hindi' => 'english',
+ 'russian' => 'english',
+);
+
+GetOptions(
+ 'depfile' => \$depfile,
+ 'outdir:s' => \$outdir_path,
+ 'input:s' => \$input_path) || usage();
+
+# Make sure input_path ends in a slash if needed.
+if ($input_path ne '' && substr($input_path, -1) ne '/')
+{
+ $outdir_path .= '/';
+}
+
+# Make sure outdir_path ends in a slash if needed.
+if ($outdir_path ne '' && substr($outdir_path, -1) ne '/')
+{
+ $outdir_path .= '/';
+}
+
+GenerateTsearchFiles();
+
+sub usage
+{
+ die <<EOM;
+Usage: snowball_create.pl --input/-i <path> --outdir/-o <path>
+ --depfile Write dependency file
+ --outdir Output directory (default '.')
+ --input Input directory
+
+snowball_create.pl creates snowball.sql from snowball.sql.in
+EOM
+}
+
+sub GenerateTsearchFiles
+{
+ my $target = shift;
+ my $outdir_file = "$outdir_path/snowball_create.sql";
+
+ my $F;
+ my $D;
+ my $tmpl = read_file("$input_path/snowball.sql.in");
+
+ if ($depfile)
+ {
+ open($D, '>', "$outdir_path/snowball_create.dep")
+ || die "Could not write snowball_create.dep";
+ }
+
+ print $D "$outdir_file: $input_path/snowball.sql.in\n" if $depfile;
+ print $D "$outdir_file: $input_path/snowball_func.sql.in\n" if $depfile;
+
+ open($F, '>', $outdir_file)
+ || die "Could not write snowball_create.sql";
+
+ print $F "-- Language-specific snowball dictionaries\n";
+
+ print $F read_file("$input_path/snowball_func.sql.in");
+
+ foreach my $lang (@languages)
+ {
+ my $asclang = $ascii_languages{$lang} || $lang;
+ my $txt = $tmpl;
+ my $stop = '';
+ my $stopword_path = "$input_path/stopwords/$lang.stop";
+
+ if (-s "$stopword_path")
+ {
+ $stop = ", StopWords=$lang";
+
+ print $D "$outdir_file: $stopword_path\n" if $depfile;
+ }
+
+ $txt =~ s#_LANGNAME_#${lang}#gs;
+ $txt =~ s#_DICTNAME_#${lang}_stem#gs;
+ $txt =~ s#_CFGNAME_#${lang}#gs;
+ $txt =~ s#_ASCDICTNAME_#${asclang}_stem#gs;
+ $txt =~ s#_NONASCDICTNAME_#${lang}_stem#gs;
+ $txt =~ s#_STOPWORDS_#$stop#gs;
+ print $F $txt;
+ }
+ close($F);
+ close($D) if $depfile;
+ return;
+}
+
+
+sub read_file
+{
+ my $filename = shift;
+ my $F;
+ local $/ = undef;
+ open($F, '<', $filename) || die "Could not open file $filename\n";
+ my $txt = <$F>;
+ close($F);
+
+ return $txt;
+}