aboutsummaryrefslogtreecommitdiff
path: root/src/backend/snowball/snowball_create.pl
blob: dffa8feb769002d3ac708704004870a9ee9fb34c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/perl

# Copyright (c) 2024-2025, PostgreSQL Global Development Group

use strict;
use warnings FATAL => 'all';
use Getopt::Long;

# Settings taken from the command line.  The path settings start out
# empty; $depfile stays undefined unless --depfile is given.
# NOTE(review): $makefile_path is not referenced by any code visible in
# this file.
my ($outdir_path, $makefile_path, $input_path) = ('', '', '');
my $depfile;

# Languages for which a snowball dictionary and configuration are
# generated, in the order they are written to the output file.
our @languages = qw(
  arabic armenian basque catalan danish dutch
  english estonian finnish french german greek
  hindi hungarian indonesian irish italian lithuanian
  nepali norwegian portuguese romanian russian serbian
  spanish swedish tamil turkish yiddish
);

# Alternative dictionaries to use for all-ASCII words, keyed by language.
# A language without an entry here acts as its own ASCII dictionary.
# The order of @languages matters: a language named as the ASCII
# dictionary of another one has to be created first, so it must come
# earlier in @languages than every language that refers to it.

our %ascii_languages = (
	hindi => 'english',
	russian => 'english',
);

# Parse the command line.  When GetOptions reports a failure, usage()
# dies with the help text.
GetOptions(
	'depfile' => \$depfile,
	'outdir:s' => \$outdir_path,
	'input:s' => \$input_path) || usage();

# The usage text documents '.' as the default output directory.  Without
# this, an empty path combined with the "$outdir_path/..." file names
# built in GenerateTsearchFiles would point at the filesystem root.
$outdir_path = '.' if $outdir_path eq '';

# Make sure input_path ends in a slash if needed.
if ($input_path ne '' && substr($input_path, -1) ne '/')
{
	$input_path .= '/';
}

# Make sure outdir_path ends in a slash if needed.
if ($outdir_path ne '' && substr($outdir_path, -1) ne '/')
{
	$outdir_path .= '/';
}

GenerateTsearchFiles();

# Abort with a summary of the accepted command-line options.
#
# Never returns: the help text is raised with die.  The heredoc ends in a
# newline, so die does not append the script name and line number.
sub usage
{
	die <<EOM;
Usage: snowball_create.pl --input/-i <path> --outdir/-o <path>
    --depfile       Write dependency file
    --outdir        Output directory (default '.')
    --input         Input directory

snowball_create.pl creates snowball_create.sql from snowball.sql.in
and snowball_func.sql.in
EOM
}

# Generate snowball_create.sql in the output directory.
#
# The output starts with a header comment and the contents of
# snowball_func.sql.in, followed by one copy of the snowball.sql.in
# template per entry of @languages with the _..._ placeholders filled in.
# When $depfile is set, snowball_create.dep is written next to the SQL
# file, with one "target: prerequisite" line per input file used.
#
# Takes no arguments; reads the file-level $input_path, $outdir_path,
# $depfile, @languages and %ascii_languages.  Dies if any file cannot be
# read, written or closed.
sub GenerateTsearchFiles
{
	my $outdir_file = "$outdir_path/snowball_create.sql";
	my $dep_file = "$outdir_path/snowball_create.dep";
	my $tmpl_file = "$input_path/snowball.sql.in";
	my $func_file = "$input_path/snowball_func.sql.in";

	my $F;
	my $D;
	my $tmpl = read_file($tmpl_file);

	if ($depfile)
	{
		open($D, '>', $dep_file)
		  || die "Could not write $dep_file: $!";

		print $D "$outdir_file: $tmpl_file\n";
		print $D "$outdir_file: $func_file\n";
	}

	open($F, '>', $outdir_file)
	  || die "Could not write $outdir_file: $!";

	print $F "-- Language-specific snowball dictionaries\n";

	print $F read_file($func_file);

	foreach my $lang (@languages)
	{
		# Languages without an entry in %ascii_languages use their own
		# dictionary for all-ASCII words.
		my $asclang = $ascii_languages{$lang} || $lang;
		my $txt = $tmpl;
		my $stop = '';
		my $stopword_path = "$input_path/stopwords/$lang.stop";

		# Only reference a stopword list that exists and is non-empty.
		if (-s $stopword_path)
		{
			$stop = ", StopWords=$lang";

			print $D "$outdir_file: $stopword_path\n" if $depfile;
		}

		$txt =~ s#_LANGNAME_#${lang}#gs;
		$txt =~ s#_DICTNAME_#${lang}_stem#gs;
		$txt =~ s#_CFGNAME_#${lang}#gs;
		$txt =~ s#_ASCDICTNAME_#${asclang}_stem#gs;
		$txt =~ s#_NONASCDICTNAME_#${lang}_stem#gs;
		$txt =~ s#_STOPWORDS_#$stop#gs;
		print $F $txt;
	}

	# Buffered write errors only surface at close, so check it: a
	# truncated output file must not pass silently.
	close($F) || die "Could not close $outdir_file: $!";
	if ($depfile)
	{
		close($D) || die "Could not close $dep_file: $!";
	}
	return;
}


# Return the entire contents of a file as a single string.
#
#   $filename - path of the file to read
#
# Dies with a message naming the file and the system error if the file
# cannot be opened.
sub read_file
{
	my $filename = shift;

	open(my $fh, '<', $filename)
	  || die "Could not open file $filename: $!\n";

	# Undefine the input record separator just for this read, so that
	# one <$fh> returns the whole file.
	my $txt = do { local $/ = undef; <$fh> };
	close($fh);

	return $txt;
}