#!/usr/bin/perl -w use strict; # Check that the keyword lists in gram.y and kwlist.h are sane. Run from # the top directory, or pass a path to a top directory as argument. # # src/tools/check_keywords.pl my $path; if (@ARGV) { $path = $ARGV[0]; shift @ARGV; } else { $path = "."; } $[ = 1; # set array base to 1 $, = ' '; # set output field separator $\ = "\n"; # set output record separator my %keyword_categories; $keyword_categories{'unreserved_keyword'} = 'UNRESERVED_KEYWORD'; $keyword_categories{'col_name_keyword'} = 'COL_NAME_KEYWORD'; $keyword_categories{'type_func_name_keyword'} = 'TYPE_FUNC_NAME_KEYWORD'; $keyword_categories{'reserved_keyword'} = 'RESERVED_KEYWORD'; my $gram_filename = "$path/src/backend/parser/gram.y"; open(GRAM, $gram_filename) || die("Could not open : $gram_filename"); my ($S, $s, $k, $n, $kcat); my $comment; my @arr; my %keywords; line: while () { chomp; # strip record separator $S = $_; # Make sure any braces are split $s = '{', $S =~ s/$s/ { /g; $s = '}', $S =~ s/$s/ } /g; # Any comments are split $s = '[/][*]', $S =~ s#$s# /* #g; $s = '[*][/]', $S =~ s#$s# */ #g; if (!($kcat)) { # Is this the beginning of a keyword list? foreach $k (keys %keyword_categories) { if ($S =~ m/^($k):/) { $kcat = $k; next line; } } next line; } # Now split the line into individual fields $n = (@arr = split(' ', $S)); # Ok, we're in a keyword list. Go through each field in turn for (my $fieldIndexer = 1; $fieldIndexer <= $n; $fieldIndexer++) { if ($arr[$fieldIndexer] eq '*/' && $comment) { $comment = 0; next; } elsif ($comment) { next; } elsif ($arr[$fieldIndexer] eq '/*') { # start of a multiline comment $comment = 1; next; } elsif ($arr[$fieldIndexer] eq '//') { next line; } if ($arr[$fieldIndexer] eq ';') { # end of keyword list $kcat = ''; next; } if ($arr[$fieldIndexer] eq '|') { next; } # Put this keyword into the right list push @{$keywords{$kcat}}, $arr[$fieldIndexer]; } } close GRAM; # Check that all keywords are in alphabetical order my ($prevkword, $kword, $bare_kword); foreach $kcat (keys %keyword_categories) { $prevkword = ''; foreach $kword (@{$keywords{$kcat}}) { # Some keyword have a _P suffix. Remove it for the comparison. $bare_kword = $kword; $bare_kword =~ s/_P$//; if ($bare_kword le $prevkword) { print "'$bare_kword' after '$prevkword' in $kcat list is misplaced"; } $prevkword = $bare_kword; } } # Transform the keyword lists into hashes. # kwhashes is a hash of hashes, keyed by keyword category id, e.g. # UNRESERVED_KEYWORD. Each inner hash is a keyed by keyword id, e.g. ABORT_P # with a dummy value. my %kwhashes; while ( my ($kcat, $kcat_id) = each(%keyword_categories) ) { @arr = @{$keywords{$kcat}}; my $hash; foreach my $item (@arr) { $hash->{$item} = 1 } $kwhashes{$kcat_id} = $hash; } # Now read in kwlist.h my $kwlist_filename = "$path/src/include/parser/kwlist.h"; open(KWLIST, $kwlist_filename) || die("Could not open : $kwlist_filename"); my $prevkwstring = ''; my $bare_kwname; my %kwhash; kwlist_line: while () { my($line) = $_; if ($line =~ /^PG_KEYWORD\(\"(.*)\", (.*), (.*)\)/) { my($kwstring) = $1; my($kwname) = $2; my($kwcat_id) = $3; # Check that the list is in alphabetical order if ($kwstring le $prevkwstring) { print "'$kwstring' after '$prevkwstring' in kwlist.h is misplaced"; } $prevkwstring = $kwstring; # Check that the keyword string is valid: all lower-case ASCII chars if ($kwstring !~ /^[a-z_]*$/) { print "'$kwstring' is not a valid keyword string, must be all lower-case ASCII chars"; } # Check that the keyword name is valid: all upper-case ASCII chars if ($kwname !~ /^[A-Z_]*$/) { print "'$kwname' is not a valid keyword name, must be all upper-case ASCII chars"; } # Check that the keyword string matches keyword name $bare_kwname = $kwname; $bare_kwname =~ s/_P$//; if ($bare_kwname ne uc($kwstring)) { print "keyword name '$kwname' doesn't match keyword string '$kwstring'"; } # Check that the keyword is present in the grammar %kwhash = %{$kwhashes{$kwcat_id}}; if (!(%kwhash)) { #print "Unknown kwcat_id: $kwcat_id"; } else { if (!($kwhash{$kwname})) { print "'$kwname' not present in $kwcat_id section of gram.y"; } else { # Remove it from the hash, so that we can complain at the end # if there's keywords left that were not found in kwlist.h delete $kwhashes{$kwcat_id}->{$kwname}; } } } } close KWLIST; # Check that we've paired up all keywords from gram.y with lines in kwlist.h while ( my ($kwcat, $kwcat_id) = each(%keyword_categories) ) { %kwhash = %{$kwhashes{$kwcat_id}}; for my $kw ( keys %kwhash ) { print "'$kw' found in gram.y $kwcat category, but not in kwlist.h" } }