aboutsummaryrefslogtreecommitdiff
path: root/contrib/tsearch2/wordparser/parser.l
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/tsearch2/wordparser/parser.l')
-rw-r--r--contrib/tsearch2/wordparser/parser.l346
1 files changed, 0 insertions, 346 deletions
diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l
deleted file mode 100644
index a7cb4684c32..00000000000
--- a/contrib/tsearch2/wordparser/parser.l
+++ /dev/null
@@ -1,346 +0,0 @@
-%{
-#include "postgres.h"
-
-#include "deflex.h"
-#include "parser.h"
-#include "common.h"
-
-/*
- * Avoid exit() on fatal scanner errors: from here on, every three-argument
- * fprintf(file, fmt, msg) call in this file is rewritten into
- * ts_error(ERROR, fmt, msg); the stream argument is dropped.
- * NOTE(review): presumably this targets the fprintf() in the fatal-error
- * routine that flex generates, and ts_error(ERROR, ...) is expected not to
- * return -- confirm against the generated scanner and common.h.
- */
-#undef fprintf
-#define fprintf(file, fmt, msg) ts_error(ERROR, fmt, msg)
-
-/*
- * Text and byte length of the token most recently returned by the scanner.
- * Every rule that returns a token type sets both before returning. token
- * points into tsearch2_yytext, ts.str or s; it is not a separate allocation.
- */
-char *token = NULL; /* pointer to token */
-int tokenlen;
-/*
- * strdup()'ed copy of a whole hyphenated word or full URL. The rules that
- * return such a token push the matched text back with yyless(0) so that it
- * can be re-scanned piecewise, and therefore need a private copy to return.
- */
-static char *s = NULL; /* to return WHOLE hyphenated-word */
-
-YY_BUFFER_STATE buf = NULL; /* flex buffer for the string being parsed; needed to parse from a string */
-
-/*
- * Growable buffer in which the text of an HTML tag, comment or <script>
- * element is accumulated across several scanner matches.
- */
-typedef struct {
- int tlen; /* allocated size of str, in bytes */
- int clen; /* bytes of str in use, not counting the trailing NUL */
- char *str; /* malloc'ed buffer; NULL until allocated by startTag() */
-} TagStorage;
-
-/* The single accumulator used by startTag()/addTag(); nothing in this file frees it. */
-static TagStorage ts={0,0,NULL};
-
-/*
- * addTag -- append the current match (tsearch2_yyleng bytes starting at
- * tsearch2_yytext) to the tag accumulator ts and keep it NUL-terminated.
- *
- * The buffer grows by doubling until the existing contents, the new text
- * and the trailing NUL fit.  The target size is computed first and the
- * buffer is resized once, through a temporary pointer, so that a failed
- * realloc() neither loses the only pointer to the old buffer nor leaves
- * ts.tlen describing memory that was never obtained.
- *
- * Reports ERRCODE_OUT_OF_MEMORY through ereport(ERROR) when the buffer
- * cannot be enlarged; ts is left unchanged in that case.
- */
-static void
-addTag(void)
-{
-    int needed = ts.clen + tsearch2_yyleng + 1;
-
-    if (needed > ts.tlen)
-    {
-        /* start from 1 so that doubling makes progress even if tlen is 0 */
-        int newlen = (ts.tlen > 0) ? ts.tlen : 1;
-        char *newstr;
-
-        while (newlen < needed)
-            newlen *= 2;
-
-        newstr = realloc(ts.str, newlen);
-        if (!newstr)
-            ereport(ERROR,
-                    (errcode(ERRCODE_OUT_OF_MEMORY),
-                     errmsg("out of memory")));
-
-        ts.str = newstr;
-        ts.tlen = newlen;
-    }
-
-    memcpy(ts.str + ts.clen, tsearch2_yytext, tsearch2_yyleng);
-    ts.clen += tsearch2_yyleng;
-    ts.str[ts.clen] = '\0';
-}
-
-/*
- * startTag -- begin accumulating a new tag: empty the accumulator ts and
- * store the current match in it as the first piece.
- *
- * The buffer is allocated on first use, sized for the current match plus a
- * trailing NUL; on later calls the existing buffer is reused and addTag()
- * enlarges it when necessary.  Reports ERRCODE_OUT_OF_MEMORY through
- * ereport(ERROR) if that first allocation fails.
- */
-static void
-startTag(void)
-{
-    if (!ts.str)
-    {
-        ts.tlen = tsearch2_yyleng + 1;
-        ts.str = malloc(ts.tlen);
-        if (ts.str == NULL)
-            ereport(ERROR,
-                    (errcode(ERRCODE_OUT_OF_MEMORY),
-                     errmsg("out of memory")));
-    }
-
-    /* discard whatever the previous tag left behind */
-    ts.str[0] = '\0';
-    ts.clen = 0;
-
-    addTag();
-}
-
-%}
-
-/*
- * Scanner options: accept 8-bit input, never treat the input as
- * interactive, generate no default rule (so every start condition must
- * match all input itself), omit unput(), and do not call yywrap() at end
- * of input.
- */
-%option 8bit
-%option never-interactive
-%option nodefault
-%option nounput
-%option noyywrap
-
-/*
- * All start conditions below are exclusive (%x): while one of them is
- * active, only rules that name it explicitly can match.
- */
-
-/* state for re-scanning the parts of a hyphenated word */
-%x DELIM
-/* states for parsing a URL: URL follows "http://" or "ftp://", SERVER is used to split a full URL into host and path */
-%x URL
-%x SERVER
-
-/* states for parsing tags: inside a tag, inside a quoted string within a tag, inside a comment, inside a script element */
-%x INTAG
-%x QINTAG
-%x INCOMMENT
-%x INSCRIPT
-
-/*
- * Character classes.  Bytes \200-\377 (0x80-0xFF) are treated as letters;
- * the original author describes them as Cyrillic KOI8 characters.
- * CYRALNUM and ALPHA are not referenced by any rule in this file.
- */
-CYRALNUM [0-9\200-\377]
-CYRALPHA [\200-\377]
-ALPHA [a-zA-Z\200-\377]
-ALNUM [0-9a-zA-Z\200-\377]
-
-
-/* HOSTNAME: one or more dot-terminated labels followed by an alphabetic final label */
-/* URI: run of characters accepted after the leading '/' or ':' of a URL path */
-HOSTNAME ([-_[:alnum:]]+\.)+[[:alpha:]]+
-URI [-_[:alnum:]/%,\.;=&?#]+
-
-%%
-
-    /*
-     * HTML-like markup.  The opening rule of each construct calls startTag()
-     * to reset the accumulator ts with the matched text.  Rules in the
-     * exclusive states below append to it with addTag() until the closing
-     * delimiter is seen; the whole construct is then returned as one TAG
-     * token whose text is ts.str.
-     */
-
-    /* "<script" in any letter case: collect everything up to "</script>" */
-"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; startTag(); }
-
-<INSCRIPT>"</"[Ss][Cc][Rr][Ii][Pp][Tt]">" {
- BEGIN INITIAL;
- addTag();
- token = ts.str;
- tokenlen = ts.clen;
- return TAG;
-}
-
-    /* comment: collect everything up to the first "-->" */
-"<!--" { BEGIN INCOMMENT; startTag(); }
-
-<INCOMMENT>"-->" {
- BEGIN INITIAL;
- addTag();
- token = ts.str;
- tokenlen = ts.clen;
- return TAG;
-}
-
-
-    /* opening tag or "<!..." declaration: '<' followed by '!' or a letter */
-"<"[\![:alpha:]] { BEGIN INTAG; startTag(); }
-
-    /* closing tag: "</" followed by a letter */
-"</"[[:alpha:]] { BEGIN INTAG; startTag(); }
-
-    /* a double quote inside a tag opens a quoted string ... */
-<INTAG>"\"" { BEGIN QINTAG; addTag(); }
-
-    /* ... in which a backslash-escaped quote does not end the string ... */
-<QINTAG>"\\\"" { addTag(); }
-
-    /* ... and an unescaped quote returns to the tag proper */
-<QINTAG>"\"" { BEGIN INTAG; addTag(); }
-
-    /* '>' ends the tag only outside a quoted string (INTAG, not QINTAG) */
-<INTAG>">" {
- BEGIN INITIAL;
- addTag();
- token = ts.str;
- tokenlen = ts.clen;
- return TAG;
-}
-
-    /* any other character, newline included, is part of the construct */
-<QINTAG,INTAG,INCOMMENT,INSCRIPT>.|\n { addTag(); }
-
-    /* named HTML entities; only these five names are recognised */
-\&(quot|amp|nbsp|lt|gt)\; {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return HTMLENTITY;
-}
-
-    /* decimal character reference: "&#", one to three digits, ";" */
-\&\#[0-9][0-9]?[0-9]?\; {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return HTMLENTITY;
-}
-
-    /* local part, '@', then a HOSTNAME */
-[-_\.[:alnum:]]+@{HOSTNAME} /* Emails */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return EMAIL;
-}
-
-    /*
-     * Numbers.  Where several of these patterns match, flex takes the
-     * longest match and, for equal lengths, the rule listed first.
-     */
-
-    /* optional sign, integer or decimal mantissa, exponent marker e/E/d/D, optionally signed exponent */
-[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+ /* float */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return SCIENTIFIC;
-}
-
-    /* digits separated by at least two dots, ending in a digit */
-[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return VERSIONNUMBER;
-}
-
-    /* optionally signed number with exactly one decimal point */
-[+-]?[0-9]+\.[0-9]+ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return DECIMAL;
-}
-
-    /* integer with a mandatory sign */
-[+-][0-9]+ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return SIGNEDINT;
-}
-
-    /* unsigned integer; also active while re-scanning a hyphenated word */
-<DELIM,INITIAL>[0-9]+ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return UNSIGNEDINT;
-}
-
-    /*
-     * URL scheme prefixes.  Both switch to the URL state, where only the
-     * host/URL rules and the fall-back rule are active.
-     */
-http"://" {
- BEGIN URL;
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return HTTP;
-}
-
-    /* "ftp://" is reported with the same HTTP token type as "http://" */
-ftp"://" {
- BEGIN URL;
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return HTTP;
-}
-
-    /*
-     * Host name immediately followed by '/' or ':' and a URI: return the
-     * whole text as one FURL token, then push all of it back with yyless(0)
-     * and switch to SERVER so that the following calls return its HOST and
-     * URI parts separately.  Because the text is pushed back, the token
-     * returned here must be a private copy (s), and tokenlen must be taken
-     * before yyless() resets tsearch2_yyleng.  A failed strdup() is reported
-     * as out of memory instead of returning a NULL token.
-     */
-<URL,INITIAL>{HOSTNAME}[/:]{URI} {
-    free(s);    /* free(NULL) is a no-op */
-    s = strdup( tsearch2_yytext );
-    if (!s)
-        ereport(ERROR,
-                (errcode(ERRCODE_OUT_OF_MEMORY),
-                 errmsg("out of memory")));
-    BEGIN SERVER;
-    tokenlen = tsearch2_yyleng;
-    yyless( 0 );
-    token = s;
-    return FURL;
-}
-
-    /* bare host name; in SERVER this is the host part of a pushed-back URL */
-<SERVER,URL,INITIAL>{HOSTNAME} {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return HOST;
-}
-
-    /* path or port part of a pushed-back URL, only recognised in SERVER */
-<SERVER>[/:]{URI} {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return URI;
-}
-
-    /* path-like text: path characters on both sides of at least one '/' */
-[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return FILEPATH;
-}
-
-    /*
-     * Hyphenated (composite) words.  Each rule returns the whole word as
-     * one token, then pushes all of it back with yyless(0) and switches to
-     * DELIM so that the following calls return the parts one by one.
-     * Because the text is pushed back, the token returned here must be a
-     * private copy (s), and tokenlen must be taken before yyless() resets
-     * tsearch2_yyleng.  A failed strdup() is reported as out of memory
-     * instead of returning a NULL token.
-     *
-     * The three patterns can match the same text with the same length; flex
-     * then picks the rule listed first, so the order below is significant.
-     */
-
-    /* every part consists of bytes \200-\377 only */
-({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */ {
-    free(s);    /* free(NULL) is a no-op */
-    s = strdup( tsearch2_yytext );
-    if (!s)
-        ereport(ERROR,
-                (errcode(ERRCODE_OUT_OF_MEMORY),
-                 errmsg("out of memory")));
-    BEGIN DELIM;
-    tokenlen = tsearch2_yyleng;
-    yyless( 0 );
-    token = s;
-    return CYRHYPHENWORD;
-}
-
-    /* every part consists of [:alpha:] characters only */
-([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */ {
-    free(s);    /* free(NULL) is a no-op */
-    s = strdup( tsearch2_yytext );
-    if (!s)
-        ereport(ERROR,
-                (errcode(ERRCODE_OUT_OF_MEMORY),
-                 errmsg("out of memory")));
-    BEGIN DELIM;
-    tokenlen = tsearch2_yyleng;
-    yyless( 0 );
-    token = s;
-    return LATHYPHENWORD;
-}
-
-    /* parts may mix digits, ASCII letters and bytes \200-\377 */
-({ALNUM}+-)+{ALNUM}+ /* composite-word */ {
-    free(s);    /* free(NULL) is a no-op */
-    s = strdup( tsearch2_yytext );
-    if (!s)
-        ereport(ERROR,
-                (errcode(ERRCODE_OUT_OF_MEMORY),
-                 errmsg("out of memory")));
-    BEGIN DELIM;
-    tokenlen = tsearch2_yyleng;
-    yyless( 0 );
-    token = s;
-    return HYPHENWORD;
-}
-
-    /*
-     * Rules for the DELIM state, i.e. while re-scanning the text of a
-     * hyphenated word that was pushed back with yyless(0).
-     */
-
-    /* digits separated by at least two dots, ending in a digit */
-<DELIM>[0-9]+\.[0-9]+\.[0-9\.]*[0-9] {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return VERSIONNUMBER;
-}
-
-    /* decimal number; only '+' is accepted as a sign here, '-' has its own rule below */
-<DELIM>\+?[0-9]+\.[0-9]+ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return DECIMAL;
-}
-
-    /*
-     * Word parts.  As with the whole-word rules, equal-length matches are
-     * resolved in favour of the rule listed first.
-     */
-<DELIM>{CYRALPHA}+ /* one word in composite-word */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return CYRPARTHYPHENWORD;
-}
-
-<DELIM>[[:alpha:]]+ /* one word in composite-word */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return LATPARTHYPHENWORD;
-}
-
-<DELIM>{ALNUM}+ /* one word in composite-word */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return PARTHYPHENWORD;
-}
-
-    /* the hyphen between two parts is reported as SPACE */
-<DELIM>- {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return SPACE;
-}
-
-    /*
-     * Anything else ends the special state: push the character back,
-     * return to INITIAL and keep scanning.  No token is returned from this
-     * action, so the character is matched again by the INITIAL rules.
-     */
-<DELIM,SERVER,URL>.|\n /* return in basic state */ {
- BEGIN INITIAL;
- yyless( 0 );
-}
-
-    /*
-     * Plain words.  Text that matches more than one of these patterns with
-     * the same length is assigned to the rule listed first.
-     */
-
-    /* word made of bytes \200-\377 only */
-{CYRALPHA}+ /* normal word */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return CYRWORD;
-}
-
-    /* word made of [:alpha:] characters only */
-[[:alpha:]]+ /* normal word */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return LATWORD;
-}
-
-    /* word mixing digits, ASCII letters and bytes \200-\377 */
-{ALNUM}+ /* normal word */ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return UWORD;
-}
-
-    /* run of blanks, carriage returns, newlines and tabs */
-[ \r\n\t]+ {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return SPACE;
-}
-
-    /* any other single character (except newline) is also reported as SPACE */
-. {
- token = tsearch2_yytext;
- tokenlen = tsearch2_yyleng;
- return SPACE;
-}
-
-%%
-
-/*
- * tsearch2_end_parse -- clean up after parsing from a string.
- *
- * Frees the saved whole-word/whole-URL copy held in s and deletes the flex
- * buffer buf, resetting both pointers to NULL.
- */
-void
-tsearch2_end_parse(void)
-{
-    /* free(NULL) is a no-op, so no guard is needed */
-    free(s);
-    s = NULL;
-
-    tsearch2_yy_delete_buffer(buf);
-    buf = NULL;
-}
-
-/*
- * tsearch2_start_parse_str -- start parsing the first "limit" bytes of "str".
- *
- * Creates a flex buffer over the bytes with tsearch2_yy_scan_bytes(), makes
- * it the current input, and puts the scanner into the INITIAL state.
- */
-void
-tsearch2_start_parse_str(char* str, int limit)
-{
-    /* a non-NULL buf means an earlier parse was never ended; clean it up */
-    if (buf != NULL)
-    {
-        tsearch2_end_parse();
-    }
-
-    buf = tsearch2_yy_scan_bytes(str, limit);
-    tsearch2_yy_switch_to_buffer(buf);
-
-    BEGIN INITIAL;
-}