diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2013-04-09 01:05:55 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2013-04-09 01:06:54 -0400 |
commit | 3ccae48f44d993351e1f881761bd6c556ebd6638 (patch) | |
tree | ec365a856f56aae32745644164d0a8b5889ee5fd /src/include/regex/regexport.h | |
parent | e60d20a35e436cef3c454bfeab34d8ea71b54910 (diff) | |
download | postgresql-3ccae48f44d993351e1f881761bd6c556ebd6638.tar.gz postgresql-3ccae48f44d993351e1f881761bd6c556ebd6638.zip |
Support indexing of regular-expression searches in contrib/pg_trgm.
This works by extracting trigrams from the given regular expression,
in generally the same spirit as the previously-existing support for
LIKE searches, though of course the details are far more complicated.
Currently, only GIN indexes are supported. We might be able to make
it work with GiST indexes later.
The implementation includes adding API functions to backend/regex/
to provide a view of the search NFA created from a regular expression.
These functions are meant to be generic enough to be supportable in
a standalone version of the regex library, should that ever happen.
Alexander Korotkov, reviewed by Heikki Linnakangas and Tom Lane
Diffstat (limited to 'src/include/regex/regexport.h')
-rw-r--r-- | src/include/regex/regexport.h | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/src/include/regex/regexport.h b/src/include/regex/regexport.h new file mode 100644 index 00000000000..35889da294b --- /dev/null +++ b/src/include/regex/regexport.h @@ -0,0 +1,57 @@ +/*------------------------------------------------------------------------- + * + * regexport.h + * Declarations for exporting info about a regex's NFA (nondeterministic + * finite automaton) + * + * The functions declared here provide accessors to extract the NFA state + * graph and color character sets of a successfully-compiled regex. + * + * An NFA contains one or more states, numbered 0..N-1. There is an initial + * state, as well as a final state --- reaching the final state denotes + * successful matching of an input string. Each state except the final one + * has some out-arcs that lead to successor states, each arc being labeled + * with a color that represents one or more concrete character codes. + * (The colors of a state's out-arcs need not be distinct, since this is an + * NFA not a DFA.) There are also "pseudocolors" representing start/end of + * line and start/end of string. Colors are numbered 0..C-1, but note that + * color 0 is "white" (all unused characters) and can generally be ignored. + * + * Portions Copyright (c) 2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1998, 1999 Henry Spencer + * + * IDENTIFICATION + * src/include/regex/regexport.h + * + *------------------------------------------------------------------------- + */ +#ifndef _REGEXPORT_H_ +#define _REGEXPORT_H_ + +#include "regex/regex.h" + +/* information about one arc of a regex's NFA */ +typedef struct +{ + int co; /* label (character-set color) of arc */ + int to; /* next state number */ +} regex_arc_t; + + +/* Functions for gathering information about NFA states and arcs */ +extern int pg_reg_getnumstates(const regex_t *regex); +extern int pg_reg_getinitialstate(const regex_t *regex); +extern int pg_reg_getfinalstate(const regex_t *regex); +extern int pg_reg_getnumoutarcs(const regex_t *regex, int st); +extern void pg_reg_getoutarcs(const regex_t *regex, int st, + regex_arc_t *arcs, int arcs_len); + +/* Functions for gathering information about colors */ +extern int pg_reg_getnumcolors(const regex_t *regex); +extern int pg_reg_colorisbegin(const regex_t *regex, int co); +extern int pg_reg_colorisend(const regex_t *regex, int co); +extern int pg_reg_getnumcharacters(const regex_t *regex, int co); +extern void pg_reg_getcharacters(const regex_t *regex, int co, + pg_wchar *chars, int chars_len); + +#endif /* _REGEXPORT_H_ */ |