diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2012-07-10 18:00:44 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2012-07-10 18:00:44 -0400 |
commit | a9287de1760450e7fe3b4309ee1ba7ea2af39217 (patch) | |
tree | 309455b0f7c242d78fda0048aad636765cd1e6b3 /src/include/regex | |
parent | ed45a5373029f2ff08ce76cf3807499afe3873ee (diff) | |
download | postgresql-a9287de1760450e7fe3b4309ee1ba7ea2af39217.tar.gz postgresql-a9287de1760450e7fe3b4309ee1ba7ea2af39217.zip |
Back-patch fix for extraction of fixed prefixes from regular expressions.
Back-patch of commits 628cbb50ba80c83917b07a7609ddec12cda172d0 and
c6aae3042be5249e672b731ebeb21875b5343010. This has been broken since
7.3, so back-patch to all supported branches.
Diffstat (limited to 'src/include/regex')
-rw-r--r-- | src/include/regex/regex.h | 4 | ||||
-rw-r--r-- | src/include/regex/regguts.h | 30 |
2 files changed, 26 insertions, 8 deletions
diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h index cec4b837cd1..616c2c6450d 100644 --- a/src/include/regex/regex.h +++ b/src/include/regex/regex.h @@ -156,6 +156,9 @@ typedef struct /* two specials for debugging and testing */ #define REG_ATOI 101 /* convert error-code name to number */ #define REG_ITOA 102 /* convert error-code number to name */ +/* non-error result codes for pg_regprefix */ +#define REG_PREFIX (-1) /* identified a common prefix */ +#define REG_EXACT (-2) /* identified an exact match */ @@ -164,6 +167,7 @@ typedef struct */ extern int pg_regcomp(regex_t *, const pg_wchar *, size_t, int, Oid); extern int pg_regexec(regex_t *, const pg_wchar *, size_t, size_t, rm_detail_t *, size_t, regmatch_t[], int); +extern int pg_regprefix(regex_t *, pg_wchar **, size_t *); extern void pg_regfree(regex_t *); extern size_t pg_regerror(int, const regex_t *, char *, size_t); extern void pg_set_regex_collation(Oid collation); diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h index 0cced701dbd..81442a231e3 100644 --- a/src/include/regex/regguts.h +++ b/src/include/regex/regguts.h @@ -188,6 +188,7 @@ struct colordesc color sub; /* open subcolor (if any); free chain ptr */ #define NOSUB COLORLESS struct arc *arcs; /* color chain */ + chr firstchr; /* char first assigned to this color */ int flags; #define FREECOL 01 /* currently free */ #define PSEUDO 02 /* pseudocolor, no real chars */ @@ -255,15 +256,14 @@ struct state; struct arc { - int type; -#define ARCFREE '\0' + int type; /* 0 if free, else an NFA arc type code */ color co; struct state *from; /* where it's from (and contained within) */ struct state *to; /* where it's to */ - struct arc *outchain; /* *from's outs chain or free chain */ + struct arc *outchain; /* link in *from's outs chain or free chain */ #define freechain outchain - struct arc *inchain; /* *to's ins chain */ - struct arc *colorchain; /* color's arc chain */ + struct arc *inchain; /* link in *to's ins chain */ + struct arc *colorchain; /* link in color's arc chain */ struct arc *colorchainRev; /* back-link in color's arc chain */ }; @@ -315,24 +315,38 @@ struct nfa /* * definitions for compacted NFA + * + * The main space savings in a compacted NFA is from making the arcs as small + * as possible. We store only the transition color and next-state number for + * each arc. The list of out arcs for each state is an array beginning at + * cnfa.states[statenumber], and terminated by a dummy carc struct with + * co == COLORLESS. + * + * The non-dummy carc structs are of two types: plain arcs and LACON arcs. + * Plain arcs just store the transition color number as "co". LACON arcs + * store the lookahead constraint number plus cnfa.ncolors as "co". LACON + * arcs can be distinguished from plain by testing for co >= cnfa.ncolors. */ struct carc { color co; /* COLORLESS is list terminator */ - int to; /* state number */ + int to; /* next-state number */ }; struct cnfa { int nstates; /* number of states */ - int ncolors; /* number of colors */ + int ncolors; /* number of colors (max color in use + 1) */ int flags; -#define HASLACONS 01 /* uses lookahead constraints */ +#define HASLACONS 01 /* uses lookahead constraints */ int pre; /* setup state number */ int post; /* teardown state number */ color bos[2]; /* colors, if any, assigned to BOS and BOL */ color eos[2]; /* colors, if any, assigned to EOS and EOL */ + char *stflags; /* vector of per-state flags bytes */ +#define CNFA_NOPROGRESS 01 /* flag bit for a no-progress state */ struct carc **states; /* vector of pointers to outarc lists */ + /* states[n] are pointers into a single malloc'd array of arcs */ struct carc *arcs; /* the area for the lists */ }; |