aboutsummaryrefslogtreecommitdiff
path: root/src/include/regex/regguts.h
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2012-07-10 14:54:37 -0400
committer Tom Lane <tgl@sss.pgh.pa.us> 2012-07-10 14:54:37 -0400
commit 628cbb50ba80c83917b07a7609ddec12cda172d0 (patch)
tree 7008492921c90e6de7c431633e33624a597a8416 /src/include/regex/regguts.h
parent 00dac6000d422033c3e8d191f01ee0e6525794c2 (diff)
download postgresql-628cbb50ba80c83917b07a7609ddec12cda172d0.tar.gz
postgresql-628cbb50ba80c83917b07a7609ddec12cda172d0.zip
Re-implement extraction of fixed prefixes from regular expressions.
To generate btree-indexable conditions from regex WHERE conditions (such as WHERE indexed_col ~ '^foo'), we need to be able to identify any fixed prefix that a regex might have; that is, find any string that must be a prefix of all strings satisfying the regex. We used to do that with entirely ad-hoc code that looked at the source text of the regex. It didn't know very much about regex syntax, which mostly meant that it would fail to identify some optimizable cases; but Viktor Rosenfeld reported that it would produce actively wrong answers for quantified parenthesized subexpressions, such as '^(foo)?bar'. Rather than trying to extend the ad-hoc code to cover this, let's get rid of it altogether in favor of identifying prefixes by examining the compiled form of a regex. To do this, I've added a new entry point "pg_regprefix" to the regex library; hopefully it is defined in a sufficiently general fashion that it can remain in the library when/if that code gets split out as a standalone project. Since this bug has been there for a very long time, this fix needs to get back-patched. However, it depends on some other recent commits (particularly the addition of wchar-to-database-encoding conversion), so I'll commit this separately and then go to work on back-porting the necessary fixes.
Diffstat (limited to 'src/include/regex/regguts.h')
-rw-r--r-- src/include/regex/regguts.h 10
1 files changed, 6 insertions, 4 deletions
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h
index b8788506d41..e1e406f4eaa 100644
--- a/src/include/regex/regguts.h
+++ b/src/include/regex/regguts.h
@@ -199,19 +199,21 @@ struct colordesc
color sub; /* open subcolor, if any; or free-chain ptr */
#define NOSUB COLORLESS /* value of "sub" when no open subcolor */
struct arc *arcs; /* chain of all arcs of this color */
+ chr firstchr; /* char first assigned to this color */
int flags; /* bit values defined next */
#define FREECOL 01 /* currently free */
#define PSEUDO 02 /* pseudocolor, no real chars */
-#define UNUSEDCOLOR(cd) ((cd)->flags&FREECOL)
+#define UNUSEDCOLOR(cd) ((cd)->flags & FREECOL)
union tree *block; /* block of solid color, if any */
};
/*
* The color map itself
*
- * Only the "tree" part is used at execution time, and that only via the
- * GETCOLOR() macro. Possibly that should be separated from the compile-time
- * data.
+ * Much of the data in the colormap struct is only used at compile time.
+ * However, the bulk of the space usage is in the "tree" structure, so it's
+ * not clear that there's much point in converting the rest to a more compact
+ * form when compilation is finished.
*/
struct colormap
{