13 files changed, 673 insertions, 70 deletions
diff --git a/src/backend/regex/README b/src/backend/regex/README
index 5c24d3dfe9d..6c9f48315e3 100644
--- a/src/backend/regex/README
+++ b/src/backend/regex/README
@@ -332,10 +332,10 @@ The possible arc types are:
     as "$0->to_state" or "$1->to_state" for end-of-string and end-of-line
     constraints respectively.
 
-    LACON constraints, which represent "(?=re)" and "(?!re)" constraints,
-    i.e. the input starting at this point must match (or not match) a
-    given sub-RE, but the matching input is not consumed.  These are
-    dumped as ":subtree_number:->to_state".
+    LACON constraints, which represent "(?=re)", "(?!re)", "(?<=re)", and
+    "(?<!re)" constraints, i.e. the input starting/ending at this point must
+    match (or not match) a given sub-RE, but the matching input is not
+    consumed.  These are dumped as ":subtree_number:->to_state".
 
 If you see anything else (especially any question marks) in the display of
 an arc, it's dumpnfa() trying to tell you that there's something fishy
diff --git a/src/backend/regex/re_syntax.n b/src/backend/regex/re_syntax.n
index f37bb85abdb..4621bfc25f4 100644
--- a/src/backend/regex/re_syntax.n
+++ b/src/backend/regex/re_syntax.n
@@ -196,10 +196,18 @@ where a substring matching \fIre\fR begins
 \fB(?!\fIre\fB)\fR
 \fInegative lookahead\fR (AREs only), matches at any point
 where no substring matching \fIre\fR begins
+.TP
+\fB(?<=\fIre\fB)\fR
+\fIpositive lookbehind\fR (AREs only), matches at any point
+where a substring matching \fIre\fR ends
+.TP
+\fB(?<!\fIre\fB)\fR
+\fInegative lookbehind\fR (AREs only), matches at any point
+where no substring matching \fIre\fR ends
 .RE
 .PP
-The lookahead constraints may not contain back references (see later),
-and all parentheses within them are considered non-capturing.
+Lookahead and lookbehind constraints may not contain back references
+(see later), and all parentheses within them are considered non-capturing.
 .PP
 An RE may not end with `\fB\e\fR'.
 
@@ -856,7 +864,8 @@ Incompatibilities of note include `\fB\eb\fR', `\fB\eB\fR',
 the lack of special treatment for a trailing newline,
 the addition of complemented bracket expressions to the things
 affected by newline-sensitive matching,
-the restrictions on parentheses and back references in lookahead constraints,
+the restrictions on parentheses and back references in lookahead/lookbehind
+constraints,
 and the longest/shortest-match (rather than first-match) matching semantics.
 .PP
 The matching rules for REs containing both normal and non-greedy quantifiers
diff --git a/src/backend/regex/regc_lex.c b/src/backend/regex/regc_lex.c
index f6ed9f09ea4..bfd9dcd2a49 100644
--- a/src/backend/regex/regc_lex.c
+++ b/src/backend/regex/regc_lex.c
@@ -582,6 +582,8 @@ next(struct vars * v)
 			{
 				NOTE(REG_UNONPOSIX);
 				v->now++;
+				if (ATEOS())
+					FAILW(REG_BADRPT);
 				switch (*v->now++)
 				{
 					case CHR(':'):		/* non-capturing paren */
@@ -596,12 +598,31 @@ next(struct vars * v)
 						return next(v);
 						break;
 					case CHR('='):		/* positive lookahead */
-						NOTE(REG_ULOOKAHEAD);
-						RETV(LACON, 1);
+						NOTE(REG_ULOOKAROUND);
+						RETV(LACON, LATYPE_AHEAD_POS);
 						break;
 					case CHR('!'):		/* negative lookahead */
-						NOTE(REG_ULOOKAHEAD);
-						RETV(LACON, 0);
+						NOTE(REG_ULOOKAROUND);
+						RETV(LACON, LATYPE_AHEAD_NEG);
+						break;
+					case CHR('<'):
+						if (ATEOS())
+							FAILW(REG_BADRPT);
+						switch (*v->now++)
+						{
+							case CHR('='):		/* positive lookbehind */
+								NOTE(REG_ULOOKAROUND);
+								RETV(LACON, LATYPE_BEHIND_POS);
+								break;
+							case CHR('!'):		/* negative lookbehind */
+								NOTE(REG_ULOOKAROUND);
+								RETV(LACON, LATYPE_BEHIND_NEG);
+								break;
+							default:
+								FAILW(REG_BADRPT);
+								break;
+						}
+						assert(NOTREACHED);
 						break;
 					default:
 						FAILW(REG_BADRPT);
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
index 6f04321cd35..cd9a3239bd3 100644
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@@ -1349,6 +1349,49 @@ cleartraverse(struct nfa * nfa,
 }
 
 /*
+ * single_color_transition - does getting from s1 to s2 cross one PLAIN arc?
+ *
+ * If traversing from s1 to s2 requires a single PLAIN match (possibly of any
+ * of a set of colors), return a state whose outarc list contains only PLAIN
+ * arcs of those color(s).  Otherwise return NULL.
+ *
+ * This is used before optimizing the NFA, so there may be EMPTY arcs, which
+ * we should ignore; the possibility of an EMPTY is why the result state could
+ * be different from s1.
+ *
+ * It's worth troubling to handle multiple parallel PLAIN arcs here because a
+ * bracket construct such as [abc] might yield either one or several parallel
+ * PLAIN arcs depending on earlier atoms in the expression.  We'd rather that
+ * that implementation detail not create user-visible performance differences.
+ */
+static struct state *
+single_color_transition(struct state * s1, struct state * s2)
+{
+	struct arc *a;
+
+	/* Ignore leading EMPTY arc, if any */
+	if (s1->nouts == 1 && s1->outs->type == EMPTY)
+		s1 = s1->outs->to;
+	/* Likewise for any trailing EMPTY arc */
+	if (s2->nins == 1 && s2->ins->type == EMPTY)
+		s2 = s2->ins->from;
+	/* Perhaps we could have a single-state loop in between, if so reject */
+	if (s1 == s2)
+		return NULL;
+	/* s1 must have at least one outarc... */
+	if (s1->outs == NULL)
+		return NULL;
+	/* ... and they must all be PLAIN arcs to s2 */
+	for (a = s1->outs; a != NULL; a = a->outchain)
+	{
+		if (a->type != PLAIN || a->to != s2)
+			return NULL;
+	}
+	/* OK, return s1 as the possessor of the relevant outarcs */
+	return s1;
+}
+
+/*
  * specialcolors - fill in special colors for an NFA
  */
 static void
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index b733bc7824e..aa759c26486 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -57,6 +57,8 @@ static const chr *scanplain(struct vars *);
 static void onechr(struct vars *, chr, struct state *, struct state *);
 static void dovec(struct vars *, struct cvec *, struct state *, struct state *);
 static void wordchrs(struct vars *);
+static void processlacon(struct vars *, struct state *, struct state *, int,
+			 struct state *, struct state *);
 static struct subre *subre(struct vars *, int, int, struct state *, struct state *);
 static void freesubre(struct vars *, struct subre *);
 static void freesrnode(struct vars *, struct subre *);
@@ -65,7 +67,7 @@ static int	numst(struct subre *, int);
 static void markst(struct subre *);
 static void cleanst(struct vars *);
 static long nfatree(struct vars *, struct subre *, FILE *);
-static long nfanode(struct vars *, struct subre *, FILE *);
+static long nfanode(struct vars *, struct subre *, int, FILE *);
 static int	newlacon(struct vars *, struct state *, struct state *, int);
 static void freelacons(struct subre *, int);
 static void rfree(regex_t *);
@@ -146,6 +148,7 @@ static void deltraverse(struct nfa *, struct state *, struct state *);
 static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *);
 static void duptraverse(struct nfa *, struct state *, struct state *);
 static void cleartraverse(struct nfa *, struct state *);
+static struct state *single_color_transition(struct state *, struct state *);
 static void specialcolors(struct nfa *);
 static long optimize(struct nfa *, FILE *);
 static void pullback(struct nfa *, FILE *);
@@ -245,8 +248,9 @@ struct vars
 	int			ntree;			/* number of tree nodes, plus one */
 	struct cvec *cv;			/* interface cvec */
 	struct cvec *cv2;			/* utility cvec */
-	struct subre *lacons;		/* lookahead-constraint vector */
-	int			nlacons;		/* size of lacons */
+	struct subre *lacons;		/* lookaround-constraint vector */
+	int			nlacons;		/* size of lacons[]; note that only slots
+								 * numbered 1 .. nlacons-1 are used */
 	size_t		spaceused;		/* approx. space used for compilation */
 };
 
@@ -277,7 +281,7 @@ struct vars
 #define CCLASS	'C'				/* start of [: */
 #define END 'X'					/* end of [. [= [: */
 #define RANGE	'R'				/* - within [] which might be range delim. */
-#define LACON	'L'				/* lookahead constraint subRE */
+#define LACON	'L'				/* lookaround constraint subRE */
 #define AHEAD	'a'				/* color-lookahead arc */
 #define BEHIND	'r'				/* color-lookbehind arc */
 #define WBDRY	'w'				/* word boundary constraint */
@@ -432,11 +436,15 @@ pg_regcomp(regex_t *re,
 	assert(v->nlacons == 0 || v->lacons != NULL);
 	for (i = 1; i < v->nlacons; i++)
 	{
+		struct subre *lasub = &v->lacons[i];
+
 #ifdef REG_DEBUG
 		if (debug != NULL)
 			fprintf(debug, "\n\n\n========= LA%d ==========\n", i);
 #endif
-		nfanode(v, &v->lacons[i], debug);
+
+		/* Prepend .* to pattern if it's a lookbehind LACON */
+		nfanode(v, lasub, !LATYPE_IS_AHEAD(lasub->subno), debug);
 	}
 	CNOERR();
 	if (v->tree->flags & SHORTER)
@@ -640,7 +648,7 @@ makesearch(struct vars * v,
 static struct subre *
 parse(struct vars * v,
 	  int stopper,				/* EOS or ')' */
-	  int type,					/* LACON (lookahead subRE) or PLAIN */
+	  int type,					/* LACON (lookaround subRE) or PLAIN */
 	  struct state * init,		/* initial state */
 	  struct state * final)		/* final state */
 {
@@ -719,7 +727,7 @@ parse(struct vars * v,
 static struct subre *
 parsebranch(struct vars * v,
 			int stopper,		/* EOS or ')' */
-			int type,			/* LACON (lookahead subRE) or PLAIN */
+			int type,			/* LACON (lookaround subRE) or PLAIN */
 			struct state * left,	/* leftmost state */
 			struct state * right,		/* rightmost state */
 			int partial)		/* is this only part of a branch? */
@@ -768,7 +776,7 @@ parsebranch(struct vars * v,
 static void
 parseqatom(struct vars * v,
 		   int stopper,			/* EOS or ')' */
-		   int type,			/* LACON (lookahead subRE) or PLAIN */
+		   int type,			/* LACON (lookaround subRE) or PLAIN */
 		   struct state * lp,	/* left state to hang it on */
 		   struct state * rp,	/* right state to hang it on */
 		   struct subre * top)	/* subtree top */
@@ -782,7 +790,7 @@ parseqatom(struct vars * v,
 	struct subre *atom;			/* atom's subtree */
 	struct subre *t;
 	int			cap;			/* capturing parens? */
-	int			pos;			/* positive lookahead? */
+	int			latype;			/* lookaround constraint type */
 	int			subno;			/* capturing-parens or backref number */
 	int			atomtype;
 	int			qprefer;		/* quantifier short/long preference */
@@ -866,19 +874,18 @@ parseqatom(struct vars * v,
 			nonword(v, AHEAD, s, rp);
 			return;
 			break;
-		case LACON:				/* lookahead constraint */
-			pos = v->nextvalue;
+		case LACON:				/* lookaround constraint */
+			latype = v->nextvalue;
 			NEXT();
 			s = newstate(v->nfa);
 			s2 = newstate(v->nfa);
 			NOERR();
 			t = parse(v, ')', LACON, s, s2);
 			freesubre(v, t);	/* internal structure irrelevant */
-			assert(SEE(')') || ISERR());
-			NEXT();
-			n = newlacon(v, s, s2, pos);
 			NOERR();
-			ARCV(LACON, n);
+			assert(SEE(')'));
+			NEXT();
+			processlacon(v, s, s2, latype, lp, rp);
 			return;
 			break;
 			/* then errors, to get them out of the way */
@@ -1634,6 +1641,75 @@ wordchrs(struct vars * v)
 }
 
 /*
+ * processlacon - generate the NFA representation of a LACON
+ *
+ * In the general case this is just newlacon() + newarc(), but some cases
+ * can be optimized.
+ */
+static void
+processlacon(struct vars * v,
+			 struct state * begin,		/* start of parsed LACON sub-re */
+			 struct state * end,	/* end of parsed LACON sub-re */
+			 int latype,
+			 struct state * lp, /* left state to hang it on */
+			 struct state * rp) /* right state to hang it on */
+{
+	struct state *s1;
+	int			n;
+
+	/*
+	 * Check for lookaround RE consisting of a single plain color arc (or set
+	 * of arcs); this would typically be a simple chr or a bracket expression.
+	 */
+	s1 = single_color_transition(begin, end);
+	switch (latype)
+	{
+		case LATYPE_AHEAD_POS:
+			/* If lookahead RE is just colorset C, convert to AHEAD(C) */
+			if (s1 != NULL)
+			{
+				cloneouts(v->nfa, s1, lp, rp, AHEAD);
+				return;
+			}
+			break;
+		case LATYPE_AHEAD_NEG:
+			/* If lookahead RE is just colorset C, convert to AHEAD(^C)|$ */
+			if (s1 != NULL)
+			{
+				colorcomplement(v->nfa, v->cm, AHEAD, s1, lp, rp);
+				newarc(v->nfa, '$', 1, lp, rp);
+				newarc(v->nfa, '$', 0, lp, rp);
+				return;
+			}
+			break;
+		case LATYPE_BEHIND_POS:
+			/* If lookbehind RE is just colorset C, convert to BEHIND(C) */
+			if (s1 != NULL)
+			{
+				cloneouts(v->nfa, s1, lp, rp, BEHIND);
+				return;
+			}
+			break;
+		case LATYPE_BEHIND_NEG:
+			/* If lookbehind RE is just colorset C, convert to BEHIND(^C)|^ */
+			if (s1 != NULL)
+			{
+				colorcomplement(v->nfa, v->cm, BEHIND, s1, lp, rp);
+				newarc(v->nfa, '^', 1, lp, rp);
+				newarc(v->nfa, '^', 0, lp, rp);
+				return;
+			}
+			break;
+		default:
+			assert(NOTREACHED);
+	}
+
+	/* General case: we need a LACON subre and arc */
+	n = newlacon(v, begin, end, latype);
+	newarc(v->nfa, LACON, n, lp, rp);
+}
+
+/*
  * subre - allocate a subre
  */
 static struct subre *
@@ -1826,15 +1902,18 @@ nfatree(struct vars * v,
 	if (t->right != NULL)
 		(DISCARD) nfatree(v, t->right, f);
 
-	return nfanode(v, t, f);
+	return nfanode(v, t, 0, f);
 }
 
 /*
- * nfanode - do one NFA for nfatree
+ * nfanode - do one NFA for nfatree or lacons
+ *
+ * If converttosearch is true, apply makesearch() to the NFA.
  */
 static long						/* optimize results */
 nfanode(struct vars * v,
 		struct subre * t,
+		int converttosearch,
 		FILE *f)				/* for debug output */
 {
 	struct nfa *nfa;
@@ -1855,10 +1934,11 @@ nfanode(struct vars * v,
 	NOERRZ();
 	dupnfa(nfa, t->begin, t->end, nfa->init, nfa->final);
 	if (!ISERR())
-	{
 		specialcolors(nfa);
+	if (!ISERR())
 		ret = optimize(nfa, f);
-	}
+	if (converttosearch && !ISERR())
+		makesearch(v, nfa);
 	if (!ISERR())
 		compact(nfa, &t->cnfa);
 
@@ -1867,13 +1947,13 @@ nfanode(struct vars * v,
 }
 
 /*
- * newlacon - allocate a lookahead-constraint subRE
+ * newlacon - allocate a lookaround-constraint subRE
  */
 static int						/* lacon number */
 newlacon(struct vars * v,
 		 struct state * begin,
 		 struct state * end,
-		 int pos)
+		 int latype)
 {
 	int			n;
 	struct subre *newlacons;
@@ -1900,13 +1980,13 @@ newlacon(struct vars * v,
 	sub = &v->lacons[n];
 	sub->begin = begin;
 	sub->end = end;
-	sub->subno = pos;
+	sub->subno = latype;
 	ZAPCNFA(sub->cnfa);
 	return n;
 }
 
 /*
- * freelacons - free lookahead-constraint subRE vector
+ * freelacons - free lookaround-constraint subRE vector
  */
 static void
 freelacons(struct subre * subs,
@@ -2020,9 +2100,29 @@ dump(regex_t *re,
 	}
 	for (i = 1; i < g->nlacons; i++)
 	{
-		fprintf(f, "\nla%d (%s):\n", i,
-				(g->lacons[i].subno) ? "positive" : "negative");
-		dumpcnfa(&g->lacons[i].cnfa, f);
+		struct subre *lasub = &g->lacons[i];
+		const char *latype;
+
+		switch (lasub->subno)
+		{
+			case LATYPE_AHEAD_POS:
+				latype = "positive lookahead";
+				break;
+			case LATYPE_AHEAD_NEG:
+				latype = "negative lookahead";
+				break;
+			case LATYPE_BEHIND_POS:
+				latype = "positive lookbehind";
+				break;
+			case LATYPE_BEHIND_NEG:
+				latype = "negative lookbehind";
+				break;
+			default:
+				latype = "???";
+				break;
+		}
+		fprintf(f, "\nla%d (%s):\n", i, latype);
+		dumpcnfa(&lasub->cnfa, f);
 	}
 	fprintf(f, "\n");
 	dumpst(g->tree, f, 0);
diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c
index a37e4b0ef96..7d90242acef 100644
--- a/src/backend/regex/rege_dfa.c
+++ b/src/backend/regex/rege_dfa.c
@@ -287,6 +287,130 @@ shortest(struct vars * v,
 }
 
 /*
+ * matchuntil - incremental matching engine
+ *
+ * This is meant for use with a search-style NFA (that is, the pattern is
+ * known to act as though it had a leading .*).  We determine whether a
+ * match exists starting at v->start and ending at probe.  Multiple calls
+ * require only O(N) time not O(N^2) so long as the probe values are
+ * nondecreasing.  *lastcss and *lastcp must be initialized to NULL before
+ * starting a series of calls.
+ *
+ * Returns 1 if a match exists, 0 if not.
+ * Internal errors also return 0, with v->err set.
+ */
+static int
+matchuntil(struct vars * v,
+		   struct dfa * d,
+		   chr *probe,			/* we want to know if a match ends here */
+		   struct sset ** lastcss,		/* state storage across calls */
+		   chr **lastcp)		/* state storage across calls */
+{
+	chr		   *cp = *lastcp;
+	color		co;
+	struct sset *css = *lastcss;
+	struct sset *ss;
+	struct colormap *cm = d->cm;
+
+	/* initialize and startup, or restart, if necessary */
+	if (cp == NULL || cp > probe)
+	{
+		cp = v->start;
+		css = initialize(v, d, cp);
+		if (css == NULL)
+			return 0;
+
+		FDEBUG((">>> startup >>>\n"));
+		co = d->cnfa->bos[(v->eflags & REG_NOTBOL) ? 0 : 1];
+		FDEBUG(("color %ld\n", (long) co));
+
+		css = miss(v, d, css, co, cp, v->start);
+		if (css == NULL)
+			return 0;
+		css->lastseen = cp;
+	}
+	else if (css == NULL)
+	{
+		/* we previously found that no match is possible beyond *lastcp */
+		return 0;
+	}
+	ss = css;
+
+	/*
+	 * This is the main text-scanning loop.  It seems worth having two copies
+	 * to avoid the overhead of REG_FTRACE tests here, even in REG_DEBUG
+	 * builds, when you're not actively tracing.
+	 */
+#ifdef REG_DEBUG
+	if (v->eflags & REG_FTRACE)
+	{
+		while (cp < probe)
+		{
+			FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
+			co = GETCOLOR(cm, *cp);
+			FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
+			ss = css->outs[co];
+			if (ss == NULL)
+			{
+				ss = miss(v, d, css, co, cp + 1, v->start);
+				if (ss == NULL)
+					break;		/* NOTE BREAK OUT */
+			}
+			cp++;
+			ss->lastseen = cp;
+			css = ss;
+		}
+	}
+	else
+#endif
+	{
+		while (cp < probe)
+		{
+			co = GETCOLOR(cm, *cp);
+			ss = css->outs[co];
+			if (ss == NULL)
+			{
+				ss = miss(v, d, css, co, cp + 1, v->start);
+				if (ss == NULL)
+					break;		/* NOTE BREAK OUT */
+			}
+			cp++;
+			ss->lastseen = cp;
+			css = ss;
+		}
+	}
+
+	*lastcss = ss;
+	*lastcp = cp;
+
+	if (ss == NULL)
+		return 0;				/* impossible match, or internal error */
+
+	/* We need to process one more chr, or the EOS symbol, to check match */
+	if (cp < v->stop)
+	{
+		FDEBUG((">>> at c%d >>>\n", (int) (css - d->ssets)));
+		co = GETCOLOR(cm, *cp);
+		FDEBUG(("char %c, color %ld\n", (char) *cp, (long) co));
+		ss = css->outs[co];
+		if (ss == NULL)
+			ss = miss(v, d, css, co, cp + 1, v->start);
+	}
+	else
+	{
+		assert(cp == v->stop);
+		co = d->cnfa->eos[(v->eflags & REG_NOTEOL) ? 0 : 1];
+		FDEBUG(("color %ld\n", (long) co));
+		ss = miss(v, d, css, co, cp, v->start);
+	}
+
+	if (ss == NULL || !(ss->flags & POSTSTATE))
+		return 0;
+
+	return 1;
+}
+
+/*
  * lastcold - determine last point at which no progress had been made
  */
 static chr *					/* endpoint, or NULL */
@@ -613,19 +737,19 @@ miss(struct vars * v,
 }
 
 /*
- * lacon - lookahead-constraint checker for miss()
+ * lacon - lookaround-constraint checker for miss()
  */
 static int						/* predicate:  constraint satisfied? */
 lacon(struct vars * v,
 	  struct cnfa * pcnfa,		/* parent cnfa */
 	  chr *cp,
-	  pcolor co)				/* "color" of the lookahead constraint */
+	  pcolor co)				/* "color" of the lookaround constraint */
 {
 	int			n;
 	struct subre *sub;
 	struct dfa *d;
-	struct smalldfa sd;
 	chr		   *end;
+	int			satisfied;
 
 	/* Since this is recursive, it could be driven to stack overflow */
 	if (STACK_TOO_DEEP(v->re))
@@ -635,19 +759,35 @@ lacon(struct vars * v,
 	}
 
 	n = co - pcnfa->ncolors;
-	assert(n < v->g->nlacons && v->g->lacons != NULL);
+	assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
 	FDEBUG(("=== testing lacon %d\n", n));
 	sub = &v->g->lacons[n];
-	d = newdfa(v, &sub->cnfa, &v->g->cmap, &sd);
+	d = getladfa(v, n);
 	if (d == NULL)
-	{
-		ERR(REG_ESPACE);
 		return 0;
+	if (LATYPE_IS_AHEAD(sub->subno))
+	{
+		/* used to use longest() here, but shortest() could be much cheaper */
+		end = shortest(v, d, cp, cp, v->stop,
+					   (chr **) NULL, (int *) NULL);
+		satisfied = LATYPE_IS_POS(sub->subno) ? (end != NULL) : (end == NULL);
+	}
+	else
+	{
+		/*
+		 * To avoid doing O(N^2) work when repeatedly testing a lookbehind
+		 * constraint in an N-character string, we use matchuntil() which can
+		 * cache the DFA state across calls.  We only need to restart if the
+		 * probe point decreases, which is not common.  The NFA we're using is
+		 * a search NFA, so it doesn't mind scanning over stuff before the
+		 * nominal match.
+		 */
+		satisfied = matchuntil(v, d, cp, &v->lblastcss[n], &v->lblastcp[n]);
+		if (!LATYPE_IS_POS(sub->subno))
+			satisfied = !satisfied;
 	}
-	end = longest(v, d, cp, v->stop, (int *) NULL);
-	freedfa(d);
-	FDEBUG(("=== lacon %d match %d\n", n, (end != NULL)));
-	return (sub->subno) ? (end != NULL) : (end == NULL);
+	FDEBUG(("=== lacon %d satisfied %d\n", n, satisfied));
+	return satisfied;
 }
 
 /*
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c
index 8a21f2cb787..82659a0f2f4 100644
--- a/src/backend/regex/regexec.c
+++ b/src/backend/regex/regexec.c
@@ -112,7 +112,10 @@ struct vars
 	chr		   *search_start;	/* search start of string */
 	chr		   *stop;			/* just past end of string */
 	int			err;			/* error code if any (0 none) */
-	struct dfa **subdfas;		/* per-subre DFAs */
+	struct dfa **subdfas;		/* per-tree-subre DFAs */
+	struct dfa **ladfas;		/* per-lacon-subre DFAs */
+	struct sset **lblastcss;	/* per-lacon-subre lookbehind restart data */
+	chr		  **lblastcp;		/* per-lacon-subre lookbehind restart data */
 	struct smalldfa dfa1;
 	struct smalldfa dfa2;
 };
@@ -132,6 +135,7 @@ struct vars
  */
 /* === regexec.c === */
 static struct dfa *getsubdfa(struct vars *, struct subre *);
+static struct dfa *getladfa(struct vars *, int);
 static int	find(struct vars *, struct cnfa *, struct colormap *);
 static int	cfind(struct vars *, struct cnfa *, struct colormap *);
 static int	cfindloop(struct vars *, struct cnfa *, struct colormap *, struct dfa *, struct dfa *, chr **);
@@ -149,6 +153,7 @@ static int	creviterdissect(struct vars *, struct subre *, chr *, chr *);
 /* === rege_dfa.c === */
 static chr *longest(struct vars *, struct dfa *, chr *, chr *, int *);
 static chr *shortest(struct vars *, struct dfa *, chr *, chr *, chr *, chr **, int *);
+static int	matchuntil(struct vars *, struct dfa *, chr *, struct sset **, chr **);
 static chr *lastcold(struct vars *, struct dfa *);
 static struct dfa *newdfa(struct vars *, struct cnfa *, struct colormap *, struct smalldfa *);
 static void freedfa(struct dfa *);
@@ -226,21 +231,54 @@ pg_regexec(regex_t *re,
 	v->search_start = (chr *) string + search_start;
 	v->stop = (chr *) string + len;
 	v->err = 0;
+	v->subdfas = NULL;
+	v->ladfas = NULL;
+	v->lblastcss = NULL;
+	v->lblastcp = NULL;
+	/* below this point, "goto cleanup" will behave sanely */
+
 	assert(v->g->ntree >= 0);
 	n = (size_t) v->g->ntree;
 	if (n <= LOCALDFAS)
 		v->subdfas = subdfas;
 	else
-		v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
-	if (v->subdfas == NULL)
 	{
-		if (v->pmatch != pmatch && v->pmatch != mat)
-			FREE(v->pmatch);
-		return REG_ESPACE;
+		v->subdfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
+		if (v->subdfas == NULL)
+		{
+			st = REG_ESPACE;
+			goto cleanup;
+		}
 	}
 	for (i = 0; i < n; i++)
 		v->subdfas[i] = NULL;
 
+	assert(v->g->nlacons >= 0);
+	n = (size_t) v->g->nlacons;
+	if (n > 0)
+	{
+		v->ladfas = (struct dfa **) MALLOC(n * sizeof(struct dfa *));
+		if (v->ladfas == NULL)
+		{
+			st = REG_ESPACE;
+			goto cleanup;
+		}
+		for (i = 0; i < n; i++)
+			v->ladfas[i] = NULL;
+		v->lblastcss = (struct sset **) MALLOC(n * sizeof(struct sset *));
+		v->lblastcp = (chr **) MALLOC(n * sizeof(chr *));
+		if (v->lblastcss == NULL || v->lblastcp == NULL)
+		{
+			st = REG_ESPACE;
+			goto cleanup;
+		}
+		for (i = 0; i < n; i++)
+		{
+			v->lblastcss[i] = NULL;
+			v->lblastcp[i] = NULL;
+		}
+	}
+
 	/* do it */
 	assert(v->g->tree != NULL);
 	if (backref)
@@ -257,22 +295,40 @@ pg_regexec(regex_t *re,
 	}
 
 	/* clean up */
+cleanup:
 	if (v->pmatch != pmatch && v->pmatch != mat)
 		FREE(v->pmatch);
-	n = (size_t) v->g->ntree;
-	for (i = 0; i < n; i++)
+	if (v->subdfas != NULL)
+	{
+		n = (size_t) v->g->ntree;
+		for (i = 0; i < n; i++)
+		{
+			if (v->subdfas[i] != NULL)
+				freedfa(v->subdfas[i]);
+		}
+		if (v->subdfas != subdfas)
+			FREE(v->subdfas);
+	}
+	if (v->ladfas != NULL)
 	{
-		if (v->subdfas[i] != NULL)
-			freedfa(v->subdfas[i]);
+		n = (size_t) v->g->nlacons;
+		for (i = 0; i < n; i++)
+		{
+			if (v->ladfas[i] != NULL)
+				freedfa(v->ladfas[i]);
+		}
+		FREE(v->ladfas);
 	}
-	if (v->subdfas != subdfas)
-		FREE(v->subdfas);
+	if (v->lblastcss != NULL)
+		FREE(v->lblastcss);
+	if (v->lblastcp != NULL)
+		FREE(v->lblastcp);
 
 	return st;
 }
 
 /*
- * getsubdfa - create or re-fetch the DFA for a subre node
+ * getsubdfa - create or re-fetch the DFA for a tree subre node
  *
  * We only need to create the DFA once per overall regex execution.
  * The DFA will be freed by the cleanup step in pg_regexec().
@@ -291,6 +347,28 @@ getsubdfa(struct vars * v,
 }
 
 /*
+ * getladfa - create or re-fetch the DFA for a LACON subre node
+ *
+ * Same as above, but for LACONs.
+ */
+static struct dfa *
+getladfa(struct vars * v,
+		 int n)
+{
+	assert(n > 0 && n < v->g->nlacons && v->g->lacons != NULL);
+
+	if (v->ladfas[n] == NULL)
+	{
+		struct subre *sub = &v->g->lacons[n];
+
+		v->ladfas[n] = newdfa(v, &sub->cnfa, &v->g->cmap, DOMALLOC);
+		if (ISERR())
+			return NULL;
+	}
+	return v->ladfas[n];
+}
+
+/*
  * find - find a match for the main NFA (no-complications case)
  */
 static int
diff --git a/src/backend/regex/regexport.c b/src/backend/regex/regexport.c
index c5524ae4927..91340719e88 100644
--- a/src/backend/regex/regexport.c
+++ b/src/backend/regex/regexport.c
@@ -6,7 +6,7 @@
  * In this implementation, the NFA defines a necessary but not sufficient
  * condition for a string to match the regex: that is, there can be strings
  * that match the NFA but don't match the full regex, but not vice versa.
- * Thus, for example, it is okay for the functions below to ignore lookahead
+ * Thus, for example, it is okay for the functions below to ignore lookaround
  * constraints, which merely constrain the string some more.
  *
  * Notice that these functions return info into caller-provided arrays
diff --git a/src/backend/regex/regprefix.c b/src/backend/regex/regprefix.c
index ce41620a0b4..86928453260 100644
--- a/src/backend/regex/regprefix.c
+++ b/src/backend/regex/regprefix.c
@@ -36,7 +36,7 @@ static int findprefix(struct cnfa * cnfa, struct colormap * cm,
  * the common prefix or exact value, of length *slength (measured in chrs
  * not bytes!).
  *
- * This function does not analyze all complex cases (such as lookahead
+ * This function does not analyze all complex cases (such as lookaround
  * constraints) exactly.  Therefore it is possible that some strings matching
  * the reported prefix or exact-match string do not satisfy the regex.  But
  * it should never be the case that a string satisfying the regex does not
diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h
index 5e1b692d26c..2f89dc9326b 100644
--- a/src/include/regex/regex.h
+++ b/src/include/regex/regex.h
@@ -58,7 +58,7 @@ typedef struct
 	size_t		re_nsub;		/* number of subexpressions */
 	long		re_info;		/* information about RE */
 #define  REG_UBACKREF		 000001
-#define  REG_ULOOKAHEAD		 000002
+#define  REG_ULOOKAROUND	 000002
 #define  REG_UBOUNDS	 000004
 #define  REG_UBRACES	 000010
 #define  REG_UBSALNUM		 000020
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h
index 19fe991c74f..2ceffa6563b 100644
--- a/src/include/regex/regguts.h
+++ b/src/include/regex/regguts.h
@@ -89,13 +89,19 @@
  */
 
 #define NOTREACHED	0
-#define xxx		1
 
 #define DUPMAX	_POSIX2_RE_DUP_MAX
 #define DUPINF	(DUPMAX+1)
 
 #define REMAGIC 0xfed7			/* magic number for main struct */
 
+/* Type codes for lookaround constraints */
+#define LATYPE_AHEAD_POS	03	/* positive lookahead */
+#define LATYPE_AHEAD_NEG	02	/* negative lookahead */
+#define LATYPE_BEHIND_POS	01	/* positive lookbehind */
+#define LATYPE_BEHIND_NEG	00	/* negative lookbehind */
+#define LATYPE_IS_POS(la)	((la) & 01)
+#define LATYPE_IS_AHEAD(la) ((la) & 02)
 
 
 /*
@@ -351,7 +357,7 @@ struct nfa
  *
  * The non-dummy carc structs are of two types: plain arcs and LACON arcs.
  * Plain arcs just store the transition color number as "co".  LACON arcs
- * store the lookahead constraint number plus cnfa.ncolors as "co".  LACON
+ * store the lookaround constraint number plus cnfa.ncolors as "co".  LACON
  * arcs can be distinguished from plain by testing for co >= cnfa.ncolors.
  */
 struct carc
@@ -365,7 +371,7 @@ struct cnfa
 	int			nstates;		/* number of states */
 	int			ncolors;		/* number of colors (max color in use + 1) */
 	int			flags;
-#define  HASLACONS	01			/* uses lookahead constraints */
+#define  HASLACONS	01			/* uses lookaround constraints */
 	int			pre;			/* setup state number */
 	int			post;			/* teardown state number */
 	color		bos[2];			/* colors, if any, assigned to BOS and BOL */
@@ -433,7 +439,8 @@ struct subre
 #define  PREF2(f1, f2)	 ((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
 #define  COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2))
 	short		id;				/* ID of subre (1..ntree-1) */
-	int			subno;			/* subexpression number (for 'b' and '(') */
+	int			subno;			/* subexpression number for 'b' and '(', or
+								 * LATYPE code for lookaround constraint */
 	short		min;			/* min repetitions for iteration or backref */
 	short		max;			/* max repetitions for iteration or backref */
 	struct subre *left;			/* left child, if any (also freelist chain) */
@@ -479,6 +486,7 @@ struct guts
 	int			ntree;			/* number of subre's, plus one */
 	struct colormap cmap;
 	int			FUNCPTR(compare, (const chr *, const chr *, size_t));
-	struct subre *lacons;		/* lookahead-constraint vector */
-	int			nlacons;		/* size of lacons */
+	struct subre *lacons;		/* lookaround-constraint vector */
+	int			nlacons;		/* size of lacons[]; note that only slots
+								 * numbered 1 .. nlacons-1 are used */
 };
diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out
index be151858a38..f0e2fc9eb89 100644
--- a/src/test/regress/expected/regex.out
+++ b/src/test/regress/expected/regex.out
@@ -90,6 +90,175 @@ select substring('a' from '((a)+)');
  a
 (1 row)
 
+-- Test lookahead constraints
+select regexp_matches('ab', 'a(?=b)b*');
+ regexp_matches 
+----------------
+ {ab}
+(1 row)
+
+select regexp_matches('a', 'a(?=b)b*');
+ regexp_matches 
+----------------
+(0 rows)
+
+select regexp_matches('abc', 'a(?=b)b*(?=c)c*');
+ regexp_matches 
+----------------
+ {abc}
+(1 row)
+
+select regexp_matches('ab', 'a(?=b)b*(?=c)c*');
+ regexp_matches 
+----------------
+(0 rows)
+
+select regexp_matches('ab', 'a(?!b)b*');
+ regexp_matches 
+----------------
+(0 rows)
+
+select regexp_matches('a', 'a(?!b)b*');
+ regexp_matches 
+----------------
+ {a}
+(1 row)
+
+select regexp_matches('b', '(?=b)b');
+ regexp_matches 
+----------------
+ {b}
+(1 row)
+
+select regexp_matches('a', '(?=b)b');
+ regexp_matches 
+----------------
+(0 rows)
+
+-- Test lookbehind constraints
+select regexp_matches('abb', '(?<=a)b*');
+ regexp_matches 
+----------------
+ {bb}
+(1 row)
+
+select regexp_matches('a', 'a(?<=a)b*');
+ regexp_matches 
+----------------
+ {a}
+(1 row)
+
+select regexp_matches('abc', 'a(?<=a)b*(?<=b)c*');
+ regexp_matches 
+----------------
+ {abc}
+(1 row)
+
+select regexp_matches('ab', 'a(?<=a)b*(?<=b)c*');
+ regexp_matches 
+----------------
+ {ab}
+(1 row)
+
+select regexp_matches('ab', 'a*(?<!a)b*');
+ regexp_matches 
+----------------
+ {""}
+(1 row)
+
+select regexp_matches('ab', 'a*(?<!a)b+');
+ regexp_matches 
+----------------
+(0 rows)
+
+select regexp_matches('b', 'a*(?<!a)b+');
+ regexp_matches 
+----------------
+ {b}
+(1 row)
+
+select regexp_matches('a', 'a(?<!a)b*');
+ regexp_matches 
+----------------
+(0 rows)
+
+select regexp_matches('b', '(?<=b)b');
+ regexp_matches 
+----------------
+(0 rows)
+
+select regexp_matches('foobar', '(?<=f)b+');
+ regexp_matches 
+----------------
+(0 rows)
+
+select regexp_matches('foobar', '(?<=foo)b+');
+ regexp_matches 
+----------------
+ {b}
+(1 row)
+
+select regexp_matches('foobar', '(?<=oo)b+');
+ regexp_matches 
+----------------
+ {b}
+(1 row)
+
+-- Test optimization of single-chr-or-bracket-expression lookaround constraints
+select 'xz' ~ 'x(?=[xy])';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'xy' ~ 'x(?=[xy])';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'xz' ~ 'x(?![xy])';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'xy' ~ 'x(?![xy])';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'x'  ~ 'x(?![xy])';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'xyy' ~ '(?<=[xy])yy+';
+ ?column? 
+----------
+ t
+(1 row)
+
+select 'zyy' ~ '(?<=[xy])yy+';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'xyy' ~ '(?<![xy])yy+';
+ ?column? 
+----------
+ f
+(1 row)
+
+select 'zyy' ~ '(?<![xy])yy+';
+ ?column? 
+----------
+ t
+(1 row)
+
 -- Test conversion of regex patterns to indexable conditions
 explain (costs off) select * from pg_proc where proname ~ 'abc';
             QUERY PLAN             
diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql
index c59fa35f24d..d3030af295d 100644
--- a/src/test/regress/sql/regex.sql
+++ b/src/test/regress/sql/regex.sql
@@ -25,6 +25,41 @@ select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)');
 select substring('a' from '((a))+');
 select substring('a' from '((a)+)');
 
+-- Test lookahead constraints
+select regexp_matches('ab', 'a(?=b)b*');
+select regexp_matches('a', 'a(?=b)b*');
+select regexp_matches('abc', 'a(?=b)b*(?=c)c*');
+select regexp_matches('ab', 'a(?=b)b*(?=c)c*');
+select regexp_matches('ab', 'a(?!b)b*');
+select regexp_matches('a', 'a(?!b)b*');
+select regexp_matches('b', '(?=b)b');
+select regexp_matches('a', '(?=b)b');
+
+-- Test lookbehind constraints
+select regexp_matches('abb', '(?<=a)b*');
+select regexp_matches('a', 'a(?<=a)b*');
+select regexp_matches('abc', 'a(?<=a)b*(?<=b)c*');
+select regexp_matches('ab', 'a(?<=a)b*(?<=b)c*');
+select regexp_matches('ab', 'a*(?<!a)b*');
+select regexp_matches('ab', 'a*(?<!a)b+');
+select regexp_matches('b', 'a*(?<!a)b+');
+select regexp_matches('a', 'a(?<!a)b*');
+select regexp_matches('b', '(?<=b)b');
+select regexp_matches('foobar', '(?<=f)b+');
+select regexp_matches('foobar', '(?<=foo)b+');
+select regexp_matches('foobar', '(?<=oo)b+');
+
+-- Test optimization of single-chr-or-bracket-expression lookaround constraints
+select 'xz' ~ 'x(?=[xy])';
+select 'xy' ~ 'x(?=[xy])';
+select 'xz' ~ 'x(?![xy])';
+select 'xy' ~ 'x(?![xy])';
+select 'x'  ~ 'x(?![xy])';
+select 'xyy' ~ '(?<=[xy])yy+';
+select 'zyy' ~ '(?<=[xy])yy+';
+select 'xyy' ~ '(?<![xy])yy+';
+select 'zyy' ~ '(?<![xy])yy+';
+
 -- Test conversion of regex patterns to indexable conditions
 explain (costs off) select * from pg_proc where proname ~ 'abc';
 explain (costs off) select * from pg_proc where proname ~ '^abc';