15 files changed, 333 insertions, 64 deletions
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index fbf63db85e2..b8fd3d03489 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.320 2002/06/11 13:40:50 wieck Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.321 2002/06/11 15:41:37 thomas Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -227,10 +227,10 @@ static void doNegateFloat(Value *v);
 %type <node>	join_outer, join_qual
 %type <jtype>	join_type
 
-%type <list>	extract_list, position_list
+%type <list>	extract_list, overlay_list, position_list
 %type <list>	substr_list, trim_list
 %type <ival>	opt_interval
-%type <node>	substr_from, substr_for
+%type <node>	overlay_placing, substr_from, substr_for
 
 %type <boolean>	opt_binary, opt_using, opt_instead, opt_cursor
 %type <boolean>	opt_with_copy, index_opt_unique, opt_verbose, opt_full
@@ -336,7 +336,7 @@ static void doNegateFloat(Value *v);
 	FALSE_P, FETCH, FLOAT_P, FOR, FORCE, FOREIGN, FORWARD, FREEZE, FROM,
 	FULL, FUNCTION,
 
-	GLOBAL, GRANT, GROUP_P,
+	GET, GLOBAL, GRANT, GROUP_P,
 	HANDLER, HAVING, HOUR_P,
 
 	ILIKE, IMMEDIATE, IMMUTABLE, IMPLICIT, IN_P, INCREMENT, INDEX, INHERITS,
@@ -356,16 +356,16 @@ static void doNegateFloat(Value *v);
 	NUMERIC,
 
 	OF, OFF, OFFSET, OIDS, OLD, ON, ONLY, OPERATOR, OPTION, OR, ORDER,
-	OUT_P, OUTER_P, OVERLAPS, OWNER,
+	OUT_P, OUTER_P, OVERLAPS, OVERLAY, OWNER,
 
-	PARTIAL, PASSWORD, PATH_P, PENDANT, POSITION, PRECISION, PRIMARY,
+	PARTIAL, PASSWORD, PATH_P, PENDANT, PLACING, POSITION, PRECISION, PRIMARY,
 	PRIOR, PRIVILEGES, PROCEDURE, PROCEDURAL,
 
 	READ, REAL, REFERENCES, REINDEX, RELATIVE, RENAME, REPLACE, RESET,
 	RESTRICT, RETURNS, REVOKE, RIGHT, ROLLBACK, ROW, RULE,
 
 	SCHEMA, SCROLL, SECOND_P, SECURITY, SELECT, SEQUENCE, SERIALIZABLE,
-	SESSION, SESSION_USER, SET, SETOF, SHARE, SHOW, SMALLINT, SOME,
+	SESSION, SESSION_USER, SET, SETOF, SHARE, SHOW, SIMILAR, SMALLINT, SOME,
 	STABLE, START, STATEMENT, STATISTICS, STDIN, STDOUT, STORAGE, STRICT,
 	SUBSTRING, SYSID,
 
@@ -402,7 +402,7 @@ static void doNegateFloat(Value *v);
 %right		NOT
 %right		'='
 %nonassoc	'<' '>'
-%nonassoc	LIKE ILIKE
+%nonassoc	LIKE ILIKE SIMILAR
 %nonassoc	ESCAPE
 %nonassoc	OVERLAPS
 %nonassoc	BETWEEN
@@ -420,6 +420,7 @@ static void doNegateFloat(Value *v);
 %right		UMINUS
 %left		'[' ']'
 %left		'(' ')'
+%left		COLLATE
 %left		TYPECAST
 %left		'.'
 %%
@@ -2139,6 +2140,14 @@ DefineStmt:  CREATE AGGREGATE func_name definition
 					n->definition = $4;
 					$$ = (Node *)n;
 				}
+		| CREATE CHARACTER SET opt_as any_name GET definition opt_collate
+				{
+					DefineStmt *n = makeNode(DefineStmt);
+					n->defType = CHARACTER;
+					n->defnames = $5;
+					n->definition = $7;
+					$$ = (Node *)n;
+				}
 		;
 
 definition:  '(' def_list ')'				{ $$ = $2; }
@@ -4978,9 +4987,18 @@ qual_all_Op:  all_Op
  * it's factored out just to eliminate redundant coding.
  */
 a_expr:  c_expr
-				{	$$ = $1;  }
+				{	$$ = $1; }
 		| a_expr TYPECAST Typename
 				{	$$ = makeTypeCast($1, $3); }
+		| a_expr COLLATE ColId
+				{
+					FuncCall *n = makeNode(FuncCall);
+					n->funcname = SystemFuncName($3);
+					n->args = makeList1($1);
+					n->agg_star = FALSE;
+					n->agg_distinct = FALSE;
+					$$ = (Node *) n;
+				}
 		| a_expr AT TIME ZONE c_expr
 				{
 					FuncCall *n = makeNode(FuncCall);
@@ -5088,6 +5106,30 @@ a_expr:  c_expr
 					n->agg_distinct = FALSE;
 					$$ = (Node *) makeSimpleA_Expr(OP, "!~~*", $1, (Node *) n);
 				}
+
+		| a_expr SIMILAR TO a_expr				%prec SIMILAR
+				{	$$ = (Node *) makeSimpleA_Expr(OP, "~", $1, $4); }
+		| a_expr SIMILAR TO a_expr ESCAPE a_expr
+				{
+					FuncCall *n = makeNode(FuncCall);
+					n->funcname = SystemFuncName("like_escape");
+					n->args = makeList2($4, $6);
+					n->agg_star = FALSE;
+					n->agg_distinct = FALSE;
+					$$ = (Node *) makeSimpleA_Expr(OP, "~", $1, (Node *) n);
+				}
+		| a_expr NOT SIMILAR TO a_expr			%prec SIMILAR
+				{	$$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, $5); }
+		| a_expr NOT SIMILAR TO a_expr ESCAPE a_expr
+				{
+					FuncCall *n = makeNode(FuncCall);
+					n->funcname = SystemFuncName("like_escape");
+					n->args = makeList2($5, $7);
+					n->agg_star = FALSE;
+					n->agg_distinct = FALSE;
+					$$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, (Node *) n);
+				}
+
 		/* NullTest clause
 		 * Define SQL92-style Null test clause.
 		 * Allow two forms described in the standard:
@@ -5568,6 +5610,20 @@ c_expr:  columnref
 					n->agg_distinct = FALSE;
 					$$ = (Node *)n;
 				}
+		| OVERLAY '(' overlay_list ')'
+				{
+					/* overlay(A PLACING B FROM C FOR D) is converted to
+					 * substring(A, 1, C-1) || B || substring(A, C+1, C+D)
+					 * overlay(A PLACING B FROM C) is converted to
+					 * substring(A, 1, C-1) || B || substring(A, C+1, C+char_length(B))
+					 */
+					FuncCall *n = makeNode(FuncCall);
+					n->funcname = SystemFuncName("overlay");
+					n->args = $3;
+					n->agg_star = FALSE;
+					n->agg_distinct = FALSE;
+					$$ = (Node *)n;
+				}
 		| POSITION '(' position_list ')'
 				{
 					/* position(A in B) is converted to position(B, A) */
@@ -5706,6 +5762,25 @@ extract_arg:  IDENT						{ $$ = $1; }
 		| SCONST						{ $$ = $1; }
 		;
 
+/* OVERLAY() arguments
+ * SQL99 defines the OVERLAY() function:
+ * o overlay(text placing text from int for int)
+ * o overlay(text placing text from int)
+ */
+overlay_list:  a_expr overlay_placing substr_from substr_for
+				{
+					$$ = makeList4($1, $2, $3, $4);
+				}
+		| a_expr overlay_placing substr_from
+				{
+					$$ = makeList3($1, $2, $3);
+				}
+		;
+
+overlay_placing:  PLACING a_expr
+				{	$$ = $2; }
+		;
+
 /* position_list uses b_expr not a_expr to avoid conflict with general IN */
 
 position_list:  b_expr IN_P b_expr
@@ -6259,6 +6334,7 @@ unreserved_keyword:
 		| FORCE
 		| FORWARD
 		| FUNCTION
+		| GET
 		| GLOBAL
 		| HANDLER
 		| HOUR_P
@@ -6404,6 +6480,7 @@ col_name_keyword:
 		| NONE
 		| NULLIF
 		| NUMERIC
+		| OVERLAY
 		| POSITION
 		| REAL
 		| SETOF
@@ -6423,7 +6500,7 @@ col_name_keyword:
  *
  * Do not include POSITION, SUBSTRING, etc here since they have explicit
  * productions in a_expr to support the goofy SQL9x argument syntax.
- *  - thomas 2000-11-28
+ * - thomas 2000-11-28
  */
 func_name_keyword:
 		  AUTHORIZATION
@@ -6445,6 +6522,7 @@ func_name_keyword:
 		| OUTER_P
 		| OVERLAPS
 		| RIGHT
+		| SIMILAR
 		| VERBOSE
 		;
 
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c
index c60b2df2a6d..dca44c448f9 100644
--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.112 2002/06/11 13:40:51 wieck Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.113 2002/06/11 15:41:37 thomas Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -129,6 +129,7 @@ static const ScanKeyword ScanKeywords[] = {
 	{"from", FROM},
 	{"full", FULL},
 	{"function", FUNCTION},
+	{"get", GET},
 	{"global", GLOBAL},
 	{"grant", GRANT},
 	{"group", GROUP_P},
@@ -211,11 +212,13 @@ static const ScanKeyword ScanKeywords[] = {
 	{"out", OUT_P},
 	{"outer", OUTER_P},
 	{"overlaps", OVERLAPS},
+	{"overlay", OVERLAY},
 	{"owner", OWNER},
 	{"partial", PARTIAL},
 	{"password", PASSWORD},
 	{"path", PATH_P},
 	{"pendant", PENDANT},
+	{"placing", PLACING},
 	{"position", POSITION},
 	{"precision", PRECISION},
 	{"primary", PRIMARY},
@@ -251,6 +254,7 @@ static const ScanKeyword ScanKeywords[] = {
 	{"setof", SETOF},
 	{"share", SHARE},
 	{"show", SHOW},
+	{"similar", SIMILAR},
 	{"smallint", SMALLINT},
 	{"some", SOME},
 	{"stable", STABLE},
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index a6bc3549e17..e3d88b4521b 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.94 2002/05/02 18:44:10 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.95 2002/06/11 15:41:37 thomas Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -92,14 +92,14 @@ unsigned char unescape_single_char(unsigned char c);
  * We use exclusive states for quoted strings, extended comments,
  * and to eliminate parsing troubles for numeric strings.
  * Exclusive states:
- *  <xbit> bit string literal
+ *  <xb> bit string literal
  *  <xc> extended C-style comments - thomas 1997-07-12
  *  <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
  *  <xh> hexadecimal numeric string - thomas 1997-11-16
  *  <xq> quoted strings - thomas 1997-07-30
  */
 
-%x xbit
+%x xb
 %x xc
 %x xd
 %x xh
@@ -107,10 +107,10 @@ unsigned char unescape_single_char(unsigned char c);
 
 /* Bit string
  */
-xbitstart		[bB]{quote}
-xbitstop		{quote}
-xbitinside		[^']*
-xbitcat			{quote}{whitespace_with_newline}{quote}
+xbstart		[bB]{quote}
+xbstop		{quote}
+xbinside		[^']*
+xbcat			{quote}{whitespace_with_newline}{quote}
 
 /* Hexadecimal number
  */
@@ -285,13 +285,13 @@ other			.
 
 <xc><<EOF>>		{ yyerror("unterminated /* comment"); }
 
-{xbitstart}		{
+{xbstart}		{
 					token_start = yytext;
-					BEGIN(xbit);
+					BEGIN(xb);
 					startlit();
 					addlitchar('b');
 				}
-<xbit>{xbitstop}	{
+<xb>{xbstop}	{
 					BEGIN(INITIAL);
 					if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0')
 						yyerror("invalid bit string input");
@@ -299,14 +299,14 @@ other			.
 					return BITCONST;
 				}
 <xh>{xhinside}	|
-<xbit>{xbitinside}	{
+<xb>{xbinside}	{
 					addlit(yytext, yyleng);
 				}
 <xh>{xhcat}		|
-<xbit>{xbitcat}		{
+<xb>{xbcat}		{
 					/* ignore */
 				}
-<xbit><<EOF>>		{ yyerror("unterminated bit string literal"); }
+<xb><<EOF>>		{ yyerror("unterminated bit string literal"); }
 
 {xhstart}		{
 					token_start = yytext;
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index bea464bd2ae..d6f7b26fa1a 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -174,9 +174,10 @@ static int	never = 0;			/* for use in asserts; shuts lint up */
 
 /*
  * regcomp - interface for parser and compilation
+ * returns 0 success, otherwise REG_something
  */
-int								/* 0 success, otherwise REG_something */
-pg95_regcomp(regex_t *preg, const char *pattern, int cflags)
+int
+pg_regcomp(regex_t *preg, const char *pattern, int cflags)
 {
 	struct parse pa;
 	struct re_guts *g;
@@ -224,7 +225,6 @@ pg95_regcomp(regex_t *preg, const char *pattern, int cflags)
 		(void) pg_mb2wchar((unsigned char *) pattern, wcp);
 		len = pg_wchar_strlen(wcp);
 #else
-
 		len = strlen((char *) pattern);
 #endif
 	}
@@ -305,7 +305,7 @@ pg95_regcomp(regex_t *preg, const char *pattern, int cflags)
 
 	/* win or lose, we're done */
 	if (p->error != 0)			/* lose */
-		pg95_regfree(preg);
+		pg_regfree(preg);
 	return p->error;
 }
 
diff --git a/src/backend/regex/regerror.c b/src/backend/regex/regerror.c
index 3fa00946aa4..fb12cba3048 100644
--- a/src/backend/regex/regerror.c
+++ b/src/backend/regex/regerror.c
@@ -117,8 +117,8 @@ static struct rerr
  */
 /* ARGSUSED */
 size_t
-pg95_regerror(int errcode, const regex_t *preg,
-			  char *errbuf, size_t errbuf_size)
+pg_regerror(int errcode, const regex_t *preg,
+			char *errbuf, size_t errbuf_size)
 {
 	struct rerr *r;
 	size_t		len;
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c
index 47677304413..06459ef1dbc 100644
--- a/src/backend/regex/regexec.c
+++ b/src/backend/regex/regexec.c
@@ -149,8 +149,8 @@ do { \
  * when choosing which matcher to call.
  */
 int								/* 0 success, REG_NOMATCH failure */
-pg95_regexec(const regex_t *preg, const char *string, size_t nmatch,
-			 regmatch_t *pmatch, int eflags)
+pg_regexec(const regex_t *preg, const char *string, size_t nmatch,
+		   regmatch_t *pmatch, int eflags)
 {
 	struct re_guts *g = preg->re_g;
 
diff --git a/src/backend/regex/regfree.c b/src/backend/regex/regfree.c
index 87e0c99ef96..5672fcf240f 100644
--- a/src/backend/regex/regfree.c
+++ b/src/backend/regex/regfree.c
@@ -49,7 +49,7 @@
  * regfree - free everything
  */
 void
-pg95_regfree(regex_t *preg)
+pg_regfree(regex_t *preg)
 {
 	struct re_guts *g;
 
diff --git a/src/backend/regex/retest.c b/src/backend/regex/retest.c
index eee8ef09ae3..ca5d6c5394a 100644
--- a/src/backend/regex/retest.c
+++ b/src/backend/regex/retest.c
@@ -1,7 +1,7 @@
 /*
  * a simple regexp debug program
  *
- * $Header: /cvsroot/pgsql/src/backend/regex/Attic/retest.c,v 1.4 1999/07/17 20:17:34 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/regex/Attic/retest.c,v 1.5 2002/06/11 15:41:37 thomas Exp $
  */
 
 #include "postgres.h"
@@ -22,7 +22,7 @@ main()
 	if (p)
 		*p = '\0';
 
-	sts = pg95_regcomp(&re, buf, 1);
+	sts = pg_regcomp(&re, buf, 1);
 	printf("regcomp: parses \"%s\" and returns %d\n", buf, sts);
 	for (;;)
 	{
@@ -33,7 +33,7 @@ main()
 		if (p)
 			*p = '\0';
 
-		sts = pg95_regexec(&re, buf, 0, 0, 0);
+		sts = pg_regexec(&re, buf, 0, 0, 0);
 		printf("regexec: returns %d\n", sts);
 	}
 }
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index a61717a8602..9afc58feee7 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.38 2001/11/05 17:46:29 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.39 2002/06/11 15:41:37 thomas Exp $
  *
  *		Alistair Crooks added the code for the regex caching
  *		agc - cached the regular expressions used - there's a good chance
@@ -19,7 +19,7 @@
  *
  *		agc - incorporated Keith Bostic's Berkeley regex code into
  *		the tree for all ports. To distinguish this regex code from any that
- *		is existent on a platform, I've prepended the string "pg95_" to
+ *		is existent on a platform, I've prepended the string "pg_" to
  *		the functions regcomp, regerror, regexec and regfree.
  *		Fixed a bug that was originally a typo by me, where `i' was used
  *		instead of `oldest' when compiling regular expressions - benign
@@ -53,11 +53,13 @@ struct cached_re_str
 static int	rec = 0;			/* # of cached re's */
 static struct cached_re_str rev[MAX_CACHED_RES];		/* cached re's */
 static unsigned long lru;		/* system lru tag */
+static int pg_lastre = 0;
 
 /* attempt to compile `re' as an re, then match it against text */
 /* cflags - flag to regcomp indicates case sensitivity */
 static bool
-RE_compile_and_execute(text *text_re, char *text, int cflags)
+RE_compile_and_execute(text *text_re, char *text, int cflags,
+					   int nmatch, regmatch_t *pmatch)
 {
 	char	   *re;
 	int			oldest;
@@ -68,9 +70,27 @@ RE_compile_and_execute(text *text_re, char *text, int cflags)
 	re = DatumGetCString(DirectFunctionCall1(textout,
 											 PointerGetDatum(text_re)));
 
+	if ((i = pg_lastre) < rec)
+	{
+		if (rev[i].cre_s)
+		{
+			if (strcmp(rev[i].cre_s, re) == 0 &&
+				rev[i].cre_type == cflags)
+			{
+				rev[i].cre_lru = ++lru;
+				pfree(re);
+				return (pg_regexec(&rev[i].cre_re,
+								   text, nmatch,
+								   pmatch, 0) == 0);
+			}
+		}
+	}
+
 	/* find a previously compiled regular expression */
 	for (i = 0; i < rec; i++)
 	{
+		if (i == pg_lastre) continue;
+
 		if (rev[i].cre_s)
 		{
 			if (strcmp(rev[i].cre_s, re) == 0 &&
@@ -78,9 +98,9 @@ RE_compile_and_execute(text *text_re, char *text, int cflags)
 			{
 				rev[i].cre_lru = ++lru;
 				pfree(re);
-				return (pg95_regexec(&rev[i].cre_re,
-									 text, 0,
-									 (regmatch_t *) NULL, 0) == 0);
+				return (pg_regexec(&rev[i].cre_re,
+								   text, nmatch,
+								   pmatch, 0) == 0);
 			}
 		}
 	}
@@ -107,7 +127,7 @@ RE_compile_and_execute(text *text_re, char *text, int cflags)
 			if (rev[i].cre_lru > lru)
 				lru = rev[i].cre_lru;
 		}
-		pg95_regfree(&rev[oldest].cre_re);
+		pg_regfree(&rev[oldest].cre_re);
 
 		/*
 		 * use malloc/free for the cre_s field because the storage has to
@@ -118,7 +138,7 @@ RE_compile_and_execute(text *text_re, char *text, int cflags)
 	}
 
 	/* compile the re */
-	regcomp_result = pg95_regcomp(&rev[oldest].cre_re, re, cflags);
+	regcomp_result = pg_regcomp(&rev[oldest].cre_re, re, cflags);
 	if (regcomp_result == 0)
 	{
 		/*
@@ -130,16 +150,16 @@ RE_compile_and_execute(text *text_re, char *text, int cflags)
 		rev[oldest].cre_type = cflags;
 		pfree(re);
 		/* agc - fixed an old typo here */
-		return (pg95_regexec(&rev[oldest].cre_re, text, 0,
-							 (regmatch_t *) NULL, 0) == 0);
+		return (pg_regexec(&rev[oldest].cre_re, text,
+						   nmatch, pmatch, 0) == 0);
 	}
 	else
 	{
 		char		errMsg[1000];
 
 		/* re didn't compile */
-		pg95_regerror(regcomp_result, &rev[oldest].cre_re, errMsg,
-					  sizeof(errMsg));
+		pg_regerror(regcomp_result, &rev[oldest].cre_re, errMsg,
+					sizeof(errMsg));
 		elog(ERROR, "Invalid regular expression: %s", errMsg);
 	}
 
@@ -167,7 +187,7 @@ fixedlen_regexeq(char *s, text *p, int charlen, int cflags)
 	memcpy(sterm, s, charlen);
 	sterm[charlen] = '\0';
 
-	result = RE_compile_and_execute(p, sterm, cflags);
+	result = RE_compile_and_execute(p, sterm, cflags, 0, NULL);
 
 	pfree(sterm);
 
@@ -230,7 +250,7 @@ textregexne(PG_FUNCTION_ARGS)
 
 /*
  *	routines that use the regexp stuff, but ignore the case.
- *	for this, we use the REG_ICASE flag to pg95_regcomp
+ *	for this, we use the REG_ICASE flag to pg_regcomp
  */
 
 
@@ -281,3 +301,54 @@ nameicregexne(PG_FUNCTION_ARGS)
 									 strlen(NameStr(*n)),
 									 REG_ICASE | REG_EXTENDED));
 }
+
+
+/* textregexsubstr()
+ * Return a substring matched by a regular expression.
+ */
+Datum
+textregexsubstr(PG_FUNCTION_ARGS)
+{
+	text	   *s = PG_GETARG_TEXT_P(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *result;
+	char	   *sterm;
+	int			len;
+	bool		match;
+	int			nmatch = 1;
+	regmatch_t	pmatch;
+
+	/* be sure sterm is null-terminated */
+	len = VARSIZE(s) - VARHDRSZ;
+	sterm = (char *) palloc(len + 1);
+	memcpy(sterm, VARDATA(s), len);
+	sterm[len] = '\0';
+	/* We need the match info back from the pattern match
+	 * to be able to actually extract the substring.
+	 * It seems to be adequate to pass in a structure to return
+	 * only one result.
+	 */
+	match = RE_compile_and_execute(p, sterm, REG_EXTENDED, nmatch, &pmatch);
+	pfree(sterm);
+
+	/* match? then return the substring matching the pattern */
+	if (match)
+	{
+		return (DirectFunctionCall3(text_substr,
+									PointerGetDatum(s),
+									Int32GetDatum(pmatch.rm_so+1),
+									Int32GetDatum(pmatch.rm_eo-pmatch.rm_so)));
+	}
+#if 0
+	/* otherwise, return a zero-length string */
+	else
+	{
+		result = palloc(VARHDRSZ);
+		VARATT_SIZEP(result) = VARHDRSZ;
+		PG_RETURN_TEXT_P(result);
+	}
+#endif
+
+	/* not reached */
+	PG_RETURN_NULL();
+}
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index bd861396210..ae10bfb6879 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catversion.h,v 1.133 2002/05/22 17:21:01 petere Exp $
+ * $Id: catversion.h,v 1.134 2002/06/11 15:44:38 thomas Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	200205221
+#define CATALOG_VERSION_NO	200206111
 
 #endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index a7d56dc90dc..56831dcfca1 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_proc.h,v 1.240 2002/05/24 18:57:56 tgl Exp $
+ * $Id: pg_proc.h,v 1.241 2002/06/11 15:41:37 thomas Exp $
  *
  * NOTES
  *	  The script catalog/genbki.sh reads this file and generates .bki
@@ -1679,7 +1679,7 @@ DESCR("less-equal-greater");
 DATA(insert OID = 1359 (  timestamptz	   PGNSP PGUID 12 f f f t f i 2 1184 "1082 1266" 100 0 0 100  datetimetz_timestamptz - _null_ ));
 DESCR("convert date and time with time zone to timestamp with time zone");
 
-DATA(insert OID = 1364 (  time				 PGNSP PGUID 14 f f f t f i 1 1083 "702" 100 0 0 100  "select time(cast($1 as timestamp without time zone))" - _null_ ));
+DATA(insert OID = 1364 (  time			   PGNSP PGUID 14 f f f t f i 1 1083 "702" 100 0 0 100  "select time(cast($1 as timestamp without time zone))" - _null_ ));
 DESCR("convert abstime to time");
 
 DATA(insert OID = 1367 (  character_length	PGNSP PGUID 12 f f f t f i 1	23 "1042" 100 0 0 100  bpcharlen - _null_ ));
@@ -1764,6 +1764,11 @@ DESCR("current schema name");
 DATA(insert OID = 1403 (  current_schemas	PGNSP PGUID 12 f f f t f s 0  1003 "0" 100 0 0 100	current_schemas - _null_ ));
 DESCR("current schema search list");
 
+DATA(insert OID = 1404 (  overlay			PGNSP PGUID 14 f f f t f i 4 25 "25 25 23 23" 100 0 0 100  "select substring($1, 1, ($3 - 1)) || $2 || substring($1, ($3 + $4))" - _null_ ));
+DESCR("substitute portion of string");
+DATA(insert OID = 1405 (  overlay			PGNSP PGUID 14 f f f t f i 3 25 "25 25 23" 100 0 0 100  "select substring($1, 1, ($3 - 1)) || $2 || substring($1, ($3 + char_length($2)))" - _null_ ));
+DESCR("substitute portion of string");
+
 DATA(insert OID = 1406 (  isvertical		PGNSP PGUID 12 f f f t f i 2	16 "600 600" 100 0 0 100  point_vert - _null_ ));
 DESCR("vertically aligned?");
 DATA(insert OID = 1407 (  ishorizontal		PGNSP PGUID 12 f f f t f i 2	16 "600 600" 100 0 0 100  point_horiz - _null_ ));
@@ -2871,6 +2876,11 @@ DESCR("add");
 DATA(insert OID = 2072 (  date_mi_interval	PGNSP PGUID 14 f f f t f i 2 1114 "1082 1186" 100 0 0 100  "select cast($1 as timestamp without time zone) - $2;" - _null_ ));
 DESCR("subtract");
 
+DATA(insert OID = 2073 (  substring			PGNSP PGUID 12 f f f t f i 2 25 "25 25" 100 0 0 100	textregexsubstr - _null_ ));
+DESCR("substitutes regular expression");
+DATA(insert OID = 2074 (  substring			PGNSP PGUID 14 f f f t f i 3 25 "25 25 25" 100 0 0 100	"select substring($1, like_escape($2, $3))" - _null_ ));
+DESCR("substitutes regular expression with escape argument");
+
 /* Aggregates (moved here from pg_aggregate for 7.3) */
 
 DATA(insert OID = 2100 (  avg				PGNSP PGUID 12 t f f f f i 1 1700 "20" 100 0 0 100  aggregate_dummy - _null_ ));
diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h
index 09f966e1bf2..dd8e0da171b 100644
--- a/src/include/regex/regex.h
+++ b/src/include/regex/regex.h
@@ -53,7 +53,7 @@ typedef struct
 	const pg_wchar *re_endp;	/* end pointer for REG_PEND */
 	struct re_guts *re_g;		/* none of your business :-) */
 #ifdef MULTIBYTE
-	pg_wchar   *patsave;		/* mee too :-) */
+	pg_wchar   *patsave;		/* me too :-) */
 #endif
 
 } regex_t;
@@ -102,12 +102,12 @@ typedef struct
 #define REG_LARGE		01000	/* force large representation */
 #define REG_BACKR		02000	/* force use of backref code */
 
-extern int	pg95_regcomp(regex_t *preg, const char *pattern, int cflags);
-extern size_t pg95_regerror(int errcode, const regex_t *preg,
-			  char *errbuf, size_t errbuf_size);
-extern int pg95_regexec(const regex_t *preg, const char *string,
-			 size_t nmatch,
-			 regmatch_t *pmatch, int eflags);
-extern void pg95_regfree(regex_t *preg);
+extern int	pg_regcomp(regex_t *preg, const char *pattern, int cflags);
+extern size_t pg_regerror(int errcode, const regex_t *preg,
+						  char *errbuf, size_t errbuf_size);
+extern int pg_regexec(const regex_t *preg, const char *string,
+					  size_t nmatch,
+					  regmatch_t *pmatch, int eflags);
+extern void pg_regfree(regex_t *preg);
 
 #endif   /* !_REGEX_H_ */
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 6340aa22540..7676ce5663f 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: builtins.h,v 1.182 2002/05/18 21:38:41 tgl Exp $
+ * $Id: builtins.h,v 1.183 2002/06/11 15:41:38 thomas Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -327,6 +327,7 @@ extern Datum nameicregexeq(PG_FUNCTION_ARGS);
 extern Datum nameicregexne(PG_FUNCTION_ARGS);
 extern Datum texticregexeq(PG_FUNCTION_ARGS);
 extern Datum texticregexne(PG_FUNCTION_ARGS);
+extern Datum textregexsubstr(PG_FUNCTION_ARGS);
 
 /* regproc.c */
 extern Datum regprocin(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index ebfe8eeb663..680a76c0bd5 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -3,6 +3,7 @@
 -- Test various data entry syntaxes.
 --
 -- SQL92 string continuation syntax
+-- E021-03 character string literals
 SELECT 'first line'
 ' - next line'
 	' - third line'
@@ -20,6 +21,7 @@ SELECT 'first line'
 ERROR:  parser: parse error at or near "' - third line'"
 --
 -- test conversions between various string types
+-- E021-10 implicit casting among the character data types
 --
 SELECT CAST(f1 AS text) AS "text(char)" FROM CHAR_TBL;
  text(char) 
@@ -93,7 +95,9 @@ SELECT CAST(name 'namefield' AS varchar) AS "varchar(name)";
 
 --
 -- test SQL92 string functions
+-- E### and T### are feature reference numbers from SQL99
 --
+-- E021-09 trim function
 SELECT TRIM(BOTH FROM '  bunch o blanks  ') = 'bunch o blanks' AS "bunch o blanks";
  bunch o blanks 
 ----------------
@@ -118,6 +122,7 @@ SELECT TRIM(BOTH 'x' FROM 'xxxxxsome Xsxxxxx') = 'some Xs' AS "some Xs";
  t
 (1 row)
 
+-- E021-06 substring expression
 SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890";
  34567890 
 ----------
@@ -130,6 +135,47 @@ SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
  t
 (1 row)
 
+-- T581 regular expression substring
+SELECT SUBSTRING('abcdefg' FROM '(b|f).*(d)' FOR '#') AS "bcd";
+ bcd 
+-----
+ bcd
+(1 row)
+
+-- No match should return NULL
+SELECT SUBSTRING('abcdefg' FROM '(1|2|3)' FOR '#') IS NULL AS "True";
+ True 
+------
+ t
+(1 row)
+
+-- Null inputs should return NULL
+SELECT SUBSTRING('abcdefg' FROM '(b|c)' FOR NULL) IS NULL AS "True";
+ True 
+------
+ t
+(1 row)
+
+SELECT SUBSTRING(NULL FROM '(b|c)' FOR '#') IS NULL AS "True";
+ True 
+------
+ t
+(1 row)
+
+SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
+ True 
+------
+ t
+(1 row)
+
+-- PostgreSQL extention to allow omitting the escape character
+SELECT SUBSTRING('abcdefg' FROM '(c|d).e') AS "cde";
+ cde 
+-----
+ cde
+(1 row)
+
+-- E021-11 position expression
 SELECT POSITION('4' IN '1234567890') = '4' AS "4";
  4 
 ---
@@ -142,11 +188,37 @@ SELECT POSITION(5 IN '1234567890') = '5' AS "5";
  t
 (1 row)
 
+-- T312 character overlay function
+SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS "abc45f";
+ abc45f 
+--------
+ abc45f
+(1 row)
+
+SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5) AS "yabadaba";
+ yabadaba 
+----------
+ yabadaba
+(1 row)
+
+SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5 FOR 0) AS "yabadabadoo";
+ yabadabadoo 
+-------------
+ yabadabadoo
+(1 row)
+
+SELECT OVERLAY('babosa' PLACING 'ubb' FROM 2 FOR 4) AS "bubba";
+ bubba 
+-------
+ bubba
+(1 row)
+
 --
 -- test LIKE
 -- Be sure to form every test as a LIKE/NOT LIKE pair.
 --
 -- simplest examples
+-- E061-04 like predicate
 SELECT 'hawkeye' LIKE 'h%' AS "true";
  true 
 ------
@@ -257,6 +329,7 @@ SELECT 'indio' NOT LIKE 'ind_o' ESCAPE '$' AS "false";
 (1 row)
 
 -- escape character
+-- E061-05 like predicate with escape clause
 SELECT 'h%' LIKE 'h#%' ESCAPE '#' AS "true";
  true 
 ------
@@ -469,6 +542,7 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
 --
 -- test implicit type conversion
 --
+-- E021-07 character concatenation
 SELECT 'unknown' || ' and unknown' AS "Concat unknown types";
  Concat unknown types 
 ----------------------
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index b7f214f4d89..3127d0ebffc 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -4,6 +4,7 @@
 --
 
 -- SQL92 string continuation syntax
+-- E021-03 character string literals
 SELECT 'first line'
 ' - next line'
 	' - third line'
@@ -17,6 +18,7 @@ SELECT 'first line'
 
 --
 -- test conversions between various string types
+-- E021-10 implicit casting among the character data types
 --
 
 SELECT CAST(f1 AS text) AS "text(char)" FROM CHAR_TBL;
@@ -41,8 +43,10 @@ SELECT CAST(name 'namefield' AS varchar) AS "varchar(name)";
 
 --
 -- test SQL92 string functions
+-- E### and T### are feature reference numbers from SQL99
 --
 
+-- E021-09 trim function
 SELECT TRIM(BOTH FROM '  bunch o blanks  ') = 'bunch o blanks' AS "bunch o blanks";
 
 SELECT TRIM(LEADING FROM '  bunch o blanks  ') = 'bunch o blanks  ' AS "bunch o blanks  ";
@@ -51,20 +55,46 @@ SELECT TRIM(TRAILING FROM '  bunch o blanks  ') = '  bunch o blanks' AS "  bunch
 
 SELECT TRIM(BOTH 'x' FROM 'xxxxxsome Xsxxxxx') = 'some Xs' AS "some Xs";
 
+-- E021-06 substring expression
 SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890";
 
 SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
 
+-- T581 regular expression substring
+SELECT SUBSTRING('abcdefg' FROM '(b|f).*(d)' FOR '#') AS "bcd";
+
+-- No match should return NULL
+SELECT SUBSTRING('abcdefg' FROM '(1|2|3)' FOR '#') IS NULL AS "True";
+
+-- Null inputs should return NULL
+SELECT SUBSTRING('abcdefg' FROM '(b|c)' FOR NULL) IS NULL AS "True";
+SELECT SUBSTRING(NULL FROM '(b|c)' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
+
+-- PostgreSQL extention to allow omitting the escape character
+SELECT SUBSTRING('abcdefg' FROM '(c|d).e') AS "cde";
+
+-- E021-11 position expression
 SELECT POSITION('4' IN '1234567890') = '4' AS "4";
 
 SELECT POSITION(5 IN '1234567890') = '5' AS "5";
 
+-- T312 character overlay function
+SELECT OVERLAY('abcdef' PLACING '45' FROM 4) AS "abc45f";
+
+SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5) AS "yabadaba";
+
+SELECT OVERLAY('yabadoo' PLACING 'daba' FROM 5 FOR 0) AS "yabadabadoo";
+
+SELECT OVERLAY('babosa' PLACING 'ubb' FROM 2 FOR 4) AS "bubba";
+
 --
 -- test LIKE
 -- Be sure to form every test as a LIKE/NOT LIKE pair.
 --
 
 -- simplest examples
+-- E061-04 like predicate
 SELECT 'hawkeye' LIKE 'h%' AS "true";
 SELECT 'hawkeye' NOT LIKE 'h%' AS "false";
 
@@ -94,6 +124,7 @@ SELECT 'indio' LIKE 'ind_o' ESCAPE '$' AS "true";
 SELECT 'indio' NOT LIKE 'ind_o' ESCAPE '$' AS "false";
 
 -- escape character
+-- E061-05 like predicate with escape clause
 SELECT 'h%' LIKE 'h#%' ESCAPE '#' AS "true";
 SELECT 'h%' NOT LIKE 'h#%' ESCAPE '#' AS "false";
 
@@ -156,6 +187,7 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
 -- test implicit type conversion
 --
 
+-- E021-07 character concatenation
 SELECT 'unknown' || ' and unknown' AS "Concat unknown types";
 
 SELECT text 'text' || ' and unknown' AS "Concat text to unknown type";
@@ -165,4 +197,3 @@ SELECT char(20) 'characters' || 'and text' AS "Concat char to unknown type";
 SELECT text 'text' || char(20) ' and characters' AS "Concat text to char";
 
 SELECT text 'text' || varchar ' and varchar' AS "Concat text to varchar";
-