aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2005-10-18 20:38:58 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2005-10-18 20:38:58 +0000
commit220f2a7d15adc6ca811a3cbe5fee79ce4904cd91 (patch)
treef71baba620e60bdc4fcff8b3a008590d17e5ede1 /src
parent800af89004f1a9e816b8df644c31b3fa2a247f92 (diff)
downloadpostgresql-220f2a7d15adc6ca811a3cbe5fee79ce4904cd91.tar.gz
postgresql-220f2a7d15adc6ca811a3cbe5fee79ce4904cd91.zip
Code review for regexp_replace patch. Improve documentation and comments,
fix problems with replacement-string backslashes that aren't followed by one of the expected characters, avoid giving the impression that replace_text_regexp() is meant to be called directly as a SQL function, etc.
Diffstat (limited to 'src')
-rw-r--r--src/backend/utils/adt/regexp.c48
-rw-r--r--src/backend/utils/adt/varlena.c132
-rw-r--r--src/include/utils/builtins.h5
3 files changed, 100 insertions, 85 deletions
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index a872762c3c2..ce04ce77e67 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.59 2005/10/15 02:49:29 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.60 2005/10/18 20:38:58 tgl Exp $
*
* Alistair Crooks added the code for the regex caching
* agc - cached the regular expressions used - there's a good chance
@@ -83,7 +83,7 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
/*
* RE_compile_and_cache - compile a RE, caching if possible
*
- * Returns regex_t
+ * Returns regex_t *
*
* text_re --- the pattern, expressed as an *untoasted* TEXT object
* cflags --- compile options for the pattern
@@ -91,7 +91,7 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
* Pattern is given in the database encoding. We internally convert to
* array of pg_wchar which is what Spencer's regex package wants.
*/
-static regex_t
+static regex_t *
RE_compile_and_cache(text *text_re, int cflags)
{
int text_re_len = VARSIZE(text_re);
@@ -123,7 +123,7 @@ RE_compile_and_cache(text *text_re, int cflags)
re_array[0] = re_temp;
}
- return re_array[0].cre_re;
+ return &re_array[0].cre_re;
}
}
@@ -188,7 +188,7 @@ RE_compile_and_cache(text *text_re, int cflags)
re_array[0] = re_temp;
num_res++;
- return re_array[0].cre_re;
+ return &re_array[0].cre_re;
}
/*
@@ -212,7 +212,7 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len,
pg_wchar *data;
size_t data_len;
int regexec_result;
- regex_t re;
+ regex_t *re;
char errMsg[100];
/* Convert data string to wide characters */
@@ -223,7 +223,7 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len,
re = RE_compile_and_cache(text_re, cflags);
/* Perform RE match and return result */
- regexec_result = pg_regexec(&re_array[0].cre_re,
+ regexec_result = pg_regexec(re,
data,
data_len,
0,
@@ -237,8 +237,7 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len,
if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
{
/* re failed??? */
- pg_regerror(regexec_result, &re_array[0].cre_re,
- errMsg, sizeof(errMsg));
+ pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
ereport(ERROR,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("regular expression failed: %s", errMsg)));
@@ -442,10 +441,10 @@ textregexsubstr(PG_FUNCTION_ARGS)
/*
* textregexreplace_noopt()
- * Return a replace string matched by a regular expression.
- * This function is a version that doesn't specify the option of
- * textregexreplace. This is case sensitive, replace the first
- * instance only.
+ * Return a string matched by a regular expression, with replacement.
+ *
+ * This version doesn't have an option argument: we default to case
+ * sensitive match, replace the first instance only.
*/
Datum
textregexreplace_noopt(PG_FUNCTION_ARGS)
@@ -453,20 +452,16 @@ textregexreplace_noopt(PG_FUNCTION_ARGS)
text *s = PG_GETARG_TEXT_P(0);
text *p = PG_GETARG_TEXT_P(1);
text *r = PG_GETARG_TEXT_P(2);
- regex_t re;
+ regex_t *re;
re = RE_compile_and_cache(p, regex_flavor);
- return DirectFunctionCall4(replace_text_regexp,
- PointerGetDatum(s),
- PointerGetDatum(&re),
- PointerGetDatum(r),
- BoolGetDatum(false));
+ PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, false));
}
/*
* textregexreplace()
- * Return a replace string matched by a regular expression.
+ * Return a string matched by a regular expression, with replacement.
*/
Datum
textregexreplace(PG_FUNCTION_ARGS)
@@ -478,9 +473,9 @@ textregexreplace(PG_FUNCTION_ARGS)
char *opt_p = VARDATA(opt);
int opt_len = (VARSIZE(opt) - VARHDRSZ);
int i;
- bool global = false;
+ bool glob = false;
bool ignorecase = false;
- regex_t re;
+ regex_t *re;
/* parse options */
for (i = 0; i < opt_len; i++)
@@ -491,8 +486,7 @@ textregexreplace(PG_FUNCTION_ARGS)
ignorecase = true;
break;
case 'g':
- global = true;
-
+ glob = true;
break;
default:
ereport(ERROR,
@@ -508,11 +502,7 @@ textregexreplace(PG_FUNCTION_ARGS)
else
re = RE_compile_and_cache(p, regex_flavor);
- return DirectFunctionCall4(replace_text_regexp,
- PointerGetDatum(s),
- PointerGetDatum(&re),
- PointerGetDatum(r),
- BoolGetDatum(global));
+ PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, glob));
}
/* similar_escape()
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 5fc3128360c..3f67403ba44 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.137 2005/10/17 16:24:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.138 2005/10/18 20:38:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -24,11 +24,11 @@
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "parser/scansup.h"
+#include "regex/regex.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
-#include "regex/regex.h"
typedef struct varlena unknown;
@@ -2066,7 +2066,8 @@ replace_text(PG_FUNCTION_ARGS)
/*
* check_replace_text_has_escape_char
- * check whether replace_text has escape char.
+ *
+ * check whether replace_text contains escape char.
*/
static bool
check_replace_text_has_escape_char(const text *replace_text)
@@ -2077,14 +2078,18 @@ check_replace_text_has_escape_char(const text *replace_text)
if (pg_database_encoding_max_length() == 1)
{
for (; p < p_end; p++)
+ {
if (*p == '\\')
return true;
+ }
}
else
{
for (; p < p_end; p += pg_mblen(p))
+ {
if (*p == '\\')
return true;
+ }
}
return false;
@@ -2092,7 +2097,9 @@ check_replace_text_has_escape_char(const text *replace_text)
/*
* appendStringInfoRegexpSubstr
- * append string by using back references of regexp.
+ *
+ * Append replace_text to str, substituting regexp back references for
+ * \n escapes.
*/
static void
appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
@@ -2100,50 +2107,41 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
{
const char *p = VARDATA(replace_text);
const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
-
int eml = pg_database_encoding_max_length();
- int substr_start = 1;
- int ch_cnt;
-
- int so;
- int eo;
-
- while (1)
+ for (;;)
{
- /* Find escape char. */
- ch_cnt = 0;
+ const char *chunk_start = p;
+ int so;
+ int eo;
+
+ /* Find next escape char. */
if (eml == 1)
{
for (; p < p_end && *p != '\\'; p++)
- ch_cnt++;
+ /* nothing */ ;
}
else
{
for (; p < p_end && *p != '\\'; p += pg_mblen(p))
- ch_cnt++;
+ /* nothing */ ;
}
- /*
- * Copy the text when there is a text in the left of escape char or
- * escape char is not found.
- */
- if (ch_cnt)
- {
- text *append_text = text_substring(PointerGetDatum(replace_text),
- substr_start, ch_cnt, false);
-
- appendStringInfoText(str, append_text);
- pfree(append_text);
- }
- substr_start += ch_cnt + 1;
+ /* Copy the text we just scanned over, if any. */
+ if (p > chunk_start)
+ appendBinaryStringInfo(str, chunk_start, p - chunk_start);
- if (p >= p_end) /* When escape char is not found. */
+ /* Done if at end of string, else advance over escape char. */
+ if (p >= p_end)
break;
-
- /* See the next character of escape char. */
p++;
- so = eo = -1;
+
+ if (p >= p_end)
+ {
+ /* Escape at very end of input. Treat same as unexpected char */
+ appendStringInfoChar(str, '\\');
+ break;
+ }
if (*p >= '1' && *p <= '9')
{
@@ -2153,7 +2151,6 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
so = pmatch[idx].rm_so;
eo = pmatch[idx].rm_eo;
p++;
- substr_start++;
}
else if (*p == '&')
{
@@ -2161,15 +2158,36 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
so = pmatch[0].rm_so;
eo = pmatch[0].rm_eo;
p++;
- substr_start++;
+ }
+ else if (*p == '\\')
+ {
+ /* \\ means transfer one \ to output. */
+ appendStringInfoChar(str, '\\');
+ p++;
+ continue;
+ }
+ else
+ {
+ /*
+ * If escape char is not followed by any expected char,
+ * just treat it as ordinary data to copy. (XXX would it be
+ * better to throw an error?)
+ */
+ appendStringInfoChar(str, '\\');
+ continue;
}
if (so != -1 && eo != -1)
{
- /* Copy the text that is back reference of regexp. */
- text *append_text = text_substring(PointerGetDatum(src_text),
- so + 1, (eo - so), false);
+ /*
+ * Copy the text that is back reference of regexp. Because so and
+ * eo are counted in characters not bytes, it's easiest to use
+ * text_substring to pull out the correct chunk of text.
+ */
+ text *append_text;
+ append_text = text_substring(PointerGetDatum(src_text),
+ so + 1, (eo - so), false);
appendStringInfoText(str, append_text);
pfree(append_text);
}
@@ -2180,17 +2198,19 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
/*
* replace_text_regexp
+ *
* replace text that matches to regexp in src_text to replace_text.
+ *
+ * Note: to avoid having to include regex.h in builtins.h, we declare
+ * the regexp argument as void *, but really it's regex_t *.
*/
-Datum
-replace_text_regexp(PG_FUNCTION_ARGS)
+text *
+replace_text_regexp(text *src_text, void *regexp,
+ text *replace_text, bool glob)
{
text *ret_text;
- text *src_text = PG_GETARG_TEXT_P(0);
+ regex_t *re = (regex_t *) regexp;
int src_text_len = VARSIZE(src_text) - VARHDRSZ;
- regex_t *re = (regex_t *) PG_GETARG_POINTER(1);
- text *replace_text = PG_GETARG_TEXT_P(2);
- bool global = PG_GETARG_BOOL(3);
StringInfo str = makeStringInfo();
int regexec_result;
regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
@@ -2233,14 +2253,18 @@ replace_text_regexp(PG_FUNCTION_ARGS)
break;
/*
- * Copy the text when there is a text in the left of matched position.
+ * Copy the text to the left of the match position. Because we
+ * are working with character not byte indexes, it's easiest to
+ * use text_substring to pull out the needed data.
*/
if (pmatch[0].rm_so - data_pos > 0)
{
- text *left_text = text_substring(PointerGetDatum(src_text),
- data_pos + 1,
- pmatch[0].rm_so - data_pos, false);
+ text *left_text;
+ left_text = text_substring(PointerGetDatum(src_text),
+ data_pos + 1,
+ pmatch[0].rm_so - data_pos,
+ false);
appendStringInfoText(str, left_text);
pfree(left_text);
}
@@ -2259,7 +2283,7 @@ replace_text_regexp(PG_FUNCTION_ARGS)
/*
* When global option is off, replace the first instance only.
*/
- if (!global)
+ if (!glob)
break;
/*
@@ -2270,14 +2294,14 @@ replace_text_regexp(PG_FUNCTION_ARGS)
}
/*
- * Copy the text when there is a text at the right of last matched or
- * regexp is not matched.
+ * Copy the text to the right of the last match.
*/
if (data_pos < data_len)
{
- text *right_text = text_substring(PointerGetDatum(src_text),
- data_pos + 1, -1, true);
+ text *right_text;
+ right_text = text_substring(PointerGetDatum(src_text),
+ data_pos + 1, -1, true);
appendStringInfoText(str, right_text);
pfree(right_text);
}
@@ -2287,7 +2311,7 @@ replace_text_regexp(PG_FUNCTION_ARGS)
pfree(str);
pfree(data);
- PG_RETURN_TEXT_P(ret_text);
+ return ret_text;
}
/*
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 469d993d04d..1ec358f3849 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.266 2005/10/15 02:49:46 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.267 2005/10/18 20:38:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -593,7 +593,8 @@ extern List *textToQualifiedNameList(text *textval);
extern bool SplitIdentifierString(char *rawstring, char separator,
List **namelist);
extern Datum replace_text(PG_FUNCTION_ARGS);
-extern Datum replace_text_regexp(PG_FUNCTION_ARGS);
+extern text *replace_text_regexp(text *src_text, void *regexp,
+ text *replace_text, bool glob);
extern Datum split_text(PG_FUNCTION_ARGS);
extern Datum text_to_array(PG_FUNCTION_ARGS);
extern Datum array_to_text(PG_FUNCTION_ARGS);