diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2005-10-18 20:38:58 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2005-10-18 20:38:58 +0000 |
commit | 220f2a7d15adc6ca811a3cbe5fee79ce4904cd91 (patch) | |
tree | f71baba620e60bdc4fcff8b3a008590d17e5ede1 /src | |
parent | 800af89004f1a9e816b8df644c31b3fa2a247f92 (diff) | |
download | postgresql-220f2a7d15adc6ca811a3cbe5fee79ce4904cd91.tar.gz postgresql-220f2a7d15adc6ca811a3cbe5fee79ce4904cd91.zip |
Code review for regexp_replace patch. Improve documentation and comments,
fix problems with replacement-string backslashes that aren't followed by
one of the expected characters, avoid giving the impression that
replace_text_regexp() is meant to be called directly as a SQL function,
etc.
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/utils/adt/regexp.c | 48 | ||||
-rw-r--r-- | src/backend/utils/adt/varlena.c | 132 | ||||
-rw-r--r-- | src/include/utils/builtins.h | 5 |
3 files changed, 100 insertions, 85 deletions
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index a872762c3c2..ce04ce77e67 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.59 2005/10/15 02:49:29 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.60 2005/10/18 20:38:58 tgl Exp $ * * Alistair Crooks added the code for the regex caching * agc - cached the regular expressions used - there's a good chance @@ -83,7 +83,7 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */ /* * RE_compile_and_cache - compile a RE, caching if possible * - * Returns regex_t + * Returns regex_t * * * text_re --- the pattern, expressed as an *untoasted* TEXT object * cflags --- compile options for the pattern @@ -91,7 +91,7 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */ * Pattern is given in the database encoding. We internally convert to * array of pg_wchar which is what Spencer's regex package wants. */ -static regex_t +static regex_t * RE_compile_and_cache(text *text_re, int cflags) { int text_re_len = VARSIZE(text_re); @@ -123,7 +123,7 @@ RE_compile_and_cache(text *text_re, int cflags) re_array[0] = re_temp; } - return re_array[0].cre_re; + return &re_array[0].cre_re; } } @@ -188,7 +188,7 @@ RE_compile_and_cache(text *text_re, int cflags) re_array[0] = re_temp; num_res++; - return re_array[0].cre_re; + return &re_array[0].cre_re; } /* @@ -212,7 +212,7 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len, pg_wchar *data; size_t data_len; int regexec_result; - regex_t re; + regex_t *re; char errMsg[100]; /* Convert data string to wide characters */ @@ -223,7 +223,7 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len, re = RE_compile_and_cache(text_re, cflags); /* Perform RE match and return result */ - regexec_result = pg_regexec(&re_array[0].cre_re, + regexec_result = pg_regexec(re, data, data_len, 0, @@ -237,8 +237,7 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len, if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH) { /* re failed??? */ - pg_regerror(regexec_result, &re_array[0].cre_re, - errMsg, sizeof(errMsg)); + pg_regerror(regexec_result, re, errMsg, sizeof(errMsg)); ereport(ERROR, (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), errmsg("regular expression failed: %s", errMsg))); @@ -442,10 +441,10 @@ textregexsubstr(PG_FUNCTION_ARGS) /* * textregexreplace_noopt() - * Return a replace string matched by a regular expression. - * This function is a version that doesn't specify the option of - * textregexreplace. This is case sensitive, replace the first - * instance only. + * Return a string matched by a regular expression, with replacement. + * + * This version doesn't have an option argument: we default to case + * sensitive match, replace the first instance only. */ Datum textregexreplace_noopt(PG_FUNCTION_ARGS) @@ -453,20 +452,16 @@ textregexreplace_noopt(PG_FUNCTION_ARGS) text *s = PG_GETARG_TEXT_P(0); text *p = PG_GETARG_TEXT_P(1); text *r = PG_GETARG_TEXT_P(2); - regex_t re; + regex_t *re; re = RE_compile_and_cache(p, regex_flavor); - return DirectFunctionCall4(replace_text_regexp, - PointerGetDatum(s), - PointerGetDatum(&re), - PointerGetDatum(r), - BoolGetDatum(false)); + PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, false)); } /* * textregexreplace() - * Return a replace string matched by a regular expression. + * Return a string matched by a regular expression, with replacement. */ Datum textregexreplace(PG_FUNCTION_ARGS) @@ -478,9 +473,9 @@ textregexreplace(PG_FUNCTION_ARGS) char *opt_p = VARDATA(opt); int opt_len = (VARSIZE(opt) - VARHDRSZ); int i; - bool global = false; + bool glob = false; bool ignorecase = false; - regex_t re; + regex_t *re; /* parse options */ for (i = 0; i < opt_len; i++) @@ -491,8 +486,7 @@ textregexreplace(PG_FUNCTION_ARGS) ignorecase = true; break; case 'g': - global = true; - + glob = true; break; default: ereport(ERROR, @@ -508,11 +502,7 @@ textregexreplace(PG_FUNCTION_ARGS) else re = RE_compile_and_cache(p, regex_flavor); - return DirectFunctionCall4(replace_text_regexp, - PointerGetDatum(s), - PointerGetDatum(&re), - PointerGetDatum(r), - BoolGetDatum(global)); + PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, glob)); } /* similar_escape() diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 5fc3128360c..3f67403ba44 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.137 2005/10/17 16:24:19 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.138 2005/10/18 20:38:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,11 +24,11 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "parser/scansup.h" +#include "regex/regex.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" -#include "regex/regex.h" typedef struct varlena unknown; @@ -2066,7 +2066,8 @@ replace_text(PG_FUNCTION_ARGS) /* * check_replace_text_has_escape_char - * check whether replace_text has escape char. + * + * check whether replace_text contains escape char. */ static bool check_replace_text_has_escape_char(const text *replace_text) @@ -2077,14 +2078,18 @@ check_replace_text_has_escape_char(const text *replace_text) if (pg_database_encoding_max_length() == 1) { for (; p < p_end; p++) + { if (*p == '\\') return true; + } } else { for (; p < p_end; p += pg_mblen(p)) + { if (*p == '\\') return true; + } } return false; @@ -2092,7 +2097,9 @@ check_replace_text_has_escape_char(const text *replace_text) /* * appendStringInfoRegexpSubstr - * append string by using back references of regexp. + * + * Append replace_text to str, substituting regexp back references for + * \n escapes. */ static void appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, @@ -2100,50 +2107,41 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, { const char *p = VARDATA(replace_text); const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ); - int eml = pg_database_encoding_max_length(); - int substr_start = 1; - int ch_cnt; - - int so; - int eo; - - while (1) + for (;;) { - /* Find escape char. */ - ch_cnt = 0; + const char *chunk_start = p; + int so; + int eo; + + /* Find next escape char. */ if (eml == 1) { for (; p < p_end && *p != '\\'; p++) - ch_cnt++; + /* nothing */ ; } else { for (; p < p_end && *p != '\\'; p += pg_mblen(p)) - ch_cnt++; + /* nothing */ ; } - /* - * Copy the text when there is a text in the left of escape char or - * escape char is not found. - */ - if (ch_cnt) - { - text *append_text = text_substring(PointerGetDatum(replace_text), - substr_start, ch_cnt, false); - - appendStringInfoText(str, append_text); - pfree(append_text); - } - substr_start += ch_cnt + 1; + /* Copy the text we just scanned over, if any. */ + if (p > chunk_start) + appendBinaryStringInfo(str, chunk_start, p - chunk_start); - if (p >= p_end) /* When escape char is not found. */ + /* Done if at end of string, else advance over escape char. */ + if (p >= p_end) break; - - /* See the next character of escape char. */ p++; - so = eo = -1; + + if (p >= p_end) + { + /* Escape at very end of input. Treat same as unexpected char */ + appendStringInfoChar(str, '\\'); + break; + } if (*p >= '1' && *p <= '9') { @@ -2153,7 +2151,6 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, so = pmatch[idx].rm_so; eo = pmatch[idx].rm_eo; p++; - substr_start++; } else if (*p == '&') { @@ -2161,15 +2158,36 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, so = pmatch[0].rm_so; eo = pmatch[0].rm_eo; p++; - substr_start++; + } + else if (*p == '\\') + { + /* \\ means transfer one \ to output. */ + appendStringInfoChar(str, '\\'); + p++; + continue; + } + else + { + /* + * If escape char is not followed by any expected char, + * just treat it as ordinary data to copy. (XXX would it be + * better to throw an error?) + */ + appendStringInfoChar(str, '\\'); + continue; } if (so != -1 && eo != -1) { - /* Copy the text that is back reference of regexp. */ - text *append_text = text_substring(PointerGetDatum(src_text), - so + 1, (eo - so), false); + /* + * Copy the text that is back reference of regexp. Because so and + * eo are counted in characters not bytes, it's easiest to use + * text_substring to pull out the correct chunk of text. + */ + text *append_text; + append_text = text_substring(PointerGetDatum(src_text), + so + 1, (eo - so), false); appendStringInfoText(str, append_text); pfree(append_text); } @@ -2180,17 +2198,19 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text, /* * replace_text_regexp + * * replace text that matches to regexp in src_text to replace_text. + * + * Note: to avoid having to include regex.h in builtins.h, we declare + * the regexp argument as void *, but really it's regex_t *. */ -Datum -replace_text_regexp(PG_FUNCTION_ARGS) +text * +replace_text_regexp(text *src_text, void *regexp, + text *replace_text, bool glob) { text *ret_text; - text *src_text = PG_GETARG_TEXT_P(0); + regex_t *re = (regex_t *) regexp; int src_text_len = VARSIZE(src_text) - VARHDRSZ; - regex_t *re = (regex_t *) PG_GETARG_POINTER(1); - text *replace_text = PG_GETARG_TEXT_P(2); - bool global = PG_GETARG_BOOL(3); StringInfo str = makeStringInfo(); int regexec_result; regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT]; @@ -2233,14 +2253,18 @@ replace_text_regexp(PG_FUNCTION_ARGS) break; /* - * Copy the text when there is a text in the left of matched position. + * Copy the text to the left of the match position. Because we + * are working with character not byte indexes, it's easiest to + * use text_substring to pull out the needed data. */ if (pmatch[0].rm_so - data_pos > 0) { - text *left_text = text_substring(PointerGetDatum(src_text), - data_pos + 1, - pmatch[0].rm_so - data_pos, false); + text *left_text; + left_text = text_substring(PointerGetDatum(src_text), + data_pos + 1, + pmatch[0].rm_so - data_pos, + false); appendStringInfoText(str, left_text); pfree(left_text); } @@ -2259,7 +2283,7 @@ replace_text_regexp(PG_FUNCTION_ARGS) /* * When global option is off, replace the first instance only. */ - if (!global) + if (!glob) break; /* @@ -2270,14 +2294,14 @@ replace_text_regexp(PG_FUNCTION_ARGS) } /* - * Copy the text when there is a text at the right of last matched or - * regexp is not matched. + * Copy the text to the right of the last match. */ if (data_pos < data_len) { - text *right_text = text_substring(PointerGetDatum(src_text), - data_pos + 1, -1, true); + text *right_text; + right_text = text_substring(PointerGetDatum(src_text), + data_pos + 1, -1, true); appendStringInfoText(str, right_text); pfree(right_text); } @@ -2287,7 +2311,7 @@ replace_text_regexp(PG_FUNCTION_ARGS) pfree(str); pfree(data); - PG_RETURN_TEXT_P(ret_text); + return ret_text; } /* diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 469d993d04d..1ec358f3849 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.266 2005/10/15 02:49:46 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.267 2005/10/18 20:38:58 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -593,7 +593,8 @@ extern List *textToQualifiedNameList(text *textval); extern bool SplitIdentifierString(char *rawstring, char separator, List **namelist); extern Datum replace_text(PG_FUNCTION_ARGS); -extern Datum replace_text_regexp(PG_FUNCTION_ARGS); +extern text *replace_text_regexp(text *src_text, void *regexp, + text *replace_text, bool glob); extern Datum split_text(PG_FUNCTION_ARGS); extern Datum text_to_array(PG_FUNCTION_ARGS); extern Datum array_to_text(PG_FUNCTION_ARGS); |