aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas Munro <tmunro@postgresql.org>2023-04-08 21:57:46 +1200
committerThomas Munro <tmunro@postgresql.org>2023-04-08 22:10:39 +1200
commitdb4f21e4a34b1d5a3f7123e28e77f575d1a971ea (patch)
tree16cd41226f81b111ed78c0ac9f0c917e8e54125f
parent6db75edb2ecbc9f6912f90b671b01ab4ac3a01b0 (diff)
downloadpostgresql-db4f21e4a34b1d5a3f7123e28e77f575d1a971ea.tar.gz
postgresql-db4f21e4a34b1d5a3f7123e28e77f575d1a971ea.zip
Redesign interrupt/cancel API for regex engine.
Previously, a PostgreSQL-specific callback checked by the regex engine had a way to trigger a special error code REG_CANCEL if it detected that the next call to CHECK_FOR_INTERRUPTS() would certainly throw via ereport(). A later proposed bugfix aims to move some complex logic out of signal handlers, so that it won't run until the next CHECK_FOR_INTERRUPTS(), which makes the above design impossible unless we split CHECK_FOR_INTERRUPTS() into two phases, one to run logic and another to ereport(). We may develop such a system in the future, but for the regex code it is no longer necessary. An earlier commit moved regex memory management over to our MemoryContext system. Given that the purpose of the two-phase interrupt checking was to free memory before throwing, something we don't need to worry about anymore, it seems simpler to inject CHECK_FOR_INTERRUPTS() directly into cancelation points, and just let it throw. Since the plan is to keep PostgreSQL-specific concerns separate from the main regex engine code (with a view to bein able to stay in sync with other projects), do this with a new macro INTERRUPT(), customizable in regcustom.h and defaulting to nothing. Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://postgr.es/m/CA%2BhUKGK3PGKwcKqzoosamn36YW-fsuTdOPPF1i_rtEO%3DnEYKSg%40mail.gmail.com
-rw-r--r--src/backend/regex/regc_locale.c6
-rw-r--r--src/backend/regex/regc_nfa.c48
-rw-r--r--src/backend/regex/regcomp.c18
-rw-r--r--src/backend/regex/rege_dfa.c6
-rw-r--r--src/backend/regex/regexec.c3
-rw-r--r--src/backend/utils/adt/jsonpath_gram.y2
-rw-r--r--src/backend/utils/adt/regexp.c11
-rw-r--r--src/backend/utils/adt/varlena.c1
-rw-r--r--src/include/regex/regcustom.h3
-rw-r--r--src/include/regex/regerrs.h4
-rw-r--r--src/include/regex/regex.h1
-rw-r--r--src/include/regex/regguts.h9
-rw-r--r--src/test/modules/test_regex/test_regex.c10
13 files changed, 18 insertions, 104 deletions
diff --git a/src/backend/regex/regc_locale.c b/src/backend/regex/regc_locale.c
index b5f3a73b1bb..77d1ce28168 100644
--- a/src/backend/regex/regc_locale.c
+++ b/src/backend/regex/regc_locale.c
@@ -475,11 +475,7 @@ range(struct vars *v, /* context */
}
addchr(cv, cc);
}
- if (CANCEL_REQUESTED(v->re))
- {
- ERR(REG_CANCEL);
- return NULL;
- }
+ INTERRUPT(v->re);
}
return cv;
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
index 60fb0bec5d7..f1819a24f6d 100644
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@@ -143,11 +143,7 @@ newstate(struct nfa *nfa)
* compilation, since no code path will go very long without making a new
* state or arc.
*/
- if (CANCEL_REQUESTED(nfa->v->re))
- {
- NERR(REG_CANCEL);
- return NULL;
- }
+ INTERRUPT(nfa->v->re);
/* first, recycle anything that's on the freelist */
if (nfa->freestates != NULL)
@@ -297,11 +293,7 @@ newarc(struct nfa *nfa,
* compilation, since no code path will go very long without making a new
* state or arc.
*/
- if (CANCEL_REQUESTED(nfa->v->re))
- {
- NERR(REG_CANCEL);
- return;
- }
+ INTERRUPT(nfa->v->re);
/* check for duplicate arc, using whichever chain is shorter */
if (from->nouts <= to->nins)
@@ -825,11 +817,7 @@ moveins(struct nfa *nfa,
* Because we bypass newarc() in this code path, we'd better include a
* cancel check.
*/
- if (CANCEL_REQUESTED(nfa->v->re))
- {
- NERR(REG_CANCEL);
- return;
- }
+ INTERRUPT(nfa->v->re);
sortins(nfa, oldState);
sortins(nfa, newState);
@@ -929,11 +917,7 @@ copyins(struct nfa *nfa,
* Because we bypass newarc() in this code path, we'd better include a
* cancel check.
*/
- if (CANCEL_REQUESTED(nfa->v->re))
- {
- NERR(REG_CANCEL);
- return;
- }
+ INTERRUPT(nfa->v->re);
sortins(nfa, oldState);
sortins(nfa, newState);
@@ -1000,11 +984,7 @@ mergeins(struct nfa *nfa,
* Because we bypass newarc() in this code path, we'd better include a
* cancel check.
*/
- if (CANCEL_REQUESTED(nfa->v->re))
- {
- NERR(REG_CANCEL);
- return;
- }
+ INTERRUPT(nfa->v->re);
/* Sort existing inarcs as well as proposed new ones */
sortins(nfa, s);
@@ -1125,11 +1105,7 @@ moveouts(struct nfa *nfa,
* Because we bypass newarc() in this code path, we'd better include a
* cancel check.
*/
- if (CANCEL_REQUESTED(nfa->v->re))
- {
- NERR(REG_CANCEL);
- return;
- }
+ INTERRUPT(nfa->v->re);
sortouts(nfa, oldState);
sortouts(nfa, newState);
@@ -1226,11 +1202,7 @@ copyouts(struct nfa *nfa,
* Because we bypass newarc() in this code path, we'd better include a
* cancel check.
*/
- if (CANCEL_REQUESTED(nfa->v->re))
- {
- NERR(REG_CANCEL);
- return;
- }
+ INTERRUPT(nfa->v->re);
sortouts(nfa, oldState);
sortouts(nfa, newState);
@@ -3282,11 +3254,7 @@ checkmatchall_recurse(struct nfa *nfa, struct state *s, bool **haspaths)
return false;
/* In case the search takes a long time, check for cancel */
- if (CANCEL_REQUESTED(nfa->v->re))
- {
- NERR(REG_CANCEL);
- return false;
- }
+ INTERRUPT(nfa->v->re);
/* Create a haspath array for this state */
haspath = (bool *) MALLOC((DUPINF + 2) * sizeof(bool));
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index bb8c2405989..8a6cfb2973d 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -86,7 +86,6 @@ static int newlacon(struct vars *v, struct state *begin, struct state *end,
int latype);
static void freelacons(struct subre *subs, int n);
static void rfree(regex_t *re);
-static int rcancelrequested(void);
static int rstacktoodeep(void);
#ifdef REG_DEBUG
@@ -356,7 +355,6 @@ struct vars
/* static function list */
static const struct fns functions = {
rfree, /* regfree insides */
- rcancelrequested, /* check for cancel request */
rstacktoodeep /* check for stack getting dangerously deep */
};
@@ -2469,22 +2467,6 @@ rfree(regex_t *re)
}
/*
- * rcancelrequested - check for external request to cancel regex operation
- *
- * Return nonzero to fail the operation with error code REG_CANCEL,
- * zero to keep going
- *
- * The current implementation is Postgres-specific. If we ever get around
- * to splitting the regex code out as a standalone library, there will need
- * to be some API to let applications define a callback function for this.
- */
-static int
-rcancelrequested(void)
-{
- return InterruptPending && (QueryCancelPending || ProcDiePending);
-}
-
-/*
* rstacktoodeep - check for stack getting dangerously deep
*
* Return nonzero to fail the operation with error code REG_ETOOBIG,
diff --git a/src/backend/regex/rege_dfa.c b/src/backend/regex/rege_dfa.c
index ba1289c64a9..1f8f2ab1441 100644
--- a/src/backend/regex/rege_dfa.c
+++ b/src/backend/regex/rege_dfa.c
@@ -805,11 +805,7 @@ miss(struct vars *v,
* Checking for operation cancel in the inner text search loop seems
* unduly expensive. As a compromise, check during cache misses.
*/
- if (CANCEL_REQUESTED(v->re))
- {
- ERR(REG_CANCEL);
- return NULL;
- }
+ INTERRUPT(v->re);
/*
* What set of states would we end up in after consuming the co character?
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c
index 3d9ff2e6079..2a1d5bebda3 100644
--- a/src/backend/regex/regexec.c
+++ b/src/backend/regex/regexec.c
@@ -764,8 +764,7 @@ cdissect(struct vars *v,
MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end)));
/* handy place to check for operation cancel */
- if (CANCEL_REQUESTED(v->re))
- return REG_CANCEL;
+ INTERRUPT(v->re);
/* ... and stack overrun */
if (STACK_TOO_DEEP(v->re))
return REG_ETOOBIG;
diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y
index d34ad6b80da..adc259d5bf8 100644
--- a/src/backend/utils/adt/jsonpath_gram.y
+++ b/src/backend/utils/adt/jsonpath_gram.y
@@ -553,8 +553,6 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
{
char errMsg[100];
- /* See regexp.c for explanation */
- CHECK_FOR_INTERRUPTS();
pg_regerror(re_result, &re_tmp, errMsg, sizeof(errMsg));
ereturn(escontext, false,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 45280902d6e..702cd52b6d4 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -218,15 +218,6 @@ RE_compile_and_cache(text *text_re, int cflags, Oid collation)
if (regcomp_result != REG_OKAY)
{
/* re didn't compile (no need for pg_regfree, if so) */
-
- /*
- * Here and in other places in this file, do CHECK_FOR_INTERRUPTS
- * before reporting a regex error. This is so that if the regex
- * library aborts and returns REG_CANCEL, we don't print an error
- * message that implies the regex was invalid.
- */
- CHECK_FOR_INTERRUPTS();
-
pg_regerror(regcomp_result, &re_temp.cre_re, errMsg, sizeof(errMsg));
ereport(ERROR,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
@@ -308,7 +299,6 @@ RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len,
if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
{
/* re failed??? */
- CHECK_FOR_INTERRUPTS();
pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
ereport(ERROR,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
@@ -2001,7 +1991,6 @@ regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation,
default:
/* re failed??? */
- CHECK_FOR_INTERRUPTS();
pg_regerror(re_result, re, errMsg, sizeof(errMsg));
ereport(ERROR,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index f9a607adaff..b5718764684 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -4265,7 +4265,6 @@ replace_text_regexp(text *src_text, text *pattern_text,
{
char errMsg[100];
- CHECK_FOR_INTERRUPTS();
pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
ereport(ERROR,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
diff --git a/src/include/regex/regcustom.h b/src/include/regex/regcustom.h
index 8f4025128ec..bedee1e9cac 100644
--- a/src/include/regex/regcustom.h
+++ b/src/include/regex/regcustom.h
@@ -44,7 +44,7 @@
#include "mb/pg_wchar.h"
-#include "miscadmin.h" /* needed by rcancelrequested/rstacktoodeep */
+#include "miscadmin.h" /* needed by stacktoodeep */
/* overrides for regguts.h definitions, if any */
@@ -52,6 +52,7 @@
#define MALLOC(n) palloc_extended((n), MCXT_ALLOC_NO_OOM)
#define FREE(p) pfree(VS(p))
#define REALLOC(p,n) repalloc_extended(VS(p),(n), MCXT_ALLOC_NO_OOM)
+#define INTERRUPT(re) CHECK_FOR_INTERRUPTS()
#define assert(x) Assert(x)
/* internal character type and related */
diff --git a/src/include/regex/regerrs.h b/src/include/regex/regerrs.h
index 41e25f7ff00..2c8873eb810 100644
--- a/src/include/regex/regerrs.h
+++ b/src/include/regex/regerrs.h
@@ -81,7 +81,3 @@
{
REG_ECOLORS, "REG_ECOLORS", "too many colors"
},
-
-{
- REG_CANCEL, "REG_CANCEL", "operation cancelled"
-},
diff --git a/src/include/regex/regex.h b/src/include/regex/regex.h
index 1297abec62f..d08113724f6 100644
--- a/src/include/regex/regex.h
+++ b/src/include/regex/regex.h
@@ -156,7 +156,6 @@ typedef struct
#define REG_BADOPT 18 /* invalid embedded option */
#define REG_ETOOBIG 19 /* regular expression is too complex */
#define REG_ECOLORS 20 /* too many colors */
-#define REG_CANCEL 21 /* operation cancelled */
/* two specials for debugging and testing */
#define REG_ATOI 101 /* convert error-code name to number */
#define REG_ITOA 102 /* convert error-code number to name */
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h
index 91a52840c47..3ca3647e118 100644
--- a/src/include/regex/regguts.h
+++ b/src/include/regex/regguts.h
@@ -77,6 +77,11 @@
#define FREE(p) free(VS(p))
#endif
+/* interruption */
+#ifndef INTERRUPT
+#define INTERRUPT(re)
+#endif
+
/* want size of a char in bits, and max value in bounded quantifiers */
#ifndef _POSIX2_RE_DUP_MAX
#define _POSIX2_RE_DUP_MAX 255 /* normally from <limits.h> */
@@ -510,13 +515,9 @@ struct subre
struct fns
{
void FUNCPTR(free, (regex_t *));
- int FUNCPTR(cancel_requested, (void));
int FUNCPTR(stack_too_deep, (void));
};
-#define CANCEL_REQUESTED(re) \
- ((*((struct fns *) (re)->re_fns)->cancel_requested) ())
-
#define STACK_TOO_DEEP(re) \
((*((struct fns *) (re)->re_fns)->stack_too_deep) ())
diff --git a/src/test/modules/test_regex/test_regex.c b/src/test/modules/test_regex/test_regex.c
index 1d4f79c9d31..d1dd48a993b 100644
--- a/src/test/modules/test_regex/test_regex.c
+++ b/src/test/modules/test_regex/test_regex.c
@@ -185,15 +185,6 @@ test_re_compile(text *text_re, int cflags, Oid collation,
if (regcomp_result != REG_OKAY)
{
/* re didn't compile (no need for pg_regfree, if so) */
-
- /*
- * Here and in other places in this file, do CHECK_FOR_INTERRUPTS
- * before reporting a regex error. This is so that if the regex
- * library aborts and returns REG_CANCEL, we don't print an error
- * message that implies the regex was invalid.
- */
- CHECK_FOR_INTERRUPTS();
-
pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg));
ereport(ERROR,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
@@ -239,7 +230,6 @@ test_re_execute(regex_t *re, pg_wchar *data, int data_len,
if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
{
/* re failed??? */
- CHECK_FOR_INTERRUPTS();
pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
ereport(ERROR,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),