aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2021-03-18 21:44:43 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2021-03-18 21:44:43 -0400
commit0b618ddf8bb2fba5492445b56d21647722b8350e (patch)
treeba9af3d1740d6e1c50e2d2c4856e8a6b5e677894 /src/backend
parent2bed650c4841cb63a8cede0db9452664f39bc314 (diff)
downloadpostgresql-0b618ddf8bb2fba5492445b56d21647722b8350e.tar.gz
postgresql-0b618ddf8bb2fba5492445b56d21647722b8350e.zip
Don't leak compiled regex(es) when an ispell cache entry is dropped.
The text search cache mechanisms assume that we can clean up an invalidated dictionary cache entry simply by resetting the associated long-lived memory context. However, that does not work for ispell affixes that make use of regular expressions, because the regex library deals in plain old malloc. Hence, we leaked compiled regex(es) any time we dropped such a cache entry. That could quickly add up, since even a fairly trivial regex can use up tens of kB, and a large one can eat megabytes. Add a memory context callback to ensure that a regex gets freed when its owning cache entry is cleared. Found via valgrind testing. This problem is ancient, so back-patch to all supported branches. Discussion: https://postgr.es/m/3816764.1616104288@sss.pgh.pa.us
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/tsearch/spell.c34
1 files changed, 30 insertions, 4 deletions
diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c
index 32f6c5081e9..34c91e049f3 100644
--- a/src/backend/tsearch/spell.c
+++ b/src/backend/tsearch/spell.c
@@ -655,6 +655,17 @@ FindWord(IspellDict *Conf, const char *word, char *affixflag, int flag)
}
/*
+ * Context reset/delete callback for a regular expression used in an affix
+ */
+static void
+regex_affix_deletion_callback(void *arg)
+{
+ aff_regex_struct *pregex = (aff_regex_struct *) arg;
+
+ pg_regfree(&(pregex->regex));
+}
+
+/*
* Adds a new affix rule to the Affix field.
*
* Conf: current dictionary.
@@ -716,6 +727,7 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
int err;
pg_wchar *wmask;
char *tmask;
+ aff_regex_struct *pregex;
Affix->issimple = 0;
Affix->isregis = 0;
@@ -729,18 +741,32 @@ NIAddAffix(IspellDict *Conf, const char *flag, char flagflags, const char *mask,
wmask = (pg_wchar *) tmpalloc((masklen + 1) * sizeof(pg_wchar));
wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
- err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen,
+ /*
+ * The regex engine stores its stuff using malloc not palloc, so we
+ * must arrange to explicitly clean up the regex when the dictionary's
+ * context is cleared. That means the regex_t has to stay in a fixed
+ * location within the context; we can't keep it directly in the AFFIX
+ * struct, since we may sort and resize the array of AFFIXes.
+ */
+ Affix->reg.pregex = pregex = palloc(sizeof(aff_regex_struct));
+
+ err = pg_regcomp(&(pregex->regex), wmask, wmasklen,
REG_ADVANCED | REG_NOSUB,
DEFAULT_COLLATION_OID);
if (err)
{
char errstr[100];
- pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr));
+ pg_regerror(err, &(pregex->regex), errstr, sizeof(errstr));
ereport(ERROR,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("invalid regular expression: %s", errstr)));
}
+
+ pregex->mcallback.func = regex_affix_deletion_callback;
+ pregex->mcallback.arg = (void *) pregex;
+ MemoryContextRegisterResetCallback(CurrentMemoryContext,
+ &pregex->mcallback);
}
Affix->flagflags = flagflags;
@@ -2119,7 +2145,6 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww
}
else
{
- int err;
pg_wchar *data;
size_t data_len;
int newword_len;
@@ -2129,7 +2154,8 @@ CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *neww
data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
data_len = pg_mb2wchar_with_len(newword, data, newword_len);
- if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0)))
+ if (pg_regexec(&(Affix->reg.pregex->regex), data, data_len,
+ 0, NULL, 0, NULL, 0) == REG_OKAY)
{
pfree(data);
return newword;