aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorÁlvaro Herrera <alvherre@alvh.no-ip.org>2025-03-18 18:56:11 +0100
committerÁlvaro Herrera <alvherre@alvh.no-ip.org>2025-03-18 18:56:11 +0100
commit62d712ecfd940f60e68bde5b6972b6859937c412 (patch)
treecad7cd2202f4dd3ad75c152e6aca98246ef661cd /src
parent247ce06b883d7b3a40d08312dc03dfb37fbff212 (diff)
downloadpostgresql-62d712ecfd940f60e68bde5b6972b6859937c412.tar.gz
postgresql-62d712ecfd940f60e68bde5b6972b6859937c412.zip
Introduce squashing of constant lists in query jumbling
pg_stat_statements produces multiple entries for queries like SELECT something FROM table WHERE col IN (1, 2, 3, ...) depending on the number of parameters, because every element of ArrayExpr is individually jumbled. Most of the time that's undesirable, especially if the list becomes too large. Fix this by introducing a new GUC query_id_squash_values which modifies the node jumbling code to only consider the first and last element of a list of constants, rather than each list element individually. This affects both the query_id generated by query jumbling, as well as pg_stat_statements query normalization so that it suppresses printing of the individual elements of such a list. The default value is off, meaning the previous behavior is maintained. Author: Dmitry Dolgov <9erthalion6@gmail.com> Reviewed-by: Sergey Dudoladov (mysterious, off-list) Reviewed-by: David Geier <geidav.pg@gmail.com> Reviewed-by: Robert Haas <robertmhaas@gmail.com> Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org> Reviewed-by: Sami Imseih <samimseih@gmail.com> Reviewed-by: Sutou Kouhei <kou@clear-code.com> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Michael Paquier <michael@paquier.xyz> Reviewed-by: Marcos Pegoraro <marcos@f10.com.br> Reviewed-by: Julien Rouhaud <rjuju123@gmail.com> Reviewed-by: Zhihong Yu <zyu@yugabyte.com> Tested-by: Yasuo Honda <yasuo.honda@gmail.com> Tested-by: Sergei Kornilov <sk@zsrv.org> Tested-by: Maciek Sakrejda <m.sakrejda@gmail.com> Tested-by: Chengxi Sun <sunchengxi@highgo.com> Tested-by: Jakub Wartak <jakub.wartak@enterprisedb.com> Discussion: https://postgr.es/m/CA+q6zcWtUbT_Sxj0V6HY6EZ89uv5wuG5aefpe_9n0Jr3VwntFg@mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r--src/backend/nodes/gen_node_support.pl19
-rw-r--r--src/backend/nodes/queryjumblefuncs.c146
-rw-r--r--src/backend/postmaster/launch_backend.c3
-rw-r--r--src/backend/utils/misc/guc_tables.c10
-rw-r--r--src/backend/utils/misc/postgresql.conf.sample1
-rw-r--r--src/include/nodes/nodes.h2
-rw-r--r--src/include/nodes/primnodes.h2
-rw-r--r--src/include/nodes/queryjumble.h7
8 files changed, 182 insertions, 8 deletions
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 1a657f7e0ae..7e3f335ac09 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -476,6 +476,7 @@ foreach my $infile (@ARGV)
equal_ignore_if_zero
query_jumble_ignore
query_jumble_location
+ query_jumble_squash
read_write_ignore
write_only_relids
write_only_nondefault_pathtarget
@@ -1283,6 +1284,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
my $query_jumble_ignore = $struct_no_query_jumble;
my $query_jumble_location = 0;
+ my $query_jumble_squash = 0;
# extract per-field attributes
foreach my $a (@a)
@@ -1295,14 +1297,27 @@ _jumble${n}(JumbleState *jstate, Node *node)
{
$query_jumble_location = 1;
}
+ elsif ($a eq 'query_jumble_squash')
+ {
+ $query_jumble_squash = 1;
+ }
}
# node type
if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
and elem $1, @node_types)
{
- print $jff "\tJUMBLE_NODE($f);\n"
- unless $query_jumble_ignore;
+ # Squash constants if requested.
+ if ($query_jumble_squash)
+ {
+ print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ unless $query_jumble_ignore;
+ }
+ else
+ {
+ print $jff "\tJUMBLE_NODE($f);\n"
+ unless $query_jumble_ignore;
+ }
}
elsif ($t eq 'ParseLoc')
{
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b103a281936..189bfda610a 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -32,9 +32,13 @@
*/
#include "postgres.h"
+#include "access/transam.h"
+#include "catalog/pg_proc.h"
#include "common/hashfn.h"
#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
#include "nodes/queryjumble.h"
+#include "utils/lsyscache.h"
#include "parser/scansup.h"
#define JUMBLE_SIZE 1024 /* query serialization buffer size */
@@ -42,6 +46,9 @@
/* GUC parameters */
int compute_query_id = COMPUTE_QUERY_ID_AUTO;
+/* Whether to merge constants in a list when computing query_id */
+bool query_id_squash_values = false;
+
/*
* True when compute_query_id is ON or AUTO, and a module requests them.
*
@@ -53,8 +60,10 @@ bool query_id_enabled = false;
static void AppendJumble(JumbleState *jstate,
const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+ int location, bool squashed);
static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -198,11 +207,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
}
/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * 'squashed' signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool squashed)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -218,15 +231,99 @@ RecordConstLocation(JumbleState *jstate, int location)
}
jstate->clocations[jstate->clocations_count].location = location;
/* initialize lengths to -1 to simplify third-party module usage */
+ jstate->clocations[jstate->clocations_count].squashed = squashed;
jstate->clocations[jstate->clocations_count].length = -1;
jstate->clocations_count++;
}
}
+/*
+ * Subroutine for _jumbleElements: Verify a few simple cases where we can
+ * deduce that the expression is a constant:
+ *
+ * - Ignore a possible wrapping RelabelType and CoerceViaIO.
+ * - If it's a FuncExpr, check that it is an implicit or explicit cast with
+ * funcid not above FirstGenbkiObjectId, and that its arguments are Const.
+ * - Otherwise test if the expression is a simple Const.
+ */
+static bool
+IsSquashableConst(Node *element)
+{
+ if (IsA(element, RelabelType))
+ element = (Node *) ((RelabelType *) element)->arg;
+
+ if (IsA(element, CoerceViaIO))
+ element = (Node *) ((CoerceViaIO *) element)->arg;
+
+ if (IsA(element, FuncExpr))
+ {
+ FuncExpr *func = (FuncExpr *) element;
+ ListCell *temp;
+
+ if (func->funcformat != COERCE_IMPLICIT_CAST &&
+ func->funcformat != COERCE_EXPLICIT_CAST)
+ return false;
+
+ if (func->funcid > FirstGenbkiObjectId)
+ return false;
+
+ foreach(temp, func->args)
+ {
+ Node *arg = lfirst(temp);
+
+ if (!IsA(arg, Const)) /* XXX we could recurse here instead */
+ return false;
+ }
+
+ return true;
+ }
+
+ if (!IsA(element, Const))
+ return false;
+
+ return true;
+}
+
+/*
+ * Subroutine for _jumbleElements: Verify whether the provided list
+ * can be squashed, meaning it contains only constant expressions.
+ *
+ * Return value indicates if squashing is possible.
+ *
+ * Note that this function searches only for explicit Const nodes with
+ * possibly very simple decorations on top, and does not try to simplify
+ * expressions.
+ */
+static bool
+IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+{
+ ListCell *temp;
+
+ /*
+ * If squashing is disabled, or the list is too short, we don't try to
+ * squash it.
+ */
+ if (!query_id_squash_values || list_length(elements) < 2)
+ return false;
+
+ foreach(temp, elements)
+ {
+ if (!IsSquashableConst(lfirst(temp)))
+ return false;
+ }
+
+ *firstExpr = linitial(elements);
+ *lastExpr = llast(elements);
+
+ return true;
+}
+
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
+#define JUMBLE_ELEMENTS(list) \
+ _jumbleElements(jstate, (List *) expr->list)
#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location)
+ RecordConstLocation(jstate, expr->location, false)
#define JUMBLE_FIELD(item) \
AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
#define JUMBLE_FIELD_SINGLE(item) \
@@ -239,6 +336,45 @@ do { \
#include "queryjumblefuncs.funcs.c"
+/*
+ * When query_id_squash_values is enabled, we jumble lists of constant
+ * elements as one individual item regardless of how many elements are
+ * in the list. This means different queries jumble to the same query_id,
+ * if the only difference is the number of elements in the list.
+ *
+ * If query_id_squash_values is disabled or the list is not "simple
+ * enough", we jumble each element normally.
+ */
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+ Node *first,
+ *last;
+
+ if (IsSquashableConstList(elements, &first, &last))
+ {
+ /*
+ * If this list of elements is squashable, keep track of the location
+ * of its first and last elements. When reading back the locations
+ * array, we'll see two consecutive locations with ->squashed set to
+ * true, indicating the location of initial and final elements of this
+ * list.
+ *
+ * For the limited set of cases we support now (implicit or explicit cast
+ * via FuncExpr, Const) it's fine to use exprLocation of the 'last'
+ * expression, but if more complex composite expressions are to be
+ * supported (e.g., OpExpr or FuncExpr as an explicit call), more
+ * sophisticated tracking will be needed.
+ */
+ RecordConstLocation(jstate, exprLocation(first), true);
+ RecordConstLocation(jstate, exprLocation(last), true);
+ }
+ else
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
diff --git a/src/backend/postmaster/launch_backend.c b/src/backend/postmaster/launch_backend.c
index bf6b55ee830..9e6c900d146 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -116,6 +116,7 @@ typedef struct
bool redirection_done;
bool IsBinaryUpgrade;
bool query_id_enabled;
+ bool query_id_squash_values;
int max_safe_fds;
int MaxBackends;
int num_pmchild_slots;
@@ -777,6 +778,7 @@ save_backend_variables(BackendParameters *param,
param->redirection_done = redirection_done;
param->IsBinaryUpgrade = IsBinaryUpgrade;
param->query_id_enabled = query_id_enabled;
+ param->query_id_squash_values = query_id_squash_values;
param->max_safe_fds = max_safe_fds;
param->MaxBackends = MaxBackends;
@@ -1037,6 +1039,7 @@ restore_backend_variables(BackendParameters *param)
redirection_done = param->redirection_done;
IsBinaryUpgrade = param->IsBinaryUpgrade;
query_id_enabled = param->query_id_enabled;
+ query_id_squash_values = param->query_id_squash_values;
max_safe_fds = param->max_safe_fds;
MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index c89316ce294..60a40ed445a 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2120,6 +2120,16 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"query_id_squash_values", PGC_USERSET, STATS_MONITORING,
+ gettext_noop("Allows to merge constants in a list when computing "
+ "query_id."),
+ },
+ &query_id_squash_values,
+ false,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 4c55d0c1383..beb05a89501 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -659,6 +659,7 @@
# - Monitoring -
#compute_query_id = auto
+#query_id_squash_values = off
#log_statement_stats = off
#log_parser_stats = off
#log_planner_stats = off
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 580238bfab1..d18044b4e65 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -105,6 +105,8 @@ typedef enum NodeTag
* that typmod and collation information are usually irrelevant for the
* query jumbling.
*
+ * - query_jumble_squash: Squash a list of constants during query jumbling.
+ *
* - query_jumble_location: Mark the field as a location to track. This is
* only allowed for integer fields that include "location" in their name.
*
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index d0576da3e25..7d3b4198f26 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1394,7 +1394,7 @@ typedef struct ArrayExpr
/* common type of array elements */
Oid element_typeid pg_node_attr(query_jumble_ignore);
/* the array elements or sub-arrays */
- List *elements;
+ List *elements pg_node_attr(query_jumble_squash);
/* true if elements are sub-arrays */
bool multidims pg_node_attr(query_jumble_ignore);
/* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 50eb9566587..905f66bc0bd 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
{
int location; /* start offset in query text */
int length; /* length in bytes, or -1 to ignore */
+
+ /*
+ * Indicates that this location represents the beginning or end of a run
+ * of squashed constants.
+ */
+ bool squashed;
} LocationLen;
/*
@@ -68,6 +74,7 @@ extern JumbleState *JumbleQuery(Query *query);
extern void EnableQueryId(void);
extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_squash_values;
/*
* Returns whether query identifier computation has been enabled, either