aboutsummaryrefslogtreecommitdiff
path: root/src/include/parser/parse_node.h
diff options
context:
space:
mode:
author: Michael Paquier <michael@paquier.xyz> 2024-10-24 09:28:51 +0900
committer: Michael Paquier <michael@paquier.xyz> 2024-10-24 09:29:54 +0900
commit: 499edb09741b8fad2de038361fb342aae6e6007f (patch)
tree: 03e40bbc6cf03c6f6442525865391e2b7066de57 /src/include/parser/parse_node.h
parent: 4b096c67e0eed81e287094b9692fff72b9ea3eef (diff)
download: postgresql-499edb09741b8fad2de038361fb342aae6e6007f.tar.gz
postgresql-499edb09741b8fad2de038361fb342aae6e6007f.zip
Track more precisely query locations for nested statements
Previously, a Query generated through the transform phase would have an unset stmt_location, the field tracking the starting point of a query string. Extensions relying on the statement location to extract the relevant parts of the source text string would fall back to using the whole statement instead, leading to confusing results, as in pg_stat_statements for queries relying on nested queries, like:
- EXPLAIN, with the top-level and nested queries using the same query string, and a query ID coming from the nested query for the non-top-level entry.
- Multi-statements, with only partial portions of queries being normalized.
- COPY TO with a query, SELECT or DMLs.
This patch improves things by keeping track of the statement locations and propagating them to Query during transform, allowing PGSS to only show the relevant part of the query for nested queries. This leads to less bloat in entries for non-top-level entries, as queries can now be grouped within the same (toplevel, queryid) duos in pg_stat_statements. The result gives a stricter one-to-one mapping between query IDs and their query strings. The regression tests introduced in 45e0ba30fc40 produce differences reflecting the new logic.
Author: Anthonin Bonnefoy
Reviewed-by: Michael Paquier, Jian He
Discussion: https://postgr.es/m/CAO6_XqqM6S9bQ2qd=75W+yKATwoazxSNhv5sjW06fjGAtHbTUA@mail.gmail.com
Diffstat (limited to 'src/include/parser/parse_node.h')
-rw-r--r-- src/include/parser/parse_node.h 16
1 files changed, 16 insertions, 0 deletions
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index 543df568147..2375e95c107 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -108,6 +108,20 @@ typedef Node *(*CoerceParamHook) (ParseState *pstate, Param *param,
* byte-wise locations in parse structures to character-wise cursor
* positions.)
*
+ * p_stmt_location: location of the top-level RawStmt's start. During
+ * transformation, the Query's location will be set to the statement's
+ * location if available. Otherwise, the RawStmt's start location will
+ * be used. Propagating the location through ParseState is needed for
+ * the Query length calculation (see p_stmt_len below).
+ *
+ * p_stmt_len: length of the top-level RawStmt. Most of the time, the
+ * statement's length is not provided by the parser, with the exception
+ * of SelectStmt within parentheses and PreparableStmt in COPY. If the
+ * statement's location is provided by the parser, the top-level location
+ * and length are needed to accurately compute the Query's length. If the
+ * statement's location is not provided, the RawStmt's length can be used
+ * directly.
+ *
* p_rtable: list of RTEs that will become the rangetable of the query.
* Note that neither relname nor refname of these entries are necessarily
* unique; searching the rtable by name is a bad idea.
@@ -193,6 +207,8 @@ struct ParseState
{
ParseState *parentParseState; /* stack link */
const char *p_sourcetext; /* source text, or NULL if not available */
+ ParseLoc p_stmt_location; /* start location, or -1 if unknown */
+ ParseLoc p_stmt_len; /* length in bytes; 0 means "rest of string" */
List *p_rtable; /* range table so far */
List *p_rteperminfos; /* list of RTEPermissionInfo nodes for each
* RTE_RELATION entry in rtable */