 doc/src/sgml/ref/create_trigger.sgml   | 112
 doc/src/sgml/trigger.sgml              |  54
 src/backend/commands/copy.c            |  10
 src/backend/commands/trigger.c         | 820
 src/backend/executor/README            |   2
 src/backend/executor/execMain.c        |  11
 src/backend/executor/nodeModifyTable.c |  57
 src/include/commands/trigger.h         |  29
 src/include/nodes/execnodes.h          |   4
 src/test/regress/expected/triggers.out |  52
 src/test/regress/sql/triggers.sql      |  42
 11 files changed, 803 insertions(+), 390 deletions(-)
diff --git a/doc/src/sgml/ref/create_trigger.sgml b/doc/src/sgml/ref/create_trigger.sgml
index 18efe6a9ed7..065c8272710 100644
--- a/doc/src/sgml/ref/create_trigger.sgml
+++ b/doc/src/sgml/ref/create_trigger.sgml
@@ -52,7 +52,7 @@ CREATE [ CONSTRAINT ] TRIGGER <replaceable class="PARAMETER">name</replaceable>
trigger will be associated with the specified table, view, or foreign table
and will execute the specified
function <replaceable class="parameter">function_name</replaceable> when
- certain events occur.
+ certain operations are performed on that table.
</para>
<para>
@@ -82,10 +82,7 @@ CREATE [ CONSTRAINT ] TRIGGER <replaceable class="PARAMETER">name</replaceable>
executes once for any given operation, regardless of how many rows
it modifies (in particular, an operation that modifies zero rows
will still result in the execution of any applicable <literal>FOR
- EACH STATEMENT</literal> triggers). Note that with an
- <command>INSERT</command> with an <literal>ON CONFLICT DO UPDATE</>
- clause, both <command>INSERT</command> and
- <command>UPDATE</command> statement level trigger will be fired.
+ EACH STATEMENT</literal> triggers).
</para>
<para>
@@ -174,7 +171,8 @@ CREATE [ CONSTRAINT ] TRIGGER <replaceable class="PARAMETER">name</replaceable>
<firstterm>constraint trigger</>. This is the same as a regular trigger
except that the timing of the trigger firing can be adjusted using
<xref linkend="SQL-SET-CONSTRAINTS">.
- Constraint triggers must be <literal>AFTER ROW</> triggers on tables. They
+ Constraint triggers must be <literal>AFTER ROW</> triggers on plain
+ tables (not foreign tables). They
can be fired either at the end of the statement causing the triggering
event, or at the end of the containing transaction; in the latter case they
are said to be <firstterm>deferred</>. A pending deferred-trigger firing
@@ -184,18 +182,29 @@ CREATE [ CONSTRAINT ] TRIGGER <replaceable class="PARAMETER">name</replaceable>
</para>
<para>
- The <literal>REFERENCING</> option is only allowed for an <literal>AFTER</>
- trigger which is not a constraint trigger. <literal>OLD TABLE</> may only
- be specified once, and only on a trigger which can fire on
- <literal>UPDATE</> or <literal>DELETE</>. <literal>NEW TABLE</> may only
- be specified once, and only on a trigger which can fire on
- <literal>UPDATE</> or <literal>INSERT</>.
+ The <literal>REFERENCING</> option enables collection
+ of <firstterm>transition relations</>, which are row sets that include all
+ of the rows inserted, deleted, or modified by the current SQL statement.
+ This feature lets the trigger see a global view of what the statement did,
+ not just one row at a time. This option is only allowed for
+ an <literal>AFTER</> trigger that is not a constraint trigger; also, if
+ the trigger is an <literal>UPDATE</> trigger, it must not specify
+ a <replaceable class="parameter">column_name</replaceable> list.
+ <literal>OLD TABLE</> may only be specified once, and only for a trigger
+ that can fire on <literal>UPDATE</> or <literal>DELETE</>; it creates a
+ transition relation containing the <firstterm>before-images</> of all rows
+ updated or deleted by the statement.
+ Similarly, <literal>NEW TABLE</> may only be specified once, and only for
+ a trigger that can fire on <literal>UPDATE</> or <literal>INSERT</>;
+ it creates a transition relation containing the <firstterm>after-images</>
+ of all rows updated or inserted by the statement.
</para>
<para>
<command>SELECT</command> does not modify any rows so you cannot
- create <command>SELECT</command> triggers. Rules and views are more
- appropriate in such cases.
+ create <command>SELECT</command> triggers. Rules and views may provide
+ workable solutions to problems that seem to need <command>SELECT</command>
+ triggers.
</para>
<para>
@@ -300,12 +309,9 @@ UPDATE OF <replaceable>column_name1</replaceable> [, <replaceable>column_name2</
<term><literal>REFERENCING</literal></term>
<listitem>
<para>
- This immediately precedes the declaration of one or two relations which
- can be used to read the before and/or after images of all rows directly
- affected by the triggering statement. An <literal>AFTER EACH ROW</>
- trigger is allowed to use both these transition relation names and the
- row names (<literal>OLD</> and <literal>NEW</>) which reference each
- individual row for which the trigger fires.
+ This keyword immediately precedes the declaration of one or two
+ relation names that provide access to the transition relations of the
+ triggering statement.
</para>
</listitem>
</varlistentry>
@@ -315,8 +321,9 @@ UPDATE OF <replaceable>column_name1</replaceable> [, <replaceable>column_name2</
<term><literal>NEW TABLE</literal></term>
<listitem>
<para>
- This specifies whether the named relation contains the before or after
- images for rows affected by the statement which fired the trigger.
+ This clause indicates whether the following relation name is for the
+ before-image transition relation or the after-image transition
+ relation.
</para>
</listitem>
</varlistentry>
@@ -325,7 +332,8 @@ UPDATE OF <replaceable>column_name1</replaceable> [, <replaceable>column_name2</
<term><replaceable class="PARAMETER">transition_relation_name</replaceable></term>
<listitem>
<para>
- The (unqualified) name to be used within the trigger for this relation.
+ The (unqualified) name to be used within the trigger for this
+ transition relation.
</para>
</listitem>
</varlistentry>
@@ -459,6 +467,35 @@ UPDATE OF <replaceable>column_name1</replaceable> [, <replaceable>column_name2</
</para>
<para>
+ In some cases it is possible for a single SQL command to fire more than
+ one kind of trigger. For instance an <command>INSERT</command> with
+ an <literal>ON CONFLICT DO UPDATE</> clause may cause both insert and
+ update operations, so it will fire both kinds of triggers as needed.
+ The transition relations supplied to triggers are
+ specific to their event type; thus an <command>INSERT</command> trigger
+ will see only the inserted rows, while an <command>UPDATE</command>
+ trigger will see only the updated rows.
+ </para>
+
+ <para>
+ Row updates or deletions caused by foreign-key enforcement actions, such
+ as <literal>ON UPDATE CASCADE</> or <literal>ON DELETE SET NULL</>, are
+ treated as part of the SQL command that caused them (note that such
+ actions are never deferred). Relevant triggers on the affected table will
+ be fired, so that this provides another way in which a SQL command might
+ fire triggers not directly matching its type. In simple cases, triggers
+ that request transition relations will see all changes caused in their
+ table by a single original SQL command as a single transition relation.
+ However, there are cases in which the presence of an <literal>AFTER ROW</>
+ trigger that requests transition relations will cause the foreign-key
+ enforcement actions triggered by a single SQL command to be split into
+ multiple steps, each with its own transition relation(s). In such cases,
+ any <literal>AFTER STATEMENT</> triggers that are present will be fired
+ once per creation of a transition relation, ensuring that the triggers see
+ each affected row once and only once.
+ </para>
+
+ <para>
Modifying a partitioned table or a table with inheritance children fires
statement-level triggers directly attached to that table, but not
statement-level triggers for its partitions or child tables. In contrast,
@@ -589,19 +626,30 @@ CREATE TRIGGER paired_items_update
<itemizedlist>
<listitem>
<para>
- While transition tables for <literal>AFTER</> triggers are specified
- using the <literal>REFERENCING</> clause in the standard way, the row
- variables used in <literal>FOR EACH ROW</> triggers may not be
- specified in <literal>REFERENCING</> clause. They are available in a
- manner which is dependent on the language in which the trigger function
- is written. Some languages effectively behave as though there is a
- <literal>REFERENCING</> clause containing <literal>OLD ROW AS OLD NEW
- ROW AS NEW</>.
+ While transition table names for <literal>AFTER</> triggers are
+ specified using the <literal>REFERENCING</> clause in the standard way,
+ the row variables used in <literal>FOR EACH ROW</> triggers may not be
+ specified in a <literal>REFERENCING</> clause. They are available in a
+ manner that is dependent on the language in which the trigger function
+ is written, but is fixed for any one language. Some languages
+ effectively behave as though there is a <literal>REFERENCING</> clause
+ containing <literal>OLD ROW AS OLD NEW ROW AS NEW</>.
</para>
</listitem>
<listitem>
- <para><productname>PostgreSQL</productname> only allows the execution
+ <para>
+ The standard allows transition tables to be used with
+ column-specific <literal>UPDATE</> triggers, but then the set of rows
+ that should be visible in the transition tables depends on the
+ trigger's column list. This is not currently implemented by
+ <productname>PostgreSQL</productname>.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <productname>PostgreSQL</productname> only allows the execution
of a user-defined function for the triggered action. The standard
allows the execution of a number of other SQL commands, such as
<command>CREATE TABLE</command>, as the triggered action. This
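As a hypothetical illustration of the REFERENCING syntax documented above
(the names audited_tab, audit_count, and new_rows are invented and not part
of this patch), an AFTER STATEMENT trigger can inspect every row the
statement inserted through its transition table:

    CREATE TABLE audited_tab (id int PRIMARY KEY, val text);

    CREATE FUNCTION audit_count() RETURNS trigger
    LANGUAGE plpgsql AS $$
    BEGIN
        -- new_rows is the after-image transition relation declared below
        RAISE NOTICE 'rows inserted: %', (SELECT count(*) FROM new_rows);
        RETURN NULL;  -- return value is ignored for AFTER STATEMENT triggers
    END;
    $$;

    CREATE TRIGGER audited_tab_ins
        AFTER INSERT ON audited_tab
        REFERENCING NEW TABLE AS new_rows
        FOR EACH STATEMENT
        EXECUTE PROCEDURE audit_count();
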
diff --git a/doc/src/sgml/trigger.sgml b/doc/src/sgml/trigger.sgml
index 950245d19a2..a16256056f0 100644
--- a/doc/src/sgml/trigger.sgml
+++ b/doc/src/sgml/trigger.sgml
@@ -41,17 +41,13 @@
On tables and foreign tables, triggers can be defined to execute either
before or after any <command>INSERT</command>, <command>UPDATE</command>,
or <command>DELETE</command> operation, either once per modified row,
- or once per <acronym>SQL</acronym> statement. If an
- <command>INSERT</command> contains an <literal>ON CONFLICT DO UPDATE</>
- clause, it is possible that the effects of a BEFORE insert trigger and
- a BEFORE update trigger can both be applied together, if a reference to
- an <varname>EXCLUDED</> column appears. <command>UPDATE</command>
- triggers can moreover be set to fire only if certain columns are
- mentioned in the <literal>SET</literal> clause of the
- <command>UPDATE</command> statement. Triggers can also fire for
- <command>TRUNCATE</command> statements. If a trigger event occurs,
+ or once per <acronym>SQL</acronym> statement.
+ <command>UPDATE</command> triggers can moreover be set to fire only if
+ certain columns are mentioned in the <literal>SET</literal> clause of
+ the <command>UPDATE</command> statement. Triggers can also fire
+ for <command>TRUNCATE</command> statements. If a trigger event occurs,
the trigger's function is called at the appropriate time to handle the
- event. Foreign tables do not support the TRUNCATE statement at all.
+ event.
</para>
<para>
@@ -97,10 +93,7 @@
two types of triggers are sometimes called <firstterm>row-level</>
triggers and <firstterm>statement-level</> triggers,
respectively. Triggers on <command>TRUNCATE</command> may only be
- defined at statement level. On views, triggers that fire before or
- after may only be defined at statement level, while triggers that fire
- instead of an <command>INSERT</command>, <command>UPDATE</command>,
- or <command>DELETE</command> may only be defined at row level.
+ defined at statement level, not per-row.
</para>
<para>
@@ -117,9 +110,9 @@
operated on, while row-level <literal>AFTER</> triggers fire at the end of
the statement (but before any statement-level <literal>AFTER</> triggers).
These types of triggers may only be defined on non-partitioned tables and
- foreign tables. Row-level <literal>INSTEAD OF</> triggers may only be
- defined on views, and fire immediately as each row in the view is
- identified as needing to be operated on.
+ foreign tables, not views. <literal>INSTEAD OF</> triggers may only be
+ defined on views, and only at row level; they fire immediately as each
+ row in the view is identified as needing to be operated on.
</para>
<para>
@@ -132,18 +125,19 @@
<para>
If an <command>INSERT</command> contains an <literal>ON CONFLICT
- DO UPDATE</> clause, it is possible that the effects of all
- row-level <literal>BEFORE</> <command>INSERT</command> triggers
- and all row-level <literal>BEFORE</literal> <command>UPDATE</command> triggers can
+ DO UPDATE</> clause, it is possible that the effects of
+ row-level <literal>BEFORE</> <command>INSERT</command> triggers and
+ row-level <literal>BEFORE</literal> <command>UPDATE</command> triggers can
both be applied in a way that is apparent from the final state of
the updated row, if an <varname>EXCLUDED</> column is referenced.
There need not be an <varname>EXCLUDED</> column reference for
- both sets of row-level <literal>BEFORE</literal> triggers to execute, though. The
+ both sets of row-level <literal>BEFORE</literal> triggers to execute,
+ though. The
possibility of surprising outcomes should be considered when there
are both <literal>BEFORE</> <command>INSERT</command> and
<literal>BEFORE</> <command>UPDATE</command> row-level triggers
- that both affect a row being inserted/updated (this can still be
- problematic if the modifications are more or less equivalent if
+ that change a row being inserted/updated (this can be
+ problematic even if the modifications are more or less equivalent, if
they're not also idempotent). Note that statement-level
<command>UPDATE</command> triggers are executed when <literal>ON
CONFLICT DO UPDATE</> is specified, regardless of whether or not
@@ -314,8 +308,18 @@
<varname>NEW</varname> row for <command>INSERT</command> and
<command>UPDATE</command> triggers, and/or the <varname>OLD</varname> row
for <command>UPDATE</command> and <command>DELETE</command> triggers.
- Statement-level triggers do not currently have any way to examine the
- individual row(s) modified by the statement.
+ </para>
+
+ <para>
+ By default, statement-level triggers do not have any way to examine the
+ individual row(s) modified by the statement. But an <literal>AFTER
+ STATEMENT</> trigger can request that <firstterm>transition tables</>
+ be created to make the sets of affected rows available to the trigger.
+ <literal>AFTER ROW</> triggers can also request transition tables, so
+ that they can see the total changes in the table as well as the change in
+ the individual row they are currently being fired for. The syntax for
+ examining the transition tables again depends on the programming language
+ that is being used.
</para>
</sect1>
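Continuing the hypothetical example above (audit_delta and the transition
relation names are likewise invented), an UPDATE trigger can request both
transition tables and compare before-images with after-images:

    CREATE FUNCTION audit_delta() RETURNS trigger
    LANGUAGE plpgsql AS $$
    BEGIN
        RAISE NOTICE 'changes: %',
            (SELECT string_agg(o.val || ' -> ' || n.val, ', ')
               FROM old_rows o JOIN new_rows n USING (id));
        RETURN NULL;
    END;
    $$;

    CREATE TRIGGER audited_tab_upd
        AFTER UPDATE ON audited_tab
        REFERENCING OLD TABLE AS old_rows NEW TABLE AS new_rows
        FOR EACH STATEMENT
        EXECUTE PROCEDURE audit_delta();

Per the wording added above, if the UPDATE arises from an INSERT with an
ON CONFLICT DO UPDATE clause, this trigger's transition tables contain only
the rows that were actually updated.
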
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 375a25fbcf8..ad1fcd8d77b 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2429,12 +2429,17 @@ CopyFrom(CopyState cstate)
/* Triggers might need a slot as well */
estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate);
+ /* Prepare to catch AFTER triggers. */
+ AfterTriggerBeginQuery();
+
/*
* If there are any triggers with transition tables on the named relation,
* we need to be prepared to capture transition tuples.
*/
cstate->transition_capture =
- MakeTransitionCaptureState(cstate->rel->trigdesc);
+ MakeTransitionCaptureState(cstate->rel->trigdesc,
+ RelationGetRelid(cstate->rel),
+ CMD_INSERT);
/*
* If the named relation is a partitioned table, initialize state for
@@ -2510,9 +2515,6 @@ CopyFrom(CopyState cstate)
bufferedTuples = palloc(MAX_BUFFERED_TUPLES * sizeof(HeapTuple));
}
- /* Prepare to catch AFTER triggers. */
- AfterTriggerBeginQuery();
-
/*
* Check BEFORE STATEMENT insertion triggers. It's debatable whether we
* should do this for COPY, since it's not really an "INSERT" statement as
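The CopyFrom() hunks above set up transition-tuple capture before any rows
are loaded, so statement-level AFTER triggers with transition tables also
see rows inserted by COPY. A psql-level sketch, reusing the hypothetical
trigger defined earlier (data values invented):

    COPY audited_tab (id, val) FROM stdin WITH (FORMAT csv);
    3,three
    4,four
    \.
    -- would report: NOTICE:  rows inserted: 2
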
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 269c9e17dd1..7e391a10921 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -234,6 +234,11 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
RelationGetRelationName(rel)),
errdetail("Foreign tables cannot have TRUNCATE triggers.")));
+ /*
+ * We disallow constraint triggers to protect the assumption that
+ * triggers on FKs can't be deferred. See notes with AfterTriggers
+ * data structures, below.
+ */
if (stmt->isconstraint)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -418,6 +423,26 @@ CreateTrigger(CreateTrigStmt *stmt, const char *queryString,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("transition tables cannot be specified for triggers with more than one event")));
+ /*
+ * We currently don't allow column-specific triggers with
+ * transition tables. Per spec, that seems to require
+ * accumulating separate transition tables for each combination of
+ * columns, which is a lot of work for a rather marginal feature.
+ */
+ if (stmt->columns != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("transition tables cannot be specified for triggers with column lists")));
+
+ /*
+ * We disallow constraint triggers with transition tables, to
+ * protect the assumption that such triggers can't be deferred.
+ * See notes with AfterTriggers data structures, below.
+ *
+ * Currently this is enforced by the grammar, so just Assert here.
+ */
+ Assert(!stmt->isconstraint);
+
if (tt->isNew)
{
if (!(TRIGGER_FOR_INSERT(tgtype) ||
@@ -2086,96 +2111,6 @@ FindTriggerIncompatibleWithInheritance(TriggerDesc *trigdesc)
}
/*
- * Make a TransitionCaptureState object from a given TriggerDesc. The
- * resulting object holds the flags which control whether transition tuples
- * are collected when tables are modified, and the tuplestores themselves.
- * Note that we copy the flags from a parent table into this struct (rather
- * than using each relation's TriggerDesc directly) so that we can use it to
- * control the collection of transition tuples from child tables.
- *
- * If there are no triggers with transition tables configured for 'trigdesc',
- * then return NULL.
- *
- * The resulting object can be passed to the ExecAR* functions. The caller
- * should set tcs_map or tcs_original_insert_tuple as appropriate when dealing
- * with child tables.
- */
-TransitionCaptureState *
-MakeTransitionCaptureState(TriggerDesc *trigdesc)
-{
- TransitionCaptureState *state = NULL;
-
- if (trigdesc != NULL &&
- (trigdesc->trig_delete_old_table || trigdesc->trig_update_old_table ||
- trigdesc->trig_update_new_table || trigdesc->trig_insert_new_table))
- {
- MemoryContext oldcxt;
- ResourceOwner saveResourceOwner;
-
- /*
- * Normally DestroyTransitionCaptureState should be called after
- * executing all AFTER triggers for the current statement.
- *
- * To handle error cleanup, TransitionCaptureState and the tuplestores
- * it contains will live in the current [sub]transaction's memory
- * context. Likewise for the current resource owner, because we also
- * want to clean up temporary files spilled to disk by the tuplestore
- * in that scenario. This scope is sufficient, because AFTER triggers
- * with transition tables cannot be deferred (only constraint triggers
- * can be deferred, and constraint triggers cannot have transition
- * tables). The AFTER trigger queue may contain pointers to this
- * TransitionCaptureState, but any such entries will be processed or
- * discarded before the end of the current [sub]transaction.
- *
- * If a future release allows deferred triggers with transition
- * tables, we'll need to reconsider the scope of the
- * TransitionCaptureState object.
- */
- oldcxt = MemoryContextSwitchTo(CurTransactionContext);
- saveResourceOwner = CurrentResourceOwner;
-
- state = (TransitionCaptureState *)
- palloc0(sizeof(TransitionCaptureState));
- state->tcs_delete_old_table = trigdesc->trig_delete_old_table;
- state->tcs_update_old_table = trigdesc->trig_update_old_table;
- state->tcs_update_new_table = trigdesc->trig_update_new_table;
- state->tcs_insert_new_table = trigdesc->trig_insert_new_table;
- PG_TRY();
- {
- CurrentResourceOwner = CurTransactionResourceOwner;
- if (trigdesc->trig_delete_old_table || trigdesc->trig_update_old_table)
- state->tcs_old_tuplestore = tuplestore_begin_heap(false, false, work_mem);
- if (trigdesc->trig_insert_new_table)
- state->tcs_insert_tuplestore = tuplestore_begin_heap(false, false, work_mem);
- if (trigdesc->trig_update_new_table)
- state->tcs_update_tuplestore = tuplestore_begin_heap(false, false, work_mem);
- }
- PG_CATCH();
- {
- CurrentResourceOwner = saveResourceOwner;
- PG_RE_THROW();
- }
- PG_END_TRY();
- CurrentResourceOwner = saveResourceOwner;
- MemoryContextSwitchTo(oldcxt);
- }
-
- return state;
-}
-
-void
-DestroyTransitionCaptureState(TransitionCaptureState *tcs)
-{
- if (tcs->tcs_insert_tuplestore != NULL)
- tuplestore_end(tcs->tcs_insert_tuplestore);
- if (tcs->tcs_update_tuplestore != NULL)
- tuplestore_end(tcs->tcs_update_tuplestore);
- if (tcs->tcs_old_tuplestore != NULL)
- tuplestore_end(tcs->tcs_old_tuplestore);
- pfree(tcs);
-}
-
-/*
* Call a trigger function.
*
* trigdata: trigger descriptor.
@@ -3338,9 +3273,11 @@ TriggerEnabled(EState *estate, ResultRelInfo *relinfo,
* during the current transaction tree. (BEFORE triggers are fired
* immediately so we don't need any persistent state about them.) The struct
* and most of its subsidiary data are kept in TopTransactionContext; however
- * the individual event records are kept in a separate sub-context. This is
- * done mainly so that it's easy to tell from a memory context dump how much
- * space is being eaten by trigger events.
+ * some data that can be discarded sooner appears in the CurTransactionContext
+ * of the relevant subtransaction. Also, the individual event records are
+ * kept in a separate sub-context of TopTransactionContext. This is done
+ * mainly so that it's easy to tell from a memory context dump how much space
+ * is being eaten by trigger events.
*
* Because the list of pending events can grow large, we go to some
* considerable effort to minimize per-event memory consumption. The event
@@ -3400,6 +3337,13 @@ typedef SetConstraintStateData *SetConstraintState;
* tuple(s). This permits storing tuples once regardless of the number of
* row-level triggers on a foreign table.
*
+ * Note that we need triggers on foreign tables to be fired in exactly the
+ * order they were queued, so that the tuples come out of the tuplestore in
+ * the right order. To ensure that, we forbid deferrable (constraint)
+ * triggers on foreign tables. This also ensures that such triggers do not
+ * get deferred into outer trigger query levels, meaning that it's okay to
+ * destroy the tuplestore at the end of the query level.
+ *
* Statement-level triggers always bear AFTER_TRIGGER_1CTID, though they
* require no ctid field. We lack the flag bit space to neatly represent that
* distinct case, and it seems unlikely to be worth much trouble.
@@ -3433,7 +3377,7 @@ typedef struct AfterTriggerSharedData
Oid ats_tgoid; /* the trigger's ID */
Oid ats_relid; /* the relation it's on */
CommandId ats_firing_id; /* ID for firing cycle */
- TransitionCaptureState *ats_transition_capture;
+ struct AfterTriggersTableData *ats_table; /* transition table access */
} AfterTriggerSharedData;
typedef struct AfterTriggerEventData *AfterTriggerEvent;
@@ -3505,6 +3449,14 @@ typedef struct AfterTriggerEventList
#define for_each_event_chunk(eptr, cptr, evtlist) \
for_each_chunk(cptr, evtlist) for_each_event(eptr, cptr)
+/* Macros for iterating from a start point that might not be list start */
+#define for_each_chunk_from(cptr) \
+ for (; cptr != NULL; cptr = cptr->next)
+#define for_each_event_from(eptr, cptr) \
+ for (; \
+ (char *) eptr < (cptr)->freeptr; \
+ eptr = (AfterTriggerEvent) (((char *) eptr) + SizeofTriggerEvent(eptr)))
+
/*
* All per-transaction data for the AFTER TRIGGERS module.
@@ -3529,60 +3481,107 @@ typedef struct AfterTriggerEventList
* query_depth is the current depth of nested AfterTriggerBeginQuery calls
* (-1 when the stack is empty).
*
- * query_stack[query_depth] is a list of AFTER trigger events queued by the
- * current query (and the query_stack entries below it are lists of trigger
- * events queued by calling queries). None of these are valid until the
- * matching AfterTriggerEndQuery call occurs. At that point we fire
- * immediate-mode triggers, and append any deferred events to the main events
- * list.
+ * query_stack[query_depth] is the per-query-level data, including these fields:
+ *
+ * events is a list of AFTER trigger events queued by the current query.
+ * None of these are valid until the matching AfterTriggerEndQuery call
+ * occurs. At that point we fire immediate-mode triggers, and append any
+ * deferred events to the main events list.
*
- * fdw_tuplestores[query_depth] is a tuplestore containing the foreign tuples
- * needed for the current query.
+ * fdw_tuplestore is a tuplestore containing the foreign-table tuples
+ * needed by events queued by the current query. (Note: we use just one
+ * tuplestore even though more than one foreign table might be involved.
+ * This is okay because tuplestores don't really care what's in the tuples
+ * they store; but it's possible that someday it'd break.)
*
- * maxquerydepth is just the allocated length of query_stack and the
- * tuplestores.
+ * tables is a List of AfterTriggersTableData structs for target tables
+ * of the current query (see below).
*
- * state_stack is a stack of pointers to saved copies of the SET CONSTRAINTS
- * state data; each subtransaction level that modifies that state first
+ * maxquerydepth is just the allocated length of query_stack.
+ *
+ * trans_stack holds per-subtransaction data, including these fields:
+ *
+ * state is NULL or a pointer to a saved copy of the SET CONSTRAINTS
+ * state data. Each subtransaction level that modifies that state first
* saves a copy, which we use to restore the state if we abort.
*
- * events_stack is a stack of copies of the events head/tail pointers,
+ * events is a copy of the events head/tail pointers,
* which we use to restore those values during subtransaction abort.
*
- * depth_stack is a stack of copies of subtransaction-start-time query_depth,
+ * query_depth is the subtransaction-start-time value of query_depth,
* which we similarly use to clean up at subtransaction abort.
*
- * firing_stack is a stack of copies of subtransaction-start-time
- * firing_counter. We use this to recognize which deferred triggers were
- * fired (or marked for firing) within an aborted subtransaction.
+ * firing_counter is the subtransaction-start-time value of firing_counter.
+ * We use this to recognize which deferred triggers were fired (or marked
+ * for firing) within an aborted subtransaction.
*
* We use GetCurrentTransactionNestLevel() to determine the correct array
- * index in these stacks. maxtransdepth is the number of allocated entries in
- * each stack. (By not keeping our own stack pointer, we can avoid trouble
+ * index in trans_stack. maxtransdepth is the number of allocated entries in
+ * trans_stack. (By not keeping our own stack pointer, we can avoid trouble
* in cases where errors during subxact abort cause multiple invocations
* of AfterTriggerEndSubXact() at the same nesting depth.)
+ *
+ * We create an AfterTriggersTableData struct for each target table of the
+ * current query, and each operation mode (INSERT/UPDATE/DELETE), that has
+ * either transition tables or AFTER STATEMENT triggers. This is used to
+ * hold the relevant transition tables, as well as info tracking whether
+ * we already queued the AFTER STATEMENT triggers. (We use that info to
+ * prevent, as much as possible, firing the same AFTER STATEMENT trigger
+ * more than once per statement.) These structs, along with the transition
+ * table tuplestores, live in the (sub)transaction's CurTransactionContext.
+ * That's sufficient lifespan because we don't allow transition tables to be
+ * used by deferrable triggers, so they only need to survive until
+ * AfterTriggerEndQuery.
*/
+typedef struct AfterTriggersQueryData AfterTriggersQueryData;
+typedef struct AfterTriggersTransData AfterTriggersTransData;
+typedef struct AfterTriggersTableData AfterTriggersTableData;
+
typedef struct AfterTriggersData
{
CommandId firing_counter; /* next firing ID to assign */
SetConstraintState state; /* the active S C state */
AfterTriggerEventList events; /* deferred-event list */
- int query_depth; /* current query list index */
- AfterTriggerEventList *query_stack; /* events pending from each query */
- Tuplestorestate **fdw_tuplestores; /* foreign tuples for one row from
- * each query */
- int maxquerydepth; /* allocated len of above array */
MemoryContext event_cxt; /* memory context for events, if any */
- /* these fields are just for resetting at subtrans abort: */
+ /* per-query-level data: */
+ AfterTriggersQueryData *query_stack; /* array of structs shown below */
+ int query_depth; /* current index in above array */
+ int maxquerydepth; /* allocated len of above array */
- SetConstraintState *state_stack; /* stacked S C states */
- AfterTriggerEventList *events_stack; /* stacked list pointers */
- int *depth_stack; /* stacked query_depths */
- CommandId *firing_stack; /* stacked firing_counters */
- int maxtransdepth; /* allocated len of above arrays */
+ /* per-subtransaction-level data: */
+ AfterTriggersTransData *trans_stack; /* array of structs shown below */
+ int maxtransdepth; /* allocated len of above array */
} AfterTriggersData;
+struct AfterTriggersQueryData
+{
+ AfterTriggerEventList events; /* events pending from this query */
+ Tuplestorestate *fdw_tuplestore; /* foreign tuples for said events */
+ List *tables; /* list of AfterTriggersTableData, see below */
+};
+
+struct AfterTriggersTransData
+{
+ /* these fields are just for resetting at subtrans abort: */
+ SetConstraintState state; /* saved S C state, or NULL if not yet saved */
+ AfterTriggerEventList events; /* saved list pointer */
+ int query_depth; /* saved query_depth */
+ CommandId firing_counter; /* saved firing_counter */
+};
+
+struct AfterTriggersTableData
+{
+ /* relid + cmdType form the lookup key for these structs: */
+ Oid relid; /* target table's OID */
+ CmdType cmdType; /* event type, CMD_INSERT/UPDATE/DELETE */
+ bool closed; /* true when no longer OK to add tuples */
+ bool stmt_trig_done; /* did we already queue stmt-level triggers? */
+ AfterTriggerEventList stmt_trig_events; /* if so, saved list pointer */
+ Tuplestorestate *old_tuplestore; /* "old" transition table, if any */
+ Tuplestorestate *new_tuplestore; /* "new" transition table, if any */
+};
+
static AfterTriggersData afterTriggers;
static void AfterTriggerExecute(AfterTriggerEvent event,
@@ -3591,38 +3590,41 @@ static void AfterTriggerExecute(AfterTriggerEvent event,
Instrumentation *instr,
MemoryContext per_tuple_context,
TupleTableSlot *trig_tuple_slot1,
- TupleTableSlot *trig_tuple_slot2,
- TransitionCaptureState *transition_capture);
+ TupleTableSlot *trig_tuple_slot2);
+static AfterTriggersTableData *GetAfterTriggersTableData(Oid relid,
+ CmdType cmdType);
+static void AfterTriggerFreeQuery(AfterTriggersQueryData *qs);
static SetConstraintState SetConstraintStateCreate(int numalloc);
static SetConstraintState SetConstraintStateCopy(SetConstraintState state);
static SetConstraintState SetConstraintStateAddItem(SetConstraintState state,
Oid tgoid, bool tgisdeferred);
+static void cancel_prior_stmt_triggers(Oid relid, CmdType cmdType, int tgevent);
/*
- * Gets a current query transition tuplestore and initializes it if necessary.
+ * Get the FDW tuplestore for the current trigger query level, creating it
+ * if necessary.
*/
static Tuplestorestate *
-GetTriggerTransitionTuplestore(Tuplestorestate **tss)
+GetCurrentFDWTuplestore(void)
{
Tuplestorestate *ret;
- ret = tss[afterTriggers.query_depth];
+ ret = afterTriggers.query_stack[afterTriggers.query_depth].fdw_tuplestore;
if (ret == NULL)
{
MemoryContext oldcxt;
ResourceOwner saveResourceOwner;
/*
- * Make the tuplestore valid until end of transaction. This is the
- * allocation lifespan of the associated events list, but we really
+ * Make the tuplestore valid until end of subtransaction. We really
* only need it until AfterTriggerEndQuery().
*/
- oldcxt = MemoryContextSwitchTo(TopTransactionContext);
+ oldcxt = MemoryContextSwitchTo(CurTransactionContext);
saveResourceOwner = CurrentResourceOwner;
PG_TRY();
{
- CurrentResourceOwner = TopTransactionResourceOwner;
+ CurrentResourceOwner = CurTransactionResourceOwner;
ret = tuplestore_begin_heap(false, false, work_mem);
}
PG_CATCH();
@@ -3634,7 +3636,7 @@ GetTriggerTransitionTuplestore(Tuplestorestate **tss)
CurrentResourceOwner = saveResourceOwner;
MemoryContextSwitchTo(oldcxt);
- tss[afterTriggers.query_depth] = ret;
+ afterTriggers.query_stack[afterTriggers.query_depth].fdw_tuplestore = ret;
}
return ret;
@@ -3780,7 +3782,7 @@ afterTriggerAddEvent(AfterTriggerEventList *events,
if (newshared->ats_tgoid == evtshared->ats_tgoid &&
newshared->ats_relid == evtshared->ats_relid &&
newshared->ats_event == evtshared->ats_event &&
- newshared->ats_transition_capture == evtshared->ats_transition_capture &&
+ newshared->ats_table == evtshared->ats_table &&
newshared->ats_firing_id == 0)
break;
}
@@ -3892,8 +3894,7 @@ AfterTriggerExecute(AfterTriggerEvent event,
FmgrInfo *finfo, Instrumentation *instr,
MemoryContext per_tuple_context,
TupleTableSlot *trig_tuple_slot1,
- TupleTableSlot *trig_tuple_slot2,
- TransitionCaptureState *transition_capture)
+ TupleTableSlot *trig_tuple_slot2)
{
AfterTriggerShared evtshared = GetTriggerSharedData(event);
Oid tgoid = evtshared->ats_tgoid;
@@ -3934,9 +3935,7 @@ AfterTriggerExecute(AfterTriggerEvent event,
{
case AFTER_TRIGGER_FDW_FETCH:
{
- Tuplestorestate *fdw_tuplestore =
- GetTriggerTransitionTuplestore
- (afterTriggers.fdw_tuplestores);
+ Tuplestorestate *fdw_tuplestore = GetCurrentFDWTuplestore();
if (!tuplestore_gettupleslot(fdw_tuplestore, true, false,
trig_tuple_slot1))
@@ -4006,36 +4005,25 @@ AfterTriggerExecute(AfterTriggerEvent event,
}
/*
- * Set up the tuplestore information.
+ * Set up the tuplestore information to let the trigger have access to
+ * transition tables. When we first make a transition table available to
+ * a trigger, mark it "closed" so that it cannot change anymore. If any
+ * additional events of the same type get queued in the current trigger
+ * query level, they'll go into new transition tables.
*/
LocTriggerData.tg_oldtable = LocTriggerData.tg_newtable = NULL;
- if (transition_capture != NULL)
+ if (evtshared->ats_table)
{
if (LocTriggerData.tg_trigger->tgoldtable)
- LocTriggerData.tg_oldtable = transition_capture->tcs_old_tuplestore;
- if (LocTriggerData.tg_trigger->tgnewtable)
{
- /*
- * Currently a trigger with transition tables may only be defined
- * for a single event type (here AFTER INSERT or AFTER UPDATE, but
- * not AFTER INSERT OR ...).
- */
- Assert((TRIGGER_FOR_INSERT(LocTriggerData.tg_trigger->tgtype) != 0) ^
- (TRIGGER_FOR_UPDATE(LocTriggerData.tg_trigger->tgtype) != 0));
+ LocTriggerData.tg_oldtable = evtshared->ats_table->old_tuplestore;
+ evtshared->ats_table->closed = true;
+ }
- /*
- * Show either the insert or update new tuple images, depending on
- * which event type the trigger was registered for. A single
- * statement may have produced both in the case of INSERT ... ON
- * CONFLICT ... DO UPDATE, and in that case the event determines
- * which tuplestore the trigger sees as the NEW TABLE.
- */
- if (TRIGGER_FOR_INSERT(LocTriggerData.tg_trigger->tgtype))
- LocTriggerData.tg_newtable =
- transition_capture->tcs_insert_tuplestore;
- else
- LocTriggerData.tg_newtable =
- transition_capture->tcs_update_tuplestore;
+ if (LocTriggerData.tg_trigger->tgnewtable)
+ {
+ LocTriggerData.tg_newtable = evtshared->ats_table->new_tuplestore;
+ evtshared->ats_table->closed = true;
}
}
@@ -4245,8 +4233,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
* won't try to re-fire it.
*/
AfterTriggerExecute(event, rel, trigdesc, finfo, instr,
- per_tuple_context, slot1, slot2,
- evtshared->ats_transition_capture);
+ per_tuple_context, slot1, slot2);
/*
* Mark the event as done.
@@ -4296,6 +4283,166 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
}
+/*
+ * GetAfterTriggersTableData
+ *
+ * Find or create an AfterTriggersTableData struct for the specified
+ * trigger event (relation + operation type). Ignore existing structs
+ * marked "closed"; we don't want to put any additional tuples into them,
+ * nor change their stmt-triggers-fired state.
+ *
+ * Note: the AfterTriggersTableData list is allocated in the current
+ * (sub)transaction's CurTransactionContext. This is OK because
+ * we don't need it to live past AfterTriggerEndQuery.
+ */
+static AfterTriggersTableData *
+GetAfterTriggersTableData(Oid relid, CmdType cmdType)
+{
+ AfterTriggersTableData *table;
+ AfterTriggersQueryData *qs;
+ MemoryContext oldcxt;
+ ListCell *lc;
+
+ /* Caller should have ensured query_depth is OK. */
+ Assert(afterTriggers.query_depth >= 0 &&
+ afterTriggers.query_depth < afterTriggers.maxquerydepth);
+ qs = &afterTriggers.query_stack[afterTriggers.query_depth];
+
+ foreach(lc, qs->tables)
+ {
+ table = (AfterTriggersTableData *) lfirst(lc);
+ if (table->relid == relid && table->cmdType == cmdType &&
+ !table->closed)
+ return table;
+ }
+
+ oldcxt = MemoryContextSwitchTo(CurTransactionContext);
+
+ table = (AfterTriggersTableData *) palloc0(sizeof(AfterTriggersTableData));
+ table->relid = relid;
+ table->cmdType = cmdType;
+ qs->tables = lappend(qs->tables, table);
+
+ MemoryContextSwitchTo(oldcxt);
+
+ return table;
+}
+
+
+/*
+ * MakeTransitionCaptureState
+ *
+ * Make a TransitionCaptureState object for the given TriggerDesc, target
+ * relation, and operation type. The TCS object holds all the state needed
+ * to decide whether to capture tuples in transition tables.
+ *
+ * If there are no triggers in 'trigdesc' that request relevant transition
+ * tables, then return NULL.
+ *
+ * The resulting object can be passed to the ExecAR* functions. The caller
+ * should set tcs_map or tcs_original_insert_tuple as appropriate when dealing
+ * with child tables.
+ *
+ * Note that we copy the flags from a parent table into this struct (rather
+ * than subsequently using the relation's TriggerDesc directly) so that we can
+ * use it to control collection of transition tuples from child tables.
+ *
+ * Per SQL spec, all operations of the same kind (INSERT/UPDATE/DELETE)
+ * on the same table during one query should share one transition table.
+ * Therefore, the Tuplestores are owned by an AfterTriggersTableData struct
+ * looked up using the table OID + CmdType, and are merely referenced by
+ * the TransitionCaptureState objects we hand out to callers.
+ */
+TransitionCaptureState *
+MakeTransitionCaptureState(TriggerDesc *trigdesc, Oid relid, CmdType cmdType)
+{
+ TransitionCaptureState *state;
+ bool need_old,
+ need_new;
+ AfterTriggersTableData *table;
+ MemoryContext oldcxt;
+ ResourceOwner saveResourceOwner;
+
+ if (trigdesc == NULL)
+ return NULL;
+
+ /* Detect which table(s) we need. */
+ switch (cmdType)
+ {
+ case CMD_INSERT:
+ need_old = false;
+ need_new = trigdesc->trig_insert_new_table;
+ break;
+ case CMD_UPDATE:
+ need_old = trigdesc->trig_update_old_table;
+ need_new = trigdesc->trig_update_new_table;
+ break;
+ case CMD_DELETE:
+ need_old = trigdesc->trig_delete_old_table;
+ need_new = false;
+ break;
+ default:
+ elog(ERROR, "unexpected CmdType: %d", (int) cmdType);
+ need_old = need_new = false; /* keep compiler quiet */
+ break;
+ }
+ if (!need_old && !need_new)
+ return NULL;
+
+ /* Check state, like AfterTriggerSaveEvent. */
+ if (afterTriggers.query_depth < 0)
+ elog(ERROR, "MakeTransitionCaptureState() called outside of query");
+
+ /* Be sure we have enough space to record events at this query depth. */
+ if (afterTriggers.query_depth >= afterTriggers.maxquerydepth)
+ AfterTriggerEnlargeQueryState();
+
+ /*
+ * Find or create an AfterTriggersTableData struct to hold the
+ * tuplestore(s). If there's a matching struct but it's marked closed,
+ * ignore it; we need a newer one.
+ *
+ * Note: the AfterTriggersTableData list, as well as the tuplestores, are
+ * allocated in the current (sub)transaction's CurTransactionContext, and
+ * the tuplestores are managed by the (sub)transaction's resource owner.
+ * This is sufficient lifespan because we do not allow triggers using
+ * transition tables to be deferrable; they will be fired during
+ * AfterTriggerEndQuery, after which it's okay to delete the data.
+ */
+ table = GetAfterTriggersTableData(relid, cmdType);
+
+ /* Now create required tuplestore(s), if we don't have them already. */
+ oldcxt = MemoryContextSwitchTo(CurTransactionContext);
+ saveResourceOwner = CurrentResourceOwner;
+ PG_TRY();
+ {
+ CurrentResourceOwner = CurTransactionResourceOwner;
+ if (need_old && table->old_tuplestore == NULL)
+ table->old_tuplestore = tuplestore_begin_heap(false, false, work_mem);
+ if (need_new && table->new_tuplestore == NULL)
+ table->new_tuplestore = tuplestore_begin_heap(false, false, work_mem);
+ }
+ PG_CATCH();
+ {
+ CurrentResourceOwner = saveResourceOwner;
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+ CurrentResourceOwner = saveResourceOwner;
+ MemoryContextSwitchTo(oldcxt);
+
+ /* Now build the TransitionCaptureState struct, in caller's context */
+ state = (TransitionCaptureState *) palloc0(sizeof(TransitionCaptureState));
+ state->tcs_delete_old_table = trigdesc->trig_delete_old_table;
+ state->tcs_update_old_table = trigdesc->trig_update_old_table;
+ state->tcs_update_new_table = trigdesc->trig_update_new_table;
+ state->tcs_insert_new_table = trigdesc->trig_insert_new_table;
+ state->tcs_private = table;
+
+ return state;
+}
+
+
/* ----------
* AfterTriggerBeginXact()
*
@@ -4319,14 +4466,10 @@ AfterTriggerBeginXact(void)
*/
Assert(afterTriggers.state == NULL);
Assert(afterTriggers.query_stack == NULL);
- Assert(afterTriggers.fdw_tuplestores == NULL);
Assert(afterTriggers.maxquerydepth == 0);
Assert(afterTriggers.event_cxt == NULL);
Assert(afterTriggers.events.head == NULL);
- Assert(afterTriggers.state_stack == NULL);
- Assert(afterTriggers.events_stack == NULL);
- Assert(afterTriggers.depth_stack == NULL);
- Assert(afterTriggers.firing_stack == NULL);
+ Assert(afterTriggers.trans_stack == NULL);
Assert(afterTriggers.maxtransdepth == 0);
}
@@ -4362,9 +4505,6 @@ AfterTriggerBeginQuery(void)
void
AfterTriggerEndQuery(EState *estate)
{
- AfterTriggerEventList *events;
- Tuplestorestate *fdw_tuplestore;
-
/* Must be inside a query, too */
Assert(afterTriggers.query_depth >= 0);
@@ -4393,38 +4533,86 @@ AfterTriggerEndQuery(EState *estate)
* will instead fire any triggers in a dedicated query level. Foreign key
* enforcement triggers do add to the current query level, thanks to their
* passing fire_triggers = false to SPI_execute_snapshot(). Other
- * C-language triggers might do likewise. Be careful here: firing a
- * trigger could result in query_stack being repalloc'd, so we can't save
- * its address across afterTriggerInvokeEvents calls.
+ * C-language triggers might do likewise.
*
* If we find no firable events, we don't have to increment
* firing_counter.
*/
for (;;)
{
- events = &afterTriggers.query_stack[afterTriggers.query_depth];
- if (afterTriggerMarkEvents(events, &afterTriggers.events, true))
+ AfterTriggersQueryData *qs;
+
+ /*
+ * Firing a trigger could result in query_stack being repalloc'd, so
+ * we must recalculate qs after each afterTriggerInvokeEvents call.
+ */
+ qs = &afterTriggers.query_stack[afterTriggers.query_depth];
+
+ if (afterTriggerMarkEvents(&qs->events, &afterTriggers.events, true))
{
CommandId firing_id = afterTriggers.firing_counter++;
/* OK to delete the immediate events after processing them */
- if (afterTriggerInvokeEvents(events, firing_id, estate, true))
+ if (afterTriggerInvokeEvents(&qs->events, firing_id, estate, true))
break; /* all fired */
}
else
break;
}
- /* Release query-local storage for events, including tuplestore if any */
- fdw_tuplestore = afterTriggers.fdw_tuplestores[afterTriggers.query_depth];
- if (fdw_tuplestore)
+ /* Release query-level-local storage, including tuplestores if any */
+ AfterTriggerFreeQuery(&afterTriggers.query_stack[afterTriggers.query_depth]);
+
+ afterTriggers.query_depth--;
+}
+
+
+/*
+ * AfterTriggerFreeQuery
+ * Release subsidiary storage for a trigger query level.
+ * This includes closing down tuplestores.
+ * Note: it's important for this to be safe if interrupted by an error
+ * and then called again for the same query level.
+ */
+static void
+AfterTriggerFreeQuery(AfterTriggersQueryData *qs)
+{
+ Tuplestorestate *ts;
+ List *tables;
+ ListCell *lc;
+
+ /* Drop the trigger events */
+ afterTriggerFreeEventList(&qs->events);
+
+ /* Drop FDW tuplestore if any */
+ ts = qs->fdw_tuplestore;
+ qs->fdw_tuplestore = NULL;
+ if (ts)
+ tuplestore_end(ts);
+
+ /* Release per-table subsidiary storage */
+ tables = qs->tables;
+ foreach(lc, tables)
{
- tuplestore_end(fdw_tuplestore);
- afterTriggers.fdw_tuplestores[afterTriggers.query_depth] = NULL;
+ AfterTriggersTableData *table = (AfterTriggersTableData *) lfirst(lc);
+
+ ts = table->old_tuplestore;
+ table->old_tuplestore = NULL;
+ if (ts)
+ tuplestore_end(ts);
+ ts = table->new_tuplestore;
+ table->new_tuplestore = NULL;
+ if (ts)
+ tuplestore_end(ts);
}
- afterTriggerFreeEventList(&afterTriggers.query_stack[afterTriggers.query_depth]);
- afterTriggers.query_depth--;
+ /*
+ * Now free the AfterTriggersTableData structs and list cells. Reset list
+ * pointer first; if list_free_deep somehow gets an error, better to leak
+ * that storage than have an infinite loop.
+ */
+ qs->tables = NIL;
+ list_free_deep(tables);
}
@@ -4521,10 +4709,7 @@ AfterTriggerEndXact(bool isCommit)
* large, we let the eventual reset of TopTransactionContext free the
* memory instead of doing it here.
*/
- afterTriggers.state_stack = NULL;
- afterTriggers.events_stack = NULL;
- afterTriggers.depth_stack = NULL;
- afterTriggers.firing_stack = NULL;
+ afterTriggers.trans_stack = NULL;
afterTriggers.maxtransdepth = 0;
@@ -4534,7 +4719,6 @@ AfterTriggerEndXact(bool isCommit)
* memory here.
*/
afterTriggers.query_stack = NULL;
- afterTriggers.fdw_tuplestores = NULL;
afterTriggers.maxquerydepth = 0;
afterTriggers.state = NULL;
@@ -4553,48 +4737,28 @@ AfterTriggerBeginSubXact(void)
int my_level = GetCurrentTransactionNestLevel();
/*
- * Allocate more space in the stacks if needed. (Note: because the
+ * Allocate more space in the trans_stack if needed. (Note: because the
* minimum nest level of a subtransaction is 2, we waste the first couple
- * entries of each array; not worth the notational effort to avoid it.)
+ * entries of the array; not worth the notational effort to avoid it.)
*/
while (my_level >= afterTriggers.maxtransdepth)
{
if (afterTriggers.maxtransdepth == 0)
{
- MemoryContext old_cxt;
-
- old_cxt = MemoryContextSwitchTo(TopTransactionContext);
-
-#define DEFTRIG_INITALLOC 8
- afterTriggers.state_stack = (SetConstraintState *)
- palloc(DEFTRIG_INITALLOC * sizeof(SetConstraintState));
- afterTriggers.events_stack = (AfterTriggerEventList *)
- palloc(DEFTRIG_INITALLOC * sizeof(AfterTriggerEventList));
- afterTriggers.depth_stack = (int *)
- palloc(DEFTRIG_INITALLOC * sizeof(int));
- afterTriggers.firing_stack = (CommandId *)
- palloc(DEFTRIG_INITALLOC * sizeof(CommandId));
- afterTriggers.maxtransdepth = DEFTRIG_INITALLOC;
-
- MemoryContextSwitchTo(old_cxt);
+ /* Arbitrarily initialize for max of 8 subtransaction levels */
+ afterTriggers.trans_stack = (AfterTriggersTransData *)
+ MemoryContextAlloc(TopTransactionContext,
+ 8 * sizeof(AfterTriggersTransData));
+ afterTriggers.maxtransdepth = 8;
}
else
{
- /* repalloc will keep the stacks in the same context */
+ /* repalloc will keep the stack in the same context */
int new_alloc = afterTriggers.maxtransdepth * 2;
- afterTriggers.state_stack = (SetConstraintState *)
- repalloc(afterTriggers.state_stack,
- new_alloc * sizeof(SetConstraintState));
- afterTriggers.events_stack = (AfterTriggerEventList *)
- repalloc(afterTriggers.events_stack,
- new_alloc * sizeof(AfterTriggerEventList));
- afterTriggers.depth_stack = (int *)
- repalloc(afterTriggers.depth_stack,
- new_alloc * sizeof(int));
- afterTriggers.firing_stack = (CommandId *)
- repalloc(afterTriggers.firing_stack,
- new_alloc * sizeof(CommandId));
+ afterTriggers.trans_stack = (AfterTriggersTransData *)
+ repalloc(afterTriggers.trans_stack,
+ new_alloc * sizeof(AfterTriggersTransData));
afterTriggers.maxtransdepth = new_alloc;
}
}
@@ -4604,10 +4768,10 @@ AfterTriggerBeginSubXact(void)
* is not saved until/unless changed. Likewise, we don't make a
* per-subtransaction event context until needed.
*/
- afterTriggers.state_stack[my_level] = NULL;
- afterTriggers.events_stack[my_level] = afterTriggers.events;
- afterTriggers.depth_stack[my_level] = afterTriggers.query_depth;
- afterTriggers.firing_stack[my_level] = afterTriggers.firing_counter;
+ afterTriggers.trans_stack[my_level].state = NULL;
+ afterTriggers.trans_stack[my_level].events = afterTriggers.events;
+ afterTriggers.trans_stack[my_level].query_depth = afterTriggers.query_depth;
+ afterTriggers.trans_stack[my_level].firing_counter = afterTriggers.firing_counter;
}
/*
@@ -4631,70 +4795,58 @@ AfterTriggerEndSubXact(bool isCommit)
{
Assert(my_level < afterTriggers.maxtransdepth);
/* If we saved a prior state, we don't need it anymore */
- state = afterTriggers.state_stack[my_level];
+ state = afterTriggers.trans_stack[my_level].state;
if (state != NULL)
pfree(state);
/* this avoids double pfree if error later: */
- afterTriggers.state_stack[my_level] = NULL;
+ afterTriggers.trans_stack[my_level].state = NULL;
Assert(afterTriggers.query_depth ==
- afterTriggers.depth_stack[my_level]);
+ afterTriggers.trans_stack[my_level].query_depth);
}
else
{
/*
* Aborting. It is possible subxact start failed before calling
* AfterTriggerBeginSubXact, in which case we mustn't risk touching
- * stack levels that aren't there.
+ * trans_stack levels that aren't there.
*/
if (my_level >= afterTriggers.maxtransdepth)
return;
/*
- * Release any event lists from queries being aborted, and restore
+ * Release query-level storage for queries being aborted, and restore
* query_depth to its pre-subxact value. This assumes that a
* subtransaction will not add events to query levels started in a
* earlier transaction state.
*/
- while (afterTriggers.query_depth > afterTriggers.depth_stack[my_level])
+ while (afterTriggers.query_depth > afterTriggers.trans_stack[my_level].query_depth)
{
if (afterTriggers.query_depth < afterTriggers.maxquerydepth)
- {
- Tuplestorestate *ts;
-
- ts = afterTriggers.fdw_tuplestores[afterTriggers.query_depth];
- if (ts)
- {
- tuplestore_end(ts);
- afterTriggers.fdw_tuplestores[afterTriggers.query_depth] = NULL;
- }
-
- afterTriggerFreeEventList(&afterTriggers.query_stack[afterTriggers.query_depth]);
- }
-
+ AfterTriggerFreeQuery(&afterTriggers.query_stack[afterTriggers.query_depth]);
afterTriggers.query_depth--;
}
Assert(afterTriggers.query_depth ==
- afterTriggers.depth_stack[my_level]);
+ afterTriggers.trans_stack[my_level].query_depth);
/*
* Restore the global deferred-event list to its former length,
* discarding any events queued by the subxact.
*/
afterTriggerRestoreEventList(&afterTriggers.events,
- &afterTriggers.events_stack[my_level]);
+ &afterTriggers.trans_stack[my_level].events);
/*
* Restore the trigger state. If the saved state is NULL, then this
* subxact didn't save it, so it doesn't need restoring.
*/
- state = afterTriggers.state_stack[my_level];
+ state = afterTriggers.trans_stack[my_level].state;
if (state != NULL)
{
pfree(afterTriggers.state);
afterTriggers.state = state;
}
/* this avoids double pfree if error later: */
- afterTriggers.state_stack[my_level] = NULL;
+ afterTriggers.trans_stack[my_level].state = NULL;
/*
* Scan for any remaining deferred events that were marked DONE or IN
@@ -4704,7 +4856,7 @@ AfterTriggerEndSubXact(bool isCommit)
* (This essentially assumes that the current subxact includes all
* subxacts started after it.)
*/
- subxact_firing_id = afterTriggers.firing_stack[my_level];
+ subxact_firing_id = afterTriggers.trans_stack[my_level].firing_counter;
for_each_event_chunk(event, chunk, afterTriggers.events)
{
AfterTriggerShared evtshared = GetTriggerSharedData(event);
@@ -4740,12 +4892,9 @@ AfterTriggerEnlargeQueryState(void)
{
int new_alloc = Max(afterTriggers.query_depth + 1, 8);
- afterTriggers.query_stack = (AfterTriggerEventList *)
+ afterTriggers.query_stack = (AfterTriggersQueryData *)
MemoryContextAlloc(TopTransactionContext,
- new_alloc * sizeof(AfterTriggerEventList));
- afterTriggers.fdw_tuplestores = (Tuplestorestate **)
- MemoryContextAllocZero(TopTransactionContext,
- new_alloc * sizeof(Tuplestorestate *));
+ new_alloc * sizeof(AfterTriggersQueryData));
afterTriggers.maxquerydepth = new_alloc;
}
else
@@ -4755,27 +4904,22 @@ AfterTriggerEnlargeQueryState(void)
int new_alloc = Max(afterTriggers.query_depth + 1,
old_alloc * 2);
- afterTriggers.query_stack = (AfterTriggerEventList *)
+ afterTriggers.query_stack = (AfterTriggersQueryData *)
repalloc(afterTriggers.query_stack,
- new_alloc * sizeof(AfterTriggerEventList));
- afterTriggers.fdw_tuplestores = (Tuplestorestate **)
- repalloc(afterTriggers.fdw_tuplestores,
- new_alloc * sizeof(Tuplestorestate *));
- /* Clear newly-allocated slots for subsequent lazy initialization. */
- memset(afterTriggers.fdw_tuplestores + old_alloc,
- 0, (new_alloc - old_alloc) * sizeof(Tuplestorestate *));
+ new_alloc * sizeof(AfterTriggersQueryData));
afterTriggers.maxquerydepth = new_alloc;
}
- /* Initialize new query lists to empty */
+ /* Initialize new array entries to empty */
while (init_depth < afterTriggers.maxquerydepth)
{
- AfterTriggerEventList *events;
+ AfterTriggersQueryData *qs = &afterTriggers.query_stack[init_depth];
- events = &afterTriggers.query_stack[init_depth];
- events->head = NULL;
- events->tail = NULL;
- events->tailfree = NULL;
+ qs->events.head = NULL;
+ qs->events.tail = NULL;
+ qs->events.tailfree = NULL;
+ qs->fdw_tuplestore = NULL;
+ qs->tables = NIL;
++init_depth;
}
@@ -4873,9 +5017,9 @@ AfterTriggerSetState(ConstraintsSetStmt *stmt)
* save it so it can be restored if the subtransaction aborts.
*/
if (my_level > 1 &&
- afterTriggers.state_stack[my_level] == NULL)
+ afterTriggers.trans_stack[my_level].state == NULL)
{
- afterTriggers.state_stack[my_level] =
+ afterTriggers.trans_stack[my_level].state =
SetConstraintStateCopy(afterTriggers.state);
}
@@ -5184,7 +5328,7 @@ AfterTriggerPendingOnRel(Oid relid)
*/
for (depth = 0; depth <= afterTriggers.query_depth && depth < afterTriggers.maxquerydepth; depth++)
{
- for_each_event_chunk(event, chunk, afterTriggers.query_stack[depth])
+ for_each_event_chunk(event, chunk, afterTriggers.query_stack[depth].events)
{
AfterTriggerShared evtshared = GetTriggerSharedData(event);
@@ -5229,7 +5373,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
AfterTriggerEventData new_event;
AfterTriggerSharedData new_shared;
- char relkind = relinfo->ri_RelationDesc->rd_rel->relkind;
+ char relkind = rel->rd_rel->relkind;
int tgtype_event;
int tgtype_level;
int i;
@@ -5266,7 +5410,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
Tuplestorestate *old_tuplestore;
Assert(oldtup != NULL);
- old_tuplestore = transition_capture->tcs_old_tuplestore;
+ old_tuplestore = transition_capture->tcs_private->old_tuplestore;
if (map != NULL)
{
@@ -5284,10 +5428,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
Tuplestorestate *new_tuplestore;
Assert(newtup != NULL);
- if (event == TRIGGER_EVENT_INSERT)
- new_tuplestore = transition_capture->tcs_insert_tuplestore;
- else
- new_tuplestore = transition_capture->tcs_update_tuplestore;
+ new_tuplestore = transition_capture->tcs_private->new_tuplestore;
if (original_insert_tuple != NULL)
tuplestore_puttuple(new_tuplestore, original_insert_tuple);
@@ -5316,6 +5457,11 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
* The event code will be used both as a bitmask and an array offset, so
* validation is important to make sure we don't walk off the edge of our
* arrays.
+ *
+ * Also, if we're considering statement-level triggers, check whether we
+ * already queued a set of them for this event, and cancel the prior set
+ * if so. This preserves the behavior that statement-level triggers fire
+ * just once per statement and fire after row-level triggers.
*/
switch (event)
{
@@ -5334,6 +5480,8 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
Assert(newtup == NULL);
ItemPointerSetInvalid(&(new_event.ate_ctid1));
ItemPointerSetInvalid(&(new_event.ate_ctid2));
+ cancel_prior_stmt_triggers(RelationGetRelid(rel),
+ CMD_INSERT, event);
}
break;
case TRIGGER_EVENT_DELETE:
@@ -5351,6 +5499,8 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
Assert(newtup == NULL);
ItemPointerSetInvalid(&(new_event.ate_ctid1));
ItemPointerSetInvalid(&(new_event.ate_ctid2));
+ cancel_prior_stmt_triggers(RelationGetRelid(rel),
+ CMD_DELETE, event);
}
break;
case TRIGGER_EVENT_UPDATE:
@@ -5368,6 +5518,8 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
Assert(newtup == NULL);
ItemPointerSetInvalid(&(new_event.ate_ctid1));
ItemPointerSetInvalid(&(new_event.ate_ctid2));
+ cancel_prior_stmt_triggers(RelationGetRelid(rel),
+ CMD_UPDATE, event);
}
break;
case TRIGGER_EVENT_TRUNCATE:
@@ -5407,9 +5559,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
{
if (fdw_tuplestore == NULL)
{
- fdw_tuplestore =
- GetTriggerTransitionTuplestore
- (afterTriggers.fdw_tuplestores);
+ fdw_tuplestore = GetCurrentFDWTuplestore();
new_event.ate_flags = AFTER_TRIGGER_FDW_FETCH;
}
else
@@ -5465,6 +5615,8 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
/*
* Fill in event structure and add it to the current query's queue.
+ * Note we set ats_table to NULL whenever this trigger doesn't use
+ * transition tables, to improve sharability of the shared event data.
*/
new_shared.ats_event =
(event & TRIGGER_EVENT_OPMASK) |
@@ -5474,11 +5626,13 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
new_shared.ats_tgoid = trigger->tgoid;
new_shared.ats_relid = RelationGetRelid(rel);
new_shared.ats_firing_id = 0;
- /* deferrable triggers cannot access transition data */
- new_shared.ats_transition_capture =
- trigger->tgdeferrable ? NULL : transition_capture;
+ if ((trigger->tgoldtable || trigger->tgnewtable) &&
+ transition_capture != NULL)
+ new_shared.ats_table = transition_capture->tcs_private;
+ else
+ new_shared.ats_table = NULL;
- afterTriggerAddEvent(&afterTriggers.query_stack[afterTriggers.query_depth],
+ afterTriggerAddEvent(&afterTriggers.query_stack[afterTriggers.query_depth].events,
&new_event, &new_shared);
}
@@ -5496,6 +5650,100 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
}
}
+/*
+ * If we previously queued a set of AFTER STATEMENT triggers for the given
+ * relation + operation, and they've not been fired yet, cancel them. The
+ * caller will queue a fresh set that's after any row-level triggers that may
+ * have been queued by the current sub-statement, preserving (as much as
+ * possible) the property that AFTER ROW triggers fire before AFTER STATEMENT
+ * triggers, and that the latter only fire once. This deals with the
+ * situation where several FK enforcement triggers sequentially queue triggers
+ * for the same table into the same trigger query level. We can't fully
+ * prevent odd behavior though: if there are AFTER ROW triggers taking
+ * transition tables, we don't want to change the transition tables once the
+ * first such trigger has seen them. In such a case, any additional events
+ * will result in creating new transition tables and allowing new firings of
+ * statement triggers.
+ *
+ * This also saves the current event list location so that a later invocation
+ * of this function can cheaply find the triggers we're about to queue and
+ * cancel them.
+ */
+static void
+cancel_prior_stmt_triggers(Oid relid, CmdType cmdType, int tgevent)
+{
+ AfterTriggersTableData *table;
+ AfterTriggersQueryData *qs = &afterTriggers.query_stack[afterTriggers.query_depth];
+
+ /*
+ * We keep this state in the AfterTriggersTableData that also holds
+ * transition tables for the relation + operation. In this way, if we are
+ * forced to make a new set of transition tables because more tuples get
+ * entered after we've already fired triggers, we will allow a new set of
+ * statement triggers to get queued without canceling the old ones.
+ */
+ table = GetAfterTriggersTableData(relid, cmdType);
+
+ if (table->stmt_trig_done)
+ {
+ /*
+ * We want to start scanning from the tail location that existed just
+ * before we inserted any statement triggers. But the events list
+ * might've been entirely empty then, in which case scan from the
+ * current head.
+ */
+ AfterTriggerEvent event;
+ AfterTriggerEventChunk *chunk;
+
+ if (table->stmt_trig_events.tail)
+ {
+ chunk = table->stmt_trig_events.tail;
+ event = (AfterTriggerEvent) table->stmt_trig_events.tailfree;
+ }
+ else
+ {
+ chunk = qs->events.head;
+ event = NULL;
+ }
+
+ for_each_chunk_from(chunk)
+ {
+ if (event == NULL)
+ event = (AfterTriggerEvent) CHUNK_DATA_START(chunk);
+ for_each_event_from(event, chunk)
+ {
+ AfterTriggerShared evtshared = GetTriggerSharedData(event);
+
+ /*
+ * Exit loop when we reach events that aren't AS triggers for
+ * the target relation.
+ */
+ if (evtshared->ats_relid != relid)
+ goto done;
+ if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) != tgevent)
+ goto done;
+ if (!TRIGGER_FIRED_FOR_STATEMENT(evtshared->ats_event))
+ goto done;
+ if (!TRIGGER_FIRED_AFTER(evtshared->ats_event))
+ goto done;
+ /* OK, mark it DONE */
+ event->ate_flags &= ~AFTER_TRIGGER_IN_PROGRESS;
+ event->ate_flags |= AFTER_TRIGGER_DONE;
+ }
+ /* signal we must reinitialize event ptr for next chunk */
+ event = NULL;
+ }
+ }
+done:
+
+ /* In any case, save current insertion point for next time */
+ table->stmt_trig_done = true;
+ table->stmt_trig_events = qs->events;
+}
+
+/*
+ * SQL function pg_trigger_depth()
+ */
Datum
pg_trigger_depth(PG_FUNCTION_ARGS)
{
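
As a rough SQL sketch of the behavior cancel_prior_stmt_triggers() is preserving (hypothetical table and trigger names, modeled on the refd_table/trig_table regression test further below): when a single statement causes several foreign-key cascade actions against the same table, the AFTER STATEMENT trigger should still fire only once, after the row-level work, and should see all affected rows in one transition table.

    create table parent (a int primary key);
    create table child  (a int references parent on delete cascade);
    create function count_deleted() returns trigger language plpgsql as
    $$ begin raise notice 'deleted % child rows', (select count(*) from old_rows); return null; end $$;
    create trigger child_del_stmt
      after delete on child referencing old table as old_rows
      for each statement execute procedure count_deleted();
    insert into parent values (1), (2);
    insert into child values (1), (1), (2);
    -- the cascaded deletes for both parent rows queue events for child,
    -- but the statement-level trigger is expected to fire just once here
    delete from parent;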
diff --git a/src/backend/executor/README b/src/backend/executor/README
index a0045067fb8..b3e74aa1a54 100644
--- a/src/backend/executor/README
+++ b/src/backend/executor/README
@@ -241,11 +241,11 @@ This is a sketch of control flow for full query processing:
CreateExecutorState
creates per-query context
switch to per-query context to run ExecInitNode
+ AfterTriggerBeginQuery
ExecInitNode --- recursively scans plan tree
CreateExprContext
creates per-tuple context
ExecInitExpr
- AfterTriggerBeginQuery
ExecutorRun
ExecProcNode --- recursively called in per-query context
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 9dcc358ec27..396b7a1e83f 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -252,17 +252,17 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
estate->es_instrument = queryDesc->instrument_options;
/*
- * Initialize the plan state tree
- */
- InitPlan(queryDesc, eflags);
-
- /*
* Set up an AFTER-trigger statement context, unless told not to, or
* unless it's EXPLAIN-only mode (when ExecutorFinish won't be called).
*/
if (!(eflags & (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY)))
AfterTriggerBeginQuery();
+ /*
+ * Initialize the plan state tree
+ */
+ InitPlan(queryDesc, eflags);
+
MemoryContextSwitchTo(oldcontext);
}
@@ -1174,6 +1174,7 @@ CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation)
switch (operation)
{
case CMD_INSERT:
+
/*
* If foreign partition to do tuple-routing for, skip the
* check; it's disallowed elsewhere.
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index d78e868154e..7b5214c9996 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -342,6 +342,9 @@ ExecInsert(ModifyTableState *mtstate,
mtstate->mt_transition_capture->tcs_map = NULL;
}
}
+ if (mtstate->mt_oc_transition_capture != NULL)
+ mtstate->mt_oc_transition_capture->tcs_map =
+ mtstate->mt_transition_tupconv_maps[leaf_part_index];
/*
* We might need to convert from the parent rowtype to the partition
@@ -1157,6 +1160,8 @@ lreplace:;
/* AFTER ROW UPDATE Triggers */
ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, tuple,
recheckIndexes,
+ mtstate->operation == CMD_INSERT ?
+ mtstate->mt_oc_transition_capture :
mtstate->mt_transition_capture);
list_free(recheckIndexes);
@@ -1443,7 +1448,7 @@ fireASTriggers(ModifyTableState *node)
if (node->mt_onconflict == ONCONFLICT_UPDATE)
ExecASUpdateTriggers(node->ps.state,
resultRelInfo,
- node->mt_transition_capture);
+ node->mt_oc_transition_capture);
ExecASInsertTriggers(node->ps.state, resultRelInfo,
node->mt_transition_capture);
break;
@@ -1473,14 +1478,24 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
/* Check for transition tables on the directly targeted relation. */
mtstate->mt_transition_capture =
- MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc);
+ MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
+ RelationGetRelid(targetRelInfo->ri_RelationDesc),
+ mtstate->operation);
+ if (mtstate->operation == CMD_INSERT &&
+ mtstate->mt_onconflict == ONCONFLICT_UPDATE)
+ mtstate->mt_oc_transition_capture =
+ MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
+ RelationGetRelid(targetRelInfo->ri_RelationDesc),
+ CMD_UPDATE);
/*
* If we found that we need to collect transition tuples then we may also
* need tuple conversion maps for any children that have TupleDescs that
- * aren't compatible with the tuplestores.
+ * aren't compatible with the tuplestores. (We can share these maps
+ * between the regular and ON CONFLICT cases.)
*/
- if (mtstate->mt_transition_capture != NULL)
+ if (mtstate->mt_transition_capture != NULL ||
+ mtstate->mt_oc_transition_capture != NULL)
{
ResultRelInfo *resultRelInfos;
int numResultRelInfos;
@@ -1521,10 +1536,12 @@ ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
/*
* Install the conversion map for the first plan for UPDATE and DELETE
* operations. It will be advanced each time we switch to the next
- * plan. (INSERT operations set it every time.)
+ * plan. (INSERT operations set it every time, so we need not update
+ * mtstate->mt_oc_transition_capture here.)
*/
- mtstate->mt_transition_capture->tcs_map =
- mtstate->mt_transition_tupconv_maps[0];
+ if (mtstate->mt_transition_capture)
+ mtstate->mt_transition_capture->tcs_map =
+ mtstate->mt_transition_tupconv_maps[0];
}
}
@@ -1628,13 +1645,19 @@ ExecModifyTable(PlanState *pstate)
estate->es_result_relation_info = resultRelInfo;
EvalPlanQualSetPlan(&node->mt_epqstate, subplanstate->plan,
node->mt_arowmarks[node->mt_whichplan]);
+ /* Prepare to convert transition tuples from this child. */
if (node->mt_transition_capture != NULL)
{
- /* Prepare to convert transition tuples from this child. */
Assert(node->mt_transition_tupconv_maps != NULL);
node->mt_transition_capture->tcs_map =
node->mt_transition_tupconv_maps[node->mt_whichplan];
}
+ if (node->mt_oc_transition_capture != NULL)
+ {
+ Assert(node->mt_transition_tupconv_maps != NULL);
+ node->mt_oc_transition_capture->tcs_map =
+ node->mt_transition_tupconv_maps[node->mt_whichplan];
+ }
continue;
}
else
@@ -1933,8 +1956,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
mtstate->mt_partition_tuple_slot = partition_tuple_slot;
}
- /* Build state for collecting transition tuples */
- ExecSetupTransitionCaptureState(mtstate, estate);
+ /*
+ * Build state for collecting transition tuples. This requires having a
+ * valid trigger query context, so skip it in explain-only mode.
+ */
+ if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
+ ExecSetupTransitionCaptureState(mtstate, estate);
/*
* Initialize any WITH CHECK OPTION constraints if needed.
@@ -2318,16 +2345,6 @@ ExecEndModifyTable(ModifyTableState *node)
int i;
/*
- * Free transition tables, unless this query is being run in
- * EXEC_FLAG_SKIP_TRIGGERS mode, which means that it may have queued AFTER
- * triggers that won't be run till later. In that case we'll just leak
- * the transition tables till end of (sub)transaction.
- */
- if (node->mt_transition_capture != NULL &&
- !(node->ps.state->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS))
- DestroyTransitionCaptureState(node->mt_transition_capture);
-
- /*
* Allow any FDWs to shut down
*/
for (i = 0; i < node->mt_nplans; i++)
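
A brief SQL sketch (hypothetical names, not taken from the regression tests) of why nodeModifyTable.c now carries a separate mt_oc_transition_capture: an INSERT ... ON CONFLICT DO UPDATE can fire both INSERT and UPDATE statement-level triggers, and each one should see only its own transition rows rather than a mixed tuplestore.

    create table t (k int primary key, v text);
    create function show_inserted() returns trigger language plpgsql as
    $$ begin raise notice 'inserted %', (select count(*) from new_ins); return null; end $$;
    create function show_updated() returns trigger language plpgsql as
    $$ begin raise notice 'updated %', (select count(*) from new_upd); return null; end $$;
    create trigger t_ins after insert on t referencing new table as new_ins
      for each statement execute procedure show_inserted();
    create trigger t_upd after update on t referencing new table as new_upd
      for each statement execute procedure show_updated();
    insert into t values (1, 'old');
    -- (1) conflicts and is updated, (2) is newly inserted; with separate
    -- capture state each statement trigger sees exactly one row
    insert into t values (1, 'new'), (2, 'new')
      on conflict (k) do update set v = excluded.v;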
diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h
index aeb363f13e8..adbcfa1297a 100644
--- a/src/include/commands/trigger.h
+++ b/src/include/commands/trigger.h
@@ -43,13 +43,21 @@ typedef struct TriggerData
/*
* The state for capturing old and new tuples into transition tables for a
- * single ModifyTable node.
+ * single ModifyTable node (or other operation source, e.g. copy.c).
+ *
+ * This is per-caller to avoid conflicts in setting tcs_map or
+ * tcs_original_insert_tuple. Note, however, that the pointed-to
+ * private data may be shared across multiple callers.
*/
+struct AfterTriggersTableData; /* private in trigger.c */
+
typedef struct TransitionCaptureState
{
/*
* Is there at least one trigger specifying each transition relation on
* the relation explicitly named in the DML statement or COPY command?
+ * Note: in current usage, these flags could be part of the private state,
+ * but it seems possibly useful to let callers see them.
*/
bool tcs_delete_old_table;
bool tcs_update_old_table;
@@ -60,7 +68,7 @@ typedef struct TransitionCaptureState
* For UPDATE and DELETE, AfterTriggerSaveEvent may need to convert the
* new and old tuples from a child table's format to the format of the
* relation named in a query so that it is compatible with the transition
- * tuplestores.
+ * tuplestores. The caller must store the conversion map here if so.
*/
TupleConversionMap *tcs_map;
@@ -74,17 +82,9 @@ typedef struct TransitionCaptureState
HeapTuple tcs_original_insert_tuple;
/*
- * The tuplestores backing the transition tables. We use separate
- * tuplestores for INSERT and UPDATE, because INSERT ... ON CONFLICT ...
- * DO UPDATE causes INSERT and UPDATE triggers to fire and needs a way to
- * keep track of the new tuple images resulting from the two cases
- * separately. We only need a single old image tuplestore, because there
- * is no statement that can both update and delete at the same time.
+ * Private data including the tuplestore(s) into which to insert tuples.
*/
- Tuplestorestate *tcs_old_tuplestore; /* for DELETE and UPDATE old
- * images */
- Tuplestorestate *tcs_insert_tuplestore; /* for INSERT new images */
- Tuplestorestate *tcs_update_tuplestore; /* for UPDATE new images */
+ struct AfterTriggersTableData *tcs_private;
} TransitionCaptureState;
/*
@@ -174,8 +174,9 @@ extern void RelationBuildTriggers(Relation relation);
extern TriggerDesc *CopyTriggerDesc(TriggerDesc *trigdesc);
extern const char *FindTriggerIncompatibleWithInheritance(TriggerDesc *trigdesc);
-extern TransitionCaptureState *MakeTransitionCaptureState(TriggerDesc *trigdesc);
-extern void DestroyTransitionCaptureState(TransitionCaptureState *tcs);
+
+extern TransitionCaptureState *MakeTransitionCaptureState(TriggerDesc *trigdesc,
+ Oid relid, CmdType cmdType);
extern void FreeTriggerDesc(TriggerDesc *trigdesc);
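
The revised comments in trigger.h note that copy.c is another source of captured tuples; a minimal SQL sketch (hypothetical names) of a transition table being filled by COPY rather than by a ModifyTable node:

    create table copied (a int, b text);
    create function show_copied() returns trigger language plpgsql as
    $$ begin raise notice 'copied % rows', (select count(*) from new_rows); return null; end $$;
    create trigger copied_ins
      after insert on copied referencing new table as new_rows
      for each statement execute procedure show_copied();
    copy copied from stdin (format csv);
    1,one
    2,two
    \.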
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index a46a56ebc10..0f5d47ba8e4 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -983,7 +983,9 @@ typedef struct ModifyTableState
/* Per partition tuple conversion map */
TupleTableSlot *mt_partition_tuple_slot;
struct TransitionCaptureState *mt_transition_capture;
- /* controls transition table population */
+ /* controls transition table population for specified operation */
+ struct TransitionCaptureState *mt_oc_transition_capture;
+ /* controls transition table population for INSERT...ON CONFLICT UPDATE */
TupleConversionMap **mt_transition_tupconv_maps;
/* Per plan/partition tuple conversion */
} ModifyTableState;
diff --git a/src/test/regress/expected/triggers.out b/src/test/regress/expected/triggers.out
index 620fac1e2c5..3ab6be3421c 100644
--- a/src/test/regress/expected/triggers.out
+++ b/src/test/regress/expected/triggers.out
@@ -2217,6 +2217,23 @@ with wcte as (insert into table1 values (42))
insert into table2 values ('hello world');
NOTICE: trigger = table2_trig, new table = ("hello world")
NOTICE: trigger = table1_trig, new table = (42)
+with wcte as (insert into table1 values (43))
+ insert into table1 values (44);
+NOTICE: trigger = table1_trig, new table = (43), (44)
+select * from table1;
+ a
+----
+ 42
+ 44
+ 43
+(3 rows)
+
+select * from table2;
+ a
+-------------
+ hello world
+(1 row)
+
drop table table1;
drop table table2;
--
@@ -2256,6 +2273,14 @@ create trigger my_table_multievent_trig
after insert or update on my_table referencing new table as new_table
for each statement execute procedure dump_insert();
ERROR: transition tables cannot be specified for triggers with more than one event
+--
+-- Verify that you can't create a trigger with transition tables and
+-- a column list.
+--
+create trigger my_table_col_update_trig
+ after update of b on my_table referencing new table as new_table
+ for each statement execute procedure dump_insert();
+ERROR: transition tables cannot be specified for triggers with column lists
drop table my_table;
--
-- Test firing of triggers with transition tables by foreign key cascades
@@ -2299,8 +2324,7 @@ select * from trig_table;
(6 rows)
delete from refd_table where length(b) = 3;
-NOTICE: trigger = trig_table_delete_trig, old table = (2,"two a"), (2,"two b")
-NOTICE: trigger = trig_table_delete_trig, old table = (11,"one a"), (11,"one b")
+NOTICE: trigger = trig_table_delete_trig, old table = (2,"two a"), (2,"two b"), (11,"one a"), (11,"one b")
select * from trig_table;
a | b
---+---------
@@ -2309,6 +2333,30 @@ select * from trig_table;
(2 rows)
drop table refd_table, trig_table;
+--
+-- self-referential FKs are even more fun
+--
+create table self_ref (a int primary key,
+ b int references self_ref(a) on delete cascade);
+create trigger self_ref_r_trig
+ after delete on self_ref referencing old table as old_table
+ for each row execute procedure dump_delete();
+create trigger self_ref_s_trig
+ after delete on self_ref referencing old table as old_table
+ for each statement execute procedure dump_delete();
+insert into self_ref values (1, null), (2, 1), (3, 2);
+delete from self_ref where a = 1;
+NOTICE: trigger = self_ref_r_trig, old table = (1,), (2,1)
+NOTICE: trigger = self_ref_r_trig, old table = (1,), (2,1)
+NOTICE: trigger = self_ref_s_trig, old table = (1,), (2,1)
+NOTICE: trigger = self_ref_r_trig, old table = (3,2)
+NOTICE: trigger = self_ref_s_trig, old table = (3,2)
+-- without AR trigger, cascaded deletes all end up in one transition table
+drop trigger self_ref_r_trig on self_ref;
+insert into self_ref values (1, null), (2, 1), (3, 2), (4, 3);
+delete from self_ref where a = 1;
+NOTICE: trigger = self_ref_s_trig, old table = (1,), (2,1), (3,2), (4,3)
+drop table self_ref;
-- cleanup
drop function dump_insert();
drop function dump_update();
diff --git a/src/test/regress/sql/triggers.sql b/src/test/regress/sql/triggers.sql
index c6deb56c507..30bb7d17b08 100644
--- a/src/test/regress/sql/triggers.sql
+++ b/src/test/regress/sql/triggers.sql
@@ -1729,6 +1729,12 @@ create trigger table2_trig
with wcte as (insert into table1 values (42))
insert into table2 values ('hello world');
+with wcte as (insert into table1 values (43))
+ insert into table1 values (44);
+
+select * from table1;
+select * from table2;
+
drop table table1;
drop table table2;
@@ -1769,6 +1775,15 @@ create trigger my_table_multievent_trig
after insert or update on my_table referencing new table as new_table
for each statement execute procedure dump_insert();
+--
+-- Verify that you can't create a trigger with transition tables and
+-- a column list.
+--
+
+create trigger my_table_col_update_trig
+ after update of b on my_table referencing new table as new_table
+ for each statement execute procedure dump_insert();
+
drop table my_table;
--
@@ -1812,6 +1827,33 @@ select * from trig_table;
drop table refd_table, trig_table;
+--
+-- self-referential FKs are even more fun
+--
+
+create table self_ref (a int primary key,
+ b int references self_ref(a) on delete cascade);
+
+create trigger self_ref_r_trig
+ after delete on self_ref referencing old table as old_table
+ for each row execute procedure dump_delete();
+create trigger self_ref_s_trig
+ after delete on self_ref referencing old table as old_table
+ for each statement execute procedure dump_delete();
+
+insert into self_ref values (1, null), (2, 1), (3, 2);
+
+delete from self_ref where a = 1;
+
+-- without AR trigger, cascaded deletes all end up in one transition table
+drop trigger self_ref_r_trig on self_ref;
+
+insert into self_ref values (1, null), (2, 1), (3, 2), (4, 3);
+
+delete from self_ref where a = 1;
+
+drop table self_ref;
+
-- cleanup
drop function dump_insert();
drop function dump_update();