aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor/execIndexing.c
diff options
context:
space:
mode:
authorAndres Freund <andres@anarazel.de>2015-05-08 05:31:36 +0200
committerAndres Freund <andres@anarazel.de>2015-05-08 05:43:10 +0200
commit168d5805e4c08bed7b95d351bf097cff7c07dd65 (patch)
treecd55bff71bf05324f388d3404c1b3697f3a96e7e /src/backend/executor/execIndexing.c
parent2c8f4836db058d0715bc30a30655d646287ba509 (diff)
downloadpostgresql-168d5805e4c08bed7b95d351bf097cff7c07dd65.tar.gz
postgresql-168d5805e4c08bed7b95d351bf097cff7c07dd65.zip
Add support for INSERT ... ON CONFLICT DO NOTHING/UPDATE.
The newly added ON CONFLICT clause allows to specify an alternative to raising a unique or exclusion constraint violation error when inserting. ON CONFLICT refers to constraints that can either be specified using a inference clause (by specifying the columns of a unique constraint) or by naming a unique or exclusion constraint. DO NOTHING avoids the constraint violation, without touching the pre-existing row. DO UPDATE SET ... [WHERE ...] updates the pre-existing tuple, and has access to both the tuple proposed for insertion and the existing tuple; the optional WHERE clause can be used to prevent an update from being executed. The UPDATE SET and WHERE clauses have access to the tuple proposed for insertion using the "magic" EXCLUDED alias, and to the pre-existing tuple using the table name or its alias. This feature is often referred to as upsert. This is implemented using a new infrastructure called "speculative insertion". It is an optimistic variant of regular insertion that first does a pre-check for existing tuples and then attempts an insert. If a violating tuple was inserted concurrently, the speculatively inserted tuple is deleted and a new attempt is made. If the pre-check finds a matching tuple the alternative DO NOTHING or DO UPDATE action is taken. If the insertion succeeds without detecting a conflict, the tuple is deemed inserted. To handle the possible ambiguity between the excluded alias and a table named excluded, and for convenience with long relation names, INSERT INTO now can alias its target table. Bumps catversion as stored rules change. Author: Peter Geoghegan, with significant contributions from Heikki Linnakangas and Andres Freund. Testing infrastructure by Jeff Janes. Reviewed-By: Heikki Linnakangas, Andres Freund, Robert Haas, Simon Riggs, Dean Rasheed, Stephen Frost and many others.
Diffstat (limited to 'src/backend/executor/execIndexing.c')
-rw-r--r--src/backend/executor/execIndexing.c417
1 files changed, 364 insertions, 53 deletions
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index a697682b20e..e7cf72b3875 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -50,6 +50,50 @@
* to the caller. The caller must re-check them later by calling
* check_exclusion_constraint().
*
+ * Speculative insertion
+ * ---------------------
+ *
+ * Speculative insertion is a is a two-phase mechanism, used to implement
+ * INSERT ... ON CONFLICT DO UPDATE/NOTHING. The tuple is first inserted
+ * to the heap and update the indexes as usual, but if a constraint is
+ * violated, we can still back out the insertion without aborting the whole
+ * transaction. In an INSERT ... ON CONFLICT statement, if a conflict is
+ * detected, the inserted tuple is backed out and the ON CONFLICT action is
+ * executed instead.
+ *
+ * Insertion to a unique index works as usual: the index AM checks for
+ * duplicate keys atomically with the insertion. But instead of throwing
+ * an error on a conflict, the speculatively inserted heap tuple is backed
+ * out.
+ *
+ * Exclusion constraints are slightly more complicated. As mentioned
+ * earlier, there is a risk of deadlock when two backends insert the same
+ * key concurrently. That was not a problem for regular insertions, when
+ * one of the transactions has to be aborted anyway, but with a speculative
+ * insertion we cannot let a deadlock happen, because we only want to back
+ * out the speculatively inserted tuple on conflict, not abort the whole
+ * transaction.
+ *
+ * When a backend detects that the speculative insertion conflicts with
+ * another in-progress tuple, it has two options:
+ *
+ * 1. back out the speculatively inserted tuple, then wait for the other
+ * transaction, and retry. Or,
+ * 2. wait for the other transaction, with the speculatively inserted tuple
+ * still in place.
+ *
+ * If two backends insert at the same time, and both try to wait for each
+ * other, they will deadlock. So option 2 is not acceptable. Option 1
+ * avoids the deadlock, but it is prone to a livelock instead. Both
+ * transactions will wake up immediately as the other transaction backs
+ * out. Then they both retry, and conflict with each other again, lather,
+ * rinse, repeat.
+ *
+ * To avoid the livelock, one of the backends must back out first, and then
+ * wait, while the other one waits without backing out. It doesn't matter
+ * which one backs out, so we employ an arbitrary rule that the transaction
+ * with the higher XID backs out.
+ *
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
@@ -63,12 +107,30 @@
#include "postgres.h"
#include "access/relscan.h"
+#include "access/xact.h"
#include "catalog/index.h"
#include "executor/executor.h"
#include "nodes/nodeFuncs.h"
#include "storage/lmgr.h"
#include "utils/tqual.h"
+/* waitMode argument to check_exclusion_or_unique_constraint() */
+typedef enum
+{
+ CEOUC_WAIT,
+ CEOUC_NOWAIT,
+ CEOUC_LIVELOCK_PREVENTING_WAIT,
+} CEOUC_WAIT_MODE;
+
+static bool check_exclusion_or_unique_constraint(Relation heap, Relation index,
+ IndexInfo *indexInfo,
+ ItemPointer tupleid,
+ Datum *values, bool *isnull,
+ EState *estate, bool newIndex,
+ CEOUC_WAIT_MODE waitMode,
+ bool errorOK,
+ ItemPointer conflictTid);
+
static bool index_recheck_constraint(Relation index, Oid *constr_procs,
Datum *existing_values, bool *existing_isnull,
Datum *new_values);
@@ -84,7 +146,7 @@ static bool index_recheck_constraint(Relation index, Oid *constr_procs,
* ----------------------------------------------------------------
*/
void
-ExecOpenIndices(ResultRelInfo *resultRelInfo)
+ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative)
{
Relation resultRelation = resultRelInfo->ri_RelationDesc;
List *indexoidlist;
@@ -137,6 +199,13 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo)
/* extract index key information from the index's pg_index info */
ii = BuildIndexInfo(indexDesc);
+ /*
+ * If the indexes are to be used for speculative insertion, add extra
+ * information required by unique index entries.
+ */
+ if (speculative && ii->ii_Unique)
+ BuildSpeculativeIndexInfo(indexDesc, ii);
+
relationDescs[i] = indexDesc;
indexInfoArray[i] = ii;
i++;
@@ -186,7 +255,9 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
* Unique and exclusion constraints are enforced at the same
* time. This returns a list of index OIDs for any unique or
* exclusion constraints that are deferred and that had
- * potential (unconfirmed) conflicts.
+ * potential (unconfirmed) conflicts. (if noDupErr == true,
+ * the same is done for non-deferred constraints, but report
+ * if conflict was speculative or deferred conflict to caller)
*
* CAUTION: this must not be called for a HOT update.
* We can't defend against that here for lack of info.
@@ -196,7 +267,10 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
List *
ExecInsertIndexTuples(TupleTableSlot *slot,
ItemPointer tupleid,
- EState *estate)
+ EState *estate,
+ bool noDupErr,
+ bool *specConflict,
+ List *arbiterIndexes)
{
List *result = NIL;
ResultRelInfo *resultRelInfo;
@@ -236,12 +310,17 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
IndexInfo *indexInfo;
IndexUniqueCheck checkUnique;
bool satisfiesConstraint;
+ bool arbiter;
if (indexRelation == NULL)
continue;
indexInfo = indexInfoArray[i];
+ /* Record if speculative insertion arbiter */
+ arbiter = list_member_oid(arbiterIndexes,
+ indexRelation->rd_index->indexrelid);
+
/* If the index is marked as read-only, ignore it */
if (!indexInfo->ii_ReadyForInserts)
continue;
@@ -288,9 +367,14 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
* For a deferrable unique index, we tell the index AM to just detect
* possible non-uniqueness, and we add the index OID to the result
* list if further checking is needed.
+ *
+ * For a speculative insertion (used by INSERT ... ON CONFLICT), do
+ * the same as for a deferrable unique index.
*/
if (!indexRelation->rd_index->indisunique)
checkUnique = UNIQUE_CHECK_NO;
+ else if (noDupErr && (arbiterIndexes == NIL || arbiter))
+ checkUnique = UNIQUE_CHECK_PARTIAL;
else if (indexRelation->rd_index->indimmediate)
checkUnique = UNIQUE_CHECK_YES;
else
@@ -308,8 +392,11 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
* If the index has an associated exclusion constraint, check that.
* This is simpler than the process for uniqueness checks since we
* always insert first and then check. If the constraint is deferred,
- * we check now anyway, but don't throw error on violation; instead
- * we'll queue a recheck event.
+ * we check now anyway, but don't throw error on violation or wait for
+ * a conclusive outcome from a concurrent insertion; instead we'll
+ * queue a recheck event. Similarly, noDupErr callers (speculative
+ * inserters) will recheck later, and wait for a conclusive outcome
+ * then.
*
* An index for an exclusion constraint can't also be UNIQUE (not an
* essential property, we just don't allow it in the grammar), so no
@@ -317,13 +404,31 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
*/
if (indexInfo->ii_ExclusionOps != NULL)
{
- bool errorOK = !indexRelation->rd_index->indimmediate;
+ bool violationOK;
+ bool waitMode;
+
+ if (noDupErr)
+ {
+ violationOK = true;
+ waitMode = CEOUC_LIVELOCK_PREVENTING_WAIT;
+ }
+ else if (!indexRelation->rd_index->indimmediate)
+ {
+ violationOK = true;
+ waitMode = CEOUC_NOWAIT;
+ }
+ else
+ {
+ violationOK = false;
+ waitMode = CEOUC_WAIT;
+ }
satisfiesConstraint =
- check_exclusion_constraint(heapRelation,
- indexRelation, indexInfo,
- tupleid, values, isnull,
- estate, false, errorOK);
+ check_exclusion_or_unique_constraint(heapRelation,
+ indexRelation, indexInfo,
+ tupleid, values, isnull,
+ estate, false,
+ waitMode, violationOK, NULL);
}
if ((checkUnique == UNIQUE_CHECK_PARTIAL ||
@@ -333,46 +438,213 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
/*
* The tuple potentially violates the uniqueness or exclusion
* constraint, so make a note of the index so that we can re-check
- * it later.
+ * it later. Speculative inserters are told if there was a
+ * speculative conflict, since that always requires a restart.
*/
result = lappend_oid(result, RelationGetRelid(indexRelation));
+ if (indexRelation->rd_index->indimmediate && specConflict)
+ *specConflict = true;
}
}
return result;
}
+/* ----------------------------------------------------------------
+ * ExecCheckIndexConstraints
+ *
+ * This routine checks if a tuple violates any unique or
+ * exclusion constraints. Returns true if there is no no conflict.
+ * Otherwise returns false, and the TID of the conflicting
+ * tuple is returned in *conflictTid.
+ *
+ * If 'arbiterIndexes' is given, only those indexes are checked.
+ * NIL means all indexes.
+ *
+ * Note that this doesn't lock the values in any way, so it's
+ * possible that a conflicting tuple is inserted immediately
+ * after this returns. But this can be used for a pre-check
+ * before insertion.
+ * ----------------------------------------------------------------
+ */
+bool
+ExecCheckIndexConstraints(TupleTableSlot *slot,
+ EState *estate, ItemPointer conflictTid,
+ List *arbiterIndexes)
+{
+ ResultRelInfo *resultRelInfo;
+ int i;
+ int numIndices;
+ RelationPtr relationDescs;
+ Relation heapRelation;
+ IndexInfo **indexInfoArray;
+ ExprContext *econtext;
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ ItemPointerData invalidItemPtr;
+ bool checkedIndex = false;
+
+ ItemPointerSetInvalid(conflictTid);
+ ItemPointerSetInvalid(&invalidItemPtr);
+
+ /*
+ * Get information from the result relation info structure.
+ */
+ resultRelInfo = estate->es_result_relation_info;
+ numIndices = resultRelInfo->ri_NumIndices;
+ relationDescs = resultRelInfo->ri_IndexRelationDescs;
+ indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+ heapRelation = resultRelInfo->ri_RelationDesc;
+
+ /*
+ * We will use the EState's per-tuple context for evaluating predicates
+ * and index expressions (creating it if it's not already there).
+ */
+ econtext = GetPerTupleExprContext(estate);
+
+ /* Arrange for econtext's scan tuple to be the tuple under test */
+ econtext->ecxt_scantuple = slot;
+
+ /*
+ * For each index, form index tuple and check if it satisfies the
+ * constraint.
+ */
+ for (i = 0; i < numIndices; i++)
+ {
+ Relation indexRelation = relationDescs[i];
+ IndexInfo *indexInfo;
+ bool satisfiesConstraint;
+
+ if (indexRelation == NULL)
+ continue;
+
+ indexInfo = indexInfoArray[i];
+
+ if (!indexInfo->ii_Unique && !indexInfo->ii_ExclusionOps)
+ continue;
+
+ /* If the index is marked as read-only, ignore it */
+ if (!indexInfo->ii_ReadyForInserts)
+ continue;
+
+ /* When specific arbiter indexes requested, only examine them */
+ if (arbiterIndexes != NIL &&
+ !list_member_oid(arbiterIndexes,
+ indexRelation->rd_index->indexrelid))
+ continue;
+
+ if (!indexRelation->rd_index->indimmediate)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ON CONFLICT does not support deferred unique constraints/exclusion constraints as arbiters"),
+ errtableconstraint(heapRelation,
+ RelationGetRelationName(indexRelation))));
+
+ checkedIndex = true;
+
+ /* Check for partial index */
+ if (indexInfo->ii_Predicate != NIL)
+ {
+ List *predicate;
+
+ /*
+ * If predicate state not set up yet, create it (in the estate's
+ * per-query context)
+ */
+ predicate = indexInfo->ii_PredicateState;
+ if (predicate == NIL)
+ {
+ predicate = (List *)
+ ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
+ estate);
+ indexInfo->ii_PredicateState = predicate;
+ }
+
+ /* Skip this index-update if the predicate isn't satisfied */
+ if (!ExecQual(predicate, econtext, false))
+ continue;
+ }
+
+ /*
+ * FormIndexDatum fills in its values and isnull parameters with the
+ * appropriate values for the column(s) of the index.
+ */
+ FormIndexDatum(indexInfo,
+ slot,
+ estate,
+ values,
+ isnull);
+
+ satisfiesConstraint =
+ check_exclusion_or_unique_constraint(heapRelation, indexRelation,
+ indexInfo, &invalidItemPtr,
+ values, isnull, estate, false,
+ CEOUC_WAIT, true,
+ conflictTid);
+ if (!satisfiesConstraint)
+ return false;
+ }
+
+ if (arbiterIndexes != NIL && !checkedIndex)
+ elog(ERROR, "unexpected failure to find arbiter index");
+
+ return true;
+}
+
/*
- * Check for violation of an exclusion constraint
+ * Check for violation of an exclusion or unique constraint
*
* heap: the table containing the new tuple
- * index: the index supporting the exclusion constraint
+ * index: the index supporting the constraint
* indexInfo: info about the index, including the exclusion properties
- * tupleid: heap TID of the new tuple we have just inserted
+ * tupleid: heap TID of the new tuple we have just inserted (invalid if we
+ * haven't inserted a new tuple yet)
* values, isnull: the *index* column values computed for the new tuple
* estate: an EState we can do evaluation in
* newIndex: if true, we are trying to build a new index (this affects
* only the wording of error messages)
- * errorOK: if true, don't throw error for violation
+ * waitMode: whether to wait for concurrent inserters/deleters
+ * violationOK: if true, don't throw error for violation
+ * conflictTid: if not-NULL, the TID of the conflicting tuple is returned here
*
* Returns true if OK, false if actual or potential violation
*
- * When errorOK is true, we report violation without waiting to see if any
- * concurrent transaction has committed or not; so the violation is only
- * potential, and the caller must recheck sometime later. This behavior
- * is convenient for deferred exclusion checks; we need not bother queuing
- * a deferred event if there is definitely no conflict at insertion time.
+ * 'waitMode' determines what happens if a conflict is detected with a tuple
+ * that was inserted or deleted by a transaction that's still running.
+ * CEOUC_WAIT means that we wait for the transaction to commit, before
+ * throwing an error or returning. CEOUC_NOWAIT means that we report the
+ * violation immediately; so the violation is only potential, and the caller
+ * must recheck sometime later. This behavior is convenient for deferred
+ * exclusion checks; we need not bother queuing a deferred event if there is
+ * definitely no conflict at insertion time.
+ *
+ * CEOUC_LIVELOCK_PREVENTING_WAIT is like CEOUC_NOWAIT, but we will sometimes
+ * wait anyway, to prevent livelocking if two transactions try inserting at
+ * the same time. This is used with speculative insertions, for INSERT ON
+ * CONFLICT statements. (See notes in file header)
*
- * When errorOK is false, we'll throw error on violation, so a false result
- * is impossible.
+ * If violationOK is true, we just report the potential or actual violation to
+ * the caller by returning 'false'. Otherwise we throw a descriptive error
+ * message here. When violationOK is false, a false result is impossible.
+ *
+ * Note: The indexam is normally responsible for checking unique constraints,
+ * so this normally only needs to be used for exclusion constraints. But this
+ * function is also called when doing a "pre-check" for conflicts on a unique
+ * constraint, when doing speculative insertion. Caller may use the returned
+ * conflict TID to take further steps.
*/
-bool
-check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo,
- ItemPointer tupleid, Datum *values, bool *isnull,
- EState *estate, bool newIndex, bool errorOK)
+static bool
+check_exclusion_or_unique_constraint(Relation heap, Relation index,
+ IndexInfo *indexInfo,
+ ItemPointer tupleid,
+ Datum *values, bool *isnull,
+ EState *estate, bool newIndex,
+ CEOUC_WAIT_MODE waitMode,
+ bool violationOK,
+ ItemPointer conflictTid)
{
- Oid *constr_procs = indexInfo->ii_ExclusionProcs;
- uint16 *constr_strats = indexInfo->ii_ExclusionStrats;
+ Oid *constr_procs;
+ uint16 *constr_strats;
Oid *index_collations = index->rd_indcollation;
int index_natts = index->rd_index->indnatts;
IndexScanDesc index_scan;
@@ -386,6 +658,17 @@ check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo,
TupleTableSlot *existing_slot;
TupleTableSlot *save_scantuple;
+ if (indexInfo->ii_ExclusionOps)
+ {
+ constr_procs = indexInfo->ii_ExclusionProcs;
+ constr_strats = indexInfo->ii_ExclusionStrats;
+ }
+ else
+ {
+ constr_procs = indexInfo->ii_UniqueProcs;
+ constr_strats = indexInfo->ii_UniqueStrats;
+ }
+
/*
* If any of the input values are NULL, the constraint check is assumed to
* pass (i.e., we assume the operators are strict).
@@ -450,7 +733,8 @@ retry:
/*
* Ignore the entry for the tuple we're trying to check.
*/
- if (ItemPointerEquals(tupleid, &tup->t_self))
+ if (ItemPointerIsValid(tupleid) &&
+ ItemPointerEquals(tupleid, &tup->t_self))
{
if (found_self) /* should not happen */
elog(ERROR, "found self tuple multiple times in index \"%s\"",
@@ -480,39 +764,47 @@ retry:
}
/*
- * At this point we have either a conflict or a potential conflict. If
- * we're not supposed to raise error, just return the fact of the
- * potential conflict without waiting to see if it's real.
- */
- if (errorOK)
- {
- conflict = true;
- break;
- }
-
- /*
+ * At this point we have either a conflict or a potential conflict.
+ *
* If an in-progress transaction is affecting the visibility of this
- * tuple, we need to wait for it to complete and then recheck. For
- * simplicity we do rechecking by just restarting the whole scan ---
- * this case probably doesn't happen often enough to be worth trying
- * harder, and anyway we don't want to hold any index internal locks
- * while waiting.
+ * tuple, we need to wait for it to complete and then recheck (unless
+ * the caller requested not to). For simplicity we do rechecking by
+ * just restarting the whole scan --- this case probably doesn't
+ * happen often enough to be worth trying harder, and anyway we don't
+ * want to hold any index internal locks while waiting.
*/
xwait = TransactionIdIsValid(DirtySnapshot.xmin) ?
DirtySnapshot.xmin : DirtySnapshot.xmax;
- if (TransactionIdIsValid(xwait))
+ if (TransactionIdIsValid(xwait) &&
+ (waitMode == CEOUC_WAIT ||
+ (waitMode == CEOUC_LIVELOCK_PREVENTING_WAIT &&
+ DirtySnapshot.speculativeToken &&
+ TransactionIdPrecedes(GetCurrentTransactionId(), xwait))))
{
ctid_wait = tup->t_data->t_ctid;
index_endscan(index_scan);
- XactLockTableWait(xwait, heap, &ctid_wait,
- XLTW_RecheckExclusionConstr);
+ if (DirtySnapshot.speculativeToken)
+ SpeculativeInsertionWait(DirtySnapshot.xmin,
+ DirtySnapshot.speculativeToken);
+ else
+ XactLockTableWait(xwait, heap, &ctid_wait,
+ XLTW_RecheckExclusionConstr);
goto retry;
}
/*
- * We have a definite conflict. Report it.
+ * We have a definite conflict (or a potential one, but the caller
+ * didn't want to wait). Return it to caller, or report it.
*/
+ if (violationOK)
+ {
+ conflict = true;
+ if (conflictTid)
+ *conflictTid = tup->t_self;
+ break;
+ }
+
error_new = BuildIndexValueDescription(index, values, isnull);
error_existing = BuildIndexValueDescription(index, existing_values,
existing_isnull);
@@ -544,10 +836,10 @@ retry:
/*
* Ordinarily, at this point the search should have found the originally
- * inserted tuple, unless we exited the loop early because of conflict.
- * However, it is possible to define exclusion constraints for which that
- * wouldn't be true --- for instance, if the operator is <>. So we no
- * longer complain if found_self is still false.
+ * inserted tuple (if any), unless we exited the loop early because of
+ * conflict. However, it is possible to define exclusion constraints for
+ * which that wouldn't be true --- for instance, if the operator is <>.
+ * So we no longer complain if found_self is still false.
*/
econtext->ecxt_scantuple = save_scantuple;
@@ -558,6 +850,25 @@ retry:
}
/*
+ * Check for violation of an exclusion constraint
+ *
+ * This is a dumbed down version of check_exclusion_or_unique_constraint
+ * for external callers. They don't need all the special modes.
+ */
+void
+check_exclusion_constraint(Relation heap, Relation index,
+ IndexInfo *indexInfo,
+ ItemPointer tupleid,
+ Datum *values, bool *isnull,
+ EState *estate, bool newIndex)
+{
+ (void) check_exclusion_or_unique_constraint(heap, index, indexInfo, tupleid,
+ values, isnull,
+ estate, newIndex,
+ CEOUC_WAIT, false, NULL);
+}
+
+/*
* Check existing tuple's index values to see if it really matches the
* exclusion condition against the new_values. Returns true if conflict.
*/