aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor/execIndexing.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/executor/execIndexing.c')
-rw-r--r--src/backend/executor/execIndexing.c417
1 files changed, 364 insertions, 53 deletions
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index a697682b20e..e7cf72b3875 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -50,6 +50,50 @@
* to the caller. The caller must re-check them later by calling
* check_exclusion_constraint().
*
+ * Speculative insertion
+ * ---------------------
+ *
+ * Speculative insertion is a is a two-phase mechanism, used to implement
+ * INSERT ... ON CONFLICT DO UPDATE/NOTHING. The tuple is first inserted
+ * to the heap and update the indexes as usual, but if a constraint is
+ * violated, we can still back out the insertion without aborting the whole
+ * transaction. In an INSERT ... ON CONFLICT statement, if a conflict is
+ * detected, the inserted tuple is backed out and the ON CONFLICT action is
+ * executed instead.
+ *
+ * Insertion to a unique index works as usual: the index AM checks for
+ * duplicate keys atomically with the insertion. But instead of throwing
+ * an error on a conflict, the speculatively inserted heap tuple is backed
+ * out.
+ *
+ * Exclusion constraints are slightly more complicated. As mentioned
+ * earlier, there is a risk of deadlock when two backends insert the same
+ * key concurrently. That was not a problem for regular insertions, when
+ * one of the transactions has to be aborted anyway, but with a speculative
+ * insertion we cannot let a deadlock happen, because we only want to back
+ * out the speculatively inserted tuple on conflict, not abort the whole
+ * transaction.
+ *
+ * When a backend detects that the speculative insertion conflicts with
+ * another in-progress tuple, it has two options:
+ *
+ * 1. back out the speculatively inserted tuple, then wait for the other
+ * transaction, and retry. Or,
+ * 2. wait for the other transaction, with the speculatively inserted tuple
+ * still in place.
+ *
+ * If two backends insert at the same time, and both try to wait for each
+ * other, they will deadlock. So option 2 is not acceptable. Option 1
+ * avoids the deadlock, but it is prone to a livelock instead. Both
+ * transactions will wake up immediately as the other transaction backs
+ * out. Then they both retry, and conflict with each other again, lather,
+ * rinse, repeat.
+ *
+ * To avoid the livelock, one of the backends must back out first, and then
+ * wait, while the other one waits without backing out. It doesn't matter
+ * which one backs out, so we employ an arbitrary rule that the transaction
+ * with the higher XID backs out.
+ *
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
@@ -63,12 +107,30 @@
#include "postgres.h"
#include "access/relscan.h"
+#include "access/xact.h"
#include "catalog/index.h"
#include "executor/executor.h"
#include "nodes/nodeFuncs.h"
#include "storage/lmgr.h"
#include "utils/tqual.h"
+/* waitMode argument to check_exclusion_or_unique_constraint() */
+typedef enum
+{
+ CEOUC_WAIT,
+ CEOUC_NOWAIT,
+ CEOUC_LIVELOCK_PREVENTING_WAIT,
+} CEOUC_WAIT_MODE;
+
+static bool check_exclusion_or_unique_constraint(Relation heap, Relation index,
+ IndexInfo *indexInfo,
+ ItemPointer tupleid,
+ Datum *values, bool *isnull,
+ EState *estate, bool newIndex,
+ CEOUC_WAIT_MODE waitMode,
+ bool errorOK,
+ ItemPointer conflictTid);
+
static bool index_recheck_constraint(Relation index, Oid *constr_procs,
Datum *existing_values, bool *existing_isnull,
Datum *new_values);
@@ -84,7 +146,7 @@ static bool index_recheck_constraint(Relation index, Oid *constr_procs,
* ----------------------------------------------------------------
*/
void
-ExecOpenIndices(ResultRelInfo *resultRelInfo)
+ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative)
{
Relation resultRelation = resultRelInfo->ri_RelationDesc;
List *indexoidlist;
@@ -137,6 +199,13 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo)
/* extract index key information from the index's pg_index info */
ii = BuildIndexInfo(indexDesc);
+ /*
+ * If the indexes are to be used for speculative insertion, add extra
+ * information required by unique index entries.
+ */
+ if (speculative && ii->ii_Unique)
+ BuildSpeculativeIndexInfo(indexDesc, ii);
+
relationDescs[i] = indexDesc;
indexInfoArray[i] = ii;
i++;
@@ -186,7 +255,9 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
* Unique and exclusion constraints are enforced at the same
* time. This returns a list of index OIDs for any unique or
* exclusion constraints that are deferred and that had
- * potential (unconfirmed) conflicts.
+ * potential (unconfirmed) conflicts. (if noDupErr == true,
+ * the same is done for non-deferred constraints, but report
+ * if conflict was speculative or deferred conflict to caller)
*
* CAUTION: this must not be called for a HOT update.
* We can't defend against that here for lack of info.
@@ -196,7 +267,10 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
List *
ExecInsertIndexTuples(TupleTableSlot *slot,
ItemPointer tupleid,
- EState *estate)
+ EState *estate,
+ bool noDupErr,
+ bool *specConflict,
+ List *arbiterIndexes)
{
List *result = NIL;
ResultRelInfo *resultRelInfo;
@@ -236,12 +310,17 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
IndexInfo *indexInfo;
IndexUniqueCheck checkUnique;
bool satisfiesConstraint;
+ bool arbiter;
if (indexRelation == NULL)
continue;
indexInfo = indexInfoArray[i];
+ /* Record if speculative insertion arbiter */
+ arbiter = list_member_oid(arbiterIndexes,
+ indexRelation->rd_index->indexrelid);
+
/* If the index is marked as read-only, ignore it */
if (!indexInfo->ii_ReadyForInserts)
continue;
@@ -288,9 +367,14 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
* For a deferrable unique index, we tell the index AM to just detect
* possible non-uniqueness, and we add the index OID to the result
* list if further checking is needed.
+ *
+ * For a speculative insertion (used by INSERT ... ON CONFLICT), do
+ * the same as for a deferrable unique index.
*/
if (!indexRelation->rd_index->indisunique)
checkUnique = UNIQUE_CHECK_NO;
+ else if (noDupErr && (arbiterIndexes == NIL || arbiter))
+ checkUnique = UNIQUE_CHECK_PARTIAL;
else if (indexRelation->rd_index->indimmediate)
checkUnique = UNIQUE_CHECK_YES;
else
@@ -308,8 +392,11 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
* If the index has an associated exclusion constraint, check that.
* This is simpler than the process for uniqueness checks since we
* always insert first and then check. If the constraint is deferred,
- * we check now anyway, but don't throw error on violation; instead
- * we'll queue a recheck event.
+ * we check now anyway, but don't throw error on violation or wait for
+ * a conclusive outcome from a concurrent insertion; instead we'll
+ * queue a recheck event. Similarly, noDupErr callers (speculative
+ * inserters) will recheck later, and wait for a conclusive outcome
+ * then.
*
* An index for an exclusion constraint can't also be UNIQUE (not an
* essential property, we just don't allow it in the grammar), so no
@@ -317,13 +404,31 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
*/
if (indexInfo->ii_ExclusionOps != NULL)
{
- bool errorOK = !indexRelation->rd_index->indimmediate;
+ bool violationOK;
+ bool waitMode;
+
+ if (noDupErr)
+ {
+ violationOK = true;
+ waitMode = CEOUC_LIVELOCK_PREVENTING_WAIT;
+ }
+ else if (!indexRelation->rd_index->indimmediate)
+ {
+ violationOK = true;
+ waitMode = CEOUC_NOWAIT;
+ }
+ else
+ {
+ violationOK = false;
+ waitMode = CEOUC_WAIT;
+ }
satisfiesConstraint =
- check_exclusion_constraint(heapRelation,
- indexRelation, indexInfo,
- tupleid, values, isnull,
- estate, false, errorOK);
+ check_exclusion_or_unique_constraint(heapRelation,
+ indexRelation, indexInfo,
+ tupleid, values, isnull,
+ estate, false,
+ waitMode, violationOK, NULL);
}
if ((checkUnique == UNIQUE_CHECK_PARTIAL ||
@@ -333,46 +438,213 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
/*
* The tuple potentially violates the uniqueness or exclusion
* constraint, so make a note of the index so that we can re-check
- * it later.
+ * it later. Speculative inserters are told if there was a
+ * speculative conflict, since that always requires a restart.
*/
result = lappend_oid(result, RelationGetRelid(indexRelation));
+ if (indexRelation->rd_index->indimmediate && specConflict)
+ *specConflict = true;
}
}
return result;
}
+/* ----------------------------------------------------------------
+ * ExecCheckIndexConstraints
+ *
+ * This routine checks if a tuple violates any unique or
+ * exclusion constraints. Returns true if there is no no conflict.
+ * Otherwise returns false, and the TID of the conflicting
+ * tuple is returned in *conflictTid.
+ *
+ * If 'arbiterIndexes' is given, only those indexes are checked.
+ * NIL means all indexes.
+ *
+ * Note that this doesn't lock the values in any way, so it's
+ * possible that a conflicting tuple is inserted immediately
+ * after this returns. But this can be used for a pre-check
+ * before insertion.
+ * ----------------------------------------------------------------
+ */
+bool
+ExecCheckIndexConstraints(TupleTableSlot *slot,
+ EState *estate, ItemPointer conflictTid,
+ List *arbiterIndexes)
+{
+ ResultRelInfo *resultRelInfo;
+ int i;
+ int numIndices;
+ RelationPtr relationDescs;
+ Relation heapRelation;
+ IndexInfo **indexInfoArray;
+ ExprContext *econtext;
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ ItemPointerData invalidItemPtr;
+ bool checkedIndex = false;
+
+ ItemPointerSetInvalid(conflictTid);
+ ItemPointerSetInvalid(&invalidItemPtr);
+
+ /*
+ * Get information from the result relation info structure.
+ */
+ resultRelInfo = estate->es_result_relation_info;
+ numIndices = resultRelInfo->ri_NumIndices;
+ relationDescs = resultRelInfo->ri_IndexRelationDescs;
+ indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+ heapRelation = resultRelInfo->ri_RelationDesc;
+
+ /*
+ * We will use the EState's per-tuple context for evaluating predicates
+ * and index expressions (creating it if it's not already there).
+ */
+ econtext = GetPerTupleExprContext(estate);
+
+ /* Arrange for econtext's scan tuple to be the tuple under test */
+ econtext->ecxt_scantuple = slot;
+
+ /*
+ * For each index, form index tuple and check if it satisfies the
+ * constraint.
+ */
+ for (i = 0; i < numIndices; i++)
+ {
+ Relation indexRelation = relationDescs[i];
+ IndexInfo *indexInfo;
+ bool satisfiesConstraint;
+
+ if (indexRelation == NULL)
+ continue;
+
+ indexInfo = indexInfoArray[i];
+
+ if (!indexInfo->ii_Unique && !indexInfo->ii_ExclusionOps)
+ continue;
+
+ /* If the index is marked as read-only, ignore it */
+ if (!indexInfo->ii_ReadyForInserts)
+ continue;
+
+ /* When specific arbiter indexes requested, only examine them */
+ if (arbiterIndexes != NIL &&
+ !list_member_oid(arbiterIndexes,
+ indexRelation->rd_index->indexrelid))
+ continue;
+
+ if (!indexRelation->rd_index->indimmediate)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ON CONFLICT does not support deferred unique constraints/exclusion constraints as arbiters"),
+ errtableconstraint(heapRelation,
+ RelationGetRelationName(indexRelation))));
+
+ checkedIndex = true;
+
+ /* Check for partial index */
+ if (indexInfo->ii_Predicate != NIL)
+ {
+ List *predicate;
+
+ /*
+ * If predicate state not set up yet, create it (in the estate's
+ * per-query context)
+ */
+ predicate = indexInfo->ii_PredicateState;
+ if (predicate == NIL)
+ {
+ predicate = (List *)
+ ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
+ estate);
+ indexInfo->ii_PredicateState = predicate;
+ }
+
+ /* Skip this index-update if the predicate isn't satisfied */
+ if (!ExecQual(predicate, econtext, false))
+ continue;
+ }
+
+ /*
+ * FormIndexDatum fills in its values and isnull parameters with the
+ * appropriate values for the column(s) of the index.
+ */
+ FormIndexDatum(indexInfo,
+ slot,
+ estate,
+ values,
+ isnull);
+
+ satisfiesConstraint =
+ check_exclusion_or_unique_constraint(heapRelation, indexRelation,
+ indexInfo, &invalidItemPtr,
+ values, isnull, estate, false,
+ CEOUC_WAIT, true,
+ conflictTid);
+ if (!satisfiesConstraint)
+ return false;
+ }
+
+ if (arbiterIndexes != NIL && !checkedIndex)
+ elog(ERROR, "unexpected failure to find arbiter index");
+
+ return true;
+}
+
/*
- * Check for violation of an exclusion constraint
+ * Check for violation of an exclusion or unique constraint
*
* heap: the table containing the new tuple
- * index: the index supporting the exclusion constraint
+ * index: the index supporting the constraint
* indexInfo: info about the index, including the exclusion properties
- * tupleid: heap TID of the new tuple we have just inserted
+ * tupleid: heap TID of the new tuple we have just inserted (invalid if we
+ * haven't inserted a new tuple yet)
* values, isnull: the *index* column values computed for the new tuple
* estate: an EState we can do evaluation in
* newIndex: if true, we are trying to build a new index (this affects
* only the wording of error messages)
- * errorOK: if true, don't throw error for violation
+ * waitMode: whether to wait for concurrent inserters/deleters
+ * violationOK: if true, don't throw error for violation
+ * conflictTid: if not-NULL, the TID of the conflicting tuple is returned here
*
* Returns true if OK, false if actual or potential violation
*
- * When errorOK is true, we report violation without waiting to see if any
- * concurrent transaction has committed or not; so the violation is only
- * potential, and the caller must recheck sometime later. This behavior
- * is convenient for deferred exclusion checks; we need not bother queuing
- * a deferred event if there is definitely no conflict at insertion time.
+ * 'waitMode' determines what happens if a conflict is detected with a tuple
+ * that was inserted or deleted by a transaction that's still running.
+ * CEOUC_WAIT means that we wait for the transaction to commit, before
+ * throwing an error or returning. CEOUC_NOWAIT means that we report the
+ * violation immediately; so the violation is only potential, and the caller
+ * must recheck sometime later. This behavior is convenient for deferred
+ * exclusion checks; we need not bother queuing a deferred event if there is
+ * definitely no conflict at insertion time.
+ *
+ * CEOUC_LIVELOCK_PREVENTING_WAIT is like CEOUC_NOWAIT, but we will sometimes
+ * wait anyway, to prevent livelocking if two transactions try inserting at
+ * the same time. This is used with speculative insertions, for INSERT ON
+ * CONFLICT statements. (See notes in file header)
*
- * When errorOK is false, we'll throw error on violation, so a false result
- * is impossible.
+ * If violationOK is true, we just report the potential or actual violation to
+ * the caller by returning 'false'. Otherwise we throw a descriptive error
+ * message here. When violationOK is false, a false result is impossible.
+ *
+ * Note: The indexam is normally responsible for checking unique constraints,
+ * so this normally only needs to be used for exclusion constraints. But this
+ * function is also called when doing a "pre-check" for conflicts on a unique
+ * constraint, when doing speculative insertion. Caller may use the returned
+ * conflict TID to take further steps.
*/
-bool
-check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo,
- ItemPointer tupleid, Datum *values, bool *isnull,
- EState *estate, bool newIndex, bool errorOK)
+static bool
+check_exclusion_or_unique_constraint(Relation heap, Relation index,
+ IndexInfo *indexInfo,
+ ItemPointer tupleid,
+ Datum *values, bool *isnull,
+ EState *estate, bool newIndex,
+ CEOUC_WAIT_MODE waitMode,
+ bool violationOK,
+ ItemPointer conflictTid)
{
- Oid *constr_procs = indexInfo->ii_ExclusionProcs;
- uint16 *constr_strats = indexInfo->ii_ExclusionStrats;
+ Oid *constr_procs;
+ uint16 *constr_strats;
Oid *index_collations = index->rd_indcollation;
int index_natts = index->rd_index->indnatts;
IndexScanDesc index_scan;
@@ -386,6 +658,17 @@ check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo,
TupleTableSlot *existing_slot;
TupleTableSlot *save_scantuple;
+ if (indexInfo->ii_ExclusionOps)
+ {
+ constr_procs = indexInfo->ii_ExclusionProcs;
+ constr_strats = indexInfo->ii_ExclusionStrats;
+ }
+ else
+ {
+ constr_procs = indexInfo->ii_UniqueProcs;
+ constr_strats = indexInfo->ii_UniqueStrats;
+ }
+
/*
* If any of the input values are NULL, the constraint check is assumed to
* pass (i.e., we assume the operators are strict).
@@ -450,7 +733,8 @@ retry:
/*
* Ignore the entry for the tuple we're trying to check.
*/
- if (ItemPointerEquals(tupleid, &tup->t_self))
+ if (ItemPointerIsValid(tupleid) &&
+ ItemPointerEquals(tupleid, &tup->t_self))
{
if (found_self) /* should not happen */
elog(ERROR, "found self tuple multiple times in index \"%s\"",
@@ -480,39 +764,47 @@ retry:
}
/*
- * At this point we have either a conflict or a potential conflict. If
- * we're not supposed to raise error, just return the fact of the
- * potential conflict without waiting to see if it's real.
- */
- if (errorOK)
- {
- conflict = true;
- break;
- }
-
- /*
+ * At this point we have either a conflict or a potential conflict.
+ *
* If an in-progress transaction is affecting the visibility of this
- * tuple, we need to wait for it to complete and then recheck. For
- * simplicity we do rechecking by just restarting the whole scan ---
- * this case probably doesn't happen often enough to be worth trying
- * harder, and anyway we don't want to hold any index internal locks
- * while waiting.
+ * tuple, we need to wait for it to complete and then recheck (unless
+ * the caller requested not to). For simplicity we do rechecking by
+ * just restarting the whole scan --- this case probably doesn't
+ * happen often enough to be worth trying harder, and anyway we don't
+ * want to hold any index internal locks while waiting.
*/
xwait = TransactionIdIsValid(DirtySnapshot.xmin) ?
DirtySnapshot.xmin : DirtySnapshot.xmax;
- if (TransactionIdIsValid(xwait))
+ if (TransactionIdIsValid(xwait) &&
+ (waitMode == CEOUC_WAIT ||
+ (waitMode == CEOUC_LIVELOCK_PREVENTING_WAIT &&
+ DirtySnapshot.speculativeToken &&
+ TransactionIdPrecedes(GetCurrentTransactionId(), xwait))))
{
ctid_wait = tup->t_data->t_ctid;
index_endscan(index_scan);
- XactLockTableWait(xwait, heap, &ctid_wait,
- XLTW_RecheckExclusionConstr);
+ if (DirtySnapshot.speculativeToken)
+ SpeculativeInsertionWait(DirtySnapshot.xmin,
+ DirtySnapshot.speculativeToken);
+ else
+ XactLockTableWait(xwait, heap, &ctid_wait,
+ XLTW_RecheckExclusionConstr);
goto retry;
}
/*
- * We have a definite conflict. Report it.
+ * We have a definite conflict (or a potential one, but the caller
+ * didn't want to wait). Return it to caller, or report it.
*/
+ if (violationOK)
+ {
+ conflict = true;
+ if (conflictTid)
+ *conflictTid = tup->t_self;
+ break;
+ }
+
error_new = BuildIndexValueDescription(index, values, isnull);
error_existing = BuildIndexValueDescription(index, existing_values,
existing_isnull);
@@ -544,10 +836,10 @@ retry:
/*
* Ordinarily, at this point the search should have found the originally
- * inserted tuple, unless we exited the loop early because of conflict.
- * However, it is possible to define exclusion constraints for which that
- * wouldn't be true --- for instance, if the operator is <>. So we no
- * longer complain if found_self is still false.
+ * inserted tuple (if any), unless we exited the loop early because of
+ * conflict. However, it is possible to define exclusion constraints for
+ * which that wouldn't be true --- for instance, if the operator is <>.
+ * So we no longer complain if found_self is still false.
*/
econtext->ecxt_scantuple = save_scantuple;
@@ -558,6 +850,25 @@ retry:
}
/*
+ * Check for violation of an exclusion constraint
+ *
+ * This is a dumbed down version of check_exclusion_or_unique_constraint
+ * for external callers. They don't need all the special modes.
+ */
+void
+check_exclusion_constraint(Relation heap, Relation index,
+ IndexInfo *indexInfo,
+ ItemPointer tupleid,
+ Datum *values, bool *isnull,
+ EState *estate, bool newIndex)
+{
+ (void) check_exclusion_or_unique_constraint(heap, index, indexInfo, tupleid,
+ values, isnull,
+ estate, newIndex,
+ CEOUC_WAIT, false, NULL);
+}
+
+/*
* Check existing tuple's index values to see if it really matches the
* exclusion condition against the new_values. Returns true if conflict.
*/