aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/executor')
-rw-r--r--src/backend/executor/execExpr.c155
-rw-r--r--src/backend/executor/execGrouping.c82
-rw-r--r--src/backend/executor/nodeSubplan.c18
3 files changed, 203 insertions, 52 deletions
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index e0eb96fd5ad..81714341f07 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -3979,6 +3979,161 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
}
/*
+ * Build an ExprState that calls the given hash function(s) on the attnums
+ * given by 'keyColIdx' . When numCols > 1, the hash values returned by each
+ * hash function are combined to produce a single hash value.
+ *
+ * desc: tuple descriptor for the to-be-hashed columns
+ * ops: TupleTableSlotOps to use for the give TupleDesc
+ * hashfunctions: FmgrInfos for each hash function to call, one per numCols.
+ * These are used directly in the returned ExprState so must remain allocated.
+ * collations: collation to use when calling the hash function.
+ * numCols: array length of hashfunctions, collations and keyColIdx.
+ * parent: PlanState node that the resulting ExprState will be evaluated at
+ * init_value: Normally 0, but can be set to other values to seed the hash
+ * with. Non-zero is marginally slower, so best to only use if it's provably
+ * worthwhile.
+ */
+ExprState *
+ExecBuildHash32FromAttrs(TupleDesc desc, const TupleTableSlotOps *ops,
+ FmgrInfo *hashfunctions, Oid *collations,
+ int numCols, AttrNumber *keyColIdx,
+ PlanState *parent, uint32 init_value)
+{
+ ExprState *state = makeNode(ExprState);
+ ExprEvalStep scratch = {0};
+ NullableDatum *iresult = NULL;
+ intptr_t opcode;
+ AttrNumber last_attnum = 0;
+
+ Assert(numCols >= 0);
+
+ state->parent = parent;
+
+ /*
+ * Make a place to store intermediate hash values between subsequent
+ * hashing of individual columns. We only need this if there is more than
+ * one column to hash or an initial value plus one column.
+ */
+ if ((int64) numCols + (init_value != 0) > 1)
+ iresult = palloc(sizeof(NullableDatum));
+
+ /* find the highest attnum so we deform the tuple to that point */
+ for (int i = 0; i < numCols; i++)
+ last_attnum = Max(last_attnum, keyColIdx[i]);
+
+ scratch.opcode = EEOP_INNER_FETCHSOME;
+ scratch.d.fetch.last_var = last_attnum;
+ scratch.d.fetch.fixed = false;
+ scratch.d.fetch.kind = ops;
+ scratch.d.fetch.known_desc = desc;
+ if (ExecComputeSlotInfo(state, &scratch))
+ ExprEvalPushStep(state, &scratch);
+
+ if (init_value == 0)
+ {
+ /*
+ * No initial value, so we can assign the result of the hash function
+ * for the first attribute without having to concern ourselves with
+ * combining the result with any initial value.
+ */
+ opcode = EEOP_HASHDATUM_FIRST;
+ }
+ else
+ {
+ /*
+ * Set up operation to set the initial value. Normally we store this
+ * in the intermediate hash value location, but if there are no
+ * columns to hash, store it in the ExprState's result field.
+ */
+ scratch.opcode = EEOP_HASHDATUM_SET_INITVAL;
+ scratch.d.hashdatum_initvalue.init_value = UInt32GetDatum(init_value);
+ scratch.resvalue = numCols > 0 ? &iresult->value : &state->resvalue;
+ scratch.resnull = numCols > 0 ? &iresult->isnull : &state->resnull;
+
+ ExprEvalPushStep(state, &scratch);
+
+ /*
+ * When using an initial value use the NEXT32 ops as the FIRST ops
+ * would overwrite the stored initial value.
+ */
+ opcode = EEOP_HASHDATUM_NEXT32;
+ }
+
+ for (int i = 0; i < numCols; i++)
+ {
+ FmgrInfo *finfo;
+ FunctionCallInfo fcinfo;
+ Oid inputcollid = collations[i];
+ AttrNumber attnum = keyColIdx[i] - 1;
+
+ finfo = &hashfunctions[i];
+ fcinfo = palloc0(SizeForFunctionCallInfo(1));
+
+ /* Initialize function call parameter structure too */
+ InitFunctionCallInfoData(*fcinfo, finfo, 1, inputcollid, NULL, NULL);
+
+ /*
+ * Fetch inner Var for this attnum and store it in the 1st arg of the
+ * hash func.
+ */
+ scratch.opcode = EEOP_INNER_VAR;
+ scratch.resvalue = &fcinfo->args[0].value;
+ scratch.resnull = &fcinfo->args[0].isnull;
+ scratch.d.var.attnum = attnum;
+ scratch.d.var.vartype = TupleDescAttr(desc, attnum)->atttypid;
+
+ ExprEvalPushStep(state, &scratch);
+
+ /* Call the hash function */
+ scratch.opcode = opcode;
+
+ if (i == numCols - 1)
+ {
+ /*
+ * The result for hashing the final column is stored in the
+ * ExprState.
+ */
+ scratch.resvalue = &state->resvalue;
+ scratch.resnull = &state->resnull;
+ }
+ else
+ {
+ Assert(iresult != NULL);
+
+ /* intermediate values are stored in an intermediate result */
+ scratch.resvalue = &iresult->value;
+ scratch.resnull = &iresult->isnull;
+ }
+
+ /*
+ * NEXT32 opcodes need to look at the intermediate result. We might
+ * as well just set this for all ops. FIRSTs won't look at it.
+ */
+ scratch.d.hashdatum.iresult = iresult;
+
+ scratch.d.hashdatum.finfo = finfo;
+ scratch.d.hashdatum.fcinfo_data = fcinfo;
+ scratch.d.hashdatum.fn_addr = finfo->fn_addr;
+ scratch.d.hashdatum.jumpdone = -1;
+
+ ExprEvalPushStep(state, &scratch);
+
+ /* subsequent attnums must be combined with the previous */
+ opcode = EEOP_HASHDATUM_NEXT32;
+ }
+
+ scratch.resvalue = NULL;
+ scratch.resnull = NULL;
+ scratch.opcode = EEOP_DONE;
+ ExprEvalPushStep(state, &scratch);
+
+ ExecReadyExpr(state);
+
+ return state;
+}
+
+/*
* Build an ExprState that calls the given hash function(s) on the given
* 'hash_exprs'. When multiple expressions are present, the hash values
* returned by each hash function are combined to produce a single hash value.
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 774e4de8828..9a88fc65249 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -169,6 +169,7 @@ BuildTupleHashTableExt(PlanState *parent,
Size hash_mem_limit;
MemoryContext oldcontext;
bool allow_jit;
+ uint32 hash_iv = 0;
Assert(nbuckets > 0);
@@ -183,14 +184,13 @@ BuildTupleHashTableExt(PlanState *parent,
hashtable->numCols = numCols;
hashtable->keyColIdx = keyColIdx;
- hashtable->tab_hash_funcs = hashfunctions;
hashtable->tab_collations = collations;
hashtable->tablecxt = tablecxt;
hashtable->tempcxt = tempcxt;
hashtable->entrysize = entrysize;
hashtable->tableslot = NULL; /* will be made on first lookup */
hashtable->inputslot = NULL;
- hashtable->in_hash_funcs = NULL;
+ hashtable->in_hash_expr = NULL;
hashtable->cur_eq_func = NULL;
/*
@@ -202,9 +202,7 @@ BuildTupleHashTableExt(PlanState *parent,
* underestimated.
*/
if (use_variable_hash_iv)
- hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
- else
- hashtable->hash_iv = 0;
+ hash_iv = murmurhash32(ParallelWorkerNumber);
hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
@@ -225,6 +223,16 @@ BuildTupleHashTableExt(PlanState *parent,
*/
allow_jit = metacxt != tablecxt;
+ /* build hash ExprState for all columns */
+ hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
+ &TTSOpsMinimalTuple,
+ hashfunctions,
+ collations,
+ numCols,
+ keyColIdx,
+ allow_jit ? parent : NULL,
+ hash_iv);
+
/* build comparator for all columns */
/* XXX: should we support non-minimal tuples for the inputslot? */
hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
@@ -316,7 +324,7 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
/* set up data needed by hash and match functions */
hashtable->inputslot = slot;
- hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+ hashtable->in_hash_expr = hashtable->tab_hash_expr;
hashtable->cur_eq_func = hashtable->tab_eq_func;
local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
@@ -342,7 +350,7 @@ TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
uint32 hash;
hashtable->inputslot = slot;
- hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+ hashtable->in_hash_expr = hashtable->tab_hash_expr;
/* Need to run the hash functions in short-lived context */
oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
@@ -370,7 +378,7 @@ LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
/* set up data needed by hash and match functions */
hashtable->inputslot = slot;
- hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+ hashtable->in_hash_expr = hashtable->tab_hash_expr;
hashtable->cur_eq_func = hashtable->tab_eq_func;
entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
@@ -386,14 +394,14 @@ LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot,
* created if there's not a match. This is similar to the non-creating
* case of LookupTupleHashEntry, except that it supports cross-type
* comparisons, in which the given tuple is not of the same type as the
- * table entries. The caller must provide the hash functions to use for
- * the input tuple, as well as the equality functions, since these may be
+ * table entries. The caller must provide the hash ExprState to use for
+ * the input tuple, as well as the equality ExprState, since these may be
* different from the table's internal functions.
*/
TupleHashEntry
FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
ExprState *eqcomp,
- FmgrInfo *hashfunctions)
+ ExprState *hashexpr)
{
TupleHashEntry entry;
MemoryContext oldContext;
@@ -404,7 +412,7 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
/* Set up data needed by hash and match functions */
hashtable->inputslot = slot;
- hashtable->in_hash_funcs = hashfunctions;
+ hashtable->in_hash_expr = hashexpr;
hashtable->cur_eq_func = eqcomp;
/* Search the hash table */
@@ -421,25 +429,24 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
* copied into the table.
*
* Also, the caller must select an appropriate memory context for running
- * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
+ * the hash functions.
*/
static uint32
TupleHashTableHash_internal(struct tuplehash_hash *tb,
const MinimalTuple tuple)
{
TupleHashTable hashtable = (TupleHashTable) tb->private_data;
- int numCols = hashtable->numCols;
- AttrNumber *keyColIdx = hashtable->keyColIdx;
- uint32 hashkey = hashtable->hash_iv;
+ uint32 hashkey;
TupleTableSlot *slot;
- FmgrInfo *hashfunctions;
- int i;
+ bool isnull;
if (tuple == NULL)
{
/* Process the current input tuple for the table */
- slot = hashtable->inputslot;
- hashfunctions = hashtable->in_hash_funcs;
+ hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot;
+ hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr,
+ hashtable->exprcontext,
+ &isnull));
}
else
{
@@ -449,38 +456,17 @@ TupleHashTableHash_internal(struct tuplehash_hash *tb,
* (this case never actually occurs due to the way simplehash.h is
* used, as the hash-value is stored in the entries)
*/
- slot = hashtable->tableslot;
+ slot = hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot;
ExecStoreMinimalTuple(tuple, slot, false);
- hashfunctions = hashtable->tab_hash_funcs;
- }
-
- for (i = 0; i < numCols; i++)
- {
- AttrNumber att = keyColIdx[i];
- Datum attr;
- bool isNull;
-
- /* combine successive hashkeys by rotating */
- hashkey = pg_rotate_left32(hashkey, 1);
-
- attr = slot_getattr(slot, att, &isNull);
-
- if (!isNull) /* treat nulls as having hash key 0 */
- {
- uint32 hkey;
-
- hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
- hashtable->tab_collations[i],
- attr));
- hashkey ^= hkey;
- }
+ hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr,
+ hashtable->exprcontext,
+ &isnull));
}
/*
- * The way hashes are combined above, among each other and with the IV,
- * doesn't lead to good bit perturbation. As the IV's goal is to lead to
- * achieve that, perform a round of hashing of the combined hash -
- * resulting in near perfect perturbation.
+ * The hashing done above, even with an initial value, doesn't tend to
+ * result in good hash perturbation. Running the value produced above
+ * through murmurhash32 leads to near perfect hash perturbation.
*/
return murmurhash32(hashkey);
}
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 236222d72a1..f26c883c67d 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -160,7 +160,7 @@ ExecHashSubPlan(SubPlanState *node,
FindTupleHashEntry(node->hashtable,
slot,
node->cur_eq_comp,
- node->lhs_hash_funcs) != NULL)
+ node->lhs_hash_expr) != NULL)
{
ExecClearTuple(slot);
return BoolGetDatum(true);
@@ -857,7 +857,6 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
sstate->tab_eq_funcoids = NULL;
sstate->tab_hash_funcs = NULL;
sstate->tab_collations = NULL;
- sstate->lhs_hash_funcs = NULL;
sstate->cur_eq_funcs = NULL;
/*
@@ -897,6 +896,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
TupleDesc tupDescRight;
Oid *cross_eq_funcoids;
TupleTableSlot *slot;
+ FmgrInfo *lhs_hash_funcs;
List *oplist,
*lefttlist,
*righttlist;
@@ -953,7 +953,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid));
sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid));
sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
- sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+ lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
/* we'll need the cross-type equality fns below, but not in sstate */
cross_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid));
@@ -1003,7 +1003,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
&left_hashfn, &right_hashfn))
elog(ERROR, "could not find hash function for hash operator %u",
opexpr->opno);
- fmgr_info(left_hashfn, &sstate->lhs_hash_funcs[i - 1]);
+ fmgr_info(left_hashfn, &lhs_hash_funcs[i - 1]);
fmgr_info(right_hashfn, &sstate->tab_hash_funcs[i - 1]);
/* Set collation */
@@ -1039,6 +1039,16 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent)
sstate->planstate,
NULL);
+ /* Build the ExprState for generating hash values */
+ sstate->lhs_hash_expr = ExecBuildHash32FromAttrs(tupDescLeft,
+ &TTSOpsVirtual,
+ lhs_hash_funcs,
+ sstate->tab_collations,
+ sstate->numCols,
+ sstate->keyColIdx,
+ parent,
+ 0);
+
/*
* Create comparator for lookups of rows in the table (potentially
* cross-type comparisons).