diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/executor/execExpr.c | 155 | ||||
-rw-r--r-- | src/backend/executor/execGrouping.c | 82 | ||||
-rw-r--r-- | src/backend/executor/nodeSubplan.c | 18 | ||||
-rw-r--r-- | src/include/executor/executor.h | 10 | ||||
-rw-r--r-- | src/include/nodes/execnodes.h | 25 |
5 files changed, 224 insertions, 66 deletions
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index e0eb96fd5ad..81714341f07 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -3979,6 +3979,161 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate, } /* + * Build an ExprState that calls the given hash function(s) on the attnums + * given by 'keyColIdx' . When numCols > 1, the hash values returned by each + * hash function are combined to produce a single hash value. + * + * desc: tuple descriptor for the to-be-hashed columns + * ops: TupleTableSlotOps to use for the give TupleDesc + * hashfunctions: FmgrInfos for each hash function to call, one per numCols. + * These are used directly in the returned ExprState so must remain allocated. + * collations: collation to use when calling the hash function. + * numCols: array length of hashfunctions, collations and keyColIdx. + * parent: PlanState node that the resulting ExprState will be evaluated at + * init_value: Normally 0, but can be set to other values to seed the hash + * with. Non-zero is marginally slower, so best to only use if it's provably + * worthwhile. + */ +ExprState * +ExecBuildHash32FromAttrs(TupleDesc desc, const TupleTableSlotOps *ops, + FmgrInfo *hashfunctions, Oid *collations, + int numCols, AttrNumber *keyColIdx, + PlanState *parent, uint32 init_value) +{ + ExprState *state = makeNode(ExprState); + ExprEvalStep scratch = {0}; + NullableDatum *iresult = NULL; + intptr_t opcode; + AttrNumber last_attnum = 0; + + Assert(numCols >= 0); + + state->parent = parent; + + /* + * Make a place to store intermediate hash values between subsequent + * hashing of individual columns. We only need this if there is more than + * one column to hash or an initial value plus one column. + */ + if ((int64) numCols + (init_value != 0) > 1) + iresult = palloc(sizeof(NullableDatum)); + + /* find the highest attnum so we deform the tuple to that point */ + for (int i = 0; i < numCols; i++) + last_attnum = Max(last_attnum, keyColIdx[i]); + + scratch.opcode = EEOP_INNER_FETCHSOME; + scratch.d.fetch.last_var = last_attnum; + scratch.d.fetch.fixed = false; + scratch.d.fetch.kind = ops; + scratch.d.fetch.known_desc = desc; + if (ExecComputeSlotInfo(state, &scratch)) + ExprEvalPushStep(state, &scratch); + + if (init_value == 0) + { + /* + * No initial value, so we can assign the result of the hash function + * for the first attribute without having to concern ourselves with + * combining the result with any initial value. + */ + opcode = EEOP_HASHDATUM_FIRST; + } + else + { + /* + * Set up operation to set the initial value. Normally we store this + * in the intermediate hash value location, but if there are no + * columns to hash, store it in the ExprState's result field. + */ + scratch.opcode = EEOP_HASHDATUM_SET_INITVAL; + scratch.d.hashdatum_initvalue.init_value = UInt32GetDatum(init_value); + scratch.resvalue = numCols > 0 ? &iresult->value : &state->resvalue; + scratch.resnull = numCols > 0 ? &iresult->isnull : &state->resnull; + + ExprEvalPushStep(state, &scratch); + + /* + * When using an initial value use the NEXT32 ops as the FIRST ops + * would overwrite the stored initial value. + */ + opcode = EEOP_HASHDATUM_NEXT32; + } + + for (int i = 0; i < numCols; i++) + { + FmgrInfo *finfo; + FunctionCallInfo fcinfo; + Oid inputcollid = collations[i]; + AttrNumber attnum = keyColIdx[i] - 1; + + finfo = &hashfunctions[i]; + fcinfo = palloc0(SizeForFunctionCallInfo(1)); + + /* Initialize function call parameter structure too */ + InitFunctionCallInfoData(*fcinfo, finfo, 1, inputcollid, NULL, NULL); + + /* + * Fetch inner Var for this attnum and store it in the 1st arg of the + * hash func. + */ + scratch.opcode = EEOP_INNER_VAR; + scratch.resvalue = &fcinfo->args[0].value; + scratch.resnull = &fcinfo->args[0].isnull; + scratch.d.var.attnum = attnum; + scratch.d.var.vartype = TupleDescAttr(desc, attnum)->atttypid; + + ExprEvalPushStep(state, &scratch); + + /* Call the hash function */ + scratch.opcode = opcode; + + if (i == numCols - 1) + { + /* + * The result for hashing the final column is stored in the + * ExprState. + */ + scratch.resvalue = &state->resvalue; + scratch.resnull = &state->resnull; + } + else + { + Assert(iresult != NULL); + + /* intermediate values are stored in an intermediate result */ + scratch.resvalue = &iresult->value; + scratch.resnull = &iresult->isnull; + } + + /* + * NEXT32 opcodes need to look at the intermediate result. We might + * as well just set this for all ops. FIRSTs won't look at it. + */ + scratch.d.hashdatum.iresult = iresult; + + scratch.d.hashdatum.finfo = finfo; + scratch.d.hashdatum.fcinfo_data = fcinfo; + scratch.d.hashdatum.fn_addr = finfo->fn_addr; + scratch.d.hashdatum.jumpdone = -1; + + ExprEvalPushStep(state, &scratch); + + /* subsequent attnums must be combined with the previous */ + opcode = EEOP_HASHDATUM_NEXT32; + } + + scratch.resvalue = NULL; + scratch.resnull = NULL; + scratch.opcode = EEOP_DONE; + ExprEvalPushStep(state, &scratch); + + ExecReadyExpr(state); + + return state; +} + +/* * Build an ExprState that calls the given hash function(s) on the given * 'hash_exprs'. When multiple expressions are present, the hash values * returned by each hash function are combined to produce a single hash value. diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 774e4de8828..9a88fc65249 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -169,6 +169,7 @@ BuildTupleHashTableExt(PlanState *parent, Size hash_mem_limit; MemoryContext oldcontext; bool allow_jit; + uint32 hash_iv = 0; Assert(nbuckets > 0); @@ -183,14 +184,13 @@ BuildTupleHashTableExt(PlanState *parent, hashtable->numCols = numCols; hashtable->keyColIdx = keyColIdx; - hashtable->tab_hash_funcs = hashfunctions; hashtable->tab_collations = collations; hashtable->tablecxt = tablecxt; hashtable->tempcxt = tempcxt; hashtable->entrysize = entrysize; hashtable->tableslot = NULL; /* will be made on first lookup */ hashtable->inputslot = NULL; - hashtable->in_hash_funcs = NULL; + hashtable->in_hash_expr = NULL; hashtable->cur_eq_func = NULL; /* @@ -202,9 +202,7 @@ BuildTupleHashTableExt(PlanState *parent, * underestimated. */ if (use_variable_hash_iv) - hashtable->hash_iv = murmurhash32(ParallelWorkerNumber); - else - hashtable->hash_iv = 0; + hash_iv = murmurhash32(ParallelWorkerNumber); hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable); @@ -225,6 +223,16 @@ BuildTupleHashTableExt(PlanState *parent, */ allow_jit = metacxt != tablecxt; + /* build hash ExprState for all columns */ + hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc, + &TTSOpsMinimalTuple, + hashfunctions, + collations, + numCols, + keyColIdx, + allow_jit ? parent : NULL, + hash_iv); + /* build comparator for all columns */ /* XXX: should we support non-minimal tuples for the inputslot? */ hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc, @@ -316,7 +324,7 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, /* set up data needed by hash and match functions */ hashtable->inputslot = slot; - hashtable->in_hash_funcs = hashtable->tab_hash_funcs; + hashtable->in_hash_expr = hashtable->tab_hash_expr; hashtable->cur_eq_func = hashtable->tab_eq_func; local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL); @@ -342,7 +350,7 @@ TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot) uint32 hash; hashtable->inputslot = slot; - hashtable->in_hash_funcs = hashtable->tab_hash_funcs; + hashtable->in_hash_expr = hashtable->tab_hash_expr; /* Need to run the hash functions in short-lived context */ oldContext = MemoryContextSwitchTo(hashtable->tempcxt); @@ -370,7 +378,7 @@ LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot, /* set up data needed by hash and match functions */ hashtable->inputslot = slot; - hashtable->in_hash_funcs = hashtable->tab_hash_funcs; + hashtable->in_hash_expr = hashtable->tab_hash_expr; hashtable->cur_eq_func = hashtable->tab_eq_func; entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash); @@ -386,14 +394,14 @@ LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot, * created if there's not a match. This is similar to the non-creating * case of LookupTupleHashEntry, except that it supports cross-type * comparisons, in which the given tuple is not of the same type as the - * table entries. The caller must provide the hash functions to use for - * the input tuple, as well as the equality functions, since these may be + * table entries. The caller must provide the hash ExprState to use for + * the input tuple, as well as the equality ExprState, since these may be * different from the table's internal functions. */ TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, ExprState *eqcomp, - FmgrInfo *hashfunctions) + ExprState *hashexpr) { TupleHashEntry entry; MemoryContext oldContext; @@ -404,7 +412,7 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, /* Set up data needed by hash and match functions */ hashtable->inputslot = slot; - hashtable->in_hash_funcs = hashfunctions; + hashtable->in_hash_expr = hashexpr; hashtable->cur_eq_func = eqcomp; /* Search the hash table */ @@ -421,25 +429,24 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, * copied into the table. * * Also, the caller must select an appropriate memory context for running - * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.) + * the hash functions. */ static uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb, const MinimalTuple tuple) { TupleHashTable hashtable = (TupleHashTable) tb->private_data; - int numCols = hashtable->numCols; - AttrNumber *keyColIdx = hashtable->keyColIdx; - uint32 hashkey = hashtable->hash_iv; + uint32 hashkey; TupleTableSlot *slot; - FmgrInfo *hashfunctions; - int i; + bool isnull; if (tuple == NULL) { /* Process the current input tuple for the table */ - slot = hashtable->inputslot; - hashfunctions = hashtable->in_hash_funcs; + hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot; + hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr, + hashtable->exprcontext, + &isnull)); } else { @@ -449,38 +456,17 @@ TupleHashTableHash_internal(struct tuplehash_hash *tb, * (this case never actually occurs due to the way simplehash.h is * used, as the hash-value is stored in the entries) */ - slot = hashtable->tableslot; + slot = hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot; ExecStoreMinimalTuple(tuple, slot, false); - hashfunctions = hashtable->tab_hash_funcs; - } - - for (i = 0; i < numCols; i++) - { - AttrNumber att = keyColIdx[i]; - Datum attr; - bool isNull; - - /* combine successive hashkeys by rotating */ - hashkey = pg_rotate_left32(hashkey, 1); - - attr = slot_getattr(slot, att, &isNull); - - if (!isNull) /* treat nulls as having hash key 0 */ - { - uint32 hkey; - - hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], - hashtable->tab_collations[i], - attr)); - hashkey ^= hkey; - } + hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr, + hashtable->exprcontext, + &isnull)); } /* - * The way hashes are combined above, among each other and with the IV, - * doesn't lead to good bit perturbation. As the IV's goal is to lead to - * achieve that, perform a round of hashing of the combined hash - - * resulting in near perfect perturbation. + * The hashing done above, even with an initial value, doesn't tend to + * result in good hash perturbation. Running the value produced above + * through murmurhash32 leads to near perfect hash perturbation. */ return murmurhash32(hashkey); } diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 236222d72a1..f26c883c67d 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -160,7 +160,7 @@ ExecHashSubPlan(SubPlanState *node, FindTupleHashEntry(node->hashtable, slot, node->cur_eq_comp, - node->lhs_hash_funcs) != NULL) + node->lhs_hash_expr) != NULL) { ExecClearTuple(slot); return BoolGetDatum(true); @@ -857,7 +857,6 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->tab_eq_funcoids = NULL; sstate->tab_hash_funcs = NULL; sstate->tab_collations = NULL; - sstate->lhs_hash_funcs = NULL; sstate->cur_eq_funcs = NULL; /* @@ -897,6 +896,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) TupleDesc tupDescRight; Oid *cross_eq_funcoids; TupleTableSlot *slot; + FmgrInfo *lhs_hash_funcs; List *oplist, *lefttlist, *righttlist; @@ -953,7 +953,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid)); sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid)); sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); - sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); + lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); /* we'll need the cross-type equality fns below, but not in sstate */ cross_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid)); @@ -1003,7 +1003,7 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) &left_hashfn, &right_hashfn)) elog(ERROR, "could not find hash function for hash operator %u", opexpr->opno); - fmgr_info(left_hashfn, &sstate->lhs_hash_funcs[i - 1]); + fmgr_info(left_hashfn, &lhs_hash_funcs[i - 1]); fmgr_info(right_hashfn, &sstate->tab_hash_funcs[i - 1]); /* Set collation */ @@ -1039,6 +1039,16 @@ ExecInitSubPlan(SubPlan *subplan, PlanState *parent) sstate->planstate, NULL); + /* Build the ExprState for generating hash values */ + sstate->lhs_hash_expr = ExecBuildHash32FromAttrs(tupDescLeft, + &TTSOpsVirtual, + lhs_hash_funcs, + sstate->tab_collations, + sstate->numCols, + sstate->keyColIdx, + parent, + 0); + /* * Create comparator for lookups of rows in the table (potentially * cross-type comparisons). diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index b8433eabc1f..494ec4f2e5d 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -159,7 +159,7 @@ extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable, extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, ExprState *eqcomp, - FmgrInfo *hashfunctions); + ExprState *hashexpr); extern void ResetTupleHashTable(TupleHashTable hashtable); /* @@ -288,6 +288,14 @@ extern ExprState *ExecInitCheck(List *qual, PlanState *parent); extern List *ExecInitExprList(List *nodes, PlanState *parent); extern ExprState *ExecBuildAggTrans(AggState *aggstate, struct AggStatePerPhaseData *phase, bool doSort, bool doHash, bool nullcheck); +extern ExprState *ExecBuildHash32FromAttrs(TupleDesc desc, + const TupleTableSlotOps *ops, + FmgrInfo *hashfunctions, + Oid *collations, + int numCols, + AttrNumber *keyColIdx, + PlanState *parent, + uint32 init_value); extern ExprState *ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops, const Oid *hashfunc_oids, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 182a6956bb0..7f71b7625df 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -795,15 +795,15 @@ typedef struct ExecAuxRowMark * * All-in-memory tuple hash tables are used for a number of purposes. * - * Note: tab_hash_funcs are for the key datatype(s) stored in the table, - * and tab_eq_func are non-cross-type equality operators for those types. - * Normally these are the only functions used, but FindTupleHashEntry() - * supports searching a hashtable using cross-data-type hashing. For that, - * the caller must supply hash functions for the LHS datatype as well as - * the cross-type equality operators to use. in_hash_funcs and cur_eq_func - * are set to point to the caller's function arrays while doing such a search. - * During LookupTupleHashEntry(), they point to tab_hash_funcs and - * tab_eq_func respectively. + * Note: tab_hash_expr is for hashing the key datatype(s) stored in the table, + * and tab_eq_func is a non-cross-type ExprState for equality checks on those + * types. Normally these are the only ExprStates used, but + * FindTupleHashEntry() supports searching a hashtable using cross-data-type + * hashing. For that, the caller must supply an ExprState to hash the LHS + * datatype as well as the cross-type equality ExprState to use. in_hash_expr + * and cur_eq_func are set to point to the caller's hash and equality + * ExprStates while doing such a search. During LookupTupleHashEntry(), they + * point to tab_hash_expr and tab_eq_func respectively. * ---------------------------------------------------------------- */ typedef struct TupleHashEntryData *TupleHashEntry; @@ -830,7 +830,7 @@ typedef struct TupleHashTableData tuplehash_hash *hashtab; /* underlying hash table */ int numCols; /* number of columns in lookup key */ AttrNumber *keyColIdx; /* attr numbers of key columns */ - FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ + ExprState *tab_hash_expr; /* ExprState for hashing table datatype(s) */ ExprState *tab_eq_func; /* comparator for table datatype(s) */ Oid *tab_collations; /* collations for hash and comparison */ MemoryContext tablecxt; /* memory context containing table */ @@ -839,9 +839,8 @@ typedef struct TupleHashTableData TupleTableSlot *tableslot; /* slot for referencing table entries */ /* The following fields are set transiently for each table search: */ TupleTableSlot *inputslot; /* current input tuple's slot */ - FmgrInfo *in_hash_funcs; /* hash functions for input datatype(s) */ + ExprState *in_hash_expr; /* ExprState for hashing input datatype(s) */ ExprState *cur_eq_func; /* comparator for input vs. table */ - uint32 hash_iv; /* hash-function IV */ ExprContext *exprcontext; /* expression context */ } TupleHashTableData; @@ -994,7 +993,7 @@ typedef struct SubPlanState * datatype(s) */ Oid *tab_collations; /* collations for hash and comparison */ FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */ - FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */ + ExprState *lhs_hash_expr; /* hash expr for lefthand datatype(s) */ FmgrInfo *cur_eq_funcs; /* equality functions for LHS vs. table */ ExprState *cur_eq_comp; /* equality comparator for LHS vs. table */ } SubPlanState; |