Speed up Hash Join by making ExprStates support hashing

Here we add ExprState support for obtaining a 32-bit hash value from a list of expressions. This allows both faster hashing and also JIT compilation of these expressions. This is especially useful when hash joins have multiple join keys as the previous code called ExecEvalExpr on each hash join key individually and that was inefficient as tuple deformation would have only taken into account one key at a time, which could lead to walking the tuple once for each join key. With the new code, we'll determine the maximum attribute required and deform the tuple to that point only once. Some performance tests done with this change have shown up to a 20% performance increase of a query containing a Hash Join without JIT compilation and up to a 26% performance increase when JIT is enabled and optimization and inlining were performed by the JIT compiler. The performance increase with 1 join column was less with a 14% increase with and without JIT. This test was done using a fairly small hash table and a large number of hash probes. The increase will likely be less with large tables, especially ones larger than L3 cache as memory pressure is more likely to be the limiting factor there. This commit only addresses Hash Joins, but lays expression evaluation and JIT compilation infrastructure for other hashing needs such as Hash Aggregate. Author: David Rowley Reviewed-by: Alexey Dvoichenkov <alexey@hyperplane.net> Reviewed-by: Tels <nospam-pg-abuse@bloodgate.com> Discussion: https://postgr.es/m/CAApHDvoexAxgQFNQD_GRkr2O_eJUD1-wUGm%3Dm0L%2BGc%3DT%3DkEa4g%40mail.gmail.com
author: David Rowley <drowley@postgresql.org> 2024-08-20 13:38:22 +1200
committer: David Rowley <drowley@postgresql.org> 2024-08-20 13:38:22 +1200
commit: adf97c1562380e02acd60dc859c289ed3a8352ee (patch)
tree: bbc199a61078c00d997903c4d5ce0c2fdccc7224 /src/backend/executor/execExprInterp.c
parent: 9380e5f129d2a160ecc2444f61bb7cb97fd51fbb (diff)
download: postgresql-adf97c1562380e02acd60dc859c289ed3a8352ee.tar.gz
postgresql-adf97c1562380e02acd60dc859c289ed3a8352ee.zip
1 files changed, 110 insertions, 0 deletions
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index ea47c4d6f9c..77394e76c37 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -477,6 +477,11 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 		&&CASE_EEOP_DOMAIN_TESTVAL,
 		&&CASE_EEOP_DOMAIN_NOTNULL,
 		&&CASE_EEOP_DOMAIN_CHECK,
+		&&CASE_EEOP_HASHDATUM_SET_INITVAL,
+		&&CASE_EEOP_HASHDATUM_FIRST,
+		&&CASE_EEOP_HASHDATUM_FIRST_STRICT,
+		&&CASE_EEOP_HASHDATUM_NEXT32,
+		&&CASE_EEOP_HASHDATUM_NEXT32_STRICT,
 		&&CASE_EEOP_CONVERT_ROWTYPE,
 		&&CASE_EEOP_SCALARARRAYOP,
 		&&CASE_EEOP_HASHED_SCALARARRAYOP,
@@ -1543,6 +1548,111 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 			EEO_NEXT();
 		}
 
+		EEO_CASE(EEOP_HASHDATUM_SET_INITVAL)
+		{
+			*op->resvalue = op->d.hashdatum_initvalue.init_value;
+			*op->resnull = false;
+
+			EEO_NEXT();
+		}
+
+		EEO_CASE(EEOP_HASHDATUM_FIRST)
+		{
+			FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
+
+			/*
+			 * Save the Datum on non-null inputs, otherwise store 0 so that
+			 * subsequent NEXT32 operations combine with an initialized value.
+			 */
+			if (!fcinfo->args[0].isnull)
+				*op->resvalue = op->d.hashdatum.fn_addr(fcinfo);
+			else
+				*op->resvalue = (Datum) 0;
+
+			*op->resnull = false;
+
+			EEO_NEXT();
+		}
+
+		EEO_CASE(EEOP_HASHDATUM_FIRST_STRICT)
+		{
+			FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
+
+			if (fcinfo->args[0].isnull)
+			{
+				/*
+				 * With strict we have the expression return NULL instead of
+				 * ignoring NULL input values.  We've nothing more to do after
+				 * finding a NULL.
+				 */
+				*op->resnull = true;
+				*op->resvalue = (Datum) 0;
+				EEO_JUMP(op->d.hashdatum.jumpdone);
+			}
+
+			/* execute the hash function and save the resulting value */
+			*op->resvalue = op->d.hashdatum.fn_addr(fcinfo);
+			*op->resnull = false;
+
+			EEO_NEXT();
+		}
+
+		EEO_CASE(EEOP_HASHDATUM_NEXT32)
+		{
+			FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
+			uint32		existing_hash = DatumGetUInt32(*op->resvalue);
+
+			/* combine successive hash values by rotating */
+			existing_hash = pg_rotate_left32(existing_hash, 1);
+
+			/* leave the hash value alone on NULL inputs */
+			if (!fcinfo->args[0].isnull)
+			{
+				uint32		hashvalue;
+
+				/* execute hash func and combine with previous hash value */
+				hashvalue = DatumGetUInt32(op->d.hashdatum.fn_addr(fcinfo));
+				existing_hash = existing_hash ^ hashvalue;
+			}
+
+			*op->resvalue = UInt32GetDatum(existing_hash);
+			*op->resnull = false;
+
+			EEO_NEXT();
+		}
+
+		EEO_CASE(EEOP_HASHDATUM_NEXT32_STRICT)
+		{
+			FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
+
+			if (fcinfo->args[0].isnull)
+			{
+				/*
+				 * With strict we have the expression return NULL instead of
+				 * ignoring NULL input values.  We've nothing more to do after
+				 * finding a NULL.
+				 */
+				*op->resnull = true;
+				*op->resvalue = (Datum) 0;
+				EEO_JUMP(op->d.hashdatum.jumpdone);
+			}
+			else
+			{
+				uint32		existing_hash = DatumGetUInt32(*op->resvalue);
+				uint32		hashvalue;
+
+				/* combine successive hash values by rotating */
+				existing_hash = pg_rotate_left32(existing_hash, 1);
+
+				/* execute hash func and combine with previous hash value */
+				hashvalue = DatumGetUInt32(op->d.hashdatum.fn_addr(fcinfo));
+				*op->resvalue = UInt32GetDatum(existing_hash ^ hashvalue);
+				*op->resnull = false;
+			}
+
+			EEO_NEXT();
+		}
+
 		EEO_CASE(EEOP_XMLEXPR)
 		{
 			/* too complex for an inline implementation */
author	David Rowley <drowley@postgresql.org>	2024-08-20 13:38:22 +1200
committer	David Rowley <drowley@postgresql.org>	2024-08-20 13:38:22 +1200
commit	adf97c1562380e02acd60dc859c289ed3a8352ee (patch)
tree	bbc199a61078c00d997903c4d5ce0c2fdccc7224 /src/backend/executor/execExprInterp.c
parent	9380e5f129d2a160ecc2444f61bb7cb97fd51fbb (diff)
download	postgresql-adf97c1562380e02acd60dc859c289ed3a8352ee.tar.gz postgresql-adf97c1562380e02acd60dc859c289ed3a8352ee.zip