diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2001-08-14 22:21:59 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2001-08-14 22:21:59 +0000 |
commit | 5f7c2bdb537bd18fd7f1cc942950e7e64c6e0a92 (patch) | |
tree | deb73ae98b59fb4315f82b781a7c22859cbccd00 /src/backend/utils/adt/numeric.c | |
parent | 6f2943b52e9f4af3c4488c0373a3490e38c032a7 (diff) | |
download | postgresql-5f7c2bdb537bd18fd7f1cc942950e7e64c6e0a92.tar.gz postgresql-5f7c2bdb537bd18fd7f1cc942950e7e64c6e0a92.zip |
sum() on int2 and int4 columns now uses an int8, not numeric, accumulator
for speed reasons; its result type also changes to int8. avg() on these
datatypes now accumulates the running sum in int8 for speed; but we still
deliver the final result as numeric, so that fractional accuracy is
preserved.
count() now counts and returns in int8, not int4. I am a little nervous
about this possibly breaking users' code, but there didn't seem to be
a strong sentiment for avoiding the problem. If we get complaints during
beta, we can change count back to int4 and add a "count8" aggregate.
For that matter, users can do it for themselves with a simple CREATE
AGGREGATE command; the int4inc function is still present, so no C hacking
is needed.
Also added max() and min() aggregates for OID that do proper unsigned
comparison, instead of piggybacking on int4 aggregates.
initdb forced.
Diffstat (limited to 'src/backend/utils/adt/numeric.c')
-rw-r--r-- | src/backend/utils/adt/numeric.c | 145 |
1 files changed, 118 insertions, 27 deletions
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index bb0e8b2b7ef..f115657102d 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -5,7 +5,7 @@ * * 1998 Jan Wieck * - * $Header: /cvsroot/pgsql/src/backend/utils/adt/numeric.c,v 1.42 2001/06/07 00:09:29 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/numeric.c,v 1.43 2001/08/14 22:21:58 tgl Exp $ * * ---------- */ @@ -1772,7 +1772,11 @@ numeric_accum(PG_FUNCTION_ARGS) /* * Integer data types all use Numeric accumulators to share code and - * avoid risk of overflow. + * avoid risk of overflow. For int2 and int4 inputs, Numeric accumulation + * is overkill for the N and sum(X) values, but definitely not overkill + * for the sum(X*X) value. Hence, we use int2_accum and int4_accum only + * for stddev/variance --- there are faster special-purpose accumulator + * routines for SUM and AVG of these datatypes. */ Datum @@ -1979,20 +1983,25 @@ numeric_stddev(PG_FUNCTION_ARGS) /* * SUM transition functions for integer datatypes. * - * We use a Numeric accumulator to avoid overflow. Because SQL92 defines - * the SUM() of no values to be NULL, not zero, the initial condition of - * the transition data value needs to be NULL. This means we can't rely - * on ExecAgg to automatically insert the first non-null data value into - * the transition data: it doesn't know how to do the type conversion. - * The upshot is that these routines have to be marked non-strict and - * handle substitution of the first non-null input themselves. + * To avoid overflow, we use accumulators wider than the input datatype. + * A Numeric accumulator is needed for int8 input; for int4 and int2 + * inputs, we use int8 accumulators which should be sufficient for practical + * purposes. (The latter two therefore don't really belong in this file, + * but we keep them here anyway.) + * + * Because SQL92 defines the SUM() of no values to be NULL, not zero, + * the initial condition of the transition data value needs to be NULL. This + * means we can't rely on ExecAgg to automatically insert the first non-null + * data value into the transition data: it doesn't know how to do the type + * conversion. The upshot is that these routines have to be marked non-strict + * and handle substitution of the first non-null input themselves. */ Datum int2_sum(PG_FUNCTION_ARGS) { - Numeric oldsum; - Datum newval; + int64 oldsum; + int64 newval; if (PG_ARGISNULL(0)) { @@ -2000,28 +2009,27 @@ int2_sum(PG_FUNCTION_ARGS) if (PG_ARGISNULL(1)) PG_RETURN_NULL(); /* still no non-null */ /* This is the first non-null input. */ - newval = DirectFunctionCall1(int2_numeric, PG_GETARG_DATUM(1)); - PG_RETURN_DATUM(newval); + newval = (int64) PG_GETARG_INT16(1); + PG_RETURN_INT64(newval); } - oldsum = PG_GETARG_NUMERIC(0); + oldsum = PG_GETARG_INT64(0); /* Leave sum unchanged if new input is null. */ if (PG_ARGISNULL(1)) - PG_RETURN_NUMERIC(oldsum); + PG_RETURN_INT64(oldsum); /* OK to do the addition. */ - newval = DirectFunctionCall1(int2_numeric, PG_GETARG_DATUM(1)); + newval = oldsum + (int64) PG_GETARG_INT16(1); - PG_RETURN_DATUM(DirectFunctionCall2(numeric_add, - NumericGetDatum(oldsum), newval)); + PG_RETURN_INT64(newval); } Datum int4_sum(PG_FUNCTION_ARGS) { - Numeric oldsum; - Datum newval; + int64 oldsum; + int64 newval; if (PG_ARGISNULL(0)) { @@ -2029,21 +2037,20 @@ int4_sum(PG_FUNCTION_ARGS) if (PG_ARGISNULL(1)) PG_RETURN_NULL(); /* still no non-null */ /* This is the first non-null input. */ - newval = DirectFunctionCall1(int4_numeric, PG_GETARG_DATUM(1)); - PG_RETURN_DATUM(newval); + newval = (int64) PG_GETARG_INT32(1); + PG_RETURN_INT64(newval); } - oldsum = PG_GETARG_NUMERIC(0); + oldsum = PG_GETARG_INT64(0); /* Leave sum unchanged if new input is null. */ if (PG_ARGISNULL(1)) - PG_RETURN_NUMERIC(oldsum); + PG_RETURN_INT64(oldsum); /* OK to do the addition. */ - newval = DirectFunctionCall1(int4_numeric, PG_GETARG_DATUM(1)); + newval = oldsum + (int64) PG_GETARG_INT32(1); - PG_RETURN_DATUM(DirectFunctionCall2(numeric_add, - NumericGetDatum(oldsum), newval)); + PG_RETURN_INT64(newval); } Datum @@ -2076,6 +2083,90 @@ int8_sum(PG_FUNCTION_ARGS) } +/* + * Routines for avg(int2) and avg(int4). The transition datatype + * is a two-element int8 array, holding count and sum. + */ + +typedef struct Int8TransTypeData +{ +#ifndef INT64_IS_BUSTED + int64 count; + int64 sum; +#else + /* "int64" isn't really 64 bits, so fake up properly-aligned fields */ + int32 count; + int32 pad1; + int32 sum; + int32 pad2; +#endif +} Int8TransTypeData; + +Datum +int2_avg_accum(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P_COPY(0); + int16 newval = PG_GETARG_INT16(1); + Int8TransTypeData *transdata; + + /* + * We copied the input array, so it's okay to scribble on it directly. + */ + if (ARR_SIZE(transarray) != ARR_OVERHEAD(1) + sizeof(Int8TransTypeData)) + elog(ERROR, "int2_avg_accum: expected 2-element int8 array"); + transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray); + + transdata->count++; + transdata->sum += newval; + + PG_RETURN_ARRAYTYPE_P(transarray); +} + +Datum +int4_avg_accum(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P_COPY(0); + int32 newval = PG_GETARG_INT32(1); + Int8TransTypeData *transdata; + + /* + * We copied the input array, so it's okay to scribble on it directly. + */ + if (ARR_SIZE(transarray) != ARR_OVERHEAD(1) + sizeof(Int8TransTypeData)) + elog(ERROR, "int4_avg_accum: expected 2-element int8 array"); + transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray); + + transdata->count++; + transdata->sum += newval; + + PG_RETURN_ARRAYTYPE_P(transarray); +} + +Datum +int8_avg(PG_FUNCTION_ARGS) +{ + ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); + Int8TransTypeData *transdata; + Datum countd, + sumd; + + if (ARR_SIZE(transarray) != ARR_OVERHEAD(1) + sizeof(Int8TransTypeData)) + elog(ERROR, "int8_avg: expected 2-element int8 array"); + transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray); + + /* SQL92 defines AVG of no values to be NULL */ + if (transdata->count == 0) + PG_RETURN_NULL(); + + countd = DirectFunctionCall1(int8_numeric, + Int64GetDatumFast(transdata->count)); + sumd = DirectFunctionCall1(int8_numeric, + Int64GetDatumFast(transdata->sum)); + + PG_RETURN_DATUM(DirectFunctionCall2(numeric_div, sumd, countd)); +} + + /* ---------------------------------------------------------------------- * * Local functions follow |