aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/intarray/Makefile4
-rw-r--r--contrib/intarray/_int_selfuncs.c341
-rw-r--r--contrib/intarray/expected/_int.out1
-rw-r--r--contrib/intarray/intarray--1.0--1.1.sql49
-rw-r--r--contrib/intarray/intarray--1.1.sql (renamed from contrib/intarray/intarray--1.0.sql)61
-rw-r--r--contrib/intarray/intarray.control2
-rw-r--r--contrib/intarray/sql/_int.sql2
7 files changed, 443 insertions, 17 deletions
diff --git a/contrib/intarray/Makefile b/contrib/intarray/Makefile
index 920c5b1ba03..5ea7f2aedf8 100644
--- a/contrib/intarray/Makefile
+++ b/contrib/intarray/Makefile
@@ -2,10 +2,10 @@
MODULE_big = _int
OBJS = _int_bool.o _int_gist.o _int_op.o _int_tool.o \
- _intbig_gist.o _int_gin.o $(WIN32RES)
+ _intbig_gist.o _int_gin.o _int_selfuncs.o $(WIN32RES)
EXTENSION = intarray
-DATA = intarray--1.0.sql intarray--unpackaged--1.0.sql
+DATA = intarray--1.1.sql intarray--1.0--1.1.sql intarray--unpackaged--1.0.sql
PGFILEDESC = "intarray - functions and operators for arrays of integers"
REGRESS = _int
diff --git a/contrib/intarray/_int_selfuncs.c b/contrib/intarray/_int_selfuncs.c
new file mode 100644
index 00000000000..2af1c9b65d1
--- /dev/null
+++ b/contrib/intarray/_int_selfuncs.c
@@ -0,0 +1,341 @@
+/*-------------------------------------------------------------------------
+ *
+ * _int_selfuncs.c
+ * Functions for selectivity estimation of intarray operators
+ *
+ * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * contrib/intarray/_int_selfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "_int.h"
+
+#include "access/htup_details.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_statistic.h"
+#include "catalog/pg_type.h"
+#include "utils/selfuncs.h"
+#include "utils/syscache.h"
+#include "utils/lsyscache.h"
+#include "miscadmin.h"
+
+PG_FUNCTION_INFO_V1(_int_overlap_sel);
+PG_FUNCTION_INFO_V1(_int_contains_sel);
+PG_FUNCTION_INFO_V1(_int_contained_sel);
+PG_FUNCTION_INFO_V1(_int_overlap_joinsel);
+PG_FUNCTION_INFO_V1(_int_contains_joinsel);
+PG_FUNCTION_INFO_V1(_int_contained_joinsel);
+PG_FUNCTION_INFO_V1(_int_matchsel);
+
+Datum _int_overlap_sel(PG_FUNCTION_ARGS);
+Datum _int_contains_sel(PG_FUNCTION_ARGS);
+Datum _int_contained_sel(PG_FUNCTION_ARGS);
+Datum _int_overlap_joinsel(PG_FUNCTION_ARGS);
+Datum _int_contains_joinsel(PG_FUNCTION_ARGS);
+Datum _int_contained_joinsel(PG_FUNCTION_ARGS);
+Datum _int_matchsel(PG_FUNCTION_ARGS);
+
+
+static Selectivity int_query_opr_selec(ITEM *item, Datum *values, float4 *freqs,
+ int nmncelems, float4 minfreq);
+static int compare_val_int4(const void *a, const void *b);
+
+/*
+ * Wrappers around the default array selectivity estimation functions.
+ *
+ * The default array selectivity operators for the @>, && and @< operators
+ * work fine for integer arrays. However, if we tried to just use arraycontsel
+ * and arracontjoinsel directly as the cost estimator functions for our
+ * operators, they would not work as intended, because they look at the
+ * operator's OID. Our operators behave exactly like the built-in anyarray
+ * versions, but we must tell the cost estimator functions which built-in
+ * operators they correspond to. These wrappers just replace the operator
+ * OID with the corresponding built-in operator's OID, and call the built-in
+ * function.
+ */
+
+Datum
+_int_overlap_sel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
+ PG_GETARG_DATUM(0),
+ ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
+ PG_GETARG_DATUM(2),
+ PG_GETARG_DATUM(3)));
+}
+
+Datum
+_int_contains_sel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
+ PG_GETARG_DATUM(0),
+ ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
+ PG_GETARG_DATUM(2),
+ PG_GETARG_DATUM(3)));
+}
+
+Datum
+_int_contained_sel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall4(arraycontsel,
+ PG_GETARG_DATUM(0),
+ ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
+ PG_GETARG_DATUM(2),
+ PG_GETARG_DATUM(3)));
+}
+
+Datum
+_int_overlap_joinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
+ PG_GETARG_DATUM(0),
+ ObjectIdGetDatum(OID_ARRAY_OVERLAP_OP),
+ PG_GETARG_DATUM(2),
+ PG_GETARG_DATUM(3),
+ PG_GETARG_DATUM(4)));
+}
+
+Datum
+_int_contains_joinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
+ PG_GETARG_DATUM(0),
+ ObjectIdGetDatum(OID_ARRAY_CONTAINS_OP),
+ PG_GETARG_DATUM(2),
+ PG_GETARG_DATUM(3),
+ PG_GETARG_DATUM(4)));
+}
+
+Datum
+_int_contained_joinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall5(arraycontjoinsel,
+ PG_GETARG_DATUM(0),
+ ObjectIdGetDatum(OID_ARRAY_CONTAINED_OP),
+ PG_GETARG_DATUM(2),
+ PG_GETARG_DATUM(3),
+ PG_GETARG_DATUM(4)));
+}
+
+
+/*
+ * _int_matchsel -- restriction selectivity function for intarray @@ query_int
+ */
+Datum
+_int_matchsel(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3);
+ VariableStatData vardata;
+ Node *other;
+ bool varonleft;
+ Selectivity selec;
+ QUERYTYPE *query;
+ Datum *mcelems = NULL;
+ float4 *mcefreqs = NULL;
+ int nmcelems = 0;
+ float4 minfreq = 0.0;
+ float4 nullfrac = 0.0;
+ Form_pg_statistic stats;
+ Datum *values = NULL;
+ int nvalues = 0;
+ float4 *numbers = NULL;
+ int nnumbers = 0;
+
+ /*
+ * If expression is not "variable @@ something" or "something @@ variable"
+ * then punt and return a default estimate.
+ */
+ if (!get_restriction_variable(root, args, varRelid,
+ &vardata, &other, &varonleft))
+ PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+
+ /*
+ * Variable should be int[]. We don't support cases where variable is
+ * query_int.
+ */
+ if (vardata.vartype != INT4ARRAYOID)
+ PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+
+ /*
+ * Can't do anything useful if the something is not a constant, either.
+ */
+ if (!IsA(other, Const))
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+ }
+
+ /*
+ * The "@@" operator is strict, so we can cope with NULL right away.
+ */
+ if (((Const *) other)->constisnull)
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(0.0);
+ }
+
+ /* The caller made sure the const is a query, so get it now */
+ query = DatumGetQueryTypeP(((Const *) other)->constvalue);
+
+ /* Empty query matches nothing */
+ if (query->size == 0)
+ {
+ ReleaseVariableStats(vardata);
+ return (Selectivity) 0.0;
+ }
+
+ /*
+ * Get the statistics for the intarray column.
+ *
+ * We're interested in the Most-Common-Elements list, and the NULL
+ * fraction.
+ */
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+ nullfrac = stats->stanullfrac;
+
+ /*
+ * For an int4 array, the default array type analyze function will
+ * collect a Most Common Elements list, which is an array of int4s.
+ */
+ if (get_attstatsslot(vardata.statsTuple,
+ INT4OID, -1,
+ STATISTIC_KIND_MCELEM, InvalidOid,
+ NULL,
+ &values, &nvalues,
+ &numbers, &nnumbers))
+ {
+ /*
+ * There should be three more Numbers than Values, because the
+ * last three (for intarray) cells are taken for minimal, maximal
+ * and nulls frequency. Punt if not.
+ */
+ if (nnumbers == nvalues + 3)
+ {
+ /* Grab the lowest frequency. */
+ minfreq = numbers[nnumbers - (nnumbers - nvalues)];
+
+ mcelems = values;
+ mcefreqs = numbers;
+ nmcelems = nvalues;
+ }
+ }
+ }
+
+ /* Process the logical expression in the query, using the stats */
+ selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
+ mcelems, mcefreqs, nmcelems, minfreq);
+
+ /* MCE stats count only non-null rows, so adjust for null rows. */
+ selec *= (1.0 - nullfrac);
+
+ free_attstatsslot(INT4OID, values, nvalues, numbers, nnumbers);
+ ReleaseVariableStats(vardata);
+
+ CLAMP_PROBABILITY(selec);
+
+ PG_RETURN_FLOAT8((float8) selec);
+}
+
+/*
+ * Estimate selectivity of single intquery operator
+ */
+static Selectivity
+int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
+ int nmcelems, float4 minfreq)
+{
+ Selectivity selec;
+
+ /* since this function recurses, it could be driven to stack overflow */
+ check_stack_depth();
+
+ if (item->type == VAL)
+ {
+ Datum *searchres;
+
+ if (mcelems == NULL)
+ return (Selectivity) DEFAULT_EQ_SEL;
+
+ searchres = (Datum *) bsearch(&item->val, mcelems, nmcelems,
+ sizeof(Datum), compare_val_int4);
+ if (searchres)
+ {
+ /*
+ * The element is in MCELEM. Return precise selectivity (or at
+ * least as precise as ANALYZE could find out).
+ */
+ selec = mcefreqs[searchres - mcelems];
+ }
+ else
+ {
+ /*
+ * The element is not in MCELEM. Punt, but assume that the
+ * selectivity cannot be more than minfreq / 2.
+ */
+ selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
+ }
+ }
+ else if (item->type == OPR)
+ {
+ /* Current query node is an operator */
+ Selectivity s1,
+ s2;
+
+ s1 = int_query_opr_selec(item - 1, mcelems, mcefreqs, nmcelems,
+ minfreq);
+ switch (item->val)
+ {
+ case (int32) '!':
+ selec = 1.0 - s1;
+ break;
+
+ case (int32) '&':
+ s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
+ nmcelems, minfreq);
+ selec = s1 * s2;
+ break;
+
+ case (int32) '|':
+ s2 = int_query_opr_selec(item + item->left, mcelems, mcefreqs,
+ nmcelems, minfreq);
+ selec = s1 + s2 - s1 * s2;
+ break;
+
+ default:
+ elog(ERROR, "unrecognized operator: %d", item->val);
+ selec = 0; /* keep compiler quiet */
+ break;
+ }
+ }
+ else
+ {
+ elog(ERROR, "unrecognized int query item type: %u", item->type);
+ selec = 0; /* keep compiler quiet */
+ }
+
+ /* Clamp intermediate results to stay sane despite roundoff error */
+ CLAMP_PROBABILITY(selec);
+
+ return selec;
+}
+
+/*
+ * Comparison function for binary search in mcelem array.
+ */
+static int
+compare_val_int4(const void *a, const void *b)
+{
+ int32 key = *(int32 *) a;
+ const Datum *t = (const Datum *) b;
+
+ return key - DatumGetInt32(*t);
+}
diff --git a/contrib/intarray/expected/_int.out b/contrib/intarray/expected/_int.out
index 4080b9633fe..962e5c6a4b1 100644
--- a/contrib/intarray/expected/_int.out
+++ b/contrib/intarray/expected/_int.out
@@ -368,6 +368,7 @@ SELECT '1&(2&(4&(5|!6)))'::query_int;
CREATE TABLE test__int( a int[] );
\copy test__int from 'data/test__int.data'
+ANALYZE test__int;
SELECT count(*) from test__int WHERE a && '{23,50}';
count
-------
diff --git a/contrib/intarray/intarray--1.0--1.1.sql b/contrib/intarray/intarray--1.0--1.1.sql
new file mode 100644
index 00000000000..fecebddfcae
--- /dev/null
+++ b/contrib/intarray/intarray--1.0--1.1.sql
@@ -0,0 +1,49 @@
+/* contrib/intarray/intarray--1.0--1.1.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION intarray UPDATE TO '1.1'" to load this file. \quit
+
+CREATE FUNCTION _int_matchsel(internal, oid, internal, integer)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
+ALTER OPERATOR @@ (_int4, query_int) SET (RESTRICT = _int_matchsel);
+ALTER OPERATOR ~~ (query_int, _int4) SET (RESTRICT = _int_matchsel);
+
+CREATE FUNCTION _int_overlap_sel(internal, oid, internal, integer)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
+CREATE FUNCTION _int_contains_sel(internal, oid, internal, integer)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
+CREATE FUNCTION _int_contained_sel(internal, oid, internal, integer)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
+CREATE FUNCTION _int_overlap_joinsel(internal, oid, internal, smallint, internal)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
+CREATE FUNCTION _int_contains_joinsel(internal, oid, internal, smallint, internal)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
+CREATE FUNCTION _int_contained_joinsel(internal, oid, internal, smallint, internal)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
+ALTER OPERATOR && (_int4, _int4) SET (RESTRICT = _int_overlap_sel, JOIN = _int_overlap_joinsel);
+ALTER OPERATOR @> (_int4, _int4) SET (RESTRICT = _int_contains_sel, JOIN = _int_contains_joinsel);
+ALTER OPERATOR <@ (_int4, _int4) SET (RESTRICT = _int_contained_sel, JOIN = _int_contained_joinsel);
+
+ALTER OPERATOR @ (_int4, _int4) SET (RESTRICT = _int_contains_sel, JOIN = _int_contains_joinsel);
+ALTER OPERATOR ~ (_int4, _int4) SET (RESTRICT = _int_contained_sel, JOIN = _int_contained_joinsel);
diff --git a/contrib/intarray/intarray--1.0.sql b/contrib/intarray/intarray--1.1.sql
index 0b89e0f55e5..817625e54a0 100644
--- a/contrib/intarray/intarray--1.0.sql
+++ b/contrib/intarray/intarray--1.1.sql
@@ -1,4 +1,4 @@
-/* contrib/intarray/intarray--1.0.sql */
+/* contrib/intarray/intarray--1.1.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION intarray" to load this file. \quit
@@ -45,12 +45,17 @@ LANGUAGE C STRICT IMMUTABLE;
COMMENT ON FUNCTION rboolop(query_int, _int4) IS 'boolean operation with array';
+CREATE FUNCTION _int_matchsel(internal, oid, internal, integer)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
CREATE OPERATOR @@ (
LEFTARG = _int4,
RIGHTARG = query_int,
PROCEDURE = boolop,
COMMUTATOR = '~~',
- RESTRICT = contsel,
+ RESTRICT = _int_matchsel,
JOIN = contjoinsel
);
@@ -59,7 +64,7 @@ CREATE OPERATOR ~~ (
RIGHTARG = _int4,
PROCEDURE = rboolop,
COMMUTATOR = '@@',
- RESTRICT = contsel,
+ RESTRICT = _int_matchsel,
JOIN = contjoinsel
);
@@ -117,6 +122,36 @@ RETURNS _int4
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT IMMUTABLE;
+CREATE FUNCTION _int_overlap_sel(internal, oid, internal, integer)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
+CREATE FUNCTION _int_contains_sel(internal, oid, internal, integer)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
+CREATE FUNCTION _int_contained_sel(internal, oid, internal, integer)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
+CREATE FUNCTION _int_overlap_joinsel(internal, oid, internal, smallint, internal)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
+CREATE FUNCTION _int_contains_joinsel(internal, oid, internal, smallint, internal)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
+CREATE FUNCTION _int_contained_joinsel(internal, oid, internal, smallint, internal)
+RETURNS float8
+AS 'MODULE_PATHNAME'
+LANGUAGE C STRICT STABLE;
+
--
-- OPERATORS
--
@@ -126,8 +161,8 @@ CREATE OPERATOR && (
RIGHTARG = _int4,
PROCEDURE = _int_overlap,
COMMUTATOR = '&&',
- RESTRICT = contsel,
- JOIN = contjoinsel
+ RESTRICT = _int_overlap_sel,
+ JOIN = _int_overlap_joinsel
);
--CREATE OPERATOR = (
@@ -157,8 +192,8 @@ CREATE OPERATOR @> (
RIGHTARG = _int4,
PROCEDURE = _int_contains,
COMMUTATOR = '<@',
- RESTRICT = contsel,
- JOIN = contjoinsel
+ RESTRICT = _int_contains_sel,
+ JOIN = _int_contains_joinsel
);
CREATE OPERATOR <@ (
@@ -166,8 +201,8 @@ CREATE OPERATOR <@ (
RIGHTARG = _int4,
PROCEDURE = _int_contained,
COMMUTATOR = '@>',
- RESTRICT = contsel,
- JOIN = contjoinsel
+ RESTRICT = _int_contained_sel,
+ JOIN = _int_contained_joinsel
);
-- obsolete:
@@ -176,8 +211,8 @@ CREATE OPERATOR @ (
RIGHTARG = _int4,
PROCEDURE = _int_contains,
COMMUTATOR = '~',
- RESTRICT = contsel,
- JOIN = contjoinsel
+ RESTRICT = _int_contains_sel,
+ JOIN = _int_contains_joinsel
);
CREATE OPERATOR ~ (
@@ -185,8 +220,8 @@ CREATE OPERATOR ~ (
RIGHTARG = _int4,
PROCEDURE = _int_contained,
COMMUTATOR = '@',
- RESTRICT = contsel,
- JOIN = contjoinsel
+ RESTRICT = _int_contained_sel,
+ JOIN = _int_contained_joinsel
);
--------------
diff --git a/contrib/intarray/intarray.control b/contrib/intarray/intarray.control
index 7b3d4f78f05..8c23e8d5e2e 100644
--- a/contrib/intarray/intarray.control
+++ b/contrib/intarray/intarray.control
@@ -1,5 +1,5 @@
# intarray extension
comment = 'functions, operators, and index support for 1-D arrays of integers'
-default_version = '1.0'
+default_version = '1.1'
module_pathname = '$libdir/_int'
relocatable = true
diff --git a/contrib/intarray/sql/_int.sql b/contrib/intarray/sql/_int.sql
index 216c5c58d61..f6fe2de55c5 100644
--- a/contrib/intarray/sql/_int.sql
+++ b/contrib/intarray/sql/_int.sql
@@ -68,8 +68,8 @@ SELECT '1&(2&(4&(5|!6)))'::query_int;
CREATE TABLE test__int( a int[] );
-
\copy test__int from 'data/test__int.data'
+ANALYZE test__int;
SELECT count(*) from test__int WHERE a && '{23,50}';
SELECT count(*) from test__int WHERE a @@ '23|50';