Diffstat (limited to 'src/backend')
-rw-r--r--  src/backend/Makefile                        2
-rw-r--r--  src/backend/catalog/Makefile                1
-rw-r--r--  src/backend/catalog/aclchk.c               35
-rw-r--r--  src/backend/catalog/dependency.c            9
-rw-r--r--  src/backend/catalog/heap.c                 74
-rw-r--r--  src/backend/catalog/namespace.c            56
-rw-r--r--  src/backend/catalog/objectaddress.c        55
-rw-r--r--  src/backend/catalog/pg_shdepend.c           2
-rw-r--r--  src/backend/catalog/system_views.sql       10
-rw-r--r--  src/backend/commands/Makefile               6
-rw-r--r--  src/backend/commands/alter.c                8
-rw-r--r--  src/backend/commands/analyze.c             23
-rw-r--r--  src/backend/commands/dropcmds.c             7
-rw-r--r--  src/backend/commands/event_trigger.c        3
-rw-r--r--  src/backend/commands/statscmds.c          296
-rw-r--r--  src/backend/nodes/copyfuncs.c              17
-rw-r--r--  src/backend/nodes/equalfuncs.c             15
-rw-r--r--  src/backend/nodes/outfuncs.c               31
-rw-r--r--  src/backend/optimizer/util/plancat.c       67
-rw-r--r--  src/backend/parser/gram.y                  62
-rw-r--r--  src/backend/statistics/Makefile            17
-rw-r--r--  src/backend/statistics/README              34
-rw-r--r--  src/backend/statistics/extended_stats.c   389
-rw-r--r--  src/backend/statistics/mvdistinct.c       671
-rw-r--r--  src/backend/tcop/utility.c                 12
-rw-r--r--  src/backend/utils/adt/ruleutils.c          81
-rw-r--r--  src/backend/utils/adt/selfuncs.c          181
-rw-r--r--  src/backend/utils/cache/relcache.c         79
-rw-r--r--  src/backend/utils/cache/syscache.c         23
29 files changed, 2239 insertions, 27 deletions
diff --git a/src/backend/Makefile b/src/backend/Makefile
index fffb0d95bad..bce9d2c3ebb 100644
--- a/src/backend/Makefile
+++ b/src/backend/Makefile
@@ -19,7 +19,7 @@ include $(top_builddir)/src/Makefile.global
SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \
main nodes optimizer port postmaster regex replication rewrite \
- storage tcop tsearch utils $(top_builddir)/src/timezone
+ statistics storage tcop tsearch utils $(top_builddir)/src/timezone
include $(srcdir)/common.mk
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 159cab5c18c..fd33426bad1 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -33,6 +33,7 @@ POSTGRES_BKI_SRCS = $(addprefix $(top_srcdir)/src/include/catalog/,\
pg_attrdef.h pg_constraint.h pg_inherits.h pg_index.h pg_operator.h \
pg_opfamily.h pg_opclass.h pg_am.h pg_amop.h pg_amproc.h \
pg_language.h pg_largeobject_metadata.h pg_largeobject.h pg_aggregate.h \
+ pg_statistic_ext.h \
pg_statistic.h pg_rewrite.h pg_trigger.h pg_event_trigger.h pg_description.h \
pg_cast.h pg_enum.h pg_namespace.h pg_conversion.h pg_depend.h \
pg_database.h pg_db_role_setting.h pg_tablespace.h pg_pltemplate.h \
diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c
index be86d76a59e..d01930f4a80 100644
--- a/src/backend/catalog/aclchk.c
+++ b/src/backend/catalog/aclchk.c
@@ -48,6 +48,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
@@ -3302,6 +3303,8 @@ static const char *const no_priv_msg[MAX_ACL_KIND] =
gettext_noop("permission denied for collation %s"),
/* ACL_KIND_CONVERSION */
gettext_noop("permission denied for conversion %s"),
+ /* ACL_KIND_STATISTICS */
+ gettext_noop("permission denied for statistics %s"),
/* ACL_KIND_TABLESPACE */
gettext_noop("permission denied for tablespace %s"),
/* ACL_KIND_TSDICTIONARY */
@@ -3352,6 +3355,8 @@ static const char *const not_owner_msg[MAX_ACL_KIND] =
gettext_noop("must be owner of collation %s"),
/* ACL_KIND_CONVERSION */
gettext_noop("must be owner of conversion %s"),
+ /* ACL_KIND_STATISTICS */
+ gettext_noop("must be owner of statistics %s"),
/* ACL_KIND_TABLESPACE */
gettext_noop("must be owner of tablespace %s"),
/* ACL_KIND_TSDICTIONARY */
@@ -3467,6 +3472,10 @@ pg_aclmask(AclObjectKind objkind, Oid table_oid, AttrNumber attnum, Oid roleid,
mask, how, NULL);
case ACL_KIND_NAMESPACE:
return pg_namespace_aclmask(table_oid, roleid, mask, how);
+ case ACL_KIND_STATISTICS:
+ elog(ERROR, "grantable rights not supported for statistics");
+ /* not reached, but keep compiler quiet */
+ return ACL_NO_RIGHTS;
case ACL_KIND_TABLESPACE:
return pg_tablespace_aclmask(table_oid, roleid, mask, how);
case ACL_KIND_FDW:
@@ -5104,6 +5113,32 @@ pg_subscription_ownercheck(Oid sub_oid, Oid roleid)
}
/*
+ * Ownership check for extended statistics (specified by OID).
+ */
+bool
+pg_statistics_ownercheck(Oid stat_oid, Oid roleid)
+{
+ HeapTuple tuple;
+ Oid ownerId;
+
+ /* Superusers bypass all permission checking. */
+ if (superuser_arg(roleid))
+ return true;
+
+ tuple = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stat_oid));
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("statistics with OID %u do not exist", stat_oid)));
+
+ ownerId = ((Form_pg_statistic_ext) GETSTRUCT(tuple))->staowner;
+
+ ReleaseSysCache(tuple);
+
+ return has_privs_of_role(roleid, ownerId);
+}
+
+/*
* Check whether specified role has CREATEROLE privilege (or is a superuser)
*
* Note: roles do not have owners per se; instead we use this test in
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index fc088b21658..ee27cae7df7 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -51,6 +51,7 @@
#include "catalog/pg_publication.h"
#include "catalog/pg_publication_rel.h"
#include "catalog/pg_rewrite.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
@@ -154,6 +155,7 @@ static const Oid object_classes[] = {
RewriteRelationId, /* OCLASS_REWRITE */
TriggerRelationId, /* OCLASS_TRIGGER */
NamespaceRelationId, /* OCLASS_SCHEMA */
+ StatisticExtRelationId, /* OCLASS_STATISTIC_EXT */
TSParserRelationId, /* OCLASS_TSPARSER */
TSDictionaryRelationId, /* OCLASS_TSDICT */
TSTemplateRelationId, /* OCLASS_TSTEMPLATE */
@@ -1263,6 +1265,10 @@ doDeletion(const ObjectAddress *object, int flags)
DropTransformById(object->objectId);
break;
+ case OCLASS_STATISTIC_EXT:
+ RemoveStatisticsById(object->objectId);
+ break;
+
default:
elog(ERROR, "unrecognized object class: %u",
object->classId);
@@ -2377,6 +2383,9 @@ getObjectClass(const ObjectAddress *object)
case NamespaceRelationId:
return OCLASS_SCHEMA;
+ case StatisticExtRelationId:
+ return OCLASS_STATISTIC_EXT;
+
case TSParserRelationId:
return OCLASS_TSPARSER;
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index d49dcdc015d..eee5e2f6caf 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -52,6 +52,7 @@
#include "catalog/pg_opclass.h"
#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription_rel.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
@@ -1609,7 +1610,10 @@ RemoveAttributeById(Oid relid, AttrNumber attnum)
heap_close(attr_rel, RowExclusiveLock);
if (attnum > 0)
+ {
RemoveStatistics(relid, attnum);
+ RemoveStatisticsExt(relid, attnum);
+ }
relation_close(rel, NoLock);
}
@@ -1860,6 +1864,7 @@ heap_drop_with_catalog(Oid relid)
* delete statistics
*/
RemoveStatistics(relid, 0);
+ RemoveStatisticsExt(relid, 0);
/*
* delete attribute tuples
@@ -2772,6 +2777,75 @@ RemoveStatistics(Oid relid, AttrNumber attnum)
/*
+ * RemoveStatisticsExt --- remove entries in pg_statistic_ext for a relation
+ *
+ * If attnum is zero, remove all entries for rel; else remove only the
+ * one(s) involving that column.
+ */
+void
+RemoveStatisticsExt(Oid relid, AttrNumber attnum)
+{
+ Relation pgstatisticext;
+ SysScanDesc scan;
+ ScanKeyData key;
+ HeapTuple tuple;
+
+ /*
+ * Scan pg_statistic_ext to delete relevant tuples
+ */
+ pgstatisticext = heap_open(StatisticExtRelationId, RowExclusiveLock);
+
+ ScanKeyInit(&key,
+ Anum_pg_statistic_ext_starelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(relid));
+
+ scan = systable_beginscan(pgstatisticext,
+ StatisticExtRelidIndexId,
+ true, NULL, 1, &key);
+
+ while (HeapTupleIsValid(tuple = systable_getnext(scan)))
+ {
+ bool delete = false;
+
+ if (attnum == 0)
+ delete = true;
+ else if (attnum != 0)
+ {
+ Form_pg_statistic_ext staForm;
+ int i;
+
+ /*
+ * Decode the stakeys array and delete any stats that involve the
+ * specified column.
+ */
+ staForm = (Form_pg_statistic_ext) GETSTRUCT(tuple);
+ for (i = 0; i < staForm->stakeys.dim1; i++)
+ {
+ if (staForm->stakeys.values[i] == attnum)
+ {
+ delete = true;
+ break;
+ }
+ }
+ }
+
+ if (delete)
+ {
+ CatalogTupleDelete(pgstatisticext, &tuple->t_self);
+ deleteDependencyRecordsFor(StatisticExtRelationId,
+ HeapTupleGetOid(tuple),
+ false);
+ }
+ }
+
+ systable_endscan(scan);
+
+ heap_close(pgstatisticext, RowExclusiveLock);
+}
+
+
+/*
* RelationTruncateIndexes - truncate all indexes associated
* with the heap relation to zero tuples.
*
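With RemoveAttributeById() and heap_drop_with_catalog() now calling RemoveStatisticsExt(), extended statistics follow the life cycle of the columns and tables they reference. A minimal sketch of the resulting behaviour, using a hypothetical table t1 and statistics object s1:

    CREATE TABLE t1 (a int, b int, c int);
    CREATE STATISTICS s1 ON (a, b) FROM t1;
    ALTER TABLE t1 DROP COLUMN a;   -- deletes s1's pg_statistic_ext entry, since it references column a
    DROP TABLE t1;                  -- would remove any remaining statistics defined on t1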
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index a38da3047ff..e521bd908a2 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c
@@ -2086,6 +2086,62 @@ ConversionIsVisible(Oid conid)
}
/*
+ * get_statistics_oid - find a statistics by possibly qualified name
+ *
+ * If not found, returns InvalidOid if missing_ok, else throws error
+ */
+Oid
+get_statistics_oid(List *names, bool missing_ok)
+{
+ char *schemaname;
+ char *stats_name;
+ Oid namespaceId;
+ Oid stats_oid = InvalidOid;
+ ListCell *l;
+
+ /* deconstruct the name list */
+ DeconstructQualifiedName(names, &schemaname, &stats_name);
+
+ if (schemaname)
+ {
+ /* use exact schema given */
+ namespaceId = LookupExplicitNamespace(schemaname, missing_ok);
+ if (missing_ok && !OidIsValid(namespaceId))
+ stats_oid = InvalidOid;
+ else
+ stats_oid = GetSysCacheOid2(STATEXTNAMENSP,
+ PointerGetDatum(stats_name),
+ ObjectIdGetDatum(namespaceId));
+ }
+ else
+ {
+ /* search for it in search path */
+ recomputeNamespacePath();
+
+ foreach(l, activeSearchPath)
+ {
+ namespaceId = lfirst_oid(l);
+
+ if (namespaceId == myTempNamespace)
+ continue; /* do not look in temp namespace */
+ stats_oid = GetSysCacheOid2(STATEXTNAMENSP,
+ PointerGetDatum(stats_name),
+ ObjectIdGetDatum(namespaceId));
+ if (OidIsValid(stats_oid))
+ break;
+ }
+ }
+
+ if (!OidIsValid(stats_oid) && !missing_ok)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("statistics \"%s\" do not exist",
+ NameListToString(names))));
+
+ return stats_oid;
+}
+
+/*
* get_ts_parser_oid - find a TS parser by possibly qualified name
*
* If not found, returns InvalidOid if missing_ok, else throws error
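get_statistics_oid() resolves statistics names like other schema-scoped objects: an explicit schema is used directly, otherwise the search_path is walked (skipping the temp namespace). A sketch with hypothetical names:

    SET search_path = myschema, public;
    DROP STATISTICS s1;              -- resolved via search_path
    DROP STATISTICS otherschema.s1;  -- explicit schema, no path search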
diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c
index 61a831b4036..2948d64fa73 100644
--- a/src/backend/catalog/objectaddress.c
+++ b/src/backend/catalog/objectaddress.c
@@ -48,6 +48,7 @@
#include "catalog/pg_publication.h"
#include "catalog/pg_publication_rel.h"
#include "catalog/pg_rewrite.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_transform.h"
@@ -478,6 +479,18 @@ static const ObjectPropertyType ObjectProperty[] =
InvalidAttrNumber,
ACL_KIND_SUBSCRIPTION,
true
+ },
+ {
+ StatisticExtRelationId,
+ StatisticExtOidIndexId,
+ STATEXTOID,
+ STATEXTNAMENSP,
+ Anum_pg_statistic_ext_staname,
+ Anum_pg_statistic_ext_stanamespace,
+ Anum_pg_statistic_ext_staowner,
+ InvalidAttrNumber, /* no ACL (same as relation) */
+ ACL_KIND_STATISTICS,
+ true
}
};
@@ -696,6 +709,10 @@ static const struct object_type_map
/* OCLASS_TRANSFORM */
{
"transform", OBJECT_TRANSFORM
+ },
+ /* OBJECT_STATISTIC_EXT */
+ {
+ "statistics", OBJECT_STATISTIC_EXT
}
};
@@ -974,6 +991,12 @@ get_object_address(ObjectType objtype, Node *object,
address = get_object_address_defacl(castNode(List, object),
missing_ok);
break;
+ case OBJECT_STATISTIC_EXT:
+ address.classId = StatisticExtRelationId;
+ address.objectId = get_statistics_oid(castNode(List, object),
+ missing_ok);
+ address.objectSubId = 0;
+ break;
default:
elog(ERROR, "unrecognized objtype: %d", (int) objtype);
/* placate compiler, in case it thinks elog might return */
@@ -2083,6 +2106,7 @@ pg_get_object_address(PG_FUNCTION_ARGS)
case OBJECT_ATTRIBUTE:
case OBJECT_COLLATION:
case OBJECT_CONVERSION:
+ case OBJECT_STATISTIC_EXT:
case OBJECT_TSPARSER:
case OBJECT_TSDICTIONARY:
case OBJECT_TSTEMPLATE:
@@ -2370,6 +2394,10 @@ check_object_ownership(Oid roleid, ObjectType objtype, ObjectAddress address,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("must be superuser")));
break;
+ case OBJECT_STATISTIC_EXT:
+ if (!pg_statistics_ownercheck(address.objectId, roleid))
+ aclcheck_error_type(ACLCHECK_NOT_OWNER, address.objectId);
+ break;
default:
elog(ERROR, "unrecognized object type: %d",
(int) objtype);
@@ -3857,6 +3885,10 @@ getObjectTypeDescription(const ObjectAddress *object)
appendStringInfoString(&buffer, "subscription");
break;
+ case OCLASS_STATISTIC_EXT:
+ appendStringInfoString(&buffer, "statistics");
+ break;
+
default:
appendStringInfo(&buffer, "unrecognized %u", object->classId);
break;
@@ -4880,6 +4912,29 @@ getObjectIdentityParts(const ObjectAddress *object,
break;
}
+ case OCLASS_STATISTIC_EXT:
+ {
+ HeapTuple tup;
+ Form_pg_statistic_ext formStatistic;
+ char *schema;
+
+ tup = SearchSysCache1(STATEXTOID,
+ ObjectIdGetDatum(object->objectId));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for statistics %u",
+ object->objectId);
+ formStatistic = (Form_pg_statistic_ext) GETSTRUCT(tup);
+ schema = get_namespace_name_or_temp(formStatistic->stanamespace);
+ appendStringInfoString(&buffer,
+ quote_qualified_identifier(schema,
+ NameStr(formStatistic->staname)));
+ if (objname)
+ *objname = list_make2(schema,
+ pstrdup(NameStr(formStatistic->staname)));
+ ReleaseSysCache(tup);
+ }
+ break;
+
default:
appendStringInfo(&buffer, "unrecognized object %u %u %d",
object->classId,
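Because of the ObjectProperty and object_type_map entries added above, extended statistics participate in the generic object-address machinery. A hedged example, assuming a statistics object named s1 in schema public:

    SELECT classid::regclass, objid, objsubid
      FROM pg_get_object_address('statistics', '{public,s1}', '{}');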
diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c
index 8d946ff44cc..d28a8afb47d 100644
--- a/src/backend/catalog/pg_shdepend.c
+++ b/src/backend/catalog/pg_shdepend.c
@@ -39,6 +39,7 @@
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_shdepend.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_ts_config.h"
@@ -1415,6 +1416,7 @@ shdepReassignOwned(List *roleids, Oid newrole)
case OperatorFamilyRelationId:
case OperatorClassRelationId:
case ExtensionRelationId:
+ case StatisticExtRelationId:
case TableSpaceRelationId:
case DatabaseRelationId:
case TSConfigRelationId:
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 80d14296de2..b41882aa521 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -186,6 +186,16 @@ CREATE OR REPLACE VIEW pg_sequences AS
WHERE NOT pg_is_other_temp_schema(N.oid)
AND relkind = 'S';
+CREATE VIEW pg_stats_ext AS
+ SELECT
+ N.nspname AS schemaname,
+ C.relname AS tablename,
+ S.staname AS staname,
+ S.stakeys AS attnums,
+ length(s.standistinct) AS ndistbytes
+ FROM (pg_statistic_ext S JOIN pg_class C ON (C.oid = S.starelid))
+ LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace);
+
CREATE VIEW pg_stats WITH (security_barrier) AS
SELECT
nspname AS schemaname,
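The new pg_stats_ext view gives a quick overview of the defined statistics; ndistbytes stays NULL until ANALYZE has built the ndistinct data. Hypothetical usage:

    SELECT schemaname, tablename, staname, attnums, ndistbytes
      FROM pg_stats_ext
     WHERE tablename = 't1';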
diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile
index e0fab38cbe1..4a6c99e0908 100644
--- a/src/backend/commands/Makefile
+++ b/src/backend/commands/Makefile
@@ -18,8 +18,8 @@ OBJS = amcmds.o aggregatecmds.o alter.o analyze.o async.o cluster.o comment.o \
event_trigger.o explain.o extension.o foreigncmds.o functioncmds.o \
indexcmds.o lockcmds.o matview.o operatorcmds.o opclasscmds.o \
policy.o portalcmds.o prepare.o proclang.o publicationcmds.o \
- schemacmds.o seclabel.o sequence.o subscriptioncmds.o tablecmds.o \
- tablespace.o trigger.o tsearchcmds.o typecmds.o user.o vacuum.o \
- vacuumlazy.o variable.o view.o
+ schemacmds.o seclabel.o sequence.o statscmds.o subscriptioncmds.o \
+ tablecmds.o tablespace.o trigger.o tsearchcmds.o typecmds.o user.o \
+ vacuum.o vacuumlazy.o variable.o view.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/commands/alter.c b/src/backend/commands/alter.c
index cf1391c2e6b..2c6435b7598 100644
--- a/src/backend/commands/alter.c
+++ b/src/backend/commands/alter.c
@@ -33,6 +33,7 @@
#include "catalog/pg_opfamily.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_subscription.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_dict.h"
#include "catalog/pg_ts_parser.h"
@@ -120,6 +121,10 @@ report_namespace_conflict(Oid classId, const char *name, Oid nspOid)
Assert(OidIsValid(nspOid));
msgfmt = gettext_noop("conversion \"%s\" already exists in schema \"%s\"");
break;
+ case StatisticExtRelationId:
+ Assert(OidIsValid(nspOid));
+ msgfmt = gettext_noop("statistics \"%s\" already exists in schema \"%s\"");
+ break;
case TSParserRelationId:
Assert(OidIsValid(nspOid));
msgfmt = gettext_noop("text search parser \"%s\" already exists in schema \"%s\"");
@@ -373,6 +378,7 @@ ExecRenameStmt(RenameStmt *stmt)
case OBJECT_OPCLASS:
case OBJECT_OPFAMILY:
case OBJECT_LANGUAGE:
+ case OBJECT_STATISTIC_EXT:
case OBJECT_TSCONFIGURATION:
case OBJECT_TSDICTIONARY:
case OBJECT_TSPARSER:
@@ -489,6 +495,7 @@ ExecAlterObjectSchemaStmt(AlterObjectSchemaStmt *stmt,
case OBJECT_OPERATOR:
case OBJECT_OPCLASS:
case OBJECT_OPFAMILY:
+ case OBJECT_STATISTIC_EXT:
case OBJECT_TSCONFIGURATION:
case OBJECT_TSDICTIONARY:
case OBJECT_TSPARSER:
@@ -803,6 +810,7 @@ ExecAlterOwnerStmt(AlterOwnerStmt *stmt)
case OBJECT_OPERATOR:
case OBJECT_OPCLASS:
case OBJECT_OPFAMILY:
+ case OBJECT_STATISTIC_EXT:
case OBJECT_TABLESPACE:
case OBJECT_TSDICTIONARY:
case OBJECT_TSCONFIGURATION:
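These additions route OBJECT_STATISTIC_EXT through the generic rename, schema-change and owner-change code paths, so the usual ALTER forms apply. A sketch, with hypothetical names:

    ALTER STATISTICS s1 RENAME TO s1_renamed;
    ALTER STATISTICS s1_renamed SET SCHEMA other_schema;
    ALTER STATISTICS other_schema.s1_renamed OWNER TO some_role;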
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 055338fdff2..c5b5c54babf 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -17,6 +17,7 @@
#include <math.h>
#include "access/multixact.h"
+#include "access/sysattr.h"
#include "access/transam.h"
#include "access/tupconvert.h"
#include "access/tuptoaster.h"
@@ -28,6 +29,7 @@
#include "catalog/pg_collation.h"
#include "catalog/pg_inherits_fn.h"
#include "catalog/pg_namespace.h"
+#include "catalog/pg_statistic_ext.h"
#include "commands/dbcommands.h"
#include "commands/tablecmds.h"
#include "commands/vacuum.h"
@@ -39,13 +41,17 @@
#include "parser/parse_relation.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
+#include "statistics/extended_stats_internal.h"
+#include "statistics/statistics.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/acl.h"
#include "utils/attoptcache.h"
+#include "utils/builtins.h"
#include "utils/datum.h"
+#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@@ -566,6 +572,10 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
update_attstats(RelationGetRelid(Irel[ind]), false,
thisdata->attr_cnt, thisdata->vacattrstats);
}
+
+ /* Build extended statistics (if there are any). */
+ BuildRelationExtStatistics(onerel, totalrows, numrows, rows, attr_cnt,
+ vacattrstats);
}
/*
@@ -1683,19 +1693,6 @@ ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull)
*/
typedef struct
{
- Oid eqopr; /* '=' operator for datatype, if any */
- Oid eqfunc; /* and associated function */
- Oid ltopr; /* '<' operator for datatype, if any */
-} StdAnalyzeData;
-
-typedef struct
-{
- Datum value; /* a data value */
- int tupno; /* position index for tuple it came from */
-} ScalarItem;
-
-typedef struct
-{
int count; /* # of duplicates */
int first; /* values[] index of first occurrence */
} ScalarMCVItem;
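With BuildRelationExtStatistics() hooked into do_analyze_rel(), extended statistics are (re)built whenever the table is analyzed, from the same row sample as the per-column statistics. A minimal sketch (hypothetical names):

    CREATE STATISTICS s1 WITH (ndistinct) ON (a, b) FROM t1;
    ANALYZE t1;
    SELECT staname, standistinct IS NOT NULL AS ndistinct_built
      FROM pg_statistic_ext
     WHERE starelid = 't1'::regclass;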
diff --git a/src/backend/commands/dropcmds.c b/src/backend/commands/dropcmds.c
index ab73fbf961b..cb948f0204d 100644
--- a/src/backend/commands/dropcmds.c
+++ b/src/backend/commands/dropcmds.c
@@ -286,6 +286,13 @@ does_not_exist_skipping(ObjectType objtype, Node *object)
msg = gettext_noop("schema \"%s\" does not exist, skipping");
name = strVal((Value *) object);
break;
+ case OBJECT_STATISTIC_EXT:
+ if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
+ {
+ msg = gettext_noop("extended statistics \"%s\" do not exist, skipping");
+ name = NameListToString(castNode(List, object));
+ }
+ break;
case OBJECT_TSPARSER:
if (!schema_does_not_exist_skipping(castNode(List, object), &msg, &name))
{
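The new does_not_exist_skipping() case gives DROP STATISTICS IF EXISTS the usual notice-and-continue behaviour, e.g.:

    DROP STATISTICS IF EXISTS no_such_stats;
    -- NOTICE:  extended statistics "no_such_stats" do not exist, skipping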
diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c
index 346b347ae17..7366fc74bec 100644
--- a/src/backend/commands/event_trigger.c
+++ b/src/backend/commands/event_trigger.c
@@ -112,6 +112,7 @@ static event_trigger_support_data event_trigger_support[] = {
{"SCHEMA", true},
{"SEQUENCE", true},
{"SERVER", true},
+ {"STATISTICS", true},
{"SUBSCRIPTION", true},
{"TABLE", true},
{"TABLESPACE", false},
@@ -1108,6 +1109,7 @@ EventTriggerSupportsObjectType(ObjectType obtype)
case OBJECT_SCHEMA:
case OBJECT_SEQUENCE:
case OBJECT_SUBSCRIPTION:
+ case OBJECT_STATISTIC_EXT:
case OBJECT_TABCONSTRAINT:
case OBJECT_TABLE:
case OBJECT_TRANSFORM:
@@ -1173,6 +1175,7 @@ EventTriggerSupportsObjectClass(ObjectClass objclass)
case OCLASS_PUBLICATION:
case OCLASS_PUBLICATION_REL:
case OCLASS_SUBSCRIPTION:
+ case OCLASS_STATISTIC_EXT:
return true;
}
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
new file mode 100644
index 00000000000..416309106a7
--- /dev/null
+++ b/src/backend/commands/statscmds.c
@@ -0,0 +1,296 @@
+/*-------------------------------------------------------------------------
+ *
+ * statscmds.c
+ * Commands for creating and altering extended statistics
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/commands/statscmds.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/relscan.h"
+#include "catalog/dependency.h"
+#include "catalog/indexing.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_namespace.h"
+#include "catalog/pg_statistic_ext.h"
+#include "commands/defrem.h"
+#include "miscadmin.h"
+#include "statistics/statistics.h"
+#include "utils/builtins.h"
+#include "utils/inval.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+
+
+/* used for sorting the attnums in CreateStatistics */
+static int
+compare_int16(const void *a, const void *b)
+{
+ return memcmp(a, b, sizeof(int16));
+}
+
+/*
+ * CREATE STATISTICS
+ */
+ObjectAddress
+CreateStatistics(CreateStatsStmt *stmt)
+{
+ int i;
+ ListCell *l;
+ int16 attnums[STATS_MAX_DIMENSIONS];
+ int numcols = 0;
+ ObjectAddress address = InvalidObjectAddress;
+ char *namestr;
+ NameData staname;
+ Oid statoid;
+ Oid namespaceId;
+ HeapTuple htup;
+ Datum values[Natts_pg_statistic_ext];
+ bool nulls[Natts_pg_statistic_ext];
+ int2vector *stakeys;
+ Relation statrel;
+ Relation rel;
+ Oid relid;
+ ObjectAddress parentobject,
+ childobject;
+ Datum types[1]; /* only ndistinct defined now */
+ int ntypes;
+ ArrayType *staenabled;
+ bool build_ndistinct;
+ bool requested_type = false;
+
+ Assert(IsA(stmt, CreateStatsStmt));
+
+ /* resolve the pieces of the name (namespace etc.) */
+ namespaceId = QualifiedNameGetCreationNamespace(stmt->defnames, &namestr);
+ namestrcpy(&staname, namestr);
+
+ /*
+ * If if_not_exists was given and the statistics already exist, bail out.
+ */
+ if (SearchSysCacheExists2(STATEXTNAMENSP,
+ PointerGetDatum(&staname),
+ ObjectIdGetDatum(namespaceId)))
+ {
+ if (stmt->if_not_exists)
+ {
+ ereport(NOTICE,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("statistics \"%s\" already exist, skipping",
+ namestr)));
+ return InvalidObjectAddress;
+ }
+
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("statistics \"%s\" already exist", namestr)));
+ }
+
+ rel = heap_openrv(stmt->relation, AccessExclusiveLock);
+ relid = RelationGetRelid(rel);
+
+ if (rel->rd_rel->relkind != RELKIND_RELATION &&
+ rel->rd_rel->relkind != RELKIND_MATVIEW)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("relation \"%s\" is not a table or materialized view",
+ RelationGetRelationName(rel))));
+
+ /*
+ * Transform column names to array of attnums. While at it, enforce some
+ * constraints.
+ */
+ foreach(l, stmt->keys)
+ {
+ char *attname = strVal(lfirst(l));
+ HeapTuple atttuple;
+ Form_pg_attribute attForm;
+ TypeCacheEntry *type;
+
+ atttuple = SearchSysCacheAttName(relid, attname);
+ if (!HeapTupleIsValid(atttuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" referenced in statistics does not exist",
+ attname)));
+ attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
+
+ /* Disallow use of system attributes in extended stats */
+ if (attForm->attnum < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("statistic creation on system columns is not supported")));
+
+ /* Disallow data types without a less-than operator */
+ type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
+ if (type->lt_opr == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("only scalar types can be used in extended statistics")));
+
+ /* Make sure no more than STATS_MAX_DIMENSIONS columns are used */
+ if (numcols >= STATS_MAX_DIMENSIONS)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_COLUMNS),
+ errmsg("cannot have more than %d keys in statistics",
+ STATS_MAX_DIMENSIONS)));
+
+ attnums[numcols] = ((Form_pg_attribute) GETSTRUCT(atttuple))->attnum;
+ ReleaseSysCache(atttuple);
+ numcols++;
+ }
+
+ /*
+ * Check that at least two columns were specified in the statement. The
+ * upper bound was already checked in the loop above.
+ */
+ if (numcols < 2)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_COLUMNS),
+ errmsg("statistics require at least 2 columns")));
+
+ /*
+ * Sort the attnums, which makes detecting duplicates somewhat easier, and
+ * it does not hurt (unlike for indexes, the column order does not affect
+ * efficiency).
+ */
+ qsort(attnums, numcols, sizeof(int16), compare_int16);
+
+ /*
+ * Look for duplicate columns in the list. The attnums are sorted, so it is
+ * enough to check consecutive elements.
+ */
+ for (i = 1; i < numcols; i++)
+ if (attnums[i] == attnums[i - 1])
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("duplicate column name in statistics definition")));
+
+ stakeys = buildint2vector(attnums, numcols);
+
+ /*
+ * Parse the statistics options. Currently only statistics types are
+ * recognized.
+ */
+ build_ndistinct = false;
+ foreach(l, stmt->options)
+ {
+ DefElem *opt = (DefElem *) lfirst(l);
+
+ if (strcmp(opt->defname, "ndistinct") == 0)
+ {
+ build_ndistinct = defGetBoolean(opt);
+ requested_type = true;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("unrecognized STATISTICS option \"%s\"",
+ opt->defname)));
+ }
+ /* If no statistic type was specified, build them all. */
+ if (!requested_type)
+ build_ndistinct = true;
+
+ /* construct the char array of enabled statistic types */
+ ntypes = 0;
+ if (build_ndistinct)
+ types[ntypes++] = CharGetDatum(STATS_EXT_NDISTINCT);
+ Assert(ntypes > 0);
+ staenabled = construct_array(types, ntypes, CHAROID, 1, true, 'c');
+
+ /*
+ * Everything seems fine, so let's build the pg_statistic_ext tuple.
+ */
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+ values[Anum_pg_statistic_ext_starelid - 1] = ObjectIdGetDatum(relid);
+ values[Anum_pg_statistic_ext_staname - 1] = NameGetDatum(&staname);
+ values[Anum_pg_statistic_ext_stanamespace - 1] = ObjectIdGetDatum(namespaceId);
+ values[Anum_pg_statistic_ext_staowner - 1] = ObjectIdGetDatum(GetUserId());
+ values[Anum_pg_statistic_ext_stakeys - 1] = PointerGetDatum(stakeys);
+ values[Anum_pg_statistic_ext_staenabled - 1] = PointerGetDatum(staenabled);
+
+ /* no statistics built yet */
+ nulls[Anum_pg_statistic_ext_standistinct - 1] = true;
+
+ /* insert it into pg_statistic_ext */
+ statrel = heap_open(StatisticExtRelationId, RowExclusiveLock);
+ htup = heap_form_tuple(statrel->rd_att, values, nulls);
+ CatalogTupleInsert(statrel, htup);
+ statoid = HeapTupleGetOid(htup);
+ heap_freetuple(htup);
+ heap_close(statrel, RowExclusiveLock);
+ relation_close(rel, NoLock);
+
+ /*
+ * Add a dependency on a table, so that stats get dropped on DROP TABLE.
+ */
+ ObjectAddressSet(parentobject, RelationRelationId, relid);
+ ObjectAddressSet(childobject, StatisticExtRelationId, statoid);
+ recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO);
+
+ /*
+ * Also add dependency on the schema. This is required to ensure that we
+ * drop the statistics on DROP SCHEMA. This is not handled automatically
+ * by DROP TABLE because the statistics are not an object in the table's
+ * schema.
+ */
+ ObjectAddressSet(parentobject, NamespaceRelationId, namespaceId);
+ recordDependencyOn(&childobject, &parentobject, DEPENDENCY_AUTO);
+
+ ObjectAddressSet(address, StatisticExtRelationId, statoid);
+
+ /*
+ * Invalidate relcache so that others see the new statistics.
+ */
+ CacheInvalidateRelcache(rel);
+
+ return address;
+}
+
+/*
+ * Guts of statistics deletion.
+ */
+void
+RemoveStatisticsById(Oid statsOid)
+{
+ Relation relation;
+ Oid relid;
+ Relation rel;
+ HeapTuple tup;
+ Form_pg_statistic_ext statext;
+
+ /*
+ * Delete the pg_statistic_ext tuple.
+ */
+ relation = heap_open(StatisticExtRelationId, RowExclusiveLock);
+
+ tup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statsOid));
+
+ if (!HeapTupleIsValid(tup)) /* should not happen */
+ elog(ERROR, "cache lookup failed for statistics %u", statsOid);
+
+ statext = (Form_pg_statistic_ext) GETSTRUCT(tup);
+ relid = statext->starelid;
+
+ rel = heap_open(relid, AccessExclusiveLock);
+
+ simple_heap_delete(relation, &tup->t_self);
+
+ CacheInvalidateRelcache(rel);
+
+ ReleaseSysCache(tup);
+
+ heap_close(relation, RowExclusiveLock);
+ heap_close(rel, NoLock);
+}
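CreateStatistics() enforces several user-visible rules: at least two distinct, non-system columns of types with a less-than operator, and (for now) only the ndistinct statistics type; IF NOT EXISTS turns a duplicate name into a NOTICE. A sketch of the behaviour implied by the checks above, using hypothetical names:

    CREATE STATISTICS s1 WITH (ndistinct) ON (a, b) FROM t1;
    CREATE STATISTICS IF NOT EXISTS s1 ON (a, b) FROM t1;
    -- NOTICE:  statistics "s1" already exist, skipping
    CREATE STATISTICS s2 ON (a) FROM t1;
    -- ERROR:  statistics require at least 2 columns
    CREATE STATISTICS s3 ON (a, a) FROM t1;
    -- ERROR:  duplicate column name in statistics definition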
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 93bda427153..c23d5c52851 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -3337,6 +3337,20 @@ _copyIndexStmt(const IndexStmt *from)
return newnode;
}
+static CreateStatsStmt *
+_copyCreateStatsStmt(const CreateStatsStmt *from)
+{
+ CreateStatsStmt *newnode = makeNode(CreateStatsStmt);
+
+ COPY_NODE_FIELD(defnames);
+ COPY_NODE_FIELD(relation);
+ COPY_NODE_FIELD(keys);
+ COPY_NODE_FIELD(options);
+ COPY_SCALAR_FIELD(if_not_exists);
+
+ return newnode;
+}
+
static CreateFunctionStmt *
_copyCreateFunctionStmt(const CreateFunctionStmt *from)
{
@@ -5050,6 +5064,9 @@ copyObject(const void *from)
case T_IndexStmt:
retval = _copyIndexStmt(from);
break;
+ case T_CreateStatsStmt:
+ retval = _copyCreateStatsStmt(from);
+ break;
case T_CreateFunctionStmt:
retval = _copyCreateFunctionStmt(from);
break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 0d12636d92c..5941b7a2bfb 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -1335,6 +1335,18 @@ _equalIndexStmt(const IndexStmt *a, const IndexStmt *b)
}
static bool
+_equalCreateStatsStmt(const CreateStatsStmt *a, const CreateStatsStmt *b)
+{
+ COMPARE_NODE_FIELD(defnames);
+ COMPARE_NODE_FIELD(relation);
+ COMPARE_NODE_FIELD(keys);
+ COMPARE_NODE_FIELD(options);
+ COMPARE_SCALAR_FIELD(if_not_exists);
+
+ return true;
+}
+
+static bool
_equalCreateFunctionStmt(const CreateFunctionStmt *a, const CreateFunctionStmt *b)
{
COMPARE_SCALAR_FIELD(replace);
@@ -3236,6 +3248,9 @@ equal(const void *a, const void *b)
case T_IndexStmt:
retval = _equalIndexStmt(a, b);
break;
+ case T_CreateStatsStmt:
+ retval = _equalCreateStatsStmt(a, b);
+ break;
case T_CreateFunctionStmt:
retval = _equalCreateFunctionStmt(a, b);
break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 1b9005fa537..541af029353 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2202,6 +2202,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_NODE_FIELD(lateral_vars);
WRITE_BITMAPSET_FIELD(lateral_referencers);
WRITE_NODE_FIELD(indexlist);
+ WRITE_NODE_FIELD(statlist);
WRITE_UINT_FIELD(pages);
WRITE_FLOAT_FIELD(tuples, "%.0f");
WRITE_FLOAT_FIELD(allvisfrac, "%.6f");
@@ -2275,6 +2276,18 @@ _outForeignKeyOptInfo(StringInfo str, const ForeignKeyOptInfo *node)
}
static void
+_outStatisticExtInfo(StringInfo str, const StatisticExtInfo *node)
+{
+ WRITE_NODE_TYPE("STATISTICEXTINFO");
+
+ /* NB: this isn't a complete set of fields */
+ WRITE_OID_FIELD(statOid);
+ /* don't write rel, leads to infinite recursion in plan tree dump */
+ WRITE_CHAR_FIELD(kind);
+ WRITE_BITMAPSET_FIELD(keys);
+}
+
+static void
_outEquivalenceClass(StringInfo str, const EquivalenceClass *node)
{
/*
@@ -2578,6 +2591,18 @@ _outIndexStmt(StringInfo str, const IndexStmt *node)
}
static void
+_outCreateStatsStmt(StringInfo str, const CreateStatsStmt *node)
+{
+ WRITE_NODE_TYPE("CREATESTATSSTMT");
+
+ WRITE_NODE_FIELD(defnames);
+ WRITE_NODE_FIELD(relation);
+ WRITE_NODE_FIELD(keys);
+ WRITE_NODE_FIELD(options);
+ WRITE_BOOL_FIELD(if_not_exists);
+}
+
+static void
_outNotifyStmt(StringInfo str, const NotifyStmt *node)
{
WRITE_NODE_TYPE("NOTIFY");
@@ -3936,6 +3961,9 @@ outNode(StringInfo str, const void *obj)
case T_PlannerParamItem:
_outPlannerParamItem(str, obj);
break;
+ case T_StatisticExtInfo:
+ _outStatisticExtInfo(str, obj);
+ break;
case T_ExtensibleNode:
_outExtensibleNode(str, obj);
@@ -3953,6 +3981,9 @@ outNode(StringInfo str, const void *obj)
case T_IndexStmt:
_outIndexStmt(str, obj);
break;
+ case T_CreateStatsStmt:
+ _outCreateStatsStmt(str, obj);
+ break;
case T_NotifyStmt:
_outNotifyStmt(str, obj);
break;
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 463f8064678..cc88dcc28e4 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -29,6 +29,7 @@
#include "catalog/heap.h"
#include "catalog/partition.h"
#include "catalog/pg_am.h"
+#include "catalog/pg_statistic_ext.h"
#include "foreign/fdwapi.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
@@ -40,8 +41,11 @@
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
+#include "statistics/statistics.h"
#include "storage/bufmgr.h"
+#include "utils/builtins.h"
#include "utils/lsyscache.h"
+#include "utils/syscache.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
@@ -63,7 +67,7 @@ static List *get_relation_constraints(PlannerInfo *root,
bool include_notnull);
static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
Relation heapRelation);
-
+static List *get_relation_statistics(RelOptInfo *rel, Relation relation);
/*
* get_relation_info -
@@ -398,6 +402,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
rel->indexlist = indexinfos;
+ rel->statlist = get_relation_statistics(rel, relation);
+
/* Grab foreign-table info using the relcache, while we have it */
if (relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
{
@@ -1251,6 +1257,65 @@ get_relation_constraints(PlannerInfo *root,
return result;
}
+/*
+ * get_relation_statistics
+ * Retrieve extended statistics defined on the table.
+ *
+ * Returns a List (possibly empty) of StatisticExtInfo objects describing
+ * the statistics. Note that this doesn't load the actual statistics data,
+ * just the identifying metadata. Only stats actually built are considered.
+ */
+static List *
+get_relation_statistics(RelOptInfo *rel, Relation relation)
+{
+ List *statoidlist;
+ List *stainfos = NIL;
+ ListCell *l;
+
+ statoidlist = RelationGetStatExtList(relation);
+
+ foreach(l, statoidlist)
+ {
+ Oid statOid = lfirst_oid(l);
+ Form_pg_statistic_ext staForm;
+ HeapTuple htup;
+ Bitmapset *keys = NULL;
+ int i;
+
+ htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
+ if (!htup)
+ elog(ERROR, "cache lookup failed for statistics %u", statOid);
+ staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
+
+ /*
+ * First, build the array of columns covered. This is ultimately
+ * wasted if no stats are actually built, but it doesn't seem worth
+ * troubling over that case.
+ */
+ for (i = 0; i < staForm->stakeys.dim1; i++)
+ keys = bms_add_member(keys, staForm->stakeys.values[i]);
+
+ /* add one StatisticExtInfo for each kind built */
+ if (statext_is_kind_built(htup, STATS_EXT_NDISTINCT))
+ {
+ StatisticExtInfo *info = makeNode(StatisticExtInfo);
+
+ info->statOid = statOid;
+ info->rel = rel;
+ info->kind = STATS_EXT_NDISTINCT;
+ info->keys = bms_copy(keys);
+
+ stainfos = lcons(info, stainfos);
+ }
+
+ ReleaseSysCache(htup);
+ bms_free(keys);
+ }
+
+ list_free(statoidlist);
+
+ return stainfos;
+}
/*
* relation_excluded_by_constraints
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 82844a0399d..bbcfc1fb4fd 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -257,7 +257,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
ConstraintsSetStmt CopyStmt CreateAsStmt CreateCastStmt
CreateDomainStmt CreateExtensionStmt CreateGroupStmt CreateOpClassStmt
CreateOpFamilyStmt AlterOpFamilyStmt CreatePLangStmt
- CreateSchemaStmt CreateSeqStmt CreateStmt CreateTableSpaceStmt
+ CreateSchemaStmt CreateSeqStmt CreateStmt CreateStatsStmt CreateTableSpaceStmt
CreateFdwStmt CreateForeignServerStmt CreateForeignTableStmt
CreateAssertStmt CreateTransformStmt CreateTrigStmt CreateEventTrigStmt
CreateUserStmt CreateUserMappingStmt CreateRoleStmt CreatePolicyStmt
@@ -874,6 +874,7 @@ stmt :
| CreateSeqStmt
| CreateStmt
| CreateSubscriptionStmt
+ | CreateStatsStmt
| CreateTableSpaceStmt
| CreateTransformStmt
| CreateTrigStmt
@@ -3747,6 +3748,35 @@ OptConsTableSpace: USING INDEX TABLESPACE name { $$ = $4; }
ExistingIndex: USING INDEX index_name { $$ = $3; }
;
+/*****************************************************************************
+ *
+ * QUERY :
+ * CREATE STATISTICS stats_name WITH (options) ON (columns) FROM relname
+ *
+ *****************************************************************************/
+
+
+CreateStatsStmt: CREATE STATISTICS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name
+ {
+ CreateStatsStmt *n = makeNode(CreateStatsStmt);
+ n->defnames = $3;
+ n->relation = $10;
+ n->keys = $7;
+ n->options = $4;
+ n->if_not_exists = false;
+ $$ = (Node *)n;
+ }
+ | CREATE STATISTICS IF_P NOT EXISTS any_name opt_reloptions ON '(' columnList ')' FROM qualified_name
+ {
+ CreateStatsStmt *n = makeNode(CreateStatsStmt);
+ n->defnames = $6;
+ n->relation = $13;
+ n->keys = $10;
+ n->options = $7;
+ n->if_not_exists = true;
+ $$ = (Node *)n;
+ }
+ ;
/*****************************************************************************
*
@@ -6042,6 +6072,7 @@ drop_type_any_name:
| FOREIGN TABLE { $$ = OBJECT_FOREIGN_TABLE; }
| COLLATION { $$ = OBJECT_COLLATION; }
| CONVERSION_P { $$ = OBJECT_CONVERSION; }
+ | STATISTICS { $$ = OBJECT_STATISTIC_EXT; }
| TEXT_P SEARCH PARSER { $$ = OBJECT_TSPARSER; }
| TEXT_P SEARCH DICTIONARY { $$ = OBJECT_TSDICTIONARY; }
| TEXT_P SEARCH TEMPLATE { $$ = OBJECT_TSTEMPLATE; }
@@ -6119,7 +6150,7 @@ opt_restart_seqs:
* EXTENSION | EVENT TRIGGER | FOREIGN DATA WRAPPER |
* FOREIGN TABLE | INDEX | [PROCEDURAL] LANGUAGE |
* MATERIALIZED VIEW | POLICY | ROLE | SCHEMA | SEQUENCE |
- * SERVER | TABLE | TABLESPACE |
+ * SERVER | STATISTICS | TABLE | TABLESPACE |
* TEXT SEARCH CONFIGURATION | TEXT SEARCH DICTIONARY |
* TEXT SEARCH PARSER | TEXT SEARCH TEMPLATE | TYPE |
* VIEW] <objname> |
@@ -6288,6 +6319,7 @@ comment_type_any_name:
COLUMN { $$ = OBJECT_COLUMN; }
| INDEX { $$ = OBJECT_INDEX; }
| SEQUENCE { $$ = OBJECT_SEQUENCE; }
+ | STATISTICS { $$ = OBJECT_STATISTIC_EXT; }
| TABLE { $$ = OBJECT_TABLE; }
| VIEW { $$ = OBJECT_VIEW; }
| MATERIALIZED VIEW { $$ = OBJECT_MATVIEW; }
@@ -8428,6 +8460,15 @@ RenameStmt: ALTER AGGREGATE aggregate_with_argtypes RENAME TO name
n->missing_ok = false;
$$ = (Node *)n;
}
+ | ALTER STATISTICS any_name RENAME TO name
+ {
+ RenameStmt *n = makeNode(RenameStmt);
+ n->renameType = OBJECT_STATISTIC_EXT;
+ n->object = (Node *) $3;
+ n->newname = $6;
+ n->missing_ok = false;
+ $$ = (Node *)n;
+ }
| ALTER TEXT_P SEARCH PARSER any_name RENAME TO name
{
RenameStmt *n = makeNode(RenameStmt);
@@ -8643,6 +8684,15 @@ AlterObjectSchemaStmt:
n->missing_ok = true;
$$ = (Node *)n;
}
+ | ALTER STATISTICS any_name SET SCHEMA name
+ {
+ AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt);
+ n->objectType = OBJECT_STATISTIC_EXT;
+ n->object = (Node *) $3;
+ n->newschema = $6;
+ n->missing_ok = false;
+ $$ = (Node *)n;
+ }
| ALTER TEXT_P SEARCH PARSER any_name SET SCHEMA name
{
AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt);
@@ -8906,6 +8956,14 @@ AlterOwnerStmt: ALTER AGGREGATE aggregate_with_argtypes OWNER TO RoleSpec
n->newowner = $6;
$$ = (Node *)n;
}
+ | ALTER STATISTICS any_name OWNER TO RoleSpec
+ {
+ AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
+ n->objectType = OBJECT_STATISTIC_EXT;
+ n->object = (Node *) $3;
+ n->newowner = $6;
+ $$ = (Node *)n;
+ }
| ALTER TEXT_P SEARCH DICTIONARY any_name OWNER TO RoleSpec
{
AlterOwnerStmt *n = makeNode(AlterOwnerStmt);
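Taken together, the grammar changes in this file add the following command family (synopsis assembled from the productions above; brackets mark optional parts):

    CREATE STATISTICS [ IF NOT EXISTS ] name [ WITH (options) ] ON (column, column [, ...]) FROM table_name
    DROP STATISTICS [ IF EXISTS ] name [, ...]
    COMMENT ON STATISTICS name IS 'text'
    ALTER STATISTICS name RENAME TO new_name
    ALTER STATISTICS name SET SCHEMA new_schema
    ALTER STATISTICS name OWNER TO new_owner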
diff --git a/src/backend/statistics/Makefile b/src/backend/statistics/Makefile
new file mode 100644
index 00000000000..b3615bd69ac
--- /dev/null
+++ b/src/backend/statistics/Makefile
@@ -0,0 +1,17 @@
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for statistics
+#
+# IDENTIFICATION
+# src/backend/statistics/Makefile
+#
+#-------------------------------------------------------------------------
+
+subdir = src/backend/statistics
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+OBJS = extended_stats.o mvdistinct.o
+
+include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/statistics/README b/src/backend/statistics/README
new file mode 100644
index 00000000000..beb7c2485fd
--- /dev/null
+++ b/src/backend/statistics/README
@@ -0,0 +1,34 @@
+Extended statistics
+===================
+
+When estimating various quantities (e.g. condition selectivities) the default
+approach relies on the assumption of independence. In practice that's often
+not true, resulting in estimation errors.
+
+Extended statistics track different types of dependencies between the columns,
+hopefully improving the estimates and producing better plans.
+
+Currently we only have one type of extended statistics - ndistinct
+coefficients, and we use it to improve estimates of grouping queries. See
+README.ndistinct for details.
+
+
+Size of sample in ANALYZE
+-------------------------
+When performing ANALYZE, the number of rows to sample is determined as
+
+ (300 * statistics_target)
+
+That works reasonably well for statistics on individual columns, but perhaps
+it's not enough for extended statistics. Papers analyzing estimation errors
+all use samples proportional to the table size (usually finding that 1-3% of
+the table is enough to build accurate stats).
+
+The requested accuracy (number of MCV items or histogram bins) should also
+be considered when determining the sample size, and in extended statistics
+those are not necessarily limited by statistics_target.
+
+This, however, merits further discussion, because collecting the sample is
+already quite expensive, and increasing it further would make ANALYZE even
+more painful. Judging by experiments with the current implementation, the
+fixed sample size seems to work reasonably well for now, so we leave this as
+future work.
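For instance, with the default statistics target of 100 the formula above gives 300 * 100 = 30,000 sampled rows, regardless of table size; that fixed sample is what the preceding paragraph refers to.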
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
new file mode 100644
index 00000000000..d2b9f6a7c70
--- /dev/null
+++ b/src/backend/statistics/extended_stats.c
@@ -0,0 +1,389 @@
+/*-------------------------------------------------------------------------
+ *
+ * extended_stats.c
+ * POSTGRES extended statistics
+ *
+ * Generic code supporting statistic objects created via CREATE STATISTICS.
+ *
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/statistics/extended_stats.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "catalog/indexing.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_statistic_ext.h"
+#include "nodes/relation.h"
+#include "statistics/extended_stats_internal.h"
+#include "statistics/statistics.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+/*
+ * Used internally to refer to an individual pg_statistic_ext entry.
+ */
+typedef struct StatExtEntry
+{
+ Oid statOid; /* OID of pg_statistic_ext entry */
+ Bitmapset *columns; /* attribute numbers covered by the statistics */
+ List *types; /* 'char' list of enabled statistic kinds */
+} StatExtEntry;
+
+
+static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
+static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
+ int natts, VacAttrStats **vacattrstats);
+static void statext_store(Relation pg_stext, Oid relid,
+ MVNDistinct *ndistinct,
+ VacAttrStats **stats);
+
+
+/*
+ * Compute requested extended stats, using the rows sampled for the plain
+ * (single-column) stats.
+ *
+ * This fetches a list of stats from pg_statistic_ext, computes the stats
+ * and serializes them back into the catalog (as bytea values).
+ */
+void
+BuildRelationExtStatistics(Relation onerel, double totalrows,
+ int numrows, HeapTuple *rows,
+ int natts, VacAttrStats **vacattrstats)
+{
+ Relation pg_stext;
+ ListCell *lc;
+ List *stats;
+
+ pg_stext = heap_open(StatisticExtRelationId, RowExclusiveLock);
+ stats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
+
+ foreach(lc, stats)
+ {
+ StatExtEntry *stat = (StatExtEntry *) lfirst(lc);
+ MVNDistinct *ndistinct = NULL;
+ VacAttrStats **stats;
+ ListCell *lc2;
+
+ /* filter only the interesting vacattrstats records */
+ stats = lookup_var_attr_stats(onerel, stat->columns,
+ natts, vacattrstats);
+
+ /* check allowed number of dimensions */
+ Assert(bms_num_members(stat->columns) >= 2 &&
+ bms_num_members(stat->columns) <= STATS_MAX_DIMENSIONS);
+
+ /* compute statistic of each type */
+ foreach(lc2, stat->types)
+ {
+ char t = (char) lfirst_int(lc2);
+
+ if (t == STATS_EXT_NDISTINCT)
+ ndistinct = statext_ndistinct_build(totalrows, numrows, rows,
+ stat->columns, stats);
+ }
+
+ /* store the statistics in the catalog */
+ statext_store(pg_stext, stat->statOid, ndistinct, stats);
+ }
+
+ heap_close(pg_stext, RowExclusiveLock);
+}
+
+/*
+ * statext_is_kind_built
+ * Is this stat kind built in the given pg_statistic_ext tuple?
+ */
+bool
+statext_is_kind_built(HeapTuple htup, char type)
+{
+ AttrNumber attnum;
+
+ switch (type)
+ {
+ case STATS_EXT_NDISTINCT:
+ attnum = Anum_pg_statistic_ext_standistinct;
+ break;
+
+ default:
+ elog(ERROR, "unexpected statistics type requested: %d", type);
+ }
+
+ return !heap_attisnull(htup, attnum);
+}
+
+/*
+ * Return a list (of StatExtEntry) of statistics for the given relation.
+ */
+static List *
+fetch_statentries_for_relation(Relation pg_statext, Oid relid)
+{
+ SysScanDesc scan;
+ ScanKeyData skey;
+ HeapTuple htup;
+ List *result = NIL;
+
+ /*
+ * Prepare to scan pg_statistic_ext for entries having starelid = this
+ * rel.
+ */
+ ScanKeyInit(&skey,
+ Anum_pg_statistic_ext_starelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(relid));
+
+ scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true,
+ NULL, 1, &skey);
+
+ while (HeapTupleIsValid(htup = systable_getnext(scan)))
+ {
+ StatExtEntry *entry;
+ Datum datum;
+ bool isnull;
+ int i;
+ ArrayType *arr;
+ char *enabled;
+ Form_pg_statistic_ext staForm;
+
+ entry = palloc0(sizeof(StatExtEntry));
+ entry->statOid = HeapTupleGetOid(htup);
+ staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
+ for (i = 0; i < staForm->stakeys.dim1; i++)
+ {
+ entry->columns = bms_add_member(entry->columns,
+ staForm->stakeys.values[i]);
+ }
+
+ /* decode the staenabled char array into a list of chars */
+ datum = SysCacheGetAttr(STATEXTOID, htup,
+ Anum_pg_statistic_ext_staenabled, &isnull);
+ Assert(!isnull);
+ arr = DatumGetArrayTypeP(datum);
+ if (ARR_NDIM(arr) != 1 ||
+ ARR_HASNULL(arr) ||
+ ARR_ELEMTYPE(arr) != CHAROID)
+ elog(ERROR, "staenabled is not a 1-D char array");
+ enabled = (char *) ARR_DATA_PTR(arr);
+ for (i = 0; i < ARR_DIMS(arr)[0]; i++)
+ {
+ Assert(enabled[i] == STATS_EXT_NDISTINCT);
+ entry->types = lappend_int(entry->types, (int) enabled[i]);
+ }
+
+ result = lappend(result, entry);
+ }
+
+ systable_endscan(scan);
+
+ return result;
+}
+
+/*
+ * Using 'vacattrstats' of size 'natts' as input data, return a newly built
+ * VacAttrStats array which includes only the items corresponding to attributes
+ * indicated by 'attrs'.
+ */
+static VacAttrStats **
+lookup_var_attr_stats(Relation rel, Bitmapset *attrs, int natts,
+ VacAttrStats **vacattrstats)
+{
+ int i = 0;
+ int x = -1;
+ VacAttrStats **stats;
+ Bitmapset *matched = NULL;
+
+ stats = (VacAttrStats **)
+ palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
+
+ /* lookup VacAttrStats info for the requested columns (same attnum) */
+ while ((x = bms_next_member(attrs, x)) >= 0)
+ {
+ int j;
+
+ stats[i] = NULL;
+ for (j = 0; j < natts; j++)
+ {
+ if (x == vacattrstats[j]->tupattnum)
+ {
+ stats[i] = vacattrstats[j];
+ break;
+ }
+ }
+
+ if (!stats[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("extended statistics could not be collected for column \"%s\" of relation %s.%s",
+ NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname),
+ get_namespace_name(rel->rd_rel->relnamespace),
+ RelationGetRelationName(rel)),
+ errhint("Consider ALTER TABLE \"%s\".\"%s\" ALTER \"%s\" SET STATISTICS -1",
+ get_namespace_name(rel->rd_rel->relnamespace),
+ RelationGetRelationName(rel),
+ NameStr(RelationGetDescr(rel)->attrs[x - 1]->attname))));
+
+ /*
+ * Check that we found a non-dropped column and that the attnum
+ * matches.
+ */
+ Assert(!stats[i]->attr->attisdropped);
+ matched = bms_add_member(matched, stats[i]->tupattnum);
+
+ i++;
+ }
+ if (bms_subset_compare(matched, attrs) != BMS_EQUAL)
+ elog(ERROR, "could not find all attributes in attribute stats array");
+ bms_free(matched);
+
+ return stats;
+}
+
+/*
+ * statext_store
+ * Serializes the statistics and stores them into the pg_statistic_ext tuple.
+ */
+static void
+statext_store(Relation pg_stext, Oid statOid,
+ MVNDistinct *ndistinct,
+ VacAttrStats **stats)
+{
+ HeapTuple stup,
+ oldtup;
+ Datum values[Natts_pg_statistic_ext];
+ bool nulls[Natts_pg_statistic_ext];
+ bool replaces[Natts_pg_statistic_ext];
+
+ memset(nulls, 1, Natts_pg_statistic_ext * sizeof(bool));
+ memset(replaces, 0, Natts_pg_statistic_ext * sizeof(bool));
+ memset(values, 0, Natts_pg_statistic_ext * sizeof(Datum));
+
+ /*
+ * Construct a new pg_statistic_ext tuple, replacing the calculated stats.
+ */
+ if (ndistinct != NULL)
+ {
+ bytea *data = statext_ndistinct_serialize(ndistinct);
+
+ nulls[Anum_pg_statistic_ext_standistinct - 1] = (data == NULL);
+ values[Anum_pg_statistic_ext_standistinct - 1] = PointerGetDatum(data);
+ }
+
+ /* always replace the value (either by bytea or NULL) */
+ replaces[Anum_pg_statistic_ext_standistinct - 1] = true;
+
+ /* there should already be a pg_statistic_ext tuple */
+ oldtup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
+ if (!HeapTupleIsValid(oldtup))
+ elog(ERROR, "cache lookup failed for extended statistics %u", statOid);
+
+ /* replace it */
+ stup = heap_modify_tuple(oldtup,
+ RelationGetDescr(pg_stext),
+ values,
+ nulls,
+ replaces);
+ ReleaseSysCache(oldtup);
+ CatalogTupleUpdate(pg_stext, &stup->t_self, stup);
+
+ heap_freetuple(stup);
+}
+
+/* initialize multi-dimensional sort */
+MultiSortSupport
+multi_sort_init(int ndims)
+{
+ MultiSortSupport mss;
+
+ Assert(ndims >= 2);
+
+ mss = (MultiSortSupport) palloc0(offsetof(MultiSortSupportData, ssup)
+ +sizeof(SortSupportData) * ndims);
+
+ mss->ndims = ndims;
+
+ return mss;
+}
+
+/*
+ * Prepare sort support info using the given sort operator
+ * at the position 'sortdim'
+ */
+void
+multi_sort_add_dimension(MultiSortSupport mss, int sortdim, Oid oper)
+{
+ SortSupport ssup = &mss->ssup[sortdim];
+
+ ssup->ssup_cxt = CurrentMemoryContext;
+ ssup->ssup_collation = DEFAULT_COLLATION_OID;
+ ssup->ssup_nulls_first = false;
+ ssup->ssup_cxt = CurrentMemoryContext;
+
+ PrepareSortSupportFromOrderingOp(oper, ssup);
+}
+
+/* compare all the dimensions in the selected order */
+int
+multi_sort_compare(const void *a, const void *b, void *arg)
+{
+ MultiSortSupport mss = (MultiSortSupport) arg;
+ SortItem *ia = (SortItem *) a;
+ SortItem *ib = (SortItem *) b;
+ int i;
+
+ for (i = 0; i < mss->ndims; i++)
+ {
+ int compare;
+
+ compare = ApplySortComparator(ia->values[i], ia->isnull[i],
+ ib->values[i], ib->isnull[i],
+ &mss->ssup[i]);
+
+ if (compare != 0)
+ return compare;
+ }
+
+ /* equal by default */
+ return 0;
+}
+
+/* compare selected dimension */
+int
+multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b,
+ MultiSortSupport mss)
+{
+ return ApplySortComparator(a->values[dim], a->isnull[dim],
+ b->values[dim], b->isnull[dim],
+ &mss->ssup[dim]);
+}
+
+int
+multi_sort_compare_dims(int start, int end,
+ const SortItem *a, const SortItem *b,
+ MultiSortSupport mss)
+{
+ int dim;
+
+ for (dim = start; dim <= end; dim++)
+ {
+ int r = ApplySortComparator(a->values[dim], a->isnull[dim],
+ b->values[dim], b->isnull[dim],
+ &mss->ssup[dim]);
+
+ if (r != 0)
+ return r;
+ }
+
+ return 0;
+}
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
new file mode 100644
index 00000000000..5df4e29eec3
--- /dev/null
+++ b/src/backend/statistics/mvdistinct.c
@@ -0,0 +1,671 @@
+/*-------------------------------------------------------------------------
+ *
+ * mvdistinct.c
+ * POSTGRES multivariate ndistinct coefficients
+ *
+ * Estimating the number of groups in a combination of columns (e.g. for
+ * GROUP BY) is tricky, and the estimation error is often significant.
+ *
+ * The multivariate ndistinct coefficients address this by storing ndistinct
+ * estimates for combinations of the user-specified columns. So for example
+ * given a statistics object on three columns (a,b,c), this module estimates
+ * and store n-distinct for (a,b), (a,c), (b,c) and (a,b,c). The per-column
+ * estimates are already available in pg_statistic.
+ *
+ *
+ * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/statistics/mvdistinct.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_statistic_ext.h"
+#include "utils/fmgrprotos.h"
+#include "utils/lsyscache.h"
+#include "lib/stringinfo.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+#include "statistics/extended_stats_internal.h"
+#include "statistics/statistics.h"
+
+
+static double ndistinct_for_combination(double totalrows, int numrows,
+ HeapTuple *rows, VacAttrStats **stats,
+ int k, int *combination);
+static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
+static int n_choose_k(int n, int k);
+static int num_combinations(int n);
+
+/* Combination generator API */
+
+/* internal state for generator of k-combinations of n elements */
+typedef struct CombinationGenerator
+{
+ int k; /* size of the combination */
+ int n; /* total number of elements */
+ int current; /* index of the next combination to return */
+ int ncombinations; /* number of combinations (size of array) */
+ int *combinations; /* array of pre-built combinations */
+} CombinationGenerator;
+
+static CombinationGenerator *generator_init(int n, int k);
+static void generator_free(CombinationGenerator *state);
+static int *generator_next(CombinationGenerator *state);
+static void generate_combinations(CombinationGenerator *state);
+
+
+/*
+ * statext_ndistinct_build
+ * Compute ndistinct coefficient for the combination of attributes.
+ *
+ * This computes the ndistinct estimate using the same estimator used
+ * in analyze.c and then computes the coefficient.
+ */
+MVNDistinct *
+statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
+ Bitmapset *attrs, VacAttrStats **stats)
+{
+ MVNDistinct *result;
+ int k;
+ int itemcnt;
+ int numattrs = bms_num_members(attrs);
+ int numcombs = num_combinations(numattrs);
+
+ result = palloc(offsetof(MVNDistinct, items) +
+ numcombs * sizeof(MVNDistinctItem));
+ result->magic = STATS_NDISTINCT_MAGIC;
+ result->type = STATS_NDISTINCT_TYPE_BASIC;
+ result->nitems = numcombs;
+
+ itemcnt = 0;
+ for (k = 2; k <= numattrs; k++)
+ {
+ int *combination;
+ CombinationGenerator *generator;
+
+ /* generate combinations of K out of N elements */
+ generator = generator_init(numattrs, k);
+
+ while ((combination = generator_next(generator)))
+ {
+ MVNDistinctItem *item = &result->items[itemcnt];
+ int j;
+
+ item->attrs = NULL;
+ for (j = 0; j < k; j++)
+ item->attrs = bms_add_member(item->attrs,
+ stats[combination[j]]->attr->attnum);
+ item->ndistinct =
+ ndistinct_for_combination(totalrows, numrows, rows,
+ stats, k, combination);
+
+ itemcnt++;
+ Assert(itemcnt <= result->nitems);
+ }
+
+ generator_free(generator);
+ }
+
+ /* must consume exactly the whole output array */
+ Assert(itemcnt == result->nitems);
+
+ return result;
+}
+
+/*
+ * statext_ndistinct_load
+ * Load the ndistinct value for the indicated pg_statistic_ext tuple
+ */
+MVNDistinct *
+statext_ndistinct_load(Oid mvoid)
+{
+ bool isnull = false;
+ Datum ndist;
+ HeapTuple htup;
+
+ htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(mvoid));
+ if (!htup)
+ elog(ERROR, "cache lookup failed for statistics %u", mvoid);
+
+ ndist = SysCacheGetAttr(STATEXTOID, htup,
+ Anum_pg_statistic_ext_standistinct, &isnull);
+ if (isnull)
+ elog(ERROR,
+ "requested statistic kind %c not yet built for statistics %u",
+ STATS_EXT_NDISTINCT, mvoid);
+
+ ReleaseSysCache(htup);
+
+ return statext_ndistinct_deserialize(DatumGetByteaP(ndist));
+}
+
+/*
+ * statext_ndistinct_serialize
+ * serialize ndistinct to the on-disk bytea format
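+ *
+ * The format produced below is, roughly: the fixed-size MVNDistinct header
+ * (magic, type, nitems), followed for each item by the double ndistinct
+ * estimate, the number of attributes (int) and that many AttrNumbers.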
+ */
+bytea *
+statext_ndistinct_serialize(MVNDistinct *ndistinct)
+{
+ int i;
+ bytea *output;
+ char *tmp;
+ Size len;
+
+ Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
+ Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);
+
+ /*
+ * Base size is the size of the base struct, plus, for each item, space for
+ * the ndistinct value and the number of attributes.
+ */
+ len = VARHDRSZ + offsetof(MVNDistinct, items) +
+ ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + sizeof(int));
+
+ /* and also include space for the actual attribute numbers */
+ for (i = 0; i < ndistinct->nitems; i++)
+ {
+ int nmembers;
+
+ nmembers = bms_num_members(ndistinct->items[i].attrs);
+ Assert(nmembers >= 2);
+ len += sizeof(AttrNumber) * nmembers;
+ }
+
+ output = (bytea *) palloc(len);
+ SET_VARSIZE(output, len);
+
+ tmp = VARDATA(output);
+
+ /* Store the base struct values */
+ memcpy(tmp, ndistinct, offsetof(MVNDistinct, items));
+ tmp += offsetof(MVNDistinct, items);
+
+ /*
+ * store number of attributes and attribute numbers for each ndistinct
+ * entry
+ */
+ for (i = 0; i < ndistinct->nitems; i++)
+ {
+ MVNDistinctItem item = ndistinct->items[i];
+ int nmembers = bms_num_members(item.attrs);
+ int x;
+
+ memcpy(tmp, &item.ndistinct, sizeof(double));
+ tmp += sizeof(double);
+ memcpy(tmp, &nmembers, sizeof(int));
+ tmp += sizeof(int);
+
+ x = -1;
+ while ((x = bms_next_member(item.attrs, x)) >= 0)
+ {
+ AttrNumber value = (AttrNumber) x;
+
+ memcpy(tmp, &value, sizeof(AttrNumber));
+ tmp += sizeof(AttrNumber);
+ }
+
+ Assert(tmp <= ((char *) output + len));
+ }
+
+ return output;
+}
+
+/*
+ * statext_ndistinct_deserialize
+ * Read an MVNDistinct from its on-disk bytea format into the in-memory format
+ */
+MVNDistinct *
+statext_ndistinct_deserialize(bytea *data)
+{
+ int i;
+ Size expected_size;
+ MVNDistinct *ndistinct;
+ char *tmp;
+
+ if (data == NULL)
+ return NULL;
+
+ if (VARSIZE_ANY_EXHDR(data) < offsetof(MVNDistinct, items))
+ elog(ERROR, "invalid MVNDistinct size %ld (expected at least %ld)",
+ VARSIZE_ANY_EXHDR(data), offsetof(MVNDistinct, items));
+
+ /* read the MVNDistinct header */
+ ndistinct = (MVNDistinct *) palloc(sizeof(MVNDistinct));
+
+ /* initialize pointer to the data part (skip the varlena header) */
+ tmp = VARDATA_ANY(data);
+
+ /* get the header and perform basic sanity checks */
+ memcpy(ndistinct, tmp, offsetof(MVNDistinct, items));
+ tmp += offsetof(MVNDistinct, items);
+
+ if (ndistinct->magic != STATS_NDISTINCT_MAGIC)
+ elog(ERROR, "invalid ndistinct magic %d (expected %d)",
+ ndistinct->magic, STATS_NDISTINCT_MAGIC);
+
+ if (ndistinct->type != STATS_NDISTINCT_TYPE_BASIC)
+ elog(ERROR, "invalid ndistinct type %d (expected %d)",
+ ndistinct->type, STATS_NDISTINCT_TYPE_BASIC);
+
+ Assert(ndistinct->nitems > 0);
+
+ /* what minimum bytea size do we expect for those parameters */
+ expected_size = offsetof(MVNDistinct, items) +
+ ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) +
+ sizeof(AttrNumber) * 2);
+
+ if (VARSIZE_ANY_EXHDR(data) < expected_size)
+ elog(ERROR, "invalid MVNDistinct size %ld (expected at least %ld)",
+ VARSIZE_ANY_EXHDR(data), expected_size);
+
+ /* allocate space for the ndistinct items */
+ ndistinct = repalloc(ndistinct, offsetof(MVNDistinct, items) +
+ (ndistinct->nitems * sizeof(MVNDistinctItem)));
+
+ for (i = 0; i < ndistinct->nitems; i++)
+ {
+ MVNDistinctItem *item = &ndistinct->items[i];
+ int nelems;
+
+ item->attrs = NULL;
+
+ /* ndistinct value */
+ memcpy(&item->ndistinct, tmp, sizeof(double));
+ tmp += sizeof(double);
+
+ /* number of attributes */
+ memcpy(&nelems, tmp, sizeof(int));
+ tmp += sizeof(int);
+ Assert((nelems >= 2) && (nelems <= STATS_MAX_DIMENSIONS));
+
+ while (nelems-- > 0)
+ {
+ AttrNumber attno;
+
+ memcpy(&attno, tmp, sizeof(AttrNumber));
+ tmp += sizeof(AttrNumber);
+ item->attrs = bms_add_member(item->attrs, attno);
+ }
+
+ /* still within the bytea */
+ Assert(tmp <= ((char *) data + VARSIZE_ANY(data)));
+ }
+
+ /* we should have consumed the whole bytea exactly */
+ Assert(tmp == ((char *) data + VARSIZE_ANY(data)));
+
+ return ndistinct;
+}
+
+/*
+ * pg_ndistinct_in
+ * input routine for type pg_ndistinct
+ *
+ * pg_ndistinct is real enough to be a table column, but it has no
+ * operations of its own, and disallows input (just like pg_node_tree).
+ */
+Datum
+pg_ndistinct_in(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot accept a value of type %s", "pg_ndistinct")));
+
+ PG_RETURN_VOID(); /* keep compiler quiet */
+}
+
+/*
+ * pg_ndistinct_out
+ * output routine for type pg_ndistinct
+ *
+ * Produces a human-readable representation of the value.
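+ *
+ * The result looks roughly like this (illustrative values):
+ *
+ *     [{(b 1 2), 33.000000}, {(b 1 3), 270.000000}, {(b 1 2 3), 540.000000}]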
+ */
+Datum
+pg_ndistinct_out(PG_FUNCTION_ARGS)
+{
+ bytea *data = PG_GETARG_BYTEA_PP(0);
+ MVNDistinct *ndist = statext_ndistinct_deserialize(data);
+ int i;
+ StringInfoData str;
+
+ initStringInfo(&str);
+ appendStringInfoChar(&str, '[');
+
+ for (i = 0; i < ndist->nitems; i++)
+ {
+ MVNDistinctItem item = ndist->items[i];
+
+ if (i > 0)
+ appendStringInfoString(&str, ", ");
+
+ appendStringInfoChar(&str, '{');
+ outBitmapset(&str, item.attrs);
+ appendStringInfo(&str, ", %f}", item.ndistinct);
+ }
+
+ appendStringInfoChar(&str, ']');
+
+ PG_RETURN_CSTRING(str.data);
+}
+
+/*
+ * pg_ndistinct_recv
+ * binary input routine for type pg_ndistinct
+ */
+Datum
+pg_ndistinct_recv(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot accept a value of type %s", "pg_ndistinct")));
+
+ PG_RETURN_VOID(); /* keep compiler quiet */
+}
+
+/*
+ * pg_ndistinct_send
+ * binary output routine for type pg_ndistinct
+ *
+ * n-distinct is serialized into a bytea value, so let's send that.
+ */
+Datum
+pg_ndistinct_send(PG_FUNCTION_ARGS)
+{
+ return byteasend(fcinfo);
+}
+
+/*
+ * ndistinct_for_combination
+ * Estimates number of distinct values in a combination of columns.
+ *
+ * This uses the same ndistinct estimator as compute_scalar_stats() in
+ * ANALYZE, i.e.,
+ * n*d / (n - f1 + f1*n/N)
+ *
+ * except that instead of values in a single column we are dealing with
+ * combination of multiple columns.
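+ *
+ * where n is the number of sampled rows, N the total number of rows in the
+ * table, d the number of distinct combinations observed in the sample, and
+ * f1 the number of combinations observed exactly once.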
+ */
+static double
+ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
+ VacAttrStats **stats, int k, int *combination)
+{
+ int i,
+ j;
+ int f1,
+ cnt,
+ d;
+ bool *isnull;
+ Datum *values;
+ SortItem *items;
+ MultiSortSupport mss;
+
+ mss = multi_sort_init(k);
+
+ /*
+ * In order to determine the number of distinct elements, create separate
+ * values[]/isnull[] arrays with all the data we have, then sort them
+ * using the specified column combination as dimensions. We could try to
+ * sort in place, but it'd probably be more complex and bug-prone.
+ */
+ items = (SortItem *) palloc(numrows * sizeof(SortItem));
+ values = (Datum *) palloc0(sizeof(Datum) * numrows * k);
+ isnull = (bool *) palloc0(sizeof(bool) * numrows * k);
+
+ for (i = 0; i < numrows; i++)
+ {
+ items[i].values = &values[i * k];
+ items[i].isnull = &isnull[i * k];
+ }
+
+ /*
+ * For each dimension, set up sort-support and fill in the values from
+ * the sample data.
+ */
+ for (i = 0; i < k; i++)
+ {
+ VacAttrStats *colstat = stats[combination[i]];
+ TypeCacheEntry *type;
+
+ type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
+ if (type->lt_opr == InvalidOid) /* shouldn't happen */
+ elog(ERROR, "cache lookup failed for ordering operator for type %u",
+ colstat->attrtypid);
+
+ /* prepare the sort function for this dimension */
+ multi_sort_add_dimension(mss, i, type->lt_opr);
+
+ /* accumulate all the data for this dimension into the arrays */
+ for (j = 0; j < numrows; j++)
+ {
+ items[j].values[i] =
+ heap_getattr(rows[j],
+ colstat->attr->attnum,
+ colstat->tupDesc,
+ &items[j].isnull[i]);
+ }
+ }
+
+ /* We can sort the array now ... */
+ qsort_arg((void *) items, numrows, sizeof(SortItem),
+ multi_sort_compare, mss);
+
+ /* ... and count the number of distinct combinations */
+
+ f1 = 0;
+ cnt = 1;
+ d = 1;
+ for (i = 1; i < numrows; i++)
+ {
+ if (multi_sort_compare(&items[i], &items[i - 1], mss) != 0)
+ {
+ if (cnt == 1)
+ f1 += 1;
+
+ d++;
+ cnt = 0;
+ }
+
+ cnt += 1;
+ }
+
+ if (cnt == 1)
+ f1 += 1;
+
+ return estimate_ndistinct(totalrows, numrows, d, f1);
+}
+
+/* The Duj1 estimator (already used in analyze.c). */
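+/*
+ * For example (purely illustrative numbers): numrows = 30000, totalrows =
+ * 1000000, d = 10000 and f1 = 5000 give numer = 3.0e8 and denom = 25000 +
+ * 150, i.e. an estimate of roughly 11928 distinct groups.
+ */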
+static double
+estimate_ndistinct(double totalrows, int numrows, int d, int f1)
+{
+ double numer,
+ denom,
+ ndistinct;
+
+ numer = (double) numrows * (double) d;
+
+ denom = (double) (numrows - f1) +
+ (double) f1 *(double) numrows / totalrows;
+
+ ndistinct = numer / denom;
+
+ /* Clamp to sane range in case of roundoff error */
+ if (ndistinct < (double) d)
+ ndistinct = (double) d;
+
+ if (ndistinct > totalrows)
+ ndistinct = totalrows;
+
+ return floor(ndistinct + 0.5);
+}
+
+/*
+ * n_choose_k
+ * computes binomial coefficients using an algorithm that is both
+ * efficient and resistant to integer overflow
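+ * (for example, n_choose_k(4, 2) = 6; dividing after every multiplication
+ * keeps the running value equal to a binomial coefficient, so the division
+ * is always exact)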
+ */
+static int
+n_choose_k(int n, int k)
+{
+ int d,
+ r;
+
+ Assert((k > 0) && (n >= k));
+
+ /* use symmetry of the binomial coefficients */
+ k = Min(k, n - k);
+
+ r = 1;
+ for (d = 1; d <= k; ++d)
+ {
+ r *= n--;
+ r /= d;
+ }
+
+ return r;
+}
+
+/*
+ * num_combinations
+ * number of combinations, excluding the empty and single-column combinations
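+ *
+ * This is 2^n - (n + 1): all subsets of the n columns minus the empty set
+ * and the n single-column subsets.  For n = 3 that gives 4 combinations,
+ * matching the (a,b), (a,c), (b,c), (a,b,c) example in the file header.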
+ */
+static int
+num_combinations(int n)
+{
+ int k;
+ int ncombs = 1;
+
+ for (k = 1; k <= n; k++)
+ ncombs *= 2;
+
+ ncombs -= (n + 1);
+
+ return ncombs;
+}
+
+/*
+ * generator_init
+ * initialize the generator of combinations
+ *
+ * The generator produces combinations of K element indexes in the range
+ * 0 .. (N-1).  We prebuild all the combinations in this function, which is
+ * simpler than generating them on the fly.
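+ *
+ * Typical usage, as in statext_ndistinct_build() above (sketch only):
+ *
+ *     generator = generator_init(n, k);
+ *     while ((combination = generator_next(generator)) != NULL)
+ *         ... use combination[0 .. k-1] as attribute indexes ...
+ *     generator_free(generator);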
+ */
+static CombinationGenerator *
+generator_init(int n, int k)
+{
+ CombinationGenerator *state;
+
+ Assert((n >= k) && (k > 0));
+
+ /* allocate the generator state as a single chunk of memory */
+ state = (CombinationGenerator *) palloc(sizeof(CombinationGenerator));
+
+ state->ncombinations = n_choose_k(n, k);
+
+ /* pre-allocate space for all combinations */
+ state->combinations = (int *) palloc(sizeof(int) * k * state->ncombinations);
+
+ state->current = 0;
+ state->k = k;
+ state->n = n;
+
+ /* now actually pre-generate all the combinations of K elements */
+ generate_combinations(state);
+
+ /* make sure we got the expected number of combinations */
+ Assert(state->current == state->ncombinations);
+
+ /* reset the number, so we start with the first one */
+ state->current = 0;
+
+ return state;
+}
+
+/*
+ * generator_next
+ * returns the next combination from the prebuilt list
+ *
+ * Returns a combination of K array indexes (0 .. N-1), as specified to
+ * generator_init(), or NULL when there are no more combinations.
+ */
+static int *
+generator_next(CombinationGenerator *state)
+{
+ if (state->current == state->ncombinations)
+ return NULL;
+
+ return &state->combinations[state->k * state->current++];
+}
+
+/*
+ * generator_free
+ * free the internal state of the generator
+ *
+ * Releases the generator internal state (pre-built combinations).
+ */
+static void
+generator_free(CombinationGenerator *state)
+{
+ pfree(state->combinations);
+ pfree(state);
+}
+
+/*
+ * generate_combinations_recurse
+ * given a prefix, generate all possible combinations
+ *
+ * Given a prefix (the first few elements of the combination), generate the
+ * remaining elements recursively.  We generate the combinations in
+ * lexicographic order, which eliminates permutations of the same combination.
+ */
+static void
+generate_combinations_recurse(CombinationGenerator *state,
+ int index, int start, int *current)
+{
+ /* If we haven't filled all the elements, simply recurse. */
+ if (index < state->k)
+ {
+ int i;
+
+ /*
+ * The values have to be in ascending order, so make sure we start
+ * with the value passed in via the "start" parameter.
+ */
+
+ for (i = start; i < state->n; i++)
+ {
+ current[index] = i;
+ generate_combinations_recurse(state, (index + 1), (i + 1), current);
+ }
+
+ return;
+ }
+ else
+ {
+ /* we got a valid combination, add it to the array */
+ memcpy(&state->combinations[(state->k * state->current)],
+ current, state->k * sizeof(int));
+ state->current++;
+ }
+}
+
+/*
+ * generate_combinations
+ * generate all k-combinations of N elements
+ */
+static void
+generate_combinations(CombinationGenerator *state)
+{
+ int *current = (int *) palloc0(sizeof(int) * state->k);
+
+ generate_combinations_recurse(state, 0, 0, current);
+
+ pfree(current);
+}
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index c8d20fffeaf..b59821bf978 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -1623,6 +1623,10 @@ ProcessUtilitySlow(ParseState *pstate,
commandCollected = true;
break;
+ case T_CreateStatsStmt:
+ address = CreateStatistics((CreateStatsStmt *) parsetree);
+ break;
+
case T_AlterCollationStmt:
address = AlterCollation((AlterCollationStmt *) parsetree);
break;
@@ -1992,6 +1996,8 @@ AlterObjectTypeCommandTag(ObjectType objtype)
break;
case OBJECT_SUBSCRIPTION:
tag = "ALTER SUBSCRIPTION";
+ break;
+ case OBJECT_STATISTIC_EXT:
+ tag = "ALTER STATISTICS";
break;
default:
tag = "???";
@@ -2286,6 +2292,8 @@ CreateCommandTag(Node *parsetree)
break;
case OBJECT_PUBLICATION:
tag = "DROP PUBLICATION";
+ break;
+ case OBJECT_STATISTIC_EXT:
+ tag = "DROP STATISTICS";
break;
default:
tag = "???";
@@ -2689,6 +2697,10 @@ CreateCommandTag(Node *parsetree)
tag = "EXECUTE";
break;
+ case T_CreateStatsStmt:
+ tag = "CREATE STATISTICS";
+ break;
+
case T_DeallocateStmt:
{
DeallocateStmt *stmt = (DeallocateStmt *) parsetree;
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 5c823250bc2..81c91039e40 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -35,6 +35,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_proc.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
@@ -317,6 +318,7 @@ static char *pg_get_indexdef_worker(Oid indexrelid, int colno,
const Oid *excludeOps,
bool attrsOnly, bool showTblSpc,
int prettyFlags, bool missing_ok);
+static char *pg_get_statisticsext_worker(Oid statextid, bool missing_ok);
static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags,
bool attrsOnly);
static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
@@ -1422,6 +1424,85 @@ pg_get_indexdef_worker(Oid indexrelid, int colno,
}
/*
+ * pg_get_statisticsextdef
+ * Get the definition of an extended statistics object
+ */
+Datum
+pg_get_statisticsextdef(PG_FUNCTION_ARGS)
+{
+ Oid statextid = PG_GETARG_OID(0);
+ char *res;
+
+ res = pg_get_statisticsext_worker(statextid, true);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+/*
+ * Internal workhorse to decompile an extended statistics object.
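+ *
+ * The returned string has this general form (names are illustrative):
+ *
+ *     CREATE STATISTICS stat_name ON (col1, col2) FROM table_name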
+ */
+static char *
+pg_get_statisticsext_worker(Oid statextid, bool missing_ok)
+{
+ Form_pg_statistic_ext statextrec;
+ Form_pg_class pgclassrec;
+ HeapTuple statexttup;
+ HeapTuple pgclasstup;
+ StringInfoData buf;
+ int colno;
+
+ statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
+
+ if (!HeapTupleIsValid(statexttup))
+ {
+ if (missing_ok)
+ return NULL;
+ elog(ERROR, "cache lookup failed for extended statistics %u", statextid);
+ }
+
+ statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup);
+
+ pgclasstup = SearchSysCache1(RELOID, ObjectIdGetDatum(statextrec->starelid));
+
+ if (!HeapTupleIsValid(pgclasstup))
+ {
+ ReleaseSysCache(statexttup);
+ elog(ERROR, "cache lookup failed for relation %u", statextrec->starelid);
+ }
+
+ pgclassrec = (Form_pg_class) GETSTRUCT(pgclasstup);
+
+ initStringInfo(&buf);
+
+ appendStringInfo(&buf, "CREATE STATISTICS %s ON (",
+ quote_identifier(NameStr(statextrec->staname)));
+
+ for (colno = 0; colno < statextrec->stakeys.dim1; colno++)
+ {
+ AttrNumber attnum = statextrec->stakeys.values[colno];
+ char *attname;
+
+ if (colno > 0)
+ appendStringInfoString(&buf, ", ");
+
+ attname = get_relid_attribute_name(statextrec->starelid, attnum);
+
+ appendStringInfoString(&buf, quote_identifier(attname));
+ }
+
+ appendStringInfo(&buf, ") FROM %s",
+ quote_identifier(NameStr(pgclassrec->relname)));
+
+ ReleaseSysCache(statexttup);
+ ReleaseSysCache(pgclasstup);
+
+ return buf.data;
+}
+
+/*
* pg_get_partkeydef
*
* Returns the partition key specification, ie, the following:
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index f8b28fe0e61..cc24c8aeb56 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -110,6 +110,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
#include "mb/pg_wchar.h"
@@ -126,6 +127,7 @@
#include "parser/parse_clause.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
+#include "statistics/statistics.h"
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/date.h"
@@ -164,6 +166,8 @@ static double eqjoinsel_inner(Oid operator,
static double eqjoinsel_semi(Oid operator,
VariableStatData *vardata1, VariableStatData *vardata2,
RelOptInfo *inner_rel);
+static bool estimate_multivariate_ndistinct(PlannerInfo *root,
+ RelOptInfo *rel, List **varinfos, double *ndistinct);
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound);
@@ -3398,25 +3402,25 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
{
GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
RelOptInfo *rel = varinfo1->rel;
- double reldistinct = varinfo1->ndistinct;
+ double reldistinct = 1;
double relmaxndistinct = reldistinct;
int relvarcount = 1;
List *newvarinfos = NIL;
+ List *relvarinfos = NIL;
/*
- * Get the product of numdistinct estimates of the Vars for this rel.
- * Also, construct new varinfos list of remaining Vars.
+ * Split the list of varinfos in two - one for the current rel,
+ * one for remaining Vars on other rels.
*/
+ relvarinfos = lcons(varinfo1, relvarinfos);
for_each_cell(l, lnext(list_head(varinfos)))
{
GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
if (varinfo2->rel == varinfo1->rel)
{
- reldistinct *= varinfo2->ndistinct;
- if (relmaxndistinct < varinfo2->ndistinct)
- relmaxndistinct = varinfo2->ndistinct;
- relvarcount++;
+ /* varinfos on current rel */
+ relvarinfos = lcons(varinfo2, relvarinfos);
}
else
{
@@ -3426,6 +3430,43 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
}
/*
+ * Get the numdistinct estimate for the Vars of this rel. We
+ * iteratively search for multivariate n-distinct with maximum number
+ * of vars; assuming that each var group is independent of the others,
+ * we multiply them together. Any remaining relvarinfos after
+ * no more multivariate matches are found are assumed independent too,
+ * so their individual ndistinct estimates are multiplied also.
+ */
+ while (relvarinfos)
+ {
+ double mvndistinct;
+
+ if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
+ &mvndistinct))
+ {
+ reldistinct *= mvndistinct;
+ if (relmaxndistinct < mvndistinct)
+ relmaxndistinct = mvndistinct;
+ relvarcount++; /* inaccurate, but doesn't matter */
+ }
+ else
+ {
+ foreach (l, relvarinfos)
+ {
+ GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
+
+ reldistinct *= varinfo2->ndistinct;
+ if (relmaxndistinct < varinfo2->ndistinct)
+ relmaxndistinct = varinfo2->ndistinct;
+ relvarcount++;
+ }
+
+ /* we're done with this relation */
+ relvarinfos = NIL;
+ }
+ }
+
+ /*
* Sanity check --- don't divide by zero if empty relation.
*/
Assert(rel->reloptkind == RELOPT_BASEREL);
@@ -3668,6 +3709,132 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
*/
/*
+ * Find applicable ndistinct statistics for the given list of VarInfos (which
+ * must all belong to the given rel), and update *ndistinct to the estimate of
+ * the MVNDistinctItem that best matches.  If a match is found, *varinfos is
+ * updated to remove the matched varinfos.
+ *
+ * Varinfos that aren't for simple Vars are ignored.
+ *
+ * Return TRUE if we're able to find a match, FALSE otherwise.
+ */
+static bool
+estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
+ List **varinfos, double *ndistinct)
+{
+ ListCell *lc;
+ Bitmapset *attnums = NULL;
+ int nmatches;
+ Oid statOid = InvalidOid;
+ MVNDistinct *stats;
+ Bitmapset *matched = NULL;
+
+ /* bail out immediately if the table has no extended statistics */
+ if (!rel->statlist)
+ return false;
+
+ /* Determine the attnums we're looking for */
+ foreach(lc, *varinfos)
+ {
+ GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+
+ Assert(varinfo->rel == rel);
+
+ if (IsA(varinfo->var, Var))
+ {
+ attnums = bms_add_member(attnums,
+ ((Var *) varinfo->var)->varattno);
+ }
+ }
+
+ /* look for the ndistinct statistics matching the most vars */
+ nmatches = 1; /* we require at least two matches */
+ foreach(lc, rel->statlist)
+ {
+ StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+ Bitmapset *shared;
+
+ /* skip statistics of other kinds */
+ if (info->kind != STATS_EXT_NDISTINCT)
+ continue;
+
+ /* compute attnums shared by the vars and the statistic */
+ shared = bms_intersect(info->keys, attnums);
+
+ /*
+ * Does this statistics object match more columns than the currently
+ * best one?  If so, use this one instead.
+ *
+ * XXX This should break ties using name of the statistic, or
+ * something like that, to make the outcome stable.
+ */
+ if (bms_num_members(shared) > nmatches)
+ {
+ statOid = info->statOid;
+ nmatches = bms_num_members(shared);
+ matched = shared;
+ }
+ }
+
+ /* No match? */
+ if (statOid == InvalidOid)
+ return false;
+ Assert(nmatches > 1 && matched != NULL);
+
+ stats = statext_ndistinct_load(statOid);
+
+ /*
+ * If we have a match, search it for the specific item that matches (there
+ * must be one), and construct the output values.
+ */
+ if (stats)
+ {
+ int i;
+ List *newlist = NIL;
+ MVNDistinctItem *item = NULL;
+
+ /* Find the specific item that exactly matches the combination */
+ for (i = 0; i < stats->nitems; i++)
+ {
+ MVNDistinctItem *tmpitem = &stats->items[i];
+
+ if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
+ {
+ item = tmpitem;
+ break;
+ }
+ }
+
+ /* make sure we found an item */
+ if (!item)
+ elog(ERROR, "corrupt MVNDistinct entry");
+
+ /* Form the output varinfo list, keeping only unmatched ones */
+ foreach(lc, *varinfos)
+ {
+ GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+ AttrNumber attnum;
+
+ if (!IsA(varinfo->var, Var))
+ {
+ newlist = lappend(newlist, varinfo);
+ continue;
+ }
+
+ attnum = ((Var *) varinfo->var)->varattno;
+ if (!bms_is_member(attnum, matched))
+ newlist = lappend(newlist, varinfo);
+ }
+
+ *varinfos = newlist;
+ *ndistinct = item->ndistinct;
+ return true;
+ }
+
+ return false;
+}
+
+/*
* convert_to_scalar
* Convert non-NULL values of the indicated types to the comparison
* scale needed by scalarineqsel().
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index ce55fc52777..a6b60c67caa 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -56,6 +56,7 @@
#include "catalog/pg_publication.h"
#include "catalog/pg_rewrite.h"
#include "catalog/pg_shseclabel.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_trigger.h"
@@ -4452,6 +4453,82 @@ RelationGetIndexList(Relation relation)
}
/*
+ * RelationGetStatExtList
+ * get a list of OIDs of extended statistics on this relation
+ *
+ * The statistics list is created only if someone requests it, in a way
+ * similar to RelationGetIndexList(). We scan pg_statistic_ext to find
+ * relevant statistics, and add the list to the relcache entry so that we
+ * won't have to compute it again. Note that shared cache inval of a
+ * relcache entry will delete the old list and set rd_statvalid to 0,
+ * so that we must recompute the statistics list on next request. This
+ * handles creation or deletion of a statistic.
+ *
+ * The returned list is guaranteed to be sorted in order by OID, although
+ * this is not currently needed.
+ *
+ * Since shared cache inval causes the relcache's copy of the list to go away,
+ * we return a copy of the list palloc'd in the caller's context. The caller
+ * may list_free() the returned list after scanning it. This is necessary
+ * since the caller will typically be doing syscache lookups on the relevant
+ * statistics, and syscache lookup could cause SI messages to be processed!
+ */
+List *
+RelationGetStatExtList(Relation relation)
+{
+ Relation indrel;
+ SysScanDesc indscan;
+ ScanKeyData skey;
+ HeapTuple htup;
+ List *result;
+ List *oldlist;
+ MemoryContext oldcxt;
+
+ /* Quick exit if we already computed the list. */
+ if (relation->rd_statvalid != 0)
+ return list_copy(relation->rd_statlist);
+
+ /*
+ * We build the list we intend to return (in the caller's context) while
+ * doing the scan. After successfully completing the scan, we copy that
+ * list into the relcache entry. This avoids cache-context memory leakage
+ * if we get some sort of error partway through.
+ */
+ result = NIL;
+
+ /* Prepare to scan pg_statistic_ext for entries having starelid = this rel. */
+ ScanKeyInit(&skey,
+ Anum_pg_statistic_ext_starelid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationGetRelid(relation)));
+
+ indrel = heap_open(StatisticExtRelationId, AccessShareLock);
+ indscan = systable_beginscan(indrel, StatisticExtRelidIndexId, true,
+ NULL, 1, &skey);
+
+ while (HeapTupleIsValid(htup = systable_getnext(indscan)))
+ /* TODO maybe include only already built statistics? */
+ result = insert_ordered_oid(result, HeapTupleGetOid(htup));
+
+ systable_endscan(indscan);
+
+ heap_close(indrel, AccessShareLock);
+
+ /* Now save a copy of the completed list in the relcache entry. */
+ oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+ oldlist = relation->rd_statlist;
+ relation->rd_statlist = list_copy(result);
+
+ relation->rd_statvalid = true;
+ MemoryContextSwitchTo(oldcxt);
+
+ /* Don't leak the old list, if there is one */
+ list_free(oldlist);
+
+ return result;
+}
+
+/*
* insert_ordered_oid
* Insert a new Oid into a sorted list of Oids, preserving ordering
*
@@ -5560,6 +5637,8 @@ load_relcache_init_file(bool shared)
rel->rd_pkattr = NULL;
rel->rd_idattr = NULL;
rel->rd_pubactions = NULL;
+ rel->rd_statvalid = false;
+ rel->rd_statlist = NIL;
rel->rd_createSubid = InvalidSubTransactionId;
rel->rd_newRelfilenodeSubid = InvalidSubTransactionId;
rel->rd_amcache = NULL;
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index d5a376406fe..d8c823f42b5 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -61,6 +61,7 @@
#include "catalog/pg_shseclabel.h"
#include "catalog/pg_replication_origin.h"
#include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_subscription.h"
#include "catalog/pg_subscription_rel.h"
#include "catalog/pg_tablespace.h"
@@ -726,6 +727,28 @@ static const struct cachedesc cacheinfo[] = {
},
32
},
+ {StatisticExtRelationId, /* STATEXTNAMENSP */
+ StatisticExtNameIndexId,
+ 2,
+ {
+ Anum_pg_statistic_ext_staname,
+ Anum_pg_statistic_ext_stanamespace,
+ 0,
+ 0
+ },
+ 4
+ },
+ {StatisticExtRelationId, /* STATEXTOID */
+ StatisticExtOidIndexId,
+ 1,
+ {
+ ObjectIdAttributeNumber,
+ 0,
+ 0,
+ 0
+ },
+ 4
+ },
{StatisticRelationId, /* STATRELATTINH */
StatisticRelidAttnumInhIndexId,
3,