aboutsummaryrefslogtreecommitdiff
path: root/src/bin/pg_dump/pg_dump.c
diff options
context:
space:
mode:
authorAlvaro Herrera <alvherre@alvh.no-ip.org>2017-03-24 14:06:10 -0300
committerAlvaro Herrera <alvherre@alvh.no-ip.org>2017-03-24 14:06:10 -0300
commit7b504eb282ca2f5104b5c00b4f05a3ef6bb1385b (patch)
tree4b12f53c5bd25a03f1016f1daa0809606b47df3a /src/bin/pg_dump/pg_dump.c
parentf120b614e070aed39586d1443193738a149a90d4 (diff)
downloadpostgresql-7b504eb282ca2f5104b5c00b4f05a3ef6bb1385b.tar.gz
postgresql-7b504eb282ca2f5104b5c00b4f05a3ef6bb1385b.zip
Implement multivariate n-distinct coefficients
Add support for explicitly declared statistic objects (CREATE STATISTICS), allowing collection of statistics on more complex combinations that individual table columns. Companion commands DROP STATISTICS and ALTER STATISTICS ... OWNER TO / SET SCHEMA / RENAME are added too. All this DDL has been designed so that more statistic types can be added later on, such as multivariate most-common-values and multivariate histograms between columns of a single table, leaving room for permitting columns on multiple tables, too, as well as expressions. This commit only adds support for collection of n-distinct coefficient on user-specified sets of columns in a single table. This is useful to estimate number of distinct groups in GROUP BY and DISTINCT clauses; estimation errors there can cause over-allocation of memory in hashed aggregates, for instance, so it's a worthwhile problem to solve. A new special pseudo-type pg_ndistinct is used. (num-distinct estimation was deemed sufficiently useful by itself that this is worthwhile even if no further statistic types are added immediately; so much so that another version of essentially the same functionality was submitted by Kyotaro Horiguchi: https://postgr.es/m/20150828.173334.114731693.horiguchi.kyotaro@lab.ntt.co.jp though this commit does not use that code.) Author: Tomas Vondra. Some code rework by Álvaro. Reviewed-by: Dean Rasheed, David Rowley, Kyotaro Horiguchi, Jeff Janes, Ideriha Takeshi Discussion: https://postgr.es/m/543AFA15.4080608@fuzzy.cz https://postgr.es/m/20170320190220.ixlaueanxegqd5gr@alvherre.pgsql
Diffstat (limited to 'src/bin/pg_dump/pg_dump.c')
-rw-r--r--src/bin/pg_dump/pg_dump.c153
1 files changed, 153 insertions, 0 deletions
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index b3d95d7f6ee..ba34cc163e9 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -192,6 +192,7 @@ static void dumpAttrDef(Archive *fout, AttrDefInfo *adinfo);
static void dumpSequence(Archive *fout, TableInfo *tbinfo);
static void dumpSequenceData(Archive *fout, TableDataInfo *tdinfo);
static void dumpIndex(Archive *fout, IndxInfo *indxinfo);
+static void dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo);
static void dumpConstraint(Archive *fout, ConstraintInfo *coninfo);
static void dumpTableConstraintComment(Archive *fout, ConstraintInfo *coninfo);
static void dumpTSParser(Archive *fout, TSParserInfo *prsinfo);
@@ -6583,6 +6584,99 @@ getIndexes(Archive *fout, TableInfo tblinfo[], int numTables)
}
/*
+ * getExtendedStatistics
+ * get information about extended statistics on a dumpable table
+ * or materialized view.
+ *
+ * Note: extended statistics data is not returned directly to the caller, but
+ * it does get entered into the DumpableObject tables.
+ */
+void
+getExtendedStatistics(Archive *fout, TableInfo tblinfo[], int numTables)
+{
+ int i,
+ j;
+ PQExpBuffer query;
+ PGresult *res;
+ StatsExtInfo *statsextinfo;
+ int ntups;
+ int i_tableoid;
+ int i_oid;
+ int i_staname;
+ int i_stadef;
+
+ /* Extended statistics were new in v10 */
+ if (fout->remoteVersion < 100000)
+ return;
+
+ query = createPQExpBuffer();
+
+ for (i = 0; i < numTables; i++)
+ {
+ TableInfo *tbinfo = &tblinfo[i];
+
+ /* Only plain tables and materialized views can have extended statistics. */
+ if (tbinfo->relkind != RELKIND_RELATION &&
+ tbinfo->relkind != RELKIND_MATVIEW)
+ continue;
+
+ /*
+ * Ignore extended statistics of tables whose definitions are not to
+ * be dumped.
+ */
+ if (!(tbinfo->dobj.dump & DUMP_COMPONENT_DEFINITION))
+ continue;
+
+ if (g_verbose)
+ write_msg(NULL, "reading extended statistics for table \"%s.%s\"\n",
+ tbinfo->dobj.namespace->dobj.name,
+ tbinfo->dobj.name);
+
+ /* Make sure we are in proper schema so stadef is right */
+ selectSourceSchema(fout, tbinfo->dobj.namespace->dobj.name);
+
+ resetPQExpBuffer(query);
+
+ appendPQExpBuffer(query,
+ "SELECT "
+ "tableoid, "
+ "oid, "
+ "staname, "
+ "pg_catalog.pg_get_statisticsextdef(oid) AS stadef "
+ "FROM pg_statistic_ext "
+ "WHERE starelid = '%u' "
+ "ORDER BY staname", tbinfo->dobj.catId.oid);
+
+ res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+
+ ntups = PQntuples(res);
+
+ i_tableoid = PQfnumber(res, "tableoid");
+ i_oid = PQfnumber(res, "oid");
+ i_staname = PQfnumber(res, "staname");
+ i_stadef = PQfnumber(res, "stadef");
+
+ statsextinfo = (StatsExtInfo *) pg_malloc(ntups * sizeof(StatsExtInfo));
+
+ for (j = 0; j < ntups; j++)
+ {
+ statsextinfo[j].dobj.objType = DO_STATSEXT;
+ statsextinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, j, i_tableoid));
+ statsextinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, j, i_oid));
+ AssignDumpId(&statsextinfo[j].dobj);
+ statsextinfo[j].dobj.name = pg_strdup(PQgetvalue(res, j, i_staname));
+ statsextinfo[j].dobj.namespace = tbinfo->dobj.namespace;
+ statsextinfo[j].statsexttable = tbinfo;
+ statsextinfo[j].statsextdef = pg_strdup(PQgetvalue(res, j, i_stadef));
+ }
+
+ PQclear(res);
+ }
+
+ destroyPQExpBuffer(query);
+}
+
+/*
* getConstraints
*
* Get info about constraints on dumpable tables.
@@ -9234,6 +9328,9 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
case DO_INDEX:
dumpIndex(fout, (IndxInfo *) dobj);
break;
+ case DO_STATSEXT:
+ dumpStatisticsExt(fout, (StatsExtInfo *) dobj);
+ break;
case DO_REFRESH_MATVIEW:
refreshMatViewData(fout, (TableDataInfo *) dobj);
break;
@@ -15729,6 +15826,61 @@ dumpIndex(Archive *fout, IndxInfo *indxinfo)
}
/*
+ * dumpStatisticsExt
+ * write out to fout an extended statistics object
+ */
+static void
+dumpStatisticsExt(Archive *fout, StatsExtInfo *statsextinfo)
+{
+ DumpOptions *dopt = fout->dopt;
+ TableInfo *tbinfo = statsextinfo->statsexttable;
+ PQExpBuffer q;
+ PQExpBuffer delq;
+ PQExpBuffer labelq;
+
+ if (dopt->dataOnly)
+ return;
+
+ q = createPQExpBuffer();
+ delq = createPQExpBuffer();
+ labelq = createPQExpBuffer();
+
+ appendPQExpBuffer(labelq, "STATISTICS %s",
+ fmtId(statsextinfo->dobj.name));
+
+ appendPQExpBuffer(q, "%s;\n", statsextinfo->statsextdef);
+
+ appendPQExpBuffer(delq, "DROP STATISTICS %s.",
+ fmtId(tbinfo->dobj.namespace->dobj.name));
+ appendPQExpBuffer(delq, "%s;\n",
+ fmtId(statsextinfo->dobj.name));
+
+ if (statsextinfo->dobj.dump & DUMP_COMPONENT_DEFINITION)
+ ArchiveEntry(fout, statsextinfo->dobj.catId,
+ statsextinfo->dobj.dumpId,
+ statsextinfo->dobj.name,
+ tbinfo->dobj.namespace->dobj.name,
+ NULL,
+ tbinfo->rolname, false,
+ "STATISTICS", SECTION_POST_DATA,
+ q->data, delq->data, NULL,
+ NULL, 0,
+ NULL, NULL);
+
+ /* Dump Statistics Comments */
+ if (statsextinfo->dobj.dump & DUMP_COMPONENT_COMMENT)
+ dumpComment(fout, labelq->data,
+ tbinfo->dobj.namespace->dobj.name,
+ tbinfo->rolname,
+ statsextinfo->dobj.catId, 0,
+ statsextinfo->dobj.dumpId);
+
+ destroyPQExpBuffer(q);
+ destroyPQExpBuffer(delq);
+ destroyPQExpBuffer(labelq);
+}
+
+/*
* dumpConstraint
* write out to fout a user-defined constraint
*/
@@ -17266,6 +17418,7 @@ addBoundaryDependencies(DumpableObject **dobjs, int numObjs,
addObjectDependency(postDataBound, dobj->dumpId);
break;
case DO_INDEX:
+ case DO_STATSEXT:
case DO_REFRESH_MATVIEW:
case DO_TRIGGER:
case DO_EVENT_TRIGGER: