about | summary | refs | log | tree | commit | diff
path: root/src/include
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2007-05-27 03:50:39 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2007-05-27 03:50:39 +0000
commit: 77947c51c08179b8bc12347a7fbcb2c8d7908302 (patch)
tree: 0a306ea177817fdadc0e4421b6d8dd212c11e6e3 /src/include
parent: cadb78330eedceafeda99bf12ac690cda773be62 (diff)
download: postgresql-77947c51c08179b8bc12347a7fbcb2c8d7908302.tar.gz
download: postgresql-77947c51c08179b8bc12347a7fbcb2c8d7908302.zip
Fix up pgstats counting of live and dead tuples to recognize that committed
and aborted transactions have different effects; also teach it not to assume that prepared transactions are always committed. Along the way, simplify the pgstats API by tying counting directly to Relations; I cannot detect any redeeming social value in having stats pointers in HeapScanDesc and IndexScanDesc structures. And fix a few corner cases in which counts might be missed because the relation's pgstat_info pointer hadn't been set.
Diffstat (limited to 'src/include')
-rw-r--r-- src/include/access/heapam.h 6
-rw-r--r-- src/include/access/relscan.h 6
-rw-r--r-- src/include/access/twophase_rmgr.h 5
-rw-r--r-- src/include/pgstat.h 206
-rw-r--r-- src/include/utils/rel.h 15
5 files changed, 140 insertions, 98 deletions
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 5ea66e74672..ebb2e984c24 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.123 2007/04/08 01:26:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.124 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -147,10 +147,10 @@ extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
extern bool heap_fetch(Relation relation, Snapshot snapshot,
HeapTuple tuple, Buffer *userbuf, bool keep_buf,
- PgStat_Info *pgstat_info);
+ Relation stats_relation);
extern bool heap_release_fetch(Relation relation, Snapshot snapshot,
HeapTuple tuple, Buffer *userbuf, bool keep_buf,
- PgStat_Info *pgstat_info);
+ Relation stats_relation);
extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
ItemPointer tid);
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 77bca6be482..7a1ea39352a 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.52 2007/01/20 18:43:35 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.53 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -37,8 +37,6 @@ typedef struct HeapScanDescData
/* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
ItemPointerData rs_mctid; /* marked scan position, if any */
- PgStat_Info rs_pgstat_info; /* statistics collector hook */
-
/* these fields only used in page-at-a-time mode */
int rs_cindex; /* current tuple's index in vistuples */
int rs_mindex; /* marked tuple's saved index */
@@ -78,8 +76,6 @@ typedef struct IndexScanDescData
HeapTupleData xs_ctup; /* current heap tuple, if any */
Buffer xs_cbuf; /* current heap buffer in scan, if any */
/* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
-
- PgStat_Info xs_pgstat_info; /* statistics collector hook */
} IndexScanDescData;
typedef IndexScanDescData *IndexScanDesc;
diff --git a/src/include/access/twophase_rmgr.h b/src/include/access/twophase_rmgr.h
index 0dbcd226fbd..e98ad7cb375 100644
--- a/src/include/access/twophase_rmgr.h
+++ b/src/include/access/twophase_rmgr.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/twophase_rmgr.h,v 1.4 2007/01/05 22:19:51 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/twophase_rmgr.h,v 1.5 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -26,7 +26,8 @@ typedef uint8 TwoPhaseRmgrId;
#define TWOPHASE_RM_INVAL_ID 2
#define TWOPHASE_RM_FLATFILES_ID 3
#define TWOPHASE_RM_NOTIFY_ID 4
-#define TWOPHASE_RM_MAX_ID TWOPHASE_RM_NOTIFY_ID
+#define TWOPHASE_RM_PGSTAT_ID 5
+#define TWOPHASE_RM_MAX_ID TWOPHASE_RM_PGSTAT_ID
extern const TwoPhaseCallback twophase_recover_callbacks[];
extern const TwoPhaseCallback twophase_postcommit_callbacks[];
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 694ee44db19..476fd47dc7b 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -5,7 +5,7 @@
*
* Copyright (c) 2001-2007, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.58 2007/04/30 16:37:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.59 2007/05/27 03:50:39 tgl Exp $
* ----------
*/
#ifndef PGSTAT_H
@@ -40,6 +40,90 @@ typedef enum StatMsgType
*/
typedef int64 PgStat_Counter;
+/* ----------
+ * PgStat_TableCounts The actual per-table counts kept by a backend
+ *
+ * This struct should contain only actual event counters, because we memcmp
+ * it against zeroes to detect whether there are any counts to transmit.
+ * It is a component of PgStat_TableStatus (within-backend state) and
+ * PgStat_TableEntry (the transmitted message format).
+ *
+ * Note: for a table, tuples_returned is the number of tuples successfully
+ * fetched by heap_getnext, while tuples_fetched is the number of tuples
+ * successfully fetched by heap_fetch under the control of bitmap indexscans.
+ * For an index, tuples_returned is the number of index entries returned by
+ * the index AM, while tuples_fetched is the number of tuples successfully
+ * fetched by heap_fetch under the control of simple indexscans for this index.
+ *
+ * tuples_inserted/tuples_updated/tuples_deleted count attempted actions,
+ * regardless of whether the transaction committed. new_live_tuples and
+ * new_dead_tuples are properly adjusted depending on commit or abort.
+ * ----------
+ */
+typedef struct PgStat_TableCounts
+{
+ PgStat_Counter t_numscans; /* heap or index scans counted on this relation */
+
+ PgStat_Counter t_tuples_returned; /* table: tuples fetched by heap_getnext; index: entries returned by the index AM */
+ PgStat_Counter t_tuples_fetched; /* tuples fetched by heap_fetch (table: bitmap indexscans; index: simple indexscans) */
+
+ PgStat_Counter t_tuples_inserted; /* attempted inserts, whether or not the xact committed */
+ PgStat_Counter t_tuples_updated; /* attempted updates, whether or not the xact committed */
+ PgStat_Counter t_tuples_deleted; /* attempted deletes, whether or not the xact committed */
+
+ PgStat_Counter t_new_live_tuples; /* adjusted depending on commit or abort */
+ PgStat_Counter t_new_dead_tuples; /* adjusted depending on commit or abort */
+
+ PgStat_Counter t_blocks_fetched; /* bumped by pgstat_count_buffer_read */
+ PgStat_Counter t_blocks_hit; /* bumped by pgstat_count_buffer_hit */
+} PgStat_TableCounts;
+
+
+/* ------------------------------------------------------------
+ * Structures kept in backend local memory while accumulating counts
+ * ------------------------------------------------------------
+ */
+
+
+/* ----------
+ * PgStat_TableStatus Per-table status within a backend
+ *
+ * Most of the event counters are nontransactional, ie, we count events
+ * in committed and aborted transactions alike. For these, we just count
+ * directly in the PgStat_TableStatus. However, new_live_tuples and
+ * new_dead_tuples must be derived from tuple insertion and deletion counts
+ * with awareness of whether the transaction or subtransaction committed or
+ * aborted. Hence, we also keep a stack of per-(sub)transaction status
+ * records for every table modified in the current transaction. At commit
+ * or abort, we propagate tuples_inserted and tuples_deleted up to the
+ * parent subtransaction level, or out to the parent PgStat_TableStatus,
+ * as appropriate.
+ * ----------
+ */
+typedef struct PgStat_TableStatus
+{
+ Oid t_id; /* table's OID */
+ bool t_shared; /* is it a shared catalog? */
+ struct PgStat_TableXactStatus *trans; /* lowest subxact's counts; higher levels are reached via its "upper" link */
+ PgStat_TableCounts t_counts; /* event counts to be sent; same struct is embedded in PgStat_TableEntry */
+} PgStat_TableStatus;
+
+/* ----------
+ * PgStat_TableXactStatus Per-table, per-subtransaction status
+ * ----------
+ */
+typedef struct PgStat_TableXactStatus
+{
+ PgStat_Counter tuples_inserted; /* tuples inserted in (sub)xact; propagated upward at commit or abort */
+ PgStat_Counter tuples_deleted; /* tuples deleted in (sub)xact; propagated upward at commit or abort */
+ int nest_level; /* subtransaction nest level */
+ /* links to other structs for same relation: */
+ struct PgStat_TableXactStatus *upper; /* next higher subxact if any */
+ PgStat_TableStatus *parent; /* per-table status; final destination of the propagated counts */
+ /* structs of same subxact level are linked here: */
+ struct PgStat_TableXactStatus *next; /* next of same subxact */
+} PgStat_TableXactStatus;
+
/* ------------------------------------------------------------
* Message formats follow
@@ -78,30 +162,12 @@ typedef struct PgStat_MsgDummy
/* ----------
* PgStat_TableEntry Per-table info in a MsgTabstat
- *
- * Note: for a table, tuples_returned is the number of tuples successfully
- * fetched by heap_getnext, while tuples_fetched is the number of tuples
- * successfully fetched by heap_fetch under the control of bitmap indexscans.
- * For an index, tuples_returned is the number of index entries returned by
- * the index AM, while tuples_fetched is the number of tuples successfully
- * fetched by heap_fetch under the control of simple indexscans for this index.
* ----------
*/
typedef struct PgStat_TableEntry
{
Oid t_id;
-
- PgStat_Counter t_numscans;
-
- PgStat_Counter t_tuples_returned;
- PgStat_Counter t_tuples_fetched;
-
- PgStat_Counter t_tuples_inserted;
- PgStat_Counter t_tuples_updated;
- PgStat_Counter t_tuples_deleted;
-
- PgStat_Counter t_blocks_fetched;
- PgStat_Counter t_blocks_hit;
+ PgStat_TableCounts t_counts;
} PgStat_TableEntry;
/* ----------
@@ -393,6 +459,10 @@ extern bool pgstat_collect_tuplelevel;
extern bool pgstat_collect_blocklevel;
extern bool pgstat_collect_querystring;
+/*
+ * BgWriter statistics counters are updated directly by bgwriter and bufmgr
+ */
+extern PgStat_MsgBgWriter BgWriterStats;
/* ----------
* Functions called from postmaster
@@ -436,83 +506,67 @@ extern void pgstat_report_activity(const char *what);
extern void pgstat_report_txn_timestamp(TimestampTz tstamp);
extern void pgstat_report_waiting(bool waiting);
-extern void pgstat_initstats(PgStat_Info *stats, Relation rel);
+extern void pgstat_initstats(Relation rel);
+/* nontransactional event counts are simple enough to inline */
-#define pgstat_count_heap_scan(s) \
+#define pgstat_count_heap_scan(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_numscans++; \
+ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_numscans++; \
} while (0)
/* kluge for bitmap scans: */
-#define pgstat_discount_heap_scan(s) \
+#define pgstat_discount_heap_scan(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_numscans--; \
+ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_numscans--; \
} while (0)
-#define pgstat_count_heap_getnext(s) \
+#define pgstat_count_heap_getnext(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_returned++; \
+ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_tuples_returned++; \
} while (0)
-#define pgstat_count_heap_fetch(s) \
+#define pgstat_count_heap_fetch(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_fetched++; \
+ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_tuples_fetched++; \
} while (0)
-#define pgstat_count_heap_insert(s) \
+#define pgstat_count_index_scan(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_inserted++; \
+ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_numscans++; \
} while (0)
-#define pgstat_count_heap_update(s) \
+#define pgstat_count_index_tuples(rel, n) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_updated++; \
+ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_tuples_returned += (n); \
} while (0)
-#define pgstat_count_heap_delete(s) \
+#define pgstat_count_buffer_read(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_deleted++; \
+ if (pgstat_collect_blocklevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_blocks_fetched++; \
} while (0)
-#define pgstat_count_index_scan(s) \
+#define pgstat_count_buffer_hit(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_numscans++; \
- } while (0)
-#define pgstat_count_index_tuples(s, n) \
- do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_returned += (n); \
- } while (0)
-#define pgstat_count_buffer_read(s,r) \
- do { \
- if (pgstat_collect_blocklevel) { \
- if ((s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_fetched++; \
- else { \
- pgstat_initstats((s), (r)); \
- if ((s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_fetched++; \
- } \
- } \
- } while (0)
-#define pgstat_count_buffer_hit(s,r) \
- do { \
- if (pgstat_collect_blocklevel) { \
- if ((s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_hit++; \
- else { \
- pgstat_initstats((s), (r)); \
- if ((s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_hit++; \
- } \
- } \
+ if (pgstat_collect_blocklevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_blocks_hit++; \
} while (0)
+extern void pgstat_count_heap_insert(Relation rel);
+extern void pgstat_count_heap_update(Relation rel);
+extern void pgstat_count_heap_delete(Relation rel);
+
+extern void AtEOXact_PgStat(bool isCommit);
+extern void AtEOSubXact_PgStat(bool isCommit, int nestDepth);
+
+extern void AtPrepare_PgStat(void);
+extern void PostPrepare_PgStat(void);
+
+extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info,
+ void *recdata, uint32 len);
+extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
+ void *recdata, uint32 len);
-extern void pgstat_count_xact_commit(void);
-extern void pgstat_count_xact_rollback(void);
extern void pgstat_send_bgwriter(void);
/* ----------
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 33795de2bf8..bc6bf190b86 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.100 2007/03/29 00:15:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.101 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -90,15 +90,6 @@ typedef struct TriggerDesc
/*
- * Same for the statistics collector data in Relation and scan data.
- */
-typedef struct PgStat_Info
-{
- void *tabentry;
-} PgStat_Info;
-
-
-/*
* Cached lookup information for the index access method functions defined
* by the pg_am row associated with an index relation.
*/
@@ -200,8 +191,8 @@ typedef struct RelationData
List *rd_indpred; /* index predicate tree, if any */
void *rd_amcache; /* available for use by index AM */
- /* statistics collection area */
- PgStat_Info pgstat_info;
+ /* use "struct" here to avoid needing to include pgstat.h: */
+ struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
} RelationData;
typedef RelationData *Relation;