diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2007-05-27 03:50:39 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2007-05-27 03:50:39 +0000 |
commit | 77947c51c08179b8bc12347a7fbcb2c8d7908302 (patch) | |
tree | 0a306ea177817fdadc0e4421b6d8dd212c11e6e3 /src/include | |
parent | cadb78330eedceafeda99bf12ac690cda773be62 (diff) | |
download | postgresql-77947c51c08179b8bc12347a7fbcb2c8d7908302.tar.gz postgresql-77947c51c08179b8bc12347a7fbcb2c8d7908302.zip |
Fix up pgstats counting of live and dead tuples to recognize that committed
and aborted transactions have different effects; also teach it not to assume
that prepared transactions are always committed.

Along the way, simplify the pgstats API by tying counting directly to
Relations; I cannot detect any redeeming social value in having stats
pointers in HeapScanDesc and IndexScanDesc structures. And fix a few
corner cases in which counts might be missed because the relation's
pgstat_info pointer hadn't been set.
Diffstat (limited to 'src/include')
-rw-r--r-- | src/include/access/heapam.h | 6 | ||||
-rw-r--r-- | src/include/access/relscan.h | 6 | ||||
-rw-r--r-- | src/include/access/twophase_rmgr.h | 5 | ||||
-rw-r--r-- | src/include/pgstat.h | 206 | ||||
-rw-r--r-- | src/include/utils/rel.h | 15 |
5 files changed, 140 insertions, 98 deletions
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 5ea66e74672..ebb2e984c24 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.123 2007/04/08 01:26:33 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.124 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -147,10 +147,10 @@ extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction); extern bool heap_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf, - PgStat_Info *pgstat_info); + Relation stats_relation); extern bool heap_release_fetch(Relation relation, Snapshot snapshot, HeapTuple tuple, Buffer *userbuf, bool keep_buf, - PgStat_Info *pgstat_info); + Relation stats_relation); extern void heap_get_latest_tid(Relation relation, Snapshot snapshot, ItemPointer tid); diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 77bca6be482..7a1ea39352a 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.52 2007/01/20 18:43:35 neilc Exp $ + * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.53 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,8 +37,6 @@ typedef struct HeapScanDescData /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ ItemPointerData rs_mctid; /* marked scan position, if any */ - PgStat_Info rs_pgstat_info; /* statistics collector hook */ - /* these fields only used 
in page-at-a-time mode */ int rs_cindex; /* current tuple's index in vistuples */ int rs_mindex; /* marked tuple's saved index */ @@ -78,8 +76,6 @@ typedef struct IndexScanDescData HeapTupleData xs_ctup; /* current heap tuple, if any */ Buffer xs_cbuf; /* current heap buffer in scan, if any */ /* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */ - - PgStat_Info xs_pgstat_info; /* statistics collector hook */ } IndexScanDescData; typedef IndexScanDescData *IndexScanDesc; diff --git a/src/include/access/twophase_rmgr.h b/src/include/access/twophase_rmgr.h index 0dbcd226fbd..e98ad7cb375 100644 --- a/src/include/access/twophase_rmgr.h +++ b/src/include/access/twophase_rmgr.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/twophase_rmgr.h,v 1.4 2007/01/05 22:19:51 momjian Exp $ + * $PostgreSQL: pgsql/src/include/access/twophase_rmgr.h,v 1.5 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,7 +26,8 @@ typedef uint8 TwoPhaseRmgrId; #define TWOPHASE_RM_INVAL_ID 2 #define TWOPHASE_RM_FLATFILES_ID 3 #define TWOPHASE_RM_NOTIFY_ID 4 -#define TWOPHASE_RM_MAX_ID TWOPHASE_RM_NOTIFY_ID +#define TWOPHASE_RM_PGSTAT_ID 5 +#define TWOPHASE_RM_MAX_ID TWOPHASE_RM_PGSTAT_ID extern const TwoPhaseCallback twophase_recover_callbacks[]; extern const TwoPhaseCallback twophase_postcommit_callbacks[]; diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 694ee44db19..476fd47dc7b 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -5,7 +5,7 @@ * * Copyright (c) 2001-2007, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.58 2007/04/30 16:37:08 tgl Exp $ + * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.59 2007/05/27 03:50:39 tgl Exp $ * ---------- */ #ifndef PGSTAT_H @@ -40,6 +40,90 @@ typedef enum 
StatMsgType */ typedef int64 PgStat_Counter; +/* ---------- + * PgStat_TableCounts The actual per-table counts kept by a backend + * + * This struct should contain only actual event counters, because we memcmp + * it against zeroes to detect whether there are any counts to transmit. + * It is a component of PgStat_TableStatus (within-backend state) and + * PgStat_TableEntry (the transmitted message format). + * + * Note: for a table, tuples_returned is the number of tuples successfully + * fetched by heap_getnext, while tuples_fetched is the number of tuples + * successfully fetched by heap_fetch under the control of bitmap indexscans. + * For an index, tuples_returned is the number of index entries returned by + * the index AM, while tuples_fetched is the number of tuples successfully + * fetched by heap_fetch under the control of simple indexscans for this index. + * + * tuples_inserted/tuples_updated/tuples_deleted count attempted actions, + * regardless of whether the transaction committed. new_live_tuples and + * new_dead_tuples are properly adjusted depending on commit or abort. + * ---------- + */ +typedef struct PgStat_TableCounts +{ + PgStat_Counter t_numscans; + + PgStat_Counter t_tuples_returned; + PgStat_Counter t_tuples_fetched; + + PgStat_Counter t_tuples_inserted; + PgStat_Counter t_tuples_updated; + PgStat_Counter t_tuples_deleted; + + PgStat_Counter t_new_live_tuples; + PgStat_Counter t_new_dead_tuples; + + PgStat_Counter t_blocks_fetched; + PgStat_Counter t_blocks_hit; +} PgStat_TableCounts; + + +/* ------------------------------------------------------------ + * Structures kept in backend local memory while accumulating counts + * ------------------------------------------------------------ + */ + + +/* ---------- + * PgStat_TableStatus Per-table status within a backend + * + * Most of the event counters are nontransactional, ie, we count events + * in committed and aborted transactions alike. 
For these, we just count + * directly in the PgStat_TableStatus. However, new_live_tuples and + * new_dead_tuples must be derived from tuple insertion and deletion counts + * with awareness of whether the transaction or subtransaction committed or + * aborted. Hence, we also keep a stack of per-(sub)transaction status + * records for every table modified in the current transaction. At commit + * or abort, we propagate tuples_inserted and tuples_deleted up to the + * parent subtransaction level, or out to the parent PgStat_TableStatus, + * as appropriate. + * ---------- + */ +typedef struct PgStat_TableStatus +{ + Oid t_id; /* table's OID */ + bool t_shared; /* is it a shared catalog? */ + struct PgStat_TableXactStatus *trans; /* lowest subxact's counts */ + PgStat_TableCounts t_counts; /* event counts to be sent */ +} PgStat_TableStatus; + +/* ---------- + * PgStat_TableXactStatus Per-table, per-subtransaction status + * ---------- + */ +typedef struct PgStat_TableXactStatus +{ + PgStat_Counter tuples_inserted; /* tuples inserted in (sub)xact */ + PgStat_Counter tuples_deleted; /* tuples deleted in (sub)xact */ + int nest_level; /* subtransaction nest level */ + /* links to other structs for same relation: */ + struct PgStat_TableXactStatus *upper; /* next higher subxact if any */ + PgStat_TableStatus *parent; /* per-table status */ + /* structs of same subxact level are linked here: */ + struct PgStat_TableXactStatus *next; /* next of same subxact */ +} PgStat_TableXactStatus; + /* ------------------------------------------------------------ * Message formats follow @@ -78,30 +162,12 @@ typedef struct PgStat_MsgDummy /* ---------- * PgStat_TableEntry Per-table info in a MsgTabstat - * - * Note: for a table, tuples_returned is the number of tuples successfully - * fetched by heap_getnext, while tuples_fetched is the number of tuples - * successfully fetched by heap_fetch under the control of bitmap indexscans. 
- * For an index, tuples_returned is the number of index entries returned by - * the index AM, while tuples_fetched is the number of tuples successfully - * fetched by heap_fetch under the control of simple indexscans for this index. * ---------- */ typedef struct PgStat_TableEntry { Oid t_id; - - PgStat_Counter t_numscans; - - PgStat_Counter t_tuples_returned; - PgStat_Counter t_tuples_fetched; - - PgStat_Counter t_tuples_inserted; - PgStat_Counter t_tuples_updated; - PgStat_Counter t_tuples_deleted; - - PgStat_Counter t_blocks_fetched; - PgStat_Counter t_blocks_hit; + PgStat_TableCounts t_counts; } PgStat_TableEntry; /* ---------- @@ -393,6 +459,10 @@ extern bool pgstat_collect_tuplelevel; extern bool pgstat_collect_blocklevel; extern bool pgstat_collect_querystring; +/* + * BgWriter statistics counters are updated directly by bgwriter and bufmgr + */ +extern PgStat_MsgBgWriter BgWriterStats; /* ---------- * Functions called from postmaster @@ -436,83 +506,67 @@ extern void pgstat_report_activity(const char *what); extern void pgstat_report_txn_timestamp(TimestampTz tstamp); extern void pgstat_report_waiting(bool waiting); -extern void pgstat_initstats(PgStat_Info *stats, Relation rel); +extern void pgstat_initstats(Relation rel); +/* nontransactional event counts are simple enough to inline */ -#define pgstat_count_heap_scan(s) \ +#define pgstat_count_heap_scan(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_numscans++; \ + if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_numscans++; \ } while (0) /* kluge for bitmap scans: */ -#define pgstat_discount_heap_scan(s) \ +#define pgstat_discount_heap_scan(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_numscans--; \ + if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_numscans--; \ } while 
(0) -#define pgstat_count_heap_getnext(s) \ +#define pgstat_count_heap_getnext(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_returned++; \ + if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_tuples_returned++; \ } while (0) -#define pgstat_count_heap_fetch(s) \ +#define pgstat_count_heap_fetch(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_fetched++; \ + if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_tuples_fetched++; \ } while (0) -#define pgstat_count_heap_insert(s) \ +#define pgstat_count_index_scan(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_inserted++; \ + if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_numscans++; \ } while (0) -#define pgstat_count_heap_update(s) \ +#define pgstat_count_index_tuples(rel, n) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_updated++; \ + if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_tuples_returned += (n); \ } while (0) -#define pgstat_count_heap_delete(s) \ +#define pgstat_count_buffer_read(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_deleted++; \ + if (pgstat_collect_blocklevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_blocks_fetched++; \ } while (0) -#define pgstat_count_index_scan(s) \ +#define pgstat_count_buffer_hit(rel) \ do { \ - if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_numscans++; \ - } while (0) -#define pgstat_count_index_tuples(s, n) \ - do { \ - if 
(pgstat_collect_tuplelevel && (s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_returned += (n); \ - } while (0) -#define pgstat_count_buffer_read(s,r) \ - do { \ - if (pgstat_collect_blocklevel) { \ - if ((s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_fetched++; \ - else { \ - pgstat_initstats((s), (r)); \ - if ((s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_fetched++; \ - } \ - } \ - } while (0) -#define pgstat_count_buffer_hit(s,r) \ - do { \ - if (pgstat_collect_blocklevel) { \ - if ((s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_hit++; \ - else { \ - pgstat_initstats((s), (r)); \ - if ((s)->tabentry != NULL) \ - ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_hit++; \ - } \ - } \ + if (pgstat_collect_blocklevel && (rel)->pgstat_info != NULL) \ + (rel)->pgstat_info->t_counts.t_blocks_hit++; \ } while (0) +extern void pgstat_count_heap_insert(Relation rel); +extern void pgstat_count_heap_update(Relation rel); +extern void pgstat_count_heap_delete(Relation rel); + +extern void AtEOXact_PgStat(bool isCommit); +extern void AtEOSubXact_PgStat(bool isCommit, int nestDepth); + +extern void AtPrepare_PgStat(void); +extern void PostPrepare_PgStat(void); + +extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info, + void *recdata, uint32 len); +extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, + void *recdata, uint32 len); -extern void pgstat_count_xact_commit(void); -extern void pgstat_count_xact_rollback(void); extern void pgstat_send_bgwriter(void); /* ---------- diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 33795de2bf8..bc6bf190b86 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: 
pgsql/src/include/utils/rel.h,v 1.100 2007/03/29 00:15:39 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.101 2007/05/27 03:50:39 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -90,15 +90,6 @@ typedef struct TriggerDesc /* - * Same for the statistics collector data in Relation and scan data. - */ -typedef struct PgStat_Info -{ - void *tabentry; -} PgStat_Info; - - -/* * Cached lookup information for the index access method functions defined * by the pg_am row associated with an index relation. */ @@ -200,8 +191,8 @@ typedef struct RelationData List *rd_indpred; /* index predicate tree, if any */ void *rd_amcache; /* available for use by index AM */ - /* statistics collection area */ - PgStat_Info pgstat_info; + /* use "struct" here to avoid needing to include pgstat.h: */ + struct PgStat_TableStatus *pgstat_info; /* statistics collection area */ } RelationData; typedef RelationData *Relation; |