aboutsummaryrefslogtreecommitdiff
path: root/src/include/utils
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/utils')
-rw-r--r--src/include/utils/pgstat_internal.h663
-rw-r--r--src/include/utils/rel.h1
-rw-r--r--src/include/utils/timeout.h1
-rw-r--r--src/include/utils/wait_event.h1
4 files changed, 635 insertions, 31 deletions
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index c3f83c74c62..ab27bc47c5e 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -14,21 +14,134 @@
#define PGSTAT_INTERNAL_H
+#include "common/hashfn.h"
+#include "lib/dshash.h"
+#include "lib/ilist.h"
#include "pgstat.h"
+#include "storage/lwlock.h"
+#include "utils/dsa.h"
-#define PGSTAT_STAT_INTERVAL 500 /* Minimum time between stats file
- * updates; in milliseconds. */
+/*
+ * Types related to shared memory storage of statistics.
+ *
+ * Per-object statistics are stored in the "shared stats" hashtable. That
+ * table's entries (PgStatShared_HashEntry) contain a pointer to the actual stats
+ * data for the object (the size of the stats data varies depending on the
+ * kind of stats). The table is keyed by PgStat_HashKey.
+ *
+ * Once a backend has a reference to a shared stats entry, it increments the
+ * entry's refcount. Even after stats data is dropped (e.g., due to a DROP
+ * TABLE), the entry itself can only be deleted once all references have been
+ * released.
+ *
+ * These refcounts, in combination with a backend local hashtable
+ * (pgStatEntryRefHash, with entries pointing to PgStat_EntryRef) in front of
+ * the shared hash table, mean that most stats work can happen without
+ * touching the shared hash table, reducing contention.
+ *
+ * Once there are pending stats updates for a table PgStat_EntryRef->pending
+ * is allocated to contain a working space for as-of-yet-unapplied stats
+ * updates. Once the stats are flushed, PgStat_EntryRef->pending is freed.
+ *
+ * Each stat kind in the shared hash table has a fixed member
+ * PgStatShared_Common as the first element.
+ */
-/* ----------
- * The initial size hints for the hash tables used in the collector.
- * ----------
+/* struct for shared statistics hash entry key. */
+typedef struct PgStat_HashKey
+{
+ PgStat_Kind kind; /* statistics entry kind */
+ Oid dboid; /* database ID. InvalidOid for shared objects. */
+ Oid objoid; /* object ID, either table or function. */
+} PgStat_HashKey;
+
+/*
+ * Shared statistics hash entry. Doesn't itself contain any stats, but points
+ * to them (with ->body). That allows the stats entries themselves to be of
+ * variable size.
*/
-#define PGSTAT_DB_HASH_SIZE 16
-#define PGSTAT_TAB_HASH_SIZE 512
-#define PGSTAT_FUNCTION_HASH_SIZE 512
-#define PGSTAT_SUBSCRIPTION_HASH_SIZE 32
-#define PGSTAT_REPLSLOT_HASH_SIZE 32
+typedef struct PgStatShared_HashEntry
+{
+ PgStat_HashKey key; /* hash key */
+
+ /*
+ * If dropped is set, backends need to release their references so that
+ * the memory for the entry can be freed. No new references may be made
+ * once marked as dropped.
+ */
+ bool dropped;
+
+ /*
+ * Refcount managing lifetime of the entry itself (as opposed to the
+ * dshash entry pointing to it). The stats lifetime has to be separate
+ * from the hash table entry lifetime because we allow backends to point
+ * to a stats entry without holding a hash table lock (and some other
+ * reasons).
+ *
+ * As long as the entry is not dropped, 1 is added to the refcount
+ * representing that the entry should not be dropped. In addition each
+ * backend that has a reference to the entry needs to increment the
+ * refcount as long as it does.
+ *
+ * May only be incremented / decremented while holding at least a shared
+ * lock on the dshash partition containing the entry. It needs to be an
+ * atomic variable because multiple backends can increment the refcount
+ * with just a shared lock.
+ *
+ * When the refcount reaches 0 the entry needs to be freed.
+ */
+ pg_atomic_uint32 refcount;
+
+ /*
+ * Pointer to shared stats. The stats entry always starts with
+ * PgStatShared_Common, embedded in a larger struct containing the
+ * PgStat_Kind specific stats fields.
+ */
+ dsa_pointer body;
+} PgStatShared_HashEntry;
+
+/*
+ * Common header struct for PgStatShm_Stat*Entry.
+ */
+typedef struct PgStatShared_Common
+{
+ uint32 magic; /* just a validity cross-check */
+ /* lock protecting stats contents (i.e. data following the header) */
+ LWLock lock;
+} PgStatShared_Common;
+
+/*
+ * A backend local reference to a shared stats entry. As long as at least one
+ * such reference exists, the shared stats entry will not be released.
+ *
+ * If there are pending stats update to the shared stats, these are stored in
+ * ->pending.
+ */
+typedef struct PgStat_EntryRef
+{
+ /*
+ * Pointer to the PgStatShared_HashEntry entry in the shared stats
+ * hashtable.
+ */
+ PgStatShared_HashEntry *shared_entry;
+
+ /*
+ * Pointer to the stats data (i.e. PgStatShared_HashEntry->body), resolved
+ * as a local pointer, to avoid repeated dsa_get_address() calls.
+ */
+ PgStatShared_Common *shared_stats;
+
+ /*
+ * Pending statistics data that will need to be flushed to shared memory
+ * stats eventually. Each stats kind utilizing pending data defines what
+ * format its pending data has and needs to provide a
+ * PgStat_KindInfo->flush_pending_cb callback to merge pending into shared
+ * stats.
+ */
+ void *pending;
+ dlist_node pending_node; /* membership in pgStatPending list */
+} PgStat_EntryRef;
/*
@@ -43,11 +156,11 @@ typedef struct PgStat_SubXactStatus
struct PgStat_SubXactStatus *prev; /* higher-level subxact if any */
/*
- * Dropping the statistics for objects that dropped transactionally itself
- * needs to be transactional. Therefore we collect the stats dropped in
- * the current (sub-)transaction and only execute the stats drop when we
- * know if the transaction commits/aborts. To handle replicas and crashes,
- * stats drops are included in commit records.
+ * Statistics for transactionally dropped objects need to be
+ * transactionally dropped as well. Collect the stats dropped in the
+ * current (sub-)transaction and only execute the stats drop when we know
+ * if the transaction commits/aborts. To handle replicas and crashes,
+ * stats drops are included in commit / abort records.
*/
dlist_head pending_drops;
int pending_drops_count;
@@ -65,9 +178,95 @@ typedef struct PgStat_SubXactStatus
/*
+ * Metadata for a specific kind of statistics.
+ */
+typedef struct PgStat_KindInfo
+{
+ /*
+ * Do a fixed number of stats objects exist for this kind of stats (e.g.
+ * bgwriter stats) or not (e.g. tables).
+ */
+ bool fixed_amount:1;
+
+ /*
+ * Can stats of this kind be accessed from another database? Determines
+ * whether a stats object gets included in stats snapshots.
+ */
+ bool accessed_across_databases:1;
+
+ /*
+ * For variable-numbered stats: Identified on-disk using a name, rather
+ * than PgStat_HashKey. Probably only needed for replication slot stats.
+ */
+ bool named_on_disk:1;
+
+ /*
+ * The size of an entry in the shared stats hash table (pointed to by
+ * PgStatShared_HashEntry->body).
+ */
+ uint32 shared_size;
+
+ /*
+ * The offset/size of statistics inside the shared stats entry. Used when
+ * [de-]serializing statistics to / from disk respectively. Separate from
+ * shared_size because [de-]serialization may not include in-memory state
+ * like lwlocks.
+ */
+ uint32 shared_data_off;
+ uint32 shared_data_len;
+
+ /*
+ * The size of the pending data for this kind. E.g. how large
+ * PgStat_EntryRef->pending is. Used for allocations.
+ *
+ * 0 signals that an entry of this kind should never have a pending entry.
+ */
+ uint32 pending_size;
+
+ /*
+ * For variable-numbered stats: flush pending stats. Required if pending
+ * data is used.
+ */
+ bool (*flush_pending_cb) (PgStat_EntryRef *sr, bool nowait);
+
+ /*
+ * For variable-numbered stats: delete pending stats. Optional.
+ */
+ void (*delete_pending_cb) (PgStat_EntryRef *sr);
+
+ /*
+ * For variable-numbered stats: reset the reset timestamp. Optional.
+ */
+ void (*reset_timestamp_cb) (PgStatShared_Common *header, TimestampTz ts);
+
+ /*
+ * For variable-numbered stats with named_on_disk. Optional.
+ */
+ void (*to_serialized_name) (const PgStatShared_Common *header, NameData *name);
+ bool (*from_serialized_name) (const NameData *name, PgStat_HashKey *key);
+
+ /*
+ * For fixed-numbered statistics: Reset All.
+ */
+ void (*reset_all_cb) (TimestampTz ts);
+
+ /*
+ * For fixed-numbered statistics: Build snapshot for entry
+ */
+ void (*snapshot_cb) (void);
+
+ /* name of the kind of stats */
+ const char *const name;
+} PgStat_KindInfo;
+
+
+/*
* List of SLRU names that we keep stats for. There is no central registry of
* SLRUs, so we use this fixed list instead. The "other" entry is used for
* all SLRUs without an explicit entry (e.g. SLRUs in extensions).
+ *
+ * This is only defined here so that SLRU_NUM_ELEMENTS is known for later type
+ * definitions.
*/
static const char *const slru_names[] = {
"CommitTs",
@@ -83,33 +282,271 @@ static const char *const slru_names[] = {
#define SLRU_NUM_ELEMENTS lengthof(slru_names)
+/* ----------
+ * Types and definitions for different kinds of fixed-amount stats.
+ *
+ * Single-writer stats use the changecount mechanism to achieve low-overhead
+ * writes - they're obviously more performance critical than reads. Check the
+ * definition of struct PgBackendStatus for some explanation of the
+ * changecount mechanism.
+ *
+ * Because the obvious implementation of resetting single-writer stats isn't
+ * compatible with that (another backend needs to write), we don't scribble on
+ * shared stats while resetting. Instead, just record the current counter
+ * values in a copy of the stats data, which is protected by ->lock. See
+ * pgstat_fetch_stat_(archiver|bgwriter|checkpointer) for the reader side.
+ *
+ * The only exception to that is the the stat_reset_timestamp in these
+ * structs, which is protected by ->lock, because it has to be written by
+ * another backend while resetting
+ * ----------
+ */
+
+typedef struct PgStatShared_Archiver
+{
+ /* lock protects ->reset_offset as well as stats->stat_reset_timestamp */
+ LWLock lock;
+ uint32 changecount;
+ PgStat_ArchiverStats stats;
+ PgStat_ArchiverStats reset_offset;
+} PgStatShared_Archiver;
+
+typedef struct PgStatShared_BgWriter
+{
+ /* lock protects ->reset_offset as well as stats->stat_reset_timestamp */
+ LWLock lock;
+ uint32 changecount;
+ PgStat_BgWriterStats stats;
+ PgStat_BgWriterStats reset_offset;
+} PgStatShared_BgWriter;
+
+typedef struct PgStatShared_Checkpointer
+{
+ /* lock protects ->reset_offset as well as stats->stat_reset_timestamp */
+ LWLock lock;
+ uint32 changecount;
+ PgStat_CheckpointerStats stats;
+ PgStat_CheckpointerStats reset_offset;
+} PgStatShared_Checkpointer;
+
+typedef struct PgStatShared_SLRU
+{
+ /* lock protects ->stats */
+ LWLock lock;
+ PgStat_SLRUStats stats[SLRU_NUM_ELEMENTS];
+} PgStatShared_SLRU;
+
+typedef struct PgStatShared_Wal
+{
+ /* lock protects ->stats */
+ LWLock lock;
+ PgStat_WalStats stats;
+} PgStatShared_Wal;
+
+
+
+/* ----------
+ * Types and definitions for different kinds of variable-amount stats.
+ *
+ * Each struct has to start with PgStatShared_Common, containing information
+ * common across the different types of stats. Kind-specific data follows.
+ * ----------
+ */
+
+typedef struct PgStatShared_Database
+{
+ PgStatShared_Common header;
+ PgStat_StatDBEntry stats;
+} PgStatShared_Database;
+
+typedef struct PgStatShared_Relation
+{
+ PgStatShared_Common header;
+ PgStat_StatTabEntry stats;
+} PgStatShared_Relation;
+
+typedef struct PgStatShared_Function
+{
+ PgStatShared_Common header;
+ PgStat_StatFuncEntry stats;
+} PgStatShared_Function;
+
+typedef struct PgStatShared_Subscription
+{
+ PgStatShared_Common header;
+ PgStat_StatSubEntry stats;
+} PgStatShared_Subscription;
+
+typedef struct PgStatShared_ReplSlot
+{
+ PgStatShared_Common header;
+ PgStat_StatReplSlotEntry stats;
+} PgStatShared_ReplSlot;
+
+
+/*
+ * Central shared memory entry for the cumulative stats system.
+ *
+ * Fixed amount stats, the dynamic shared memory hash table for
+ * non-fixed-amount stats, as well as remaining bits and pieces are all
+ * reached from here.
+ */
+typedef struct PgStat_ShmemControl
+{
+ void *raw_dsa_area;
+
+ /*
+ * Stats for variable-numbered objects are kept in this shared hash table.
+ * See comment above PgStat_Kind for details.
+ */
+ dshash_table_handle hash_handle; /* shared dbstat hash */
+
+ /* Has the stats system already been shut down? Just a debugging check. */
+ bool is_shutdown;
+
+ /*
+ * Whenever statistics for dropped objects could not be freed - because
+ * backends still have references - the dropping backend calls
+ * pgstat_request_entry_refs_gc() incrementing this counter. Eventually
+ * that causes backends to run pgstat_gc_entry_refs(), allowing memory to
+ * be reclaimed.
+ */
+ pg_atomic_uint64 gc_request_count;
+
+ /*
+ * Stats data for fixed-numbered objects.
+ */
+ PgStatShared_Archiver archiver;
+ PgStatShared_BgWriter bgwriter;
+ PgStatShared_Checkpointer checkpointer;
+ PgStatShared_SLRU slru;
+ PgStatShared_Wal wal;
+} PgStat_ShmemControl;
+
+
+/*
+ * Cached statistics snapshot
+ */
+typedef struct PgStat_Snapshot
+{
+ PgStat_FetchConsistency mode;
+
+ /* time at which snapshot was taken */
+ TimestampTz snapshot_timestamp;
+
+ bool fixed_valid[PGSTAT_NUM_KINDS];
+
+ PgStat_ArchiverStats archiver;
+
+ PgStat_BgWriterStats bgwriter;
+
+ PgStat_CheckpointerStats checkpointer;
+
+ PgStat_SLRUStats slru[SLRU_NUM_ELEMENTS];
+
+ PgStat_WalStats wal;
+
+ /* to free snapshot in bulk */
+ MemoryContext context;
+ struct pgstat_snapshot_hash *stats;
+} PgStat_Snapshot;
+
+
+/*
+ * Collection of backend-local stats state.
+ */
+typedef struct PgStat_LocalState
+{
+ PgStat_ShmemControl *shmem;
+ dsa_area *dsa;
+ dshash_table *shared_hash;
+
+ /* the current statistics snapshot */
+ PgStat_Snapshot snapshot;
+} PgStat_LocalState;
+
+
+/*
+ * Inline functions defined further below.
+ */
+
+static inline void pgstat_begin_changecount_write(uint32 *cc);
+static inline void pgstat_end_changecount_write(uint32 *cc);
+static inline uint32 pgstat_begin_changecount_read(uint32 *cc);
+static inline bool pgstat_end_changecount_read(uint32 *cc, uint32 cc_before);
+
+static inline void pgstat_copy_changecounted_stats(void *dst, void *src, size_t len,
+ uint32 *cc);
+
+static inline int pgstat_cmp_hash_key(const void *a, const void *b, size_t size, void *arg);
+static inline uint32 pgstat_hash_hash_key(const void *d, size_t size, void *arg);
+static inline size_t pgstat_get_entry_len(PgStat_Kind kind);
+static inline void *pgstat_get_entry_data(PgStat_Kind kind, PgStatShared_Common *entry);
+
+
/*
* Functions in pgstat.c
*/
-extern void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
-extern void pgstat_send(void *msg, int len);
+const PgStat_KindInfo *pgstat_get_kind_info(PgStat_Kind kind);
+
#ifdef USE_ASSERT_CHECKING
extern void pgstat_assert_is_up(void);
#else
#define pgstat_assert_is_up() ((void)true)
#endif
+extern void pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref);
+extern PgStat_EntryRef *pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid, bool *created_entry);
+extern PgStat_EntryRef *pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid);
+
+extern void *pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, Oid objoid);
+extern void pgstat_snapshot_fixed(PgStat_Kind kind);
+
+
+/*
+ * Functions in pgstat_archiver.c
+ */
+
+extern void pgstat_archiver_reset_all_cb(TimestampTz ts);
+extern void pgstat_archiver_snapshot_cb(void);
+
+
+/*
+ * Functions in pgstat_bgwriter.c
+ */
+
+extern void pgstat_bgwriter_reset_all_cb(TimestampTz ts);
+extern void pgstat_bgwriter_snapshot_cb(void);
+
+
+/*
+ * Functions in pgstat_checkpointer.c
+ */
+
+extern void pgstat_checkpointer_reset_all_cb(TimestampTz ts);
+extern void pgstat_checkpointer_snapshot_cb(void);
+
/*
* Functions in pgstat_database.c
*/
-extern void AtEOXact_PgStat_Database(bool isCommit, bool parallel);
extern void pgstat_report_disconnect(Oid dboid);
-extern void pgstat_update_dbstats(PgStat_MsgTabstat *tsmsg, TimestampTz now);
+extern void pgstat_update_dbstats(TimestampTz ts);
+extern void AtEOXact_PgStat_Database(bool isCommit, bool parallel);
+
+extern PgStat_StatDBEntry *pgstat_prep_database_pending(Oid dboid);
+extern void pgstat_reset_database_timestamp(Oid dboid, TimestampTz ts);
+extern bool pgstat_database_flush_cb(PgStat_EntryRef *entry_ref, bool nowait);
+extern void pgstat_database_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts);
/*
* Functions in pgstat_function.c
*/
-extern void pgstat_send_funcstats(void);
+extern bool pgstat_function_flush_cb(PgStat_EntryRef *entry_ref, bool nowait);
/*
@@ -120,23 +557,73 @@ extern void AtEOXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isC
extern void AtEOSubXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit, int nestDepth);
extern void AtPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state);
extern void PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state);
-extern void pgstat_send_tabstats(TimestampTz now, bool disconnect);
+
+extern bool pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait);
+extern void pgstat_relation_delete_pending_cb(PgStat_EntryRef *entry_ref);
+
+
+/*
+ * Functions in pgstat_replslot.c
+ */
+
+extern void pgstat_replslot_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts);
+extern void pgstat_replslot_to_serialized_name_cb(const PgStatShared_Common *tmp, NameData *name);
+extern bool pgstat_replslot_from_serialized_name_cb(const NameData *name, PgStat_HashKey *key);
+
+
+/*
+ * Functions in pgstat_shmem.c
+ */
+
+extern void pgstat_attach_shmem(void);
+extern void pgstat_detach_shmem(void);
+
+extern PgStat_EntryRef *pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, Oid objoid,
+ bool create, bool *found);
+extern bool pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait);
+extern void pgstat_unlock_entry(PgStat_EntryRef *entry_ref);
+extern bool pgstat_drop_entry(PgStat_Kind kind, Oid dboid, Oid objoid);
+extern void pgstat_drop_all_entries(void);
+extern PgStat_EntryRef *pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, Oid objoid,
+ bool nowait);
+extern void pgstat_reset_entry(PgStat_Kind kind, Oid dboid, Oid objoid, TimestampTz ts);
+extern void pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts);
+extern void pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum),
+ Datum match_data,
+ TimestampTz ts);
+
+extern void pgstat_request_entry_refs_gc(void);
+extern PgStatShared_Common *pgstat_init_entry(PgStat_Kind kind,
+ PgStatShared_HashEntry *shhashent);
/*
* Functions in pgstat_slru.c
*/
-extern void pgstat_send_slru(void);
+extern bool pgstat_slru_flush(bool nowait);
+extern void pgstat_slru_reset_all_cb(TimestampTz ts);
+extern void pgstat_slru_snapshot_cb(void);
/*
* Functions in pgstat_wal.c
*/
+extern bool pgstat_flush_wal(bool nowait);
extern void pgstat_init_wal(void);
extern bool pgstat_have_pending_wal(void);
+extern void pgstat_wal_reset_all_cb(TimestampTz ts);
+extern void pgstat_wal_snapshot_cb(void);
+
+
+/*
+ * Functions in pgstat_subscription.c
+ */
+
+extern bool pgstat_subscription_flush_cb(PgStat_EntryRef *entry_ref, bool nowait);
+extern void pgstat_subscription_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts);
/*
* Functions in pgstat_xact.c
@@ -151,29 +638,145 @@ extern void pgstat_create_transactional(PgStat_Kind kind, Oid dboid, Oid objoid)
* Variables in pgstat.c
*/
-extern pgsocket pgStatSock;
+extern PgStat_LocalState pgStatLocal;
/*
- * Variables in pgstat_database.c
+ * Variables in pgstat_slru.c
*/
-extern int pgStatXactCommit;
-extern int pgStatXactRollback;
+extern bool have_slrustats;
/*
- * Variables in pgstat_functions.c
+ * Implementation of inline functions declared above.
+ */
+
+/*
+ * Helpers for changecount manipulation. See comments around struct
+ * PgBackendStatus for details.
*/
-extern bool have_function_stats;
+static inline void
+pgstat_begin_changecount_write(uint32 *cc)
+{
+ Assert((*cc & 1) == 0);
+
+ START_CRIT_SECTION();
+ (*cc)++;
+ pg_write_barrier();
+}
+
+static inline void
+pgstat_end_changecount_write(uint32 *cc)
+{
+ Assert((*cc & 1) == 1);
+
+ pg_write_barrier();
+
+ (*cc)++;
+
+ END_CRIT_SECTION();
+}
+
+static inline uint32
+pgstat_begin_changecount_read(uint32 *cc)
+{
+ uint32 before_cc = *cc;
+
+ CHECK_FOR_INTERRUPTS();
+ pg_read_barrier();
+
+ return before_cc;
+}
/*
- * Variables in pgstat_relation.c
+ * Returns true if the read succeeded, false if it needs to be repeated.
*/
+static inline bool
+pgstat_end_changecount_read(uint32 *cc, uint32 before_cc)
+{
+ uint32 after_cc;
+
+ pg_read_barrier();
+
+ after_cc = *cc;
+
+ /* was a write in progress when we started? */
+ if (before_cc & 1)
+ return false;
+
+ /* did writes start and complete while we read? */
+ return before_cc == after_cc;
+}
+
+
+/*
+ * helper function for PgStat_KindInfo->snapshot_cb
+ * PgStat_KindInfo->reset_all_cb callbacks.
+ *
+ * Copies out the specified memory area following change-count protocol.
+ */
+static inline void
+pgstat_copy_changecounted_stats(void *dst, void *src, size_t len,
+ uint32 *cc)
+{
+ uint32 cc_before;
+
+ do
+ {
+ cc_before = pgstat_begin_changecount_read(cc);
+
+ memcpy(dst, src, len);
+ }
+ while (!pgstat_end_changecount_read(cc, cc_before));
+}
+
+/* helpers for dshash / simplehash hashtables */
+static inline int
+pgstat_cmp_hash_key(const void *a, const void *b, size_t size, void *arg)
+{
+ AssertArg(size == sizeof(PgStat_HashKey) && arg == NULL);
+ return memcmp(a, b, sizeof(PgStat_HashKey));
+}
+
+static inline uint32
+pgstat_hash_hash_key(const void *d, size_t size, void *arg)
+{
+ const PgStat_HashKey *key = (PgStat_HashKey *) d;
+ uint32 hash;
+
+ AssertArg(size == sizeof(PgStat_HashKey) && arg == NULL);
+
+ hash = murmurhash32(key->kind);
+ hash = hash_combine(hash, murmurhash32(key->dboid));
+ hash = hash_combine(hash, murmurhash32(key->objoid));
+
+ return hash;
+}
+
+/*
+ * The length of the data portion of a shared memory stats entry (i.e. without
+ * transient data such as refcounts, lwlocks, ...).
+ */
+static inline size_t
+pgstat_get_entry_len(PgStat_Kind kind)
+{
+ return pgstat_get_kind_info(kind)->shared_data_len;
+}
+
+/*
+ * Returns a pointer to the data portion of a shared memory stats entry.
+ */
+static inline void *
+pgstat_get_entry_data(PgStat_Kind kind, PgStatShared_Common *entry)
+{
+ size_t off = pgstat_get_kind_info(kind)->shared_data_off;
-extern bool have_relation_stats;
+ Assert(off != 0 && off < PG_UINT32_MAX);
+ return ((char *) (entry)) + off;
+}
#endif /* PGSTAT_INTERNAL_H */
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 121dbbc9a96..eadbd009045 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -246,6 +246,7 @@ typedef struct RelationData
*/
Oid rd_toastoid; /* Real TOAST table's OID, or InvalidOid */
+ bool pgstat_enabled; /* should relation stats be counted */
/* use "struct" here to avoid needing to include pgstat.h: */
struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
} RelationData;
diff --git a/src/include/utils/timeout.h b/src/include/utils/timeout.h
index 099f91c61da..c068986d09a 100644
--- a/src/include/utils/timeout.h
+++ b/src/include/utils/timeout.h
@@ -32,6 +32,7 @@ typedef enum TimeoutId
STANDBY_LOCK_TIMEOUT,
IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
IDLE_SESSION_TIMEOUT,
+ IDLE_STATS_UPDATE_TIMEOUT,
CLIENT_CONNECTION_CHECK_TIMEOUT,
STARTUP_PROGRESS_TIMEOUT,
/* First user-definable timeout reason */
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index d870c592632..b578e2ec757 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -42,7 +42,6 @@ typedef enum
WAIT_EVENT_CHECKPOINTER_MAIN,
WAIT_EVENT_LOGICAL_APPLY_MAIN,
WAIT_EVENT_LOGICAL_LAUNCHER_MAIN,
- WAIT_EVENT_PGSTAT_MAIN,
WAIT_EVENT_RECOVERY_WAL_STREAM,
WAIT_EVENT_SYSLOGGER_MAIN,
WAIT_EVENT_WAL_RECEIVER_MAIN,