diff --git a/doc/src/sgml/func/func-admin.sgml b/doc/src/sgml/func/func-admin.sgml index 1b465bc8ba71..a5c66837241c 100644 --- a/doc/src/sgml/func/func-admin.sgml +++ b/doc/src/sgml/func/func-admin.sgml @@ -251,6 +251,130 @@ false is returned. + + + + + pg_get_process_memory_contexts + + pg_get_process_memory_contexts ( pid integer, summary boolean ) + setof record + ( name text, + ident text, + type text, + level integer, + path integer[], + total_bytes bigint, + total_nblocks bigint, + free_bytes bigint, + free_chunks bigint, + used_bytes bigint, + num_agg_contexts integer ) + + + This function handles requests to display the memory contexts of a + PostgreSQL process with the specified + process ID. The function can be used to send requests to backends as + well as auxiliary processes. + + + The returned record contains extended statistics for each memory + context: + + + + name - The name of the memory context. + + + + + ident - Memory context ID (if any). + + + + + type - The type of memory context, possible + values are: AllocSet, Generation, Slab and Bump. + + + + + level - The level in the tree of the current + memory context. + + + + + path - Memory contexts are organized in a + tree model with TopMemoryContext as the root, and all other memory + contexts as nodes in the tree. The path + displays the path from the root to the current memory context. The + path is limited to 100 children per node, with each node limited + to a maximum depth of 100, to preserve memory during reporting. The + printed path will also be limited to 100 nodes counting from the + TopMemoryContext. + + + + + total_bytes - The total number of bytes + allocated to this memory context. + + + + + total_nblocks - The total number of blocks + used for the allocated memory. + + + + + free_bytes - The amount of free memory in + this memory context. + + + + + free_chunks - The number of chunks that + free_bytes corresponds to. 
+ + + + + used_bytes - The total number of bytes + currently occupied. + + + + + num_agg_contexts - The number of memory + contexts aggregated in the displayed statistics. + + + + + + When summary is true, statistics + for memory contexts at levels 1 and 2 are displayed, with level 1 + representing the root node (i.e., TopMemoryContext). + Statistics for contexts on level 2 and below are aggregates of all + child contexts' statistics, where num_agg_contexts + indicates the number of aggregated child contexts. When + summary is false, + the num_agg_contexts value is 1, + indicating that individual statistics are being displayed. + + + After receiving memory context statistics from the target process, the function + returns the results as one row per context. If all the contexts don't + fit within the pre-determined size limit, the remaining context + statistics are aggregated and a cumulative total is displayed. The + num_agg_contexts column indicates the number of + contexts aggregated in the displayed statistics. When + num_agg_contexts is 1 it means + that the context statistics are displayed separately. + + @@ -302,6 +426,39 @@ LOG: Grand total: 1651920 bytes in 201 blocks; 622360 free (88 chunks); 1029560 because it may generate a large number of log messages. + + pg_get_process_memory_contexts can be used to request + memory context statistics of any PostgreSQL + process. 
For example: + +postgres=# SELECT * FROM pg_get_process_memory_contexts( + (SELECT pid FROM pg_stat_activity + WHERE backend_type = 'checkpointer'), + false) LIMIT 1; +-[ RECORD 1 ]----+------------------------------ +name | TopMemoryContext +ident | +type | AllocSet +level | 1 +path | {1} +total_bytes | 90304 +total_nblocks | 3 +free_bytes | 2880 +free_chunks | 1 +used_bytes | 87424 +num_agg_contexts | 1 + + + + While pg_get_process_memory_contexts can be used to + query memory contexts of the local backend, + pg_backend_memory_contexts + (see for more details) + will be less resource intensive when only the local backend is of interest. + + + + diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 059e8778ca7c..c63fd6783bd1 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -692,6 +692,11 @@ GRANT SELECT ON pg_backend_memory_contexts TO pg_read_all_stats; REVOKE EXECUTE ON FUNCTION pg_get_backend_memory_contexts() FROM PUBLIC; GRANT EXECUTE ON FUNCTION pg_get_backend_memory_contexts() TO pg_read_all_stats; +REVOKE EXECUTE ON FUNCTION + pg_get_process_memory_contexts(integer, boolean) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION + pg_get_process_memory_contexts(integer, boolean) TO pg_read_all_stats; + -- Statistics views CREATE VIEW pg_stat_all_tables AS diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index ed19c74bb19f..34bdb88fa7ff 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -791,6 +791,10 @@ ProcessAutoVacLauncherInterrupts(void) if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + /* Publish memory contexts of this process */ + if (PublishMemoryContextPending) + ProcessGetMemoryContextInterrupt(); + /* Process sinval catchup interrupts that happened while sleeping */ ProcessCatchupInterrupt(); } diff --git a/src/backend/postmaster/checkpointer.c 
b/src/backend/postmaster/checkpointer.c index e84e8663e966..5b3e08805bfe 100644 --- a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -679,6 +679,10 @@ ProcessCheckpointerInterrupts(void) /* Perform logging of memory contexts of this process */ if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + + /* Publish memory contexts of this process */ + if (PublishMemoryContextPending) + ProcessGetMemoryContextInterrupt(); } /* diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c index ba63b84dfc53..29454b8bf8a9 100644 --- a/src/backend/postmaster/interrupt.c +++ b/src/backend/postmaster/interrupt.c @@ -48,6 +48,10 @@ ProcessMainLoopInterrupts(void) /* Perform logging of memory contexts of this process */ if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + + /* Publish memory contexts of this process */ + if (PublishMemoryContextPending) + ProcessGetMemoryContextInterrupt(); } /* diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index ce6b52993247..fdd385e492d1 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -871,6 +871,10 @@ ProcessPgArchInterrupts(void) if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + /* Publish memory contexts of this process */ + if (PublishMemoryContextPending) + ProcessGetMemoryContextInterrupt(); + if (ConfigReloadPending) { char *archiveLib = pstrdup(XLogArchiveLibrary); diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index 27e86cf393f6..7149a67fcbcd 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -192,6 +192,10 @@ ProcessStartupProcInterrupts(void) /* Perform logging of memory contexts of this process */ if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + + /* Publish memory contexts of this process */ + if (PublishMemoryContextPending) + ProcessGetMemoryContextInterrupt(); } diff 
--git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c index c4a888a081c4..00f03b36ed88 100644 --- a/src/backend/postmaster/walsummarizer.c +++ b/src/backend/postmaster/walsummarizer.c @@ -879,6 +879,10 @@ ProcessWalSummarizerInterrupts(void) /* Perform logging of memory contexts of this process */ if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + + /* Publish memory contexts of this process */ + if (PublishMemoryContextPending) + ProcessGetMemoryContextInterrupt(); } /* diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index b23d0c19360a..a5ed58a18c50 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -52,6 +52,7 @@ #include "storage/sinvaladt.h" #include "utils/guc.h" #include "utils/injection_point.h" +#include "utils/memutils.h" /* GUCs */ int shared_memory_type = DEFAULT_SHARED_MEMORY_TYPE; @@ -140,6 +141,7 @@ CalculateShmemSize(void) size = add_size(size, SlotSyncShmemSize()); size = add_size(size, AioShmemSize()); size = add_size(size, WaitLSNShmemSize()); + size = add_size(size, MemoryContextKeysShmemSize() + sizeof(LWLockPadded)); /* include additional requested shmem from preload libraries */ size = add_size(size, total_addin_request); @@ -328,6 +330,7 @@ CreateOrAttachShmemStructs(void) InjectionPointShmemInit(); AioShmemInit(); WaitLSNShmemInit(); + MemoryContextKeysShmemInit(); } /* diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index 087821311cce..8963285cc123 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -691,6 +691,9 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_LOG_MEMORY_CONTEXT)) HandleLogMemoryContextInterrupt(); + if (CheckProcSignal(PROCSIG_GET_MEMORY_CONTEXT)) + HandleGetMemoryContextInterrupt(); + if (CheckProcSignal(PROCSIG_PARALLEL_APPLY_MESSAGE)) HandleParallelApplyMessageInterrupt(); diff --git 
a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 1504fafe6d88..c5e691517562 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -51,6 +51,7 @@ #include "storage/procsignal.h" #include "storage/spin.h" #include "storage/standby.h" +#include "utils/memutils.h" #include "utils/timeout.h" #include "utils/timestamp.h" diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 2bd89102686e..da8f2b979866 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -3549,6 +3549,9 @@ ProcessInterrupts(void) if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); + if (PublishMemoryContextPending) + ProcessGetMemoryContextInterrupt(); + if (ParallelApplyMessagePending) ProcessParallelApplyMessages(); } diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt index c1ac71ff7f24..644d8d988e13 100644 --- a/src/backend/utils/activity/wait_event_names.txt +++ b/src/backend/utils/activity/wait_event_names.txt @@ -162,6 +162,7 @@ WAL_RECEIVER_EXIT "Waiting for the WAL receiver to exit." WAL_RECEIVER_WAIT_START "Waiting for startup process to send initial data for streaming replication." WAL_SUMMARY_READY "Waiting for a new WAL summary to be generated." XACT_GROUP_UPDATE "Waiting for the group leader to update transaction status at transaction end." +MEM_CXT_PUBLISH "Waiting for a process to publish memory information." ABI_compatibility: @@ -404,6 +405,7 @@ SubtransSLRU "Waiting to access the sub-transaction SLRU cache." XactSLRU "Waiting to access the transaction status SLRU cache." ParallelVacuumDSA "Waiting for parallel vacuum dynamic shared memory allocation." AioUringCompletion "Waiting for another process to complete IO via io_uring." +MemoryContextReportingKeys "Waiting for another process to complete reading or writing the memory reporting keys." 
# No "ABI_compatibility" region here as WaitEventLWLock has its own C code. diff --git a/src/backend/utils/adt/mcxtfuncs.c b/src/backend/utils/adt/mcxtfuncs.c index fe6dce9cba3e..a62f3d6dc93a 100644 --- a/src/backend/utils/adt/mcxtfuncs.c +++ b/src/backend/utils/adt/mcxtfuncs.c @@ -15,20 +15,51 @@ #include "postgres.h" +#include "access/twophase.h" +#include "catalog/pg_authid_d.h" #include "funcapi.h" #include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "storage/dsm_registry.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "utils/acl.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/hsearch.h" +#include "utils/injection_point.h" +#include "utils/memutils.h" +#include "utils/wait_event_types.h" + +#define CLIENT_KEY_SIZE 64 + +static LWLock *client_keys_lock = NULL; +static int *client_keys = NULL; +static dshash_table *MemoryStatsDsHash = NULL; +static dsa_area *MemoryStatsDsaArea = NULL; + +static void memstats_dsa_cleanup(char *key); +static const char *ContextTypeToString(NodeTag type); +static void PublishMemoryContext(MemoryStatsEntry *memcxt_info, + int curr_id, MemoryContext context, + List *path, + MemoryContextCounters stat, + int num_contexts); +static List *compute_context_path(MemoryContext c, HTAB *context_id_lookup); +static void end_memorycontext_reporting(MemoryStatsDSHashEntry *entry, MemoryContext oldcontext, + HTAB *context_id_lookup); /* ---------- * The max bytes for showing identifiers of MemoryContext. + * This is used by pg_get_backend_memory_context - view used for local backend. 
* ---------- */ #define MEMORY_CONTEXT_IDENT_DISPLAY_SIZE 1024 +#define MAX_PATH_DISPLAY_LENGTH 100 +/* Timeout in seconds */ +#define MEMORY_STATS_MAX_TIMEOUT 5 + /* * MemoryContextId * Used for storage of transient identifiers for @@ -89,7 +120,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, */ for (MemoryContext cur = context; cur != NULL; cur = cur->parent) { - MemoryContextId *entry; + MemoryStatsContextId *entry; bool found; entry = hash_search(context_id_lookup, &cur, HASH_FIND, &found); @@ -143,24 +174,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, else nulls[1] = true; - switch (context->type) - { - case T_AllocSetContext: - type = "AllocSet"; - break; - case T_GenerationContext: - type = "Generation"; - break; - case T_SlabContext: - type = "Slab"; - break; - case T_BumpContext: - type = "Bump"; - break; - default: - type = "???"; - break; - } + type = ContextTypeToString(context->type); values[2] = CStringGetTextDatum(type); values[3] = Int32GetDatum(list_length(path)); /* level */ @@ -175,6 +189,38 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, list_free(path); } +/* + * ContextTypeToString + * Returns a textual representation of a context type + * + * This should cover the same types as MemoryContextIsValid. + */ +const char * +ContextTypeToString(NodeTag type) +{ + const char *context_type; + + switch (type) + { + case T_AllocSetContext: + context_type = "AllocSet"; + break; + case T_GenerationContext: + context_type = "Generation"; + break; + case T_SlabContext: + context_type = "Slab"; + break; + case T_BumpContext: + context_type = "Bump"; + break; + default: + context_type = "???"; + break; + } + return context_type; +} + /* * pg_get_backend_memory_contexts * SQL SRF showing backend memory context. 
@@ -189,7 +235,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) HTAB *context_id_lookup; ctl.keysize = sizeof(MemoryContext); - ctl.entrysize = sizeof(MemoryContextId); + ctl.entrysize = sizeof(MemoryStatsContextId); ctl.hcxt = CurrentMemoryContext; context_id_lookup = hash_create("pg_get_backend_memory_contexts", @@ -216,7 +262,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) foreach_ptr(MemoryContextData, cur, contexts) { - MemoryContextId *entry; + MemoryStatsContextId *entry; bool found; /* @@ -224,8 +270,8 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) * PutMemoryContextsStatsTupleStore needs this to populate the "path" * column with the parent context_ids. */ - entry = (MemoryContextId *) hash_search(context_id_lookup, &cur, - HASH_ENTER, &found); + entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur, + HASH_ENTER, &found); entry->context_id = context_id++; Assert(!found); @@ -305,3 +351,821 @@ pg_log_backend_memory_contexts(PG_FUNCTION_ARGS) PG_RETURN_BOOL(true); } + +/* + * pg_get_process_memory_contexts + * Signal a backend or an auxiliary process to send its memory contexts, + * wait for the results and display them. + * + * By default, only superusers or users with ROLE_PG_READ_ALL_STATS are allowed + * to signal a process to return the memory contexts. This is because allowing + * any users to issue this request at an unbounded rate would cause lots of + * requests to be sent, which can lead to denial of service. Additional roles + * can be permitted with GRANT. + * + * On receipt of this signal, a backend or an auxiliary process sets the flag + * in the signal handler, which causes the next CHECK_FOR_INTERRUPTS() + * or process-specific interrupt handler to copy the memory context details + * to a dynamic shared memory space. 
+ * + * We have defined a limit on DSA memory that could be allocated per process - + * if the process has more memory contexts than what can fit in the allocated + * size, the excess contexts are summarized and represented as cumulative total + * at the end of the buffer. + * + * After sending the signal, wait on a condition variable. The publishing + * backend, after copying the data to shared memory, sends signal on that + * condition variable. There is one condition variable per client process. + * Once the condition variable is signalled, check if the latest memory context + * information is available and display. + * + * If the publishing backend does not respond before the condition variable + * times out, which is set to a predefined value MEMORY_STATS_MAX_TIMEOUT, give up + * and return NULL. + */ +Datum +pg_get_process_memory_contexts(PG_FUNCTION_ARGS) +{ + int pid = PG_GETARG_INT32(0); + bool summary = PG_GETARG_BOOL(1); + PGPROC *proc; + ProcNumber procNumber = INVALID_PROC_NUMBER; + bool proc_is_aux = false; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + MemoryStatsEntry *memcxt_info; + MemoryStatsDSHashEntry *entry; + bool found; + char key[CLIENT_KEY_SIZE]; + TimestampTz start_timestamp; + + /* + * See if the process with given pid is a backend or an auxiliary process + * and remember the type for when we requery the process later. + */ + proc = BackendPidGetProc(pid); + if (proc == NULL) + { + proc = AuxiliaryPidGetProc(pid); + proc_is_aux = true; + } + + /* + * BackendPidGetProc() and AuxiliaryPidGetProc() return NULL if the pid + * isn't valid; this is however not a problem and leave with a WARNING. + * See comment in pg_log_backend_memory_contexts for a discussion on this. + */ + if (proc == NULL) + { + /* + * This is a warning because we don't want to break loops. 
+ */ + ereport(WARNING, + errmsg("PID %d is not a PostgreSQL server process", pid)); + PG_RETURN_NULL(); + } + + InitMaterializedSRF(fcinfo, 0); + + procNumber = GetNumberFromPGProc(proc); + + /* + * Create a DSA to allocate memory for copying memory contexts statistics. + * Allocate the memory in the DSA and send dsa pointer to the server + * process for storing the context statistics. If number of contexts + * exceed a predefined limit (1MB), a cumulative total is stored for such + * contexts. + * + * The DSA is created once for the lifetime of the server, and only + * attached in subsequent calls. + */ + if (MemoryStatsDsaArea == NULL) + MemoryStatsDsaArea = GetNamedDSA("memory_context_statistics_dsa", &found); + + /* + * The dsa pointers containing statistics for each client are stored in a + * dshash table. In addition to dsa pointer, each entry in this table also + * contains information about the statistics, condition variable for + * signalling between client and the server and miscellaneous data + * specific to a request. There is one entry per client request in the + * hash table. + */ + if (MemoryStatsDsHash == NULL) + MemoryStatsDsHash = GetNamedDSHash("memory_context_statistics_dshash", &memctx_dsh_params, &found); + + snprintf(key, sizeof(key), "%d", MyProcNumber); + + /* + * Check if the publishing process slot is empty and store this clients + * key i.e its procNumber. This informs the publishing process that it is + * supposed to write statistics in the hash entry corresponding to this + * client. + */ + LWLockAcquire(client_keys_lock, LW_EXCLUSIVE); + + /* + * XXX. If the process exits without cleaning up its slot, i.e in case of + * an abrupt crash the client_keys slot won't be reset thus resulting in + * false negative and WARNING would be thrown in case another process with + * same slot index is queried for statistics. 
+ */ + if (client_keys[procNumber] == -1) + client_keys[procNumber] = MyProcNumber; + else + { + LWLockRelease(client_keys_lock); + ereport(WARNING, + errmsg("server process %d is processing previous request", pid)); + PG_RETURN_NULL(); + } + LWLockRelease(client_keys_lock); + + /* + * Insert an entry for this client in DSHASH table the first time this + * function is called. This entry is deleted when the process exits in + * before_shmem_exit call. + * + * dshash_find_or_insert locks the entry to prevent the publisher from + * reading before client has updated the entry. + */ + entry = dshash_find_or_insert(MemoryStatsDsHash, key, &found); + if (!found) + { + entry->stats_written = false; + ConditionVariableInit(&entry->memcxt_cv); + } + + /* + * Allocate 1MB of memory for the backend to publish its statistics on + * every call to this function. The memory is freed at the end of the + * function. + */ + entry->memstats_dsa_pointer = + dsa_allocate0(MemoryStatsDsaArea, MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND); + + /* + * Specify whether a summary of statistics is requested, before signalling + * the server. + */ + entry->summary = summary; + + /* + * Indicate which server process statistics are being requested from. If + * this client times out before the last requested process can publish its + * statistics, it may send a new request to another server process. Since + * the previous server was notified, it might attempt to read the same + * client entry and respond incorrectly with its statistics. By storing + * the server ID in the client entry, we prevent any previously signalled + * server process from writing its statistics in the space meant for the + * newly requested process. + */ + entry->target_server_id = pid; + dshash_release_lock(MemoryStatsDsHash, entry); + + /* + * Send a signal to a PostgreSQL process, informing it we want it to + * produce information about its memory contexts. 
+ */ + if (SendProcSignal(pid, PROCSIG_GET_MEMORY_CONTEXT, procNumber) < 0) + { + memstats_dsa_cleanup(key); + ereport(WARNING, + errmsg("could not send signal to process %d: %m", pid)); + PG_RETURN_NULL(); + } + start_timestamp = GetCurrentTimestamp(); + + while (1) + { + long elapsed_time; + + entry = dshash_find_or_insert(MemoryStatsDsHash, key, &found); + Assert(found); + + INJECTION_POINT("memcontext-client-crash", NULL); + + memcxt_info = (MemoryStatsEntry *) + dsa_get_address(MemoryStatsDsaArea, entry->memstats_dsa_pointer); + + /* + * We expect to come out of sleep when the requested process has + * finished publishing the statistics, verified using a boolean + * stats_written. + * + * Make sure that the statistics are actually written by checking that + * the name of the context is not NULL. This is done to ensure that + * the subsequent waits for statistics do not return spuriously if the + * previous call to the function ended in error and thus could not + * clear the stats_written flag. + */ + if (entry->stats_written && memcxt_info[0].name[0] != '\0') + break; + + dshash_release_lock(MemoryStatsDsHash, entry); + + elapsed_time = TimestampDifferenceMilliseconds(start_timestamp, + GetCurrentTimestamp()); + /* Return if we have already exceeded the timeout */ + if (elapsed_time >= MEMORY_STATS_MAX_TIMEOUT * 1000) + { + memstats_dsa_cleanup(key); + PG_RETURN_NULL(); + } + + /* + * Recheck the state of the backend before sleeping on the condition + * variable to ensure the process is still alive. Only check the + * relevant process type based on the earlier PID check. + */ + if (proc_is_aux) + proc = AuxiliaryPidGetProc(pid); + else + proc = BackendPidGetProc(pid); + + /* + * The target server process ending during memory context processing + * is not an error. 
+ */ + if (proc == NULL) + { + memstats_dsa_cleanup(key); + ConditionVariableCancelSleep(); + ereport(WARNING, + errmsg("PID %d is no longer a PostgreSQL server process", + pid)); + PG_RETURN_NULL(); + } + + INJECTION_POINT("memcontext-client-crash", NULL); + + /* + * Wait for MEMORY_STATS_MAX_TIMEOUT. If no statistics are available + * within the allowed time then return NULL. The timer is defined in + * milliseconds since that's what the condition variable sleep uses. + */ + if (ConditionVariableTimedSleep(&entry->memcxt_cv, + (MEMORY_STATS_MAX_TIMEOUT * 1000), WAIT_EVENT_MEM_CXT_PUBLISH)) + { + /* Timeout has expired, return NULL */ + memstats_dsa_cleanup(key); + ConditionVariableCancelSleep(); + PG_RETURN_NULL(); + } + } + + /* + * Backend has finished publishing the stats, project them. + */ +#define PG_GET_PROCESS_MEMORY_CONTEXTS_COLS 11 + for (int i = 0; i < entry->total_stats; i++) + { + ArrayType *path_array; + int path_length; + Datum values[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS]; + bool nulls[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS]; + Datum *path_datum = NULL; + + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + + Assert(memcxt_info[i].name[0] != '\0'); + values[0] = CStringGetTextDatum(memcxt_info[i].name); + + if (memcxt_info[i].ident[0] != '\0') + values[1] = CStringGetTextDatum(memcxt_info[i].ident); + else + nulls[1] = true; + + values[2] = CStringGetTextDatum(ContextTypeToString(memcxt_info[i].type)); + values[3] = Int32GetDatum(memcxt_info[i].levels); + + path_length = memcxt_info[i].path_length; + path_datum = (Datum *) palloc(path_length * sizeof(Datum)); + if (memcxt_info[i].path[0] != 0) + { + for (int j = 0; j < path_length; j++) + path_datum[j] = Int32GetDatum(memcxt_info[i].path[j]); + path_array = construct_array_builtin(path_datum, path_length, INT4OID); + values[4] = PointerGetDatum(path_array); + } + else + nulls[4] = true; + + values[5] = Int64GetDatum(memcxt_info[i].totalspace); + values[6] = 
Int64GetDatum(memcxt_info[i].nblocks); + values[7] = Int64GetDatum(memcxt_info[i].freespace); + values[8] = Int64GetDatum(memcxt_info[i].freechunks); + values[9] = Int64GetDatum(memcxt_info[i].totalspace - + memcxt_info[i].freespace); + values[10] = Int32GetDatum(memcxt_info[i].num_agg_stats); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, + values, nulls); + } + dshash_release_lock(MemoryStatsDsHash, entry); + memstats_dsa_cleanup(key); + + ConditionVariableCancelSleep(); + + PG_RETURN_NULL(); +} + +static void +memstats_dsa_cleanup(char *key) +{ + MemoryStatsDSHashEntry *entry; + + entry = dshash_find(MemoryStatsDsHash, key, true); + + Assert(MemoryStatsDsaArea != NULL); + dsa_free(MemoryStatsDsaArea, entry->memstats_dsa_pointer); + entry->memstats_dsa_pointer = InvalidDsaPointer; + entry->stats_written = false; + entry->target_server_id = 0; + + dshash_release_lock(MemoryStatsDsHash, entry); +} +void +MemoryContextKeysShmemInit(void) +{ + bool found; + + client_keys = (int *) + ShmemInitStruct("MemoryContextKeys", + MemoryContextKeysShmemSize() + sizeof(LWLockPadded), &found); + client_keys_lock = (LWLock *) ((char *) client_keys + MemoryContextKeysShmemSize()); + + if (!found) + { + MemSet(client_keys, -1, MemoryContextKeysShmemSize()); + LWLockInitialize(client_keys_lock, LWTRANCHE_MEMORY_CONTEXT_KEYS); + } +} + +Size +MemoryContextKeysShmemSize(void) +{ + Size sz = 0; + Size TotalProcs = 0; + + TotalProcs = add_size(TotalProcs, NUM_AUXILIARY_PROCS); + TotalProcs = add_size(TotalProcs, MaxBackends); + sz = add_size(sz, mul_size(TotalProcs, sizeof(int))); + + return sz; +} + +/* + * HandleGetMemoryContextInterrupt + * Handle receipt of an interrupt indicating a request to publish memory + * contexts statistics. + * + * All the actual work is deferred to ProcessGetMemoryContextInterrupt() as + * this cannot be performed in a signal handler. 
+ */ +void +HandleGetMemoryContextInterrupt(void) +{ + InterruptPending = true; + PublishMemoryContextPending = true; + /* latch will be set by procsignal_sigusr1_handler */ +} + +/* + * ProcessGetMemoryContextInterrupt + * Generate information about memory contexts used by the process. + * + * Performs a breadth first search on the memory context tree, thus a parent's + * statistics are reported before those of its children in the monitoring function + * output. + * + * Statistics for all the processes are shared via the same dynamic shared + * area. Individual statistics are tracked independently in + * per-process DSA pointers. These pointers are stored in a dshash table with + * the requesting client's ProcNumber as the key. + * + * We calculate the maximum number of contexts whose statistics can be displayed + * using a pre-determined limit for memory available per process for this + * utility and maximum size of statistics for each context. The remaining + * context statistics, if any, are captured as a cumulative total at the end of + * the individual contexts' statistics. + * + * If summary is true, we capture the level 1 and level 2 contexts' + * statistics. For that we traverse the memory context tree recursively in + * depth first search manner to cover all the children of a parent context, to + * be able to display a cumulative total of memory consumption by a parent at + * level 2 and all its children. 
+ */ +void +ProcessGetMemoryContextInterrupt(void) +{ + List *contexts; + HASHCTL ctl; + HTAB *context_id_lookup; + int context_id = 0; + MemoryStatsEntry *meminfo; + bool summary = false; + MemoryContextCounters stat; + int num_individual_stats = 0; + bool found; + MemoryStatsDSHashEntry *entry; + char key[CLIENT_KEY_SIZE]; + int clientProcNumber; + MemoryContext memstats_ctx = NULL; + MemoryContext oldcontext = NULL; + + PublishMemoryContextPending = false; + + /* + * Retreive the client key for publishing statistics and reset it to -1, + * so other clients can request memory statistics from this process + */ + LWLockAcquire(client_keys_lock, LW_SHARED); + Assert(client_keys[MyProcNumber] != -1); + clientProcNumber = client_keys[MyProcNumber]; + client_keys[MyProcNumber] = -1; + LWLockRelease(client_keys_lock); + + /* + * Create a new memory context which is not a part of TopMemoryContext + * tree. This context is used to allocate all memory in this function. + * This helps in keeping the memory allocation in this function to report + * memory consumption statistics separate. So that it does not affect the + * output of this function. + */ + memstats_ctx = AllocSetContextCreate((MemoryContext) NULL, "publish_memory_context_statistics", + ALLOCSET_SMALL_SIZES); + oldcontext = MemoryContextSwitchTo(memstats_ctx); + + /* + * The hash table is used for constructing "path" column of the view, + * similar to its local backend counterpart. + */ + ctl.keysize = sizeof(MemoryContext); + ctl.entrysize = sizeof(MemoryStatsContextId); + ctl.hcxt = CurrentMemoryContext; + + context_id_lookup = hash_create("pg_get_remote_backend_memory_contexts", + 256, + &ctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + + /* List of contexts to process in the next round - start at the top. */ + contexts = list_make1(TopMemoryContext); + + /* + * If DSA exists, created by another process requesting statistics, attach + * to it. 
We expect the client process to create required DSA and Dshash + * table. + */ + if (MemoryStatsDsaArea == NULL) + MemoryStatsDsaArea = GetNamedDSA("memory_context_statistics_dsa", &found); + + if (MemoryStatsDsHash == NULL) + MemoryStatsDsHash = GetNamedDSHash("memory_context_statistics_dshash", &memctx_dsh_params, &found); + + + snprintf(key, CLIENT_KEY_SIZE, "%d", clientProcNumber); + + /* + * The entry lock is held by dshash_find_or_insert to protect writes to + * process specific memory. Two different processes publishing statistics + * do not block each other. + */ + INJECTION_POINT("memcontext-server-crash", NULL); + entry = dshash_find_or_insert(MemoryStatsDsHash, key, &found); + + /* + * Entry has been deleted due to client process exit. Make sure that the + * client always deletes the entry after taking required lock or this + * function may end up writing to unallocated memory. + */ + if (!found) + { + entry->stats_written = false; + end_memorycontext_reporting(entry, oldcontext, context_id_lookup); + return; + } + + /* + * The client has timed out waiting for us to write statistics and is + * requesting statistics from some other process + */ + if (entry->target_server_id != MyProcPid) + { + entry->stats_written = false; + end_memorycontext_reporting(entry, oldcontext, context_id_lookup); + return; + } + summary = entry->summary; + + /* Should be allocated by a client backend that is requesting statistics */ + Assert(entry->memstats_dsa_pointer != InvalidDsaPointer); + meminfo = (MemoryStatsEntry *) + dsa_get_address(MemoryStatsDsaArea, entry->memstats_dsa_pointer); + + if (summary) + { + int cxt_id = 0; + List *path = NIL; + MemoryStatsContextId *contextid_entry; + + /* Copy TopMemoryContext statistics to DSA */ + memset(&stat, 0, sizeof(stat)); + (*TopMemoryContext->methods->stats) (TopMemoryContext, NULL, NULL, + &stat, true); + path = lcons_int(1, path); + PublishMemoryContext(meminfo, cxt_id, TopMemoryContext, path, stat, + 1); + + contextid_entry 
= (MemoryStatsContextId *) hash_search(context_id_lookup, &TopMemoryContext, + HASH_ENTER, &found); + Assert(!found); + + /* + * context id starts with 1 + */ + contextid_entry->context_id = cxt_id + 1; + + /* + * Copy statistics for each of TopMemoryContexts children. This + * includes statistics of at most 100 children per node, with each + * child node limited to a depth of 100 in its subtree. + */ + for (MemoryContext c = TopMemoryContext->firstchild; c != NULL; + c = c->nextchild) + { + MemoryContextCounters grand_totals; + int num_contexts = 0; + + path = NIL; + memset(&grand_totals, 0, sizeof(grand_totals)); + + cxt_id++; + contextid_entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &c, + HASH_ENTER, &found); + Assert(!found); + contextid_entry->context_id = cxt_id + 1; + + MemoryContextStatsCounter(c, &grand_totals, &num_contexts); + + path = compute_context_path(c, context_id_lookup); + + PublishMemoryContext(meminfo, cxt_id, c, path, + grand_totals, num_contexts); + } + entry->total_stats = cxt_id + 1; + + entry->stats_written = true; + end_memorycontext_reporting(entry, oldcontext, context_id_lookup); + /* Notify waiting backends and return */ + ConditionVariableBroadcast(&entry->memcxt_cv); + return; + } + foreach_ptr(MemoryContextData, cur, contexts) + { + List *path = NIL; + MemoryStatsContextId *contextid_entry; + + contextid_entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur, + HASH_ENTER, &found); + Assert(!found); + + /* + * context id starts with 1 + */ + contextid_entry->context_id = context_id + 1; + + /* + * Figure out the transient context_id of this context and each of its + * ancestors, to compute a path for this context. 
+ */ + path = compute_context_path(cur, context_id_lookup); + + /* Examine the context stats */ + memset(&stat, 0, sizeof(stat)); + (*cur->methods->stats) (cur, NULL, NULL, &stat, true); + + /* Account for saving one statistics slot for cumulative reporting */ + if (context_id < (MAX_MEMORY_CONTEXT_STATS_NUM - 1)) + { + /* Copy statistics to DSA memory */ + PublishMemoryContext(meminfo, context_id, cur, path, stat, 1); + } + else + { + meminfo[MAX_MEMORY_CONTEXT_STATS_NUM - 1].totalspace += stat.totalspace; + meminfo[MAX_MEMORY_CONTEXT_STATS_NUM - 1].nblocks += stat.nblocks; + meminfo[MAX_MEMORY_CONTEXT_STATS_NUM - 1].freespace += stat.freespace; + meminfo[MAX_MEMORY_CONTEXT_STATS_NUM - 1].freechunks += stat.freechunks; + } + + /* + * DSA max limit per process is reached, write aggregate of the + * remaining statistics. + * + * We can store contexts from 0 to max_stats - 1. When context_id is + * greater than max_stats, we stop reporting individual statistics + * when context_id equals max_stats - 2. As we use max_stats - 1 array + * slot for reporting cumulative statistics or "Remaining Totals". + */ + if (context_id == (MAX_MEMORY_CONTEXT_STATS_NUM - 2)) + { + int namelen = strlen("Remaining Totals"); + + num_individual_stats = context_id + 1; + strlcpy(meminfo[MAX_MEMORY_CONTEXT_STATS_NUM - 1].name, "Remaining Totals", namelen + 1); + meminfo[MAX_MEMORY_CONTEXT_STATS_NUM - 1].ident[0] = '\0'; + meminfo[MAX_MEMORY_CONTEXT_STATS_NUM - 1].path[0] = 0; + meminfo[MAX_MEMORY_CONTEXT_STATS_NUM - 1].type = 0; + } + context_id++; + + for (MemoryContext c = cur->firstchild; c != NULL; c = c->nextchild) + contexts = lappend(contexts, c); + } + + /* + * Statistics are not aggregated, i.e individual statistics reported when + * context_id <= max_stats. 
+ */ + if (context_id <= MAX_MEMORY_CONTEXT_STATS_NUM) + { + entry->total_stats = context_id; + meminfo[MAX_MEMORY_CONTEXT_STATS_NUM - 1].num_agg_stats = 1; + } + /* Report number of aggregated memory contexts */ + else + { + meminfo[MAX_MEMORY_CONTEXT_STATS_NUM - 1].num_agg_stats = context_id - + num_individual_stats; + + /* + * Total stats equals num_individual_stats + 1 record for cumulative + * statistics. + */ + entry->total_stats = num_individual_stats + 1; + } + entry->stats_written = true; + end_memorycontext_reporting(entry, oldcontext, context_id_lookup); + /* + * Notify waiting backends and return. + * + * NOTE(review): entry is dereferenced here after + * end_memorycontext_reporting() has released its dshash lock; confirm the + * entry cannot be deleted concurrently. + */ + ConditionVariableBroadcast(&entry->memcxt_cv); +} + +/* + * end_memorycontext_reporting + * + * Clean up before exit from ProcessGetMemoryContextInterrupt: release the + * dshash lock held on entry, destroy the context_id_lookup hash table, + * switch back to oldcontext and reset the memory context that was current + * when this function was called. + */ +static void +end_memorycontext_reporting(MemoryStatsDSHashEntry *entry, MemoryContext oldcontext, HTAB *context_id_lookup) +{ + MemoryContext curr_ctx = CurrentMemoryContext; + + dshash_release_lock(MemoryStatsDsHash, entry); + + hash_destroy(context_id_lookup); + MemoryContextSwitchTo(oldcontext); + MemoryContextReset(curr_ctx); +} + +/* + * compute_context_path + * + * Append the transient context_id of this context and each of its ancestors + * to a list, in order to compute a path. 
+ */ +static List * +compute_context_path(MemoryContext c, HTAB *context_id_lookup) +{ + bool found; + List *path = NIL; + MemoryContext cur_context; + + for (cur_context = c; cur_context != NULL; cur_context = cur_context->parent) + { + MemoryStatsContextId *cur_entry; + + cur_entry = hash_search(context_id_lookup, &cur_context, HASH_FIND, &found); + + if (!found) + elog(ERROR, "hash table corrupted, can't construct path value"); + + /* Prepend, so the finished list runs from the topmost ancestor down to c */ + path = lcons_int(cur_entry->context_id, path); + } + + return path; +} + +/* + * PublishMemoryContext + * + * Copy the memory context statistics of a single context into slot curr_id + * of the memcxt_info array, which lives in DSA memory. Name and ident are + * clipped so that they fit the fixed-size buffers of the slot. + */ +static void +PublishMemoryContext(MemoryStatsEntry *memcxt_info, int curr_id, + MemoryContext context, List *path, + MemoryContextCounters stat, int num_contexts) +{ + const char *ident = context->ident; + const char *name = context->name; + + /* + * To be consistent with logging output, we label dynahash contexts with + * just the hash table name as with MemoryContextStatsPrint(). + */ + if (context->ident && strncmp(context->name, "dynahash", 8) == 0) + { + name = context->ident; + ident = NULL; + } + + if (name != NULL) + { + int namelen = strlen(name); + + if (namelen >= MEMORY_CONTEXT_NAME_SHMEM_SIZE) + namelen = pg_mbcliplen(name, namelen, + MEMORY_CONTEXT_NAME_SHMEM_SIZE - 1); + + strlcpy(memcxt_info[curr_id].name, name, namelen + 1); + } + else + /* Clearing the array */ + memcxt_info[curr_id].name[0] = '\0'; + + /* Trim and copy the identifier if it is not set to NULL */ + if (ident != NULL) + { + int idlen = strlen(ident); + + /* + * Some identifiers such as SQL query string can be very long, + * truncate oversize identifiers. 
+ */ + if (idlen >= MEMORY_CONTEXT_IDENT_SHMEM_SIZE) + idlen = pg_mbcliplen(ident, idlen, + MEMORY_CONTEXT_IDENT_SHMEM_SIZE - 1); + + strlcpy(memcxt_info[curr_id].ident, ident, idlen + 1); + } + else + memcxt_info[curr_id].ident[0] = '\0'; + + /* + * Copy the path, clipped to MAX_PATH_DISPLAY_LENGTH entries. Always set + * path_length and levels so that readers never see stale values. + */ + if (path == NIL) + { + memcxt_info[curr_id].path[0] = 0; + memcxt_info[curr_id].path_length = 0; + memcxt_info[curr_id].levels = 0; + } + else + { + int levels = Min(list_length(path), MAX_PATH_DISPLAY_LENGTH); + + memcxt_info[curr_id].path_length = levels; + memcxt_info[curr_id].levels = list_length(path); + + foreach_int(i, path) + { + memcxt_info[curr_id].path[foreach_current_index(i)] = i; + if (--levels == 0) + break; + } + } + memcxt_info[curr_id].type = context->type; + memcxt_info[curr_id].totalspace = stat.totalspace; + memcxt_info[curr_id].nblocks = stat.nblocks; + memcxt_info[curr_id].freespace = stat.freespace; + memcxt_info[curr_id].freechunks = stat.freechunks; + memcxt_info[curr_id].num_agg_stats = num_contexts; +} + +/* + * AtProcExit_memstats_cleanup + * + * Exit callback: free the DSA memory referenced by this process's dshash + * entry (if there is one), delete that entry, and reset this process's + * slot in client_keys to -1. + */ +void +AtProcExit_memstats_cleanup(int code, Datum arg) +{ + int idx = MyProcNumber; + MemoryStatsDSHashEntry *entry; + char key[CLIENT_KEY_SIZE]; + + if (MemoryStatsDsHash != NULL) + { + snprintf(key, CLIENT_KEY_SIZE, "%d", idx); + + /* + * Look the entry up under an exclusive lock instead of using + * dshash_find_or_insert(): there is no point in inserting an entry + * during process exit only to delete it again. + */ + entry = dshash_find(MemoryStatsDsHash, key, true); + + if (entry != NULL) + { + if (MemoryStatsDsaArea != NULL && + DsaPointerIsValid(entry->memstats_dsa_pointer)) + dsa_free(MemoryStatsDsaArea, entry->memstats_dsa_pointer); + + /* Deleting the entry also releases the lock taken above */ + dshash_delete_entry(MemoryStatsDsHash, entry); + } + } + LWLockAcquire(client_keys_lock, LW_EXCLUSIVE); + client_keys[idx] = -1; + LWLockRelease(client_keys_lock); +} + +/* Used for testing purposes: returns the cached DSA area pointer, or NULL */ +dsa_area * +pg_get_memstats_dsa_area(void) +{ + return MemoryStatsDsaArea; +} diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index d31cb45a0588..92b0446b80c5 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -39,6 +39,7 @@ 
volatile sig_atomic_t TransactionTimeoutPending = false; volatile sig_atomic_t IdleSessionTimeoutPending = false; volatile sig_atomic_t ProcSignalBarrierPending = false; volatile sig_atomic_t LogMemoryContextPending = false; +volatile sig_atomic_t PublishMemoryContextPending = false; volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false; volatile uint32 InterruptHoldoffCount = 0; volatile uint32 QueryCancelHoldoffCount = 0; diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 98f9598cd789..202403ebc63f 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -658,6 +658,13 @@ BaseInit(void) * drop ephemeral slots, which in turn triggers stats reporting. */ ReplicationSlotInitialize(); + + /* + * The before shmem exit callback frees the DSA memory occupied by the + * latest memory context statistics that could be published by this proc + * if requested. + */ + before_shmem_exit(AtProcExit_memstats_cleanup, 0); } diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 47fd774c7d28..56c2048c67a5 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -1008,6 +1008,35 @@ MemoryContextStatsInternal(MemoryContext context, int level, } } + +/* + * MemoryContextStatsCounter + * + * Accumulate statistics counts into *totals. totals should not be NULL. + * This involves a non-recursive tree traversal. 
+ */ +void +MemoryContextStatsCounter(MemoryContext context, MemoryContextCounters *totals, + int *num_contexts) +{ + int ichild = 1; /* starts at 1 to count "context" itself */ + + context->methods->stats(context, NULL, NULL, totals, false); + + /* presumably visits every descendant of "context" once -- TODO confirm */ + for (MemoryContext curr = context->firstchild; + curr != NULL; + curr = MemoryContextTraverseNext(curr, context)) + { + curr->methods->stats(curr, NULL, NULL, totals, false); + ichild++; + } + + /* + * Add the number of contexts counted above (the starting context plus + * every context visited by the loop) to the caller's running count. + */ + *num_contexts = *num_contexts + ichild; +} + /* * MemoryContextStatsPrint * Print callback used by MemoryContextStatsInternal diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 5cf9e12fcb9a..bb72b85457d9 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -8617,6 +8617,16 @@ prorettype => 'bool', proargtypes => 'int4', prosrc => 'pg_log_backend_memory_contexts' }, +# publishing memory contexts of the specified postgres process +{ oid => '2173', descr => 'publish memory contexts of the specified backend', + proname => 'pg_get_process_memory_contexts', provolatile => 'v', + prorows => '100', proretset => 't', proparallel => 'r', + prorettype => 'record', proargtypes => 'int4 bool', + proallargtypes => '{int4,bool,text,text,text,int4,_int4,int8,int8,int8,int8,int8,int4}', + proargmodes => '{i,i,o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{pid, summary, name, ident, type, level, path, total_bytes, total_nblocks, free_bytes, free_chunks, used_bytes, num_agg_contexts}', + prosrc => 'pg_get_process_memory_contexts' }, + # non-persistent series generator { oid => '1066', descr => 'non-persistent series generator', proname => 'generate_series', prorows => '1000', diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 9a7d733ddeff..b76f24baed60 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -96,6 +96,7 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t 
ProcSignalBarrierPending; extern PGDLLIMPORT volatile sig_atomic_t LogMemoryContextPending; extern PGDLLIMPORT volatile sig_atomic_t IdleStatsUpdateTimeoutPending; +extern PGDLLIMPORT volatile sig_atomic_t PublishMemoryContextPending; extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending; extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost; diff --git a/src/include/storage/lwlocklist.h b/src/include/storage/lwlocklist.h index 5b0ce383408c..613e769c84e2 100644 --- a/src/include/storage/lwlocklist.h +++ b/src/include/storage/lwlocklist.h @@ -136,3 +136,4 @@ PG_LWLOCKTRANCHE(SUBTRANS_SLRU, SubtransSLRU) PG_LWLOCKTRANCHE(XACT_SLRU, XactSLRU) PG_LWLOCKTRANCHE(PARALLEL_VACUUM_DSA, ParallelVacuumDSA) PG_LWLOCKTRANCHE(AIO_URING_COMPLETION, AioUringCompletion) +PG_LWLOCKTRANCHE(MEMORY_CONTEXT_KEYS, MemoryContextReportingKeys) diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index afeeb1ca019f..345d5a0ecb1e 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -35,6 +35,7 @@ typedef enum PROCSIG_WALSND_INIT_STOPPING, /* ask walsenders to prepare for shutdown */ PROCSIG_BARRIER, /* global barrier interrupt */ PROCSIG_LOG_MEMORY_CONTEXT, /* ask backend to log the memory contexts */ + PROCSIG_GET_MEMORY_CONTEXT, /* ask backend to send the memory contexts */ PROCSIG_PARALLEL_APPLY_MESSAGE, /* Message from parallel apply workers */ /* Recovery conflict reasons */ diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h index 7bbe5a36959d..2d7220cde452 100644 --- a/src/include/utils/memutils.h +++ b/src/include/utils/memutils.h @@ -18,7 +18,10 @@ #define MEMUTILS_H #include "nodes/memnodes.h" - +#include "storage/condition_variable.h" +#include "storage/lmgr.h" +#include "utils/dsa.h" +#include "lib/dshash.h" /* * MaxAllocSize, MaxAllocHugeSize @@ -48,6 +51,26 @@ #define AllocHugeSizeIsValid(size) ((Size) (size) <= MaxAllocHugeSize) +/* + * Memory Context reporting size 
limits. + */ + +/* Max length of context name and ident, to keep it consistent + * with ProcessLogMemoryContext() + */ +#define MEMORY_CONTEXT_IDENT_SHMEM_SIZE 100 +#define MEMORY_CONTEXT_NAME_SHMEM_SIZE 100 + +/* Maximum size (in bytes) of DSA area per process */ +#define MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND ((size_t) (1 * 1024 * 1024)) + +/* + * Size of one statistics record and the number of records that fit in the + * per-process budget (parenthesized so it expands safely in expressions). + * Name/ident are 100-byte arrays; path depth is limited to 100 as for logging. + */ +#define MAX_MEMORY_CONTEXT_STATS_SIZE (sizeof(MemoryStatsEntry)) +#define MAX_MEMORY_CONTEXT_STATS_NUM (MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND / MAX_MEMORY_CONTEXT_STATS_SIZE) /* * Standard top-level memory contexts. @@ -149,6 +172,7 @@ extern MemoryContext BumpContextCreate(MemoryContext parent, Size minContextSize, Size initBlockSize, Size maxBlockSize); +extern dsa_area *pg_get_memstats_dsa_area(void); /* * Recommended default alloc parameters, suitable for "ordinary" contexts @@ -319,4 +343,59 @@ pg_memory_is_all_zeros(const void *ptr, size_t len) return true; } +/* Dynamic shared memory state for statistics per context */ +typedef struct MemoryStatsEntry +{ + char name[MEMORY_CONTEXT_NAME_SHMEM_SIZE]; /* clipped context name */ + char ident[MEMORY_CONTEXT_IDENT_SHMEM_SIZE]; /* clipped identifier, or "" */ + int path[100]; /* context ids along the path; path_length are valid */ + NodeTag type; /* copied from the context's type field */ + int path_length; /* number of entries stored in path[] */ + int levels; /* full length of the path before clipping */ + int64 totalspace; + int64 nblocks; + int64 freespace; + int64 freechunks; + int num_agg_stats; /* number of contexts aggregated into this record */ +} MemoryStatsEntry; + +/* + * Per backend dynamic shared hash entry for memory context statistics + * reporting. 
+ */ +typedef struct MemoryStatsDSHashEntry +{ + char key[64]; /* hash key: a proc number formatted with "%d" */ + ConditionVariable memcxt_cv; /* broadcast by the publisher once statistics are written */ + bool stats_written; /* true once the publisher has filled in the statistics */ + int target_server_id; /* PID of the process asked to publish; checked against MyProcPid */ + int total_stats; /* number of MemoryStatsEntry records written */ + bool summary; /* whether summary statistics were requested */ + dsa_pointer memstats_dsa_pointer; /* DSA pointer to the MemoryStatsEntry array */ +} MemoryStatsDSHashEntry; + +static const dshash_parameters memctx_dsh_params = { + offsetof(MemoryStatsDSHashEntry, memcxt_cv), /* presumably the key size, i.e. everything before memcxt_cv -- TODO confirm */ + sizeof(MemoryStatsDSHashEntry), + dshash_strcmp, + dshash_strhash, + dshash_strcpy +}; + +/* + * Used for storage of transient identifiers for pg_get_backend_memory_contexts. + * Entries are keyed by context in the context_id_lookup hash table and are + * read back by compute_context_path(). + * NOTE(review): the function added here is pg_get_process_memory_contexts; + * confirm which name the first sentence should use. + */ +typedef struct MemoryStatsContextId +{ + MemoryContext context; + int context_id; +} MemoryStatsContextId; + +extern void ProcessGetMemoryContextInterrupt(void); +extern void HandleGetMemoryContextInterrupt(void); +extern void MemoryContextKeysShmemInit(void); +extern Size MemoryContextKeysShmemSize(void); +extern void MemoryContextStatsCounter(MemoryContext context, MemoryContextCounters *totals, + int *num_contexts); +extern void AtProcExit_memstats_cleanup(int code, Datum arg); #endif /* MEMUTILS_H */ diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index 902a79541010..a31a2578c18d 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -31,6 +31,7 @@ SUBDIRS = \ test_json_parser \ test_lfind \ test_lwlock_tranches \ + test_memcontext_reporting \ test_misc \ test_oat_hooks \ test_parser \ diff --git a/src/test/modules/test_memcontext_reporting/Makefile b/src/test/modules/test_memcontext_reporting/Makefile new file mode 100644 index 000000000000..01a7baa0263e --- /dev/null +++ b/src/test/modules/test_memcontext_reporting/Makefile @@ -0,0 +1,32 @@ +# src/test/modules/test_memcontext_reporting/Makefile + +EXTRA_INSTALL = src/test/modules/injection_points + +export enable_injection_points +MODULE_big = test_memcontext_reporting +OBJS = \ + $(WIN32RES) \ + test_memcontext_reporting.o +PGFILEDESC = "test_memcontext_reporting - test code for memory context reporting" + +EXTENSION = test_memcontext_reporting +DATA = 
test_memcontext_reporting--1.0.sql + +REGRESS = test_memcontext_reporting + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_memcontext_reporting +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif + +check: + $(prove_check) + +installcheck: + $(prove_installcheck) diff --git a/src/test/modules/test_memcontext_reporting/t/001_memcontext_inj.pl b/src/test/modules/test_memcontext_reporting/t/001_memcontext_inj.pl new file mode 100644 index 000000000000..69d8489eb378 --- /dev/null +++ b/src/test/modules/test_memcontext_reporting/t/001_memcontext_inj.pl @@ -0,0 +1,58 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group + +# Test suite for testing memory context statistics reporting + +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +if ($ENV{enable_injection_points} ne 'yes') +{ + plan skip_all => 'Injection points not supported by this build'; +} +my $psql_err; +# Create and start a cluster with one node +my $node = PostgreSQL::Test::Cluster->new('main'); +$node->init(allows_streaming => 1); +# max_connections need to be bumped in order to accommodate for pgbench clients +# and log_statement is dialled down since it otherwise will generate enormous +# amounts of logging. Page verification failures are still logged. 
+$node->append_conf( + 'postgresql.conf', + qq[ +max_connections = 100 +log_statement = none +]); +$node->start; +$node->safe_psql('postgres', 'CREATE EXTENSION test_memcontext_reporting;'); +$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;'); +# Attaching to a client process injection point that throws an error +$node->safe_psql('postgres', "select injection_points_attach('memcontext-client-crash', 'error');"); + +my $pid = $node->safe_psql('postgres', "SELECT pid from pg_stat_activity where backend_type='checkpointer'"); +# Report the target PID through TAP diagnostics rather than raw stdout +note "checkpointer PID: $pid"; + +#Client should have thrown error +$node->psql('postgres', qq(select pg_get_process_memory_contexts($pid, true);), stderr => \$psql_err); +like ( $psql_err, qr/error triggered for injection point memcontext-client-crash/, 'client-side injection point raises an error'); + +#Query the same process after detaching the injection point, using some other client and it should succeed. +$node->safe_psql('postgres', "select injection_points_detach('memcontext-client-crash');"); +my $topcontext_name = $node->safe_psql('postgres', "select name from pg_get_process_memory_contexts($pid, true) where path = '{1}';"); +is($topcontext_name, 'TopMemoryContext', 'statistics are returned after a client-side error'); + +# Attaching to a target process injection point that throws an error +$node->safe_psql('postgres', "select injection_points_attach('memcontext-server-crash', 'error');"); + +#Server should have thrown error +$node->psql('postgres', qq(select pg_get_process_memory_contexts($pid, true);), stderr => \$psql_err); + +#Query the same process after detaching the injection point, using some other client and it should succeed. 
+$node->safe_psql('postgres', "select injection_points_detach('memcontext-server-crash');"); +$topcontext_name = $node->safe_psql('postgres', "select name from pg_get_process_memory_contexts($pid, true) where path = '{1}';"); +is($topcontext_name, 'TopMemoryContext', 'statistics are returned after a server-side error'); +done_testing(); diff --git a/src/test/modules/test_memcontext_reporting/test_memcontext_reporting--1.0.sql b/src/test/modules/test_memcontext_reporting/test_memcontext_reporting--1.0.sql new file mode 100644 index 000000000000..181daf429d05 --- /dev/null +++ b/src/test/modules/test_memcontext_reporting/test_memcontext_reporting--1.0.sql @@ -0,0 +1,19 @@ +CREATE FUNCTION memcontext_crash_server() +RETURNS pg_catalog.void +AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION memcontext_detach_server() +RETURNS pg_catalog.void +AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION memcontext_crash_client() +RETURNS pg_catalog.void +AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION memcontext_detach_client() +RETURNS pg_catalog.void +AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION dsa_dump_sql() +RETURNS bigint +AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_memcontext_reporting/test_memcontext_reporting.c b/src/test/modules/test_memcontext_reporting/test_memcontext_reporting.c new file mode 100644 index 000000000000..955155524c2b --- /dev/null +++ b/src/test/modules/test_memcontext_reporting/test_memcontext_reporting.c @@ -0,0 +1,123 @@ +/* + * ------------------------------------------------------------------------- + * + * Copyright (c) 2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/test_memcontext_reporting/test_memcontext_reporting.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "funcapi.h" +#include "storage/dsm_registry.h" +#include "utils/injection_point.h" +#include "utils/memutils.h" + +PG_MODULE_MAGIC; + +extern PGDLLEXPORT void crash(const char *name, const void *private_data, void *arg); + +void +crash(const char *name, const void *private_data, void *arg) +{ + abort(); /* callback body: abort the calling process */ +} + +/* + * memcontext_crash_client + * + * Ensure that the client process aborts 
in between memory context + * reporting. + */ +PG_FUNCTION_INFO_V1(memcontext_crash_client); +Datum +memcontext_crash_client(PG_FUNCTION_ARGS) +{ +#ifdef USE_INJECTION_POINTS + /* Attach this module's "crash" function to the client-side point */ + InjectionPointAttach("memcontext-client-crash", + "test_memcontext_reporting", "crash", NULL, 0); + +#else + elog(ERROR, + "test is not working as intended when injection points are disabled"); +#endif + PG_RETURN_VOID(); +} + +/* + * memcontext_detach_client + * + * Detach the "memcontext-client-crash" injection point again. + */ +PG_FUNCTION_INFO_V1(memcontext_detach_client); +Datum +memcontext_detach_client(PG_FUNCTION_ARGS) +{ +#ifdef USE_INJECTION_POINTS + InjectionPointDetach("memcontext-client-crash"); + +#else + elog(ERROR, + "test is not working as intended when injection points are disabled"); +#endif + PG_RETURN_VOID(); +} + +/* + * memcontext_crash_server + * + * Ensure that the server process crashes in between memory context + * reporting. + */ +PG_FUNCTION_INFO_V1(memcontext_crash_server); +Datum +memcontext_crash_server(PG_FUNCTION_ARGS) +{ +#ifdef USE_INJECTION_POINTS + /* Attach this module's "crash" function to the server-side point */ + InjectionPointAttach("memcontext-server-crash", + "test_memcontext_reporting", "crash", NULL, 0); + +#else + elog(ERROR, + "test is not working as intended when injection points are disabled"); +#endif + PG_RETURN_VOID(); +} + +/* + * memcontext_detach_server + * + * Detach the injection point which crashes the server process during + * memory context reporting. 
+ */ +PG_FUNCTION_INFO_V1(memcontext_detach_server); +Datum +memcontext_detach_server(PG_FUNCTION_ARGS) +{ +#ifdef USE_INJECTION_POINTS + InjectionPointDetach("memcontext-server-crash"); + +#else + elog(ERROR, + "test is not working as intended when injection points are disabled"); +#endif + PG_RETURN_VOID(); +} + +/* + * dsa_dump_sql + * + * Return the total size of the DSA area used for memory context statistics. + * + * If this backend already has the area cached, that pointer is used and left + * attached, because detaching it would leave the cached pointer dangling. + * Only an attachment made by this function is detached again. + */ +PG_FUNCTION_INFO_V1(dsa_dump_sql); +Datum +dsa_dump_sql(PG_FUNCTION_ARGS) +{ + bool found; + bool attached_here = false; + size_t tot_size; + dsa_area *memstats_dsa_area; + + memstats_dsa_area = pg_get_memstats_dsa_area(); + + if (memstats_dsa_area == NULL) + { + memstats_dsa_area = GetNamedDSA("memory_context_statistics_dsa", &found); + attached_here = true; + } + + tot_size = dsa_get_total_size(memstats_dsa_area); + + if (attached_here) + dsa_detach(memstats_dsa_area); + + PG_RETURN_INT64(tot_size); +} diff --git a/src/test/modules/test_memcontext_reporting/test_memcontext_reporting.control b/src/test/modules/test_memcontext_reporting/test_memcontext_reporting.control new file mode 100644 index 000000000000..48b501682d57 --- /dev/null +++ b/src/test/modules/test_memcontext_reporting/test_memcontext_reporting.control @@ -0,0 +1,4 @@ +comment = 'Test code for memcontext reporting' +default_version = '1.0' +module_pathname = '$libdir/test_memcontext_reporting' +relocatable = true diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 3b37fafa65b9..21c65ad2d104 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -233,3 +233,22 @@ select * from pg_timezone_abbrevs where abbrev = 'LMT'; LMT | @ 7 hours 52 mins 58 secs ago | f (1 row) +DO $$ +DECLARE + bg_writer_pid int; + r RECORD; +BEGIN + SELECT pid from pg_stat_activity where backend_type='background writer' + INTO bg_writer_pid; + + select type, name, ident + from pg_get_process_memory_contexts(bg_writer_pid, false) + where path = '{1}' into r; + RAISE NOTICE '%', r; + select type, name, ident + from pg_get_process_memory_contexts(pg_backend_pid(), false) + where path = 
'{1}' into r; + RAISE NOTICE '%', r; +END $$; +NOTICE: (AllocSet,TopMemoryContext,) +NOTICE: (AllocSet,TopMemoryContext,) diff --git a/src/test/regress/sql/sysviews.sql b/src/test/regress/sql/sysviews.sql index 66179f026b37..c9da4fc8c902 100644 --- a/src/test/regress/sql/sysviews.sql +++ b/src/test/regress/sql/sysviews.sql @@ -101,3 +101,21 @@ select count(distinct utc_offset) >= 24 as ok from pg_timezone_abbrevs; -- One specific case we can check without much fear of breakage -- is the historical local-mean-time value used for America/Los_Angeles. select * from pg_timezone_abbrevs where abbrev = 'LMT'; + +DO $$ +DECLARE + bg_writer_pid int; + r RECORD; +BEGIN + SELECT pid from pg_stat_activity where backend_type='background writer' + INTO bg_writer_pid; + + select type, name, ident + from pg_get_process_memory_contexts(bg_writer_pid, false) + where path = '{1}' into r; + RAISE NOTICE '%', r; + select type, name, ident + from pg_get_process_memory_contexts(pg_backend_pid(), false) + where path = '{1}' into r; + RAISE NOTICE '%', r; +END $$; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 432509277c98..2990c807f45f 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1684,6 +1684,9 @@ MemoryContextData MemoryContextId MemoryContextMethodID MemoryContextMethods +MemoryStatsContextId +MemoryStatsEntry +MemoryStatsDSHashEntry MemoryStatsPrintFunc MergeAction MergeActionState