Skip to content

Commit 4e34513

Browse files
bdrouvotAWSCommitfest Bot
authored and committed
Add pg_buffercache_os_pages function and view
ba2a3c2 added a way to check if a buffer is spread across multiple pages. This commit adds the new function and view so that one does not need NUMA support enabled to get this information. In passing, let's capitalize "number" in the pg_buffercache_numa documentation definition.
1 parent d549511 commit 4e34513

File tree

9 files changed

+346
-3
lines changed

9 files changed

+346
-3
lines changed

contrib/pg_buffercache/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ EXTENSION = pg_buffercache
99
DATA = pg_buffercache--1.2.sql pg_buffercache--1.2--1.3.sql \
1010
pg_buffercache--1.1--1.2.sql pg_buffercache--1.0--1.1.sql \
1111
pg_buffercache--1.3--1.4.sql pg_buffercache--1.4--1.5.sql \
12-
pg_buffercache--1.5--1.6.sql
12+
pg_buffercache--1.5--1.6.sql pg_buffercache--1.6--1.7.sql
1313
PGFILEDESC = "pg_buffercache - monitoring of shared buffer cache in real-time"
1414

1515
REGRESS = pg_buffercache pg_buffercache_numa

contrib/pg_buffercache/expected/pg_buffercache.out

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,16 @@ from pg_buffercache;
88
t
99
(1 row)
1010

11+
-- For pg_buffercache_os_pages, we expect at least one entry for each buffer
12+
select count(*) >= (select setting::bigint
13+
from pg_settings
14+
where name = 'shared_buffers')
15+
from pg_buffercache_os_pages;
16+
?column?
17+
----------
18+
t
19+
(1 row)
20+
1121
select buffers_used + buffers_unused > 0,
1222
buffers_dirty <= buffers_used,
1323
buffers_pinned <= buffers_used
@@ -28,6 +38,8 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0;
2838
SET ROLE pg_database_owner;
2939
SELECT * FROM pg_buffercache;
3040
ERROR: permission denied for view pg_buffercache
41+
SELECT * FROM pg_buffercache_os_pages;
42+
ERROR: permission denied for view pg_buffercache_os_pages
3143
SELECT * FROM pg_buffercache_pages() AS p (wrong int);
3244
ERROR: permission denied for function pg_buffercache_pages
3345
SELECT * FROM pg_buffercache_summary();
@@ -43,6 +55,12 @@ SELECT count(*) > 0 FROM pg_buffercache;
4355
t
4456
(1 row)
4557

58+
SELECT count(*) > 0 FROM pg_buffercache_os_pages;
59+
?column?
60+
----------
61+
t
62+
(1 row)
63+
4664
SELECT buffers_used + buffers_unused > 0 FROM pg_buffercache_summary();
4765
?column?
4866
----------

contrib/pg_buffercache/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ install_data(
2424
'pg_buffercache--1.3--1.4.sql',
2525
'pg_buffercache--1.4--1.5.sql',
2626
'pg_buffercache--1.5--1.6.sql',
27+
'pg_buffercache--1.6--1.7.sql',
2728
'pg_buffercache.control',
2829
kwargs: contrib_data_args,
2930
)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/* contrib/pg_buffercache/pg_buffercache--1.6--1.7.sql */
2+
3+
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
4+
\echo Use "ALTER EXTENSION pg_buffercache UPDATE TO '1.7'" to load this file. \quit
5+
6+
-- Register the new function.
7+
CREATE FUNCTION pg_buffercache_os_pages()
8+
RETURNS SETOF RECORD
9+
AS 'MODULE_PATHNAME', 'pg_buffercache_os_pages'
10+
LANGUAGE C PARALLEL SAFE;
11+
12+
-- Create a view for convenient access.
13+
CREATE VIEW pg_buffercache_os_pages AS
14+
SELECT P.* FROM pg_buffercache_os_pages() AS P
15+
(bufferid integer, os_page_num bigint);
16+
17+
REVOKE ALL ON FUNCTION pg_buffercache_os_pages() FROM PUBLIC;
18+
REVOKE ALL ON pg_buffercache_os_pages FROM PUBLIC;
19+
20+
GRANT EXECUTE ON FUNCTION pg_buffercache_os_pages() TO pg_monitor;
21+
GRANT SELECT ON pg_buffercache_os_pages TO pg_monitor;
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# pg_buffercache extension
22
comment = 'examine the shared buffer cache'
3-
default_version = '1.6'
3+
default_version = '1.7'
44
module_pathname = '$libdir/pg_buffercache'
55
relocatable = true

contrib/pg_buffercache/pg_buffercache_pages.c

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#define NUM_BUFFERCACHE_EVICT_ALL_ELEM 3
2828

2929
#define NUM_BUFFERCACHE_NUMA_ELEM 3
30+
#define NUM_BUFFERCACHE_OS_PAGES_ELEM 2
3031

3132
/*
3233
* Get the maximum buffer cache entries needed.
@@ -94,12 +95,30 @@ typedef struct
9495
BufferCacheNumaRec *record;
9596
} BufferCacheNumaContext;
9697

98+
/*
99+
* Record structure holding the cache data to be exposed.
100+
*/
101+
typedef struct
102+
{
103+
uint32 bufferid;
104+
int64 page_num;
105+
} BufferCacheOsPagesRec;
106+
107+
/*
108+
* Function context for data persisting over repeated calls.
109+
*/
110+
typedef struct
111+
{
112+
TupleDesc tupdesc;
113+
BufferCacheOsPagesRec *record;
114+
} BufferCacheOsPagesContext;
97115

98116
/*
99117
* Function returning data from the shared buffer cache - buffer number,
100118
* relation node/tablespace/database/blocknum and dirty indicator.
101119
*/
102120
PG_FUNCTION_INFO_V1(pg_buffercache_pages);
121+
PG_FUNCTION_INFO_V1(pg_buffercache_os_pages);
103122
PG_FUNCTION_INFO_V1(pg_buffercache_numa_pages);
104123
PG_FUNCTION_INFO_V1(pg_buffercache_summary);
105124
PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
@@ -139,6 +158,170 @@ get_buffer_page_boundaries(char *buffptr, Size os_page_size, char *startptr,
139158
*endptr_buff = end_ptr;
140159
}
141160

161+
/*
162+
* Inquire about OS page mappings for shared buffers.
163+
*
164+
* Returns each OS memory page used by the buffer. Buffers may
165+
* be smaller or larger than OS memory pages. For each buffer we return one
166+
* entry for each memory page used by the buffer (if the buffer is smaller,
167+
* it only uses a part of one memory page).
168+
*
169+
* We expect both sizes (for buffers and memory pages) to be a power-of-2, so
170+
* one is always a multiple of the other.
171+
*/
172+
Datum
173+
pg_buffercache_os_pages(PG_FUNCTION_ARGS)
174+
{
175+
FuncCallContext *funcctx;
176+
Datum result;
177+
MemoryContext oldcontext;
178+
BufferCacheOsPagesContext *fctx; /* User function context. */
179+
TupleDesc tupledesc;
180+
TupleDesc expected_tupledesc;
181+
HeapTuple tuple;
182+
183+
if (SRF_IS_FIRSTCALL())
184+
{
185+
int i,
186+
idx;
187+
Size os_page_size;
188+
char *startptr;
189+
int max_entries;
190+
191+
/*
192+
* The database block size and OS memory page size are unlikely to be
193+
* the same. The block size is 1-32KB, the memory page size depends on
194+
* platform. On x86 it's usually 4KB, on ARM it's 4KB or 64KB, but
195+
* there are also features like THP etc. Moreover, we don't quite know
196+
* how the pages and buffers "align" in memory - the buffers may be
197+
* shifted in some way, using more memory pages than necessary.
198+
*
199+
* So we need to be careful about mapping buffers to memory pages. We
200+
* calculate the maximum number of pages a buffer might use, so that
201+
* we allocate enough space for the entries. And then we count the
202+
* actual number of entries as we scan the buffers.
203+
*/
204+
os_page_size = pg_get_shmem_pagesize();
205+
206+
/* Initialize the multi-call context, load entries about buffers */
207+
funcctx = SRF_FIRSTCALL_INIT();
208+
209+
/* Switch context when allocating stuff to be used in later calls */
210+
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
211+
212+
/* Create a user function context for cross-call persistence */
213+
fctx = (BufferCacheOsPagesContext *) palloc(sizeof(BufferCacheOsPagesContext));
214+
215+
if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
216+
elog(ERROR, "return type must be a row type");
217+
218+
if (expected_tupledesc->natts != NUM_BUFFERCACHE_OS_PAGES_ELEM)
219+
elog(ERROR, "incorrect number of output arguments");
220+
221+
/* Construct a tuple descriptor for the result rows. */
222+
tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
223+
TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
224+
INT4OID, -1, 0);
225+
226+
TupleDescInitEntry(tupledesc, (AttrNumber) 2, "os_page_num",
227+
INT8OID, -1, 0);
228+
229+
fctx->tupdesc = BlessTupleDesc(tupledesc);
230+
231+
/*
232+
* Each buffer needs at least one entry, but it might be offset in
233+
* some way, and use one extra entry. So we allocate space for the
234+
* maximum number of entries we might need, and then count the exact
235+
* number as we're walking buffers. That way we can do it in one pass,
236+
* without reallocating memory.
237+
*/
238+
max_entries = GET_MAX_BUFFER_ENTRIES(NBuffers, os_page_size);
239+
240+
/* Allocate max_entries worth of BufferCacheOsPagesRec records. */
241+
fctx->record = (BufferCacheOsPagesRec *)
242+
MemoryContextAllocHuge(CurrentMemoryContext,
243+
sizeof(BufferCacheOsPagesRec) * max_entries);
244+
245+
/* Return to original context when allocating transient memory */
246+
MemoryContextSwitchTo(oldcontext);
247+
248+
startptr = (char *) TYPEALIGN_DOWN(os_page_size, (char *) BufferGetBlock(1));
249+
idx = 0;
250+
251+
/*
252+
* Scan through all the buffers, saving the relevant fields in the
253+
* fctx->record structure.
254+
*
255+
* We don't hold the partition locks, so we don't get a consistent
256+
* snapshot across all buffers, but we do grab the buffer header
257+
* locks, so the information of each buffer is self-consistent.
258+
*/
259+
for (i = 0; i < NBuffers; i++)
260+
{
261+
char *buffptr = (char *) BufferGetBlock(i + 1);
262+
BufferDesc *bufHdr;
263+
uint32 buf_state;
264+
uint32 bufferid;
265+
int32 page_num;
266+
char *startptr_buff,
267+
*endptr_buff;
268+
269+
CHECK_FOR_INTERRUPTS();
270+
271+
bufHdr = GetBufferDescriptor(i);
272+
/* Lock each buffer header before inspecting. */
273+
buf_state = LockBufHdr(bufHdr);
274+
bufferid = BufferDescriptorGetBuffer(bufHdr);
275+
UnlockBufHdr(bufHdr, buf_state);
276+
277+
/* Get page boundaries for this buffer. */
278+
get_buffer_page_boundaries(buffptr, os_page_size, startptr,
279+
&startptr_buff, &endptr_buff, &page_num);
280+
281+
/* Add an entry for each OS page overlapping with this buffer. */
282+
for (char *ptr = startptr_buff; ptr < endptr_buff; ptr += os_page_size)
283+
{
284+
fctx->record[idx].bufferid = bufferid;
285+
fctx->record[idx].page_num = page_num;
286+
/* advance to the next entry/page */
287+
++idx;
288+
++page_num;
289+
}
290+
}
291+
292+
Assert(idx <= max_entries);
293+
294+
/* Set max calls and remember the user function context. */
295+
funcctx->max_calls = idx;
296+
funcctx->user_fctx = fctx;
297+
}
298+
299+
funcctx = SRF_PERCALL_SETUP();
300+
301+
/* Get the saved state */
302+
fctx = funcctx->user_fctx;
303+
304+
if (funcctx->call_cntr < funcctx->max_calls)
305+
{
306+
uint32 i = funcctx->call_cntr;
307+
Datum values[NUM_BUFFERCACHE_OS_PAGES_ELEM];
308+
bool nulls[NUM_BUFFERCACHE_OS_PAGES_ELEM];
309+
310+
values[0] = Int32GetDatum(fctx->record[i].bufferid);
311+
nulls[0] = false;
312+
values[1] = Int64GetDatum(fctx->record[i].page_num);
313+
nulls[1] = false;
314+
315+
/* Build and return the tuple. */
316+
tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
317+
result = HeapTupleGetDatum(tuple);
318+
319+
SRF_RETURN_NEXT(funcctx, result);
320+
}
321+
else
322+
SRF_RETURN_DONE(funcctx);
323+
}
324+
142325
Datum
143326
pg_buffercache_pages(PG_FUNCTION_ARGS)
144327
{

contrib/pg_buffercache/sql/pg_buffercache.sql

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ select count(*) = (select setting::bigint
55
where name = 'shared_buffers')
66
from pg_buffercache;
77

8+
-- For pg_buffercache_os_pages, we expect at least one entry for each buffer
9+
select count(*) >= (select setting::bigint
10+
from pg_settings
11+
where name = 'shared_buffers')
12+
from pg_buffercache_os_pages;
13+
814
select buffers_used + buffers_unused > 0,
915
buffers_dirty <= buffers_used,
1016
buffers_pinned <= buffers_used
@@ -16,6 +22,7 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0;
1622
-- having to create a dedicated user, use the pg_database_owner pseudo-role.
1723
SET ROLE pg_database_owner;
1824
SELECT * FROM pg_buffercache;
25+
SELECT * FROM pg_buffercache_os_pages;
1926
SELECT * FROM pg_buffercache_pages() AS p (wrong int);
2027
SELECT * FROM pg_buffercache_summary();
2128
SELECT * FROM pg_buffercache_usage_counts();
@@ -24,6 +31,7 @@ RESET role;
2431
-- Check that pg_monitor is allowed to query view / function
2532
SET ROLE pg_monitor;
2633
SELECT count(*) > 0 FROM pg_buffercache;
34+
SELECT count(*) > 0 FROM pg_buffercache_os_pages;
2735
SELECT buffers_used + buffers_unused > 0 FROM pg_buffercache_summary();
2836
SELECT count(*) > 0 FROM pg_buffercache_usage_counts();
2937
RESET role;

0 commit comments

Comments
 (0)