Skip to content

Commit de98112

Browse files
author
Commitfest Bot
committed
[CF 5690] v7 - Add os_page_num to pg_buffercache
This branch was automatically generated by a robot using patches from an email thread registered at: https://commitfest.postgresql.org/patch/5690 The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch. Patch(es): https://www.postgresql.org/message-id/[email protected] Author(s): Bertrand Drouvot
2 parents d432094 + 4e34513 commit de98112

File tree

9 files changed

+383
-16
lines changed

9 files changed

+383
-16
lines changed

contrib/pg_buffercache/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ EXTENSION = pg_buffercache
99
DATA = pg_buffercache--1.2.sql pg_buffercache--1.2--1.3.sql \
1010
pg_buffercache--1.1--1.2.sql pg_buffercache--1.0--1.1.sql \
1111
pg_buffercache--1.3--1.4.sql pg_buffercache--1.4--1.5.sql \
12-
pg_buffercache--1.5--1.6.sql
12+
pg_buffercache--1.5--1.6.sql pg_buffercache--1.6--1.7.sql
1313
PGFILEDESC = "pg_buffercache - monitoring of shared buffer cache in real-time"
1414

1515
REGRESS = pg_buffercache pg_buffercache_numa

contrib/pg_buffercache/expected/pg_buffercache.out

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,16 @@ from pg_buffercache;
88
t
99
(1 row)
1010

11+
-- For pg_buffercache_os_pages, we expect at least one entry for each buffer
12+
select count(*) >= (select setting::bigint
13+
from pg_settings
14+
where name = 'shared_buffers')
15+
from pg_buffercache_os_pages;
16+
?column?
17+
----------
18+
t
19+
(1 row)
20+
1121
select buffers_used + buffers_unused > 0,
1222
buffers_dirty <= buffers_used,
1323
buffers_pinned <= buffers_used
@@ -28,6 +38,8 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0;
2838
SET ROLE pg_database_owner;
2939
SELECT * FROM pg_buffercache;
3040
ERROR: permission denied for view pg_buffercache
41+
SELECT * FROM pg_buffercache_os_pages;
42+
ERROR: permission denied for view pg_buffercache_os_pages
3143
SELECT * FROM pg_buffercache_pages() AS p (wrong int);
3244
ERROR: permission denied for function pg_buffercache_pages
3345
SELECT * FROM pg_buffercache_summary();
@@ -43,6 +55,12 @@ SELECT count(*) > 0 FROM pg_buffercache;
4355
t
4456
(1 row)
4557

58+
SELECT count(*) > 0 FROM pg_buffercache_os_pages;
59+
?column?
60+
----------
61+
t
62+
(1 row)
63+
4664
SELECT buffers_used + buffers_unused > 0 FROM pg_buffercache_summary();
4765
?column?
4866
----------

contrib/pg_buffercache/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ install_data(
2424
'pg_buffercache--1.3--1.4.sql',
2525
'pg_buffercache--1.4--1.5.sql',
2626
'pg_buffercache--1.5--1.6.sql',
27+
'pg_buffercache--1.6--1.7.sql',
2728
'pg_buffercache.control',
2829
kwargs: contrib_data_args,
2930
)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/* contrib/pg_buffercache/pg_buffercache--1.6--1.7.sql */

-- complain if script is sourced in psql, rather than via ALTER EXTENSION
\echo Use "ALTER EXTENSION pg_buffercache UPDATE TO '1.7'" to load this file. \quit

-- C-language set-returning function backing the view below.  It returns
-- SETOF RECORD, so callers must supply a column definition list.
CREATE FUNCTION pg_buffercache_os_pages()
RETURNS SETOF RECORD
AS 'MODULE_PATHNAME', 'pg_buffercache_os_pages'
LANGUAGE C PARALLEL SAFE;

-- Convenience view: supplies the column definition list so that users can
-- simply SELECT from pg_buffercache_os_pages.
CREATE VIEW pg_buffercache_os_pages AS
	SELECT P.bufferid, P.os_page_num
	FROM pg_buffercache_os_pages() AS P
		(bufferid integer, os_page_num bigint);

-- Function privileges: nobody by default, then pg_monitor only.
REVOKE ALL ON FUNCTION pg_buffercache_os_pages() FROM PUBLIC;
GRANT EXECUTE ON FUNCTION pg_buffercache_os_pages() TO pg_monitor;

-- View privileges: nobody by default, then pg_monitor only.
REVOKE ALL ON pg_buffercache_os_pages FROM PUBLIC;
GRANT SELECT ON pg_buffercache_os_pages TO pg_monitor;
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# pg_buffercache extension
22
comment = 'examine the shared buffer cache'
3-
default_version = '1.6'
3+
default_version = '1.7'
44
module_pathname = '$libdir/pg_buffercache'
55
relocatable = true

contrib/pg_buffercache/pg_buffercache_pages.c

Lines changed: 220 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,13 @@
2727
#define NUM_BUFFERCACHE_EVICT_ALL_ELEM 3
2828

2929
#define NUM_BUFFERCACHE_NUMA_ELEM 3
30+
#define NUM_BUFFERCACHE_OS_PAGES_ELEM 2
31+
32+
/*
33+
* Get the maximum buffer cache entries needed.
34+
*/
35+
#define GET_MAX_BUFFER_ENTRIES(nbuffers, os_page_size) \
36+
((nbuffers) * (Max(1, BLCKSZ / (os_page_size)) + 1))
3037

3138
PG_MODULE_MAGIC_EXT(
3239
.name = "pg_buffercache",
@@ -88,12 +95,30 @@ typedef struct
8895
BufferCacheNumaRec *record;
8996
} BufferCacheNumaContext;
9097

98+
/*
99+
* Record structure holding the to be exposed cache data.
100+
*/
101+
typedef struct
102+
{
103+
uint32 bufferid;
104+
int64 page_num;
105+
} BufferCacheOsPagesRec;
106+
107+
/*
108+
* Function context for data persisting over repeated calls.
109+
*/
110+
typedef struct
111+
{
112+
TupleDesc tupdesc;
113+
BufferCacheOsPagesRec *record;
114+
} BufferCacheOsPagesContext;
91115

92116
/*
93117
* Function returning data from the shared buffer cache - buffer number,
94118
* relation node/tablespace/database/blocknum and dirty indicator.
95119
*/
96120
PG_FUNCTION_INFO_V1(pg_buffercache_pages);
121+
PG_FUNCTION_INFO_V1(pg_buffercache_os_pages);
97122
PG_FUNCTION_INFO_V1(pg_buffercache_numa_pages);
98123
PG_FUNCTION_INFO_V1(pg_buffercache_summary);
99124
PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
@@ -105,6 +130,197 @@ PG_FUNCTION_INFO_V1(pg_buffercache_evict_all);
105130
/* Only need to touch memory once per backend process lifetime */
106131
static bool firstNumaTouch = true;
107132

133+
/*
134+
* Helper function to get buffer page boundaries.
135+
*
136+
* Given a buffer pointer and OS page size, calculates the start/end
137+
* pointers and first page number.
138+
*/
139+
static void
140+
get_buffer_page_boundaries(char *buffptr, Size os_page_size, char *startptr,
141+
char **startptr_buff, char **endptr_buff,
142+
int32 *page_num)
143+
{
144+
char *start_ptr;
145+
char *end_ptr;
146+
147+
/* start of the first page of this buffer */
148+
start_ptr = (char *) TYPEALIGN_DOWN(os_page_size, buffptr);
149+
150+
/* end of the buffer (no need to align to memory page) */
151+
end_ptr = buffptr + BLCKSZ;
152+
153+
Assert(start_ptr < end_ptr);
154+
155+
/* calculate ID of the first page for this buffer */
156+
*page_num = (start_ptr - startptr) / os_page_size;
157+
*startptr_buff = start_ptr;
158+
*endptr_buff = end_ptr;
159+
}
160+
161+
/*
162+
* Inquire about OS pages mappings for shared buffers.
163+
*
164+
* Returns each OS memory page used by the buffer. Buffers may
165+
* be smaller or larger than OS memory pages. For each buffer we return one
166+
* entry for each memory page used by the buffer (if the buffer is smaller,
167+
* it only uses a part of one memory page).
168+
*
169+
* We expect both sizes (for buffers and memory pages) to be a power-of-2, so
170+
* one is always a multiple of the other.
171+
*/
172+
Datum
173+
pg_buffercache_os_pages(PG_FUNCTION_ARGS)
174+
{
175+
FuncCallContext *funcctx;
176+
Datum result;
177+
MemoryContext oldcontext;
178+
BufferCacheOsPagesContext *fctx; /* User function context. */
179+
TupleDesc tupledesc;
180+
TupleDesc expected_tupledesc;
181+
HeapTuple tuple;
182+
183+
if (SRF_IS_FIRSTCALL())
184+
{
185+
int i,
186+
idx;
187+
Size os_page_size;
188+
char *startptr;
189+
int max_entries;
190+
191+
/*
192+
* The database block size and OS memory page size are unlikely to be
193+
* the same. The block size is 1-32KB, the memory page size depends on
194+
* platform. On x86 it's usually 4KB, on ARM it's 4KB or 64KB, but
195+
* there are also features like THP etc. Moreover, we don't quite know
196+
* how the pages and buffers "align" in memory - the buffers may be
197+
* shifted in some way, using more memory pages than necessary.
198+
*
199+
* So we need to be careful about mapping buffers to memory pages. We
200+
* calculate the maximum number of pages a buffer might use, so that
201+
* we allocate enough space for the entries. And then we count the
202+
* actual number of entries as we scan the buffers.
203+
*/
204+
os_page_size = pg_get_shmem_pagesize();
205+
206+
/* Initialize the multi-call context, load entries about buffers */
207+
funcctx = SRF_FIRSTCALL_INIT();
208+
209+
/* Switch context when allocating stuff to be used in later calls */
210+
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
211+
212+
/* Create a user function context for cross-call persistence */
213+
fctx = (BufferCacheOsPagesContext *) palloc(sizeof(BufferCacheOsPagesContext));
214+
215+
if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
216+
elog(ERROR, "return type must be a row type");
217+
218+
if (expected_tupledesc->natts != NUM_BUFFERCACHE_OS_PAGES_ELEM)
219+
elog(ERROR, "incorrect number of output arguments");
220+
221+
/* Construct a tuple descriptor for the result rows. */
222+
tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
223+
TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
224+
INT4OID, -1, 0);
225+
226+
TupleDescInitEntry(tupledesc, (AttrNumber) 2, "os_page_num",
227+
INT8OID, -1, 0);
228+
229+
fctx->tupdesc = BlessTupleDesc(tupledesc);
230+
231+
/*
232+
* Each buffer needs at least one entry, but it might be offset in
233+
* some way, and use one extra entry. So we allocate space for the
234+
* maximum number of entries we might need, and then count the exact
235+
* number as we're walking buffers. That way we can do it in one pass,
236+
* without reallocating memory.
237+
*/
238+
max_entries = GET_MAX_BUFFER_ENTRIES(NBuffers, os_page_size);
239+
240+
/* Allocate NBuffers worth of BufferCacheOsPagesRec records. */
241+
fctx->record = (BufferCacheOsPagesRec *)
242+
MemoryContextAllocHuge(CurrentMemoryContext,
243+
sizeof(BufferCacheOsPagesRec) * max_entries);
244+
245+
/* Return to original context when allocating transient memory */
246+
MemoryContextSwitchTo(oldcontext);
247+
248+
startptr = (char *) TYPEALIGN_DOWN(os_page_size, (char *) BufferGetBlock(1));
249+
idx = 0;
250+
251+
/*
252+
* Scan through all the buffers, saving the relevant fields in the
253+
* fctx->record structure.
254+
*
255+
* We don't hold the partition locks, so we don't get a consistent
256+
* snapshot across all buffers, but we do grab the buffer header
257+
* locks, so the information of each buffer is self-consistent.
258+
*/
259+
for (i = 0; i < NBuffers; i++)
260+
{
261+
char *buffptr = (char *) BufferGetBlock(i + 1);
262+
BufferDesc *bufHdr;
263+
uint32 buf_state;
264+
uint32 bufferid;
265+
int32 page_num;
266+
char *startptr_buff,
267+
*endptr_buff;
268+
269+
CHECK_FOR_INTERRUPTS();
270+
271+
bufHdr = GetBufferDescriptor(i);
272+
/* Lock each buffer header before inspecting. */
273+
buf_state = LockBufHdr(bufHdr);
274+
bufferid = BufferDescriptorGetBuffer(bufHdr);
275+
UnlockBufHdr(bufHdr, buf_state);
276+
277+
/* Get page boundaries for this buffer. */
278+
get_buffer_page_boundaries(buffptr, os_page_size, startptr,
279+
&startptr_buff, &endptr_buff, &page_num);
280+
281+
/* Add an entry for each OS page overlapping with this buffer. */
282+
for (char *ptr = startptr_buff; ptr < endptr_buff; ptr += os_page_size)
283+
{
284+
fctx->record[idx].bufferid = bufferid;
285+
fctx->record[idx].page_num = page_num;
286+
/* advance to the next entry/page */
287+
++idx;
288+
++page_num;
289+
}
290+
}
291+
292+
Assert(idx <= max_entries);
293+
294+
/* Set max calls and remember the user function context. */
295+
funcctx->max_calls = idx;
296+
funcctx->user_fctx = fctx;
297+
}
298+
299+
funcctx = SRF_PERCALL_SETUP();
300+
301+
/* Get the saved state */
302+
fctx = funcctx->user_fctx;
303+
304+
if (funcctx->call_cntr < funcctx->max_calls)
305+
{
306+
uint32 i = funcctx->call_cntr;
307+
Datum values[NUM_BUFFERCACHE_OS_PAGES_ELEM];
308+
bool nulls[NUM_BUFFERCACHE_OS_PAGES_ELEM];
309+
310+
values[0] = Int32GetDatum(fctx->record[i].bufferid);
311+
nulls[0] = false;
312+
values[1] = Int64GetDatum(fctx->record[i].page_num);
313+
nulls[1] = false;
314+
315+
/* Build and return the tuple. */
316+
tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
317+
result = HeapTupleGetDatum(tuple);
318+
319+
SRF_RETURN_NEXT(funcctx, result);
320+
}
321+
else
322+
SRF_RETURN_DONE(funcctx);
323+
}
108324

109325
Datum
110326
pg_buffercache_pages(PG_FUNCTION_ARGS)
@@ -320,7 +536,6 @@ pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
320536
void **os_page_ptrs;
321537
int *os_page_status;
322538
uint64 os_page_count;
323-
int pages_per_buffer;
324539
int max_entries;
325540
char *startptr,
326541
*endptr;
@@ -428,8 +643,7 @@ pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
428643
* number as we're walking buffers. That way we can do it in one pass,
429644
* without reallocating memory.
430645
*/
431-
pages_per_buffer = Max(1, BLCKSZ / os_page_size) + 1;
432-
max_entries = NBuffers * pages_per_buffer;
646+
max_entries = GET_MAX_BUFFER_ENTRIES(NBuffers, os_page_size);
433647

434648
/* Allocate entries for BufferCachePagesRec records. */
435649
fctx->record = (BufferCacheNumaRec *)
@@ -475,16 +689,9 @@ pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
475689
bufferid = BufferDescriptorGetBuffer(bufHdr);
476690
UnlockBufHdr(bufHdr, buf_state);
477691

478-
/* start of the first page of this buffer */
479-
startptr_buff = (char *) TYPEALIGN_DOWN(os_page_size, buffptr);
480-
481-
/* end of the buffer (no need to align to memory page) */
482-
endptr_buff = buffptr + BLCKSZ;
483-
484-
Assert(startptr_buff < endptr_buff);
485-
486-
/* calculate ID of the first page for this buffer */
487-
page_num = (startptr_buff - startptr) / os_page_size;
692+
/* Get page boundaries for this buffer. */
693+
get_buffer_page_boundaries(buffptr, os_page_size, startptr,
694+
&startptr_buff, &endptr_buff, &page_num);
488695

489696
/* Add an entry for each OS page overlapping with this buffer. */
490697
for (char *ptr = startptr_buff; ptr < endptr_buff; ptr += os_page_size)

contrib/pg_buffercache/sql/pg_buffercache.sql

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ select count(*) = (select setting::bigint
55
where name = 'shared_buffers')
66
from pg_buffercache;
77

8+
-- For pg_buffercache_os_pages, we expect at least one entry for each buffer
9+
select count(*) >= (select setting::bigint
10+
from pg_settings
11+
where name = 'shared_buffers')
12+
from pg_buffercache_os_pages;
13+
814
select buffers_used + buffers_unused > 0,
915
buffers_dirty <= buffers_used,
1016
buffers_pinned <= buffers_used
@@ -16,6 +22,7 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0;
1622
-- having to create a dedicated user, use the pg_database_owner pseudo-role.
1723
SET ROLE pg_database_owner;
1824
SELECT * FROM pg_buffercache;
25+
SELECT * FROM pg_buffercache_os_pages;
1926
SELECT * FROM pg_buffercache_pages() AS p (wrong int);
2027
SELECT * FROM pg_buffercache_summary();
2128
SELECT * FROM pg_buffercache_usage_counts();
@@ -24,6 +31,7 @@ RESET role;
2431
-- Check that pg_monitor is allowed to query view / function
2532
SET ROLE pg_monitor;
2633
SELECT count(*) > 0 FROM pg_buffercache;
34+
SELECT count(*) > 0 FROM pg_buffercache_os_pages;
2735
SELECT buffers_used + buffers_unused > 0 FROM pg_buffercache_summary();
2836
SELECT count(*) > 0 FROM pg_buffercache_usage_counts();
2937
RESET role;

0 commit comments

Comments
 (0)