Skip to content

Commit e80cafd

Browse files
anarazelCommitfest Bot
authored andcommitted
WIP: bufmgr: Detect some bad buffer accesses
I wrote this mainly to ensure that I did not miss converting any hint bit sets to BufferPrepareToSetHintBits(), but it seems like it might be more generally useful. If we do want to include it, it needs a bit more polishing. On my workstation, the performance effect of this test infrastructure is as follows: base: real 1m4.613s user 4m31.409s sys 3m20.445s ENFORCE_BUFFER_PROT real 1m11.912s user 4m27.332s sys 3m28.063s ENFORCE_BUFFER_PROT + ENFORCE_BUFFER_PROT_READ real 1m33.455s user 4m32.188s sys 3m41.275s Author: Reviewed-by: Discussion: https://postgr.es/m/ Backpatch:
1 parent 674755f commit e80cafd

File tree

7 files changed

+229
-12
lines changed

7 files changed

+229
-12
lines changed

src/backend/storage/aio/method_worker.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,17 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
388388
sprintf(cmd, "%d", MyIoWorkerId);
389389
set_ps_display(cmd);
390390

391+
/*
392+
* Allow access to all buffers. We assume that backends will not request
393+
* I/Os on shared buffers without holding proper locks.
394+
*/
395+
#if defined(ENFORCE_BUFFER_PROT) && defined(ENFORCE_BUFFER_PROT_READ)
396+
for (int i = 0; i < NBuffers; i++)
397+
{
398+
SetBufferProtection(i + 1, true, true);
399+
}
400+
#endif
401+
391402
/* see PostgresMain() */
392403
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
393404
{

src/backend/storage/buffer/buf_init.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,16 @@ BufferManagerShmemInit(void)
138138
LWTRANCHE_BUFFER_CONTENT);
139139

140140
ConditionVariableInit(BufferDescriptorGetIOCV(buf));
141+
142+
/*
143+
* Unused buffers are inaccessible. But if we're not enforcing
144+
* making buffers inaccessible without a pin, we won't mark
145+
* buffers as accessible during pinning, therefore we better don't
146+
* make them initially inaccessible.
147+
*/
148+
#if defined(ENFORCE_BUFFER_PROT) && defined(ENFORCE_BUFFER_PROT_READ)
149+
SetBufferProtection(i + 1, false, false);
150+
#endif
141151
}
142152

143153
/* Correct last entry of linked list */

src/backend/storage/buffer/bufmgr.c

Lines changed: 172 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@
3737
#include <sys/file.h>
3838
#include <unistd.h>
3939

40+
#ifdef ENFORCE_BUFFER_PROT
41+
#include <sys/mman.h> /* for mprotect() */
42+
#endif /* ENFORCE_BUFFER_PROT */
43+
4044
#include "access/tableam.h"
4145
#include "access/xloginsert.h"
4246
#include "access/xlogutils.h"
@@ -54,6 +58,7 @@
5458
#include "storage/fd.h"
5559
#include "storage/ipc.h"
5660
#include "storage/lmgr.h"
61+
#include "storage/pg_shmem.h"
5762
#include "storage/proc.h"
5863
#include "storage/read_stream.h"
5964
#include "storage/smgr.h"
@@ -1074,8 +1079,6 @@ ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
10741079

10751080
if (need_to_zero)
10761081
{
1077-
memset(BufferGetPage(buffer), 0, BLCKSZ);
1078-
10791082
/*
10801083
* Grab the buffer content lock before marking the page as valid, to
10811084
* make sure that no other backend sees the zeroed page before the
@@ -1088,7 +1091,12 @@ ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
10881091
* already valid.)
10891092
*/
10901093
if (!isLocalBuf)
1094+
{
10911095
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
1096+
SetBufferProtection(buffer, true, true);
1097+
}
1098+
1099+
memset(BufferGetPage(buffer), 0, BLCKSZ);
10921100

10931101
/* Set BM_VALID, terminate IO, and wake up any waiters */
10941102
if (isLocalBuf)
@@ -1922,6 +1930,9 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
19221930
io_pages[0] = BufferGetBlock(buffers[nblocks_done]);
19231931
io_buffers_len = 1;
19241932

1933+
if (persistence != RELPERSISTENCE_TEMP)
1934+
SetBufferProtection(io_buffers[0], true, true);
1935+
19251936
/*
19261937
* How many neighboring-on-disk blocks can we scatter-read into other
19271938
* buffers at the same time? In this case we don't wait if we see an
@@ -1937,7 +1948,16 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
19371948
BufferGetBlockNumber(buffers[i]) - 1);
19381949
Assert(io_buffers[io_buffers_len] == buffers[i]);
19391950

1940-
io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]);
1951+
io_pages[io_buffers_len] = BufferGetBlock(buffers[i]);
1952+
1953+
/*
1954+
* If the I/O is performed synchronously, allow the synchronous
1955+
* read to modify the buffer.
1956+
*/
1957+
if (persistence != RELPERSISTENCE_TEMP)
1958+
SetBufferProtection(io_buffers[io_buffers_len], true, true);
1959+
1960+
io_buffers_len++;
19411961
}
19421962

19431963
/* get a reference to wait for in WaitReadBuffers() */
@@ -1975,6 +1995,16 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
19751995
else
19761996
pgBufferUsage.shared_blks_read += io_buffers_len;
19771997

1998+
/*
1999+
* Disallow writes for this proces again. If the I/O is performed
2000+
* asynchronously, the I/O worker will allow the write for itself.
2001+
*/
2002+
if (persistence != RELPERSISTENCE_TEMP)
2003+
{
2004+
for (int i = 0; i < io_buffers_len; i++)
2005+
SetBufferProtection(io_buffers[i], true, false);
2006+
}
2007+
19782008
/*
19792009
* Track vacuum cost when issuing IO, not after waiting for it.
19802010
* Otherwise we could end up issuing a lot of IO in a short timespan,
@@ -2649,7 +2679,9 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
26492679
buf_block = BufHdrGetBlock(GetBufferDescriptor(buffers[i] - 1));
26502680

26512681
/* new buffers are zero-filled */
2682+
SetBufferProtection(buffers[i], true, true);
26522683
MemSet(buf_block, 0, BLCKSZ);
2684+
SetBufferProtection(buffers[i], true, false);
26532685
}
26542686

26552687
/*
@@ -2877,7 +2909,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
28772909
}
28782910

28792911
if (lock)
2880-
LWLockAcquire(BufferDescriptorGetContentLock(buf_hdr), LW_EXCLUSIVE);
2912+
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
28812913

28822914
TerminateBufferIO(buf_hdr, false, BM_VALID, true, false);
28832915
}
@@ -3136,6 +3168,10 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
31363168
* non-accessible in any case.
31373169
*/
31383170
VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
3171+
3172+
#if defined(ENFORCE_BUFFER_PROT) && defined(ENFORCE_BUFFER_PROT_READ)
3173+
SetBufferProtection(BufferDescriptorGetBuffer(buf), true, false);
3174+
#endif
31393175
break;
31403176
}
31413177
}
@@ -3208,6 +3244,10 @@ PinBuffer_Locked(BufferDesc *buf)
32083244
*/
32093245
VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
32103246

3247+
#if defined(ENFORCE_BUFFER_PROT) && defined(ENFORCE_BUFFER_PROT_READ)
3248+
SetBufferProtection(BufferDescriptorGetBuffer(buf), true, false);
3249+
#endif
3250+
32113251
/*
32123252
* Since we hold the buffer spinlock, we can update the buffer state and
32133253
* release the lock in one operation.
@@ -3305,6 +3345,10 @@ UnpinBufferNoOwner(BufferDesc *buf)
33053345
*/
33063346
VALGRIND_MAKE_MEM_NOACCESS(BufHdrGetBlock(buf), BLCKSZ);
33073347

3348+
#if defined(ENFORCE_BUFFER_PROT) && defined(ENFORCE_BUFFER_PROT_READ)
3349+
SetBufferProtection(BufferDescriptorGetBuffer(buf), false, false);
3350+
#endif
3351+
33083352
/* I'd better not still hold the buffer content lock */
33093353
Assert(!LWLockHeldByMe(BufferDescriptorGetContentLock(buf)));
33103354

@@ -4113,6 +4157,69 @@ CheckForBufferLeaks(void)
41134157
#endif
41144158
}
41154159

4160+
/*
4161+
* To verify that we are following buffer locking rules, we can make pages
4162+
* inaccessible or read-only when we don't have sufficient locks etc.
4163+
*
4164+
* XXX: It might be possible to fold the VALGRIND_MAKE_MEM_NOACCESS() /
4165+
* VALGRIND_MAKE_MEM_DEFINED() calls into this.
4166+
*/
4167+
#ifdef ENFORCE_BUFFER_PROT
4168+
void
4169+
SetBufferProtection(Buffer buf, bool allow_reads, bool allow_writes)
4170+
{
4171+
static long pagesz = 0;
4172+
int prot = PROT_NONE;
4173+
int rc;
4174+
4175+
Assert(huge_pages_status != HUGE_PAGES_UNKNOWN &&
4176+
huge_pages_status != HUGE_PAGES_TRY);
4177+
StaticAssertStmt(PROT_NONE == 0, "that can't be right");
4178+
4179+
if (unlikely(pagesz == 0))
4180+
{
4181+
pagesz = sysconf(_SC_PAGESIZE);
4182+
4183+
elog(DEBUG1, "sysconf(_SC_PAGESIZE) = %ld", pagesz);
4184+
if (pagesz == -1)
4185+
{
4186+
elog(ERROR, "sysconf(_SC_PAGESIZE) failed: %m");
4187+
}
4188+
else if (pagesz > BLCKSZ)
4189+
{
4190+
elog(DEBUG1, "pagesz > BLCKSZ, disabling buffer protection mode");
4191+
pagesz = -1;
4192+
}
4193+
else if(BLCKSZ % pagesz != 0)
4194+
{
4195+
elog(DEBUG1, "BLCKSZ %% pagesz != 0, disabling buffer protection mode");
4196+
pagesz = -1;
4197+
}
4198+
else if (huge_pages_status == HUGE_PAGES_ON)
4199+
{
4200+
/* can't set status in a granular enough way */
4201+
elog(DEBUG1, "huge pages enabled, disabling buffer protection mode");
4202+
pagesz = -1;
4203+
}
4204+
}
4205+
4206+
/* disabled */
4207+
if (pagesz == -1)
4208+
return;
4209+
4210+
if (allow_reads)
4211+
prot |= PROT_READ;
4212+
4213+
if (allow_writes)
4214+
prot |= PROT_WRITE;
4215+
4216+
rc = mprotect(BufferGetBlock(buf), BLCKSZ, prot);
4217+
4218+
if (rc != 0)
4219+
elog(ERROR, "mprotect(%d, %d) failed: %m", buf, prot);
4220+
}
4221+
#endif /* ENFORCE_BUFFER_PROT */
4222+
41164223
/*
41174224
* Helper routine to issue warnings when a buffer is unexpectedly pinned
41184225
*/
@@ -4307,7 +4414,10 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
43074414
bufBlock = BufHdrGetBlock(buf);
43084415

43094416
/* Update page checksum if desired. */
4417+
SetBufferProtection(BufferDescriptorGetBuffer(buf), true, true);
43104418
PageSetChecksum((Page) bufBlock, buf->tag.blockNum);
4419+
/* FIXME: could theoretically be exclusively locked */
4420+
SetBufferProtection(BufferDescriptorGetBuffer(buf), true, false);
43114421

43124422
io_start = pgstat_prepare_io_time(track_io_timing);
43134423

@@ -5563,19 +5673,38 @@ void
55635673
LockBuffer(Buffer buffer, int mode)
55645674
{
55655675
BufferDesc *buf;
5676+
LWLock *content_lock;
55665677

55675678
Assert(BufferIsPinned(buffer));
55685679
if (BufferIsLocal(buffer))
55695680
return; /* local buffers need no lock */
55705681

55715682
buf = GetBufferDescriptor(buffer - 1);
5683+
content_lock = BufferDescriptorGetContentLock(buf);
55725684

55735685
if (mode == BUFFER_LOCK_UNLOCK)
5574-
LWLockRelease(BufferDescriptorGetContentLock(buf));
5686+
{
5687+
#ifdef ENFORCE_BUFFER_PROT
5688+
bool was_exclusive;
5689+
5690+
was_exclusive = LWLockHeldByMeInMode(content_lock, LW_EXCLUSIVE);
5691+
#endif /* ENFORCE_BUFFER_PROT */
5692+
5693+
LWLockRelease(content_lock);
5694+
5695+
#ifdef ENFORCE_BUFFER_PROT
5696+
if (was_exclusive)
5697+
SetBufferProtection(buffer, true, false);
5698+
#endif /* ENFORCE_BUFFER_PROT */
5699+
}
55755700
else if (mode == BUFFER_LOCK_SHARE)
5576-
LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
5701+
LWLockAcquire(content_lock, LW_SHARED);
55775702
else if (mode == BUFFER_LOCK_EXCLUSIVE)
5578-
LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
5703+
{
5704+
LWLockAcquire(content_lock, LW_EXCLUSIVE);
5705+
5706+
SetBufferProtection(buffer, true, true);
5707+
}
55795708
else
55805709
elog(ERROR, "unrecognized buffer lock mode: %d", mode);
55815710
}
@@ -5589,15 +5718,21 @@ bool
55895718
ConditionalLockBuffer(Buffer buffer)
55905719
{
55915720
BufferDesc *buf;
5721+
bool ret;
55925722

55935723
Assert(BufferIsPinned(buffer));
55945724
if (BufferIsLocal(buffer))
55955725
return true; /* act as though we got it */
55965726

55975727
buf = GetBufferDescriptor(buffer - 1);
55985728

5599-
return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
5600-
LW_EXCLUSIVE);
5729+
ret = LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
5730+
LW_EXCLUSIVE);
5731+
5732+
if (ret)
5733+
SetBufferProtection(buffer, true, true);
5734+
5735+
return ret;
56015736
}
56025737

56035738
/*
@@ -6732,6 +6867,11 @@ BufferPrepareToSetHintBits(Buffer buffer)
67326867
{
67336868
ResourceOwnerRememberBufferSettingHints(CurrentResourceOwner, buffer);
67346869

6870+
#ifdef ENFORCE_BUFFER_PROT
6871+
if (!LWLockHeldByMeInMode(BufferDescriptorGetContentLock(desc), LW_EXCLUSIVE))
6872+
SetBufferProtection(buffer, true, true);
6873+
#endif /* ENFORCE_BUFFER_PROT */
6874+
67356875
return true;
67366876
}
67376877
}
@@ -6807,6 +6947,11 @@ BufferFinishSetHintBits(Buffer buffer, bool mark_dirty, bool buffer_std)
68076947
* know if there is somebody waiting.
68086948
*/
68096949
ConditionVariableBroadcast(BufferDescriptorGetIOCV(desc));
6950+
6951+
#ifdef ENFORCE_BUFFER_PROT
6952+
if (!LWLockHeldByMeInMode(BufferDescriptorGetContentLock(desc), LW_EXCLUSIVE))
6953+
SetBufferProtection(buffer, true, false);
6954+
#endif /* ENFORCE_BUFFER_PROT */
68106955
}
68116956

68126957
/*
@@ -6853,14 +6998,25 @@ BufferSetHintBits16(Buffer buffer, uint16 *ptr, uint16 val)
68536998
}
68546999
else
68557000
{
7001+
#ifdef ENFORCE_BUFFER_PROT
7002+
bool exclusive;
7003+
#endif
7004+
68567005
desc = GetBufferDescriptor(buffer - 1);
7006+
7007+
#ifdef ENFORCE_BUFFER_PROT
7008+
exclusive = LWLockHeldByMeInMode(BufferDescriptorGetContentLock(desc),
7009+
LW_EXCLUSIVE);
7010+
7011+
if (!exclusive)
7012+
SetBufferProtection(buffer, true, true);
7013+
#endif
7014+
68577015
buf_state = LockBufHdr(desc);
68587016

68597017
if (likely((buf_state & BM_IO_IN_PROGRESS) == 0))
68607018
{
6861-
68627019
*ptr = val;
6863-
68647020
did_set = true;
68657021
}
68667022

@@ -6869,6 +7025,11 @@ BufferSetHintBits16(Buffer buffer, uint16 *ptr, uint16 val)
68697025
is_dirty = (buf_state & (BM_DIRTY | BM_JUST_DIRTIED)) == (BM_DIRTY | BM_JUST_DIRTIED);
68707026
if (did_set && !is_dirty)
68717027
MarkBufferDirtyHintImpl(buffer, true, true);
7028+
7029+
#ifdef ENFORCE_BUFFER_PROT
7030+
if (!exclusive)
7031+
SetBufferProtection(buffer, true, false);
7032+
#endif
68727033
}
68737034
}
68747035

src/backend/utils/misc/guc_tables.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,7 @@ static int ssl_renegotiation_limit;
566566
*/
567567
int huge_pages = HUGE_PAGES_TRY;
568568
int huge_page_size;
569-
static int huge_pages_status = HUGE_PAGES_UNKNOWN;
569+
int huge_pages_status = HUGE_PAGES_UNKNOWN;
570570

571571
/*
572572
* These variables are all dummies that don't do anything, except in some

0 commit comments

Comments
 (0)