 #include <sys/file.h>
 #include <unistd.h>
 
+#ifdef ENFORCE_BUFFER_PROT
+#include <sys/mman.h>			/* for mprotect() */
+#endif							/* ENFORCE_BUFFER_PROT */
+
 #include "access/tableam.h"
 #include "access/xloginsert.h"
 #include "access/xlogutils.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/lmgr.h"
+#include "storage/pg_shmem.h"
 #include "storage/proc.h"
 #include "storage/read_stream.h"
 #include "storage/smgr.h"
@@ -1074,8 +1079,6 @@ ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
 
 	if (need_to_zero)
 	{
-		memset(BufferGetPage(buffer), 0, BLCKSZ);
-
 		/*
 		 * Grab the buffer content lock before marking the page as valid, to
 		 * make sure that no other backend sees the zeroed page before the
@@ -1088,7 +1091,12 @@ ZeroAndLockBuffer(Buffer buffer, ReadBufferMode mode, bool already_valid)
 		 * already valid.)
 		 */
 		if (!isLocalBuf)
+		{
 			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
+			SetBufferProtection(buffer, true, true);
+		}
+
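+		/* Now that any needed lock and write access are in place, zero the page. */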
+		memset(BufferGetPage(buffer), 0, BLCKSZ);
 
 		/* Set BM_VALID, terminate IO, and wake up any waiters */
 		if (isLocalBuf)
@@ -1922,6 +1930,9 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
 	io_pages[0] = BufferGetBlock(buffers[nblocks_done]);
 	io_buffers_len = 1;
 
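+	/*
+	 * The read is going to write into this page, so grant write access up
+	 * front (local buffers of temporary relations are not protected).
+	 */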
+	if (persistence != RELPERSISTENCE_TEMP)
+		SetBufferProtection(io_buffers[0], true, true);
+
 	/*
 	 * How many neighboring-on-disk blocks can we scatter-read into other
 	 * buffers at the same time? In this case we don't wait if we see an
@@ -1937,7 +1948,16 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
 			   BufferGetBlockNumber(buffers[i]) - 1);
 		Assert(io_buffers[io_buffers_len] == buffers[i]);
 
-		io_pages[io_buffers_len++] = BufferGetBlock(buffers[i]);
+		io_pages[io_buffers_len] = BufferGetBlock(buffers[i]);
+
+		/*
+		 * If the I/O is performed synchronously, allow the synchronous
+		 * read to modify the buffer.
+		 */
+		if (persistence != RELPERSISTENCE_TEMP)
+			SetBufferProtection(io_buffers[io_buffers_len], true, true);
+
+		io_buffers_len++;
 	}
 
 	/* get a reference to wait for in WaitReadBuffers() */
@@ -1975,6 +1995,16 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
 	else
 		pgBufferUsage.shared_blks_read += io_buffers_len;
 
+	/*
+	 * Disallow writes for this process again. If the I/O is performed
+	 * asynchronously, the I/O worker will allow the write for itself.
+	 */
+	if (persistence != RELPERSISTENCE_TEMP)
+	{
+		for (int i = 0; i < io_buffers_len; i++)
+			SetBufferProtection(io_buffers[i], true, false);
+	}
+
 	/*
 	 * Track vacuum cost when issuing IO, not after waiting for it.
 	 * Otherwise we could end up issuing a lot of IO in a short timespan,
@@ -2649,7 +2679,9 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
 		buf_block = BufHdrGetBlock(GetBufferDescriptor(buffers[i] - 1));
 
 		/* new buffers are zero-filled */
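+		/* Temporarily allow writes so the new page can be zero-filled. */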
+		SetBufferProtection(buffers[i], true, true);
 		MemSet(buf_block, 0, BLCKSZ);
+		SetBufferProtection(buffers[i], true, false);
 	}
 
 	/*
@@ -2877,7 +2909,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
 		}
 
 		if (lock)
-			LWLockAcquire(BufferDescriptorGetContentLock(buf_hdr), LW_EXCLUSIVE);
+			LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 
 		TerminateBufferIO(buf_hdr, false, BM_VALID, true, false);
 	}
@@ -3136,6 +3168,10 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
 			 * non-accessible in any case.
 			 */
 			VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
+
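+			/*
+			 * With ENFORCE_BUFFER_PROT_READ, unpinned pages are kept
+			 * inaccessible; re-allow reads now that a pin is held.
+			 */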
+#if defined(ENFORCE_BUFFER_PROT) && defined(ENFORCE_BUFFER_PROT_READ)
+			SetBufferProtection(BufferDescriptorGetBuffer(buf), true, false);
+#endif
 			break;
 		}
 	}
@@ -3208,6 +3244,10 @@ PinBuffer_Locked(BufferDesc *buf)
 	 */
 	VALGRIND_MAKE_MEM_DEFINED(BufHdrGetBlock(buf), BLCKSZ);
 
+#if defined(ENFORCE_BUFFER_PROT) && defined(ENFORCE_BUFFER_PROT_READ)
+	SetBufferProtection(BufferDescriptorGetBuffer(buf), true, false);
+#endif
+
 	/*
 	 * Since we hold the buffer spinlock, we can update the buffer state and
 	 * release the lock in one operation.
@@ -3305,6 +3345,10 @@ UnpinBufferNoOwner(BufferDesc *buf)
 		 */
 		VALGRIND_MAKE_MEM_NOACCESS(BufHdrGetBlock(buf), BLCKSZ);
 
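+		/* Without a pin this process must not access the page at all. */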
+#if defined(ENFORCE_BUFFER_PROT) && defined(ENFORCE_BUFFER_PROT_READ)
+		SetBufferProtection(BufferDescriptorGetBuffer(buf), false, false);
+#endif
+
 		/* I'd better not still hold the buffer content lock */
 		Assert(!LWLockHeldByMe(BufferDescriptorGetContentLock(buf)));
 
@@ -4113,6 +4157,69 @@ CheckForBufferLeaks(void)
 #endif
 }
 
+/*
+ * To verify that we are following buffer locking rules, we can make pages
+ * inaccessible or read-only when we don't have sufficient locks etc.
+ *
+ * XXX: It might be possible to fold the VALGRIND_MAKE_MEM_NOACCESS() /
+ * VALGRIND_MAKE_MEM_DEFINED() calls into this.
+ */
+#ifdef ENFORCE_BUFFER_PROT
+void
+SetBufferProtection(Buffer buf, bool allow_reads, bool allow_writes)
+{
+	static long pagesz = 0;
+	int			prot = PROT_NONE;
+	int			rc;
+
+	Assert(huge_pages_status != HUGE_PAGES_UNKNOWN &&
+		   huge_pages_status != HUGE_PAGES_TRY);
+	StaticAssertStmt(PROT_NONE == 0, "that can't be right");
+
+	if (unlikely(pagesz == 0))
+	{
+		pagesz = sysconf(_SC_PAGESIZE);
+
+		elog(DEBUG1, "sysconf(_SC_PAGESIZE) = %ld", pagesz);
+		if (pagesz == -1)
+		{
+			elog(ERROR, "sysconf(_SC_PAGESIZE) failed: %m");
+		}
+		else if (pagesz > BLCKSZ)
+		{
+			elog(DEBUG1, "pagesz > BLCKSZ, disabling buffer protection mode");
+			pagesz = -1;
+		}
+		else if (BLCKSZ % pagesz != 0)
+		{
+			elog(DEBUG1, "BLCKSZ %% pagesz != 0, disabling buffer protection mode");
+			pagesz = -1;
+		}
+		else if (huge_pages_status == HUGE_PAGES_ON)
+		{
+			/* can't set status in a granular enough way */
+			elog(DEBUG1, "huge pages enabled, disabling buffer protection mode");
+			pagesz = -1;
+		}
+	}
+
+	/* disabled */
+	if (pagesz == -1)
+		return;
+
+	if (allow_reads)
+		prot |= PROT_READ;
+
+	if (allow_writes)
+		prot |= PROT_WRITE;
+
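+	/*
+	 * The checks above ensure BLCKSZ is a multiple of the OS page size, so
+	 * the protection change covers exactly this buffer's block.
+	 */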
+	rc = mprotect(BufferGetBlock(buf), BLCKSZ, prot);
+
+	if (rc != 0)
+		elog(ERROR, "mprotect(%d, %d) failed: %m", buf, prot);
+}
+#endif							/* ENFORCE_BUFFER_PROT */
+
 /*
  * Helper routine to issue warnings when a buffer is unexpectedly pinned
  */
@@ -4307,7 +4414,10 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
 	bufBlock = BufHdrGetBlock(buf);
 
 	/* Update page checksum if desired. */
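+	/*
+	 * PageSetChecksum() stores the checksum in the page header, so the page
+	 * has to be writable for a moment.
+	 */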
+	SetBufferProtection(BufferDescriptorGetBuffer(buf), true, true);
 	PageSetChecksum((Page) bufBlock, buf->tag.blockNum);
+	/* FIXME: could theoretically be exclusively locked */
+	SetBufferProtection(BufferDescriptorGetBuffer(buf), true, false);
 
 	io_start = pgstat_prepare_io_time(track_io_timing);
 
@@ -5563,19 +5673,38 @@ void
 LockBuffer(Buffer buffer, int mode)
 {
 	BufferDesc *buf;
+	LWLock	   *content_lock;
 
 	Assert(BufferIsPinned(buffer));
 	if (BufferIsLocal(buffer))
 		return;					/* local buffers need no lock */
 
 	buf = GetBufferDescriptor(buffer - 1);
+	content_lock = BufferDescriptorGetContentLock(buf);
 
 	if (mode == BUFFER_LOCK_UNLOCK)
-		LWLockRelease(BufferDescriptorGetContentLock(buf));
+	{
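+		/* Note the held lock mode before releasing; it can't be queried afterwards. */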
+#ifdef ENFORCE_BUFFER_PROT
+		bool		was_exclusive;
+
+		was_exclusive = LWLockHeldByMeInMode(content_lock, LW_EXCLUSIVE);
+#endif							/* ENFORCE_BUFFER_PROT */
+
+		LWLockRelease(content_lock);
+
+#ifdef ENFORCE_BUFFER_PROT
+		if (was_exclusive)
+			SetBufferProtection(buffer, true, false);
+#endif							/* ENFORCE_BUFFER_PROT */
+	}
 	else if (mode == BUFFER_LOCK_SHARE)
-		LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
+		LWLockAcquire(content_lock, LW_SHARED);
 	else if (mode == BUFFER_LOCK_EXCLUSIVE)
-		LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
+	{
+		LWLockAcquire(content_lock, LW_EXCLUSIVE);
+
+		SetBufferProtection(buffer, true, true);
+	}
 	else
 		elog(ERROR, "unrecognized buffer lock mode: %d", mode);
 }
@@ -5589,15 +5718,21 @@ bool
 ConditionalLockBuffer(Buffer buffer)
 {
 	BufferDesc *buf;
+	bool		ret;
 
 	Assert(BufferIsPinned(buffer));
 	if (BufferIsLocal(buffer))
 		return true;			/* act as though we got it */
 
 	buf = GetBufferDescriptor(buffer - 1);
 
-	return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
-									LW_EXCLUSIVE);
+	ret = LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
+								   LW_EXCLUSIVE);
+
+	if (ret)
+		SetBufferProtection(buffer, true, true);
+
+	return ret;
 }
 
 /*
@@ -6732,6 +6867,11 @@ BufferPrepareToSetHintBits(Buffer buffer)
 	{
 		ResourceOwnerRememberBufferSettingHints(CurrentResourceOwner, buffer);
 
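+		/*
+		 * An exclusive content lock already comes with write access (see
+		 * LockBuffer()); otherwise grant it explicitly here.
+		 */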
+#ifdef ENFORCE_BUFFER_PROT
+		if (!LWLockHeldByMeInMode(BufferDescriptorGetContentLock(desc), LW_EXCLUSIVE))
+			SetBufferProtection(buffer, true, true);
+#endif							/* ENFORCE_BUFFER_PROT */
+
 		return true;
 	}
 }
@@ -6807,6 +6947,11 @@ BufferFinishSetHintBits(Buffer buffer, bool mark_dirty, bool buffer_std)
 	 * know if there is somebody waiting.
 	 */
 	ConditionVariableBroadcast(BufferDescriptorGetIOCV(desc));
+
+#ifdef ENFORCE_BUFFER_PROT
+	if (!LWLockHeldByMeInMode(BufferDescriptorGetContentLock(desc), LW_EXCLUSIVE))
+		SetBufferProtection(buffer, true, false);
+#endif							/* ENFORCE_BUFFER_PROT */
 }
 
 /*
@@ -6853,14 +6998,25 @@ BufferSetHintBits16(Buffer buffer, uint16 *ptr, uint16 val)
 	}
 	else
 	{
+#ifdef ENFORCE_BUFFER_PROT
+		bool		exclusive;
+#endif
+
 		desc = GetBufferDescriptor(buffer - 1);
+
+#ifdef ENFORCE_BUFFER_PROT
+		exclusive = LWLockHeldByMeInMode(BufferDescriptorGetContentLock(desc),
+										 LW_EXCLUSIVE);
+
+		if (!exclusive)
+			SetBufferProtection(buffer, true, true);
+#endif
+
 		buf_state = LockBufHdr(desc);
 
 		if (likely((buf_state & BM_IO_IN_PROGRESS) == 0))
 		{
-
 			*ptr = val;
-
 			did_set = true;
 		}
 
@@ -6869,6 +7025,11 @@ BufferSetHintBits16(Buffer buffer, uint16 *ptr, uint16 val)
 		is_dirty = (buf_state & (BM_DIRTY | BM_JUST_DIRTIED)) == (BM_DIRTY | BM_JUST_DIRTIED);
 		if (did_set && !is_dirty)
 			MarkBufferDirtyHintImpl(buffer, true, true);
+
+#ifdef ENFORCE_BUFFER_PROT
+		if (!exclusive)
+			SetBufferProtection(buffer, true, false);
+#endif
 	}
 }
 