@@ -101,11 +101,37 @@ static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status
101101 uint16 infomask , Relation rel , int * remaining ,
102102 bool logLockFailure );
103103static void index_delete_sort (TM_IndexDeleteOp * delstate );
/*
 * Helpers for heap_index_vischeck_tuples().
 *
 * heap_ivc_process_block() does one visibility map lookup for the heap block
 * of the first check, stores the result in every immediately following check
 * that refers to the same heap block, and returns how many checks it filled.
 * heap_ivc_process_all() calls it repeatedly until all checks are filled.
 */
104+ static inline int heap_ivc_process_block (Relation rel , Buffer * vmbuf ,
105+ TM_VisCheck * checks , int nchecks );
106+ static void heap_ivc_process_all (Relation rel , Buffer * vmbuf ,
107+ TM_VisCheck * checks , int nchecks );
104108static int bottomup_sort_and_shrink (TM_IndexDeleteOp * delstate );
105109static XLogRecPtr log_heap_new_cid (Relation relation , HeapTuple tup );
106110static HeapTuple ExtractReplicaIdentity (Relation relation , HeapTuple tp , bool key_required ,
107111 bool * copy );
108112
113+ /* sort template definitions for index */
114+ #define ST_SORT heap_ivc_sortby_tidheapblk
115+ #define ST_ELEMENT_TYPE TM_VisCheck
116+ #define ST_DECLARE
117+ #define ST_DEFINE
118+ #define ST_SCOPE static inline
119+ #define ST_COMPARE (a , b ) ( \
120+ a->tidblkno < b->tidblkno ? -1 : ( \
121+ a->tidblkno > b->tidblkno ? 1 : 0 \
122+ ) \
123+ )
124+
125+ #include "lib/sort_template.h"
126+
127+ #define ST_SORT heap_ivc_sortby_idx
128+ #define ST_ELEMENT_TYPE TM_VisCheck
129+ #define ST_DECLARE
130+ #define ST_DEFINE
131+ #define ST_SCOPE static inline
132+ #define ST_COMPARE (a , b ) (((int) a->idxoffnum) - ((int) b->idxoffnum))
133+ #include "lib/sort_template.h"
134+
109135
110136/*
111137 * Each tuple lock mode has a corresponding heavyweight lock, and one or two
@@ -8779,6 +8805,157 @@ bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate)
87798805 return nblocksfavorable ;
87808806}
87818807
8808+ /*
8809+ * heapam implementation of tableam's index_vischeck_tuples interface.
8810+ *
8811+ * This helper function is called by index AMs during index-only scans,
8812+ * to do VM-based visibility checks on individual tuples, so that the AM
8813+ * can hold the tuple in memory for e.g. reordering for extended periods of
8814+ * time while without holding thousands of pins to conflict with VACUUM.
8815+ *
8816+ * It's possible for this to generate a fair amount of I/O, since we may be
8817+ * checking hundreds of tuples from a single index block, but that is
8818+ * preferred over holding thousands of pins.
8819+ *
8820+ * We use heuristics to balance the costs of sorting TIDs with VM page
8821+ * lookups.
8822+ */
8823+ void
8824+ heap_index_vischeck_tuples (Relation rel , TM_IndexVisibilityCheckOp * checkop )
8825+ {
8826+ Buffer vmbuf = * checkop -> vmbuf ;
8827+ Buffer storvmbuf = vmbuf ;
8828+ TM_VisCheck * checks = checkop -> checktids ;
8829+ int checkntids = checkop -> checkntids ;
8830+ int upcomingvmbufchanges = 0 ;
8831+
8832+ /*
8833+ * The first index scan will have to pin the VM buffer, and that first
8834+ * change in the vm buffer shouldn't put us into the expensive VM page &
8835+ * sort path; so we special-case this operation.
8836+ */
8837+ if (!BufferIsValid (vmbuf ))
8838+ {
8839+ int processed ;
8840+ processed = heap_ivc_process_block (rel , & vmbuf , checks ,checkntids );
8841+ checkntids -= processed ;
8842+ checks += processed ;
8843+ storvmbuf = vmbuf ;
8844+ Assert (processed > 0 );
8845+ }
8846+
8847+ while (vmbuf == storvmbuf && checkntids > 0 )
8848+ {
8849+ int processed ;
8850+
8851+ processed = heap_ivc_process_block (rel , & vmbuf , checks ,checkntids );
8852+
8853+ Assert (processed <= checkntids );
8854+
8855+ checkntids -= processed ;
8856+ checks += processed ;
8857+ }
8858+
8859+ * checkop -> vmbuf = vmbuf ;
8860+
8861+ if (checkntids == 0 )
8862+ {
8863+ return ;
8864+ }
8865+
8866+ upcomingvmbufchanges = 0 ;
8867+
8868+ for (int i = 1 ; i < checkntids ; i ++ )
8869+ {
8870+ /*
8871+ * Instead of storing the previous iteration's result, we only match
8872+ * the block numbers
8873+ */
8874+ BlockNumber lastblkno = checks [i - 1 ].tidblkno ;
8875+ BlockNumber newblkno = checks [i ].tidblkno ;
8876+ /*
8877+ * divide-by-constant can be faster than BufferGetBlockNumber()
8878+ */
8879+ BlockNumber lastvmblkno = HEAPBLK_TO_VMBLOCK (lastblkno );
8880+ BlockNumber newvmblkno = HEAPBLK_TO_VMBLOCK (newblkno );
8881+
8882+ if (lastvmblkno != newvmblkno )
8883+ upcomingvmbufchanges ++ ;
8884+ }
8885+
8886+ if (upcomingvmbufchanges <= pg_ceil_log2_32 (checkntids ))
8887+ {
8888+ /*
8889+ * No big amount of VM buf changes, so do all visibility checks
8890+ * without sorting.
8891+ */
8892+ heap_ivc_process_all (rel , checkop -> vmbuf , checks , checkntids );
8893+
8894+ return ;
8895+ }
8896+
8897+ /*
8898+ * Order the TIDs to heap order, so that we will only need to visit every
8899+ * VM page at most once.
8900+ */
8901+ heap_ivc_sortby_tidheapblk (checks , checkntids );
8902+
8903+ /* do all visibility checks */
8904+ heap_ivc_process_all (rel , checkop -> vmbuf , checks , checkntids );
8905+
8906+ /* put the checks back in index order */
8907+ heap_ivc_sortby_idx (checks , checkntids );
8908+ }
8909+
8910+
8911+ static inline int
8912+ heap_ivc_process_block (Relation rel , Buffer * vmbuf , TM_VisCheck * checks ,
8913+ int nchecks )
8914+ {
8915+ BlockNumber blkno ;
8916+ BlockNumber prevblkno = blkno = checks -> tidblkno ;
8917+ TMVC_Result result ;
8918+ int processed = 0 ;
8919+
8920+ if (VM_ALL_VISIBLE (rel , blkno , vmbuf ))
8921+ result = TMVC_Visible ;
8922+ else
8923+ result = TMVC_MaybeVisible ;
8924+
8925+ do
8926+ {
8927+ checks -> vischeckresult = result ;
8928+
8929+ nchecks -- ;
8930+ processed ++ ;
8931+ checks ++ ;
8932+
8933+ if (nchecks <= 0 )
8934+ return processed ;
8935+
8936+ blkno = checks -> tidblkno ;
8937+ } while (blkno == prevblkno );
8938+
8939+ return processed ;
8940+ }
8941+
8942+ static void
8943+ heap_ivc_process_all (Relation rel , Buffer * vmbuf ,
8944+ TM_VisCheck * checks , int nchecks )
8945+ {
8946+ while (nchecks > 0 )
8947+ {
8948+ int processed ;
8949+
8950+ processed = heap_ivc_process_block (rel , vmbuf , checks , nchecks );
8951+
8952+ Assert (processed <= nchecks );
8953+
8954+ nchecks -= processed ;
8955+ checks += processed ;
8956+ }
8957+ }
8958+
87828959/*
87838960 * Perform XLogInsert for a heap-visible operation. 'block' is the block
87848961 * being marked all-visible, and vm_buffer is the buffer containing the
0 commit comments