postgresql-cfbot
diff --git a/doc/src/sgml/btree.sgml
Lines changed: 33 additions & 1 deletion b/doc/src/sgml/btree.sgml
Lines changed: 33 additions & 1 deletion
diff --git a/doc/src/sgml/indexam.sgml
Lines changed: 2 additions & 1 deletion b/doc/src/sgml/indexam.sgml
Lines changed: 2 additions & 1 deletion
diff --git a/doc/src/sgml/indices.sgml
Lines changed: 30 additions & 19 deletions b/doc/src/sgml/indices.sgml
Lines changed: 30 additions & 19 deletions
diff --git a/doc/src/sgml/monitoring.sgml
Lines changed: 3 additions & 1 deletion b/doc/src/sgml/monitoring.sgml
Lines changed: 3 additions & 1 deletion
diff --git a/doc/src/sgml/perform.sgml
Lines changed: 31 additions & 0 deletions b/doc/src/sgml/perform.sgml
Lines changed: 31 additions & 0 deletions
diff --git a/doc/src/sgml/xindex.sgml
Lines changed: 13 additions & 3 deletions b/doc/src/sgml/xindex.sgml
Lines changed: 13 additions & 3 deletions
diff --git a/src/backend/access/index/indexam.c
Lines changed: 2 additions & 1 deletion b/src/backend/access/index/indexam.c
Lines changed: 2 additions & 1 deletion
@@ -207,7 +207,7 @@
 
  <para>
   As shown in <xref linkend="xindex-btree-support-table"/>, btree defines
-  one required and four optional support functions.  The five
+  one required and five optional support functions.  The six
   user-defined methods are:
  </para>
  <variablelist>
@@ -583,6 +583,38 @@ options(<replaceable>relopts</replaceable> <type>local_relopts *</type>) returns
     </para>
    </listitem>
   </varlistentry>
+  <varlistentry>
+   <term><function>skipsupport</function></term>
+   <listitem>
+    <para>
+     Optionally, a btree operator family may provide a <firstterm>skip
+      support</firstterm> function, registered under support function
+     number 6.  These functions allow the B-tree code to more efficiently
+     navigate the index structure during an index skip scan.  Operator classes
+     that implement skip support provide the core B-tree code with a way of
+     enumerating and iterating through every possible value from the domain of
+     indexable values.  The APIs involved in this are defined in
+     <filename>src/include/utils/skipsupport.h</filename>.
+    </para>
+    <para>
+     Operator classes that do not provide a skip support function are still
+     eligible to use skip scan.  The core code can still use a fallback
+     strategy, though it might be somewhat less efficient with discrete types.
+     It usually doesn't make sense (and may not even be feasible) for operator
+     classes on continuous types to provide a skip support function.
+    </para>
+    <para>
+     It is not sensible for an operator family to register a cross-type
+     <function>skipsupport</function> function, and attempting to do so will
+     result in an error.  This is because determining the next indexable value
+     from some earlier value does not just depend on sorting/equality
+     semantics, which are more or less defined at the operator family level.
+     Skip scan works by exhaustively considering every possible value that
+     might be stored in an index, so the domain of the particular data type
+     stored within the index (the input opclass type) must also be considered.
+    </para>
+   </listitem>
+  </varlistentry>
  </variablelist>
 
 </sect2>
 
@@ -835,7 +835,8 @@ amrestrpos (IndexScanDesc scan);
   <para>
 <programlisting>
 Size
-amestimateparallelscan (int nkeys,
+amestimateparallelscan (Relation indexRelation,
+                        int nkeys,
                         int norderbys);
 </programlisting>
    Estimate and return the number of bytes of dynamic shared memory which
 
@@ -457,23 +457,26 @@ CREATE INDEX test2_mm_idx ON test2 (major, minor);
   <para>
    A multicolumn B-tree index can be used with query conditions that
    involve any subset of the index's columns, but the index is most
-   efficient when there are constraints on the leading (leftmost) columns.
-   The exact rule is that equality constraints on leading columns, plus
-   any inequality constraints on the first column that does not have an
-   equality constraint, will be used to limit the portion of the index
-   that is scanned.  Constraints on columns to the right of these columns
-   are checked in the index, so they save visits to the table proper, but
-   they do not reduce the portion of the index that has to be scanned.
+   efficient when there are equality constraints on the leading (leftmost) columns.
+   B-Tree index scans can use the index skip scan strategy to generate
+   equality constraints on prefix columns that were wholly omitted from the
+   query predicate, as well as prefix columns whose values were constrained by
+   inequality conditions.
    For example, given an index on <literal>(a, b, c)</literal> and a
    query condition <literal>WHERE a = 5 AND b &gt;= 42 AND c &lt; 77</literal>,
    the index would have to be scanned from the first entry with
    <literal>a</literal> = 5 and <literal>b</literal> = 42 up through the last entry with
-   <literal>a</literal> = 5.  Index entries with <literal>c</literal> &gt;= 77 would be
-   skipped, but they'd still have to be scanned through.
+   <literal>a</literal> = 5.  Intervening groups of index entries with
+   <literal>c</literal> &gt;= 77 would not need to be returned by the scan,
+   and could be skipped over entirely by applying the skip scan strategy.
    This index could in principle be used for queries that have constraints
    on <literal>b</literal> and/or <literal>c</literal> with no constraint on <literal>a</literal>
-   &mdash; but the entire index would have to be scanned, so in most cases
-   the planner would prefer a sequential table scan over using the index.
+   &mdash; but that approach is generally only taken when there are so few
+   distinct <literal>a</literal> values that the planner expects the skip scan
+   strategy to allow the scan to skip over most individual index leaf pages.
+   If there are many distinct <literal>a</literal> values, then the entire
+   index will have to be scanned, so in most cases the planner will prefer a
+   sequential table scan over using the index.
   </para>
 
   <para>
@@ -508,11 +511,15 @@ CREATE INDEX test2_mm_idx ON test2 (major, minor);
   </para>
 
   <para>
-   Multicolumn indexes should be used sparingly.  In most situations,
-   an index on a single column is sufficient and saves space and time.
-   Indexes with more than three columns are unlikely to be helpful
-   unless the usage of the table is extremely stylized.  See also
-   <xref linkend="indexes-bitmap-scans"/> and
+   Multicolumn indexes should only be used when testing shows that they'll
+   offer a clear advantage over simply using multiple single-column indexes.
+   Indexes with more than three columns can make sense, but only when most
+   queries that make use of later columns also make use of earlier prefix
+   columns.  It's possible for B-Tree index scans to make use of <quote>skip
+    scan</quote> optimizations with queries that omit a low cardinality
+   leading prefix column, but this is usually much less efficient than a scan
+   of an index without the extra prefix column.  See <xref
+    linkend="indexes-bitmap-scans"/> and
    <xref linkend="indexes-index-only-scans"/> for some discussion of the
    merits of different index configurations.
   </para>
@@ -669,9 +676,13 @@ CREATE INDEX test3_desc_index ON test3 (id DESC NULLS LAST);
    multicolumn index on <literal>(x, y)</literal>.  This index would typically be
    more efficient than index combination for queries involving both
    columns, but as discussed in <xref linkend="indexes-multicolumn"/>, it
-   would be almost useless for queries involving only <literal>y</literal>, so it
-   should not be the only index.  A combination of the multicolumn index
-   and a separate index on <literal>y</literal> would serve reasonably well.  For
+   would be less useful for queries involving only <literal>y</literal>.  Just
+   how useful might depend on how effective the B-Tree index skip scan
+   optimization is; if <literal>x</literal> has no more than several hundred
+   distinct values, skip scan will make searches for specific
+   <literal>y</literal> values execute reasonably efficiently.  A combination
+   of a multicolumn index on <literal>(x, y)</literal> and a separate index on
+   <literal>y</literal> might also serve reasonably well.  For
    queries involving only <literal>x</literal>, the multicolumn index could be
    used, though it would be larger and hence slower than an index on
    <literal>x</literal> alone.  The last alternative is to create all three
 
@@ -4263,7 +4263,9 @@ description | Waiting for a newly initialized WAL file to reach durable storage
      <replaceable>column_name</replaceable> =
      <replaceable>value2</replaceable> ...</literal> construct, though only
     when the optimizer transforms the construct into an equivalent
-    multi-valued array representation.
+    multi-valued array representation.  Similarly, when B-Tree index scans use
+    the skip scan strategy, an index search is performed each time the scan is
+    repositioned to the next index leaf page that might have matching tuples.
    </para>
   </note>
   <tip>
 
@@ -860,6 +860,37 @@ EXPLAIN ANALYZE SELECT * FROM tenk1 WHERE thousand IN (1, 2, 3, 4);
     <structname>tenk1_thous_tenthous</structname> index leaf page.
    </para>
 
+   <para>
+    The <quote>Index Searches</quote> line is also useful with B-tree index
+    scans that apply the <firstterm>skip scan</firstterm> optimization to
+    more efficiently traverse through an index:
+<screen>
+EXPLAIN ANALYZE SELECT four, unique1 FROM tenk1 WHERE four BETWEEN 1 AND 3 AND unique1 = 42;
+                                                              QUERY PLAN
+-------------------------------------------------------------------&zwsp;---------------------------------------------------------------
+ Index Only Scan using tenk1_four_unique1_idx on tenk1  (cost=0.29..6.90 rows=1 width=8) (actual time=0.006..0.007 rows=1.00 loops=1)
+   Index Cond: ((four &gt;= 1) AND (four &lt;= 3) AND (unique1 = 42))
+   Heap Fetches: 0
+   Index Searches: 3
+   Buffers: shared hit=7
+ Planning Time: 0.029 ms
+ Execution Time: 0.012 ms
+</screen>
+
+    Here we see an Index-Only Scan node using
+    <structname>tenk1_four_unique1_idx</structname>, a composite index on the
+    <structname>tenk1</structname> table's <structfield>four</structfield> and
+    <structfield>unique1</structfield> columns.  The scan performs 3 searches
+    that each read a single index leaf page:
+    <quote><literal>four = 1 AND unique1 = 42</literal></quote>,
+    <quote><literal>four = 2 AND unique1 = 42</literal></quote>, and
+    <quote><literal>four = 3 AND unique1 = 42</literal></quote>.  This index
+    is generally a good target for skip scan, since its leading column (the
+    <structfield>four</structfield> column) contains only 4 distinct values,
+    while its second/final column (the <structfield>unique1</structfield>
+    column) contains many distinct values.
+   </para>
+
    <para>
     Another type of extra information is the number of rows removed by a
     filter condition:
 
@@ -461,6 +461,13 @@
        </entry>
        <entry>5</entry>
       </row>
+      <row>
+       <entry>
+        Return the addresses of C-callable skip support function(s)
+        (optional)
+       </entry>
+       <entry>6</entry>
+      </row>
      </tbody>
     </tgroup>
    </table>
@@ -1062,7 +1069,8 @@ DEFAULT FOR TYPE int8 USING btree FAMILY integer_ops AS
   FUNCTION 1 btint8cmp(int8, int8) ,
   FUNCTION 2 btint8sortsupport(internal) ,
   FUNCTION 3 in_range(int8, int8, int8, boolean, boolean) ,
-  FUNCTION 4 btequalimage(oid) ;
+  FUNCTION 4 btequalimage(oid) ,
+  FUNCTION 6 btint8skipsupport(internal) ;
 
 CREATE OPERATOR CLASS int4_ops
 DEFAULT FOR TYPE int4 USING btree FAMILY integer_ops AS
@@ -1075,7 +1083,8 @@ DEFAULT FOR TYPE int4 USING btree FAMILY integer_ops AS
   FUNCTION 1 btint4cmp(int4, int4) ,
   FUNCTION 2 btint4sortsupport(internal) ,
   FUNCTION 3 in_range(int4, int4, int4, boolean, boolean) ,
-  FUNCTION 4 btequalimage(oid) ;
+  FUNCTION 4 btequalimage(oid) ,
+  FUNCTION 6 btint4skipsupport(internal) ;
 
 CREATE OPERATOR CLASS int2_ops
 DEFAULT FOR TYPE int2 USING btree FAMILY integer_ops AS
@@ -1088,7 +1097,8 @@ DEFAULT FOR TYPE int2 USING btree FAMILY integer_ops AS
   FUNCTION 1 btint2cmp(int2, int2) ,
   FUNCTION 2 btint2sortsupport(internal) ,
   FUNCTION 3 in_range(int2, int2, int2, boolean, boolean) ,
-  FUNCTION 4 btequalimage(oid) ;
+  FUNCTION 4 btequalimage(oid) ,
+  FUNCTION 6 btint2skipsupport(internal) ;
 
 ALTER OPERATOR FAMILY integer_ops USING btree ADD
   -- cross-type comparisons int8 vs int2
 
@@ -489,7 +489,8 @@ index_parallelscan_estimate(Relation indexRelation, int nkeys, int norderbys,
 	if (parallel_aware &&
 		indexRelation->rd_indam->amestimateparallelscan != NULL)
 		nbytes = add_size(nbytes,
-						  indexRelation->rd_indam->amestimateparallelscan(nkeys,
+						  indexRelation->rd_indam->amestimateparallelscan(indexRelation,
+																		  nkeys,
 																		  norderbys));
 
 	return nbytes;