Commit 0ce3873

Micro-optimize AllocSetFreeIndex() by reference to pg_bitutils code.
Use __builtin_clz() where available.  Where it isn't, we can still win
a little by using the pg_leftmost_one_pos[] lookup table instead of
having a private table.

Also drop the initial right shift by ALLOC_MINBITS in favor of
subtracting ALLOC_MINBITS from the leftmost-one-pos result.  This is a
win because the compiler can fold that adjustment into other constants
it'd have to add anyway, making the shift-removal free.

Also, we can explain this coding as an unrolled form of
pg_leftmost_one_pos32(), even though that's a bit ahistorical since it
long predates pg_bitutils.h.

John Naylor, with some cosmetic adjustments by me

Discussion: https://postgr.es/m/CACPNZCuNUGMxjK7WTn_=WZnRbfASDdBxmjsVf2+m9MdmeNw_sg@mail.gmail.com
1 parent 27a3b2a commit 0ce3873
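To see the equivalence the commit message describes outside of aset.c, here is a minimal standalone sketch. It is not part of the commit: ALLOC_MINBITS = 3, the function names, the locally rebuilt byte table, and the 8192-byte test range are assumptions chosen for illustration, and it relies on the GCC/Clang __builtin_clz() builtin. One helper follows the shape of the new clz-based computation (no initial shift, ALLOC_MINBITS subtracted from the result); the other follows the shape of the older shift-then-lookup coding.

/*
 * Standalone illustration only -- hypothetical code, not PostgreSQL source.
 * freelist_index_clz() follows the shape of the new HAVE__BUILTIN_CLZ branch;
 * freelist_index_table() follows the shape of the old shift-then-lookup code.
 */
#include <stdio.h>
#include <stdint.h>

#define ALLOC_MINBITS 3			/* assumed: smallest chunk is 1 << 3 = 8 bytes */

/* locally built stand-in for the pg_leftmost_one_pos[] byte table */
static unsigned char leftmost_one_pos[256];

static void
init_table(void)
{
	for (int i = 1; i < 256; i++)
	{
		int			pos = 0;

		for (int v = i; v > 1; v >>= 1)
			pos++;
		leftmost_one_pos[i] = (unsigned char) pos;	/* floor(log2(i)) */
	}
}

/* new style: leftmost one bit of (size - 1), ALLOC_MINBITS folded in afterward */
static int
freelist_index_clz(size_t size)
{
	return 31 - __builtin_clz((uint32_t) size - 1) - ALLOC_MINBITS + 1;
}

/* old style: shift first, then a two-byte lookup giving log2(tsize) + 1 */
static int
freelist_index_table(size_t size)
{
	uint32_t	tsize = (uint32_t) (size - 1) >> ALLOC_MINBITS;
	uint32_t	t = tsize >> 8;

	return (t ? leftmost_one_pos[t] + 8 : leftmost_one_pos[tsize]) + 1;
}

int
main(void)
{
	init_table();

	/* both forms should agree for every size above the smallest chunk */
	for (size_t size = (1 << ALLOC_MINBITS) + 1; size <= 8192; size++)
		if (freelist_index_clz(size) != freelist_index_table(size))
			printf("mismatch at size %zu\n", size);

	printf("size 100 maps to freelist index %d\n", freelist_index_clz(100));
	return 0;
}

Compiled with gcc or clang, this should report no mismatches, which is the property the commit relies on when it folds the ALLOC_MINBITS adjustment into the final constants instead of shifting first.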

File tree

1 file changed (+31, -25 lines)

src/backend/utils/mmgr/aset.c

@@ -46,6 +46,7 @@
 
 #include "postgres.h"
 
+#include "port/pg_bitutils.h"
 #include "utils/memdebug.h"
 #include "utils/memutils.h"
 
@@ -297,18 +298,6 @@ static const MemoryContextMethods AllocSetMethods = {
 #endif
 };
 
-/*
- * Table for AllocSetFreeIndex
- */
-#define LT16(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
-
-static const unsigned char LogTable256[256] =
-{
-	0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
-	LT16(5), LT16(6), LT16(6), LT16(7), LT16(7), LT16(7), LT16(7),
-	LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8), LT16(8)
-};
-
 /* ----------
  * Debug macros
  * ----------
@@ -337,24 +326,41 @@ static inline int
 AllocSetFreeIndex(Size size)
 {
 	int			idx;
-	unsigned int t,
-				tsize;
 
 	if (size > (1 << ALLOC_MINBITS))
 	{
-		tsize = (size - 1) >> ALLOC_MINBITS;
-
-		/*
-		 * At this point we need to obtain log2(tsize)+1, ie, the number of
-		 * not-all-zero bits at the right.  We used to do this with a
-		 * shift-and-count loop, but this function is enough of a hotspot to
-		 * justify micro-optimization effort.  The best approach seems to be
-		 * to use a lookup table.  Note that this code assumes that
-		 * ALLOCSET_NUM_FREELISTS <= 17, since we only cope with two bytes of
-		 * the tsize value.
+		/*----------
+		 * At this point we must compute ceil(log2(size >> ALLOC_MINBITS)).
+		 * This is the same as
+		 *		pg_leftmost_one_pos32((size - 1) >> ALLOC_MINBITS) + 1
+		 * or equivalently
+		 *		pg_leftmost_one_pos32(size - 1) - ALLOC_MINBITS + 1
+		 *
+		 * However, rather than just calling that function, we duplicate the
+		 * logic here, allowing an additional optimization.  It's reasonable
+		 * to assume that ALLOC_CHUNK_LIMIT fits in 16 bits, so we can unroll
+		 * the byte-at-a-time loop in pg_leftmost_one_pos32 and just handle
+		 * the last two bytes.
+		 *
+		 * Yes, this function is enough of a hot-spot to make it worth this
+		 * much trouble.
+		 *----------
 		 */
+#ifdef HAVE__BUILTIN_CLZ
+		idx = 31 - __builtin_clz((uint32) size - 1) - ALLOC_MINBITS + 1;
+#else
+		uint32		t,
+					tsize;
+
+		/* Statically assert that we only have a 16-bit input value. */
+		StaticAssertStmt(ALLOC_CHUNK_LIMIT < (1 << 16),
+						 "ALLOC_CHUNK_LIMIT must be less than 64kB");
+
+		tsize = size - 1;
 		t = tsize >> 8;
-		idx = t ? LogTable256[t] + 8 : LogTable256[tsize];
+		idx = t ? pg_leftmost_one_pos[t] + 8 : pg_leftmost_one_pos[tsize];
+		idx -= ALLOC_MINBITS - 1;
+#endif
 
 		Assert(idx < ALLOCSET_NUM_FREELISTS);
 	}
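To spell out the new comment's arithmetic with a concrete value (assuming ALLOC_MINBITS is 3, as in aset.c at the time): for size = 100, size - 1 = 99 = 0b1100011, whose leftmost one bit is at position 6, so the clz branch computes idx = 6 - 3 + 1 = 4. The removed coding reached the same answer the long way round: tsize = 99 >> 3 = 12 and LogTable256[12] = 4. Either way the request goes to the freelist holding 1 << (4 + 3) = 128-byte chunks, the smallest power-of-two chunk size that can hold 100 bytes.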
