diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 236a59e8536c..40927d56e6a1 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -581,6 +581,32 @@ fi undefine([Ac_cachevar])dnl ])# PGAC_SSE42_CRC32_INTRINSICS +# PGAC_AVX2_SUPPORT +# --------------------------- +# Check if the compiler supports AVX2 target attribute. +# This is used for optimized checksum calculations with runtime detection. +# +# If AVX2 target attribute is supported, sets pgac_avx2_support. +AC_DEFUN([PGAC_AVX2_SUPPORT], +[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx2_support])])dnl +AC_CACHE_CHECK([for AVX2 target attribute support], [Ac_cachevar], +[AC_COMPILE_IFELSE([AC_LANG_PROGRAM([#include + #if defined(__has_attribute) && __has_attribute (target) + __attribute__((target("avx2"))) + static int avx2_test(void) + { + return 0; + } + #endif], + [return avx2_test();])], + [Ac_cachevar=yes], + [Ac_cachevar=no])]) +if test x"$Ac_cachevar" = x"yes"; then + pgac_avx2_support=yes +fi +undefine([Ac_cachevar])dnl +])# PGAC_AVX2_SUPPORT + # PGAC_AVX512_PCLMUL_INTRINSICS # --------------------------- # Check if the compiler supports AVX-512 carryless multiplication diff --git a/configure b/configure index f7c24c8f5768..08a046192842 100755 --- a/configure +++ b/configure @@ -17552,6 +17552,58 @@ $as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h fi +# Check for AVX2 target and intrinsic support +# +if test x"$host_cpu" = x"x86_64"; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5 +$as_echo_n "checking for AVX2 support... " >&6; } +if ${pgac_cv_avx2_support+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include + #include + #if defined(__has_attribute) && __has_attribute (target) + __attribute__((target("avx2"))) + #endif + static int avx2_test(void) + { + const char buf[sizeof(__m256i)]; + __m256i accum = _mm256_loadu_si256((const __m256i *) buf); + accum = _mm256_add_epi32(accum, accum); + int result = _mm256_extract_epi32(accum, 0); + return (int) result; + } +int +main () +{ +return avx2_test(); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pgac_cv_avx2_support=yes +else + pgac_cv_avx2_support=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx2_support" >&5 +$as_echo "$pgac_cv_avx2_support" >&6; } +if test x"$pgac_cv_avx2_support" = x"yes"; then + pgac_avx2_support=yes +fi + + if test x"$pgac_avx2_support" = x"yes"; then + +$as_echo "#define USE_AVX2_WITH_RUNTIME_CHECK 1" >>confdefs.h + + fi +fi + # Check for AVX-512 popcount intrinsics # if test x"$host_cpu" = x"x86_64"; then diff --git a/configure.ac b/configure.ac index 6c802deaacb1..00a701db1f20 100644 --- a/configure.ac +++ b/configure.ac @@ -2077,6 +2077,15 @@ else fi fi +# Check for AVX2 target and intrinsic support +# +if test x"$host_cpu" = x"x86_64"; then + PGAC_AVX2_SUPPORT() + if test x"$pgac_avx2_support" = x"yes"; then + AC_DEFINE(USE_AVX2_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX2 instructions with a runtime check.]) + fi +fi + # Check for XSAVE intrinsics # PGAC_XSAVE_INTRINSICS() diff --git a/contrib/meson.build b/contrib/meson.build index ed30ee7d639f..fe5149aadff1 100644 --- a/contrib/meson.build +++ b/contrib/meson.build @@ -12,6 +12,7 @@ contrib_doc_args = { 'install_dir': contrib_doc_dir, } +subdir('pg_checksum_bench') subdir('amcheck') subdir('auth_delay') subdir('auto_explain') diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c index aef442b5db30..7beb7765da94 100644 --- a/contrib/pageinspect/rawpage.c +++ b/contrib/pageinspect/rawpage.c @@ -23,7 +23,7 @@ #include "miscadmin.h" #include "pageinspect.h" #include "storage/bufmgr.h" -#include "storage/checksum.h" +#include "port/checksum.h" #include "utils/builtins.h" #include "utils/pg_lsn.h" #include "utils/rel.h" diff --git a/contrib/pg_checksum_bench/meson.build b/contrib/pg_checksum_bench/meson.build new file mode 100644 index 000000000000..32ccd9efa0fb --- /dev/null +++ b/contrib/pg_checksum_bench/meson.build @@ -0,0 +1,23 @@ +# Copyright (c) 2022-2025, PostgreSQL Global Development Group + +pg_checksum_bench_sources = files( + 'pg_checksum_bench.c', +) + +if host_system == 'windows' + pg_checksum_bench_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'pg_checksum_bench', + '--FILEDESC', 'pg_checksum_bench',]) +endif + +pg_checksum_bench = shared_module('pg_checksum_bench', + pg_checksum_bench_sources, + kwargs: contrib_mod_args, +) +contrib_targets += pg_checksum_bench + +install_data( + 'pg_checksum_bench--1.0.sql', + 'pg_checksum_bench.control', + kwargs: contrib_data_args, +) diff --git a/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql new file mode 100644 index 000000000000..5f13cbe3c5e6 --- /dev/null +++ b/contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql @@ -0,0 +1,8 @@ +/* contrib/pg_checksum_bench/pg_checksum_bench--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +-- \echo Use "CREATE EXTENSION pg_checksum_bench" to load this file. \quit + +CREATE FUNCTION drive_pg_checksum(page_count int) + RETURNS pg_catalog.void + AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.c b/contrib/pg_checksum_bench/pg_checksum_bench.c new file mode 100644 index 000000000000..e5b150e6b13c --- /dev/null +++ b/contrib/pg_checksum_bench/pg_checksum_bench.c @@ -0,0 +1,34 @@ +#include "postgres.h" +#include "fmgr.h" +#include "port/checksum_impl.h" + +#include +#include + +PG_MODULE_MAGIC; + +#define REPEATS 1000000 + +PG_FUNCTION_INFO_V1(drive_pg_checksum); +Datum +drive_pg_checksum(PG_FUNCTION_ARGS) +{ + int page_count = PG_GETARG_INT32(0); + + PGChecksummablePage * pages = palloc(page_count * sizeof(PGChecksummablePage)); + srand(0); + for (size_t i = 0; i < page_count * sizeof(PGChecksummablePage); i++){ + char * byte_ptr = (char *) pages; + byte_ptr[i] = rand() % 256; + } + + for (int i = 0; i < REPEATS; i++){ + const PGChecksummablePage * test_page = pages + (i % page_count); + volatile uint32 result = pg_checksum_block(test_page); + (void) result; + } + + pfree((void *) pages); + + PG_RETURN_VOID(); +} diff --git a/contrib/pg_checksum_bench/pg_checksum_bench.control b/contrib/pg_checksum_bench/pg_checksum_bench.control new file mode 100644 index 000000000000..4a4e2c9363c1 --- /dev/null +++ b/contrib/pg_checksum_bench/pg_checksum_bench.control @@ -0,0 +1,4 @@ +comment = 'pg_checksum benchmark' +default_version = '1.0' +module_pathname = '$libdir/pg_checksum_bench' +relocatable = true diff --git a/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql new file mode 100644 index 000000000000..4b347699953e --- /dev/null +++ b/contrib/pg_checksum_bench/sql/pg_checksum_bench.sql @@ -0,0 +1,17 @@ +CREATE EXTENSION pg_checksum_bench; + +SELECT drive_pg_checksum(-1); + +\timing on + +SELECT drive_pg_checksum(1); +SELECT drive_pg_checksum(2); +SELECT drive_pg_checksum(4); +SELECT drive_pg_checksum(8); +SELECT drive_pg_checksum(16); +SELECT drive_pg_checksum(32); +SELECT drive_pg_checksum(64); +SELECT drive_pg_checksum(128); +SELECT drive_pg_checksum(256); +SELECT drive_pg_checksum(512); +SELECT drive_pg_checksum(1024); diff --git a/meson.build b/meson.build index 0f61ff6a7006..55b26814efcc 100644 --- a/meson.build +++ b/meson.build @@ -2293,6 +2293,36 @@ int main(void) endif +############################################################### +# Check for the availability of AVX2 support +############################################################### + +if host_cpu == 'x86_64' + + prog = ''' +#include +#include +#if defined(__has_attribute) && __has_attribute (target) +__attribute__((target("avx2"))) +#endif +static int avx2_test(void) +{ + return 0; +} + +int main(void) +{ + return avx2_test(); +} +''' + + if cc.links(prog, name: 'AVX2 support', args: test_c_args) + cdata.set('USE_AVX2_WITH_RUNTIME_CHECK', 1) + endif + +endif + + ############################################################### # Check for the availability of AVX-512 popcount intrinsics. ############################################################### diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c index bb7d90aa5d96..d84ced4b47cf 100644 --- a/src/backend/backup/basebackup.c +++ b/src/backend/backup/basebackup.c @@ -39,7 +39,7 @@ #include "replication/walsender.h" #include "replication/walsender_private.h" #include "storage/bufpage.h" -#include "storage/checksum.h" +#include "port/checksum.h" #include "storage/dsm_impl.h" #include "storage/ipc.h" #include "storage/reinit.h" diff --git a/src/backend/storage/page/Makefile b/src/backend/storage/page/Makefile index da539b113a69..788fee403f64 100644 --- a/src/backend/storage/page/Makefile +++ b/src/backend/storage/page/Makefile @@ -12,12 +12,8 @@ subdir = src/backend/storage/page top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = \ +OBJS = \ bufpage.o \ - checksum.o \ itemptr.o include $(top_srcdir)/src/backend/common.mk - -# Provide special optimization flags for checksum.c -checksum.o: CFLAGS += ${CFLAGS_UNROLL_LOOPS} ${CFLAGS_VECTORIZE} diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index aac6e6959546..73f42dc0c495 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -18,7 +18,7 @@ #include "access/itup.h" #include "access/xlog.h" #include "pgstat.h" -#include "storage/checksum.h" +#include "port/checksum.h" #include "utils/memdebug.h" #include "utils/memutils.h" diff --git a/src/backend/storage/page/meson.build b/src/backend/storage/page/meson.build index 112f00ff3655..cf92a8f55f05 100644 --- a/src/backend/storage/page/meson.build +++ b/src/backend/storage/page/meson.build @@ -1,14 +1,5 @@ # Copyright (c) 2022-2025, PostgreSQL Global Development Group -checksum_backend_lib = static_library('checksum_backend_lib', - 'checksum.c', - dependencies: backend_build_deps, - kwargs: internal_lib_args, - c_args: vectorize_cflags + unroll_loops_cflags, -) - -backend_link_with += checksum_backend_lib - backend_sources += files( 'bufpage.c', 'itemptr.c', diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index 46cb2f36efaa..2e0212c029cd 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -29,8 +29,7 @@ #include "getopt_long.h" #include "pg_getopt.h" #include "storage/bufpage.h" -#include "storage/checksum.h" -#include "storage/checksum_impl.h" +#include "port/checksum.h" static int64 files_scanned = 0; diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c index 91ed16acb088..f9a5ed02ee4a 100644 --- a/src/bin/pg_upgrade/file.c +++ b/src/bin/pg_upgrade/file.c @@ -24,8 +24,7 @@ #include "common/file_perm.h" #include "pg_upgrade.h" #include "storage/bufpage.h" -#include "storage/checksum.h" -#include "storage/checksum_impl.h" +#include "port/checksum.h" /* diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index f52f14cc5664..66556985a639 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -678,6 +678,9 @@ /* Define to 1 to use AVX-512 CRC algorithms with a runtime check. */ #undef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK +/* Define to 1 to use AVX2 instructions with a runtime check. */ +#undef USE_AVX2_WITH_RUNTIME_CHECK + /* Define to 1 to use AVX-512 popcount instructions with a runtime check. */ #undef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK diff --git a/src/include/storage/checksum.h b/src/include/port/checksum.h similarity index 94% rename from src/include/storage/checksum.h rename to src/include/port/checksum.h index 25d13a798d10..c2faed83ede7 100644 --- a/src/include/storage/checksum.h +++ b/src/include/port/checksum.h @@ -6,7 +6,7 @@ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * src/include/storage/checksum.h + * src/include/port/checksum.h * *------------------------------------------------------------------------- */ diff --git a/src/include/storage/checksum_impl.h b/src/include/port/checksum_impl.h similarity index 72% rename from src/include/storage/checksum_impl.h rename to src/include/port/checksum_impl.h index da87d61ba524..0e1eef452496 100644 --- a/src/include/storage/checksum_impl.h +++ b/src/include/port/checksum_impl.h @@ -5,13 +5,13 @@ * * This file exists for the benefit of external programs that may wish to * check Postgres page checksums. They can #include this to get the code - * referenced by storage/checksum.h. (Note: you may need to redefine + * referenced by port/checksum.h. (Note: you may need to redefine * Assert() as empty to compile this successfully externally.) * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * src/include/storage/checksum_impl.h + * src/include/port/checksum_impl.h * *------------------------------------------------------------------------- */ @@ -100,8 +100,23 @@ * manually unroll the inner loop. */ +#include "pg_config.h" #include "storage/bufpage.h" +#ifdef USE_AVX2_WITH_RUNTIME_CHECK + +#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT) +#include +#endif + +#include + +#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX) +#include +#endif + +#endif + /* number of checksums to calculate in parallel */ #define N_SUMS 32 /* prime multiplier of FNV-1a hash */ @@ -114,6 +129,9 @@ typedef union uint32 data[BLCKSZ / (sizeof(uint32) * N_SUMS)][N_SUMS]; } PGChecksummablePage; +/* Forward declaration */ +static uint32 pg_checksum_block_choose(const PGChecksummablePage *page); + /* * Base offsets to initialize each of the parallel FNV hashes into a * different initial state. @@ -129,6 +147,71 @@ static const uint32 checksumBaseOffsets[N_SUMS] = { 0x9FBF8C76, 0x15CA20BE, 0xF2CA9FD3, 0x959BD756 }; +#ifdef USE_AVX2_WITH_RUNTIME_CHECK + +/* + * Does CPUID say there's support for XSAVE instructions? + */ +static inline bool +xsave_available(void) +{ + unsigned int exx[4] = {0, 0, 0, 0}; + +#if defined(HAVE__GET_CPUID) + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUID) + __cpuid(exx, 1); +#else +#error cpuid instruction not available +#endif + return (exx[2] & (1 << 27)) != 0; /* osxsave */ +} + +/* + * Does XGETBV say the YMM registers are enabled? + * + * NB: Caller is responsible for verifying that xsave_available() returns true + * before calling this. + */ +#ifdef HAVE_XSAVE_INTRINSICS +pg_attribute_target("xsave") +#endif +static inline bool +ymm_regs_available(void) +{ +#ifdef HAVE_XSAVE_INTRINSICS + return (_xgetbv(0) & 0x06) == 0x06; +#else + return false; +#endif +} + +/* + * Check for AVX2 support using manual CPUID detection + */ +static inline bool +avx2_available(void) +{ +#ifdef USE_AVX2_WITH_RUNTIME_CHECK + unsigned int exx[4] = {0, 0, 0, 0}; + + if (!xsave_available() || !ymm_regs_available()) + return false; + +#if defined(HAVE__GET_CPUID_COUNT) + __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUIDEX) + __cpuidex(exx, 7, 0); +#else +#error cpuid instruction not available +#endif + return (exx[1] & (1 << 5)) != 0; /* avx2 */ +#else + return false; +#endif +} +#endif /* USE_AVX2_WITH_RUNTIME_CHECK */ + /* * Calculate one round of the checksum. */ @@ -143,7 +226,7 @@ do { \ * (at least on 4-byte boundary). */ static uint32 -pg_checksum_block(const PGChecksummablePage *page) +pg_checksum_block_default(const PGChecksummablePage *page) { uint32 sums[N_SUMS]; uint32 result = 0; @@ -173,6 +256,63 @@ pg_checksum_block(const PGChecksummablePage *page) return result; } +#ifdef USE_AVX2_WITH_RUNTIME_CHECK +/* + * AVX2-optimized block checksum algorithm. + * Same algorithm as default, but compiled with AVX2 target for auto-vectorization. + */ +pg_attribute_target("avx2") +static uint32 +pg_checksum_block_avx2(const PGChecksummablePage *page) +{ + uint32 sums[N_SUMS]; + uint32 result = 0; + uint32 i, + j; + + /* ensure that the size is compatible with the algorithm */ + Assert(sizeof(PGChecksummablePage) == BLCKSZ); + + /* initialize partial checksums to their corresponding offsets */ + memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets)); + + /* main checksum calculation */ + for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++) + for (j = 0; j < N_SUMS; j++) + CHECKSUM_COMP(sums[j], page->data[i][j]); + + /* finally add in two rounds of zeroes for additional mixing */ + for (i = 0; i < 2; i++) + for (j = 0; j < N_SUMS; j++) + CHECKSUM_COMP(sums[j], 0); + + /* xor fold partial checksums together */ + for (i = 0; i < N_SUMS; i++) + result ^= sums[i]; + + return result; +} +#endif + +/* Function pointer - external linkage */ +static uint32 (*pg_checksum_block)(const PGChecksummablePage *page) = pg_checksum_block_choose; + +/* Choose the best available checksum implementation */ +static uint32 +pg_checksum_block_choose(const PGChecksummablePage *page) +{ +#ifdef USE_AVX2_WITH_RUNTIME_CHECK + if (avx2_available()) + { + pg_checksum_block = pg_checksum_block_avx2; + return pg_checksum_block(page); + } +#endif + /* fallback to default implementation */ + pg_checksum_block = pg_checksum_block_default; + return pg_checksum_block(page); +} + /* * Compute the checksum for a Postgres page. * diff --git a/src/port/Makefile b/src/port/Makefile index 4274949dfa4c..4f1f460bff2b 100644 --- a/src/port/Makefile +++ b/src/port/Makefile @@ -39,6 +39,7 @@ OBJS = \ $(LIBOBJS) \ $(PG_CRC32C_OBJS) \ bsearch_arg.o \ + checksum.o \ chklocale.o \ inet_net_ntop.o \ noblock.o \ @@ -90,6 +91,11 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_CRC) pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_CRC) pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_CRC) +# Provide special optimization flags for checksum.c +checksum.o: CFLAGS += ${CFLAGS_UNROLL_LOOPS} ${CFLAGS_VECTORIZE} +checksum_shlib.o: CFLAGS += ${CFLAGS_UNROLL_LOOPS} ${CFLAGS_VECTORIZE} +checksum_srv.o: CFLAGS += ${CFLAGS_UNROLL_LOOPS} ${CFLAGS_VECTORIZE} + # # Shared library versions of object files # diff --git a/src/backend/storage/page/checksum.c b/src/port/checksum.c similarity index 73% rename from src/backend/storage/page/checksum.c rename to src/port/checksum.c index c913459b5a37..de61a46231d4 100644 --- a/src/backend/storage/page/checksum.c +++ b/src/port/checksum.c @@ -7,16 +7,16 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * src/backend/storage/page/checksum.c + * src/port/checksum.c * *------------------------------------------------------------------------- */ #include "postgres.h" -#include "storage/checksum.h" +#include "port/checksum.h" /* - * The actual code is in storage/checksum_impl.h. This is done so that + * The actual code is in port/checksum_impl.h. This is done so that * external programs can incorporate the checksum code by #include'ing * that file from the exported Postgres headers. (Compare our CRC code.) */ -#include "storage/checksum_impl.h" /* IWYU pragma: keep */ +#include "port/checksum_impl.h" /* IWYU pragma: keep */ diff --git a/src/port/meson.build b/src/port/meson.build index fc7b059fee50..d3e63bce9e7c 100644 --- a/src/port/meson.build +++ b/src/port/meson.build @@ -104,8 +104,8 @@ replace_funcs_pos = [ ['pg_crc32c_sb8', 'USE_SLICING_BY_8_CRC32C'], ] -pgport_cflags = {'crc': cflags_crc} -pgport_sources_cflags = {'crc': []} +pgport_cflags = {'crc': cflags_crc, 'checksum': vectorize_cflags + unroll_loops_cflags} +pgport_sources_cflags = {'crc': [], 'checksum': [files('checksum.c')]} foreach f : replace_funcs_neg func = f.get(0) diff --git a/src/test/modules/test_aio/test_aio.c b/src/test/modules/test_aio/test_aio.c index c55cf6c0aac0..175e491c0bc2 100644 --- a/src/test/modules/test_aio/test_aio.c +++ b/src/test/modules/test_aio/test_aio.c @@ -24,7 +24,7 @@ #include "storage/aio_internal.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" -#include "storage/checksum.h" +#include "port/checksum.h" #include "storage/ipc.h" #include "storage/lwlock.h" #include "utils/builtins.h"