diff --git a/var/spack/repos/builtin/packages/zlib-ng/package.py b/var/spack/repos/builtin/packages/zlib-ng/package.py index c02ca28a86..d069545dc1 100644 --- a/var/spack/repos/builtin/packages/zlib-ng/package.py +++ b/var/spack/repos/builtin/packages/zlib-ng/package.py @@ -31,7 +31,11 @@ class ZlibNg(AutotoolsPackage, CMakePackage): build_system("autotools", "cmake", default="autotools") # rpath shenanigans, see https://github.com/zlib-ng/zlib-ng/pull/1546 - patch("pr-1546.patch", when="@2.1.3 platform=darwin") + with when("@2.1.3"): + patch("pr-1546.patch", when="platform=darwin") + patch("pr-1542.patch") # fix sse4.2 detection + patch("pr-1561.patch", when="build_system=autotools") # drop bash dependency + patch("pr-1562.patch") # improve intrinsics detection with when("build_system=cmake"): depends_on("cmake@3.5.1:", type="build") diff --git a/var/spack/repos/builtin/packages/zlib-ng/pr-1542.patch b/var/spack/repos/builtin/packages/zlib-ng/pr-1542.patch new file mode 100644 index 0000000000..675c2c1a3d --- /dev/null +++ b/var/spack/repos/builtin/packages/zlib-ng/pr-1542.patch @@ -0,0 +1,224 @@ +From 8c5d5eca51d9e4cd9aa046dba8f939b3f4012256 Mon Sep 17 00:00:00 2001 +From: Hans Kristian Rosbach +Date: Fri, 21 Jul 2023 13:43:15 +0200 +Subject: [PATCH 1/3] Clean up SSE4.2 support, and no longer use asm fallback + or gcc builtin. + +Defines changing meaning: +X86_SSE42 used to mean the compiler supports crc asm fallback. +X86_SSE42_CRC_INTRIN used to mean compiler supports SSE4.2 intrinsics. + +X86_SSE42 now means compiler supports SSE4.2 intrinsics. + +This therefore also fixes the adler32_sse42 checks, since those were depending +on SSE4.2 intrinsics but was mistakenly checking the X86_SSE42 define. +Now the X86_SSE42 define actually means what it appears to. +--- + CMakeLists.txt | 5 +---- + arch/x86/insert_string_sse42.c | 36 +++++---------------------------- + cmake/detect-intrinsics.cmake | 23 +++------------------ + configure | 37 ++++++++-------------------------- + win32/Makefile.msc | 1 - + 5 files changed, 17 insertions(+), 85 deletions(-) + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 420a5c78..1e42239a 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -827,15 +827,12 @@ if(WITH_OPTIM) + endif() + if(WITH_SSE42) + check_sse42_intrinsics() +- if(HAVE_SSE42CRC_INLINE_ASM OR HAVE_SSE42CRC_INTRIN) ++ if(HAVE_SSE42_INTRIN) + add_definitions(-DX86_SSE42) + set(SSE42_SRCS ${ARCHDIR}/adler32_sse42.c ${ARCHDIR}/insert_string_sse42.c) + add_feature_info(SSE42_CRC 1 "Support SSE4.2 optimized CRC hash generation, using \"${SSE42FLAG}\"") + list(APPEND ZLIB_ARCH_SRCS ${SSE42_SRCS}) + set_property(SOURCE ${SSE42_SRCS} PROPERTY COMPILE_FLAGS "${SSE42FLAG} ${NOLTOFLAG}") +- if(HAVE_SSE42CRC_INTRIN) +- add_definitions(-DX86_SSE42_CRC_INTRIN) +- endif() + else() + set(WITH_SSE42 OFF) + endif() +diff --git a/arch/x86/insert_string_sse42.c b/arch/x86/insert_string_sse42.c +index 565d92f9..ae092a7e 100644 +--- a/arch/x86/insert_string_sse42.c ++++ b/arch/x86/insert_string_sse42.c +@@ -5,38 +5,13 @@ + * + */ + ++#ifdef X86_SSE42 + #include "../../zbuild.h" +-#include +-#ifdef _MSC_VER +-# include +-#endif ++#include + #include "../../deflate.h" + +-#ifdef X86_SSE42_CRC_INTRIN +-# ifdef _MSC_VER +-# define HASH_CALC(s, h, val)\ +- h = _mm_crc32_u32(h, val) +-# else +-# define HASH_CALC(s, h, val)\ +- h = __builtin_ia32_crc32si(h, val) +-# endif +-#else +-# ifdef _MSC_VER +-# define HASH_CALC(s, h, val) {\ +- __asm mov edx, h\ +- __asm mov eax, val\ +- __asm crc32 eax, edx\ +- __asm mov h, eax\ +- } +-# else +-# define HASH_CALC(s, h, val) \ +- __asm__ __volatile__ (\ +- "crc32 %1,%0\n\t"\ +- : "+r" (h)\ +- : "r" (val)\ +- ); +-# endif +-#endif ++#define HASH_CALC(s, h, val)\ ++ h = _mm_crc32_u32(h, val) + + #define HASH_CALC_VAR h + #define HASH_CALC_VAR_INIT uint32_t h = 0 +@@ -45,6 +20,5 @@ + #define INSERT_STRING insert_string_sse42 + #define QUICK_INSERT_STRING quick_insert_string_sse42 + +-#ifdef X86_SSE42 +-# include "../../insert_string_tpl.h" ++#include "../../insert_string_tpl.h" + #endif +diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake +index 9cbc5908..52c54dc8 100644 +--- a/cmake/detect-intrinsics.cmake ++++ b/cmake/detect-intrinsics.cmake +@@ -481,35 +481,18 @@ macro(check_sse42_intrinsics) + set(SSE42FLAG "-msse4.2") + endif() + endif() +- # Check whether compiler supports SSE4.2 CRC inline asm ++ # Check whether compiler supports SSE4.2 intrinsics + set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG}") + check_c_source_compile_or_run( +- "int main(void) { +- unsigned val = 0, h = 0; +- #if defined(_MSC_VER) +- { __asm mov edx, h __asm mov eax, val __asm crc32 eax, edx __asm mov h, eax } +- #else +- __asm__ __volatile__ ( \"crc32 %1,%0\" : \"+r\" (h) : \"r\" (val) ); +- #endif +- return (int)h; +- }" +- HAVE_SSE42CRC_INLINE_ASM +- ) +- # Check whether compiler supports SSE4.2 CRC intrinsics +- check_c_source_compile_or_run( +- "#include ++ "#include + int main(void) { + unsigned crc = 0; + char c = 'c'; +- #if defined(_MSC_VER) + crc = _mm_crc32_u32(crc, c); +- #else +- crc = __builtin_ia32_crc32qi(crc, c); +- #endif + (void)crc; + return 0; + }" +- HAVE_SSE42CRC_INTRIN ++ HAVE_SSE42_INTRIN + ) + set(CMAKE_REQUIRED_FLAGS) + endmacro() +diff --git a/configure b/configure +index 8714590e..6b4e7fff 100755 +--- a/configure ++++ b/configure +@@ -1431,38 +1431,23 @@ EOF + } + + check_sse42_intrinsics() { +- # Check whether compiler supports SSE4.2 CRC inline asm +- cat > $test.c << EOF +-int main(void) { +- unsigned val = 0, h = 0; +- __asm__ __volatile__ ( "crc32 %1,%0" : "+r" (h) : "r" (val) ); +- return (int) h; +-} +-EOF +- if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then +- echo "Checking for SSE4.2 CRC inline assembly ... Yes." | tee -a configure.log +- HAVE_SSE42CRC_INLINE_ASM=1 +- else +- echo "Checking for SSE4.2 CRC inline assembly ... No." | tee -a configure.log +- HAVE_SSE42CRC_INLINE_ASM=0 +- fi +- +- # Check whether compiler supports SSE4.2 CRC intrinsics ++ # Check whether compiler supports SSE4.2 intrinsics + cat > $test.c << EOF ++#include + int main(void) { + unsigned crc = 0; + char c = 'c'; +- crc = __builtin_ia32_crc32qi(crc, c); ++ crc = _mm_crc32_u32(crc, c); + (void)crc; + return 0; + } + EOF + if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then +- echo "Checking for SSE4.2 CRC intrinsics ... Yes." | tee -a configure.log +- HAVE_SSE42CRC_INTRIN=1 ++ echo "Checking for SSE4.2 intrinsics ... Yes." | tee -a configure.log ++ HAVE_SSE42_INTRIN=1 + else +- echo "Checking for SSE4.2 CRC intrinsics ... No." | tee -a configure.log +- HAVE_SSE42CRC_INTRIN=0 ++ echo "Checking for SSE4.2 intrinsics ... No." | tee -a configure.log ++ HAVE_SSE42_INTRIN=0 + fi + } + +@@ -1606,15 +1591,9 @@ case "${ARCH}" in + + check_sse42_intrinsics + +- if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then ++ if test ${HAVE_SSE42_INTRIN} -eq 1; then + CFLAGS="${CFLAGS} -DX86_SSE42" + SFLAGS="${SFLAGS} -DX86_SSE42" +- +- if test ${HAVE_SSE42CRC_INTRIN} -eq 1; then +- CFLAGS="${CFLAGS} -DX86_SSE42_CRC_INTRIN" +- SFLAGS="${SFLAGS} -DX86_SSE42_CRC_INTRIN" +- fi +- + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_sse42.o insert_string_sse42.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_sse42.lo insert_string_sse42.lo" + fi +diff --git a/win32/Makefile.msc b/win32/Makefile.msc +index 9ed26f28..3035072b 100644 +--- a/win32/Makefile.msc ++++ b/win32/Makefile.msc +@@ -31,7 +31,6 @@ WFLAGS = \ + -DX86_PCLMULQDQ_CRC \ + -DX86_SSE2 \ + -DX86_SSE42 \ +- -DX86_SSE42_CRC_INTRIN \ + -DX86_SSSE3 \ + -DX86_AVX2 + +-- +2.39.2 + diff --git a/var/spack/repos/builtin/packages/zlib-ng/pr-1561.patch b/var/spack/repos/builtin/packages/zlib-ng/pr-1561.patch new file mode 100644 index 0000000000..f87aea684b --- /dev/null +++ b/var/spack/repos/builtin/packages/zlib-ng/pr-1561.patch @@ -0,0 +1,210 @@ +From f6fb1d350a7b8210cc9c45ed502b3cc34e4dac32 Mon Sep 17 00:00:00 2001 +From: Harmen Stoppels +Date: Mon, 21 Aug 2023 10:13:10 +0200 +Subject: [PATCH 2/3] PR #1561 + +--- + configure | 50 +++++++++++++++++++++++++------------------------- + 1 file changed, 25 insertions(+), 25 deletions(-) + +diff --git a/configure b/configure +index 6b4e7fff..fc78a135 100755 +--- a/configure ++++ b/configure +@@ -1,4 +1,4 @@ +-#!/usr/bin/env bash ++#!/bin/sh + # configure script for zlib. + # + # Normally configure builds both a static and a shared library. +@@ -230,13 +230,13 @@ test=ztest$$ + # put arguments in log, also put test file in log if used in arguments + show() + { +- case "$*" in ++ case "$@" in + *$test.c*) + echo "=== $test.c ===" >> configure.log + cat $test.c >> configure.log + echo "===" >> configure.log;; + esac +- echo $* >> configure.log ++ echo "$@" >> configure.log + } + + # check for gcc vs. cc and set compile and link flags based on the system identified by uname +@@ -246,7 +246,7 @@ int main() {return getchar();} + EOF + + cc=${CC-${CROSS_PREFIX}gcc} +-echo -n "Checking for compiler... " | tee -a configure.log ++printf "Checking for compiler... " | tee -a configure.log + case "$cc" in + *gcc*) gcc=1 ;; + *clang*) gcc=1 ;; +@@ -401,7 +401,7 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then + SFLAGS="${SFLAGS} -DNDEBUG" + fi + if test -z "$uname"; then +- uname=$((uname -s || echo unknown) 2>/dev/null) ++ uname=$( (uname -s || echo unknown) 2>/dev/null) + fi + case "$uname" in + Linux* | linux* | GNU | GNU/* | solaris*) +@@ -483,7 +483,7 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then + fi + RC="${CROSS_PREFIX}windres" + RCFLAGS="-I ${BUILDDIR}" +- if [ "$CC" == "mingw32-gcc" ]; then ++ if [ "$CC" = "mingw32-gcc" ]; then + case $ARCH in + i386 | i486 | i586 | i686) RCFLAGS="${RCFLAGS} -F pe-i386";; + esac; +@@ -498,7 +498,7 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then + HP-UX*) + LDSHARED=${LDSHARED-"$cc"} + LDSHAREDFLAGS="-shared" +- case $((uname -m || echo unknown) 2>/dev/null) in ++ case $( (uname -m || echo unknown) 2>/dev/null) in + ia64) + shared_ext='.so' + SHAREDLIB='${LIBNAME}.so' ;; +@@ -539,14 +539,14 @@ else + gcc=0 + echo "$CC" | tee -a configure.log + if test -z "$uname"; then +- uname=$((uname -sr || echo unknown) 2>/dev/null) ++ uname=$( (uname -sr || echo unknown) 2>/dev/null) + fi + case "$uname" in + HP-UX*) SFLAGS=${CFLAGS-"-O +z"} + CFLAGS=${CFLAGS-"-O"} + LDSHARED=${LDSHARED-"ld"} + LDSHAREDFLAGS="-b" +- case $((uname -m || echo unknown) 2>/dev/null) in ++ case $( (uname -m || echo unknown) 2>/dev/null) in + ia64) + shared_ext='.so' + SHAREDLIB='${LIBNAME}.so' ;; +@@ -591,15 +591,15 @@ EOF + if ($CC -c $CFLAGS $test.c) 2>/dev/null; then + try() + { +- show $* +- test "$(\( $* \) 2>&1 | tee -a configure.log)" = "" ++ show "$@" ++ test "$( ("$@") 2>&1 | tee -a configure.log)" = "" + } + echo - using any output from compiler to indicate an error >> configure.log + else + try() + { +- show $* +- ( $* ) >> configure.log 2>&1 ++ show "$@" ++ ( "$@" ) >> configure.log 2>&1 + ret=$? + if test $ret -ne 0; then + echo "(exit code $ret)" >> configure.log +@@ -627,7 +627,7 @@ extern int getchar(); + int hello() {return getchar();} + EOF + if test $shared -eq 1; then +- echo -n "Checking for shared library support... " | tee -a configure.log ++ printf "Checking for shared library support... " | tee -a configure.log + # we must test in two steps (cc then ld), required at least on SunOS 4.x + if try $CC -w -c $SFLAGS $test.c && + try $LDSHARED $LDSHAREDFLAGS $LDFLAGS -o $test$shared_ext $test.o $LDSHAREDLIBC; then +@@ -784,7 +784,7 @@ fi + # Rename @ZLIB_SYMBOL_PREFIX@ to $symbol_prefix in gzread.c, zlib.h and zlib_name_mangling.h + sed < $SRCDIR/gzread.c.in "s/@ZLIB_SYMBOL_PREFIX@/$symbol_prefix/g" > gzread.c + sed < $SRCDIR/zlib${SUFFIX}.h.in "s/@ZLIB_SYMBOL_PREFIX@/$symbol_prefix/g" > zlib${SUFFIX}.h +-if [[ ! -z $symbol_prefix ]]; then ++if [ ! -z "$symbol_prefix" ]; then + sed < $SRCDIR/zlib_name_mangling${SUFFIX}.h.in "s/@ZLIB_SYMBOL_PREFIX@/$symbol_prefix/g" > zlib_name_mangling${SUFFIX}.h + else + # symbol_prefix is not set, copy the empty mangling header +@@ -814,7 +814,7 @@ fi + echo >> configure.log + + # check for ptrdiff_t and save result in zconf.h +-echo -n "Checking for ptrdiff_t... " | tee -a configure.log ++printf "Checking for ptrdiff_t... " | tee -a configure.log + cat > $test.c < + int fun(ptrdiff_t *a) { (void)a; return 0; } +@@ -826,7 +826,7 @@ else + sed < zconf${SUFFIX}.h "/^#ifdef NEED_PTRDIFF_T.* may be/s/def NEED_PTRDIFF_T\(.*\) may be/ 1\1 was/" > zconf${SUFFIX}.temp.h + mv zconf${SUFFIX}.temp.h zconf${SUFFIX}.h + +- echo -n "Checking for sizeof(void *)... " | tee -a configure.log ++ printf "Checking for sizeof(void *)... " | tee -a configure.log + cat > $test.c < + #define COMPILE_TIME_ASSERT(pred) struct s { int x: (pred) ? 1 : -1; } +@@ -864,7 +864,7 @@ if test $compat -eq 1; then + esac + fi + +-if [[ ! -z $DEFFILE ]]; then ++if [ ! -z "$DEFFILE" ]; then + mkdir -p win32 + sed < $SRCDIR/$DEFFILE.in "s/@ZLIB_SYMBOL_PREFIX@/$symbol_prefix/g" > $DEFFILE + fi +@@ -1476,14 +1476,14 @@ EOF + + check_vgfma_intrinsics() { + # Check whether "VECTOR GALOIS FIELD MULTIPLY SUM AND ACCUMULATE" intrinsic is available +- echo -n "Checking for -mzarch... " | tee -a configure.log ++ printf "Checking for -mzarch... " | tee -a configure.log + if try $CC -x c -c /dev/null -o /dev/null -mzarch; then + echo Yes. | tee -a configure.log + vgfmaflag="${vgfmaflag} -mzarch" + else + echo No. | tee -a configure.log + fi +- echo -n "Checking for -fzvector... " | tee -a configure.log ++ printf "Checking for -fzvector... " | tee -a configure.log + if try $CC -x c -c /dev/null -o /dev/null -fzvector; then + echo Yes. | tee -a configure.log + vgfmaflag="${vgfmaflag} -fzvector" +@@ -1500,7 +1500,7 @@ int main(void) { + return c[0]; + } + EOF +- echo -n "Checking for VGFMA support... " | tee -a configure.log ++ printf "Checking for VGFMA support... " | tee -a configure.log + if try $CC -c $CFLAGS $vgfmaflag $test.c; then + HAVE_VGFMA_INTRIN=1 + echo "Yes." | tee -a configure.log +@@ -2156,11 +2156,11 @@ for file in $SRCDIR/*.c $SRCDIR/test/*.c $SRCDIR/test/fuzz/*.c $SRCDIR/$ARCHDIR/ + # Check that the include file exists in the current dir, + # otherwise it may be one of the system include header. + if test -e $SRCDIR/$i; then +- echo -n " \$(SRCDIR)/$i" ++ printf " \$(SRCDIR)/$i" + fi + # We also need to check whether the include file is in the ARCHDIR. + if test -e $SRCDIR/$ARCHDIR/$i; then +- echo -n " \$(SRCDIR)/$ARCHDIR/$i" ++ printf " \$(SRCDIR)/$ARCHDIR/$i" + fi + done) + obj=$(basename $(echo $file | sed -e 's/\.c/\.o/g' -e 's#^\./##g')) +@@ -2233,11 +2233,11 @@ for file in $SRCDIR/$ARCHDIR/*.c; do + # Check that the include file exists in the current dir, + # otherwise it may be one of the system include header. + if test -e $SRCDIR/$i; then +- echo -n " \$(SRCTOP)/$i" ++ printf " \$(SRCTOP)/$i" + fi + # We also need to check whether the include file is in the ARCHDIR. + if test -e $SRCDIR/$ARCHDIR/$i; then +- echo -n " \$(SRCDIR)/$i" ++ printf " \$(SRCDIR)/$i" + fi + done) + obj=$(basename $(echo $file | sed -e 's/\.c/\.o/g' -e 's#^\./##g')) +-- +2.39.2 + diff --git a/var/spack/repos/builtin/packages/zlib-ng/pr-1562.patch b/var/spack/repos/builtin/packages/zlib-ng/pr-1562.patch new file mode 100644 index 0000000000..70806e900c --- /dev/null +++ b/var/spack/repos/builtin/packages/zlib-ng/pr-1562.patch @@ -0,0 +1,432 @@ +From 13df84c54aaf06cc7aeb1813ef60b17591d29ea3 Mon Sep 17 00:00:00 2001 +From: Harmen Stoppels +Date: Mon, 21 Aug 2023 11:10:29 +0200 +Subject: [PATCH 3/3] PR #1562 + +--- + cmake/detect-intrinsics.cmake | 118 ++++++++++++---------------------- + configure | 115 ++++++++++----------------------- + 2 files changed, 73 insertions(+), 160 deletions(-) + +diff --git a/cmake/detect-intrinsics.cmake b/cmake/detect-intrinsics.cmake +index 52c54dc8..d476093f 100644 +--- a/cmake/detect-intrinsics.cmake ++++ b/cmake/detect-intrinsics.cmake +@@ -62,28 +62,19 @@ macro(check_avx512_intrinsics) + set(CMAKE_REQUIRED_FLAGS "${AVX512FLAG} ${NATIVEFLAG}") + check_c_source_compile_or_run( + "#include +- int main(void) { +- __m512i x = _mm512_set1_epi8(2); +- const __m512i y = _mm512_set_epi32(0x1020304, 0x5060708, 0x90a0b0c, 0xd0e0f10, +- 0x11121314, 0x15161718, 0x191a1b1c, 0x1d1e1f20, +- 0x21222324, 0x25262728, 0x292a2b2c, 0x2d2e2f30, +- 0x31323334, 0x35363738, 0x393a3b3c, 0x3d3e3f40); +- x = _mm512_sub_epi8(x, y); +- (void)x; +- return 0; +- }" ++ __m512i f(__m512i y) { ++ __m512i x = _mm512_set1_epi8(2); ++ return _mm512_sub_epi8(x, y); ++ } ++ int main(void) { return 0; }" + HAVE_AVX512_INTRIN + ) + + # Evidently both GCC and clang were late to implementing these + check_c_source_compile_or_run( + "#include +- int main(void) { +- __mmask16 a = 0xFF; +- a = _knot_mask16(a); +- (void)a; +- return 0; +- }" ++ __mmask16 f(__mmask16 x) { return _knot_mask16(x); } ++ int main(void) { return 0; }" + HAVE_MASK_INTRIN + ) + set(CMAKE_REQUIRED_FLAGS) +@@ -117,17 +108,11 @@ macro(check_avx512vnni_intrinsics) + set(CMAKE_REQUIRED_FLAGS "${AVX512VNNIFLAG} ${NATIVEFLAG}") + check_c_source_compile_or_run( + "#include +- int main(void) { +- __m512i x = _mm512_set1_epi8(2); +- const __m512i y = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, +- 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, +- 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, +- 56, 57, 58, 59, 60, 61, 62, 63, 64); ++ __m512i f(__m512i x, __m512i y) { + __m512i z = _mm512_setzero_epi32(); +- z = _mm512_dpbusd_epi32(z, x, y); +- (void)z; +- return 0; +- }" ++ return _mm512_dpbusd_epi32(z, x, y); ++ } ++ int main(void) { return 0; }" + HAVE_AVX512VNNI_INTRIN + ) + set(CMAKE_REQUIRED_FLAGS) +@@ -151,13 +136,11 @@ macro(check_avx2_intrinsics) + set(CMAKE_REQUIRED_FLAGS "${AVX2FLAG} ${NATIVEFLAG}") + check_c_source_compile_or_run( + "#include +- int main(void) { +- __m256i x = _mm256_set1_epi16(2); ++ __m256i f(__m256i x) { + const __m256i y = _mm256_set1_epi16(1); +- x = _mm256_subs_epu16(x, y); +- (void)x; +- return 0; +- }" ++ return _mm256_subs_epu16(x, y); ++ } ++ int main(void) { return 0; }" + HAVE_AVX2_INTRIN + ) + set(CMAKE_REQUIRED_FLAGS) +@@ -204,12 +187,8 @@ macro(check_neon_ld4_intrinsics) + #else + # include + #endif +- int main(void) { +- int stack_var[16]; +- int32x4x4_t v = vld1q_s32_x4(stack_var); +- (void)v; +- return 0; +- }" ++ int32x4x4_t f(int var[16]) { return vld1q_s32_x4(var); } ++ int main(void) { return 0; }" + NEON_HAS_LD4) + set(CMAKE_REQUIRED_FLAGS) + endmacro() +@@ -226,13 +205,9 @@ macro(check_pclmulqdq_intrinsics) + set(CMAKE_REQUIRED_FLAGS "${PCLMULFLAG} ${NATIVEFLAG}") + check_c_source_compile_or_run( + "#include +- int main(void) { +- __m128i a = _mm_setzero_si128(); +- __m128i b = _mm_setzero_si128(); +- __m128i c = _mm_clmulepi64_si128(a, b, 0x10); +- (void)c; +- return 0; +- }" ++ #include ++ __m128i f(__m128i a, __m128i b) { return _mm_clmulepi64_si128(a, b, 0x10); } ++ int main(void) { return 0; }" + HAVE_PCLMULQDQ_INTRIN + ) + set(CMAKE_REQUIRED_FLAGS) +@@ -252,13 +227,12 @@ macro(check_vpclmulqdq_intrinsics) + set(CMAKE_REQUIRED_FLAGS "${VPCLMULFLAG} ${NATIVEFLAG}") + check_c_source_compile_or_run( + "#include +- int main(void) { +- __m512i a = _mm512_setzero_si512(); ++ #include ++ __m512i f(__m512i a) { + __m512i b = _mm512_setzero_si512(); +- __m512i c = _mm512_clmulepi64_epi128(a, b, 0x10); +- (void)c; +- return 0; +- }" ++ return _mm512_clmulepi64_epi128(a, b, 0x10); ++ } ++ int main(void) { return 0; }" + HAVE_VPCLMULQDQ_INTRIN + ) + set(CMAKE_REQUIRED_FLAGS) +@@ -431,11 +405,8 @@ macro(check_sse2_intrinsics) + set(CMAKE_REQUIRED_FLAGS "${SSE2FLAG} ${NATIVEFLAG}") + check_c_source_compile_or_run( + "#include +- int main(void) { +- __m128i zero = _mm_setzero_si128(); +- (void)zero; +- return 0; +- }" ++ __m128i f(__m128i x, __m128i y) { return _mm_sad_epu8(x, y); } ++ int main(void) { return 0; }" + HAVE_SSE2_INTRIN + ) + set(CMAKE_REQUIRED_FLAGS) +@@ -457,14 +428,11 @@ macro(check_ssse3_intrinsics) + set(CMAKE_REQUIRED_FLAGS "${SSSE3FLAG} ${NATIVEFLAG}") + check_c_source_compile_or_run( + "#include +- int main(void) { +- __m128i u, v, w; +- u = _mm_set1_epi32(1); +- v = _mm_set1_epi32(2); +- w = _mm_hadd_epi32(u, v); +- (void)w; +- return 0; +- }" ++ __m128i f(__m128i u) { ++ __m128i v = _mm_set1_epi32(1); ++ return _mm_hadd_epi32(u, v); ++ } ++ int main(void) { return 0; }" + HAVE_SSSE3_INTRIN + ) + endmacro() +@@ -485,13 +453,8 @@ macro(check_sse42_intrinsics) + set(CMAKE_REQUIRED_FLAGS "${SSE42FLAG} ${NATIVEFLAG}") + check_c_source_compile_or_run( + "#include +- int main(void) { +- unsigned crc = 0; +- char c = 'c'; +- crc = _mm_crc32_u32(crc, c); +- (void)crc; +- return 0; +- }" ++ unsigned int f(unsigned int a, unsigned int b) { return _mm_crc32_u32(a, b); } ++ int main(void) { return 0; }" + HAVE_SSE42_INTRIN + ) + set(CMAKE_REQUIRED_FLAGS) +@@ -529,13 +492,12 @@ macro(check_xsave_intrinsics) + set(CMAKE_REQUIRED_FLAGS "${XSAVEFLAG} ${NATIVEFLAG}") + check_c_source_compiles( + "#ifdef _WIN32 +- # include +- #else +- # include +- #endif +- int main(void) { +- return _xgetbv(0); +- }" ++ # include ++ #else ++ # include ++ #endif ++ unsigned int f(unsigned int a) { return _xgetbv(a); } ++ int main(void) { return 0; }" + HAVE_XSAVE_INTRIN FAIL_REGEX "not supported") + set(CMAKE_REQUIRED_FLAGS) + endmacro() +diff --git a/configure b/configure +index fc78a135..e5a1965f 100755 +--- a/configure ++++ b/configure +@@ -1023,12 +1023,8 @@ fi + + # Check for __builtin_ctz() support in compiler + cat > $test.c << EOF +-int main(void) { +- unsigned int zero = 0; +- long test = __builtin_ctz(zero); +- (void)test; +- return 0; +-} ++long f(unsigned int x) { return __builtin_ctz(x); } ++int main(void) { return 0; } + EOF + if try ${CC} ${CFLAGS} $test.c $LDSHAREDLIBC; then + echo "Checking for __builtin_ctz ... Yes." | tee -a configure.log +@@ -1040,12 +1036,8 @@ fi + + # Check for __builtin_ctzll() support in compiler + cat > $test.c << EOF +-int main(void) { +- unsigned long long zero = 0; +- long test = __builtin_ctzll(zero); +- (void)test; +- return 0; +-} ++long f(unsigned long long x) { return __builtin_ctzll(x); } ++int main(void) { return 0; } + EOF + if try ${CC} ${CFLAGS} $test.c $LDSHAREDLIBC; then + echo "Checking for __builtin_ctzll ... Yes." | tee -a configure.log +@@ -1059,13 +1051,11 @@ check_avx2_intrinsics() { + # Check whether compiler supports AVX2 intrinsics + cat > $test.c << EOF + #include +-int main(void) { +- __m256i x = _mm256_set1_epi16(2); ++__m256i f(__m256i x) { + const __m256i y = _mm256_set1_epi16(1); +- x = _mm256_subs_epu16(x, y); +- (void)x; +- return 0; ++ return _mm256_subs_epu16(x, y); + } ++int main(void) { return 0; } + EOF + if try ${CC} ${CFLAGS} ${avx2flag} $test.c; then + echo "Checking for AVX2 intrinsics ... Yes." | tee -a configure.log +@@ -1080,16 +1070,11 @@ check_avx512_intrinsics() { + # Check whether compiler supports AVX512 intrinsics + cat > $test.c << EOF + #include +-int main(void) { +- __m512i x = _mm512_set1_epi8(2); +- const __m512i y = _mm512_set_epi32(0x1020304, 0x5060708, 0x90a0b0c, 0xd0e0f10, +- 0x11121314, 0x15161718, 0x191a1b1c, 0x1d1e1f20, +- 0x21222324, 0x25262728, 0x292a2b2c, 0x2d2e2f30, +- 0x31323334, 0x35363738, 0x393a3b3c, 0x3d3e3f40); +- x = _mm512_sub_epi8(x, y); +- (void)x; +- return 0; ++__m512i f(__m512i y) { ++ __m512i x = _mm512_set1_epi8(2); ++ return _mm512_sub_epi8(x, y); + } ++int main(void) { return 0; } + EOF + if try ${CC} ${CFLAGS} ${avx512flag} $test.c; then + echo "Checking for AVX512 intrinsics ... Yes." | tee -a configure.log +@@ -1133,17 +1118,11 @@ check_avx512vnni_intrinsics() { + # Check whether compiler supports AVX512-VNNI intrinsics + cat > $test.c << EOF + #include +-int main(void) { +- __m512i x = _mm512_set1_epi8(2); +- const __m512i y = _mm512_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, +- 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, +- 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, +- 56, 57, 58, 59, 60, 61, 62, 63, 64); ++__m512i f(__m512i x, __m512i y) { + __m512i z = _mm512_setzero_epi32(); +- z = _mm512_dpbusd_epi32(z, x, y); +- (void)z; +- return 0; ++ return _mm512_dpbusd_epi32(z, x, y); + } ++int main(void) { return 0; } + EOF + if try ${CC} ${CFLAGS} ${avx512vnniflag} $test.c; then + echo "Checking for AVX512VNNI intrinsics ... Yes." | tee -a configure.log +@@ -1158,12 +1137,8 @@ check_mask_intrinsics() { + # Check whether compiler supports AVX512 k-mask intrinsics + cat > $test.c << EOF + #include +-int main(void) { +- __mmask16 a = 0xFF; +- a = _knot_mask16(a); +- (void)a; +- return 0; +-} ++__mmask16 f(__mmask16 x) { return _knot_mask16(x); } ++int main(void) { return 0; } + EOF + if try ${CC} ${CFLAGS} ${avx512flag} $test.c; then + echo "Checking for AVX512 k-mask intrinsics ... Yes." | tee -a configure.log +@@ -1230,12 +1205,8 @@ check_neon_ld4_intrinsics() { + #else + # include + #endif +-int main(void) { +- int stack_var[16]; +- int32x4x4_t v = vld1q_s32_x4(stack_var); +- (void)v; +- return 0; +-} ++int32x4x4_t f(int var[16]) { return vld1q_s32_x4(var); } ++int main(void) { return 0; } + EOF + if try $CC -c $CFLAGS $neonflag $test.c; then + NEON_HAS_LD4=1 +@@ -1251,13 +1222,8 @@ check_pclmulqdq_intrinsics() { + cat > $test.c << EOF + #include + #include +-int main(void) { +- __m128i a = _mm_setzero_si128(); +- __m128i b = _mm_setzero_si128(); +- __m128i c = _mm_clmulepi64_si128(a, b, 0x10); +- (void)c; +- return 0; +-} ++__m128i f(__m128i a, __m128i b) { return _mm_clmulepi64_si128(a, b, 0x10); } ++int main(void) { return 0; } + EOF + if try ${CC} ${CFLAGS} ${pclmulflag} $test.c; then + echo "Checking for PCLMULQDQ intrinsics ... Yes." | tee -a configure.log +@@ -1273,13 +1239,11 @@ check_vpclmulqdq_intrinsics() { + cat > $test.c << EOF + #include + #include +-int main(void) { +- __m512i a = _mm512_setzero_si512(); ++__m512i f(__m512i a) { + __m512i b = _mm512_setzero_si512(); +- __m512i c = _mm512_clmulepi64_epi128(a, b, 0x10); +- (void)c; +- return 0; ++ return _mm512_clmulepi64_epi128(a, b, 0x10); + } ++int main(void) { return 0; } + EOF + if try ${CC} ${CFLAGS} ${vpclmulflag} $test.c; then + echo "Checking for VPCLMULQDQ intrinsics ... Yes." | tee -a configure.log +@@ -1298,9 +1262,8 @@ check_xsave_intrinsics() { + #else + # include + #endif +-int main(void) { +- return _xgetbv(0); +-} ++unsigned int f(unsigned int a) { return _xgetbv(a); } ++int main(void) { return 0; } + EOF + if try ${CC} ${CFLAGS} ${xsaveflag} $test.c; then + echo "Checking for XSAVE intrinsics ... Yes." | tee -a configure.log +@@ -1415,11 +1378,8 @@ check_sse2_intrinsics() { + # Check whether compiler supports SSE2 intrinsics + cat > $test.c << EOF + #include +-int main(void) { +- __m128i zero = _mm_setzero_si128(); +- (void)zero; +- return 0; +-} ++__m128i f(__m128i x, __m128i y) { return _mm_sad_epu8(x, y); } ++int main(void) { return 0; } + EOF + if try ${CC} ${CFLAGS} ${sse2flag} $test.c; then + echo "Checking for SSE2 intrinsics ... Yes." | tee -a configure.log +@@ -1434,13 +1394,8 @@ check_sse42_intrinsics() { + # Check whether compiler supports SSE4.2 intrinsics + cat > $test.c << EOF + #include +-int main(void) { +- unsigned crc = 0; +- char c = 'c'; +- crc = _mm_crc32_u32(crc, c); +- (void)crc; +- return 0; +-} ++unsigned int f(unsigned int a, unsigned int b) { return _mm_crc32_u32(a, b); } ++int main(void) { return 0; } + EOF + if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then + echo "Checking for SSE4.2 intrinsics ... Yes." | tee -a configure.log +@@ -1455,15 +1410,11 @@ check_ssse3_intrinsics() { + # Check whether compiler supports SSSE3 intrinsics + cat > $test.c << EOF + #include +-int main(void) +-{ +- __m128i u, v, w; +- u = _mm_set1_epi32(1); +- v = _mm_set1_epi32(2); +- w = _mm_hadd_epi32(u, v); +- (void)w; +- return 0; ++__m128i f(__m128i u) { ++ __m128i v = _mm_set1_epi32(1); ++ return _mm_hadd_epi32(u, v); + } ++int main(void) { return 0; } + EOF + if try ${CC} ${CFLAGS} ${ssse3flag} $test.c; then + echo "Checking for SSSE3 intrinsics ... Yes." | tee -a configure.log +-- +2.39.2 +