1 # ===========================================================================
2 # https://www.gnu.org/software/autoconf-archive/ax_ext.html
3 # ===========================================================================
11 # Find supported SIMD extensions by requesting cpuid. When a SIMD
12 # extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
13 # compiler supports it. For example, if "sse2" is available then "-msse2"
14 # is added to SIMD_FLAGS.
16 # Find other supported CPU extensions by requesting cpuid. When a
17 # processor extension is found, the -m"extensionname" is added to
18 # CPUEXT_FLAGS if compiler supports it. For example, if "bmi2" is
19 # available then "-mbmi2" is added to CPUEXT_FLAGS.
23 # AC_SUBST(SIMD_FLAGS)
24 # AC_SUBST(CPUEXT_FLAGS)
28 # HAVE_RDRND / HAVE_BMI1 / HAVE_BMI2 / HAVE_ADX / HAVE_MPX
29 # HAVE_PREFETCHWT1 / HAVE_ABM / HAVE_MMX / HAVE_SSE / HAVE_SSE2
30 # HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4_1 / HAVE_SSE4_2 / HAVE_SSE4a
31 # HAVE_SHA / HAVE_AES / HAVE_AVX / HAVE_FMA3 / HAVE_FMA4 / HAVE_XOP
32 # HAVE_AVX2 / HAVE_AVX512_F / HAVE_AVX512_CD / HAVE_AVX512_PF
33 # HAVE_AVX512_ER / HAVE_AVX512_VL / HAVE_AVX512_BW / HAVE_AVX512_DQ
34 # HAVE_AVX512_IFMA / HAVE_AVX512_VBMI / HAVE_ALTIVEC / HAVE_VSX
38 # Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
39 # Copyright (c) 2013,2015 Michael Petch <mpetch@capp-sysware.com>
40 # Copyright (c) 2017 Rafael de Lucena Valle <rafaeldelucena@gmail.com>
42 # Copying and distribution of this file, with or without modification, are
43 # permitted in any medium without royalty provided the copyright notice
44 # and this notice are preserved. This file is offered as-is, without any
# AC_CANONICAL_HOST and AC_PROG_CC must have been expanded before the checks below run.
51 AC_REQUIRE([AC_CANONICAL_HOST])
52 AC_REQUIRE([AC_PROG_CC])
# Legacy AltiVec probe. It first counts the lines of the /usr/sbin/sysctl -a
# output that mention hw.optional.altivec, then reads that key with sysctl -n
# and caches yes in ax_cv_have_altivec_old_ext when the value is 1.
59 AC_CACHE_CHECK([whether altivec is supported for old distros], [ax_cv_have_altivec_old_ext],
61 if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
62 if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
63 ax_cv_have_altivec_old_ext=yes
# On a positive legacy probe: define HAVE_ALTIVEC with an empty value and
# append -faltivec to SIMD_FLAGS when the compiler accepts that flag.
68 if test "$ax_cv_have_altivec_old_ext" = yes; then
69 AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
70 AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
# AltiVec probe through the auxiliary vector: run /bin/true with LD_SHOW_AUXV=1
# and count the output lines that mention altivec.
73 AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
75 if test `LD_SHOW_AUXV=1 /bin/true 2>/dev/null|grep -c altivec` != 0; then
76 ax_cv_have_altivec_ext=yes
# On a positive probe: define HAVE_ALTIVEC with an empty value and append
# -maltivec to SIMD_FLAGS when the compiler accepts that flag.
80 if test "$ax_cv_have_altivec_ext" = yes; then
81 AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
82 AX_CHECK_COMPILE_FLAG(-maltivec, SIMD_FLAGS="$SIMD_FLAGS -maltivec", [])
# VSX probe: same auxiliary vector technique, searching for vsx instead.
85 AC_CACHE_CHECK([whether vsx is supported], [ax_cv_have_vsx_ext],
87 if test `LD_SHOW_AUXV=1 /bin/true 2>/dev/null|grep -c vsx` != 0; then
88 ax_cv_have_vsx_ext=yes
# On a positive probe: define HAVE_VSX with an empty value and append -mvsx
# to SIMD_FLAGS when the compiler accepts that flag.
92 if test "$ax_cv_have_vsx_ext" = yes; then
93 AC_DEFINE(HAVE_VSX,,[Support VSX instructions])
94 AX_CHECK_COMPILE_FLAG(-mvsx, SIMD_FLAGS="$SIMD_FLAGS -mvsx", [])
# x86 family hosts: collect the raw CPUID register values used by the
# feature checks further down.
98 i[[3456]]86*|x86_64*|amd64*)
# Helper macros that run CPUID and XGETBV and cache their results.
100 AC_REQUIRE([AX_GCC_X86_CPUID])
101 AC_REQUIRE([AX_GCC_X86_CPUID_COUNT])
102 AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])
# Leaf 0x00000000: unless the result is unknown, keep the first
# colon-separated field as eax_cpuid0. It is compared against 1 and 7 below
# before those leaves are queried.
105 AX_GCC_X86_CPUID(0x00000000)
106 if test "$ax_cv_gcc_x86_cpuid_0x00000000" != "unknown";
108 eax_cpuid0=`echo $ax_cv_gcc_x86_cpuid_0x00000000 | cut -d ":" -f 1`
# Leaf 0x80000000: keep the first field as eax_cpuid80000000. It is compared
# against 0x80000001 below before that extended leaf is queried.
112 AX_GCC_X86_CPUID(0x80000000)
113 if test "$ax_cv_gcc_x86_cpuid_0x80000000" != "unknown";
115 eax_cpuid80000000=`echo $ax_cv_gcc_x86_cpuid_0x80000000 | cut -d ":" -f 1`
# Leaf 1, queried only when eax_cpuid0 is at least 1: fields 3 and 4 of the
# result become ecx_cpuid1 and edx_cpuid1.
120 if test "$((0x$eax_cpuid0))" -ge 1 ; then
121 AX_GCC_X86_CPUID(0x00000001)
122 if test "$ax_cv_gcc_x86_cpuid_0x00000001" != "unknown";
124 ecx_cpuid1=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
125 edx_cpuid1=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
# Leaf 7 with count 0, queried only when eax_cpuid0 is at least 7: fields 2
# and 3 of the result become ebx_cpuid7 and ecx_cpuid7.
131 if test "$((0x$eax_cpuid0))" -ge 7 ; then
132 AX_GCC_X86_CPUID_COUNT(0x00000007, 0x00)
133 if test "$ax_cv_gcc_x86_cpuid_0x00000007" != "unknown";
135 ebx_cpuid7=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 2`
136 ecx_cpuid7=`echo $ax_cv_gcc_x86_cpuid_0x00000007 | cut -d ":" -f 3`
# Extended leaf 0x80000001, queried only when eax_cpuid80000000 reaches it:
# fields 3 and 4 become ecx_cpuid80000001 and edx_cpuid80000001.
142 if test "$((0x$eax_cpuid80000000))" -ge "$((0x80000001))" ; then
143 AX_GCC_X86_CPUID(0x80000001)
144 if test "$ax_cv_gcc_x86_cpuid_0x80000001" != "unknown";
146 ecx_cpuid80000001=`echo $ax_cv_gcc_x86_cpuid_0x80000001 | cut -d ":" -f 3`
147 edx_cpuid80000001=`echo $ax_cv_gcc_x86_cpuid_0x80000001 | cut -d ":" -f 4`
# MMX gets no runtime OS probe here: unless a cached value already exists,
# its OS support variable is simply set to yes.
151 AC_CACHE_VAL([ax_cv_have_mmx_os_support_ext],
153 ax_cv_have_mmx_os_support_ext=yes
# OS support value for the none family, which the feature table uses for
# extensions that need no OS support check.
156 ax_cv_have_none_os_support_ext=yes
# OS support for SSE, cached in ax_cv_have_sse_os_support_ext.
# The default is exactly no, with no trailing punctuation, so that later
# comparisons of this value against no can match.
# When bit 25 of edx_cpuid1 is set, a small C program installs a SIGILL
# handler that exits with status 1 and then executes the SSE instruction
# xorps on xmm0, encoded as raw bytes.
# NOTE(review): the three trailing assignments look like the run-succeeded,
# run-failed and cross-compiling outcomes of a run test; the macro call that
# owns them is not visible in this excerpt, so confirm against the full file.
158 AC_CACHE_VAL([ax_cv_have_sse_os_support_ext],
160 ax_cv_have_sse_os_support_ext=no
161 if test "$((0x$edx_cpuid1>>25&0x01))" = 1; then
166 /* No way at ring1 to ring3 in protected mode to check the CR0 and CR4
167 control registers directly. Execute an SSE instruction.
168 If it raises SIGILL then OS doesn't support SSE based instructions */
169 void sig_handler(int signum){ exit(1); }
171 signal(SIGILL, sig_handler);
172 /* SSE instruction xorps %xmm0,%xmm0 */
173 __asm__ __volatile__ (".byte 0x0f, 0x57, 0xc0");
176 ax_cv_have_sse_os_support_ext=yes,
177 ax_cv_have_sse_os_support_ext=no,
178 ax_cv_have_sse_os_support_ext=no)
# Only when bit 28 of ecx_cpuid1 is set (the bit the feature table uses for
# AVX): read XGETBV register 0 and, unless the result is unknown, keep its
# first colon-separated field as xgetbv_eax.
184 if test "$((0x$ecx_cpuid1>>28&0x01))" = 1; then
185 AX_GCC_X86_AVX_XGETBV(0x00000000)
187 if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
188 xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
# AVX OS support: defaults to no and becomes yes only when bit 27 of
# ecx_cpuid1 is set and both bits of mask 0x6 are set in xgetbv_eax.
191 AC_CACHE_VAL([ax_cv_have_avx_os_support_ext],
193 ax_cv_have_avx_os_support_ext=no
194 if test "$((0x$ecx_cpuid1>>27&0x01))" = 1; then
195 if test "$((0x$xgetbv_eax&0x6))" = 6; then
196 ax_cv_have_avx_os_support_ext=yes
# AVX-512 OS support: defaults to no and becomes yes only when AVX OS support
# is yes and every bit of mask 0xe6 is set in xgetbv_eax.
202 AC_CACHE_VAL([ax_cv_have_avx512_os_support_ext],
204 ax_cv_have_avx512_os_support_ext=no
205 if test "$ax_cv_have_avx_os_support_ext" = yes; then
206 if test "$((0x$xgetbv_eax&0xe6))" = "$((0xe6))"; then
207 ax_cv_have_avx512_os_support_ext=yes
# Feature table driving the per-extension checks. Each row holds seven
# semicolon-separated fields:
#   1. OS support family, selecting ax_cv_have_<family>_os_support_ext
#   2. stem used to build the per-feature cache variable names
#   3. display name used in check and warning messages
#   4. name of the CPUID register variable and the bit index, comma separated
#   5. compiler flag to test
#   6. preprocessor symbol to define
#   7. name of the variable that receives the compiler flag
# Every row ends in dnl so that the whole list reaches the shell as a single
# for statement; do not insert comment lines between the rows.
# SSE3 is reported in CPUID leaf 1 ECX bit 0. Bit 1 of that register is
# PCLMULQDQ, so the SSE3 row must test bit 0.
212 for ac_instr_info dnl
213 in "none;rdrnd;RDRND;ecx_cpuid1,30;-mrdrnd;HAVE_RDRND;CPUEXT_FLAGS" dnl
214 "none;bmi1;BMI1;ebx_cpuid7,3;-mbmi;HAVE_BMI1;CPUEXT_FLAGS" dnl
215 "none;bmi2;BMI2;ebx_cpuid7,8;-mbmi2;HAVE_BMI2;CPUEXT_FLAGS" dnl
216 "none;adx;ADX;ebx_cpuid7,19;-madx;HAVE_ADX;CPUEXT_FLAGS" dnl
217 "none;mpx;MPX;ebx_cpuid7,14;-mmpx;HAVE_MPX;CPUEXT_FLAGS" dnl
218 "none;prefetchwt1;PREFETCHWT1;ecx_cpuid7,0;-mprefetchwt1;HAVE_PREFETCHWT1;CPUEXT_FLAGS" dnl
219 "none;abm;ABM;ecx_cpuid80000001,5;-mabm;HAVE_ABM;CPUEXT_FLAGS" dnl
220 "mmx;mmx;MMX;edx_cpuid1,23;-mmmx;HAVE_MMX;SIMD_FLAGS" dnl
221 "sse;sse;SSE;edx_cpuid1,25;-msse;HAVE_SSE;SIMD_FLAGS" dnl
222 "sse;sse2;SSE2;edx_cpuid1,26;-msse2;HAVE_SSE2;SIMD_FLAGS" dnl
223 "sse;sse3;SSE3;ecx_cpuid1,0;-msse3;HAVE_SSE3;SIMD_FLAGS" dnl
224 "sse;ssse3;SSSE3;ecx_cpuid1,9;-mssse3;HAVE_SSSE3;SIMD_FLAGS" dnl
225 "sse;sse41;SSE4.1;ecx_cpuid1,19;-msse4.1;HAVE_SSE4_1;SIMD_FLAGS" dnl
226 "sse;sse42;SSE4.2;ecx_cpuid1,20;-msse4.2;HAVE_SSE4_2;SIMD_FLAGS" dnl
227 "sse;sse4a;SSE4a;ecx_cpuid80000001,6;-msse4a;HAVE_SSE4a;SIMD_FLAGS" dnl
228 "sse;sha;SHA;ebx_cpuid7,29;-msha;HAVE_SHA;SIMD_FLAGS" dnl
229 "sse;aes;AES;ecx_cpuid1,25;-maes;HAVE_AES;SIMD_FLAGS" dnl
230 "avx;avx;AVX;ecx_cpuid1,28;-mavx;HAVE_AVX;SIMD_FLAGS" dnl
231 "avx;fma3;FMA3;ecx_cpuid1,12;-mfma;HAVE_FMA3;SIMD_FLAGS" dnl
232 "avx;fma4;FMA4;ecx_cpuid80000001,16;-mfma4;HAVE_FMA4;SIMD_FLAGS" dnl
233 "avx;xop;XOP;ecx_cpuid80000001,11;-mxop;HAVE_XOP;SIMD_FLAGS" dnl
234 "avx;avx2;AVX2;ebx_cpuid7,5;-mavx2;HAVE_AVX2;SIMD_FLAGS" dnl
235 "avx512;avx512f;AVX512-F;ebx_cpuid7,16;-mavx512f;HAVE_AVX512_F;SIMD_FLAGS" dnl
236 "avx512;avx512cd;AVX512-CD;ebx_cpuid7,28;-mavx512cd;HAVE_AVX512_CD;SIMD_FLAGS" dnl
237 "avx512;avx512pf;AVX512-PF;ebx_cpuid7,26;-mavx512pf;HAVE_AVX512_PF;SIMD_FLAGS" dnl
238 "avx512;avx512er;AVX512-ER;ebx_cpuid7,27;-mavx512er;HAVE_AVX512_ER;SIMD_FLAGS" dnl
239 "avx512;avx512vl;AVX512-VL;ebx_cpuid7,31;-mavx512vl;HAVE_AVX512_VL;SIMD_FLAGS" dnl
240 "avx512;avx512bw;AVX512-BW;ebx_cpuid7,30;-mavx512bw;HAVE_AVX512_BW;SIMD_FLAGS" dnl
241 "avx512;avx512dq;AVX512-DQ;ebx_cpuid7,17;-mavx512dq;HAVE_AVX512_DQ;SIMD_FLAGS" dnl
242 "avx512;avx512ifma;AVX512-IFMA;ebx_cpuid7,21;-mavx512ifma;HAVE_AVX512_IFMA;SIMD_FLAGS" dnl
243 "avx512;avx512vbmi;AVX512-VBMI;ecx_cpuid7,1;-mavx512vbmi;HAVE_AVX512_VBMI;SIMD_FLAGS" dnl
# Loop body: split the current table row into its semicolon-separated fields.
# Field 1 is expanded indirectly into the value of the matching
# ax_cv_have_<family>_os_support_ext variable.
245 do ac_instr_os_support=$(eval echo \$ax_cv_have_$(echo $ac_instr_info | cut -d ";" -f 1)_os_support_ext)
246 ac_instr_acvar=$(echo $ac_instr_info | cut -d ";" -f 2)
247 ac_instr_shortname=$(echo $ac_instr_info | cut -d ";" -f 3)
248 ac_instr_chk_loc=$(echo $ac_instr_info | cut -d ";" -f 4)
# Field 4 is a register variable name and a bit index. The register variable
# is dereferenced and prefixed with 0x for use in shell arithmetic.
249 ac_instr_chk_reg=0x$(eval echo \$$(echo $ac_instr_chk_loc | cut -d "," -f 1))
250 ac_instr_chk_bit=$(echo $ac_instr_chk_loc | cut -d "," -f 2)
251 ac_instr_compiler_flags=$(echo $ac_instr_info | cut -d ";" -f 5)
252 ac_instr_have_define=$(echo $ac_instr_info | cut -d ";" -f 6)
253 ac_instr_flag_type=$(echo $ac_instr_info | cut -d ";" -f 7)
# Processor check: the cpu_ext cache variable starts as no and becomes yes
# when the selected bit of the register value is set.
255 AC_CACHE_CHECK([whether ${ac_instr_shortname} is supported by the processor], [ax_cv_have_${ac_instr_acvar}_cpu_ext],
257 eval ax_cv_have_${ac_instr_acvar}_cpu_ext=no
258 if test "$((${ac_instr_chk_reg}>>${ac_instr_chk_bit}&0x01))" = 1 ; then
259 eval ax_cv_have_${ac_instr_acvar}_cpu_ext=yes
# Processor reports the feature: combine with OS support. The ext cache
# variable starts as no and becomes yes when the family OS support is yes.
263 if test x"$(eval echo \$ax_cv_have_${ac_instr_acvar}_cpu_ext)" = x"yes"; then
264 AC_CACHE_CHECK([whether ${ac_instr_shortname} is supported by the processor and OS], [ax_cv_have_${ac_instr_acvar}_ext],
266 eval ax_cv_have_${ac_instr_acvar}_ext=no
267 if test x"${ac_instr_os_support}" = x"yes"; then
268 eval ax_cv_have_${ac_instr_acvar}_ext=yes
# Processor and OS both support the feature: test the compiler flag. When it
# is accepted, append it to the variable named by field 7 and define the
# symbol from field 6. The warning below covers the rejected-flag case.
272 if test "$(eval echo \$ax_cv_have_${ac_instr_acvar}_ext)" = yes; then
273 AX_CHECK_COMPILE_FLAG(${ac_instr_compiler_flags}, eval ax_cv_support_${ac_instr_acvar}_ext=yes,
274 eval ax_cv_support_${ac_instr_acvar}_ext=no)
275 if test x"$(eval echo \$ax_cv_support_${ac_instr_acvar}_ext)" = x"yes"; then
276 eval ${ac_instr_flag_type}=\"\$${ac_instr_flag_type} ${ac_instr_compiler_flags}\"
277 AC_DEFINE_UNQUOTED([${ac_instr_have_define}])
279 AC_MSG_WARN([Your processor and OS supports ${ac_instr_shortname} instructions but not your compiler, can you try another compiler?])
# OS support value is no: default the compiler support cache variable to no
# and warn that the OS is the limiting factor.
282 if test x"${ac_instr_os_support}" = x"no"; then
283 AC_CACHE_VAL(ax_cv_support_${ac_instr_acvar}_ext, eval ax_cv_support_${ac_instr_acvar}_ext=no)
284 AC_MSG_WARN([Your processor supports ${ac_instr_shortname}, but your OS doesn't])
# NOTE(review): presumably the branch taken when the processor lacks the
# feature; the enclosing else is not visible in this excerpt. Both cache
# variables default to no.
288 AC_CACHE_VAL(ax_cv_have_${ac_instr_acvar}_ext, eval ax_cv_have_${ac_instr_acvar}_ext=no)
289 AC_CACHE_VAL(ax_cv_support_${ac_instr_acvar}_ext, eval ax_cv_support_${ac_instr_acvar}_ext=no)
# config.h templates, one per HAVE_ symbol named in the feature table. The
# symbols are defined through AC_DEFINE_UNQUOTED without a description, so
# the descriptions are supplied here.
295 AH_TEMPLATE([HAVE_RDRND],[Define to 1 to support Digital Random Number Generator])
296 AH_TEMPLATE([HAVE_BMI1],[Define to 1 to support Bit Manipulation Instruction Set 1])
297 AH_TEMPLATE([HAVE_BMI2],[Define to 1 to support Bit Manipulation Instruction Set 2])
298 AH_TEMPLATE([HAVE_ADX],[Define to 1 to support Multi-Precision Add-Carry Instruction Extensions])
299 AH_TEMPLATE([HAVE_MPX],[Define to 1 to support Memory Protection Extensions])
300 AH_TEMPLATE([HAVE_PREFETCHWT1],[Define to 1 to support Prefetch Vector Data Into Caches WT1])
301 AH_TEMPLATE([HAVE_ABM],[Define to 1 to support Advanced Bit Manipulation])
302 AH_TEMPLATE([HAVE_MMX],[Define to 1 to support Multimedia Extensions])
303 AH_TEMPLATE([HAVE_SSE],[Define to 1 to support Streaming SIMD Extensions])
304 AH_TEMPLATE([HAVE_SSE2],[Define to 1 to support Streaming SIMD Extensions 2])
305 AH_TEMPLATE([HAVE_SSE3],[Define to 1 to support Streaming SIMD Extensions 3])
306 AH_TEMPLATE([HAVE_SSSE3],[Define to 1 to support Supplemental Streaming SIMD Extensions 3])
307 AH_TEMPLATE([HAVE_SSE4_1],[Define to 1 to support Streaming SIMD Extensions 4.1])
308 AH_TEMPLATE([HAVE_SSE4_2],[Define to 1 to support Streaming SIMD Extensions 4.2])
309 AH_TEMPLATE([HAVE_SSE4a],[Define to 1 to support AMD Streaming SIMD Extensions 4a])
310 AH_TEMPLATE([HAVE_SHA],[Define to 1 to support Secure Hash Algorithm Extension])
311 AH_TEMPLATE([HAVE_AES],[Define to 1 to support Advanced Encryption Standard New Instruction Set (AES-NI)])
312 AH_TEMPLATE([HAVE_AVX],[Define to 1 to support Advanced Vector Extensions])
313 AH_TEMPLATE([HAVE_FMA3],[Define to 1 to support Fused Multiply-Add Extensions 3])
314 AH_TEMPLATE([HAVE_FMA4],[Define to 1 to support Fused Multiply-Add Extensions 4])
315 AH_TEMPLATE([HAVE_XOP],[Define to 1 to support eXtended Operations Extensions])
316 AH_TEMPLATE([HAVE_AVX2],[Define to 1 to support Advanced Vector Extensions 2])
317 AH_TEMPLATE([HAVE_AVX512_F],[Define to 1 to support AVX-512 Foundation Extensions])
318 AH_TEMPLATE([HAVE_AVX512_CD],[Define to 1 to support AVX-512 Conflict Detection Instructions])
319 AH_TEMPLATE([HAVE_AVX512_PF],[Define to 1 to support AVX-512 Prefetch Instructions])
320 AH_TEMPLATE([HAVE_AVX512_ER],[Define to 1 to support AVX-512 Exponential & Reciprocal Instructions])
321 AH_TEMPLATE([HAVE_AVX512_VL],[Define to 1 to support AVX-512 Vector Length Extensions])
322 AH_TEMPLATE([HAVE_AVX512_BW],[Define to 1 to support AVX-512 Byte and Word Instructions])
323 AH_TEMPLATE([HAVE_AVX512_DQ],[Define to 1 to support AVX-512 Doubleword and Quadword Instructions])
324 AH_TEMPLATE([HAVE_AVX512_IFMA],[Define to 1 to support AVX-512 Integer Fused Multiply Add Instructions])
325 AH_TEMPLATE([HAVE_AVX512_VBMI],[Define to 1 to support AVX-512 Vector Byte Manipulation Instructions])
# Make the accumulated CPUEXT_FLAGS value available for substitution in
# generated files.
327 AC_SUBST(CPUEXT_FLAGS)