Implementation notes: amd64, speed2supercop, crypto_core/mult3sntrup761

Computer: speed2supercop
Microarchitecture: amd64; Haswell+AES (306c3)
Architecture: amd64
CPU ID: GenuineIntel-000306c3-1fc9cbf5
SUPERCOP version: 20240808
Operation: crypto_core
Primitive: mult3sntrup761
TimeObject sizeTest sizeImplementationCompilerBenchmark dateSUPERCOP version
687244861 0 058237 752 832avx2unsignedgcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
765636491 0 051142 792 776avx2unsignedclang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
796030995 0 045470 792 776avx2unsignedclang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
858416561 0 027949 752 832avx2unsignedgcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
930823559 0 034150 792 760avx2unsignedclang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
966821094 0 032631 784 856avx2unsignedclang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
980816836 0 027972 744 832avx2unsignedgcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1056011471 0 021552 728 800avx2unsignedgcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1185616839 0 029334 792 776avxclang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1200017899 0 030510 792 776avxclang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1209215522 0 030518 792 776avx800clang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1210414462 0 029342 792 776avx800clang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1283616300 0 024910 792 760avxclang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1283614761 0 028302 792 776round2clang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1289613738 0 024710 792 760avx800clang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1292815789 0 029446 792 776round2clang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1310013128 0 024903 784 856avx800clang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1310814944 0 024647 784 856avxclang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1380412197 0 022310 792 760round2clang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1404010922 0 022151 784 856round2clang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1418415527 0 028893 752 832round2gcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1434013566 0 024676 744 832avx800gcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1511212780 0 024157 752 832round2gcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1521610730 0 021860 744 832round2gcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1533610895 0 020952 728 800round2gcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1562814997 0 026076 744 832avxgcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1664814224 0 027549 752 832avx800gcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1680413153 0 023176 728 800avx800gcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1731216057 0 029365 752 832avxgcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1746413493 0 024845 752 832avx800gcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1756814210 0 024232 728 800avxgcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1816815326 0 026661 752 832avxgcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
3158413634 0 026997 752 832round1gcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
3257210603 0 025670 792 776round1clang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
326969991 0 024894 792 776round1clang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
342569675 0 021069 752 832round1gcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
351488283 0 020175 784 856round1clang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
352128167 0 018216 728 800round1gcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
353808935 0 019934 792 760round1clang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
376649098 0 020301 752 832round1gcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
879521632 0 014965 752 832compactgcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
924681926 0 016766 792 776compactclang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1137005490 0 020542 792 776compactclang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1643763863 0 017189 752 83232gcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1714002565 0 013885 752 83232gcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1868721767 0 016798 792 776refclang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
187000997 0 012052 744 83232gcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1924881467 0 015078 792 760compactclang_-mcpu=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1940721767 0 016606 792 776refclang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1971441764 0 016806 792 77632clang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1984441764 0 016598 792 77632clang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
201908820 0 010816 728 80032gcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
2021601068 0 014678 792 76032clang_-mcpu=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
2061241019 0 012871 784 85632clang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
206264843 0 011798 792 76032clang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
2400404472 0 017797 752 832refgcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
4084241157 0 014766 792 760refclang_-mcpu=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1094084499 0 012351 784 856refclang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1094816543 0 011494 792 760refclang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
11421761356 0 012669 752 832refgcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1320328374 0 012231 784 856compactclang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1320436412 0 011358 792 760compactclang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1480900418 0 011773 752 832compactgcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
1761784332 0 010328 728 800compactgcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
2088992611 0 011668 744 832refgcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
2443500402 0 011492 744 832compactgcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808
2604116495 0 010488 728 800refgcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024080920240808

Compiler output


mult768.c: mult768.c:209:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'avx'
mult768.c:   x = const_x16(0);
mult768.c:       ^
mult768.c: mult768.c:11:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c:                   ^
mult768.c: mult768.c:209:7: error: AVX vector return of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult768.c: mult768.c:11:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c:                   ^
mult768.c: mult768.c:210:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'avx'
mult768.c:   for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c:                                   ^
mult768.c: mult768.c:10:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c:                        ^
mult768.c: mult768.c:210:35: error: AVX vector argument of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult768.c: mult768.c:10:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c:                        ^
mult768.c: mult768.c:211:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_avx_constbranchindex' that is compiled without support for 'avx'
mult768.c:   for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c:                                   ^
mult768.c: mult768.c:10:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
avxclang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))

Compiler output


mult3_32x32.c: mult3_32x32.c:255:6: warning: unused function 'mult3_64x64_inplace_karatsuba_x2' [-Wunused-function]
mult3_32x32.c: void mult3_64x64_inplace_karatsuba_x2( __m256i * a0 , __m256i * a1) {
mult3_32x32.c:      ^
mult3_32x32.c: mult3_32x32.c:668:6: warning: unused function 'mult3_256x256_inplace_karatsuba' [-Wunused-function]
mult3_32x32.c: void mult3_256x256_inplace_karatsuba( __m256i * a ) {
mult3_32x32.c:      ^
mult3_32x32.c: mult3_32x32.c:765:9: warning: unused function 'right_shift_1' [-Wunused-function]
mult3_32x32.c: __m256i right_shift_1( __m256i a0 )
mult3_32x32.c:         ^
mult3_32x32.c: mult3_32x32.c:779:9: warning: unused function 'right_shift_2' [-Wunused-function]
mult3_32x32.c: __m256i right_shift_2( __m256i a0 )
mult3_32x32.c:         ^
mult3_32x32.c: mult3_32x32.c:793:9: warning: unused function 'left_shift_1' [-Wunused-function]
mult3_32x32.c: __m256i left_shift_1( __m256i a1 )
mult3_32x32.c:         ^
mult3_32x32.c: mult3_32x32.c:800:9: warning: unused function 'left_shift_2_high' [-Wunused-function]
mult3_32x32.c: __m256i left_shift_2_high( __m256i a1 , __m256i a0 )
mult3_32x32.c:         ^
mult3_32x32.c: mult3_32x32.c:807:9: warning: unused function 'left_shift_2' [-Wunused-function]
mult3_32x32.c: __m256i left_shift_2( __m256i a1 )
mult3_32x32.c:         ^
mult3_32x32.c: 7 warnings generated.

Number of similar (implementation,compiler) pairs: 4, namely:
ImplementationCompiler
avx2unsignedclang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avx2unsignedclang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avx2unsignedclang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avx2unsignedclang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))

Compiler output


mult.c: mult.c:48:59: error: always_inline function '_mm256_load_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'avx'
mult.c:     _mm256_store_si256( (__m256i*)(a+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(a+i))) );
mult.c:                                                           ^
mult.c: mult.c:48:59: error: AVX vector return of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult.c: mult.c:48:43: warning: AVX vector argument of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI [-Wpsabi]
mult.c:     _mm256_store_si256( (__m256i*)(a+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(a+i))) );
mult.c:                                           ^
mult.c: mult.c:48:5: error: always_inline function '_mm256_store_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'avx'
mult.c:     _mm256_store_si256( (__m256i*)(a+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(a+i))) );
mult.c:     ^
mult.c: mult.c:48:5: error: AVX vector argument of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult.c: mult.c:51:59: error: always_inline function '_mm256_load_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'avx'
mult.c:     _mm256_store_si256( (__m256i*)(b+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(b+i))) );
mult.c:                                                           ^
mult.c: mult.c:51:59: error: AVX vector return of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult.c: mult.c:51:43: warning: AVX vector argument of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI [-Wpsabi]
mult.c:     _mm256_store_si256( (__m256i*)(b+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(b+i))) );
mult.c:                                           ^
mult.c: mult.c:51:5: error: always_inline function '_mm256_store_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'avx'
mult.c:     _mm256_store_si256( (__m256i*)(b+i) , cvt_to_unsigned(_mm256_load_si256((__m256i*)(b+i))) );
mult.c:     ^
mult.c: mult.c:51:5: error: AVX vector argument of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult.c: mult.c:64:54: error: always_inline function '_mm256_load_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_avx2unsigned_constbranchindex' that is compiled without support for 'avx'
mult.c:     _mm256_store_si256( (__m256i*)(c+i) , cvt_to_int(_mm256_load_si256((__m256i*)(c+i))) );
mult.c:                                                      ^
mult.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
avx2unsignedclang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))

Compiler output


mult3_32x32.c: mult3_32x32.c:668:6: warning: 'mult3_256x256_inplace_karatsuba' defined but not used [-Wunused-function]
mult3_32x32.c:   668 | void mult3_256x256_inplace_karatsuba( __m256i * a ) {
mult3_32x32.c:       |      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Number of similar (implementation,compiler) pairs: 4, namely:
ImplementationCompiler
avx2unsignedgcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avx2unsignedgcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avx2unsignedgcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avx2unsignedgcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)

Compiler output


mult768.c: mult768.c:209:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'avx'
mult768.c:   x = const_x16(0);
mult768.c:       ^
mult768.c: mult768.c:11:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c:                   ^
mult768.c: mult768.c:209:7: error: AVX vector return of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult768.c: mult768.c:11:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c:                   ^
mult768.c: mult768.c:210:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'avx'
mult768.c:   for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c:                                   ^
mult768.c: mult768.c:10:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c:                        ^
mult768.c: mult768.c:210:35: error: AVX vector argument of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult768.c: mult768.c:10:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c:                        ^
mult768.c: mult768.c:211:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_avx800_constbranchindex' that is compiled without support for 'avx'
mult768.c:   for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c:                                   ^
mult768.c: mult768.c:10:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
avx800clang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))

Compiler output


mult.c: mult.c:147:22: error: invalid output size for constraint '=&x'
mult.c:   MULSTEP_fromzero(0,h0,h1,h2,h3,h4)
mult.c:                      ^
mult.c: mult.c:149:26: error: invalid output size for constraint '+x'
mult.c:     MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c:                          ^
mult.c: mult.c:150:26: error: invalid output size for constraint '+x'
mult.c:     MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c:                          ^
mult.c: mult.c:151:26: error: invalid output size for constraint '+x'
mult.c:     MULSTEP_noload(j + 3,h3,h4,h0,h1,h2)
mult.c:                          ^
mult.c: mult.c:152:26: error: invalid output size for constraint '+x'
mult.c:     MULSTEP_noload(j + 4,h4,h0,h1,h2,h3)
mult.c:                          ^
mult.c: mult.c:153:26: error: invalid output size for constraint '+x'
mult.c:     MULSTEP_noload(j + 5,h0,h1,h2,h3,h4)
mult.c:                          ^
mult.c: mult.c:155:24: error: invalid output size for constraint '+x'
mult.c:   MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c:                        ^
mult.c: mult.c:156:24: error: invalid output size for constraint '+x'
mult.c:   MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c:                        ^
mult.c: mult.c:157:24: error: invalid output size for constraint '+x'
mult.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
round1clang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))

Compiler output


mult.c: mult.c: In function 'mult768_mix2_m256i':
mult.c: mult.c:568:3: warning: 'mult96x16' accessing 6144 bytes in a region of size 512 [-Wstringop-overflow=]
mult.c:   568 |   mult96x16(hkara[12],fkara[6],(__m256i *) (1 + (__m128i *) gkara));
mult.c:       |   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
mult.c: mult.c:568:3: note: referencing argument 1 of type '__m256i[192]'
mult.c: mult.c:568:3: warning: 'mult96x16' reading 3072 bytes from a region of size 512 [-Wstringop-overread]
mult.c: mult.c:568:3: note: referencing argument 2 of type 'const __m256i[96]'
mult.c: mult.c:568:3: warning: 'mult96x16' reading 3072 bytes from a region of size 3056 [-Wstringop-overread]
mult.c: mult.c:568:3: note: referencing argument 3 of type 'const __m256i[96]'
mult.c: mult.c:279:13: note: in a call to function 'mult96x16'
mult.c:   279 | static void mult96x16(__m256i h[192],const __m256i f[96],const __m256i g[96])
mult.c:       |             ^~~~~~~~~
mult.c: mult.c:569:3: warning: 'mult96x16' accessing 6144 bytes in a region of size 512 [-Wstringop-overflow=]
mult.c:   569 |   mult96x16(hkara[0],fkara[0],gkara[0]);
mult.c:       |   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
mult.c: mult.c:569:3: note: referencing argument 1 of type '__m256i[192]'
mult.c: mult.c:569:3: warning: 'mult96x16' reading 3072 bytes from a region of size 512 [-Wstringop-overread]
mult.c: mult.c:569:3: note: referencing argument 2 of type 'const __m256i[96]'
mult.c: mult.c:569:3: warning: 'mult96x16' reading 3072 bytes from a region of size 1024 [-Wstringop-overread]
mult.c: mult.c:569:3: note: referencing argument 3 of type 'const __m256i[96]'
mult.c: mult.c:279:13: note: in a call to function 'mult96x16'
mult.c:   279 | static void mult96x16(__m256i h[192],const __m256i f[96],const __m256i g[96])
mult.c:       |             ^~~~~~~~~

Number of similar (implementation,compiler) pairs: 4, namely:
ImplementationCompiler
round1gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round1gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round1gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round1gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)

Compiler output


ntt.c: ntt.c:444:35: warning: unused variable 'h0' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                   ^
ntt.c: ntt.c:444:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                      ^
ntt.c: ntt.c:444:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                         ^
ntt.c: ntt.c:444:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                            ^
ntt.c: ntt.c:741:35: warning: unused variable 'h0' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                   ^
ntt.c: ntt.c:741:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                      ^
ntt.c: ntt.c:741:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                         ^
ntt.c: ntt.c:741:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                            ^
ntt.c: ntt.c:742:10: warning: unused variable 'origf' [-Wunused-variable]
ntt.c: ...

Number of similar (implementation,compiler) pairs: 4, namely:
ImplementationCompiler
round2clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
round2clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
round2clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
round2clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))

Compiler output


mult768.c: mult768.c:210:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'avx'
mult768.c:   x = const_x16(0);
mult768.c:       ^
mult768.c: mult768.c:11:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c:                   ^
mult768.c: mult768.c:210:7: error: AVX vector return of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult768.c: mult768.c:11:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c:                   ^
mult768.c: mult768.c:211:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'avx'
mult768.c:   for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c:                                   ^
mult768.c: mult768.c:10:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c:                        ^
mult768.c: mult768.c:211:35: error: AVX vector argument of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult768.c: mult768.c:10:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c:                        ^
mult768.c: mult768.c:212:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_mult3sntrup761_round2_constbranchindex' that is compiled without support for 'avx'
mult768.c:   for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c:                                   ^
mult768.c: mult768.c:10:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
round2clang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))

Compiler output


ntt.c: ntt.c: In function 'ntt512':
ntt.c: ntt.c:444:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c:   444 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:       |                                            ^~
ntt.c: ntt.c:444:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   444 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:       |                                         ^~
ntt.c: ntt.c:444:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   444 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:       |                                      ^~
ntt.c: ntt.c:444:35: warning: unused variable 'h0' [-Wunused-variable]
ntt.c:   444 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:       |                                   ^~
ntt.c: ntt.c: In function 'invntt512':
ntt.c: ntt.c:742:10: warning: unused variable 'origf' [-Wunused-variable]
ntt.c:   742 |   int16 *origf = f;
ntt.c:       |          ^~~~~
ntt.c: ntt.c:741:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c:   741 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:       |                                            ^~
ntt.c: ntt.c:741:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   741 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:       |                                         ^~
ntt.c: ntt.c:741:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   741 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c: ...

Number of similar (implementation,compiler) pairs: 4, namely:
ImplementationCompiler
round2gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round2gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round2gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round2gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)

Passed TIMECOP


TIMECOP iterations: 1

Number of similar (implementation,compiler) pairs: 67, namely:
ImplementationCompiler
32clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
32clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
32clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
32clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
32clang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
32gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
32gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
32gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
32gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avxclang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avxclang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avxclang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avxclang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avxgcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avxgcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avxgcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avxgcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avx2unsignedclang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avx2unsignedclang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avx2unsignedclang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avx2unsignedclang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avx2unsignedgcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avx2unsignedgcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avx2unsignedgcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avx2unsignedgcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avx800clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avx800clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avx800clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avx800clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
avx800gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avx800gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avx800gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
avx800gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
compactclang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
compactclang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
compactclang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
compactclang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
compactclang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
compactgcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
compactgcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
compactgcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
compactgcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
refclang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
refclang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
refclang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
refclang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
refclang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
refgcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
refgcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
refgcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
refgcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round1clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
round1clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
round1clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
round1clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
round1gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round1gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round1gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round1gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round2clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
round2clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
round2clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
round2clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Debian_Clang_16.0.6_(27+b1))
round2gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round2gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round2gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)
round2gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (13.3.0)