Implementation notes: amd64, titan0, crypto_core/multsntrup761

Computer: titan0
Microarchitecture: amd64; Haswell+AES (306c3)
Architecture: amd64
CPU ID: GenuineIntel-000306c3-bfebfbff
SUPERCOP version: 20240625
Operation: crypto_core
Primitive: multsntrup761
TimeObject sizeTest sizeImplementationCompilerBenchmark dateSUPERCOP version
1649719532 0 034920 860 960avx800clang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1654220742 0 036416 860 960avx800clang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1657721134 0 036808 860 960avxclang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1664619924 0 035312 860 960avxclang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1720419494 0 034013 804 992avx800gcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1765720058 0 034541 804 992avxgcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1810719815 0 031504 860 928avx800clang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1812419690 0 031376 860 928avxclang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1877817544 0 029922 852 1024avx800clang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1893517797 0 030170 852 1024avxclang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1956418531 0 030708 796 992avx800gcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1969018160 0 029348 788 960avx800gcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1977519164 0 031316 796 992avxgcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
2005518776 0 029932 788 960avxgcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
2030218464 0 030973 804 992avx800gcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
2076619028 0 031501 804 992avxgcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
2905512862 0 028600 860 960round1clang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
2949511298 0 026768 860 960round1clang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
345068564 0 021042 852 1024round1clang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
346709090 0 020784 860 928round1clang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1890164458 0 018941 804 992refgcc_-march=native_-mtune=native_-O3_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
3228633053 0 018768 860 960refclang_-march=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
3343333053 0 018488 860 960refclang_-march=native_-O2_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
6152391787 0 015856 860 928refclang_-mcpu=native_-O3_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1100928634 0 012288 860 928refclang_-march=native_-O_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1103237591 0 013018 852 1024refclang_-march=native_-Os_-fwrapv_-Qunused-arguments_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1161468646 0 012764 796 992refgcc_-march=native_-mtune=native_-O_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1208291697 0 013133 804 992refgcc_-march=native_-mtune=native_-O2_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625
1489565556 0 011676 788 960refgcc_-march=native_-mtune=native_-Os_-fwrapv_-fPIC_-fPIE_-gdwarf-4_-Wall2024062620240625

Compiler output


mult768.c: mult768.c:265:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'avx', but would be inlined into function 'crypto_core_multsntrup761_avx_constbranchindex' that is compiled without support for 'avx'
mult768.c:   x = const_x16(0);
mult768.c:       ^
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c:                   ^
mult768.c: mult768.c:265:7: error: AVX vector return of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c:                   ^
mult768.c: mult768.c:266:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_multsntrup761_avx_constbranchindex' that is compiled without support for 'avx'
mult768.c:   for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c:                                   ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c:                        ^
mult768.c: mult768.c:266:35: error: AVX vector argument of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c:                        ^
mult768.c: mult768.c:267:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_multsntrup761_avx_constbranchindex' that is compiled without support for 'avx'
mult768.c:   for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c:                                   ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
avxclang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)

Compiler output


mult768.c: mult768.c:265:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'avx', but would be inlined into function 'crypto_core_multsntrup761_avx800_constbranchindex' that is compiled without support for 'avx'
mult768.c:   x = const_x16(0);
mult768.c:       ^
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c:                   ^
mult768.c: mult768.c:265:7: error: AVX vector return of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c:                   ^
mult768.c: mult768.c:266:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_multsntrup761_avx800_constbranchindex' that is compiled without support for 'avx'
mult768.c:   for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c:                                   ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c:                        ^
mult768.c: mult768.c:266:35: error: AVX vector argument of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c:                        ^
mult768.c: mult768.c:267:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_multsntrup761_avx800_constbranchindex' that is compiled without support for 'avx'
mult768.c:   for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c:                                   ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
avx800clang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)

Compiler output


mult.c: mult.c:146:22: error: invalid output size for constraint '=&x'
mult.c:   MULSTEP_fromzero(0,h0,h1,h2,h3,h4)
mult.c:                      ^
mult.c: mult.c:148:26: error: invalid output size for constraint '+x'
mult.c:     MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c:                          ^
mult.c: mult.c:149:26: error: invalid output size for constraint '+x'
mult.c:     MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c:                          ^
mult.c: mult.c:150:26: error: invalid output size for constraint '+x'
mult.c:     MULSTEP_noload(j + 3,h3,h4,h0,h1,h2)
mult.c:                          ^
mult.c: mult.c:151:26: error: invalid output size for constraint '+x'
mult.c:     MULSTEP_noload(j + 4,h4,h0,h1,h2,h3)
mult.c:                          ^
mult.c: mult.c:152:26: error: invalid output size for constraint '+x'
mult.c:     MULSTEP_noload(j + 5,h0,h1,h2,h3,h4)
mult.c:                          ^
mult.c: mult.c:154:24: error: invalid output size for constraint '+x'
mult.c:   MULSTEP_noload(j + 1,h1,h2,h3,h4,h0)
mult.c:                        ^
mult.c: mult.c:155:24: error: invalid output size for constraint '+x'
mult.c:   MULSTEP_noload(j + 2,h2,h3,h4,h0,h1)
mult.c:                        ^
mult.c: mult.c:156:24: error: invalid output size for constraint '+x'
mult.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
round1clang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)

Compiler output


mult.c: In function 'mult768_mix2_m256i',
mult.c:     inlined from 'crypto_core_multsntrup761_round1_constbranchindex' at mult.c:755:3:
mult.c: mult.c:567:3: warning: 'mult96x16' accessing 6144 bytes in a region of size 512 [-Wstringop-overflow=]
mult.c:   567 |   mult96x16(hkara[12],fkara[6],(__m256i *) (1 + (__m128i *) gkara));
mult.c:       |   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
mult.c: mult.c: In function 'crypto_core_multsntrup761_round1_constbranchindex':
mult.c: mult.c:567:3: note: referencing argument 1 of type '__m256i *'
mult.c: In function 'mult768_mix2_m256i',
mult.c:     inlined from 'crypto_core_multsntrup761_round1_constbranchindex' at mult.c:755:3:
mult.c: mult.c:567:3: warning: 'mult96x16' reading 3072 bytes from a region of size 512 [-Wstringop-overread]
mult.c: mult.c: In function 'crypto_core_multsntrup761_round1_constbranchindex':
mult.c: mult.c:567:3: note: referencing argument 2 of type 'const __m256i *'
mult.c: In function 'mult768_mix2_m256i',
mult.c:     inlined from 'crypto_core_multsntrup761_round1_constbranchindex' at mult.c:755:3:
mult.c: mult.c:567:3: warning: 'mult96x16' reading 3072 bytes from a region of size 3056 [-Wstringop-overread]
mult.c: mult.c: In function 'crypto_core_multsntrup761_round1_constbranchindex':
mult.c: mult.c:567:3: note: referencing argument 3 of type 'const __m256i *'
mult.c: mult.c:278:13: note: in a call to function 'mult96x16'
mult.c:   278 | static void mult96x16(__m256i h[192],const __m256i f[96],const __m256i g[96])
mult.c:       |             ^~~~~~~~~
mult.c: In function 'mult768_mix2_m256i',
mult.c:     inlined from 'crypto_core_multsntrup761_round1_constbranchindex' at mult.c:755:3:
mult.c: mult.c:568:3: warning: 'mult96x16' accessing 6144 bytes in a region of size 512 [-Wstringop-overflow=]
mult.c:   568 |   mult96x16(hkara[0],fkara[0],gkara[0]);
mult.c:       |   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
mult.c: ...

Number of similar (implementation,compiler) pairs: 4, namely:
ImplementationCompiler
round1gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
round1gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
round1gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
round1gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)

Compiler output


ntt.c: ntt.c:562:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                      ^
ntt.c: ntt.c:562:35: warning: unused variable 'h0' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                   ^
ntt.c: ntt.c:562:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                            ^
ntt.c: ntt.c:562:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                         ^
ntt.c: ntt.c:864:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                         ^
ntt.c: ntt.c:864:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                            ^
ntt.c: ntt.c:864:35: warning: unused variable 'h0' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                   ^
ntt.c: ntt.c:864:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                      ^
ntt.c: ntt.c:865:10: warning: unused variable 'origf' [-Wunused-variable]
ntt.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
round2clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)

Compiler output


ntt.c: ntt.c:562:35: warning: unused variable 'h0' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                   ^
ntt.c: ntt.c:562:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                            ^
ntt.c: ntt.c:562:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                         ^
ntt.c: ntt.c:562:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                      ^
ntt.c: ntt.c:864:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                         ^
ntt.c: ntt.c:864:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                      ^
ntt.c: ntt.c:865:10: warning: unused variable 'origf' [-Wunused-variable]
ntt.c:   int16 *origf = f;
ntt.c:          ^
ntt.c: ntt.c:864:35: warning: unused variable 'h0' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                   ^
ntt.c: ntt.c:864:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
round2clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)

Compiler output


ntt.c: ntt.c:562:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                      ^
ntt.c: ntt.c:562:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                         ^
ntt.c: ntt.c:562:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                            ^
ntt.c: ntt.c:562:35: warning: unused variable 'h0' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                   ^
ntt.c: ntt.c:864:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                      ^
ntt.c: ntt.c:864:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                         ^
ntt.c: ntt.c:865:10: warning: unused variable 'origf' [-Wunused-variable]
ntt.c:   int16 *origf = f;
ntt.c:          ^
ntt.c: ntt.c:864:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                            ^
ntt.c: ntt.c:864:35: warning: unused variable 'h0' [-Wunused-variable]
ntt.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
round2clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)

Compiler output


ntt.c: ntt.c:562:35: warning: unused variable 'h0' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                   ^
ntt.c: ntt.c:562:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                            ^
ntt.c: ntt.c:562:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                         ^
ntt.c: ntt.c:562:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                      ^
ntt.c: ntt.c:864:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                            ^
ntt.c: ntt.c:864:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                         ^
ntt.c: ntt.c:865:10: warning: unused variable 'origf' [-Wunused-variable]
ntt.c:   int16 *origf = f;
ntt.c:          ^
ntt.c: ntt.c:864:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:                                      ^
ntt.c: ntt.c:864:35: warning: unused variable 'h0' [-Wunused-variable]
ntt.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
round2clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)

Compiler output


mult768.c: mult768.c:266:7: error: always_inline function '_mm256_set1_epi16' requires target feature 'avx', but would be inlined into function 'crypto_core_multsntrup761_round2_constbranchindex' that is compiled without support for 'avx'
mult768.c:   x = const_x16(0);
mult768.c:       ^
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c:                   ^
mult768.c: mult768.c:266:7: error: AVX vector return of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult768.c: mult768.c:10:19: note: expanded from macro 'const_x16'
mult768.c: #define const_x16 _mm256_set1_epi16
mult768.c:                   ^
mult768.c: mult768.c:267:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_multsntrup761_round2_constbranchindex' that is compiled without support for 'avx'
mult768.c:   for (i = p&~15;i < 768;i += 16) store_x16(&f[i],x);
mult768.c:                                   ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c:                        ^
mult768.c: mult768.c:267:35: error: AVX vector argument of type '__m256i' (vector of 4 'long long' values) without 'avx' enabled changes the ABI
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c:                        ^
mult768.c: mult768.c:268:35: error: always_inline function '_mm256_storeu_si256' requires target feature 'avx', but would be inlined into function 'crypto_core_multsntrup761_round2_constbranchindex' that is compiled without support for 'avx'
mult768.c:   for (i = p&~15;i < 768;i += 16) store_x16(&g[i],x);
mult768.c:                                   ^
mult768.c: mult768.c:9:24: note: expanded from macro 'store_x16'
mult768.c: #define store_x16(p,v) _mm256_storeu_si256((int16x16 *) (p),(v))
mult768.c: ...

Number of similar (implementation,compiler) pairs: 1, namely:
ImplementationCompiler
round2clang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)

Compiler output


ntt.c: ntt.c: In function 'ntt512':
ntt.c: ntt.c:562:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c:   562 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:       |                                            ^~
ntt.c: ntt.c:562:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   562 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:       |                                         ^~
ntt.c: ntt.c:562:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   562 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:       |                                      ^~
ntt.c: ntt.c:562:35: warning: unused variable 'h0' [-Wunused-variable]
ntt.c:   562 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:       |                                   ^~
ntt.c: ntt.c: In function 'invntt512':
ntt.c: ntt.c:865:10: warning: unused variable 'origf' [-Wunused-variable]
ntt.c:   865 |   int16 *origf = f;
ntt.c:       |          ^~~~~
ntt.c: ntt.c:864:44: warning: unused variable 'h3' [-Wunused-variable]
ntt.c:   864 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:       |                                            ^~
ntt.c: ntt.c:864:41: warning: unused variable 'h2' [-Wunused-variable]
ntt.c:   864 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c:       |                                         ^~
ntt.c: ntt.c:864:38: warning: unused variable 'h1' [-Wunused-variable]
ntt.c:   864 |   __m256i f0,f1,f2,f3,g0,g1,g2,g3,h0,h1,h2,h3;
ntt.c: ...

Number of similar (implementation,compiler) pairs: 4, namely:
ImplementationCompiler
round2gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
round2gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
round2gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
round2gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)

Passed TIMECOP


TIMECOP iterations: 1

Number of similar (implementation,compiler) pairs: 29, namely:
ImplementationCompiler
avxclang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
avxclang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
avxclang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
avxclang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
avxgcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
avxgcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
avxgcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
avxgcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
avx800clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
avx800clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
avx800clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
avx800clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
avx800gcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
avx800gcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
avx800gcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
avx800gcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
refclang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
refclang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
refclang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
refclang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
refclang -mcpu=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
refgcc -march=native -mtune=native -O2 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
refgcc -march=native -mtune=native -O3 -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
refgcc -march=native -mtune=native -O -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
refgcc -march=native -mtune=native -Os -fwrapv -fPIC -fPIE -gdwarf-4 -Wall (11.4.0)
round1clang -march=native -O2 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
round1clang -march=native -O3 -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
round1clang -march=native -O -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)
round1clang -march=native -Os -fwrapv -Qunused-arguments -fPIC -fPIE -gdwarf-4 -Wall (Ubuntu_Clang_14.0.0)